Added LinAlg.euclideanDistance, optimized softmax, rebuilt SO.

This commit is contained in:
novak_99 2021-06-01 18:29:35 -07:00
parent 629f63059c
commit bc00f633ac
10 changed files with 41 additions and 50 deletions

View File

@ -50,16 +50,18 @@ namespace MLPP{
}
// Softmax: a[i] = exp(z[i]) / sum_j exp(z[j]).
// NOTE(review): this span is a rendered diff hunk — it shows BOTH the removed
// O(n^2) implementation (per-element sum recomputation) and the added O(n)
// implementation (exp computed once, single sum). In the committed file only
// the second version remains.
std::vector<double> Activation::softmax(std::vector<double> z){
LinAlg alg;
std::vector<double> a;
a.resize(z.size());
// Removed (old) implementation: recomputes the full exponential sum for
// every output element, giving quadratic time in z.size().
for(int i = 0; i < z.size(); i++){
double sum = 0;
for(int j = 0; j < z.size(); j++){
sum += exp(z[j]);
}
a[i] = exp(z[i]) / sum;
}
// Added (new) implementation: exponentiate once via LinAlg::exp, then
// normalize by the single shared sum — linear time.
std::vector<double> expZ = alg.exp(z);
double sum = 0;
for(int i = 0; i < z.size(); i++){
sum += expZ[i];
}
for(int i = 0; i < z.size(); i++){
a[i] = expZ[i] / sum;
}
return a;
}
@ -80,16 +82,7 @@ namespace MLPP{
double C = -*max_element(z.begin(), z.end());
z = alg.scalarAdd(C, z);
a.resize(z.size());
for(int i = 0; i < z.size(); i++){
double sum = 0;
for(int j = 0; j < z.size(); j++){
sum += exp(z[j]);
}
a[i] = exp(z[i]) / sum;
}
return a;
return softmax(z);
}
std::vector<std::vector<double>> Activation::adjSoftmax(std::vector<std::vector<double>> z){

View File

@ -24,11 +24,13 @@ namespace MLPP{
}
std::vector<std::vector<double>> KMeans::modelSetTest(std::vector<std::vector<double>> X){
LinAlg alg;
std::vector<std::vector<double>> closestCentroids;
for(int i = 0; i < inputSet.size(); i++){
std::vector<double> closestCentroid = mu[0];
for(int j = 0; j < r[0].size(); j++){
if(euclideanDistance(X[i], mu[j]) < euclideanDistance(X[i], closestCentroid)){
bool isCentroidCloser = alg.euclideanDistance(X[i], mu[j]) < alg.euclideanDistance(X[i], closestCentroid);
if(isCentroidCloser){
closestCentroid = mu[j];
}
}
@ -38,9 +40,10 @@ namespace MLPP{
}
std::vector<double> KMeans::modelTest(std::vector<double> x){
LinAlg alg;
std::vector<double> closestCentroid = mu[0];
for(int j = 0; j < mu.size(); j++){
if(euclideanDistance(x, mu[j]) < euclideanDistance(x, closestCentroid)){
if(alg.euclideanDistance(x, mu[j]) < alg.euclideanDistance(x, closestCentroid)){
closestCentroid = mu[j];
}
}
@ -81,6 +84,7 @@ namespace MLPP{
}
std::vector<double> KMeans::silhouette_scores(){
LinAlg alg;
std::vector<std::vector<double>> closestCentroids = modelSetTest(inputSet);
std::vector<double> silhouette_scores;
for(int i = 0; i < inputSet.size(); i++){
@ -88,7 +92,7 @@ namespace MLPP{
double a = 0;
for(int j = 0; j < inputSet.size(); j++){
if(i != j && r[i] == r[j]){
a += euclideanDistance(inputSet[i], inputSet[j]);
a += alg.euclideanDistance(inputSet[i], inputSet[j]);
}
}
// NORMALIZE a[i]
@ -101,7 +105,7 @@ namespace MLPP{
if(closestCentroids[i] != mu[j]){
double sum = 0;
for(int k = 0; k < inputSet.size(); k++){
sum += euclideanDistance(inputSet[i], inputSet[k]);
sum += alg.euclideanDistance(inputSet[i], inputSet[k]);
}
// NORMALIZE b[i]
double k_clusterSize = 0;
@ -130,6 +134,7 @@ namespace MLPP{
// This simply computes r_nk
void KMeans::Evaluate(){
LinAlg alg;
r.resize(inputSet.size());
for(int i = 0; i < r.size(); i++){
@ -139,7 +144,8 @@ namespace MLPP{
for(int i = 0; i < r.size(); i++){
std::vector<double> closestCentroid = mu[0];
for(int j = 0; j < r[0].size(); j++){
if(euclideanDistance(inputSet[i], mu[j]) < euclideanDistance(inputSet[i], closestCentroid)){
bool isCentroidCloser = alg.euclideanDistance(inputSet[i], mu[j]) < alg.euclideanDistance(inputSet[i], closestCentroid);
if(isCentroidCloser){
closestCentroid = mu[j];
}
}
@ -190,6 +196,7 @@ namespace MLPP{
}
void KMeans::kmeansppInitialization(int k){
LinAlg alg;
std::random_device rd;
std::default_random_engine generator(rd());
std::uniform_int_distribution<int> distribution(0, int(inputSet.size() - 1));
@ -203,7 +210,7 @@ namespace MLPP{
AS TO SPREAD OUT THE CLUSTER CENTROIDS. */
double sum = 0;
for(int k = 0; k < mu.size(); k++){
sum += euclideanDistance(inputSet[j], mu[k]);
sum += alg.euclideanDistance(inputSet[j], mu[k]);
}
if(sum * sum > max_dist){
farthestCentroid = inputSet[j];
@ -224,13 +231,4 @@ namespace MLPP{
}
return sum;
}
// Euclidean (L2) distance between two equal-length vectors:
// sqrt(sum_i (A[i] - B[i])^2). Assumes A and B have the same size.
double KMeans::euclideanDistance(std::vector<double> A, std::vector<double> B){
double sumOfSquares = 0;
for(int idx = 0; idx < A.size(); idx++){
double diff = A[idx] - B[idx];
sumOfSquares += diff * diff;
}
return sqrt(sumOfSquares);
}
}

View File

@ -811,6 +811,15 @@ namespace MLPP{
return b;
}
// Euclidean (L2) distance between vectors a and b.
// Accumulates the squared component-wise differences, then takes the root.
// Assumes a and b have the same length.
double LinAlg::euclideanDistance(std::vector<double> a, std::vector<double> b){
double squaredDist = 0;
for(size_t i = 0; i < a.size(); ++i){
const double delta = a[i] - b[i];
squaredDist += delta * delta;
}
return std::sqrt(squaredDist);
}
double LinAlg::norm_sq(std::vector<double> a){
double n_sq = 0;
for(int i = 0; i < a.size(); i++){

View File

@ -137,6 +137,8 @@ namespace MLPP{
double min(std::vector<double> a);
std::vector<double> round(std::vector<double> a);
double euclideanDistance(std::vector<double> a, std::vector<double> b);
double norm_sq(std::vector<double> a);

View File

@ -34,7 +34,6 @@ namespace MLPP{
static void UI(std::vector<double> weights, double bias);
static void UI(std::vector<double> weights, std::vector<double> initial, double bias);
static void UI(std::vector<std::vector<double>>, std::vector<double> bias);
static void CostInfo(int epoch, double cost_prev, double Cost);
// F1 score, Precision/Recall, TP, FP, TN, FN, etc.

View File

@ -65,6 +65,7 @@ namespace MLPP{
}
std::vector<double> kNN::nearestNeighbors(std::vector<double> x){
LinAlg alg;
// The nearest neighbors
std::vector<double> knn;
@ -73,22 +74,14 @@ namespace MLPP{
for(int i = 0; i < k; i++){
int neighbor = 0;
for(int j = 0; j < inputUseSet.size(); j++){
if(euclideanDistance(x, inputUseSet[j]) < euclideanDistance(x, inputUseSet[neighbor])){
bool isNeighborNearer = alg.euclideanDistance(x, inputUseSet[j]) < alg.euclideanDistance(x, inputUseSet[neighbor]);
if(isNeighborNearer){
neighbor = j;
}
}
knn.push_back(neighbor);
inputUseSet.erase(inputUseSet.begin() + neighbor);
inputUseSet.erase(inputUseSet.begin() + neighbor); // This is why we maintain an extra input"Use"Set
}
return knn;
}
// Straight-line (Euclidean) distance between the feature vectors A and B,
// used to rank candidate neighbors. A and B are assumed equal-length.
double kNN::euclideanDistance(std::vector<double> A, std::vector<double> B){
double acc = 0;
int n = A.size();
for(int i = 0; i < n; ++i){
acc += (A[i] - B[i]) * (A[i] - B[i]);
}
return sqrt(acc);
}
}

View File

@ -23,9 +23,6 @@ namespace MLPP{
// Private Model Functions
std::vector<double> nearestNeighbors(std::vector<double> x);
int determineClass(std::vector<double> knn);
// Helper Functions
double euclideanDistance(std::vector<double> A, std::vector<double> B);
// Model Inputs and Parameters
std::vector<std::vector<double>> inputSet;

Binary file not shown.

BIN
a.out

Binary file not shown.

View File

@ -186,6 +186,7 @@ int main() {
// std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl;
// // SOFTMAX REGRESSION
// time_t start = time(0);
// std::vector<std::vector<double>> inputSet;
// std::vector<double> tempOutputSet;
// data.setData(4, "/Users/marcmelikyan/Desktop/Data/Iris.csv", inputSet, tempOutputSet);
@ -194,7 +195,6 @@ int main() {
// SoftmaxReg model(inputSet, outputSet);
// model.SGD(0.001, 20000, 0);
// alg.printMatrix(model.modelSetTest(inputSet));
// std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl;
// // MLP
// std::vector<std::vector<double>> inputSet = {{0,0,1,1}, {0,1,0,1}};
@ -354,11 +354,11 @@ int main() {
// std::cout << avn.softsign(z_s, 1) << std::endl;
// std::vector<double> z_v = {0.001, 5};
// alg.printVector(avn.softsign(z_v));
// alg.printVector(avn.softmax(z_v));
// alg.printVector(avn.softsign(z_v, 1));
// std::vector<std::vector<double>> Z_m = {{0.001, 5}};
// alg.printMatrix(avn.softsign(Z_m));
// alg.printMatrix(avn.softmax(Z_m));
// alg.printMatrix(avn.softsign(Z_m, 1));
// std::cout << alg.trace({{1,2}, {3,4}}) << std::endl;