diff --git a/MLPP/Activation/Activation.cpp b/MLPP/Activation/Activation.cpp index f868745..cb0070b 100644 --- a/MLPP/Activation/Activation.cpp +++ b/MLPP/Activation/Activation.cpp @@ -50,16 +50,18 @@ namespace MLPP{ } std::vector Activation::softmax(std::vector z){ + LinAlg alg; std::vector a; a.resize(z.size()); - for(int i = 0; i < z.size(); i++){ - double sum = 0; - for(int j = 0; j < z.size(); j++){ - sum += exp(z[j]); - } - a[i] = exp(z[i]) / sum; - } + std::vector expZ = alg.exp(z); + double sum = 0; + for(int i = 0; i < z.size(); i++){ + sum += expZ[i]; + } + for(int i = 0; i < z.size(); i++){ + a[i] = expZ[i] / sum; + } return a; } @@ -80,16 +82,7 @@ namespace MLPP{ double C = -*max_element(z.begin(), z.end()); z = alg.scalarAdd(C, z); - a.resize(z.size()); - for(int i = 0; i < z.size(); i++){ - double sum = 0; - for(int j = 0; j < z.size(); j++){ - sum += exp(z[j]); - } - a[i] = exp(z[i]) / sum; - } - - return a; + return softmax(z); } std::vector> Activation::adjSoftmax(std::vector> z){ diff --git a/MLPP/KMeans/KMeans.cpp b/MLPP/KMeans/KMeans.cpp index d014241..7cd118b 100644 --- a/MLPP/KMeans/KMeans.cpp +++ b/MLPP/KMeans/KMeans.cpp @@ -24,11 +24,13 @@ namespace MLPP{ } std::vector> KMeans::modelSetTest(std::vector> X){ + LinAlg alg; std::vector> closestCentroids; for(int i = 0; i < inputSet.size(); i++){ std::vector closestCentroid = mu[0]; for(int j = 0; j < r[0].size(); j++){ - if(euclideanDistance(X[i], mu[j]) < euclideanDistance(X[i], closestCentroid)){ + bool isCentroidCloser = alg.euclideanDistance(X[i], mu[j]) < alg.euclideanDistance(X[i], closestCentroid); + if(isCentroidCloser){ closestCentroid = mu[j]; } } @@ -38,9 +40,10 @@ namespace MLPP{ } std::vector KMeans::modelTest(std::vector x){ + LinAlg alg; std::vector closestCentroid = mu[0]; for(int j = 0; j < mu.size(); j++){ - if(euclideanDistance(x, mu[j]) < euclideanDistance(x, closestCentroid)){ + if(alg.euclideanDistance(x, mu[j]) < alg.euclideanDistance(x, closestCentroid)){ closestCentroid = mu[j]; } } @@ -81,6 +84,7 @@ namespace MLPP{ } std::vector KMeans::silhouette_scores(){ + LinAlg alg; std::vector> closestCentroids = modelSetTest(inputSet); std::vector silhouette_scores; for(int i = 0; i < inputSet.size(); i++){ @@ -88,7 +92,7 @@ namespace MLPP{ double a = 0; for(int j = 0; j < inputSet.size(); j++){ if(i != j && r[i] == r[j]){ - a += euclideanDistance(inputSet[i], inputSet[j]); + a += alg.euclideanDistance(inputSet[i], inputSet[j]); } } // NORMALIZE a[i] @@ -101,7 +105,7 @@ namespace MLPP{ if(closestCentroids[i] != mu[j]){ double sum = 0; for(int k = 0; k < inputSet.size(); k++){ - sum += euclideanDistance(inputSet[i], inputSet[k]); + sum += alg.euclideanDistance(inputSet[i], inputSet[k]); } // NORMALIZE b[i] double k_clusterSize = 0; @@ -130,6 +134,7 @@ namespace MLPP{ // This simply computes r_nk void KMeans::Evaluate(){ + LinAlg alg; r.resize(inputSet.size()); for(int i = 0; i < r.size(); i++){ @@ -139,7 +144,8 @@ namespace MLPP{ for(int i = 0; i < r.size(); i++){ std::vector closestCentroid = mu[0]; for(int j = 0; j < r[0].size(); j++){ - if(euclideanDistance(inputSet[i], mu[j]) < euclideanDistance(inputSet[i], closestCentroid)){ + bool isCentroidCloser = alg.euclideanDistance(inputSet[i], mu[j]) < alg.euclideanDistance(inputSet[i], closestCentroid); + if(isCentroidCloser){ closestCentroid = mu[j]; } } @@ -190,6 +196,7 @@ namespace MLPP{ } void KMeans::kmeansppInitialization(int k){ + LinAlg alg; std::random_device rd; std::default_random_engine generator(rd()); std::uniform_int_distribution distribution(0, int(inputSet.size() - 1)); @@ -203,7 +210,7 @@ namespace MLPP{ AS TO SPREAD OUT THE CLUSTER CENTROIDS. */ double sum = 0; for(int k = 0; k < mu.size(); k++){ - sum += euclideanDistance(inputSet[j], mu[k]); + sum += alg.euclideanDistance(inputSet[j], mu[k]); } if(sum * sum > max_dist){ farthestCentroid = inputSet[j]; @@ -224,13 +231,4 @@ namespace MLPP{ } return sum; } - - // Multidimensional Euclidean Distance - double KMeans::euclideanDistance(std::vector A, std::vector B){ - double dist = 0; - for(int i = 0; i < A.size(); i++){ - dist += (A[i] - B[i])*(A[i] - B[i]); - } - return sqrt(dist); - } } diff --git a/MLPP/LinAlg/LinAlg.cpp b/MLPP/LinAlg/LinAlg.cpp index 072a2d0..fe6955e 100644 --- a/MLPP/LinAlg/LinAlg.cpp +++ b/MLPP/LinAlg/LinAlg.cpp @@ -811,6 +811,15 @@ namespace MLPP{ return b; } + // Multidimensional Euclidean Distance + double LinAlg::euclideanDistance(std::vector a, std::vector b){ + double dist = 0; + for(int i = 0; i < a.size(); i++){ + dist += (a[i] - b[i])*(a[i] - b[i]); + } + return std::sqrt(dist); + } + double LinAlg::norm_sq(std::vector a){ double n_sq = 0; for(int i = 0; i < a.size(); i++){ diff --git a/MLPP/LinAlg/LinAlg.hpp b/MLPP/LinAlg/LinAlg.hpp index cb1c1ad..0494a8e 100644 --- a/MLPP/LinAlg/LinAlg.hpp +++ b/MLPP/LinAlg/LinAlg.hpp @@ -137,6 +137,8 @@ namespace MLPP{ double min(std::vector a); std::vector round(std::vector a); + + double euclideanDistance(std::vector a, std::vector b); double norm_sq(std::vector a); diff --git a/MLPP/Utilities/Utilities.hpp b/MLPP/Utilities/Utilities.hpp index 9cbd23f..df97c57 100644 --- a/MLPP/Utilities/Utilities.hpp +++ b/MLPP/Utilities/Utilities.hpp @@ -34,7 +34,6 @@ namespace MLPP{ static void UI(std::vector weights, double bias); static void UI(std::vector weights, std::vector initial, double bias); static void UI(std::vector>, std::vector bias); - static void CostInfo(int epoch, double cost_prev, double Cost); // F1 score, Precision/Recall, TP, FP, TN, FN, etc. diff --git a/MLPP/kNN/kNN.cpp b/MLPP/kNN/kNN.cpp index ec53f65..feda69c 100644 --- a/MLPP/kNN/kNN.cpp +++ b/MLPP/kNN/kNN.cpp @@ -65,6 +65,7 @@ namespace MLPP{ } std::vector kNN::nearestNeighbors(std::vector x){ + LinAlg alg; // The nearest neighbors std::vector knn; @@ -73,22 +74,14 @@ namespace MLPP{ for(int i = 0; i < k; i++){ int neighbor = 0; for(int j = 0; j < inputUseSet.size(); j++){ - if(euclideanDistance(x, inputUseSet[j]) < euclideanDistance(x, inputUseSet[neighbor])){ + bool isNeighborNearer = alg.euclideanDistance(x, inputUseSet[j]) < alg.euclideanDistance(x, inputUseSet[neighbor]); + if(isNeighborNearer){ neighbor = j; } } knn.push_back(neighbor); - inputUseSet.erase(inputUseSet.begin() + neighbor); + inputUseSet.erase(inputUseSet.begin() + neighbor); // This is why we maintain an extra input"Use"Set } return knn; } - - // Multidimensional Euclidean Distance - double kNN::euclideanDistance(std::vector A, std::vector B){ - double dist = 0; - for(int i = 0; i < A.size(); i++){ - dist += (A[i] - B[i])*(A[i] - B[i]); - } - return sqrt(dist); - } } \ No newline at end of file diff --git a/MLPP/kNN/kNN.hpp b/MLPP/kNN/kNN.hpp index a786c5a..740543e 100644 --- a/MLPP/kNN/kNN.hpp +++ b/MLPP/kNN/kNN.hpp @@ -23,9 +23,6 @@ namespace MLPP{ // Private Model Functions std::vector nearestNeighbors(std::vector x); int determineClass(std::vector knn); - - // Helper Functions - double euclideanDistance(std::vector A, std::vector B); // Model Inputs and Parameters std::vector> inputSet; diff --git a/SharedLib/MLPP.so b/SharedLib/MLPP.so index 3a8b729..948a499 100755 Binary files a/SharedLib/MLPP.so and b/SharedLib/MLPP.so differ diff --git a/a.out b/a.out deleted file mode 100755 index 9129d28..0000000 Binary files a/a.out and /dev/null differ diff --git a/main.cpp b/main.cpp index f069d95..5f5abc0 100644 --- a/main.cpp +++ b/main.cpp @@ -186,6 +186,7 @@ int main() { // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; // // SOFTMAX REGRESSION + // time_t start = time(0); // std::vector> inputSet; // std::vector tempOutputSet; // data.setData(4, "/Users/marcmelikyan/Desktop/Data/Iris.csv", inputSet, tempOutputSet); @@ -194,7 +195,6 @@ int main() { // SoftmaxReg model(inputSet, outputSet); // model.SGD(0.001, 20000, 0); // alg.printMatrix(model.modelSetTest(inputSet)); - // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; // // MLP // std::vector> inputSet = {{0,0,1,1}, {0,1,0,1}}; @@ -354,11 +354,11 @@ int main() { // std::cout << avn.softsign(z_s, 1) << std::endl; // std::vector z_v = {0.001, 5}; - // alg.printVector(avn.softsign(z_v)); + // alg.printVector(avn.softmax(z_v)); // alg.printVector(avn.softsign(z_v, 1)); // std::vector> Z_m = {{0.001, 5}}; - // alg.printMatrix(avn.softsign(Z_m)); + // alg.printMatrix(avn.softmax(Z_m)); // alg.printMatrix(avn.softsign(Z_m, 1)); // std::cout << alg.trace({{1,2}, {3,4}}) << std::endl;