diff --git a/.DS_Store b/.DS_Store
index bf6d0b3..94cb8b0 100644
Binary files a/.DS_Store and b/.DS_Store differ
diff --git a/MLPP/.DS_Store b/MLPP/.DS_Store
index 342bf4f..8316a0a 100644
Binary files a/MLPP/.DS_Store and b/MLPP/.DS_Store differ
diff --git a/MLPP/Cost/Cost.cpp b/MLPP/Cost/Cost.cpp
index 12a84a7..e6b1084 100644
--- a/MLPP/Cost/Cost.cpp
+++ b/MLPP/Cost/Cost.cpp
@@ -348,7 +348,7 @@ namespace MLPP{
         Reg regularization;
         return C * HingeLoss(y_hat, y) + regularization.regTerm(weights, 1, 0, "Ridge");
     }
-    double Cost::HingeLoss(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y, std::vector<double> weights, double C){
+    double Cost::HingeLoss(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y, std::vector<std::vector<double>> weights, double C){
         LinAlg alg;
         Reg regularization;
         return C * HingeLoss(y_hat, y) + regularization.regTerm(weights, 1, 0, "Ridge");
@@ -364,4 +364,29 @@ namespace MLPP{
         Reg regularization;
         return alg.scalarMultiply(C, HingeLossDeriv(y_hat, y));
     }
+
+    double Cost::dualFormSVM(std::vector<double> alpha, std::vector<std::vector<double>> X, std::vector<double> y){
+        LinAlg alg;
+        std::vector<std::vector<double>> Y = alg.diag(y); // Y is a diagonal matrix. Y[i][j] = y[i] if i == j, else Y[i][j] = 0. Y^T = Y.
+        std::vector<std::vector<double>> K = alg.matmult(X, alg.transpose(X)); // TO DO: DON'T forget to add non-linear kernelizations.
+        std::vector<std::vector<double>> Q = alg.matmult(alg.matmult(alg.transpose(Y), K), Y);
+        double alphaQ = alg.matmult(alg.matmult({alpha}, Q), alg.transpose({alpha}))[0][0];
+        std::vector<double> one = alg.onevec(alpha.size());
+
+        return -alg.dot(one, alpha) + 0.5 * alphaQ;
+    }
+
+    std::vector<double> Cost::dualFormSVMDeriv(std::vector<double> alpha, std::vector<std::vector<double>> X, std::vector<double> y){
+        LinAlg alg;
+        std::vector<std::vector<double>> Y = alg.zeromat(y.size(), y.size());
+        for(int i = 0; i < y.size(); i++){
+            Y[i][i] = y[i]; // Y is a diagonal matrix. Y[i][j] = y[i] if i == j, else Y[i][j] = 0. Y^T = Y.
+        }
+        std::vector<std::vector<double>> K = alg.matmult(X, alg.transpose(X)); // TO DO: DON'T forget to add non-linear kernelizations.
+        std::vector<std::vector<double>> Q = alg.matmult(alg.matmult(alg.transpose(Y), K), Y);
+        std::vector<double> alphaQDeriv = alg.mat_vec_mult(Q, alpha);
+        std::vector<double> one = alg.onevec(alpha.size());
+
+        return alg.subtraction(alphaQDeriv, one);
+    }
 }
\ No newline at end of file
diff --git a/MLPP/Cost/Cost.hpp b/MLPP/Cost/Cost.hpp
index 41d60b5..22aea30 100644
--- a/MLPP/Cost/Cost.hpp
+++ b/MLPP/Cost/Cost.hpp
@@ -63,10 +63,14 @@ namespace MLPP{
         std::vector<std::vector<double>> HingeLossDeriv(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y);
 
         double HingeLoss(std::vector<double> y_hat, std::vector<double> y, std::vector<double> weights, double C);
-        double HingeLoss(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y, std::vector<double> weights, double C);
+        double HingeLoss(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y, std::vector<std::vector<double>> weights, double C);
 
         std::vector<double> HingeLossDeriv(std::vector<double> y_hat, std::vector<double> y, double C);
         std::vector<std::vector<double>> HingeLossDeriv(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y, double C);
+
+        double dualFormSVM(std::vector<double> alpha, std::vector<std::vector<double>> X, std::vector<double> y); // TO DO: DON'T forget to add non-linear kernelizations.
+
+        std::vector<double> dualFormSVMDeriv(std::vector<double> alpha, std::vector<std::vector<double>> X, std::vector<double> y);
 
 
     private:
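Aside on the Cost additions above: dualFormSVM evaluates the negated Wolfe-dual objective of the soft-margin SVM, f(alpha) = -1^T alpha + (1/2) alpha^T Q alpha with Q = diag(y) K diag(y), and dualFormSVMDeriv returns its gradient, Q alpha - 1, so running gradient descent on f maximizes the usual dual. A minimal standalone sketch of the same objective, with plain loops in place of the MLPP LinAlg helpers and a linear kernel only (the name dualObjective is illustrative, not library API):

    #include <iostream>
    #include <vector>

    // f(alpha) = -sum_i alpha_i + 0.5 * sum_{i,j} alpha_i * alpha_j * y_i * y_j * <x_i, x_j>
    double dualObjective(const std::vector<double>& alpha,
                         const std::vector<std::vector<double>>& X,
                         const std::vector<double>& y){
        double linear = 0, quad = 0;
        for(std::size_t i = 0; i < alpha.size(); i++){
            linear += alpha[i];
            for(std::size_t j = 0; j < alpha.size(); j++){
                double k = 0; // linear kernel K(x_i, x_j) = <x_i, x_j>
                for(std::size_t d = 0; d < X[i].size(); d++){ k += X[i][d] * X[j][d]; }
                quad += alpha[i] * alpha[j] * y[i] * y[j] * k;
            }
        }
        return -linear + 0.5 * quad;
    }

    int main(){
        std::vector<std::vector<double>> X = {{1, 1}, {-1, -1}};
        std::vector<double> y = {1, -1};
        std::vector<double> alpha = {0.25, 0.25};
        std::cout << dualObjective(alpha, X, y) << std::endl; // -0.25 on this toy set
        return 0;
    }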
diff --git a/MLPP/DualSVC/.DS_Store b/MLPP/DualSVC/.DS_Store
new file mode 100644
index 0000000..f54bfae
Binary files /dev/null and b/MLPP/DualSVC/.DS_Store differ
diff --git a/MLPP/DualSVC/DualSVC.cpp b/MLPP/DualSVC/DualSVC.cpp
new file mode 100644
index 0000000..6c6d65f
--- /dev/null
+++ b/MLPP/DualSVC/DualSVC.cpp
@@ -0,0 +1,243 @@
+//
+//  DualSVC.cpp
+//
+//  Created by Marc Melikyan on 10/2/20.
+//
+
+#include "DualSVC.hpp"
+#include "Activation/Activation.hpp"
+#include "LinAlg/LinAlg.hpp"
+#include "Regularization/Reg.hpp"
+#include "Utilities/Utilities.hpp"
+#include "Cost/Cost.hpp"
+
+#include <iostream>
+#include <random>
+
+namespace MLPP{
+    DualSVC::DualSVC(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, double C, std::string kernel)
+    : inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), C(C), kernel(kernel)
+    {
+        y_hat.resize(n);
+        bias = Utilities::biasInitialization();
+        alpha = Utilities::weightInitialization(n); // One alpha per training example, as per the Lagrangian multipliers.
+        K = createK(); // For now this is unused. When non-linear kernels are added, K will be manipulated.
+    }
+
+    std::vector<double> DualSVC::modelSetTest(std::vector<std::vector<double>> X){
+        return Evaluate(X);
+    }
+
+    double DualSVC::modelTest(std::vector<double> x){
+        return Evaluate(x);
+    }
+
+    void DualSVC::gradientDescent(double learning_rate, int max_epoch, bool UI){
+        class Cost cost;
+        Activation avn;
+        LinAlg alg;
+        Reg regularization;
+        double cost_prev = 0;
+        int epoch = 1;
+        forwardPass();
+
+        while(true){
+            cost_prev = Cost(alpha, inputSet, outputSet);
+
+            alpha = alg.subtraction(alpha, alg.scalarMultiply(learning_rate, cost.dualFormSVMDeriv(alpha, inputSet, outputSet)));
+
+            alphaProjection();
+
+            // Calculating the bias from the first free support vector (0 < alpha_i < C).
+            double biasGradient = 0;
+            for(int i = 0; i < alpha.size(); i++){
+                if(alpha[i] < C && alpha[i] > 0){
+                    double sum = 0;
+                    for(int j = 0; j < alpha.size(); j++){
+                        if(alpha[j] > 0){
+                            sum += alpha[j] * outputSet[j] * alg.dot(inputSet[j], inputSet[i]); // TO DO: DON'T forget to add non-linear kernelizations.
+                        }
+                    }
+                    biasGradient = (1 - outputSet[i] * sum) / outputSet[i];
+                    break;
+                }
+            }
+            bias -= biasGradient * learning_rate;
+
+            forwardPass();
+
+            // UI PORTION
+            if(UI) {
+                Utilities::CostInfo(epoch, cost_prev, Cost(alpha, inputSet, outputSet));
+                Utilities::UI(alpha, bias);
+                std::cout << score() << std::endl; // TO DO: DELETE THIS.
+            }
+            epoch++;
+
+            if(epoch > max_epoch) { break; }
+
+        }
+    }
+
+    // void DualSVC::SGD(double learning_rate, int max_epoch, bool UI){
+    //     class Cost cost;
+    //     Activation avn;
+    //     LinAlg alg;
+    //     Reg regularization;
+
+    //     double cost_prev = 0;
+    //     int epoch = 1;
+
+    //     while(true){
+    //         std::random_device rd;
+    //         std::default_random_engine generator(rd());
+    //         std::uniform_int_distribution<int> distribution(0, int(n - 1));
+    //         int outputIndex = distribution(generator);
+
+    //         cost_prev = Cost(alpha, inputSet[outputIndex], outputSet[outputIndex]);
+
+    //         // Bias update
+    //         bias -= learning_rate * costDeriv;
+
+    //         y_hat = Evaluate({inputSet[outputIndex]});
+
+    //         if(UI) {
+    //             Utilities::CostInfo(epoch, cost_prev, Cost(alpha));
+    //             Utilities::UI(weights, bias);
+    //         }
+    //         epoch++;
+
+    //         if(epoch > max_epoch) { break; }
+    //     }
+    //     forwardPass();
+    // }
+
+    // void DualSVC::MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI){
+    //     class Cost cost;
+    //     Activation avn;
+    //     LinAlg alg;
+    //     Reg regularization;
+    //     double cost_prev = 0;
+    //     int epoch = 1;
+
+    //     // Creating the mini-batches
+    //     int n_mini_batch = n/mini_batch_size;
+    //     auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
+
+    //     while(true){
+    //         for(int i = 0; i < n_mini_batch; i++){
+    //             std::vector<double> y_hat = Evaluate(inputMiniBatches[i]);
+    //             std::vector<double> z = propagate(inputMiniBatches[i]);
+    //             cost_prev = Cost(z, outputMiniBatches[i], weights, C);
+
+    //             // Calculating the weight gradients
+    //             weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), cost.HingeLossDeriv(z, outputMiniBatches[i], C))));
+    //             weights = regularization.regWeights(weights, learning_rate/n, 0, "Ridge");
+
+
+    //             // Calculating the bias gradients
+    //             bias -= learning_rate * alg.sum_elements(cost.HingeLossDeriv(y_hat, outputMiniBatches[i], C)) / n;
+
+    //             forwardPass();
+
+    //             y_hat = Evaluate(inputMiniBatches[i]);
+
+    //             if(UI) {
+    //                 Utilities::CostInfo(epoch, cost_prev, Cost(z, outputMiniBatches[i], weights, C));
+    //                 Utilities::UI(weights, bias);
+    //             }
+    //         }
+    //         epoch++;
+    //         if(epoch > max_epoch) { break; }
+    //     }
+    //     forwardPass();
+    // }
+
+    double DualSVC::score(){
+        Utilities util;
+        return util.performance(y_hat, outputSet);
+    }
+
+    void DualSVC::save(std::string fileName){
+        Utilities util;
+        util.saveParameters(fileName, alpha, bias);
+    }
+
+    double DualSVC::Cost(std::vector<double> alpha, std::vector<std::vector<double>> X, std::vector<double> y){
+        class Cost cost;
+        return cost.dualFormSVM(alpha, X, y);
+    }
+
+    std::vector<double> DualSVC::Evaluate(std::vector<std::vector<double>> X){
+        Activation avn;
+        return avn.sign(propagate(X));
+    }
+
+    std::vector<double> DualSVC::propagate(std::vector<std::vector<double>> X){
+        LinAlg alg;
+        std::vector<double> z;
+        for(int i = 0; i < X.size(); i++){
+            double sum = 0;
+            for(int j = 0; j < alpha.size(); j++){
+                if(alpha[j] != 0){
+                    sum += alpha[j] * outputSet[j] * alg.dot(inputSet[j], X[i]); // TO DO: DON'T forget to add non-linear kernelizations.
+                }
+            }
+            sum += bias;
+            z.push_back(sum);
+        }
+        return z;
+    }
+
+    double DualSVC::Evaluate(std::vector<double> x){
+        Activation avn;
+        return avn.sign(propagate(x));
+    }
+
+    double DualSVC::propagate(std::vector<double> x){
+        LinAlg alg;
+        double z = 0;
+        for(int j = 0; j < alpha.size(); j++){
+            if(alpha[j] != 0){
+                z += alpha[j] * outputSet[j] * alg.dot(inputSet[j], x); // TO DO: DON'T forget to add non-linear kernelizations.
+            }
+        }
+        z += bias;
+        return z;
+    }
+
+    void DualSVC::forwardPass(){
+        LinAlg alg;
+        Activation avn;
+
+        z = propagate(inputSet);
+        y_hat = avn.sign(z);
+    }
+
+    void DualSVC::alphaProjection(){
+        for(int i = 0; i < alpha.size(); i++){
+            if(alpha[i] > C){
+                alpha[i] = C;
+            }
+            else if(alpha[i] < 0){
+                alpha[i] = 0;
+            }
+        }
+    }
+
+    double DualSVC::kernelFunction(std::vector<double> u, std::vector<double> v){
+        LinAlg alg;
+        if(kernel == "Linear"){
+            return alg.dot(u, v);
+        }
+        return 0; // Fallback until non-linear kernels are added, so all control paths return a value.
+    }
+
+    std::vector<std::vector<double>> DualSVC::createK(){
+        LinAlg alg;
+        if(kernel == "Linear"){
+            return alg.matmult(inputSet, alg.transpose(inputSet));
+        }
+        return {}; // Fallback until non-linear kernels are added; previously triggered -Wreturn-type.
+    }
+}
\ No newline at end of file
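Aside on gradientDescent above: each epoch is a projected gradient step, a plain descent step on alpha followed by alphaProjection, which clips every alpha_i back into the box constraint 0 <= alpha_i <= C (the equality constraint sum_i alpha_i * y_i = 0 of the full dual is not enforced here). The bias is then estimated from a free support vector via b = y_i - sum_j alpha_j y_j K(x_j, x_i); note that (1 - y_i * s) / y_i equals y_i - s when y_i is in {-1, 1}. A minimal sketch of one such step, assuming C++17 and a hand-rolled stand-in for the LinAlg calls (projectedStep is an illustrative name, not library API):

    #include <algorithm>
    #include <vector>

    // One projected-gradient step on the dual variables: descend along the
    // gradient, then project each alpha back onto the box [0, C].
    void projectedStep(std::vector<double>& alpha, const std::vector<double>& grad,
                       double learning_rate, double C){
        for(std::size_t i = 0; i < alpha.size(); i++){
            alpha[i] -= learning_rate * grad[i];     // gradient descent step
            alpha[i] = std::clamp(alpha[i], 0.0, C); // the equivalent of alphaProjection()
        }
    }

    int main(){
        std::vector<double> alpha = {0.5, 1.2, -0.3};
        std::vector<double> grad = {1.0, -1.0, 0.0};
        projectedStep(alpha, grad, 0.1, 1.0); // alpha becomes {0.4, 1.0, 0.0}
        return 0;
    }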
diff --git a/MLPP/DualSVC/DualSVC.hpp b/MLPP/DualSVC/DualSVC.hpp
new file mode 100644
index 0000000..025c7f5
--- /dev/null
+++ b/MLPP/DualSVC/DualSVC.hpp
@@ -0,0 +1,71 @@
+//
+//  DualSVC.hpp
+//
+//  Created by Marc Melikyan on 10/2/20.
+//
+//  http://disp.ee.ntu.edu.tw/~pujols/Support%20Vector%20Machine.pdf
+//  http://ciml.info/dl/v0_99/ciml-v0_99-ch11.pdf
+//  These were excellent for the practical intuition behind the dual formulation.
+
+#ifndef DualSVC_hpp
+#define DualSVC_hpp
+
+
+#include <string>
+#include <vector>
+
+namespace MLPP {
+
+    class DualSVC{
+
+        public:
+            DualSVC(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, double C, std::string kernel = "Linear");
+            DualSVC(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, double C, std::string kernel, double p, double c);
+
+            std::vector<double> modelSetTest(std::vector<std::vector<double>> X);
+            double modelTest(std::vector<double> x);
+            void gradientDescent(double learning_rate, int max_epoch, bool UI = 1);
+            void SGD(double learning_rate, int max_epoch, bool UI = 1);
+            void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1);
+            double score();
+            void save(std::string fileName);
+        private:
+
+            void init();
+
+            double Cost(std::vector<double> alpha, std::vector<std::vector<double>> X, std::vector<double> y);
+
+            std::vector<double> Evaluate(std::vector<std::vector<double>> X);
+            std::vector<double> propagate(std::vector<std::vector<double>> X);
+            double Evaluate(std::vector<double> x);
+            double propagate(std::vector<double> x);
+            void forwardPass();
+
+            void alphaProjection();
+
+            double kernelFunction(std::vector<double> u, std::vector<double> v);
+            std::vector<std::vector<double>> createK();
+
+            std::vector<std::vector<double>> inputSet;
+            std::vector<double> outputSet;
+            std::vector<double> z;
+            std::vector<double> y_hat;
+            double bias;
+
+            std::vector<double> alpha;
+            std::vector<std::vector<double>> K;
+
+            double C;
+            int n;
+            int k;
+
+            std::string kernel;
+            double p; // Poly
+            double c; // Poly
+
+            // UI Portion
+            void UI(int epoch, double cost_prev);
+    };
+}
+
+#endif /* DualSVC_hpp */
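Aside on the prediction path declared above: propagate never materializes a primal weight vector; the score of a point x comes straight from the dual variables as z = sum_j alpha_j y_j K(x_j, x) + b, where only support vectors (alpha_j != 0 in the code) contribute, and Evaluate applies the sign activation to z. A standalone sketch under the linear-kernel assumption (decision is an illustrative name, not library API):

    #include <iostream>
    #include <vector>

    // f(x) = sign( sum_j alpha_j * y_j * <x_j, x> + b ); the sum skips
    // non-support vectors, whose alpha_j is zero.
    double decision(const std::vector<double>& x,
                    const std::vector<std::vector<double>>& trainX,
                    const std::vector<double>& alpha,
                    const std::vector<double>& y, double bias){
        double z = bias;
        for(std::size_t j = 0; j < alpha.size(); j++){
            if(alpha[j] == 0){ continue; } // non-support vector, contributes nothing
            double k = 0; // linear kernel <x_j, x>
            for(std::size_t d = 0; d < x.size(); d++){ k += trainX[j][d] * x[d]; }
            z += alpha[j] * y[j] * k;
        }
        return z >= 0 ? 1.0 : -1.0; // sign activation
    }

    int main(){
        std::vector<std::vector<double>> trainX = {{1, 0}, {-1, 0}};
        std::vector<double> alpha = {0.5, 0.5};
        std::vector<double> y = {1, -1};
        std::cout << decision({2, 3}, trainX, alpha, y, 0) << std::endl; // prints 1
        return 0;
    }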
diff --git a/a.out b/a.out
index 75810b3..bd54490 100755
Binary files a/a.out and b/a.out differ
diff --git a/main.cpp b/main.cpp
index ab06dba..e4040c9 100644
--- a/main.cpp
+++ b/main.cpp
@@ -46,6 +46,7 @@
 #include "MLPP/Convolutions/Convolutions.hpp"
 #include "MLPP/SVC/SVC.hpp"
 #include "MLPP/NumericalAnalysis/NumericalAnalysis.hpp"
+#include "MLPP/DualSVC/DualSVC.hpp"
 
 using namespace MLPP;
 
@@ -487,11 +488,11 @@ int main() {
 
     // alg.printMatrix(wordEmbeddings);
     // std::cout << std::endl;
 
-    std::vector<std::string> textArchive = {"pizza", "pizza hamburger cookie", "hamburger", "ramen", "sushi", "ramen sushi"};
+    // std::vector<std::string> textArchive = {"pizza", "pizza hamburger cookie", "hamburger", "ramen", "sushi", "ramen sushi"};
 
-    alg.printMatrix(data.LSA(textArchive, 2));
-    //alg.printMatrix(data.BOW(textArchive, "Default"));
-    std::cout << std::endl;
+    // alg.printMatrix(data.LSA(textArchive, 2));
+    // //alg.printMatrix(data.BOW(textArchive, "Default"));
+    // std::cout << std::endl;
 
     // std::vector<std::vector<double>> inputSet = {{1,2},{2,3},{3,4},{4,5},{5,6}};
@@ -640,8 +641,18 @@ int main() {
 
 
     // std::vector<double> b = {4,4,4};
     // alg.printVector(alg.cross(a,b));
 
 
+    //SUPPORT VECTOR CLASSIFICATION (kernel method)
+    // std::vector<std::vector<double>> inputSet;
+    // std::vector<double> outputSet;
+    // data.setData(30, "/Users/marcmelikyan/Desktop/Data/BreastCancerSVM.csv", inputSet, outputSet);
+    std::vector<std::vector<double>> inputSet;
+    std::vector<double> outputSet;
+    data.setData(4, "/Users/marcmelikyan/Desktop/Data/IrisSVM.csv", inputSet, outputSet);
+    DualSVC kernelSVM(inputSet, outputSet, 1000);
+    kernelSVM.gradientDescent(0.0001, 20, 1);
+
     return 0;
 }
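The driver above depends on CSVs on the author's machine (BreastCancerSVM.csv, IrisSVM.csv). A hypothetical smoke test with an inline, linearly separable dataset instead; the points and the C / learning-rate / epoch values are made up for illustration, and the labels are assumed to be in {-1, 1} as the dual formulation requires:

    #include "MLPP/DualSVC/DualSVC.hpp"
    #include <vector>

    int main(){
        // Two trivially separable clusters along the line y = x.
        std::vector<std::vector<double>> inputSet = {{1,1},{2,2},{3,3},{-1,-1},{-2,-2},{-3,-3}};
        std::vector<double> outputSet = {1, 1, 1, -1, -1, -1};
        MLPP::DualSVC kernelSVM(inputSet, outputSet, /*C=*/1000);
        kernelSVM.gradientDescent(0.0001, 20, /*UI=*/true);
        return 0;
    }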