Dual formulation of SVM [LINEAR KERNEL ONLY, BATCH GD ONLY]

novak_99 2021-12-31 18:22:44 -08:00
parent bf667b0a2d
commit 3e287f3b95
9 changed files with 358 additions and 6 deletions

BIN .DS_Store vendored (binary file not shown)

BIN MLPP/.DS_Store vendored (binary file not shown)

MLPP/Cost/Cost.cpp

@@ -348,7 +348,7 @@ namespace MLPP{
Reg regularization;
return C * HingeLoss(y_hat, y) + regularization.regTerm(weights, 1, 0, "Ridge");
}
-double Cost::HingeLoss(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y, std::vector<double> weights, double C){
+double Cost::HingeLoss(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y, std::vector<std::vector<double>> weights, double C){
LinAlg alg;
Reg regularization;
return C * HingeLoss(y_hat, y) + regularization.regTerm(weights, 1, 0, "Ridge");
@@ -364,4 +364,29 @@
Reg regularization;
return alg.scalarMultiply(C, HingeLossDeriv(y_hat, y));
}
double Cost::dualFormSVM(std::vector<double> alpha, std::vector<std::vector<double>> X, std::vector<double> y){
LinAlg alg;
std::vector<std::vector<double>> Y = alg.diag(y); // Y is a diagonal matrix: Y[i][j] = y[i] if i == j, else 0. Note Y^T = Y.
std::vector<std::vector<double>> K = alg.matmult(X, alg.transpose(X)); // TODO: add support for non-linear kernels.
std::vector<std::vector<double>> Q = alg.matmult(alg.matmult(alg.transpose(Y), K), Y);
double alphaQ = alg.matmult(alg.matmult({alpha}, Q), alg.transpose({alpha}))[0][0];
std::vector<double> one = alg.onevec(alpha.size());
return -alg.dot(one, alpha) + 0.5 * alphaQ;
}
std::vector<double> Cost::dualFormSVMDeriv(std::vector<double> alpha, std::vector<std::vector<double>> X, std::vector<double> y){
LinAlg alg;
std::vector<std::vector<double>> Y = alg.zeromat(y.size(), y.size());
for(int i = 0; i < y.size(); i++){
Y[i][i] = y[i]; // Y is a diagonal matrix: Y[i][j] = y[i] if i == j, else 0. Note Y^T = Y.
}
std::vector<std::vector<double>> K = alg.matmult(X, alg.transpose(X)); // TODO: add support for non-linear kernels.
std::vector<std::vector<double>> Q = alg.matmult(alg.matmult(alg.transpose(Y), K), Y);
std::vector<double> alphaQDeriv = alg.mat_vec_mult(Q, alpha);
std::vector<double> one = alg.onevec(alpha.size());
return alg.subtraction(alphaQDeriv, one);
}
}
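
For reference, dualFormSVM and dualFormSVMDeriv compute the negated soft-margin dual objective and its gradient. As a sketch in standard notation, with Y = diag(y) and K the linear-kernel Gram matrix:

\min_{\alpha} \; \tfrac{1}{2}\,\alpha^{T} Q \alpha - \mathbf{1}^{T} \alpha, \qquad Q = Y^{T} K Y, \qquad K_{ij} = x_i \cdot x_j

\nabla_{\alpha} \left( \tfrac{1}{2}\,\alpha^{T} Q \alpha - \mathbf{1}^{T} \alpha \right) = Q\alpha - \mathbf{1}

The textbook dual also carries the constraints 0 \le \alpha_i \le C and \sum_i \alpha_i y_i = 0; in this commit the box constraint is enforced separately by alphaProjection in DualSVC.cpp, and the equality constraint is not enforced.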

MLPP/Cost/Cost.hpp

@@ -63,11 +63,15 @@ namespace MLPP{
std::vector<std::vector<double>> HingeLossDeriv(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y);
double HingeLoss(std::vector <double> y_hat, std::vector<double> y, std::vector<double> weights, double C);
-double HingeLoss(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y, std::vector<double> weights, double C);
+double HingeLoss(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y, std::vector<std::vector<double>> weights, double C);
std::vector<double> HingeLossDeriv(std::vector <double> y_hat, std::vector<double> y, double C);
std::vector<std::vector<double>> HingeLossDeriv(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y, double C);
double dualFormSVM(std::vector<double> alpha, std::vector<std::vector<double>> X, std::vector<double> y); // TODO: add support for non-linear kernels.
std::vector<double> dualFormSVMDeriv(std::vector<double> alpha, std::vector<std::vector<double>> X, std::vector<double> y);
private:
};

BIN MLPP/DualSVC/.DS_Store vendored Normal file (binary file not shown)

MLPP/DualSVC/DualSVC.cpp Normal file (241 lines)

@@ -0,0 +1,241 @@
//
// DualSVC.cpp
//
// Created by Marc Melikyan on 10/2/20.
//
#include "DualSVC.hpp"
#include "Activation/Activation.hpp"
#include "LinAlg/LinAlg.hpp"
#include "Regularization/Reg.hpp"
#include "Utilities/Utilities.hpp"
#include "Cost/Cost.hpp"
#include <iostream>
#include <random>
namespace MLPP{
DualSVC::DualSVC(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, double C, std::string kernel)
: inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), C(C), kernel(kernel)
{
y_hat.resize(n);
bias = Utilities::biasInitialization();
alpha = Utilities::weightInitialization(n); // One alpha per training example (the Lagrange multipliers).
K = createK(); // Currently unused; K will come into play once non-linear kernels are added.
}
std::vector<double> DualSVC::modelSetTest(std::vector<std::vector<double>> X){
return Evaluate(X);
}
double DualSVC::modelTest(std::vector<double> x){
return Evaluate(x);
}
void DualSVC::gradientDescent(double learning_rate, int max_epoch, bool UI){
class Cost cost;
Activation avn;
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
forwardPass();
while(true){
cost_prev = Cost(alpha, inputSet, outputSet);
alpha = alg.subtraction(alpha, alg.scalarMultiply(learning_rate, cost.dualFormSVMDeriv(alpha, inputSet, outputSet)));
alphaProjection();
// Calculating the bias from the first free support vector (0 < alpha_i < C)
double biasGradient = 0;
for(int i = 0; i < alpha.size(); i++){
    if(alpha[i] < C && alpha[i] > 0){
        double sum = 0;
        for(int j = 0; j < alpha.size(); j++){
            if(alpha[j] > 0){
                sum += alpha[j] * outputSet[j] * alg.dot(inputSet[j], inputSet[i]); // TODO: add support for non-linear kernels.
            }
        }
        biasGradient = (1 - outputSet[i] * sum) / outputSet[i];
        break;
    }
}
bias -= biasGradient * learning_rate;
forwardPass();
// UI PORTION
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost(alpha, inputSet, outputSet));
Utilities::UI(alpha, bias);
std::cout << score() << std::endl; // TODO: remove this debug output.
}
epoch++;
if(epoch > max_epoch) { break; }
}
}
// void DualSVC::SGD(double learning_rate, int max_epoch, bool UI){
// class Cost cost;
// Activation avn;
// LinAlg alg;
// Reg regularization;
// double cost_prev = 0;
// int epoch = 1;
// while(true){
// std::random_device rd;
// std::default_random_engine generator(rd());
// std::uniform_int_distribution<int> distribution(0, int(n - 1));
// int outputIndex = distribution(generator);
// cost_prev = Cost(alpha, inputSet[outputIndex], outputSet[outputIndex]);
// // Bias update
// bias -= learning_rate * costDeriv;
// y_hat = Evaluate({inputSet[outputIndex]});
// if(UI) {
// Utilities::CostInfo(epoch, cost_prev, Cost(alpha));
// Utilities::UI(weights, bias);
// }
// epoch++;
// if(epoch > max_epoch) { break; }
// }
// forwardPass();
// }
// void DualSVC::MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI){
// class Cost cost;
// Activation avn;
// LinAlg alg;
// Reg regularization;
// double cost_prev = 0;
// int epoch = 1;
// // Creating the mini-batches
// int n_mini_batch = n/mini_batch_size;
// auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
// while(true){
// for(int i = 0; i < n_mini_batch; i++){
// std::vector<double> y_hat = Evaluate(inputMiniBatches[i]);
// std::vector<double> z = propagate(inputMiniBatches[i]);
// cost_prev = Cost(z, outputMiniBatches[i], weights, C);
// // Calculating the weight gradients
// weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), cost.HingeLossDeriv(z, outputMiniBatches[i], C))));
// weights = regularization.regWeights(weights, learning_rate/n, 0, "Ridge");
// // Calculating the bias gradients
// bias -= learning_rate * alg.sum_elements(cost.HingeLossDeriv(y_hat, outputMiniBatches[i], C)) / n;
// forwardPass();
// y_hat = Evaluate(inputMiniBatches[i]);
// if(UI) {
// Utilities::CostInfo(epoch, cost_prev, Cost(z, outputMiniBatches[i], weights, C));
// Utilities::UI(weights, bias);
// }
// }
// epoch++;
// if(epoch > max_epoch) { break; }
// }
// forwardPass();
// }
double DualSVC::score(){
Utilities util;
return util.performance(y_hat, outputSet);
}
void DualSVC::save(std::string fileName){
Utilities util;
util.saveParameters(fileName, alpha, bias);
}
double DualSVC::Cost(std::vector<double> alpha, std::vector<std::vector<double>> X, std::vector<double> y){
class Cost cost;
return cost.dualFormSVM(alpha, X, y);
}
std::vector<double> DualSVC::Evaluate(std::vector<std::vector<double>> X){
Activation avn;
return avn.sign(propagate(X));
}
std::vector<double> DualSVC::propagate(std::vector<std::vector<double>> X){
LinAlg alg;
std::vector<double> z;
for(int i = 0; i < X.size(); i++){
double sum = 0;
for(int j = 0; j < alpha.size(); j++){
if(alpha[j] != 0){
sum += alpha[j] * outputSet[j] * alg.dot(inputSet[j], X[i]); // TODO: add support for non-linear kernels.
}
}
sum += bias;
z.push_back(sum);
}
return z;
}
double DualSVC::Evaluate(std::vector<double> x){
Activation avn;
return avn.sign(propagate(x));
}
double DualSVC::propagate(std::vector<double> x){
LinAlg alg;
double z = 0;
for(int j = 0; j < alpha.size(); j++){
if(alpha[j] != 0){
z += alpha[j] * outputSet[j] * alg.dot(inputSet[j], x); // TODO: add support for non-linear kernels.
}
}
z += bias;
return z;
}
void DualSVC::forwardPass(){
LinAlg alg;
Activation avn;
z = propagate(inputSet);
y_hat = avn.sign(z);
}
void DualSVC::alphaProjection(){
for(int i = 0; i < alpha.size(); i++){
if(alpha[i] > C){
alpha[i] = C;
}
else if(alpha[i] < 0){
alpha[i] = 0;
}
}
}
double DualSVC::kernelFunction(std::vector<double> u, std::vector<double> v){
    LinAlg alg;
    if(kernel == "Linear"){
        return alg.dot(u, v);
    }
    return 0; // Only the linear kernel is implemented so far.
}
std::vector<std::vector<double>> DualSVC::createK(){
    LinAlg alg;
    if(kernel == "Linear"){
        return alg.matmult(inputSet, alg.transpose(inputSet));
    }
    return {}; // Only the linear kernel is implemented so far; this also silences -Wreturn-type.
}
}
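
Putting the pieces together: gradientDescent combined with alphaProjection is projected gradient descent on the dual, and propagate evaluates the resulting decision function. As a sketch, assuming labels y_i \in \{-1, +1\}:

\alpha \leftarrow \operatorname{clip}_{[0,\,C]}\big( \alpha - \eta\,(Q\alpha - \mathbf{1}) \big)

f(x) = \operatorname{sign}\Big( \sum_{j:\,\alpha_j > 0} \alpha_j\, y_j\, K(x_j, x) + b \Big)

The bias step relies on the textbook identity b = y_i - \sum_j \alpha_j y_j K(x_j, x_i) for any free support vector 0 < \alpha_i < C; since y_i \in \{-1, +1\}, the code's (1 - y_i s)/y_i equals y_i - s, and the stored bias is nudged toward that value by a gradient-style step rather than set directly.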

MLPP/DualSVC/DualSVC.hpp Normal file (71 lines)

@@ -0,0 +1,71 @@
//
// DualSVC.hpp
//
// Created by Marc Melikyan on 10/2/20.
//
// http://disp.ee.ntu.edu.tw/~pujols/Support%20Vector%20Machine.pdf
// http://ciml.info/dl/v0_99/ciml-v0_99-ch11.pdf
// These were excellent for building practical intuition behind the dual formulation.
#ifndef DualSVC_hpp
#define DualSVC_hpp
#include <vector>
#include <string>
namespace MLPP {
class DualSVC{
public:
DualSVC(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, double C, std::string kernel = "Linear");
DualSVC(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, double C, std::string kernel, double p, double c);
std::vector<double> modelSetTest(std::vector<std::vector<double>> X);
double modelTest(std::vector<double> x);
void gradientDescent(double learning_rate, int max_epoch, bool UI = 1);
void SGD(double learning_rate, int max_epoch, bool UI = 1);
void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1);
double score();
void save(std::string fileName);
private:
void init();
double Cost(std::vector<double> alpha, std::vector<std::vector<double>> X, std::vector<double> y);
std::vector<double> Evaluate(std::vector<std::vector<double>> X);
std::vector<double> propagate(std::vector<std::vector<double>> X);
double Evaluate(std::vector<double> x);
double propagate(std::vector<double> x);
void forwardPass();
void alphaProjection();
double kernelFunction(std::vector<double> u, std::vector<double> v);
std::vector<std::vector<double>> createK();
std::vector<std::vector<double>> inputSet;
std::vector<double> outputSet;
std::vector<double> z;
std::vector<double> y_hat;
double bias;
std::vector<double> alpha;
std::vector<std::vector<double>> K;
double C;
int n;
int k;
std::string kernel;
double p; // Poly
double c; // Poly
// UI Portion
void UI(int epoch, double cost_prev);
};
}
#endif /* DualSVC_hpp */
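
For completeness, a minimal usage sketch against the API declared above. The toy dataset and hyperparameters are illustrative assumptions, not part of the commit; labels must be encoded as -1/+1 for the dual machinery to work:

#include <iostream>
#include <vector>
#include "MLPP/DualSVC/DualSVC.hpp"
using namespace MLPP;

int main() {
    // Tiny linearly separable toy set: one cluster near the origin, one near (3,3).
    std::vector<std::vector<double>> X = {{0,0}, {0,1}, {1,0}, {3,3}, {3,4}, {4,3}};
    std::vector<double> y = {-1, -1, -1, 1, 1, 1};

    DualSVC svm(X, y, /*C=*/1.0); // kernel defaults to "Linear"
    svm.gradientDescent(/*learning_rate=*/0.01, /*max_epoch=*/1000, /*UI=*/0);
    std::cout << "accuracy: " << svm.score() << std::endl;
    return 0;
}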

BIN a.out (binary file not shown)

main.cpp

@@ -46,6 +46,7 @@
#include "MLPP/Convolutions/Convolutions.hpp"
#include "MLPP/SVC/SVC.hpp"
#include "MLPP/NumericalAnalysis/NumericalAnalysis.hpp"
#include "MLPP/DualSVC/DualSVC.hpp"
using namespace MLPP;
@@ -487,11 +488,11 @@ int main() {
// alg.printMatrix(wordEmbeddings);
// std::cout << std::endl;
-std::vector<std::string> textArchive = {"pizza", "pizza hamburger cookie", "hamburger", "ramen", "sushi", "ramen sushi"};
+// std::vector<std::string> textArchive = {"pizza", "pizza hamburger cookie", "hamburger", "ramen", "sushi", "ramen sushi"};
-alg.printMatrix(data.LSA(textArchive, 2));
-//alg.printMatrix(data.BOW(textArchive, "Default"));
-std::cout << std::endl;
+// alg.printMatrix(data.LSA(textArchive, 2));
+// //alg.printMatrix(data.BOW(textArchive, "Default"));
+// std::cout << std::endl;
// std::vector<std::vector<double>> inputSet = {{1,2},{2,3},{3,4},{4,5},{5,6}};
@@ -640,7 +641,17 @@ int main() {
// std::vector<double> b = {4,4,4};
// alg.printVector(alg.cross(a,b));
// SUPPORT VECTOR CLASSIFICATION (kernel method)
// std::vector<std::vector<double>> inputSet;
// std::vector<double> outputSet;
// data.setData(30, "/Users/marcmelikyan/Desktop/Data/BreastCancerSVM.csv", inputSet, outputSet);
std::vector<std::vector<double>> inputSet;
std::vector<double> outputSet;
data.setData(4, "/Users/marcmelikyan/Desktop/Data/IrisSVM.csv", inputSet, outputSet);
DualSVC kernelSVM(inputSet, outputSet, 1000);
kernelSVM.gradientDescent(0.0001, 20, 1);
return 0;