Dual formulation of SVM [LINEAR KERNEL ONLY, BATCH GD ONLY]
This commit is contained in:
parent bf667b0a2d
commit 3e287f3b95
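
For reference, the objective this commit minimizes is the (negated) SVM dual with a linear kernel. Writing \(Y = \mathrm{diag}(y)\) and \(K_{ij} = \langle x_i, x_j \rangle\) for the Gram matrix, Cost::dualFormSVM below computes

\[
f(\alpha) = \tfrac{1}{2}\,\alpha^{\top} Q \alpha - \mathbf{1}^{\top} \alpha,
\qquad Q = Y^{\top} K Y, \quad Q_{ij} = y_i y_j \langle x_i, x_j \rangle,
\]

minimized subject to the box constraint \(0 \le \alpha_i \le C\) (enforced by DualSVC::alphaProjection), and Cost::dualFormSVMDeriv returns the gradient \(\nabla f(\alpha) = Q\alpha - \mathbf{1}\), which DualSVC follows with batch gradient descent plus a projection step.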
BIN  MLPP/.DS_Store vendored
Binary file not shown.
MLPP/Cost/Cost.cpp
@@ -348,7 +348,7 @@ namespace MLPP{
        Reg regularization;
        return C * HingeLoss(y_hat, y) + regularization.regTerm(weights, 1, 0, "Ridge");
    }
-   double Cost::HingeLoss(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y, std::vector<double> weights, double C){
+   double Cost::HingeLoss(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y, std::vector<std::vector<double>> weights, double C){
        LinAlg alg;
        Reg regularization;
        return C * HingeLoss(y_hat, y) + regularization.regTerm(weights, 1, 0, "Ridge");
@@ -364,4 +364,29 @@ namespace MLPP{
        Reg regularization;
        return alg.scalarMultiply(C, HingeLossDeriv(y_hat, y));
    }

    double Cost::dualFormSVM(std::vector<double> alpha, std::vector<std::vector<double>> X, std::vector<double> y){
        LinAlg alg;
        std::vector<std::vector<double>> Y = alg.diag(y); // Y is a diagonal matrix. Y[i][j] = y[i] if i == j, else Y[i][j] = 0. Y^T = Y.
        std::vector<std::vector<double>> K = alg.matmult(X, alg.transpose(X)); // TO DO: DON'T forget to add non-linear kernelizations.
        std::vector<std::vector<double>> Q = alg.matmult(alg.matmult(alg.transpose(Y), K), Y);
        double alphaQ = alg.matmult(alg.matmult({alpha}, Q), alg.transpose({alpha}))[0][0];
        std::vector<double> one = alg.onevec(alpha.size());

        return -alg.dot(one, alpha) + 0.5 * alphaQ;
    }

    std::vector<double> Cost::dualFormSVMDeriv(std::vector<double> alpha, std::vector<std::vector<double>> X, std::vector<double> y){
        LinAlg alg;
        std::vector<std::vector<double>> Y = alg.zeromat(y.size(), y.size());
        for(int i = 0; i < y.size(); i++){
            Y[i][i] = y[i]; // Y is a diagonal matrix. Y[i][j] = y[i] if i == j, else Y[i][j] = 0. Y^T = Y.
        }
        std::vector<std::vector<double>> K = alg.matmult(X, alg.transpose(X)); // TO DO: DON'T forget to add non-linear kernelizations.
        std::vector<std::vector<double>> Q = alg.matmult(alg.matmult(alg.transpose(Y), K), Y);
        std::vector<double> alphaQDeriv = alg.mat_vec_mult(Q, alpha);
        std::vector<double> one = alg.onevec(alpha.size());

        return alg.subtraction(alphaQDeriv, one);
    }
}
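
As a sanity check on the matrix form above, here is a minimal standalone sketch (not part of this commit; dotProd is a local helper, not MLPP's LinAlg) that evaluates the same objective by direct summation:

#include <iostream>
#include <numeric>
#include <vector>

// Direct-summation form of Cost::dualFormSVM's objective:
// f(alpha) = -sum_i alpha_i + 0.5 * sum_{i,j} alpha_i alpha_j y_i y_j <x_i, x_j>
double dotProd(const std::vector<double>& u, const std::vector<double>& v){
    return std::inner_product(u.begin(), u.end(), v.begin(), 0.0);
}

double dualObjective(const std::vector<double>& alpha,
                     const std::vector<std::vector<double>>& X,
                     const std::vector<double>& y){
    double lin = 0, quad = 0;
    for(size_t i = 0; i < alpha.size(); i++){
        lin += alpha[i];
        for(size_t j = 0; j < alpha.size(); j++){
            quad += alpha[i] * alpha[j] * y[i] * y[j] * dotProd(X[i], X[j]);
        }
    }
    return -lin + 0.5 * quad;
}

int main(){
    std::vector<std::vector<double>> X = {{0, 0}, {1, 1}};
    std::vector<double> y = {-1, 1};
    std::vector<double> alpha = {0.5, 0.5};
    std::cout << dualObjective(alpha, X, y) << std::endl; // prints -0.75
}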
MLPP/Cost/Cost.hpp
@@ -63,10 +63,14 @@ namespace MLPP{
        std::vector<std::vector<double>> HingeLossDeriv(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y);

        double HingeLoss(std::vector<double> y_hat, std::vector<double> y, std::vector<double> weights, double C);
-       double HingeLoss(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y, std::vector<double> weights, double C);
+       double HingeLoss(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y, std::vector<std::vector<double>> weights, double C);

        std::vector<double> HingeLossDeriv(std::vector<double> y_hat, std::vector<double> y, double C);
        std::vector<std::vector<double>> HingeLossDeriv(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y, double C);

+       double dualFormSVM(std::vector<double> alpha, std::vector<std::vector<double>> X, std::vector<double> y); // TO DO: DON'T forget to add non-linear kernelizations.
+       std::vector<double> dualFormSVMDeriv(std::vector<double> alpha, std::vector<std::vector<double>> X, std::vector<double> y);

    private:
BIN  MLPP/DualSVC/.DS_Store vendored Normal file
Binary file not shown.
241  MLPP/DualSVC/DualSVC.cpp Normal file
@@ -0,0 +1,241 @@
//
//  DualSVC.cpp
//
//  Created by Marc Melikyan on 10/2/20.
//

#include "DualSVC.hpp"
#include "Activation/Activation.hpp"
#include "LinAlg/LinAlg.hpp"
#include "Regularization/Reg.hpp"
#include "Utilities/Utilities.hpp"
#include "Cost/Cost.hpp"

#include <iostream>
#include <random>

namespace MLPP{
    DualSVC::DualSVC(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, double C, std::string kernel)
    : inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), C(C), kernel(kernel)
    {
        y_hat.resize(n);
        bias = Utilities::biasInitialization();
        alpha = Utilities::weightInitialization(n); // One alpha per training example, as per the Lagrange multipliers.
        K = createK(); // For now this is unused. When non-linear kernels are added, K will be manipulated.
    }

    std::vector<double> DualSVC::modelSetTest(std::vector<std::vector<double>> X){
        return Evaluate(X);
    }

    double DualSVC::modelTest(std::vector<double> x){
        return Evaluate(x);
    }

    void DualSVC::gradientDescent(double learning_rate, int max_epoch, bool UI){
        class Cost cost;
        Activation avn;
        LinAlg alg;
        Reg regularization;
        double cost_prev = 0;
        int epoch = 1;
        forwardPass();

        while(true){
            cost_prev = Cost(alpha, inputSet, outputSet);

            alpha = alg.subtraction(alpha, alg.scalarMultiply(learning_rate, cost.dualFormSVMDeriv(alpha, inputSet, outputSet)));

            alphaProjection();

            // Calculating the bias from the first free support vector (0 < alpha[i] < C).
            double biasGradient = 0;
            for(int i = 0; i < alpha.size(); i++){
                if(alpha[i] < C && alpha[i] > 0){
                    double sum = 0;
                    for(int j = 0; j < alpha.size(); j++){
                        if(alpha[j] > 0){
                            sum += alpha[j] * outputSet[j] * alg.dot(inputSet[j], inputSet[i]); // TO DO: DON'T forget to add non-linear kernelizations.
                        }
                    }
                    biasGradient = (1 - outputSet[i] * sum) / outputSet[i];
                    break; // One margin support vector suffices.
                }
            }
            bias -= biasGradient * learning_rate;

            forwardPass();

            // UI PORTION
            if(UI) {
                Utilities::CostInfo(epoch, cost_prev, Cost(alpha, inputSet, outputSet));
                Utilities::UI(alpha, bias);
                std::cout << score() << std::endl; // TO DO: DELETE THIS.
            }
            epoch++;

            if(epoch > max_epoch) { break; }
        }
    }
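
    // The bias step above uses the KKT conditions: any free support vector
    // (0 < alpha_i < C) lies exactly on the margin, so
    //     y_i * (sum_j alpha_j y_j <x_j, x_i> + b) = 1,
    // giving b = (1 - y_i * sum_j alpha_j y_j <x_j, x_i>) / y_i, the biasGradient
    // expression in the loop (equal to y_i - sum_j alpha_j y_j <x_j, x_i>, since
    // y_i is +1 or -1).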

    // void DualSVC::SGD(double learning_rate, int max_epoch, bool UI){
    //     class Cost cost;
    //     Activation avn;
    //     LinAlg alg;
    //     Reg regularization;

    //     double cost_prev = 0;
    //     int epoch = 1;

    //     while(true){
    //         std::random_device rd;
    //         std::default_random_engine generator(rd());
    //         std::uniform_int_distribution<int> distribution(0, int(n - 1));
    //         int outputIndex = distribution(generator);

    //         cost_prev = Cost(alpha, inputSet[outputIndex], outputSet[outputIndex]);

    //         // Bias updation
    //         bias -= learning_rate * costDeriv;

    //         y_hat = Evaluate({inputSet[outputIndex]});

    //         if(UI) {
    //             Utilities::CostInfo(epoch, cost_prev, Cost(alpha));
    //             Utilities::UI(weights, bias);
    //         }
    //         epoch++;

    //         if(epoch > max_epoch) { break; }
    //     }
    //     forwardPass();
    // }

    // void DualSVC::MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI){
    //     class Cost cost;
    //     Activation avn;
    //     LinAlg alg;
    //     Reg regularization;
    //     double cost_prev = 0;
    //     int epoch = 1;

    //     // Creating the mini-batches
    //     int n_mini_batch = n/mini_batch_size;
    //     auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);

    //     while(true){
    //         for(int i = 0; i < n_mini_batch; i++){
    //             std::vector<double> y_hat = Evaluate(inputMiniBatches[i]);
    //             std::vector<double> z = propagate(inputMiniBatches[i]);
    //             cost_prev = Cost(z, outputMiniBatches[i], weights, C);

    //             // Calculating the weight gradients
    //             weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), cost.HingeLossDeriv(z, outputMiniBatches[i], C))));
    //             weights = regularization.regWeights(weights, learning_rate/n, 0, "Ridge");

    //             // Calculating the bias gradients
    //             bias -= learning_rate * alg.sum_elements(cost.HingeLossDeriv(y_hat, outputMiniBatches[i], C)) / n;

    //             forwardPass();

    //             y_hat = Evaluate(inputMiniBatches[i]);

    //             if(UI) {
    //                 Utilities::CostInfo(epoch, cost_prev, Cost(z, outputMiniBatches[i], weights, C));
    //                 Utilities::UI(weights, bias);
    //             }
    //         }
    //         epoch++;
    //         if(epoch > max_epoch) { break; }
    //     }
    //     forwardPass();
    // }

    double DualSVC::score(){
        Utilities util;
        return util.performance(y_hat, outputSet);
    }

    void DualSVC::save(std::string fileName){
        Utilities util;
        util.saveParameters(fileName, alpha, bias);
    }

    double DualSVC::Cost(std::vector<double> alpha, std::vector<std::vector<double>> X, std::vector<double> y){
        class Cost cost;
        return cost.dualFormSVM(alpha, X, y);
    }

    std::vector<double> DualSVC::Evaluate(std::vector<std::vector<double>> X){
        Activation avn;
        return avn.sign(propagate(X));
    }

    std::vector<double> DualSVC::propagate(std::vector<std::vector<double>> X){
        LinAlg alg;
        std::vector<double> z;
        for(int i = 0; i < X.size(); i++){
            double sum = 0;
            for(int j = 0; j < alpha.size(); j++){
                if(alpha[j] != 0){
                    sum += alpha[j] * outputSet[j] * alg.dot(inputSet[j], X[i]); // TO DO: DON'T forget to add non-linear kernelizations.
                }
            }
            sum += bias;
            z.push_back(sum);
        }
        return z;
    }
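
    // propagate evaluates the dual-form decision function directly from the
    // training set, skipping the zero multipliers:
    //     f(x) = sum_{j : alpha_j > 0} alpha_j y_j <x_j, x> + b,  y_hat = sign(f(x)).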

    double DualSVC::Evaluate(std::vector<double> x){
        Activation avn;
        return avn.sign(propagate(x));
    }

    double DualSVC::propagate(std::vector<double> x){
        LinAlg alg;
        double z = 0;
        for(int j = 0; j < alpha.size(); j++){
            if(alpha[j] != 0){
                z += alpha[j] * outputSet[j] * alg.dot(inputSet[j], x); // TO DO: DON'T forget to add non-linear kernelizations.
            }
        }
        z += bias;
        return z;
    }

    void DualSVC::forwardPass(){
        LinAlg alg;
        Activation avn;

        z = propagate(inputSet);
        y_hat = avn.sign(z);
    }

    void DualSVC::alphaProjection(){
        for(int i = 0; i < alpha.size(); i++){
            if(alpha[i] > C){
                alpha[i] = C;
            }
            else if(alpha[i] < 0){
                alpha[i] = 0;
            }
        }
    }
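
    // alphaProjection is the projection onto the dual feasible set: after each
    // gradient step every multiplier is clipped back into the box,
    //     alpha_i <- min(max(alpha_i, 0), C),
    // which makes the loop in gradientDescent projected gradient descent.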

    double DualSVC::kernelFunction(std::vector<double> u, std::vector<double> v){
        LinAlg alg;
        if(kernel == "Linear"){
            return alg.dot(u, v);
        }
    }

    std::vector<std::vector<double>> DualSVC::createK(){
        LinAlg alg;
        if(kernel == "Linear"){
            return alg.matmult(inputSet, alg.transpose(inputSet));
        } // warning: non-void function does not return a value in all control paths [-Wreturn-type]
    }
}
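
Since DualSVC.hpp below already reserves p and c for a polynomial kernel, here is one possible shape for the kernel dispatch, a hypothetical standalone sketch (not in this commit) that would also silence the -Wreturn-type warning noted above:

#include <cmath>
#include <numeric>
#include <stdexcept>
#include <string>
#include <vector>

// Hypothetical sketch: K(u, v) = <u, v> for "Linear", (<u, v> + c)^p for
// "Polynomial"; unknown kernel names throw instead of falling off the end
// of a non-void function.
double kernelFunction(const std::vector<double>& u, const std::vector<double>& v,
                      const std::string& kernel, double p, double c){
    double uv = std::inner_product(u.begin(), u.end(), v.begin(), 0.0);
    if(kernel == "Linear"){ return uv; }
    if(kernel == "Polynomial"){ return std::pow(uv + c, p); }
    throw std::invalid_argument("Unknown kernel: " + kernel);
}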
71  MLPP/DualSVC/DualSVC.hpp Normal file
@@ -0,0 +1,71 @@
//
//  DualSVC.hpp
//
//  Created by Marc Melikyan on 10/2/20.
//
// http://disp.ee.ntu.edu.tw/~pujols/Support%20Vector%20Machine.pdf
// http://ciml.info/dl/v0_99/ciml-v0_99-ch11.pdf
// These were excellent for the practical intuition behind the dual formulation.

#ifndef DualSVC_hpp
#define DualSVC_hpp

#include <vector>
#include <string>

namespace MLPP {

    class DualSVC{

        public:
            DualSVC(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, double C, std::string kernel = "Linear");
            DualSVC(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, double C, std::string kernel, double p, double c);

            std::vector<double> modelSetTest(std::vector<std::vector<double>> X);
            double modelTest(std::vector<double> x);
            void gradientDescent(double learning_rate, int max_epoch, bool UI = 1);
            void SGD(double learning_rate, int max_epoch, bool UI = 1);
            void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1);
            double score();
            void save(std::string fileName);
        private:

            void init();

            double Cost(std::vector<double> alpha, std::vector<std::vector<double>> X, std::vector<double> y);

            std::vector<double> Evaluate(std::vector<std::vector<double>> X);
            std::vector<double> propagate(std::vector<std::vector<double>> X);
            double Evaluate(std::vector<double> x);
            double propagate(std::vector<double> x);
            void forwardPass();

            void alphaProjection();

            double kernelFunction(std::vector<double> u, std::vector<double> v);
            std::vector<std::vector<double>> createK();

            std::vector<std::vector<double>> inputSet;
            std::vector<double> outputSet;
            std::vector<double> z;
            std::vector<double> y_hat;
            double bias;

            std::vector<double> alpha;
            std::vector<std::vector<double>> K;

            double C;
            int n;
            int k;

            std::string kernel;
            double p; // Poly
            double c; // Poly

            // UI Portion
            void UI(int epoch, double cost_prev);
    };
}

#endif /* DualSVC_hpp */
19  main.cpp
@@ -46,6 +46,7 @@
#include "MLPP/Convolutions/Convolutions.hpp"
#include "MLPP/SVC/SVC.hpp"
#include "MLPP/NumericalAnalysis/NumericalAnalysis.hpp"
#include "MLPP/DualSVC/DualSVC.hpp"


using namespace MLPP;
@@ -487,11 +488,11 @@ int main() {
    // alg.printMatrix(wordEmbeddings);
    // std::cout << std::endl;

-   std::vector<std::string> textArchive = {"pizza", "pizza hamburger cookie", "hamburger", "ramen", "sushi", "ramen sushi"};
+   // std::vector<std::string> textArchive = {"pizza", "pizza hamburger cookie", "hamburger", "ramen", "sushi", "ramen sushi"};

-   alg.printMatrix(data.LSA(textArchive, 2));
-   //alg.printMatrix(data.BOW(textArchive, "Default"));
-   std::cout << std::endl;
+   // alg.printMatrix(data.LSA(textArchive, 2));
+   // //alg.printMatrix(data.BOW(textArchive, "Default"));
+   // std::cout << std::endl;


    // std::vector<std::vector<double>> inputSet = {{1,2},{2,3},{3,4},{4,5},{5,6}};
@@ -640,8 +641,18 @@ int main() {
    // std::vector<double> b = {4,4,4};
    // alg.printVector(alg.cross(a,b));

    //SUPPORT VECTOR CLASSIFICATION (kernel method)
    // std::vector<std::vector<double>> inputSet;
    // std::vector<double> outputSet;
    // data.setData(30, "/Users/marcmelikyan/Desktop/Data/BreastCancerSVM.csv", inputSet, outputSet);

    std::vector<std::vector<double>> inputSet;
    std::vector<double> outputSet;
    data.setData(4, "/Users/marcmelikyan/Desktop/Data/IrisSVM.csv", inputSet, outputSet);

    DualSVC kernelSVM(inputSet, outputSet, 1000);
    kernelSVM.gradientDescent(0.0001, 20, 1);


    return 0;
}