Dual formulation of SVM [LINEAR KERNEL ONLY, BATCH GD ONLY]

novak_99 2021-12-31 18:22:44 -08:00
parent bf667b0a2d
commit 3e287f3b95
9 changed files with 358 additions and 6 deletions

BIN .DS_Store vendored (binary file not shown)

BIN MLPP/.DS_Store vendored (binary file not shown)

MLPP/Cost/Cost.cpp

@@ -348,7 +348,7 @@ namespace MLPP{
Reg regularization;
return C * HingeLoss(y_hat, y) + regularization.regTerm(weights, 1, 0, "Ridge");
}
-double Cost::HingeLoss(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y, std::vector<double> weights, double C){
+double Cost::HingeLoss(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y, std::vector<std::vector<double>> weights, double C){
LinAlg alg;
Reg regularization;
return C * HingeLoss(y_hat, y) + regularization.regTerm(weights, 1, 0, "Ridge");
@@ -364,4 +364,29 @@
Reg regularization;
return alg.scalarMultiply(C, HingeLossDeriv(y_hat, y));
}
double Cost::dualFormSVM(std::vector<double> alpha, std::vector<std::vector<double>> X, std::vector<double> y){
LinAlg alg;
std::vector<std::vector<double>> Y = alg.diag(y); // Y is a diagonal matrix: Y[i][j] = y[i] if i == j, else 0. Note Y^T = Y.
std::vector<std::vector<double>> K = alg.matmult(X, alg.transpose(X)); // TODO: add support for non-linear kernels.
std::vector<std::vector<double>> Q = alg.matmult(alg.matmult(alg.transpose(Y), K), Y);
double alphaQ = alg.matmult(alg.matmult({alpha}, Q), alg.transpose({alpha}))[0][0];
std::vector<double> one = alg.onevec(alpha.size());
return -alg.dot(one, alpha) + 0.5 * alphaQ;
}
std::vector<double> Cost::dualFormSVMDeriv(std::vector<double> alpha, std::vector<std::vector<double>> X, std::vector<double> y){
LinAlg alg;
std::vector<std::vector<double>> Y = alg.zeromat(y.size(), y.size());
for(int i = 0; i < y.size(); i++){
Y[i][i] = y[i]; // Y is a diagonal matrix: Y[i][j] = y[i] if i == j, else 0. Note Y^T = Y.
}
std::vector<std::vector<double>> K = alg.matmult(X, alg.transpose(X)); // TODO: add support for non-linear kernels.
std::vector<std::vector<double>> Q = alg.matmult(alg.matmult(alg.transpose(Y), K), Y);
std::vector<double> alphaQDeriv = alg.mat_vec_mult(Q, alpha);
std::vector<double> one = alg.onevec(alpha.size());
return alg.subtraction(alphaQDeriv, one);
}
}
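
For reference, dualFormSVM and dualFormSVMDeriv compute the negated soft-margin dual objective and its gradient. As a sketch in standard notation, with Y = diag(y) and K the linear-kernel Gram matrix:

\min_{\alpha} \; \tfrac{1}{2}\,\alpha^{T} Q \alpha - \mathbf{1}^{T} \alpha, \qquad Q = Y^{T} K Y, \qquad K_{ij} = x_i \cdot x_j

\nabla_{\alpha} \left( \tfrac{1}{2}\,\alpha^{T} Q \alpha - \mathbf{1}^{T} \alpha \right) = Q\alpha - \mathbf{1}

The textbook dual also carries the constraints 0 \le \alpha_i \le C and \sum_i \alpha_i y_i = 0; in this commit the box constraint is enforced separately by alphaProjection in DualSVC.cpp, and the equality constraint is not enforced.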

MLPP/Cost/Cost.hpp

@@ -63,11 +63,15 @@ namespace MLPP{
std::vector<std::vector<double>> HingeLossDeriv(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y);
double HingeLoss(std::vector <double> y_hat, std::vector<double> y, std::vector<double> weights, double C);
-double HingeLoss(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y, std::vector<double> weights, double C);
+double HingeLoss(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y, std::vector<std::vector<double>> weights, double C);
std::vector<double> HingeLossDeriv(std::vector <double> y_hat, std::vector<double> y, double C);
std::vector<std::vector<double>> HingeLossDeriv(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y, double C);
double dualFormSVM(std::vector<double> alpha, std::vector<std::vector<double>> X, std::vector<double> y); // TODO: add support for non-linear kernels.
std::vector<double> dualFormSVMDeriv(std::vector<double> alpha, std::vector<std::vector<double>> X, std::vector<double> y);
private:
};

BIN MLPP/DualSVC/.DS_Store vendored Normal file (binary file not shown)

MLPP/DualSVC/DualSVC.cpp Normal file (241 lines)

@@ -0,0 +1,241 @@
//
// DualSVC.cpp
//
// Created by Marc Melikyan on 10/2/20.
//
#include "DualSVC.hpp"
#include "Activation/Activation.hpp"
#include "LinAlg/LinAlg.hpp"
#include "Regularization/Reg.hpp"
#include "Utilities/Utilities.hpp"
#include "Cost/Cost.hpp"
#include <iostream>
#include <random>
namespace MLPP{
DualSVC::DualSVC(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, double C, std::string kernel)
: inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), C(C), kernel(kernel)
{
y_hat.resize(n);
bias = Utilities::biasInitialization();
alpha = Utilities::weightInitialization(n); // One alpha per training example (the Lagrange multipliers).
K = createK(); // Currently unused; K will come into play once non-linear kernels are added.
}
std::vector<double> DualSVC::modelSetTest(std::vector<std::vector<double>> X){
return Evaluate(X);
}
double DualSVC::modelTest(std::vector<double> x){
return Evaluate(x);
}
void DualSVC::gradientDescent(double learning_rate, int max_epoch, bool UI){
class Cost cost;
Activation avn;
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
forwardPass();
while(true){
cost_prev = Cost(alpha, inputSet, outputSet);
alpha = alg.subtraction(alpha, alg.scalarMultiply(learning_rate, cost.dualFormSVMDeriv(alpha, inputSet, outputSet)));
alphaProjection();
// Calculating the bias from the first free support vector (0 < alpha_i < C)
double biasGradient = 0;
for(int i = 0; i < alpha.size(); i++){
    if(alpha[i] < C && alpha[i] > 0){
        double sum = 0;
        for(int j = 0; j < alpha.size(); j++){
            if(alpha[j] > 0){
                sum += alpha[j] * outputSet[j] * alg.dot(inputSet[j], inputSet[i]); // TODO: add support for non-linear kernels.
            }
        }
        biasGradient = (1 - outputSet[i] * sum) / outputSet[i];
        break;
    }
}
bias -= biasGradient * learning_rate;
forwardPass();
// UI PORTION
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost(alpha, inputSet, outputSet));
Utilities::UI(alpha, bias);
std::cout << score() << std::endl; // TODO: remove this debug output.
}
epoch++;
if(epoch > max_epoch) { break; }
}
}
// void DualSVC::SGD(double learning_rate, int max_epoch, bool UI){
// class Cost cost;
// Activation avn;
// LinAlg alg;
// Reg regularization;
// double cost_prev = 0;
// int epoch = 1;
// while(true){
// std::random_device rd;
// std::default_random_engine generator(rd());
// std::uniform_int_distribution<int> distribution(0, int(n - 1));
// int outputIndex = distribution(generator);
// cost_prev = Cost(alpha, inputSet[outputIndex], outputSet[outputIndex]);
// // Bias update
// bias -= learning_rate * costDeriv;
// y_hat = Evaluate({inputSet[outputIndex]});
// if(UI) {
// Utilities::CostInfo(epoch, cost_prev, Cost(alpha));
// Utilities::UI(weights, bias);
// }
// epoch++;
// if(epoch > max_epoch) { break; }
// }
// forwardPass();
// }
// void DualSVC::MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI){
// class Cost cost;
// Activation avn;
// LinAlg alg;
// Reg regularization;
// double cost_prev = 0;
// int epoch = 1;
// // Creating the mini-batches
// int n_mini_batch = n/mini_batch_size;
// auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
// while(true){
// for(int i = 0; i < n_mini_batch; i++){
// std::vector<double> y_hat = Evaluate(inputMiniBatches[i]);
// std::vector<double> z = propagate(inputMiniBatches[i]);
// cost_prev = Cost(z, outputMiniBatches[i], weights, C);
// // Calculating the weight gradients
// weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), cost.HingeLossDeriv(z, outputMiniBatches[i], C))));
// weights = regularization.regWeights(weights, learning_rate/n, 0, "Ridge");
// // Calculating the bias gradients
// bias -= learning_rate * alg.sum_elements(cost.HingeLossDeriv(y_hat, outputMiniBatches[i], C)) / n;
// forwardPass();
// y_hat = Evaluate(inputMiniBatches[i]);
// if(UI) {
// Utilities::CostInfo(epoch, cost_prev, Cost(z, outputMiniBatches[i], weights, C));
// Utilities::UI(weights, bias);
// }
// }
// epoch++;
// if(epoch > max_epoch) { break; }
// }
// forwardPass();
// }
double DualSVC::score(){
Utilities util;
return util.performance(y_hat, outputSet);
}
void DualSVC::save(std::string fileName){
Utilities util;
util.saveParameters(fileName, alpha, bias);
}
double DualSVC::Cost(std::vector<double> alpha, std::vector<std::vector<double>> X, std::vector<double> y){
class Cost cost;
return cost.dualFormSVM(alpha, X, y);
}
std::vector<double> DualSVC::Evaluate(std::vector<std::vector<double>> X){
Activation avn;
return avn.sign(propagate(X));
}
std::vector<double> DualSVC::propagate(std::vector<std::vector<double>> X){
LinAlg alg;
std::vector<double> z;
for(int i = 0; i < X.size(); i++){
double sum = 0;
for(int j = 0; j < alpha.size(); j++){
if(alpha[j] != 0){
sum += alpha[j] * outputSet[j] * alg.dot(inputSet[j], X[i]); // TODO: add support for non-linear kernels.
}
}
sum += bias;
z.push_back(sum);
}
return z;
}
double DualSVC::Evaluate(std::vector<double> x){
Activation avn;
return avn.sign(propagate(x));
}
double DualSVC::propagate(std::vector<double> x){
LinAlg alg;
double z = 0;
for(int j = 0; j < alpha.size(); j++){
if(alpha[j] != 0){
z += alpha[j] * outputSet[j] * alg.dot(inputSet[j], x); // TODO: add support for non-linear kernels.
}
}
z += bias;
return z;
}
void DualSVC::forwardPass(){
LinAlg alg;
Activation avn;
z = propagate(inputSet);
y_hat = avn.sign(z);
}
void DualSVC::alphaProjection(){
for(int i = 0; i < alpha.size(); i++){
if(alpha[i] > C){
alpha[i] = C;
}
else if(alpha[i] < 0){
alpha[i] = 0;
}
}
}
double DualSVC::kernelFunction(std::vector<double> u, std::vector<double> v){
    LinAlg alg;
    if(kernel == "Linear"){
        return alg.dot(u, v);
    }
    return 0; // Only the linear kernel is implemented so far.
}
std::vector<std::vector<double>> DualSVC::createK(){
    LinAlg alg;
    if(kernel == "Linear"){
        return alg.matmult(inputSet, alg.transpose(inputSet));
    }
    return {}; // Only the linear kernel is implemented so far; this also silences -Wreturn-type.
}
}
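
Putting the pieces together: gradientDescent combined with alphaProjection is projected gradient descent on the dual, and propagate evaluates the resulting decision function. As a sketch, assuming labels y_i \in \{-1, +1\}:

\alpha \leftarrow \operatorname{clip}_{[0,\,C]}\big( \alpha - \eta\,(Q\alpha - \mathbf{1}) \big)

f(x) = \operatorname{sign}\Big( \sum_{j:\,\alpha_j > 0} \alpha_j\, y_j\, K(x_j, x) + b \Big)

The bias step relies on the textbook identity b = y_i - \sum_j \alpha_j y_j K(x_j, x_i) for any free support vector 0 < \alpha_i < C; since y_i \in \{-1, +1\}, the code's (1 - y_i s)/y_i equals y_i - s, and the stored bias is nudged toward that value by a gradient-style step rather than set directly.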

MLPP/DualSVC/DualSVC.hpp Normal file (71 lines)

@@ -0,0 +1,71 @@
//
// DualSVC.hpp
//
// Created by Marc Melikyan on 10/2/20.
//
// http://disp.ee.ntu.edu.tw/~pujols/Support%20Vector%20Machine.pdf
// http://ciml.info/dl/v0_99/ciml-v0_99-ch11.pdf
// These were excellent for building practical intuition behind the dual formulation.
#ifndef DualSVC_hpp
#define DualSVC_hpp
#include <vector>
#include <string>
namespace MLPP {
class DualSVC{
public:
DualSVC(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, double C, std::string kernel = "Linear");
DualSVC(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, double C, std::string kernel, double p, double c);
std::vector<double> modelSetTest(std::vector<std::vector<double>> X);
double modelTest(std::vector<double> x);
void gradientDescent(double learning_rate, int max_epoch, bool UI = 1);
void SGD(double learning_rate, int max_epoch, bool UI = 1);
void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1);
double score();
void save(std::string fileName);
private:
void init();
double Cost(std::vector<double> alpha, std::vector<std::vector<double>> X, std::vector<double> y);
std::vector<double> Evaluate(std::vector<std::vector<double>> X);
std::vector<double> propagate(std::vector<std::vector<double>> X);
double Evaluate(std::vector<double> x);
double propagate(std::vector<double> x);
void forwardPass();
void alphaProjection();
double kernelFunction(std::vector<double> u, std::vector<double> v);
std::vector<std::vector<double>> createK();
std::vector<std::vector<double>> inputSet;
std::vector<double> outputSet;
std::vector<double> z;
std::vector<double> y_hat;
double bias;
std::vector<double> alpha;
std::vector<std::vector<double>> K;
double C;
int n;
int k;
std::string kernel;
double p; // Poly
double c; // Poly
// UI Portion
void UI(int epoch, double cost_prev);
};
}
#endif /* DualSVC_hpp */
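
For completeness, a minimal usage sketch against the API declared above. The toy dataset and hyperparameters are illustrative assumptions, not part of the commit; labels must be encoded as -1/+1 for the dual machinery to work:

#include <iostream>
#include <vector>
#include "MLPP/DualSVC/DualSVC.hpp"
using namespace MLPP;

int main() {
    // Tiny linearly separable toy set: one cluster near the origin, one near (3,3).
    std::vector<std::vector<double>> X = {{0,0}, {0,1}, {1,0}, {3,3}, {3,4}, {4,3}};
    std::vector<double> y = {-1, -1, -1, 1, 1, 1};

    DualSVC svm(X, y, /*C=*/1.0); // kernel defaults to "Linear"
    svm.gradientDescent(/*learning_rate=*/0.01, /*max_epoch=*/1000, /*UI=*/0);
    std::cout << "accuracy: " << svm.score() << std::endl;
    return 0;
}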

BIN a.out (binary file not shown)

main.cpp

@@ -46,6 +46,7 @@
#include "MLPP/Convolutions/Convolutions.hpp"
#include "MLPP/SVC/SVC.hpp"
#include "MLPP/NumericalAnalysis/NumericalAnalysis.hpp"
#include "MLPP/DualSVC/DualSVC.hpp"
using namespace MLPP;
@@ -487,11 +488,11 @@ int main() {
// alg.printMatrix(wordEmbeddings);
// std::cout << std::endl;
-std::vector<std::string> textArchive = {"pizza", "pizza hamburger cookie", "hamburger", "ramen", "sushi", "ramen sushi"};
+// std::vector<std::string> textArchive = {"pizza", "pizza hamburger cookie", "hamburger", "ramen", "sushi", "ramen sushi"};
-alg.printMatrix(data.LSA(textArchive, 2));
-//alg.printMatrix(data.BOW(textArchive, "Default"));
-std::cout << std::endl;
+// alg.printMatrix(data.LSA(textArchive, 2));
+// //alg.printMatrix(data.BOW(textArchive, "Default"));
+// std::cout << std::endl;
// std::vector<std::vector<double>> inputSet = {{1,2},{2,3},{3,4},{4,5},{5,6}};
@@ -640,7 +641,17 @@ int main() {
// std::vector<double> b = {4,4,4};
// alg.printVector(alg.cross(a,b));
// SUPPORT VECTOR CLASSIFICATION (kernel method)
// std::vector<std::vector<double>> inputSet;
// std::vector<double> outputSet;
// data.setData(30, "/Users/marcmelikyan/Desktop/Data/BreastCancerSVM.csv", inputSet, outputSet);
std::vector<std::vector<double>> inputSet;
std::vector<double> outputSet;
data.setData(4, "/Users/marcmelikyan/Desktop/Data/IrisSVM.csv", inputSet, outputSet);
DualSVC kernelSVM(inputSet, outputSet, 1000);
kernelSVM.gradientDescent(0.0001, 20, 1);
return 0;