Added SVC (linear support vector classification) optimizer with SGD

novak_99 2021-09-24 16:40:02 -07:00
parent cce2bad23b
commit d47cd7e976
13 changed files with 194 additions and 74 deletions
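For reference, the objective these new optimizers minimize, pieced together from Cost::HingeLoss and the hard-coded Ridge term in the hunks below (a sketch; any 1/n averaging inside the two-argument HingeLoss is not visible here), is the standard soft-margin SVM loss

J(w, b) = C \sum_i \max(0,\; 1 - y_i (w^\top x_i + b)) + \tfrac{1}{2} \lVert w \rVert^2

with labels y_i \in \{-1, +1\}; larger C penalizes margin violations more heavily relative to the Ridge term.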

View File

@@ -566,6 +566,57 @@ namespace MLPP{
return a;
}
double Activation::sign(double z, bool deriv){
if(deriv){
return 0;
}
if(z < 0){
return -1;
}
else if(z == 0){
return 0;
}
else{
return 1;
}
}
std::vector<double> Activation::sign(std::vector<double> z, bool deriv){
if(deriv){
std::vector<double> deriv;
deriv.resize(z.size());
for(int i = 0; i < z.size(); i++){
deriv[i] = sign(z[i], 1);
}
return deriv;
}
std::vector<double> a;
a.resize(z.size());
for(int i = 0; i < a.size(); i++){
a[i] = sign(z[i]);
}
return a;
}
std::vector<std::vector<double>> Activation::sign(std::vector<std::vector<double>> z, bool deriv){
if(deriv){
std::vector<std::vector<double>> deriv;
deriv.resize(z.size());
for(int i = 0; i < z.size(); i++){
deriv[i] = sign(z[i], 1);
}
return deriv;
}
std::vector<std::vector<double>> a;
a.resize(z.size());
for(int i = 0; i < a.size(); i++){
a[i] = sign(z[i]);
}
return a;
}
double Activation::sinh(double z, bool deriv){
if(deriv){ return cosh(z); }
return 0.5 * (exp(z) - exp(-z));
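A minimal usage sketch of the new sign overloads (behavior read off the scalar implementation above; the deriv flag returns the subderivative, taken to be 0 everywhere, including the kink at z = 0):

Activation act;
act.sign(-3.5);         // -1
act.sign(0.0);          //  0
act.sign(7.2);          //  1
act.sign(-3.5, true);   //  0 (derivative of sign)

std::vector<double> v = {-1.0, 0.0, 2.0};
std::vector<double> s = act.sign(v);    // element-wise: {-1, 0, 1}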

View File

@@ -81,6 +81,10 @@ namespace MLPP{
std::vector<double> GELU(std::vector<double> z, bool deriv = 0);
std::vector<std::vector<double>> GELU(std::vector<std::vector<double>> z, bool deriv = 0);
double sign(double z, bool deriv = 0);
std::vector<double> sign(std::vector<double> z, bool deriv = 0);
std::vector<std::vector<double>> sign(std::vector<std::vector<double>> z, bool deriv = 0);
double sinh(double z, bool deriv = 0);
std::vector<double> sinh(std::vector<double> z, bool deriv = 0);
std::vector<std::vector<double>> sinh(std::vector<std::vector<double>> z, bool deriv = 0);

View File

@@ -7,6 +7,7 @@
#include <iostream>
#include "Cost.hpp"
#include "LinAlg/LinAlg.hpp"
#include "Regularization/Reg.hpp"
namespace MLPP{
double Cost::MSE(std::vector <double> y_hat, std::vector<double> y){
@@ -341,4 +342,26 @@ namespace MLPP{
}
return deriv;
}
double Cost::HingeLoss(std::vector <double> y_hat, std::vector<double> y, std::vector<double> weights, double C){
LinAlg alg;
Reg regularization;
return C * HingeLoss(y_hat, y) + regularization.regTerm(weights, 1, 0, "Ridge");
}
double Cost::HingeLoss(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y, std::vector<double> weights, double C){
LinAlg alg;
Reg regularization;
return C * HingeLoss(y_hat, y) + regularization.regTerm(weights, 1, 0, "Ridge");
}
std::vector<double> Cost::HingeLossDeriv(std::vector <double> y_hat, std::vector<double> y, double C){
LinAlg alg;
Reg regularization;
return alg.scalarMultiply(C, HingeLossDeriv(y_hat, y));
}
std::vector<std::vector<double>> Cost::HingeLossDeriv(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y, double C){
LinAlg alg;
Reg regularization;
return alg.scalarMultiply(C, HingeLossDeriv(y_hat, y));
}
}
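A hedged usage sketch of the new four-argument overloads (this assumes the pre-existing two-argument HingeLoss computes the usual max(0, 1 - y*y_hat) penalty; the lambda = 1, alpha = 0 arguments to regTerm pin the penalty to plain Ridge):

class Cost cost;
std::vector<double> z = { 0.4, -1.3 };  // raw scores w.x + b
std::vector<double> y = { 1.0, -1.0 };  // labels in {-1, +1}
std::vector<double> w = { 0.3, -0.1 };
double J = cost.HingeLoss(z, y, w, /*C=*/1.0);            // C * hinge(z, y) + ridge(w)
std::vector<double> dJ = cost.HingeLossDeriv(z, y, 1.0);  // C * d hinge / dz, element-wise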

View File

@@ -61,6 +61,12 @@ namespace MLPP{
std::vector<double> HingeLossDeriv(std::vector <double> y_hat, std::vector<double> y);
std::vector<std::vector<double>> HingeLossDeriv(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y);
double HingeLoss(std::vector <double> y_hat, std::vector<double> y, std::vector<double> weights, double C);
double HingeLoss(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y, std::vector<double> weights, double C);
std::vector<double> HingeLossDeriv(std::vector <double> y_hat, std::vector<double> y, double C);
std::vector<std::vector<double>> HingeLossDeriv(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y, double C);
private:

View File

@@ -52,6 +52,9 @@ namespace MLPP {
activation_map["GELU"] = &Activation::GELU;
activationTest_map["GELU"] = &Activation::GELU;
activation_map["Sign"] = &Activation::unitStep;
activationTest_map["Sign"] = &Activation::unitStep;
activation_map["UnitStep"] = &Activation::unitStep;
activationTest_map["UnitStep"] = &Activation::unitStep;

View File

@@ -54,6 +54,9 @@ namespace MLPP {
activation_map["GELU"] = &Activation::GELU;
activationTest_map["GELU"] = &Activation::GELU;
activation_map["Sign"] = &Activation::unitStep;
activationTest_map["Sign"] = &Activation::unitStep;
activation_map["UnitStep"] = &Activation::unitStep;
activationTest_map["UnitStep"] = &Activation::unitStep;

View File

@@ -51,6 +51,9 @@ namespace MLPP {
activation_map["GELU"] = &Activation::GELU;
activationTest_map["GELU"] = &Activation::GELU;
activation_map["Sign"] = &Activation::unitStep;
activationTest_map["Sign"] = &Activation::unitStep;
activation_map["UnitStep"] = &Activation::unitStep;
activationTest_map["UnitStep"] = &Activation::unitStep;

View File

@@ -7,6 +7,7 @@
#include <iostream>
#include <random>
#include "Reg.hpp"
#include "Activation/Activation.hpp"
namespace MLPP{
@@ -85,14 +86,15 @@ namespace MLPP{
}
double Reg::regDerivTerm(std::vector<double> weights, double lambda, double alpha, std::string reg, int j){
Activation act;
if(reg == "Ridge"){
return lambda * weights[j];
}
else if(reg == "Lasso"){
return lambda * sign(weights[j]);
return lambda * act.sign(weights[j]);
}
else if(reg == "ElasticNet"){
return alpha * lambda * sign(weights[j]) + (1 - alpha) * lambda * weights[j];
return alpha * lambda * act.sign(weights[j]) + (1 - alpha) * lambda * weights[j];
}
else {
return 0;
@@ -100,29 +102,18 @@ namespace MLPP{
}
double Reg::regDerivTerm(std::vector<std::vector<double>> weights, double lambda, double alpha, std::string reg, int i, int j){
Activation act;
if(reg == "Ridge"){
return lambda * weights[i][j];
}
else if(reg == "Lasso"){
return lambda * sign(weights[i][j]);
return lambda * act.sign(weights[i][j]);
}
else if(reg == "ElasticNet"){
return alpha * lambda * sign(weights[i][j]) + (1 - alpha) * lambda * weights[i][j];
return alpha * lambda * act.sign(weights[i][j]) + (1 - alpha) * lambda * weights[i][j];
}
else {
return 0;
}
}
int Reg::sign(double weight){
if(weight < 0){
return -1;
}
else if(weight == 0){
return 0;
}
else{
return 1;
}
}
}
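With the private Reg::sign helper removed, the Lasso and ElasticNet subgradients route through Activation::sign, i.e. for Lasso \frac{\partial}{\partial w_j} \lambda |w_j| = \lambda \, \mathrm{sign}(w_j). A quick numeric check of that branch (computed directly through Activation, since regDerivTerm is private):

Activation act;
double lambda = 0.1, wj = -2.0;
double g = lambda * act.sign(wj);   // -0.1, matching the Lasso branch above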

View File

@@ -22,7 +22,6 @@ namespace MLPP{
private:
double regDerivTerm(std::vector<double> weights, double lambda, double alpha, std::string reg, int j);
double regDerivTerm(std::vector<std::vector<double>> weights, double lambda, double alpha, std::string reg, int i, int j);
int sign(double weight);
};
}

View File

@@ -5,23 +5,20 @@
//
#include "SVC.hpp"
#include "Activation/Activation.hpp"
#include "LinAlg/LinAlg.hpp"
#include "Stat/Stat.hpp"
#include "Regularization/Reg.hpp"
#include "Utilities/Utilities.hpp"
#include "Cost/Cost.hpp"
#include <iostream>
#include <cmath>
#include <random>
namespace MLPP{
SVC::SVC(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, std::string reg, double lambda, double alpha)
: inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha)
SVC::SVC(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, double C)
: inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), C(C)
{
y_hat.resize(n);
weights = Utilities::weightInitialization(k);
bias = Utilities::biasInitialization();
}
@@ -35,6 +32,8 @@ namespace MLPP{
}
void SVC::gradientDescent(double learning_rate, int max_epoch, bool UI){
class Cost cost;
Activation avn;
LinAlg alg;
Reg regularization;
double cost_prev = 0;
@@ -42,31 +41,34 @@ namespace MLPP{
forwardPass();
while(true){
cost_prev = Cost(y_hat, outputSet);
std::vector<double> error = alg.subtraction(y_hat, outputSet);
cost_prev = Cost(y_hat, outputSet, weights, C);
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputSet), cost.HingeLossDeriv(z, outputSet, C))));
weights = regularization.regWeights(weights, learning_rate/n, 0, "Ridge");
// Calculating the weight gradients
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputSet), error)));
weights = regularization.regWeights(weights, lambda, alpha, reg);
// Calculating the bias gradients
bias -= learning_rate * alg.sum_elements(error) / n;
bias += learning_rate * alg.sum_elements(cost.HingeLossDeriv(y_hat, outputSet, C)) / n;
forwardPass();
// UI PORTION
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet, weights, C));
Utilities::UI(weights, bias);
}
epoch++;
if(epoch > max_epoch) { break; }
}
}
void SVC::SGD(double learning_rate, int max_epoch, bool UI){
class Cost cost;
Activation avn;
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
@@ -77,21 +79,22 @@ namespace MLPP{
int outputIndex = distribution(generator);
double y_hat = Evaluate(inputSet[outputIndex]);
cost_prev = Cost({y_hat}, {outputSet[outputIndex]});
double z = propagate(inputSet[outputIndex]);
cost_prev = Cost({z}, {outputSet[outputIndex]}, weights, C);
double error = y_hat - outputSet[outputIndex];
double costDeriv = cost.HingeLossDeriv({z}, {outputSet[outputIndex]}, C)[0];
// Weight updation
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate * error, inputSet[outputIndex]));
weights = regularization.regWeights(weights, lambda, alpha, reg);
// Weight Updation
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate * costDeriv, inputSet[outputIndex]));
weights = regularization.regWeights(weights, learning_rate, 0, "Ridge");
// Bias updation
bias -= learning_rate * error;
bias -= learning_rate * costDeriv;
y_hat = Evaluate({inputSet[outputIndex]});
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost({y_hat}, {outputSet[outputIndex]}));
Utilities::CostInfo(epoch, cost_prev, Cost({z}, {outputSet[outputIndex]}, weights, C));
Utilities::UI(weights, bias);
}
epoch++;
@@ -102,6 +105,8 @@ namespace MLPP{
}
void SVC::MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI){
class Cost cost;
Activation avn;
LinAlg alg;
Reg regularization;
double cost_prev = 0;
@@ -114,20 +119,23 @@ namespace MLPP{
while(true){
for(int i = 0; i < n_mini_batch; i++){
std::vector<double> y_hat = Evaluate(inputMiniBatches[i]);
cost_prev = Cost(y_hat, outputMiniBatches[i]);
std::vector<double> error = alg.subtraction(y_hat, outputMiniBatches[i]);
std::vector<double> z = propagate(inputMiniBatches[i]);
cost_prev = Cost(z, outputMiniBatches[i], weights, C);
// Calculating the weight gradients
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error)));
weights = regularization.regWeights(weights, lambda, alpha, reg);
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), cost.HingeLossDeriv(z, outputMiniBatches[i], C))));
weights = regularization.regWeights(weights, learning_rate/n, 0, "Ridge");
// Calculating the bias gradients
bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size();
bias -= learning_rate * alg.sum_elements(cost.HingeLossDeriv(y_hat, outputMiniBatches[i], C)) / n;
forwardPass();
y_hat = Evaluate(inputMiniBatches[i]);
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
Utilities::CostInfo(epoch, cost_prev, Cost(z, outputMiniBatches[i], weights, C));
Utilities::UI(weights, bias);
}
}
@@ -142,29 +150,46 @@ namespace MLPP{
return util.performance(y_hat, outputSet);
}
void SVC::save(std::string fileName){
Utilities util;
util.saveParameters(fileName, weights, bias);
}
double SVC::Cost(std::vector <double> y_hat, std::vector<double> y){
Reg regularization;
double SVC::Cost(std::vector <double> z, std::vector<double> y, std::vector<double> weights, double C){
class Cost cost;
return cost.MSE(y_hat, y) + regularization.regTerm(weights, lambda, alpha, reg);
return cost.HingeLoss(z, y, weights, C);
}
std::vector<double> SVC::Evaluate(std::vector<std::vector<double>> X){
LinAlg alg;
Activation avn;
return avn.sign(alg.scalarAdd(bias, alg.mat_vec_mult(X, weights)));
}
std::vector<double> SVC::propagate(std::vector<std::vector<double>> X){
LinAlg alg;
Activation avn;
return alg.scalarAdd(bias, alg.mat_vec_mult(X, weights));
}
double SVC::Evaluate(std::vector<double> x){
LinAlg alg;
Activation avn;
return avn.sign(alg.dot(weights, x) + bias);
}
double SVC::propagate(std::vector<double> x){
LinAlg alg;
Activation avn;
return alg.dot(weights, x) + bias;
}
// sign(wTx + b)
// sign ( wTx + b )
void SVC::forwardPass(){
y_hat = Evaluate(inputSet);
LinAlg alg;
Activation avn;
z = propagate(inputSet);
y_hat = avn.sign(z);
}
}
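Taken together, each step above implements the usual hinge-loss subgradient update (a sketch; HingeLossDeriv is assumed to return the standard subgradient, and the exact shrinkage applied by regWeights when learning_rate is passed as its lambda argument is inferred from the call sites, not shown in these hunks):

\frac{\partial \ell}{\partial z} = \begin{cases} -y & \text{if } y z < 1 \\ 0 & \text{otherwise} \end{cases}, \qquad
w \leftarrow w - \eta \, C \, \frac{\partial \ell}{\partial z} \, x \;\;\text{(then Ridge shrinkage)}, \qquad
b \leftarrow b - \eta \, C \, \frac{\partial \ell}{\partial z}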

View File

@@ -1,20 +1,22 @@
//
// SVC.hpp
//
// Created by Marc Melikyan on 9/10/21.
// Created by Marc Melikyan on 10/2/20.
//
#ifndef SVC_hpp
#define SVC_hpp
#include <vector>
#include <string>
namespace MLPP{
namespace MLPP {
class SVC{
public:
SVC(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, std::string reg = "None", double lambda = 0.5, double alpha = 0.5);
SVC(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, double C);
std::vector<double> modelSetTest(std::vector<std::vector<double>> X);
double modelTest(std::vector<double> x);
void gradientDescent(double learning_rate, int max_epoch, bool UI = 1);
@@ -24,27 +26,27 @@ namespace MLPP{
void save(std::string fileName);
private:
double Cost(std::vector <double> y_hat, std::vector<double> y);
double Cost(std::vector <double> y_hat, std::vector<double> y, std::vector<double> weights, double C);
std::vector<double> Evaluate(std::vector<std::vector<double>> X);
std::vector<double> propagate(std::vector<std::vector<double>> X);
double Evaluate(std::vector<double> x);
double propagate(std::vector<double> x);
void forwardPass();
std::vector<std::vector<double>> inputSet;
std::vector<double> outputSet;
std::vector<double> z;
std::vector<double> y_hat;
std::vector<double> weights;
double bias;
double C;
int n;
int k;
// Regularization Params
std::string reg;
int lambda;
int alpha; /* This is the controlling param for Elastic Net*/
// UI Portion
void UI(int epoch, double cost_prev);
};
}
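Design-wise, the header trades the generic (reg, lambda, alpha) trio for the single SVM hyperparameter C: Ridge regularization is now hard-wired inside the optimizers, and C alone controls the margin/violation trade-off. Hypothetical construction mirroring the main.cpp driver below (inputSet/outputSet are whatever data you load):

SVC model(inputSet, outputSet, /*C=*/1.0);
model.SGD(/*learning_rate=*/1e-5, /*max_epoch=*/100000, /*UI=*/true);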

BIN
a.out

Binary file not shown.

View File

@@ -42,6 +42,7 @@
#include "MLPP/Cost/Cost.hpp"
#include "MLPP/Data/Data.hpp"
#include "MLPP/Convolutions/Convolutions.hpp"
#include "MLPP/SVC/SVC.hpp"
using namespace MLPP;
@@ -196,6 +197,15 @@ int main() {
// data.setData(4, "/Users/marcmelikyan/Desktop/Data/Iris.csv", inputSet, tempOutputSet);
// std::vector<std::vector<double>> outputSet = data.oneHotRep(tempOutputSet, 3);
// SUPPORT VECTOR CLASSIFICATION
std::vector<std::vector<double>> inputSet;
std::vector<double> outputSet;
data.setData(30, "/Users/marcmelikyan/Desktop/Data/BreastCancerSVM.csv", inputSet, outputSet);
SVC model(inputSet, outputSet, 1);
model.SGD(0.00001, 100000, 1);
alg.printVector(model.modelSetTest(inputSet));
std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl;
// SoftmaxReg model(inputSet, outputSet);
// model.SGD(0.001, 20000, 0);
// alg.printMatrix(model.modelSetTest(inputSet));
@@ -392,18 +402,18 @@ int main() {
// OutlierFinder outlierFinder(2); // Any datapoint outside of 2 stds from the mean is marked as an outlier.
// alg.printVector(outlierFinder.modelTest(inputSet));
// Testing new Functions
double z_s = 0.001;
std::cout << avn.sinc(z_s) << std::endl;
std::cout << avn.sinc(z_s, 1) << std::endl;
// // Testing new Functions
// double z_s = 0.001;
// std::cout << avn.sinc(z_s) << std::endl;
// std::cout << avn.sinc(z_s, 1) << std::endl;
std::vector<double> z_v = {0.001, 5};
alg.printVector(avn.sinc(z_v));
alg.printVector(avn.sinc(z_v, 1));
// std::vector<double> z_v = {0.001, 5};
// alg.printVector(avn.sinc(z_v));
// alg.printVector(avn.sinc(z_v, 1));
std::vector<std::vector<double>> Z_m = {{0.001, 5}};
alg.printMatrix(avn.sinc(Z_m));
alg.printMatrix(avn.sinc(Z_m, 1));
// std::vector<std::vector<double>> Z_m = {{0.001, 5}};
// alg.printMatrix(avn.sinc(Z_m));
// alg.printMatrix(avn.sinc(Z_m, 1));
// std::cout << alg.trace({{1,2}, {3,4}}) << std::endl;
// alg.printMatrix(alg.pinverse({{1,2}, {3,4}}));