diff --git a/SCsub b/SCsub
index d41b6bc..89273ec 100644
--- a/SCsub
+++ b/SCsub
@@ -59,6 +59,7 @@ sources = [
     "mlpp/uni_lin_reg/uni_lin_reg_old.cpp",
     "mlpp/outlier_finder/outlier_finder_old.cpp",
     "mlpp/probit_reg/probit_reg_old.cpp",
+    "mlpp/svc/svc_old.cpp",
 
     "test/mlpp_tests.cpp",
 ]
diff --git a/mlpp/svc/svc_old.cpp b/mlpp/svc/svc_old.cpp
new file mode 100644
index 0000000..f0de1b4
--- /dev/null
+++ b/mlpp/svc/svc_old.cpp
@@ -0,0 +1,203 @@
+//
+//  SVC.cpp
+//
+//  Created by Marc Melikyan on 10/2/20.
+//
+
+#include "svc_old.h"
+#include "../activation/activation.h"
+#include "../cost/cost.h"
+#include "../lin_alg/lin_alg.h"
+#include "../regularization/reg.h"
+#include "../utilities/utilities.h"
+
+#include <iostream>
+#include <random>
+
+std::vector<real_t> MLPPSVCOld::modelSetTest(std::vector<std::vector<real_t>> X) {
+	return Evaluate(X);
+}
+
+real_t MLPPSVCOld::modelTest(std::vector<real_t> x) {
+	return Evaluate(x);
+}
+
+void MLPPSVCOld::gradientDescent(real_t learning_rate, int max_epoch, bool UI) {
+	class MLPPCost cost;
+	MLPPActivation avn;
+	MLPPLinAlg alg;
+	MLPPReg regularization;
+	real_t cost_prev = 0;
+	int epoch = 1;
+	forwardPass();
+
+	while (true) {
+		cost_prev = Cost(y_hat, outputSet, weights, C);
+
+		weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate / n, alg.mat_vec_mult(alg.transpose(inputSet), cost.HingeLossDeriv(z, outputSet, C))));
+		weights = regularization.regWeights(weights, learning_rate / n, 0, "Ridge");
+
+		// Calculating the bias gradients
+		bias += learning_rate * alg.sum_elements(cost.HingeLossDeriv(y_hat, outputSet, C)) / n;
+
+		forwardPass();
+
+		// UI PORTION
+		if (UI) {
+			MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet, weights, C));
+			MLPPUtilities::UI(weights, bias);
+		}
+		epoch++;
+
+		if (epoch > max_epoch) {
+			break;
+		}
+	}
+}
+
+void MLPPSVCOld::SGD(real_t learning_rate, int max_epoch, bool UI) {
+	class MLPPCost cost;
+	MLPPActivation avn;
+	MLPPLinAlg alg;
+	MLPPReg regularization;
+
+	real_t cost_prev = 0;
+	int epoch = 1;
+
+	while (true) {
+		std::random_device rd;
+		std::default_random_engine generator(rd());
+		std::uniform_int_distribution<int> distribution(0, int(n - 1));
+		int outputIndex = distribution(generator);
+
+		//real_t y_hat = Evaluate(inputSet[outputIndex]);
+		real_t z = propagate(inputSet[outputIndex]);
+		cost_prev = Cost({ z }, { outputSet[outputIndex] }, weights, C);
+
+		real_t costDeriv = cost.HingeLossDeriv(std::vector<real_t>({ z }), std::vector<real_t>({ outputSet[outputIndex] }), C)[0]; // Explicit conversion to avoid ambiguity with overloaded function. Error occurred on Ubuntu.
+
+		// Weight Updation
+		weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate * costDeriv, inputSet[outputIndex]));
+		weights = regularization.regWeights(weights, learning_rate, 0, "Ridge");
+
+		// Bias updation
+		bias -= learning_rate * costDeriv;
+
+		//y_hat = Evaluate({ inputSet[outputIndex] });
+
+		if (UI) {
+			MLPPUtilities::CostInfo(epoch, cost_prev, Cost({ z }, { outputSet[outputIndex] }, weights, C));
+			MLPPUtilities::UI(weights, bias);
+		}
+
+		epoch++;
+
+		if (epoch > max_epoch) {
+			break;
+		}
+	}
+	forwardPass();
+}
+
+void MLPPSVCOld::MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI) {
+	class MLPPCost cost;
+	MLPPActivation avn;
+	MLPPLinAlg alg;
+	MLPPReg regularization;
+	real_t cost_prev = 0;
+	int epoch = 1;
+
+	// Creating the mini-batches
+	int n_mini_batch = n / mini_batch_size;
+	auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
+	auto inputMiniBatches = std::get<0>(batches);
+	auto outputMiniBatches = std::get<1>(batches);
+
+	while (true) {
+		for (int i = 0; i < n_mini_batch; i++) {
+			std::vector<real_t> y_hat = Evaluate(inputMiniBatches[i]);
+			std::vector<real_t> z = propagate(inputMiniBatches[i]);
+			cost_prev = Cost(z, outputMiniBatches[i], weights, C);
+
+			// Calculating the weight gradients
+			weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate / n, alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), cost.HingeLossDeriv(z, outputMiniBatches[i], C))));
+			weights = regularization.regWeights(weights, learning_rate / n, 0, "Ridge");
+
+			// Calculating the bias gradients
+			bias -= learning_rate * alg.sum_elements(cost.HingeLossDeriv(y_hat, outputMiniBatches[i], C)) / n;
+
+			forwardPass();
+
+			y_hat = Evaluate(inputMiniBatches[i]);
+
+			if (UI) {
+				MLPPUtilities::CostInfo(epoch, cost_prev, Cost(z, outputMiniBatches[i], weights, C));
+				MLPPUtilities::UI(weights, bias);
+			}
+		}
+		epoch++;
+		if (epoch > max_epoch) {
+			break;
+		}
+	}
+	forwardPass();
+}
+
+real_t MLPPSVCOld::score() {
+	MLPPUtilities util;
+	return util.performance(y_hat, outputSet);
+}
+
+void MLPPSVCOld::save(std::string fileName) {
+	MLPPUtilities util;
+	util.saveParameters(fileName, weights, bias);
+}
+
+MLPPSVCOld::MLPPSVCOld(std::vector<std::vector<real_t>> p_inputSet, std::vector<real_t> p_outputSet, real_t p_C) {
+	inputSet = p_inputSet;
+	outputSet = p_outputSet;
+	n = inputSet.size();
+	k = inputSet[0].size();
+	C = p_C;
+
+	y_hat.resize(n);
+	weights = MLPPUtilities::weightInitialization(k);
+	bias = MLPPUtilities::biasInitialization();
+}
+
+real_t MLPPSVCOld::Cost(std::vector<real_t> z, std::vector<real_t> y, std::vector<real_t> weights, real_t C) {
+	class MLPPCost cost;
+	return cost.HingeLoss(z, y, weights, C);
+}
+
+std::vector<real_t> MLPPSVCOld::Evaluate(std::vector<std::vector<real_t>> X) {
+	MLPPLinAlg alg;
+	MLPPActivation avn;
+	return avn.sign(alg.scalarAdd(bias, alg.mat_vec_mult(X, weights)));
+}
+
+std::vector<real_t> MLPPSVCOld::propagate(std::vector<std::vector<real_t>> X) {
+	MLPPLinAlg alg;
+	MLPPActivation avn;
+	return alg.scalarAdd(bias, alg.mat_vec_mult(X, weights));
+}
+
+real_t MLPPSVCOld::Evaluate(std::vector<real_t> x) {
+	MLPPLinAlg alg;
+	MLPPActivation avn;
+	return avn.sign(alg.dot(weights, x) + bias);
+}
+
+real_t MLPPSVCOld::propagate(std::vector<real_t> x) {
+	MLPPLinAlg alg;
+	MLPPActivation avn;
+	return alg.dot(weights, x) + bias;
+}
+
+// sign ( wTx + b )
+void MLPPSVCOld::forwardPass() {
+	MLPPActivation avn;
+
+	z = propagate(inputSet);
+	y_hat = avn.sign(z);
+}
diff --git a/mlpp/svc/svc_old.h b/mlpp/svc/svc_old.h
new file mode 100644
index 0000000..ffc0d33
--- /dev/null
+++ b/mlpp/svc/svc_old.h
@@ -0,0 +1,55 @@
+
+#ifndef MLPP_SVC_OLD_H
+#define MLPP_SVC_OLD_H
+
+//
+//  SVC.hpp
+//
+//  Created by Marc Melikyan on 10/2/20.
+//
+
+// https://towardsdatascience.com/svm-implementation-from-scratch-python-2db2fc52e5c2
+// Illustrated a practical definition of the Hinge Loss function and its gradient when optimizing with SGD.
+
+#include "core/math/math_defs.h"
+
+#include <string>
+#include <vector>
+
+class MLPPSVCOld {
+public:
+	std::vector<real_t> modelSetTest(std::vector<std::vector<real_t>> X);
+	real_t modelTest(std::vector<real_t> x);
+	void gradientDescent(real_t learning_rate, int max_epoch, bool UI = false);
+	void SGD(real_t learning_rate, int max_epoch, bool UI = false);
+	void MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI = false);
+	real_t score();
+	void save(std::string fileName);
+
+	MLPPSVCOld(std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet, real_t C);
+
+private:
+	real_t Cost(std::vector<real_t> y_hat, std::vector<real_t> y, std::vector<real_t> weights, real_t C);
+
+	std::vector<real_t> Evaluate(std::vector<std::vector<real_t>> X);
+	std::vector<real_t> propagate(std::vector<std::vector<real_t>> X);
+	real_t Evaluate(std::vector<real_t> x);
+	real_t propagate(std::vector<real_t> x);
+	void forwardPass();
+
+	std::vector<std::vector<real_t>> inputSet;
+	std::vector<real_t> outputSet;
+	std::vector<real_t> z;
+	std::vector<real_t> y_hat;
+	std::vector<real_t> weights;
+	real_t bias;
+
+	real_t C;
+	int n;
+	int k;
+
+	// UI Portion
+	void UI(int epoch, real_t cost_prev);
+};
+
+#endif /* SVC_hpp */
diff --git a/test/mlpp_tests.cpp b/test/mlpp_tests.cpp
index 8244578..470aa43 100644
--- a/test/mlpp_tests.cpp
+++ b/test/mlpp_tests.cpp
@@ -53,6 +53,7 @@
 #include "../mlpp/probit_reg/probit_reg_old.h"
 #include "../mlpp/uni_lin_reg/uni_lin_reg_old.h"
 #include "../mlpp/wgan/wgan_old.h"
+#include "../mlpp/svc/svc_old.h"
 
 Vector<real_t> dstd_vec_to_vec(const std::vector<real_t> &in) {
 	Vector<real_t> r;
@@ -413,10 +414,10 @@ void MLPPTests::test_support_vector_classification(bool ui) {
 	// SUPPORT VECTOR CLASSIFICATION
 	Ref<MLPPDataSimple> dt = data.load_breast_cancer_svc(_breast_cancer_svm_data_path);
 
-	MLPPSVC model(dt->get_input()->to_std_vector(), dt->get_output()->to_std_vector(), ui);
-	model.SGD(0.00001, 100000, ui);
-	alg.printVector(model.modelSetTest(dt->get_input()->to_std_vector()));
-	std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl;
+	MLPPSVCOld model_old(dt->get_input()->to_std_vector(), dt->get_output()->to_std_vector(), ui);
+	model_old.SGD(0.00001, 100000, ui);
+	alg.printVector(model_old.modelSetTest(dt->get_input()->to_std_vector()));
+	std::cout << "ACCURACY: " << 100 * model_old.score() << "%" << std::endl;
 }
 
 void MLPPTests::test_mlp(bool ui) {
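
For reference, a minimal sketch of the hinge loss and the subgradient that the update rules in svc_old.cpp rely on, for labels y in {-1, +1} and scores z = w^T x + b. The helper names, the double typedef for real_t, and the exact placement of the C factor are assumptions for illustration only; the actual implementation is cost.HingeLoss / cost.HingeLossDeriv in mlpp/cost, and Ridge regularization is applied separately via MLPPReg.

// Sketch only: L = C * sum_i max(0, 1 - y_i * z_i). The subgradient vector is
// what the batch and mini-batch steps above multiply by X^T, and what SGD uses
// for a single sample.
#include <cstddef>
#include <vector>

using real_t = double; // assumption; the module defines real_t in core/math/math_defs.h

real_t hinge_loss(const std::vector<real_t> &z, const std::vector<real_t> &y, real_t C) {
	real_t sum = 0;
	for (std::size_t i = 0; i < z.size(); i++) {
		real_t margin = 1 - y[i] * z[i];
		if (margin > 0) {
			sum += margin; // only violated margins contribute
		}
	}
	return C * sum;
}

// dL/dz_i = -C * y_i when the margin is violated (1 - y_i * z_i > 0), else 0.
std::vector<real_t> hinge_loss_deriv(const std::vector<real_t> &z, const std::vector<real_t> &y, real_t C) {
	std::vector<real_t> deriv(z.size(), 0);
	for (std::size_t i = 0; i < z.size(); i++) {
		if (1 - y[i] * z[i] > 0) {
			deriv[i] = -C * y[i];
		}
	}
	return deriv;
}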