From da6324830da835dad3398e22560c3513c8384d62 Mon Sep 17 00:00:00 2001 From: Relintai Date: Fri, 10 Feb 2023 14:03:48 +0100 Subject: [PATCH] Cleaned up SVC. --- mlpp/svc/svc.cpp | 318 +++++++++++++++++++++++++++++++++----------- mlpp/svc/svc.h | 84 ++++++++---- register_types.cpp | 2 + test/mlpp_tests.cpp | 10 +- 4 files changed, 306 insertions(+), 108 deletions(-) diff --git a/mlpp/svc/svc.cpp b/mlpp/svc/svc.cpp index 6150a34..d88db40 100644 --- a/mlpp/svc/svc.cpp +++ b/mlpp/svc/svc.cpp @@ -5,48 +5,84 @@ // #include "svc.h" + #include "../activation/activation.h" #include "../cost/cost.h" #include "../lin_alg/lin_alg.h" #include "../regularization/reg.h" #include "../utilities/utilities.h" -#include #include -std::vector MLPPSVC::modelSetTest(std::vector> X) { - return Evaluate(X); +Ref MLPPSVC::get_input_set() { + return _input_set; +} +void MLPPSVC::set_input_set(const Ref &val) { + _input_set = val; + + _initialized = false; } -real_t MLPPSVC::modelTest(std::vector x) { - return Evaluate(x); +Ref MLPPSVC::get_output_set() { + return _output_set; +} +void MLPPSVC::set_output_set(const Ref &val) { + _output_set = val; + + _initialized = false; } -void MLPPSVC::gradientDescent(real_t learning_rate, int max_epoch, bool UI) { - class MLPPCost cost; +real_t MLPPSVC::get_c() { + return _c; +} +void MLPPSVC::set_c(const real_t val) { + _c = val; + + _initialized = false; +} + +Ref MLPPSVC::model_set_test(const Ref &X) { + ERR_FAIL_COND_V(!_initialized, Ref()); + + return evaluatem(X); +} + +real_t MLPPSVC::model_test(const Ref &x) { + ERR_FAIL_COND_V(!_initialized, 0); + + return evaluatev(x); +} + +void MLPPSVC::gradient_descent(real_t learning_rate, int max_epoch, bool ui) { + ERR_FAIL_COND(!_initialized); + + MLPPCost mlpp_cost; MLPPActivation avn; MLPPLinAlg alg; MLPPReg regularization; + real_t cost_prev = 0; int epoch = 1; - forwardPass(); + + forward_pass(); while (true) { - cost_prev = Cost(y_hat, outputSet, weights, C); + cost_prev = cost(_y_hat, _output_set, _weights, _c); - weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate / n, alg.mat_vec_mult(alg.transpose(inputSet), cost.HingeLossDeriv(z, outputSet, C)))); - weights = regularization.regWeights(weights, learning_rate / n, 0, "Ridge"); + _weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate / _n, alg.mat_vec_multv(alg.transposem(_input_set), mlpp_cost.hinge_loss_derivwv(_z, _output_set, _c)))); + _weights = regularization.reg_weightsv(_weights, learning_rate / _n, 0, MLPPReg::REGULARIZATION_TYPE_RIDGE); // Calculating the bias gradients - bias += learning_rate * alg.sum_elements(cost.HingeLossDeriv(y_hat, outputSet, C)) / n; + _bias += learning_rate * alg.sum_elementsv(mlpp_cost.hinge_loss_derivwv(_y_hat, _output_set, _c)) / _n; - forwardPass(); + forward_pass(); // UI PORTION - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet, weights, C)); - MLPPUtilities::UI(weights, bias); + if (ui) { + MLPPUtilities::cost_info(epoch, cost_prev, cost(_y_hat, _output_set, _weights, _c)); + MLPPUtilities::print_ui_vb(_weights, _bias); } + epoch++; if (epoch > max_epoch) { @@ -55,39 +91,66 @@ void MLPPSVC::gradientDescent(real_t learning_rate, int max_epoch, bool UI) { } } -void MLPPSVC::SGD(real_t learning_rate, int max_epoch, bool UI) { - class MLPPCost cost; +void MLPPSVC::sgd(real_t learning_rate, int max_epoch, bool ui) { + ERR_FAIL_COND(!_initialized); + + MLPPCost mlpp_cost; MLPPActivation avn; MLPPLinAlg alg; MLPPReg regularization; + std::random_device rd; + std::default_random_engine generator(rd()); + std::uniform_int_distribution distribution(0, int(_n - 1)); + + Ref input_set_row_tmp; + input_set_row_tmp.instance(); + input_set_row_tmp->resize(_input_set->size().x); + + Ref output_set_row_tmp; + output_set_row_tmp.instance(); + output_set_row_tmp->resize(1); + + Ref z_row_tmp; + z_row_tmp.instance(); + z_row_tmp->resize(1); + real_t cost_prev = 0; int epoch = 1; + forward_pass(); + while (true) { - std::random_device rd; - std::default_random_engine generator(rd()); - std::uniform_int_distribution distribution(0, int(n - 1)); - int outputIndex = distribution(generator); + int output_index = distribution(generator); - //real_t y_hat = Evaluate(inputSet[outputIndex]); - real_t z = propagate(inputSet[outputIndex]); - cost_prev = Cost({ z }, { outputSet[outputIndex] }, weights, C); + _input_set->get_row_into_mlpp_vector(output_index, input_set_row_tmp); - real_t costDeriv = cost.HingeLossDeriv(std::vector({ z }), std::vector({ outputSet[outputIndex] }), C)[0]; // Explicit conversion to avoid ambiguity with overloaded function. Error occured on Ubuntu. + real_t output_set_indx = _output_set->get_element(output_index); + output_set_row_tmp->set_element(0, output_set_indx); + + //real_t y_hat = Evaluate(input_set_row_tmp); + real_t z = propagatev(input_set_row_tmp); + + z_row_tmp->set_element(0, z); + + cost_prev = cost(z_row_tmp, output_set_row_tmp, _weights, _c); + + Ref cost_deriv_vec = mlpp_cost.hinge_loss_derivwv(z_row_tmp, output_set_row_tmp, _c); + + real_t cost_deriv = cost_deriv_vec->get_element(0); // Weight Updation - weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate * costDeriv, inputSet[outputIndex])); - weights = regularization.regWeights(weights, learning_rate, 0, "Ridge"); + _weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate * cost_deriv, input_set_row_tmp)); + _weights = regularization.reg_weightsv(_weights, learning_rate, 0, MLPPReg::REGULARIZATION_TYPE_RIDGE); // Bias updation - bias -= learning_rate * costDeriv; + _bias -= learning_rate * cost_deriv; - //y_hat = Evaluate({ inputSet[outputIndex] }); + //y_hat = Evaluate({ _input_set[output_index] }); - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost({ z }, { outputSet[outputIndex] }, weights, C)); - MLPPUtilities::UI(weights, bias); + if (ui) { + MLPPUtilities::cost_info(epoch, cost_prev, cost(z_row_tmp, output_set_row_tmp, _weights, _c)); + MLPPUtilities::print_ui_vb(_weights, _bias); } epoch++; @@ -96,108 +159,207 @@ void MLPPSVC::SGD(real_t learning_rate, int max_epoch, bool UI) { break; } } - forwardPass(); + + forward_pass(); } -void MLPPSVC::MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI) { - class MLPPCost cost; +void MLPPSVC::mbgd(real_t learning_rate, int max_epoch, int mini_batch_size, bool ui) { + ERR_FAIL_COND(!_initialized); + + MLPPCost mlpp_cost; MLPPActivation avn; MLPPLinAlg alg; MLPPReg regularization; + real_t cost_prev = 0; int epoch = 1; // Creating the mini-batches - int n_mini_batch = n / mini_batch_size; - auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch); - auto inputMiniBatches = std::get<0>(batches); - auto outputMiniBatches = std::get<1>(batches); + int n_mini_batch = _n / mini_batch_size; + MLPPUtilities::CreateMiniBatchMVBatch batches = MLPPUtilities::create_mini_batchesmv(_input_set, _output_set, n_mini_batch); + + forward_pass(); while (true) { for (int i = 0; i < n_mini_batch; i++) { - std::vector y_hat = Evaluate(inputMiniBatches[i]); - std::vector z = propagate(inputMiniBatches[i]); - cost_prev = Cost(z, outputMiniBatches[i], weights, C); + Ref current_input_batch_entry = batches.input_sets[i]; + Ref current_output_batch_entry = batches.output_sets[i]; + + Ref y_hat = evaluatem(current_input_batch_entry); + Ref z = propagatem(current_input_batch_entry); + cost_prev = cost(z, current_output_batch_entry, _weights, _c); // Calculating the weight gradients - weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate / n, alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), cost.HingeLossDeriv(z, outputMiniBatches[i], C)))); - weights = regularization.regWeights(weights, learning_rate / n, 0, "Ridge"); + _weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate / _n, alg.mat_vec_multv(alg.transposem(current_input_batch_entry), mlpp_cost.hinge_loss_derivwv(z, current_output_batch_entry, _c)))); + _weights = regularization.reg_weightsv(_weights, learning_rate / _n, 0, MLPPReg::REGULARIZATION_TYPE_RIDGE); // Calculating the bias gradients - bias -= learning_rate * alg.sum_elements(cost.HingeLossDeriv(y_hat, outputMiniBatches[i], C)) / n; + _bias -= learning_rate * alg.sum_elementsv(mlpp_cost.hinge_loss_derivwv(y_hat, current_output_batch_entry, _c)) / _n; - forwardPass(); + forward_pass(); - y_hat = Evaluate(inputMiniBatches[i]); + y_hat = evaluatem(current_input_batch_entry); - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(z, outputMiniBatches[i], weights, C)); - MLPPUtilities::UI(weights, bias); + if (ui) { + MLPPUtilities::cost_info(epoch, cost_prev, cost(z, current_output_batch_entry, _weights, _c)); + MLPPUtilities::print_ui_vb(_weights, _bias); } } + epoch++; + if (epoch > max_epoch) { break; } } - forwardPass(); + + forward_pass(); } real_t MLPPSVC::score() { + ERR_FAIL_COND_V(!_initialized, 0); + MLPPUtilities util; - return util.performance(y_hat, outputSet); + return util.performance_vec(_y_hat, _output_set); } -void MLPPSVC::save(std::string fileName) { +void MLPPSVC::save(const String &file_name) { + ERR_FAIL_COND(!_initialized); + MLPPUtilities util; - util.saveParameters(fileName, weights, bias); + + //util.saveParameters(_file_name, _weights, _bias); } -MLPPSVC::MLPPSVC(std::vector> p_inputSet, std::vector p_outputSet, real_t p_C) { - inputSet = p_inputSet; - outputSet = p_outputSet; - n = inputSet.size(); - k = inputSet[0].size(); - C = p_C; +bool MLPPSVC::is_initialized() { + return _initialized; +} +void MLPPSVC::initialize() { + if (_initialized) { + return; + } - y_hat.resize(n); - weights = MLPPUtilities::weightInitialization(k); - bias = MLPPUtilities::biasInitialization(); + ERR_FAIL_COND(!_input_set.is_valid() || !_output_set.is_valid()); + + _n = _input_set->size().y; + _k = _input_set->size().x; + + if (!_y_hat.is_valid()) { + _y_hat.instance(); + } + + _y_hat->resize(_n); + + MLPPUtilities util; + + if (!_weights.is_valid()) { + _weights.instance(); + } + + _weights->resize(_k); + + util.weight_initializationv(_weights); + _bias = util.bias_initializationr(); + + _initialized = true; } -real_t MLPPSVC::Cost(std::vector z, std::vector y, std::vector weights, real_t C) { - class MLPPCost cost; - return cost.HingeLoss(z, y, weights, C); +MLPPSVC::MLPPSVC(const Ref &input_set, const Ref &output_set, real_t c) { + _input_set = input_set; + _output_set = output_set; + + _n = _input_set->size().y; + _k = _input_set->size().x; + _c = c; + + _y_hat.instance(); + + _y_hat->resize(_n); + + MLPPUtilities util; + + _weights.instance(); + _weights->resize(_k); + util.weight_initializationv(_weights); + _bias = util.bias_initializationr(); + + _initialized = true; } -std::vector MLPPSVC::Evaluate(std::vector> X) { +MLPPSVC::MLPPSVC() { + _y_hat.instance(); + _weights.instance(); + + _c = 0; + _n = 0; + _k = 0; + + _initialized = false; +} +MLPPSVC::~MLPPSVC() { +} + +real_t MLPPSVC::cost(const Ref &z, const Ref &y, const Ref &weights, real_t c) { + MLPPCost mlpp_cost; + return mlpp_cost.hinge_losswv(z, y, weights, c); +} + +Ref MLPPSVC::evaluatem(const Ref &X) { MLPPLinAlg alg; MLPPActivation avn; - return avn.sign(alg.scalarAdd(bias, alg.mat_vec_mult(X, weights))); + return avn.sign_normv(alg.scalar_addnv(_bias, alg.mat_vec_multv(X, _weights))); } -std::vector MLPPSVC::propagate(std::vector> X) { +Ref MLPPSVC::propagatem(const Ref &X) { MLPPLinAlg alg; MLPPActivation avn; - return alg.scalarAdd(bias, alg.mat_vec_mult(X, weights)); + return alg.scalar_addnv(_bias, alg.mat_vec_multv(X, _weights)); } -real_t MLPPSVC::Evaluate(std::vector x) { +real_t MLPPSVC::evaluatev(const Ref &x) { MLPPLinAlg alg; MLPPActivation avn; - return avn.sign(alg.dot(weights, x) + bias); + return avn.sign_normr(alg.dotv(_weights, x) + _bias); } -real_t MLPPSVC::propagate(std::vector x) { +real_t MLPPSVC::propagatev(const Ref &x) { MLPPLinAlg alg; MLPPActivation avn; - return alg.dot(weights, x) + bias; + return alg.dotv(_weights, x) + _bias; } // sign ( wTx + b ) -void MLPPSVC::forwardPass() { +void MLPPSVC::forward_pass() { MLPPActivation avn; - z = propagate(inputSet); - y_hat = avn.sign(z); + _z = propagatem(_input_set); + _y_hat = avn.sign_normv(_z); +} + +void MLPPSVC::_bind_methods() { + ClassDB::bind_method(D_METHOD("get_input_set"), &MLPPSVC::get_input_set); + ClassDB::bind_method(D_METHOD("set_input_set", "val"), &MLPPSVC::set_input_set); + ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "input_set", PROPERTY_HINT_RESOURCE_TYPE, "MLPPMatrix"), "set_input_set", "get_input_set"); + + ClassDB::bind_method(D_METHOD("get_output_set"), &MLPPSVC::get_output_set); + ClassDB::bind_method(D_METHOD("set_output_set", "val"), &MLPPSVC::set_output_set); + ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "output_set", PROPERTY_HINT_RESOURCE_TYPE, "MLPPVector"), "set_output_set", "get_output_set"); + + ClassDB::bind_method(D_METHOD("get_c"), &MLPPSVC::get_c); + ClassDB::bind_method(D_METHOD("set_c", "val"), &MLPPSVC::set_c); + ADD_PROPERTY(PropertyInfo(Variant::REAL, "c"), "set_c", "get_c"); + + ClassDB::bind_method(D_METHOD("model_set_test", "X"), &MLPPSVC::model_set_test); + ClassDB::bind_method(D_METHOD("model_test", "x"), &MLPPSVC::model_test); + + ClassDB::bind_method(D_METHOD("gradient_descent", "learning_rate", "max_epoch", "ui"), &MLPPSVC::gradient_descent, false); + ClassDB::bind_method(D_METHOD("sgd", "learning_rate", "max_epoch", "ui"), &MLPPSVC::sgd, false); + ClassDB::bind_method(D_METHOD("mbgd", "learning_rate", "max_epoch", "mini_batch_size", "ui"), &MLPPSVC::mbgd, false); + + ClassDB::bind_method(D_METHOD("score"), &MLPPSVC::score); + + ClassDB::bind_method(D_METHOD("save", "file_name"), &MLPPSVC::save); + + ClassDB::bind_method(D_METHOD("is_initialized"), &MLPPSVC::is_initialized); + ClassDB::bind_method(D_METHOD("initialize"), &MLPPSVC::initialize); } diff --git a/mlpp/svc/svc.h b/mlpp/svc/svc.h index 566e4fd..73daf34 100644 --- a/mlpp/svc/svc.h +++ b/mlpp/svc/svc.h @@ -13,43 +13,71 @@ #include "core/math/math_defs.h" -#include -#include +#include "core/object/reference.h" + +#include "../lin_alg/mlpp_matrix.h" +#include "../lin_alg/mlpp_vector.h" + +#include "../regularization/reg.h" + +class MLPPSVC : public Reference { + GDCLASS(MLPPSVC, Reference); -class MLPPSVC { public: - std::vector modelSetTest(std::vector> X); - real_t modelTest(std::vector x); - void gradientDescent(real_t learning_rate, int max_epoch, bool UI = false); - void SGD(real_t learning_rate, int max_epoch, bool UI = false); - void MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI = false); + Ref get_input_set(); + void set_input_set(const Ref &val); + + Ref get_output_set(); + void set_output_set(const Ref &val); + + real_t get_c(); + void set_c(const real_t val); + + Ref model_set_test(const Ref &X); + real_t model_test(const Ref &x); + + void gradient_descent(real_t learning_rate, int max_epoch, bool ui = false); + void sgd(real_t learning_rate, int max_epoch, bool ui = false); + void mbgd(real_t learning_rate, int max_epoch, int mini_batch_size, bool ui = false); + real_t score(); - void save(std::string fileName); - MLPPSVC(std::vector> inputSet, std::vector outputSet, real_t C); + void save(const String &file_name); -private: - real_t Cost(std::vector y_hat, std::vector y, std::vector weights, real_t C); + bool is_initialized(); + void initialize(); - std::vector Evaluate(std::vector> X); - std::vector propagate(std::vector> X); - real_t Evaluate(std::vector x); - real_t propagate(std::vector x); - void forwardPass(); + MLPPSVC(const Ref &input_set, const Ref &output_set, real_t c); - std::vector> inputSet; - std::vector outputSet; - std::vector z; - std::vector y_hat; - std::vector weights; - real_t bias; + MLPPSVC(); + ~MLPPSVC(); - real_t C; - int n; - int k; +protected: + real_t cost(const Ref &z, const Ref &y, const Ref &weights, real_t c); - // UI Portion - void UI(int epoch, real_t cost_prev); + Ref evaluatem(const Ref &X); + Ref propagatem(const Ref &X); + + real_t evaluatev(const Ref &x); + real_t propagatev(const Ref &x); + + void forward_pass(); + + static void _bind_methods(); + + Ref _input_set; + Ref _output_set; + + Ref _z; + Ref _y_hat; + Ref _weights; + real_t _bias; + + real_t _c; + int _n; + int _k; + + bool _initialized; }; #endif /* SVC_hpp */ diff --git a/register_types.cpp b/register_types.cpp index 222e6a6..76a8631 100644 --- a/register_types.cpp +++ b/register_types.cpp @@ -43,6 +43,7 @@ SOFTWARE. #include "mlpp/uni_lin_reg/uni_lin_reg.h" #include "mlpp/wgan/wgan.h" #include "mlpp/probit_reg/probit_reg.h" +#include "mlpp/svc/svc.h" #include "mlpp/mlp/mlp.h" @@ -71,6 +72,7 @@ void register_pmlpp_types(ModuleRegistrationLevel p_level) { ClassDB::register_class(); ClassDB::register_class(); ClassDB::register_class(); + ClassDB::register_class(); ClassDB::register_class(); ClassDB::register_class(); diff --git a/test/mlpp_tests.cpp b/test/mlpp_tests.cpp index 470aa43..00407a0 100644 --- a/test/mlpp_tests.cpp +++ b/test/mlpp_tests.cpp @@ -51,9 +51,9 @@ #include "../mlpp/outlier_finder/outlier_finder_old.h" #include "../mlpp/pca/pca_old.h" #include "../mlpp/probit_reg/probit_reg_old.h" +#include "../mlpp/svc/svc_old.h" #include "../mlpp/uni_lin_reg/uni_lin_reg_old.h" #include "../mlpp/wgan/wgan_old.h" -#include "../mlpp/svc/svc_old.h" Vector dstd_vec_to_vec(const std::vector &in) { Vector r; @@ -414,10 +414,16 @@ void MLPPTests::test_support_vector_classification(bool ui) { // SUPPORT VECTOR CLASSIFICATION Ref dt = data.load_breast_cancer_svc(_breast_cancer_svm_data_path); + MLPPSVCOld model_old(dt->get_input()->to_std_vector(), dt->get_output()->to_std_vector(), ui); model_old.SGD(0.00001, 100000, ui); alg.printVector(model_old.modelSetTest(dt->get_input()->to_std_vector())); - std::cout << "ACCURACY: " << 100 * model_old.score() << "%" << std::endl; + std::cout << "ACCURACY (old): " << 100 * model_old.score() << "%" << std::endl; + + MLPPSVC model(dt->get_input(), dt->get_output(), ui); + model.sgd(0.00001, 100000, ui); + PLOG_MSG((model.model_set_test(dt->get_input())->to_string())); + PLOG_MSG("ACCURACY: " + String::num(100 * model.score()) + "%"); } void MLPPTests::test_mlp(bool ui) {