From b8e3f41fdaf970d3c6ec10808352df798fc9e71e Mon Sep 17 00:00:00 2001
From: Relintai
Date: Sun, 5 Feb 2023 13:05:36 +0100
Subject: [PATCH] Fix more crashes and issues in MLPPMLP. Also added more tests for it.

---
 mlpp/mlp/mlp.cpp    | 84 ++++++++++++++++++++++-----------------------
 test/mlpp_tests.cpp | 24 +++++++++++--
 2 files changed, 63 insertions(+), 45 deletions(-)

diff --git a/mlpp/mlp/mlp.cpp b/mlpp/mlp/mlp.cpp
index 7ce9961..64c399d 100644
--- a/mlpp/mlp/mlp.cpp
+++ b/mlpp/mlp/mlp.cpp
@@ -86,6 +86,8 @@ void MLPPMLP::gradient_descent(real_t learning_rate, int max_epoch, bool UI) {
 	real_t cost_prev = 0;
 	int epoch = 1;
 
+	y_hat->fill(0);
+
 	forward_pass();
 
 	while (true) {
@@ -99,10 +101,10 @@ void MLPPMLP::gradient_descent(real_t learning_rate, int max_epoch, bool UI) {
 		Ref<MLPPVector> D2_1 = alg.mat_vec_multv(alg.transposem(a2), error);
 
 		// weights and bias updation for layer 2
-		weights2 = alg.subtractionnv(weights2, alg.scalar_multiplynv(learning_rate / n, D2_1));
-		weights2 = regularization.reg_weightsv(weights2, lambda, alpha, reg);
+		weights2->set_from_mlpp_vector(alg.subtractionnv(weights2, alg.scalar_multiplynv(learning_rate / static_cast<real_t>(n), D2_1)));
+		weights2->set_from_mlpp_vector(regularization.reg_weightsv(weights2, lambda, alpha, reg));
 
-		bias2 -= learning_rate * alg.sum_elementsv(error) / n;
+		bias2 -= learning_rate * alg.sum_elementsv(error) / static_cast<real_t>(n);
 
 		// Calculating the weight/bias for layer 1
@@ -111,10 +113,10 @@ void MLPPMLP::gradient_descent(real_t learning_rate, int max_epoch, bool UI) {
 		Ref<MLPPMatrix> D1_3 = alg.matmultm(alg.transposem(input_set), D1_2);
 
 		// weight an bias updation for layer 1
-		weights1 = alg.subtractionm(weights1, alg.scalar_multiplym(learning_rate / n, D1_3));
-		weights1 = regularization.reg_weightsm(weights1, lambda, alpha, reg);
+		weights1->set_from_mlpp_matrix(alg.subtractionm(weights1, alg.scalar_multiplym(learning_rate / n, D1_3)));
+		weights1->set_from_mlpp_matrix(regularization.reg_weightsm(weights1, lambda, alpha, reg));
 
-		bias1 = alg.subtract_matrix_rows(bias1, alg.scalar_multiplym(learning_rate / n, D1_2));
+		bias1->set_from_mlpp_vector(alg.subtract_matrix_rows(bias1, alg.scalar_multiplym(learning_rate / n, D1_2)));
 
 		forward_pass();
 
@@ -126,6 +128,7 @@ void MLPPMLP::gradient_descent(real_t learning_rate, int max_epoch, bool UI) {
 			std::cout << "Layer 2:" << std::endl;
 			MLPPUtilities::print_ui_vb(weights2, bias2);
 		}
+
 		epoch++;
 
 		if (epoch > max_epoch) {
@@ -159,9 +162,9 @@ void MLPPMLP::sgd(real_t learning_rate, int max_epoch, bool UI) {
 	y_hat_row_tmp.instance();
 	y_hat_row_tmp->resize(1);
 
-	Ref<MLPPMatrix> lz2;
+	Ref<MLPPVector> lz2;
 	lz2.instance();
-	Ref<MLPPMatrix> la2;
+	Ref<MLPPVector> la2;
 	la2.instance();
 
 	while (true) {
@@ -171,32 +174,33 @@ void MLPPMLP::sgd(real_t learning_rate, int max_epoch, bool UI) {
 		real_t output_element = output_set->get_element(output_Index);
 		output_set_row_tmp->set_element(0, output_element);
 
-		real_t y_hat = evaluatev(input_set_row_tmp);
-		y_hat_row_tmp->set_element(0, y_hat);
+		real_t ly_hat = evaluatev(input_set_row_tmp);
+		y_hat_row_tmp->set_element(0, ly_hat);
 		propagatev(input_set_row_tmp, lz2, la2);
 		cost_prev = cost(y_hat_row_tmp, output_set_row_tmp);
 
-		real_t error = y_hat - output_element;
+		real_t error = ly_hat - output_element;
 
 		// Weight updation for layer 2
-		Ref<MLPPMatrix> D2_1 = alg.scalar_multiplym(error, a2);
-		weights2 = alg.subtractionm(weights2, alg.scalar_multiplym(learning_rate, D2_1));
-		weights2 = regularization.reg_weightsm(weights2, lambda, alpha, reg);
+		Ref<MLPPVector> D2_1 = alg.scalar_multiplynv(error, la2);
+
+		weights2->set_from_mlpp_vector(alg.subtractionnv(weights2, alg.scalar_multiplynv(learning_rate, D2_1)));
+		weights2->set_from_mlpp_vector(regularization.reg_weightsv(weights2, lambda, alpha, reg));
 
 		// Bias updation for layer 2
 		bias2 -= learning_rate * error;
 
 		// Weight updation for layer 1
-		Ref<MLPPMatrix> D1_1 = alg.scalar_multiplym(error, weights2);
-		Ref<MLPPMatrix> D1_2 = alg.hadamard_productm(D1_1, avn.sigmoid_derivm(z2));
+		Ref<MLPPVector> D1_1 = alg.scalar_multiplynv(error, weights2);
+		Ref<MLPPVector> D1_2 = alg.hadamard_productnv(D1_1, avn.sigmoid_derivv(lz2));
 		Ref<MLPPMatrix> D1_3 = alg.outer_product(input_set_row_tmp, D1_2);
 
-		weights1 = alg.subtractionm(weights1, alg.scalar_multiplym(learning_rate, D1_3));
-		weights1 = regularization.reg_weightsm(weights1, lambda, alpha, reg);
+		weights1->set_from_mlpp_matrix(alg.subtractionm(weights1, alg.scalar_multiplym(learning_rate, D1_3)));
+		weights1->set_from_mlpp_matrix(regularization.reg_weightsm(weights1, lambda, alpha, reg));
 
 		// Bias updation for layer 1
-		bias1 = alg.subtractionm(bias1, alg.scalar_multiplym(learning_rate, D1_2));
+		bias1->set_from_mlpp_vector(alg.subtractionnv(bias1, alg.scalar_multiplynv(learning_rate, D1_2)));
 
-		y_hat = evaluatev(input_set_row_tmp);
+		ly_hat = evaluatev(input_set_row_tmp);
 
 		if (UI) {
 			MLPPUtilities::cost_info(epoch, cost_prev, cost_prev);
@@ -240,22 +244,21 @@ void MLPPMLP::mbgd(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI) {
 			Ref<MLPPMatrix> current_input = batches.input_sets[i];
 			Ref<MLPPVector> current_output = batches.output_sets[i];
 
-			Ref<MLPPVector> y_hat = evaluatem(current_input);
-			propagatev(current_input, lz2, la2);
-			cost_prev = cost(y_hat, current_output);
+			Ref<MLPPVector> ly_hat = evaluatem(current_input);
+			propagatem(current_input, lz2, la2);
+			cost_prev = cost(ly_hat, current_output);
 
 			// Calculating the errors
-			Ref<MLPPVector> error = alg.subtractionnv(y_hat, current_output);
+			Ref<MLPPVector> error = alg.subtractionnv(ly_hat, current_output);
 
 			// Calculating the weight/bias gradients for layer 2
-
-			Ref<MLPPVector> D2_1 = alg.mat_vec_multv(alg.transposem(a2), error);
+			Ref<MLPPVector> D2_1 = alg.mat_vec_multv(alg.transposem(la2), error);
 
 			real_t lr_d_cos = learning_rate / static_cast<real_t>(current_output->size());
 
 			// weights and bias updation for layser 2
-			weights2 = alg.subtractionnv(weights2, alg.scalar_multiplynv(lr_d_cos, D2_1));
-			weights2 = regularization.reg_weightsm(weights2, lambda, alpha, reg);
+			weights2->set_from_mlpp_vector(alg.subtractionnv(weights2, alg.scalar_multiplynv(lr_d_cos, D2_1)));
+			weights2->set_from_mlpp_vector(regularization.reg_weightsv(weights2, lambda, alpha, reg));
 
 			// Calculating the bias gradients for layer 2
 			real_t b_gradient = alg.sum_elementsv(error);
@@ -264,23 +267,20 @@ void MLPPMLP::mbgd(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI) {
 			bias2 -= learning_rate * b_gradient / current_output->size();
 
 			//Calculating the weight/bias for layer 1
 			Ref<MLPPMatrix> D1_1 = alg.outer_product(error, weights2);
-
-			Ref<MLPPMatrix> D1_2 = alg.hadamard_productm(D1_1, avn.sigmoid_derivm(z2));
-
+			Ref<MLPPMatrix> D1_2 = alg.hadamard_productm(D1_1, avn.sigmoid_derivm(lz2));
 			Ref<MLPPMatrix> D1_3 = alg.matmultm(alg.transposem(current_input), D1_2);
 
 			// weight an bias updation for layer 1
-			weights1 = alg.subtractionm(weights1, alg.scalar_multiplym(lr_d_cos, D1_3));
-			weights1 = regularization.reg_weightsm(weights1, lambda, alpha, reg);
+			weights1->set_from_mlpp_matrix(alg.subtractionm(weights1, alg.scalar_multiplym(lr_d_cos, D1_3)));
+			weights1->set_from_mlpp_matrix(regularization.reg_weightsm(weights1, lambda, alpha, reg));
 
-			bias1 = alg.subtract_matrix_rows(bias1, alg.scalar_multiplym(lr_d_cos, D1_2));
+			bias1->set_from_mlpp_vector(alg.subtract_matrix_rows(bias1, alg.scalar_multiplym(lr_d_cos, D1_2)));
 			y_hat = evaluatem(current_input);
 
 			if (UI) {
-				MLPPUtilities::CostInfo(epoch, cost_prev, cost(y_hat, current_output));
+				MLPPUtilities::CostInfo(epoch, cost_prev, cost(ly_hat, current_output));
 				std::cout << "Layer 1:" << std::endl;
 				MLPPUtilities::print_ui_mb(weights1, bias1);
 				std::cout << "Layer 2:" << std::endl;
@@ -300,7 +300,7 @@ void MLPPMLP::mbgd(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI) {
 real_t MLPPMLP::score() {
 	MLPPUtilities util;
 
-	return util.performance_mat(y_hat, output_set);
+	return util.performance_vec(y_hat, output_set);
 }
 
 void MLPPMLP::save(const String &fileName) {
@@ -346,11 +346,11 @@ void MLPPMLP::initialize() {
 	_initialized = true;
 }
 
-real_t MLPPMLP::cost(const Ref<MLPPVector> &y_hat, const Ref<MLPPVector> &y) {
+real_t MLPPMLP::cost(const Ref<MLPPVector> &p_y_hat, const Ref<MLPPVector> &p_y) {
 	MLPPReg regularization;
 	class MLPPCost cost;
 
-	return cost.log_lossv(y_hat, y) + regularization.reg_termv(weights2, lambda, alpha, reg) + regularization.reg_termm(weights1, lambda, alpha, reg);
+	return cost.log_lossv(p_y_hat, p_y) + regularization.reg_termv(weights2, lambda, alpha, reg) + regularization.reg_termm(weights1, lambda, alpha, reg);
 }
 
 Ref<MLPPVector> MLPPMLP::evaluatem(const Ref<MLPPMatrix> &X) {
@@ -368,7 +368,7 @@ void MLPPMLP::propagatem(const Ref<MLPPMatrix> &X, Ref<MLPPMatrix> z2_out, Ref<MLPPMatrix> a2_out) {
 	z2_out->set_from_mlpp_matrix(alg.mat_vec_addv(alg.matmultm(X, weights1), bias1));
-	a2_out->set_from_mlpp_matrix(avn.sigmoid_normm(z2));
+	a2_out->set_from_mlpp_matrix(avn.sigmoid_normm(z2_out));
 }
 
 real_t MLPPMLP::evaluatev(const Ref<MLPPVector> &x) {
@@ -386,7 +386,7 @@ void MLPPMLP::propagatev(const Ref<MLPPVector> &x, Ref<MLPPVector> z2_out, Ref<MLPPVector> a2_out) {
 	z2_out->set_from_mlpp_vector(alg.additionnv(alg.mat_vec_multv(alg.transposem(weights1), x), bias1));
-	a2_out->set_from_mlpp_vector(avn.sigmoid_normv(z2));
+	a2_out->set_from_mlpp_vector(avn.sigmoid_normv(z2_out));
 }
 
 void MLPPMLP::forward_pass() {
@@ -396,7 +396,7 @@ void MLPPMLP::forward_pass() {
 	z2->set_from_mlpp_matrix(alg.mat_vec_addv(alg.matmultm(input_set, weights1), bias1));
 	a2->set_from_mlpp_matrix(avn.sigmoid_normm(z2));
 
-	y_hat = avn.sigmoid_normv(alg.scalar_addnv(bias2, alg.mat_vec_multv(a2, weights2)));
+	y_hat->set_from_mlpp_vector(avn.sigmoid_normv(alg.scalar_addnv(bias2, alg.mat_vec_multv(a2, weights2))));
 }
 
 MLPPMLP::MLPPMLP(const Ref<MLPPMatrix> &p_input_set, const Ref<MLPPVector> &p_output_set, int p_n_hidden, MLPPReg::RegularizationType p_reg, real_t p_lambda, real_t p_alpha) {
diff --git a/test/mlpp_tests.cpp b/test/mlpp_tests.cpp
index a16d997..5680656 100644
--- a/test/mlpp_tests.cpp
+++ b/test/mlpp_tests.cpp
@@ -389,8 +389,12 @@ void MLPPTests::test_mlp(bool ui) {
 	MLPPLinAlg alg;
 
 	// MLP
-	std::vector<std::vector<real_t>> inputSet = { { 0, 0, 1, 1 }, { 0, 1, 0, 1 } };
-	inputSet = alg.transpose(inputSet);
+	std::vector<std::vector<real_t>> inputSet = {
+		{ 0, 0 },
+		{ 1, 1 },
+		{ 0, 1 },
+		{ 1, 0 }
+	};
 	std::vector<real_t> outputSet = { 0, 1, 1, 0 };
 
 	MLPPMLPOld model(inputSet, outputSet, 2);
@@ -409,7 +413,21 @@ void MLPPTests::test_mlp(bool ui) {
 	MLPPMLP model_new(input_set, output_set, 2);
 	model_new.gradient_descent(0.1, 10000, ui);
 	String res = model_new.model_set_test(input_set)->to_string();
-	res += "\nACCURACY: " + String::num(100 * model_new.score()) + "%";
+	res += "\nACCURACY (gradient_descent): " + String::num(100 * model_new.score()) + "%";
+
+	PLOG_MSG(res);
+
+	MLPPMLP model_new2(input_set, output_set, 2);
+	model_new2.sgd(0.01, 10000, ui);
+	res = model_new2.model_set_test(input_set)->to_string();
+	res += "\nACCURACY (sgd): " + String::num(100 * model_new2.score()) + "%";
+
+	PLOG_MSG(res);
+
+	MLPPMLP model_new3(input_set, output_set, 2);
+	model_new3.mbgd(0.01, 10000, 2, ui);
+	res = model_new3.model_set_test(input_set)->to_string();
+	res += "\nACCURACY (mbgd): " + String::num(100 * model_new3.score()) + "%";
 
 	PLOG_MSG(res);
 }