Fix more crashes and issues in MLPPMLP. Also add more tests for it.

This commit is contained in:
Relintai 2023-02-05 13:05:36 +01:00
parent e6afa5b715
commit b8e3f41fda
2 changed files with 63 additions and 45 deletions

View File

@ -86,6 +86,8 @@ void MLPPMLP::gradient_descent(real_t learning_rate, int max_epoch, bool UI) {
real_t cost_prev = 0; real_t cost_prev = 0;
int epoch = 1; int epoch = 1;
y_hat->fill(0);
forward_pass(); forward_pass();
while (true) { while (true) {
@ -99,10 +101,10 @@ void MLPPMLP::gradient_descent(real_t learning_rate, int max_epoch, bool UI) {
Ref<MLPPVector> D2_1 = alg.mat_vec_multv(alg.transposem(a2), error); Ref<MLPPVector> D2_1 = alg.mat_vec_multv(alg.transposem(a2), error);
// weights and bias updation for layer 2 // weights and bias updation for layer 2
weights2 = alg.subtractionnv(weights2, alg.scalar_multiplynv(learning_rate / n, D2_1)); weights2->set_from_mlpp_vector(alg.subtractionnv(weights2, alg.scalar_multiplynv(learning_rate / static_cast<real_t>(n), D2_1)));
weights2 = regularization.reg_weightsv(weights2, lambda, alpha, reg); weights2->set_from_mlpp_vector(regularization.reg_weightsv(weights2, lambda, alpha, reg));
bias2 -= learning_rate * alg.sum_elementsv(error) / n; bias2 -= learning_rate * alg.sum_elementsv(error) / static_cast<real_t>(n);
// Calculating the weight/bias for layer 1 // Calculating the weight/bias for layer 1
@ -111,10 +113,10 @@ void MLPPMLP::gradient_descent(real_t learning_rate, int max_epoch, bool UI) {
Ref<MLPPMatrix> D1_3 = alg.matmultm(alg.transposem(input_set), D1_2); Ref<MLPPMatrix> D1_3 = alg.matmultm(alg.transposem(input_set), D1_2);
// weight and bias updation for layer 1 // weight and bias updation for layer 1
weights1 = alg.subtractionm(weights1, alg.scalar_multiplym(learning_rate / n, D1_3)); weights1->set_from_mlpp_matrix(alg.subtractionm(weights1, alg.scalar_multiplym(learning_rate / n, D1_3)));
weights1 = regularization.reg_weightsm(weights1, lambda, alpha, reg); weights1->set_from_mlpp_matrix(regularization.reg_weightsm(weights1, lambda, alpha, reg));
bias1 = alg.subtract_matrix_rows(bias1, alg.scalar_multiplym(learning_rate / n, D1_2)); bias1->set_from_mlpp_vector(alg.subtract_matrix_rows(bias1, alg.scalar_multiplym(learning_rate / n, D1_2)));
forward_pass(); forward_pass();
@ -126,6 +128,7 @@ void MLPPMLP::gradient_descent(real_t learning_rate, int max_epoch, bool UI) {
std::cout << "Layer 2:" << std::endl; std::cout << "Layer 2:" << std::endl;
MLPPUtilities::print_ui_vb(weights2, bias2); MLPPUtilities::print_ui_vb(weights2, bias2);
} }
epoch++; epoch++;
if (epoch > max_epoch) { if (epoch > max_epoch) {
@ -159,9 +162,9 @@ void MLPPMLP::sgd(real_t learning_rate, int max_epoch, bool UI) {
y_hat_row_tmp.instance(); y_hat_row_tmp.instance();
y_hat_row_tmp->resize(1); y_hat_row_tmp->resize(1);
Ref<MLPPMatrix> lz2; Ref<MLPPVector> lz2;
lz2.instance(); lz2.instance();
Ref<MLPPMatrix> la2; Ref<MLPPVector> la2;
la2.instance(); la2.instance();
while (true) { while (true) {
@ -171,32 +174,33 @@ void MLPPMLP::sgd(real_t learning_rate, int max_epoch, bool UI) {
real_t output_element = output_set->get_element(output_Index); real_t output_element = output_set->get_element(output_Index);
output_set_row_tmp->set_element(0, output_element); output_set_row_tmp->set_element(0, output_element);
real_t y_hat = evaluatev(input_set_row_tmp); real_t ly_hat = evaluatev(input_set_row_tmp);
y_hat_row_tmp->set_element(0, y_hat); y_hat_row_tmp->set_element(0, ly_hat);
propagatev(input_set_row_tmp, lz2, la2); propagatev(input_set_row_tmp, lz2, la2);
cost_prev = cost(y_hat_row_tmp, output_set_row_tmp); cost_prev = cost(y_hat_row_tmp, output_set_row_tmp);
real_t error = y_hat - output_element; real_t error = ly_hat - output_element;
// Weight updation for layer 2 // Weight updation for layer 2
Ref<MLPPVector> D2_1 = alg.scalar_multiplym(error, a2); Ref<MLPPVector> D2_1 = alg.scalar_multiplynv(error, la2);
weights2 = alg.subtractionm(weights2, alg.scalar_multiplym(learning_rate, D2_1));
weights2 = regularization.reg_weightsm(weights2, lambda, alpha, reg); weights2->set_from_mlpp_vector(alg.subtractionnv(weights2, alg.scalar_multiplynv(learning_rate, D2_1)));
weights2->set_from_mlpp_vector(regularization.reg_weightsv(weights2, lambda, alpha, reg));
// Bias updation for layer 2 // Bias updation for layer 2
bias2 -= learning_rate * error; bias2 -= learning_rate * error;
// Weight updation for layer 1 // Weight updation for layer 1
Ref<MLPPVector> D1_1 = alg.scalar_multiplym(error, weights2); Ref<MLPPVector> D1_1 = alg.scalar_multiplynv(error, weights2);
Ref<MLPPVector> D1_2 = alg.hadamard_productm(D1_1, avn.sigmoid_derivm(z2)); Ref<MLPPVector> D1_2 = alg.hadamard_productnv(D1_1, avn.sigmoid_derivv(lz2));
Ref<MLPPMatrix> D1_3 = alg.outer_product(input_set_row_tmp, D1_2); Ref<MLPPMatrix> D1_3 = alg.outer_product(input_set_row_tmp, D1_2);
weights1 = alg.subtractionm(weights1, alg.scalar_multiplym(learning_rate, D1_3)); weights1->set_from_mlpp_matrix(alg.subtractionm(weights1, alg.scalar_multiplym(learning_rate, D1_3)));
weights1 = regularization.reg_weightsm(weights1, lambda, alpha, reg); weights1->set_from_mlpp_matrix(regularization.reg_weightsm(weights1, lambda, alpha, reg));
// Bias updation for layer 1 // Bias updation for layer 1
bias1 = alg.subtractionm(bias1, alg.scalar_multiplym(learning_rate, D1_2)); bias1->set_from_mlpp_vector(alg.subtractionnv(bias1, alg.scalar_multiplynv(learning_rate, D1_2)));
y_hat = evaluatev(input_set_row_tmp); ly_hat = evaluatev(input_set_row_tmp);
if (UI) { if (UI) {
MLPPUtilities::cost_info(epoch, cost_prev, cost_prev); MLPPUtilities::cost_info(epoch, cost_prev, cost_prev);
@ -240,22 +244,21 @@ void MLPPMLP::mbgd(real_t learning_rate, int max_epoch, int mini_batch_size, boo
Ref<MLPPMatrix> current_input = batches.input_sets[i]; Ref<MLPPMatrix> current_input = batches.input_sets[i];
Ref<MLPPVector> current_output = batches.output_sets[i]; Ref<MLPPVector> current_output = batches.output_sets[i];
Ref<MLPPVector> y_hat = evaluatem(current_input); Ref<MLPPVector> ly_hat = evaluatem(current_input);
propagatev(current_input, lz2, la2); propagatem(current_input, lz2, la2);
cost_prev = cost(y_hat, current_output); cost_prev = cost(ly_hat, current_output);
// Calculating the errors // Calculating the errors
Ref<MLPPVector> error = alg.subtractionnv(y_hat, current_output); Ref<MLPPVector> error = alg.subtractionnv(ly_hat, current_output);
// Calculating the weight/bias gradients for layer 2 // Calculating the weight/bias gradients for layer 2
Ref<MLPPVector> D2_1 = alg.mat_vec_multv(alg.transposem(la2), error);
Ref<MLPPVector> D2_1 = alg.mat_vec_multv(alg.transposem(a2), error);
real_t lr_d_cos = learning_rate / static_cast<real_t>(current_output->size()); real_t lr_d_cos = learning_rate / static_cast<real_t>(current_output->size());
// weights and bias updation for layer 2 // weights and bias updation for layer 2
weights2 = alg.subtractionnv(weights2, alg.scalar_multiplynv(lr_d_cos, D2_1)); weights2->set_from_mlpp_vector(alg.subtractionnv(weights2, alg.scalar_multiplynv(lr_d_cos, D2_1)));
weights2 = regularization.reg_weightsm(weights2, lambda, alpha, reg); weights2->set_from_mlpp_vector(regularization.reg_weightsv(weights2, lambda, alpha, reg));
// Calculating the bias gradients for layer 2 // Calculating the bias gradients for layer 2
real_t b_gradient = alg.sum_elementsv(error); real_t b_gradient = alg.sum_elementsv(error);
@ -264,23 +267,20 @@ void MLPPMLP::mbgd(real_t learning_rate, int max_epoch, int mini_batch_size, boo
bias2 -= learning_rate * b_gradient / current_output->size(); bias2 -= learning_rate * b_gradient / current_output->size();
//Calculating the weight/bias for layer 1 //Calculating the weight/bias for layer 1
Ref<MLPPMatrix> D1_1 = alg.outer_product(error, weights2); Ref<MLPPMatrix> D1_1 = alg.outer_product(error, weights2);
Ref<MLPPMatrix> D1_2 = alg.hadamard_productm(D1_1, avn.sigmoid_derivm(lz2));
Ref<MLPPMatrix> D1_2 = alg.hadamard_productm(D1_1, avn.sigmoid_derivm(z2));
Ref<MLPPMatrix> D1_3 = alg.matmultm(alg.transposem(current_input), D1_2); Ref<MLPPMatrix> D1_3 = alg.matmultm(alg.transposem(current_input), D1_2);
// weight and bias updation for layer 1 // weight and bias updation for layer 1
weights1 = alg.subtractionm(weights1, alg.scalar_multiplym(lr_d_cos, D1_3)); weights1->set_from_mlpp_matrix(alg.subtractionm(weights1, alg.scalar_multiplym(lr_d_cos, D1_3)));
weights1 = regularization.reg_weightsm(weights1, lambda, alpha, reg); weights1->set_from_mlpp_matrix(regularization.reg_weightsm(weights1, lambda, alpha, reg));
bias1 = alg.subtract_matrix_rows(bias1, alg.scalar_multiplym(lr_d_cos, D1_2)); bias1->set_from_mlpp_vector(alg.subtract_matrix_rows(bias1, alg.scalar_multiplym(lr_d_cos, D1_2)));
y_hat = evaluatem(current_input); y_hat = evaluatem(current_input);
if (UI) { if (UI) {
MLPPUtilities::CostInfo(epoch, cost_prev, cost(y_hat, current_output)); MLPPUtilities::CostInfo(epoch, cost_prev, cost(ly_hat, current_output));
std::cout << "Layer 1:" << std::endl; std::cout << "Layer 1:" << std::endl;
MLPPUtilities::print_ui_mb(weights1, bias1); MLPPUtilities::print_ui_mb(weights1, bias1);
std::cout << "Layer 2:" << std::endl; std::cout << "Layer 2:" << std::endl;
@ -300,7 +300,7 @@ void MLPPMLP::mbgd(real_t learning_rate, int max_epoch, int mini_batch_size, boo
real_t MLPPMLP::score() { real_t MLPPMLP::score() {
MLPPUtilities util; MLPPUtilities util;
return util.performance_mat(y_hat, output_set); return util.performance_vec(y_hat, output_set);
} }
void MLPPMLP::save(const String &fileName) { void MLPPMLP::save(const String &fileName) {
@ -346,11 +346,11 @@ void MLPPMLP::initialize() {
_initialized = true; _initialized = true;
} }
real_t MLPPMLP::cost(const Ref<MLPPVector> &y_hat, const Ref<MLPPVector> &y) { real_t MLPPMLP::cost(const Ref<MLPPVector> &p_y_hat, const Ref<MLPPVector> &p_y) {
MLPPReg regularization; MLPPReg regularization;
class MLPPCost cost; class MLPPCost cost;
return cost.log_lossv(y_hat, y) + regularization.reg_termv(weights2, lambda, alpha, reg) + regularization.reg_termm(weights1, lambda, alpha, reg); return cost.log_lossv(p_y_hat, p_y) + regularization.reg_termv(weights2, lambda, alpha, reg) + regularization.reg_termm(weights1, lambda, alpha, reg);
} }
Ref<MLPPVector> MLPPMLP::evaluatem(const Ref<MLPPMatrix> &X) { Ref<MLPPVector> MLPPMLP::evaluatem(const Ref<MLPPMatrix> &X) {
@ -368,7 +368,7 @@ void MLPPMLP::propagatem(const Ref<MLPPMatrix> &X, Ref<MLPPMatrix> z2_out, Ref<M
MLPPActivation avn; MLPPActivation avn;
z2_out->set_from_mlpp_matrix(alg.mat_vec_addv(alg.matmultm(X, weights1), bias1)); z2_out->set_from_mlpp_matrix(alg.mat_vec_addv(alg.matmultm(X, weights1), bias1));
a2_out->set_from_mlpp_matrix(avn.sigmoid_normm(z2)); a2_out->set_from_mlpp_matrix(avn.sigmoid_normm(z2_out));
} }
real_t MLPPMLP::evaluatev(const Ref<MLPPVector> &x) { real_t MLPPMLP::evaluatev(const Ref<MLPPVector> &x) {
@ -386,7 +386,7 @@ void MLPPMLP::propagatev(const Ref<MLPPVector> &x, Ref<MLPPVector> z2_out, Ref<M
MLPPActivation avn; MLPPActivation avn;
z2_out->set_from_mlpp_vector(alg.additionnv(alg.mat_vec_multv(alg.transposem(weights1), x), bias1)); z2_out->set_from_mlpp_vector(alg.additionnv(alg.mat_vec_multv(alg.transposem(weights1), x), bias1));
a2_out->set_from_mlpp_vector(avn.sigmoid_normv(z2)); a2_out->set_from_mlpp_vector(avn.sigmoid_normv(z2_out));
} }
void MLPPMLP::forward_pass() { void MLPPMLP::forward_pass() {
@ -396,7 +396,7 @@ void MLPPMLP::forward_pass() {
z2->set_from_mlpp_matrix(alg.mat_vec_addv(alg.matmultm(input_set, weights1), bias1)); z2->set_from_mlpp_matrix(alg.mat_vec_addv(alg.matmultm(input_set, weights1), bias1));
a2->set_from_mlpp_matrix(avn.sigmoid_normm(z2)); a2->set_from_mlpp_matrix(avn.sigmoid_normm(z2));
y_hat = avn.sigmoid_normv(alg.scalar_addnv(bias2, alg.mat_vec_multv(a2, weights2))); y_hat->set_from_mlpp_vector(avn.sigmoid_normv(alg.scalar_addnv(bias2, alg.mat_vec_multv(a2, weights2))));
} }
MLPPMLP::MLPPMLP(const Ref<MLPPMatrix> &p_input_set, const Ref<MLPPVector> &p_output_set, int p_n_hidden, MLPPReg::RegularizationType p_reg, real_t p_lambda, real_t p_alpha) { MLPPMLP::MLPPMLP(const Ref<MLPPMatrix> &p_input_set, const Ref<MLPPVector> &p_output_set, int p_n_hidden, MLPPReg::RegularizationType p_reg, real_t p_lambda, real_t p_alpha) {

View File

@ -389,8 +389,12 @@ void MLPPTests::test_mlp(bool ui) {
MLPPLinAlg alg; MLPPLinAlg alg;
// MLP // MLP
std::vector<std::vector<real_t>> inputSet = { { 0, 0, 1, 1 }, { 0, 1, 0, 1 } }; std::vector<std::vector<real_t>> inputSet = {
inputSet = alg.transpose(inputSet); { 0, 0 },
{ 1, 1 },
{ 0, 1 },
{ 1, 0 }
};
std::vector<real_t> outputSet = { 0, 1, 1, 0 }; std::vector<real_t> outputSet = { 0, 1, 1, 0 };
MLPPMLPOld model(inputSet, outputSet, 2); MLPPMLPOld model(inputSet, outputSet, 2);
@ -409,7 +413,21 @@ void MLPPTests::test_mlp(bool ui) {
MLPPMLP model_new(input_set, output_set, 2); MLPPMLP model_new(input_set, output_set, 2);
model_new.gradient_descent(0.1, 10000, ui); model_new.gradient_descent(0.1, 10000, ui);
String res = model_new.model_set_test(input_set)->to_string(); String res = model_new.model_set_test(input_set)->to_string();
res += "\nACCURACY: " + String::num(100 * model_new.score()) + "%"; res += "\nACCURACY (gradient_descent): " + String::num(100 * model_new.score()) + "%";
PLOG_MSG(res);
MLPPMLP model_new2(input_set, output_set, 2);
model_new2.sgd(0.01, 10000, ui);
res = model_new2.model_set_test(input_set)->to_string();
res += "\nACCURACY (sgd): " + String::num(100 * model_new2.score()) + "%";
PLOG_MSG(res);
MLPPMLP model_new3(input_set, output_set, 2);
model_new3.mbgd(0.01, 10000, 2, ui);
res = model_new3.model_set_test(input_set)->to_string();
res += "\nACCURACY (mbgd): " + String::num(100 * model_new3.score()) + "%";
PLOG_MSG(res); PLOG_MSG(res);
} }