MLPPSoftmaxNet cleanup.

Relintai 2023-04-29 11:21:17 +02:00
parent 38dbc2d470
commit cd1f5a2805
2 changed files with 45 additions and 55 deletions
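
In short: this cleanup drops the local MLPPLinAlg helper (and the lin_alg.h include) from MLPPSoftmaxNet and calls the equivalent operations as methods on MLPPMatrix / MLPPVector instead. The pattern, taken verbatim from the gradient_descent hunk below:

    // before: free functions on a local MLPPLinAlg instance
    Ref<MLPPMatrix> error = alg.subtractionnm(_y_hat, _output_set);
    // after: the same operation as a method on MLPPMatrix
    Ref<MLPPMatrix> error = _y_hat->subn(_output_set);

The header change moves _n_hidden and the regularization parameters (_reg, _lambda, _alpha) up, next to the _input_set / _output_set members.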


@@ -9,7 +9,6 @@
#include "../activation/activation.h"
#include "../cost/cost.h"
#include "../data/data.h"
-#include "../lin_alg/lin_alg.h"
#include "../regularization/reg.h"
#include "../utilities/utilities.h"
@@ -74,7 +73,6 @@ Ref<MLPPMatrix> MLPPSoftmaxNet::model_set_test(const Ref<MLPPMatrix> &X) {
void MLPPSoftmaxNet::gradient_descent(real_t learning_rate, int max_epoch, bool ui) {
    MLPPActivation avn;
-    MLPPLinAlg alg;
    MLPPReg regularization;
    real_t cost_prev = 0;
@@ -86,31 +84,27 @@ void MLPPSoftmaxNet::gradient_descent(real_t learning_rate, int max_epoch, bool
    cost_prev = cost(_y_hat, _output_set);
    // Calculating the errors
-    Ref<MLPPMatrix> error = alg.subtractionnm(_y_hat, _output_set);
+    Ref<MLPPMatrix> error = _y_hat->subn(_output_set);
    // Calculating the weight/bias gradients for layer 2
-    Ref<MLPPMatrix> D2_1 = alg.matmultnm(alg.transposenm(_a2), error);
+    Ref<MLPPMatrix> D2_1 = _a2->transposen()->multn(error);
    // weights and bias updation for layer 2
-    _weights2 = alg.subtractionnm(_weights2, alg.scalar_multiplynm(learning_rate, D2_1));
+    _weights2->sub(D2_1->scalar_multiplyn(learning_rate));
    _weights2 = regularization.reg_weightsm(_weights2, _lambda, _alpha, _reg);
-    _bias2 = alg.subtract_matrix_rowsnv(_bias2, alg.scalar_multiplynm(learning_rate, error));
+    _bias2->subtract_matrix_rows(error->scalar_multiplyn(learning_rate));
    //Calculating the weight/bias for layer 1
-    Ref<MLPPMatrix> D1_1 = alg.matmultnm(error, alg.transposenm(_weights2));
-    Ref<MLPPMatrix> D1_2 = alg.hadamard_productnm(D1_1, avn.sigmoid_derivm(_z2));
-    Ref<MLPPMatrix> D1_3 = alg.matmultnm(alg.transposenm(_input_set), D1_2);
+    Ref<MLPPMatrix> D1_1 = error->multn(_weights2->transposen());
+    Ref<MLPPMatrix> D1_2 = D1_1->hadamard_productn(avn.sigmoid_derivm(_z2));
+    Ref<MLPPMatrix> D1_3 = _input_set->transposen()->multn(D1_2);
    // weight an bias updation for layer 1
-    _weights1 = alg.subtractionnm(_weights1, alg.scalar_multiplynm(learning_rate, D1_3));
+    _weights1->sub(D1_3->scalar_multiplyn(learning_rate));
    _weights1 = regularization.reg_weightsm(_weights1, _lambda, _alpha, _reg);
-    _bias1 = alg.subtract_matrix_rowsnv(_bias1, alg.scalar_multiplynm(learning_rate, D1_2));
+    _bias1->subtract_matrix_rows(D1_2->scalar_multiplyn(learning_rate));
    forward_pass();
@@ -133,7 +127,6 @@ void MLPPSoftmaxNet::gradient_descent(real_t learning_rate, int max_epoch, bool
void MLPPSoftmaxNet::sgd(real_t learning_rate, int max_epoch, bool ui) {
    MLPPActivation avn;
-    MLPPLinAlg alg;
    MLPPReg regularization;
    real_t cost_prev = 0;
@@ -172,26 +165,29 @@ void MLPPSoftmaxNet::sgd(real_t learning_rate, int max_epoch, bool ui) {
    PropagateVResult prop_res = propagatev(input_set_row_tmp);
    cost_prev = cost(y_hat_mat_tmp, output_row_mat_tmp);
-    Ref<MLPPVector> error = alg.subtractionnv(y_hat, output_set_row_tmp);
+    Ref<MLPPVector> error = y_hat->subn(output_set_row_tmp);
    // Weight updation for layer 2
-    Ref<MLPPMatrix> D2_1 = alg.outer_product(error, prop_res.a2);
-    _weights2 = alg.subtractionnm(_weights2, alg.scalar_multiplynm(learning_rate, alg.transposenm(D2_1)));
+    Ref<MLPPMatrix> D2_1 = error->outer_product(prop_res.a2);
+    _weights2->sub(D2_1->transposen()->scalar_multiplyn(learning_rate));
    _weights2 = regularization.reg_weightsm(_weights2, _lambda, _alpha, _reg);
    // Bias updation for layer 2
-    _bias2 = alg.subtractionnv(_bias2, alg.scalar_multiplynv(learning_rate, error));
+    _bias2->sub(error->scalar_multiplyn(learning_rate));
    // Weight updation for layer 1
-    Ref<MLPPVector> D1_1 = alg.mat_vec_multnv(_weights2, error);
-    Ref<MLPPVector> D1_2 = alg.hadamard_productnm(D1_1, avn.sigmoid_derivv(prop_res.z2));
-    Ref<MLPPMatrix> D1_3 = alg.outer_product(input_set_row_tmp, D1_2);
-    _weights1 = alg.subtractionnm(_weights1, alg.scalar_multiplynm(learning_rate, D1_3));
+    Ref<MLPPVector> D1_1 = _weights2->mult_vec(error);
+    Ref<MLPPVector> D1_2 = D1_1->hadamard_productn(avn.sigmoid_derivv(prop_res.z2));
+    Ref<MLPPMatrix> D1_3 = input_set_row_tmp->outer_product(D1_2);
+    _weights1->sub(D1_3->scalar_multiplyn(learning_rate));
    _weights1 = regularization.reg_weightsm(_weights1, _lambda, _alpha, _reg);
    // Bias updation for layer 1
-    _bias1 = alg.subtractionnv(_bias1, alg.scalar_multiplynv(learning_rate, D1_2));
+    _bias1->sub(D1_2->scalar_multiplyn(learning_rate));
    y_hat = evaluatev(input_set_row_tmp);
@@ -215,7 +211,6 @@ void MLPPSoftmaxNet::sgd(real_t learning_rate, int max_epoch, bool ui) {
void MLPPSoftmaxNet::mbgd(real_t learning_rate, int max_epoch, int mini_batch_size, bool ui) {
    MLPPActivation avn;
-    MLPPLinAlg alg;
    MLPPReg regularization;
    real_t cost_prev = 0;
    int epoch = 1;
@@ -237,30 +232,29 @@ void MLPPSoftmaxNet::mbgd(real_t learning_rate, int max_epoch, int mini_batch_si
    cost_prev = cost(y_hat, current_output_mini_batch);
    // Calculating the errors
-    Ref<MLPPMatrix> error = alg.subtractionnm(y_hat, current_output_mini_batch);
+    Ref<MLPPMatrix> error = y_hat->subn(current_output_mini_batch);
    // Calculating the weight/bias gradients for layer 2
-    Ref<MLPPMatrix> D2_1 = alg.matmultnm(alg.transposenm(prop_res.a2), error);
+    Ref<MLPPMatrix> D2_1 = prop_res.a2->transposen()->multn(error);
    // weights and bias updation for layser 2
-    _weights2 = alg.subtractionnm(_weights2, alg.scalar_multiplynm(learning_rate, D2_1));
+    _weights2->sub(D2_1->scalar_multiplyn(learning_rate));
    _weights2 = regularization.reg_weightsm(_weights2, _lambda, _alpha, _reg);
    // Bias Updation for layer 2
-    _bias2 = alg.subtract_matrix_rowsnv(_bias2, alg.scalar_multiplynm(learning_rate, error));
+    _bias2->sub(error->scalar_multiplyn(learning_rate));
    //Calculating the weight/bias for layer 1
-    Ref<MLPPMatrix> D1_1 = alg.matmultnm(error, alg.transposenm(_weights2));
-    Ref<MLPPMatrix> D1_2 = alg.hadamard_productnm(D1_1, avn.sigmoid_derivm(prop_res.z2));
-    Ref<MLPPMatrix> D1_3 = alg.matmultnm(alg.transposenm(current_input_mini_batch), D1_2);
+    Ref<MLPPMatrix> D1_1 = error->multn(_weights2->transposen());
+    Ref<MLPPMatrix> D1_2 = D1_1->hadamard_productn(avn.sigmoid_derivm(prop_res.z2));
+    Ref<MLPPMatrix> D1_3 = current_input_mini_batch->transposen()->multn(D1_2);
    // weight an bias updation for layer 1
-    _weights1 = alg.subtractionnm(_weights1, alg.scalar_multiplynm(learning_rate, D1_3));
+    _weights1->sub(D1_3->scalar_multiplyn(learning_rate));
    _weights1 = regularization.reg_weightsm(_weights1, _lambda, _alpha, _reg);
-    _bias1 = alg.subtract_matrix_rowsnv(_bias1, alg.scalar_multiplynm(learning_rate, D1_2));
+    _bias1->subtract_matrix_rows(D1_2->scalar_multiplyn(learning_rate));
    y_hat = evaluatem(current_input_mini_batch);
@@ -363,56 +357,52 @@ real_t MLPPSoftmaxNet::cost(const Ref<MLPPMatrix> &y_hat, const Ref<MLPPMatrix>
}

Ref<MLPPVector> MLPPSoftmaxNet::evaluatev(const Ref<MLPPVector> &x) {
-    MLPPLinAlg alg;
    MLPPActivation avn;
-    Ref<MLPPVector> z2 = alg.additionnv(alg.mat_vec_multnv(alg.transposenm(_weights1), x), _bias1);
+    Ref<MLPPVector> z2 = _weights1->transposen()->mult_vec(x)->addn(_bias1);
    Ref<MLPPVector> a2 = avn.sigmoid_normv(z2);
-    return avn.adj_softmax_normv(alg.additionnv(alg.mat_vec_multnv(alg.transposenm(_weights2), a2), _bias2));
+    return avn.adj_softmax_normv(_weights2->transposen()->mult_vec(a2)->addn(_bias2));
}

MLPPSoftmaxNet::PropagateVResult MLPPSoftmaxNet::propagatev(const Ref<MLPPVector> &x) {
-    MLPPLinAlg alg;
    MLPPActivation avn;
    PropagateVResult res;
-    res.z2 = alg.additionnv(alg.mat_vec_multnv(alg.transposenm(_weights1), x), _bias1);
+    res.z2 = _weights1->transposen()->mult_vec(x)->addn(_bias1);
    res.a2 = avn.sigmoid_normv(res.z2);
    return res;
}

Ref<MLPPMatrix> MLPPSoftmaxNet::evaluatem(const Ref<MLPPMatrix> &X) {
-    MLPPLinAlg alg;
    MLPPActivation avn;
-    Ref<MLPPMatrix> z2 = alg.mat_vec_addnm(alg.matmultnm(X, _weights1), _bias1);
+    Ref<MLPPMatrix> z2 = X->multn(_weights1)->add_vecn(_bias1);
    Ref<MLPPMatrix> a2 = avn.sigmoid_normm(z2);
-    return avn.adj_softmax_normm(alg.mat_vec_addnm(alg.matmultnm(a2, _weights2), _bias2));
+    return avn.adj_softmax_normm(a2->multn(_weights2)->add_vecn(_bias2));
}

MLPPSoftmaxNet::PropagateMResult MLPPSoftmaxNet::propagatem(const Ref<MLPPMatrix> &X) {
-    MLPPLinAlg alg;
    MLPPActivation avn;
    MLPPSoftmaxNet::PropagateMResult res;
-    res.z2 = alg.mat_vec_addnm(alg.matmultnm(X, _weights1), _bias1);
+    res.z2 = X->multn(_weights1)->add_vecn(_bias1);
    res.a2 = avn.sigmoid_normm(res.z2);
    return res;
}

void MLPPSoftmaxNet::forward_pass() {
-    MLPPLinAlg alg;
    MLPPActivation avn;
-    _z2 = alg.mat_vec_addnm(alg.matmultnm(_input_set, _weights1), _bias1);
+    _z2 = _input_set->multn(_weights1)->add_vecn(_bias1);
    _a2 = avn.sigmoid_normm(_z2);
-    _y_hat = avn.adj_softmax_normm(alg.mat_vec_addnm(alg.matmultnm(_a2, _weights2), _bias2));
+    _y_hat = avn.adj_softmax_normm(_a2->multn(_weights2)->add_vecn(_bias2));
}
void MLPPSoftmaxNet::_bind_methods() {
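
For reference, the updates above are standard backpropagation for this two-layer network. In matrix form (a sketch inferred from the code, not part of the commit), with $X$ the input batch, $W_1, W_2$ the weights, $b_1, b_2$ the biases, $\sigma$ the sigmoid, and $E = \hat{Y} - Y$ the error:

$$Z_2 = X W_1 + b_1, \qquad A_2 = \sigma(Z_2), \qquad \hat{Y} = \mathrm{adj\_softmax}(A_2 W_2 + b_2)$$
$$\nabla_{W_2} = A_2^\top E, \qquad \nabla_{W_1} = X^\top \big( (E W_2^\top) \odot \sigma'(Z_2) \big)$$

The bias gradients are the row sums of $E$ and of $(E W_2^\top) \odot \sigma'(Z_2)$, which is what the scalar_multiplyn(learning_rate) plus subtract_matrix_rows calls apply, assuming subtract_matrix_rows subtracts each row of its argument from the bias vector.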


@@ -85,6 +85,12 @@ protected:
    Ref<MLPPMatrix> _input_set;
    Ref<MLPPMatrix> _output_set;
+    int _n_hidden;
+
+    // Regularization Params
+    MLPPReg::RegularizationType _reg;
+    real_t _lambda;
+    real_t _alpha; /* This is the controlling param for Elastic Net*/

    Ref<MLPPMatrix> _y_hat;
    Ref<MLPPMatrix> _weights1;
@@ -99,12 +105,6 @@ protected:
    int _n;
    int _k;
    int _n_class;
-    int _n_hidden;
-
-    // Regularization Params
-    MLPPReg::RegularizationType _reg;
-    real_t _lambda;
-    real_t _alpha; /* This is the controlling param for Elastic Net*/

    bool _initialized;
};