Cleanups to MLP, MANN, LogReg, and LinReg.

This commit is contained in:
parent 9993bc9d4e
commit da321e3c57
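Every hunk in this commit follows the same pattern: operations that previously went through a stack-allocated MLPPLinAlg helper are moved onto the Ref<MLPPVector> and Ref<MLPPMatrix> objects themselves. A minimal sketch of the two styles, using only method names that appear in the hunks below (the variable names are placeholders, and the in-place sub() vs. copying subn() distinction is an assumption inferred from how the new code uses them):

    // Old style: every intermediate goes through the MLPPLinAlg helper.
    MLPPLinAlg alg;
    Ref<MLPPVector> error = alg.subtractionnv(y_hat, output_set);                 // error = y_hat - output_set
    Ref<MLPPVector> grad = alg.mat_vec_multnv(alg.transposenm(input_set), error); // grad = X^T * error
    weights = alg.subtractionnv(weights, alg.scalar_multiplynv(learning_rate / n, grad));

    // New style: the same operations as chained member calls.
    Ref<MLPPVector> error = y_hat->subn(output_set);                              // copying subtraction
    Ref<MLPPVector> grad = input_set->transposen()->mult_vec(error);              // X^T * error
    weights->sub(grad->scalar_multiplyn(learning_rate / n));                      // in-place weight update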
@@ -7,7 +7,6 @@
#include "lin_reg.h"

#include "../cost/cost.h"
#include "../lin_alg/lin_alg.h"
#include "../regularization/reg.h"
#include "../stat/stat.h"
#include "../utilities/utilities.h"
@@ -78,7 +77,6 @@ real_t MLPPLinReg::model_test(const Ref<MLPPVector> &x) {
void MLPPLinReg::newton_raphson(real_t learning_rate, int max_epoch, bool ui) {
ERR_FAIL_COND(!_initialized);

MLPPLinAlg alg;
MLPPReg regularization;

real_t cost_prev = 0;
@@ -89,16 +87,18 @@ void MLPPLinReg::newton_raphson(real_t learning_rate, int max_epoch, bool ui) {
while (true) {
cost_prev = cost(_y_hat, _output_set);

-Ref<MLPPVector> error = alg.subtractionnv(_y_hat, _output_set);
+Ref<MLPPVector> error = _y_hat->subn(_output_set);

// Calculating the weight gradients (2nd derivative)
-Ref<MLPPVector> first_derivative = alg.mat_vec_multnv(alg.transposenm(_input_set), error);
-Ref<MLPPMatrix> second_derivative = alg.matmultnm(alg.transposenm(_input_set), _input_set);
-_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate / _n, alg.mat_vec_multnv(alg.transposenm(alg.inversenm(second_derivative)), first_derivative)));

+Ref<MLPPVector> first_derivative = _input_set->transposen()->mult_vec(error);
+Ref<MLPPMatrix> second_derivative = _input_set->transposen()->multn(_input_set);

+_weights->sub(second_derivative->inverse()->transposen()->mult_vec(first_derivative)->scalar_multiplyn(learning_rate / _n));
_weights = regularization.reg_weightsv(_weights, _lambda, _alpha, _reg);

// Calculating the bias gradients (2nd derivative)
-_bias -= learning_rate * alg.sum_elementsv(error) / _n; // We keep this the same. The 2nd derivative is just [1].
+_bias -= learning_rate * error->sum_elements() / _n; // We keep this the same. The 2nd derivative is just [1].

forward_pass();

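Both the removed and the added lines implement the same Newton-Raphson step; in the notation of the code (X = _input_set, n = _n, eta = learning_rate):

\[
w \leftarrow w - \frac{\eta}{n}\left[(X^{\top}X)^{-1}\right]^{\top} X^{\top}(\hat{y} - y), \qquad
b \leftarrow b - \frac{\eta}{n}\sum_i (\hat{y}_i - y_i).
\]

Since X^T X is symmetric, the extra transpose on its inverse is mathematically a no-op.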
@@ -118,7 +118,6 @@ void MLPPLinReg::newton_raphson(real_t learning_rate, int max_epoch, bool ui) {
void MLPPLinReg::gradient_descent(real_t learning_rate, int max_epoch, bool ui) {
ERR_FAIL_COND(!_initialized);

MLPPLinAlg alg;
MLPPReg regularization;

real_t cost_prev = 0;
@@ -129,14 +128,14 @@ void MLPPLinReg::gradient_descent(real_t learning_rate, int max_epoch, bool ui)
while (true) {
cost_prev = cost(_y_hat, _output_set);

-Ref<MLPPVector> error = alg.subtractionnv(_y_hat, _output_set);
+Ref<MLPPVector> error = _y_hat->subn(_output_set);

// Calculating the weight gradients
-_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate / _n, alg.mat_vec_multnv(alg.transposenm(_input_set), error)));
+_weights->sub(_input_set->transposen()->mult_vec(error)->scalar_multiplyn(learning_rate / _n));
_weights = regularization.reg_weightsv(_weights, _lambda, _alpha, _reg);

// Calculating the bias gradients
-_bias -= learning_rate * alg.sum_elementsv(error) / _n;
+_bias -= learning_rate * error->sum_elements() / _n;

forward_pass();

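The vanilla gradient-descent hunk above is the un-regularized least-squares gradient written out:

\[
w \leftarrow w - \frac{\eta}{n} X^{\top}(\hat{y} - y), \qquad
b \leftarrow b - \frac{\eta}{n}\sum_i (\hat{y}_i - y_i),
\]

followed by the reg_weightsv() regularization step on the weights.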
@@ -156,7 +155,6 @@ void MLPPLinReg::gradient_descent(real_t learning_rate, int max_epoch, bool ui)
void MLPPLinReg::sgd(real_t learning_rate, int max_epoch, bool ui) {
ERR_FAIL_COND(!_initialized);

MLPPLinAlg alg;
MLPPReg regularization;

real_t cost_prev = 0;
@@ -193,7 +191,7 @@ void MLPPLinReg::sgd(real_t learning_rate, int max_epoch, bool ui) {
real_t error = y_hat - output_element_set;

// Weight updation
-_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate * error, input_set_row_tmp));
+_weights->sub(input_set_row_tmp->scalar_multiplyn(learning_rate * error));
_weights = regularization.reg_weightsv(_weights, _lambda, _alpha, _reg);

// Bias updation
@@ -219,7 +217,6 @@ void MLPPLinReg::sgd(real_t learning_rate, int max_epoch, bool ui) {
void MLPPLinReg::mbgd(real_t learning_rate, int max_epoch, int mini_batch_size, bool ui) {
ERR_FAIL_COND(!_initialized);

MLPPLinAlg alg;
MLPPReg regularization;

real_t cost_prev = 0;
@@ -237,14 +234,14 @@ void MLPPLinReg::mbgd(real_t learning_rate, int max_epoch, int mini_batch_size,
Ref<MLPPVector> y_hat = evaluatem(current_input_mini_batch);
cost_prev = cost(y_hat, current_output_mini_batch);

-Ref<MLPPVector> error = alg.subtractionnv(y_hat, current_output_mini_batch);
+Ref<MLPPVector> error = y_hat->subn(current_output_mini_batch);

// Calculating the weight gradients
-_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate / current_output_mini_batch->size(), alg.mat_vec_multnv(alg.transposenm(current_input_mini_batch), error)));
+_weights->sub(current_input_mini_batch->transposen()->mult_vec(error)->scalar_multiplyn(learning_rate / current_output_mini_batch->size()));
_weights = regularization.reg_weightsv(_weights, _lambda, _alpha, _reg);

// Calculating the bias gradients
-_bias -= learning_rate * alg.sum_elementsv(error) / current_output_mini_batch->size();
+_bias -= learning_rate * error->sum_elements() / current_output_mini_batch->size();
y_hat = evaluatem(current_input_mini_batch);

if (ui) {
@@ -266,7 +263,6 @@ void MLPPLinReg::mbgd(real_t learning_rate, int max_epoch, int mini_batch_size,
void MLPPLinReg::momentum(real_t learning_rate, int max_epoch, int mini_batch_size, real_t gamma, bool ui) {
ERR_FAIL_COND(!_initialized);

MLPPLinAlg alg;
MLPPReg regularization;
real_t cost_prev = 0;
int epoch = 1;
@@ -276,7 +272,7 @@ void MLPPLinReg::momentum(real_t learning_rate, int max_epoch, int mini_batch_si
MLPPUtilities::CreateMiniBatchMVBatch batches = MLPPUtilities::create_mini_batchesmv(_input_set, _output_set, n_mini_batch);

// Initializing necessary components for Momentum.
-Ref<MLPPVector> v = alg.zerovecnv(_weights->size());
+Ref<MLPPVector> v = MLPPVector::create_vec_zero(_weights->size());

while (true) {
for (int i = 0; i < n_mini_batch; i++) {
@@ -286,19 +282,20 @@ void MLPPLinReg::momentum(real_t learning_rate, int max_epoch, int mini_batch_si
Ref<MLPPVector> y_hat = evaluatem(current_input_mini_batch);
cost_prev = cost(y_hat, current_output_mini_batch);

-Ref<MLPPVector> error = alg.subtractionnv(y_hat, current_output_mini_batch);
+Ref<MLPPVector> error = y_hat->subn(current_output_mini_batch);

// Calculating the weight gradients
-Ref<MLPPVector> gradient = alg.scalar_multiplynv(1 / current_output_mini_batch->size(), alg.mat_vec_multnv(alg.transposenm(current_input_mini_batch), error));

+Ref<MLPPVector> gradient = current_input_mini_batch->transposen()->mult_vec(error)->scalar_multiplyn(1 / current_output_mini_batch->size());

Ref<MLPPVector> reg_deriv_term = regularization.reg_deriv_termv(_weights, _lambda, _alpha, _reg);
-Ref<MLPPVector> weight_grad = alg.additionnv(gradient, reg_deriv_term); // Weight_grad_final
+Ref<MLPPVector> weight_grad = gradient->addn(reg_deriv_term); // Weight_grad_final

-v = alg.additionnv(alg.scalar_multiplynv(gamma, v), alg.scalar_multiplynv(learning_rate, weight_grad));

-_weights = alg.subtractionnv(_weights, v);
+v = v->scalar_multiplyn(gamma)->addn(weight_grad->scalar_multiplyn(learning_rate));
+_weights->sub(v);

// Calculating the bias gradients
-_bias -= learning_rate * alg.sum_elementsv(error) / current_output_mini_batch->size(); // As normal
+_bias -= learning_rate * error->sum_elements() / current_output_mini_batch->size(); // As normal
y_hat = evaluatem(current_input_mini_batch);

if (ui) {
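With g denoting the regularized mini-batch gradient (gradient plus reg_deriv_term), the momentum hunk is:

\[
v \leftarrow \gamma v + \eta g, \qquad w \leftarrow w - v,
\]

while the bias is updated by plain gradient descent, as the "As normal" comment notes.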
@@ -320,7 +317,6 @@ void MLPPLinReg::momentum(real_t learning_rate, int max_epoch, int mini_batch_si
void MLPPLinReg::nag(real_t learning_rate, int max_epoch, int mini_batch_size, real_t gamma, bool ui) {
ERR_FAIL_COND(!_initialized);

MLPPLinAlg alg;
MLPPReg regularization;
real_t cost_prev = 0;
int epoch = 1;
@@ -330,31 +326,32 @@ void MLPPLinReg::nag(real_t learning_rate, int max_epoch, int mini_batch_size, r
MLPPUtilities::CreateMiniBatchMVBatch batches = MLPPUtilities::create_mini_batchesmv(_input_set, _output_set, n_mini_batch);

// Initializing necessary components for Momentum.
-Ref<MLPPVector> v = alg.zerovecnv(_weights->size());
+Ref<MLPPVector> v = MLPPVector::create_vec_zero(_weights->size());

while (true) {
for (int i = 0; i < n_mini_batch; i++) {
Ref<MLPPMatrix> current_input_mini_batch = batches.input_sets[i];
Ref<MLPPVector> current_output_mini_batch = batches.output_sets[i];

-_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(gamma, v)); // "Aposterori" calculation
+_weights->sub(v->scalar_multiplyn(gamma)); // "Aposterori" calculation

Ref<MLPPVector> y_hat = evaluatem(current_input_mini_batch);
cost_prev = cost(y_hat, current_output_mini_batch);

-Ref<MLPPVector> error = alg.subtractionnv(y_hat, current_output_mini_batch);
+Ref<MLPPVector> error = y_hat->subn(current_output_mini_batch);

// Calculating the weight gradients
-Ref<MLPPVector> gradient = alg.scalar_multiplynv(1 / current_output_mini_batch->size(), alg.mat_vec_multnv(alg.transposenm(current_input_mini_batch), error));

+Ref<MLPPVector> gradient = current_input_mini_batch->transposen()->mult_vec(error)->scalar_multiplyn(1 / current_output_mini_batch->size());
Ref<MLPPVector> reg_deriv_term = regularization.reg_deriv_termv(_weights, _lambda, _alpha, _reg);
-Ref<MLPPVector> weight_grad = alg.additionnv(gradient, reg_deriv_term); // Weight_grad_final
+Ref<MLPPVector> weight_grad = gradient->addn(reg_deriv_term); // Weight_grad_final

-v = alg.additionnv(alg.scalar_multiplynv(gamma, v), alg.scalar_multiplynv(learning_rate, weight_grad));
+v = v->scalar_multiplyn(gamma)->addn(weight_grad->scalar_multiplyn(learning_rate));

-_weights = alg.subtractionnv(_weights, v);
+_weights->sub(v);

// Calculating the bias gradients
-_bias -= learning_rate * alg.sum_elementsv(error) / current_output_mini_batch->size(); // As normal
+_bias -= learning_rate * error->sum_elements() / current_output_mini_batch->size(); // As normal
y_hat = evaluatem(current_input_mini_batch);

if (ui) {
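The NAG variant first applies the look-ahead step (the "Aposterori" line) before evaluating the mini-batch, then performs the same velocity update as momentum:

\[
w \leftarrow w - \gamma v, \qquad
v \leftarrow \gamma v + \eta g, \qquad
w \leftarrow w - v.
\]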
@@ -376,7 +373,6 @@ void MLPPLinReg::nag(real_t learning_rate, int max_epoch, int mini_batch_size, r
void MLPPLinReg::adagrad(real_t learning_rate, int max_epoch, int mini_batch_size, real_t e, bool ui) {
ERR_FAIL_COND(!_initialized);

MLPPLinAlg alg;
MLPPReg regularization;
real_t cost_prev = 0;
int epoch = 1;
@@ -386,7 +382,7 @@ void MLPPLinReg::adagrad(real_t learning_rate, int max_epoch, int mini_batch_siz
MLPPUtilities::CreateMiniBatchMVBatch batches = MLPPUtilities::create_mini_batchesmv(_input_set, _output_set, n_mini_batch);

// Initializing necessary components for Adagrad.
-Ref<MLPPVector> v = alg.zerovecnv(_weights->size());
+Ref<MLPPVector> v = MLPPVector::create_vec_zero(_weights->size());

while (true) {
for (int i = 0; i < n_mini_batch; i++) {
@@ -396,19 +392,18 @@ void MLPPLinReg::adagrad(real_t learning_rate, int max_epoch, int mini_batch_siz
Ref<MLPPVector> y_hat = evaluatem(current_input_mini_batch);
cost_prev = cost(y_hat, current_output_mini_batch);

-Ref<MLPPVector> error = alg.subtractionnv(y_hat, current_output_mini_batch);
+Ref<MLPPVector> error = y_hat->subn(current_output_mini_batch);

// Calculating the weight gradients
-Ref<MLPPVector> gradient = alg.scalar_multiplynv(1 / current_output_mini_batch->size(), alg.mat_vec_multnv(alg.transposenm(current_input_mini_batch), error));
+Ref<MLPPVector> gradient = current_input_mini_batch->transposen()->mult_vec(error)->scalar_multiplyn(1 / current_output_mini_batch->size());
Ref<MLPPVector> reg_deriv_term = regularization.reg_deriv_termv(_weights, _lambda, _alpha, _reg);
-Ref<MLPPVector> weight_grad = alg.additionnv(gradient, reg_deriv_term); // Weight_grad_final
+Ref<MLPPVector> weight_grad = gradient->addn(reg_deriv_term); // Weight_grad_final

-v = alg.hadamard_productnv(weight_grad, weight_grad);

-_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate, alg.division_element_wisenv(weight_grad, alg.sqrtnv(alg.scalar_addnv(e, v)))));
+v = weight_grad->hadamard_productn(weight_grad);
+_weights->sub(weight_grad->division_element_wisen(v->scalar_addn(e)->sqrtn())->scalar_multiplyn(learning_rate));

// Calculating the bias gradients
-_bias -= learning_rate * alg.sum_elementsv(error) / current_output_mini_batch->size(); // As normal
+_bias -= learning_rate * error->sum_elements() / current_output_mini_batch->size(); // As normal
y_hat = evaluatem(current_input_mini_batch);

if (ui) {
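The adagrad hunk, with g the regularized gradient and e the small stabilizer passed in as a parameter:

\[
v \leftarrow g \odot g, \qquad
w \leftarrow w - \eta \, \frac{g}{\sqrt{v + e}}.
\]

Note that v is overwritten rather than accumulated here; that is the same in both the removed and the added lines.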
@@ -431,7 +426,6 @@ void MLPPLinReg::adadelta(real_t learning_rate, int max_epoch, int mini_batch_si
ERR_FAIL_COND(!_initialized);

// Adagrad upgrade. Momentum is applied.
MLPPLinAlg alg;
MLPPReg regularization;
real_t cost_prev = 0;
int epoch = 1;
@@ -441,7 +435,7 @@ void MLPPLinReg::adadelta(real_t learning_rate, int max_epoch, int mini_batch_si
MLPPUtilities::CreateMiniBatchMVBatch batches = MLPPUtilities::create_mini_batchesmv(_input_set, _output_set, n_mini_batch);

// Initializing necessary components for Adagrad.
-Ref<MLPPVector> v = alg.zerovecnv(_weights->size());
+Ref<MLPPVector> v = MLPPVector::create_vec_zero(_weights->size());

while (true) {
for (int i = 0; i < n_mini_batch; i++) {
@@ -451,19 +445,18 @@ void MLPPLinReg::adadelta(real_t learning_rate, int max_epoch, int mini_batch_si
Ref<MLPPVector> y_hat = evaluatem(current_input_mini_batch);
cost_prev = cost(y_hat, current_output_mini_batch);

-Ref<MLPPVector> error = alg.subtractionnv(y_hat, current_output_mini_batch);
+Ref<MLPPVector> error = y_hat->subn(current_output_mini_batch);

// Calculating the weight gradients
-Ref<MLPPVector> gradient = alg.scalar_multiplynv(1 / current_output_mini_batch->size(), alg.mat_vec_multnv(alg.transposenm(current_input_mini_batch), error));
+Ref<MLPPVector> gradient = current_input_mini_batch->transposen()->mult_vec(error)->scalar_multiplyn(1 / current_output_mini_batch->size());
Ref<MLPPVector> reg_deriv_term = regularization.reg_deriv_termv(_weights, _lambda, _alpha, _reg);
-Ref<MLPPVector> weight_grad = alg.additionnv(gradient, reg_deriv_term); // Weight_grad_final
+Ref<MLPPVector> weight_grad = gradient->addn(reg_deriv_term); // Weight_grad_final

-v = alg.additionnv(alg.scalar_multiplynv(b1, v), alg.scalar_multiplynv(1 - b1, alg.hadamard_productnv(weight_grad, weight_grad)));

-_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate, alg.division_element_wisenv(weight_grad, alg.sqrtnv(alg.scalar_addnv(e, v)))));
+v = v->scalar_multiplyn(b1)->addn(weight_grad->hadamard_productn(weight_grad)->scalar_multiplyn(1 - b1));
+_weights->sub(weight_grad->division_element_wisen(v->scalar_addn(e)->sqrtn())->scalar_multiplyn(learning_rate));

// Calculating the bias gradients
-_bias -= learning_rate * alg.sum_elementsv(error) / current_output_mini_batch->size(); // As normal
+_bias -= learning_rate * error->sum_elements() / current_output_mini_batch->size(); // As normal
y_hat = evaluatem(current_input_mini_batch);

if (ui) {
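The adadelta hunk keeps an exponentially weighted accumulator of squared gradients (the "Adagrad upgrade" the comment mentions) and divides the raw gradient by its root:

\[
v \leftarrow b_1 v + (1 - b_1)\, g \odot g, \qquad
w \leftarrow w - \eta\, \frac{g}{\sqrt{v + e}}.
\]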
@@ -485,7 +478,6 @@ void MLPPLinReg::adadelta(real_t learning_rate, int max_epoch, int mini_batch_si
void MLPPLinReg::adam(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool ui) {
ERR_FAIL_COND(!_initialized);

MLPPLinAlg alg;
MLPPReg regularization;
real_t cost_prev = 0;
int epoch = 1;
@@ -495,8 +487,8 @@ void MLPPLinReg::adam(real_t learning_rate, int max_epoch, int mini_batch_size,
MLPPUtilities::CreateMiniBatchMVBatch batches = MLPPUtilities::create_mini_batchesmv(_input_set, _output_set, n_mini_batch);

// Initializing necessary components for Adam.
-Ref<MLPPVector> m = alg.zerovecnv(_weights->size());
-Ref<MLPPVector> v = alg.zerovecnv(_weights->size());
+Ref<MLPPVector> m = MLPPVector::create_vec_zero(_weights->size());
+Ref<MLPPVector> v = MLPPVector::create_vec_zero(_weights->size());

while (true) {
for (int i = 0; i < n_mini_batch; i++) {
@@ -506,23 +498,23 @@ void MLPPLinReg::adam(real_t learning_rate, int max_epoch, int mini_batch_size,
Ref<MLPPVector> y_hat = evaluatem(current_input_mini_batch);
cost_prev = cost(y_hat, current_output_mini_batch);

-Ref<MLPPVector> error = alg.subtractionnv(y_hat, current_output_mini_batch);
+Ref<MLPPVector> error = y_hat->subn(current_output_mini_batch);

// Calculating the weight gradients
-Ref<MLPPVector> gradient = alg.scalar_multiplynv(1 / current_output_mini_batch->size(), alg.mat_vec_multnv(alg.transposenm(current_input_mini_batch), error));
+Ref<MLPPVector> gradient = current_input_mini_batch->transposen()->mult_vec(error)->scalar_multiplyn(1 / current_output_mini_batch->size());
Ref<MLPPVector> reg_deriv_term = regularization.reg_deriv_termv(_weights, _lambda, _alpha, _reg);
-Ref<MLPPVector> weight_grad = alg.additionnv(gradient, reg_deriv_term); // Weight_grad_final
+Ref<MLPPVector> weight_grad = gradient->addn(reg_deriv_term); // Weight_grad_final

-m = alg.additionnv(alg.scalar_multiplynv(b1, m), alg.scalar_multiplynv(1 - b1, weight_grad));
-v = alg.additionnv(alg.scalar_multiplynv(b2, v), alg.scalar_multiplynv(1 - b2, alg.exponentiatenv(weight_grad, 2)));
+m = m->scalar_multiplyn(b1)->addn(weight_grad->scalar_multiplyn(1 - b1));
+v = v->scalar_multiplyn(b2)->addn(weight_grad->exponentiaten(2)->scalar_multiplyn(1 - b2));

-Ref<MLPPVector> m_hat = alg.scalar_multiplynv(1 / (1 - Math::pow(b1, epoch)), m);
-Ref<MLPPVector> v_hat = alg.scalar_multiplynv(1 / (1 - Math::pow(b2, epoch)), v);
+Ref<MLPPVector> m_hat = m->scalar_multiplyn(1 / (1 - Math::pow(b1, epoch)));
+Ref<MLPPVector> v_hat = v->scalar_multiplyn(1 / (1 - Math::pow(b2, epoch)));

-_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate, alg.division_element_wisenvnm(m_hat, alg.scalar_addnv(e, alg.sqrtnv(v_hat)))));
+_weights->sub(m_hat->division_element_wisen(v_hat->sqrtn()->scalar_addn(e))->scalar_multiplyn(learning_rate));

// Calculating the bias gradients
-_bias -= learning_rate * alg.sum_elementsv(error) / current_output_mini_batch->size(); // As normal
+_bias -= learning_rate * error->sum_elements() / current_output_mini_batch->size(); // As normal
y_hat = evaluatem(current_input_mini_batch);

if (ui) {
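The adam hunk is the standard update with bias-corrected first and second moments:

\[
m \leftarrow b_1 m + (1 - b_1) g, \qquad
v \leftarrow b_2 v + (1 - b_2) g^{2}, \qquad
\hat{m} = \frac{m}{1 - b_1^{t}}, \quad
\hat{v} = \frac{v}{1 - b_2^{t}}, \qquad
w \leftarrow w - \eta\, \frac{\hat{m}}{\sqrt{\hat{v}} + e},
\]

where t is the epoch counter used in Math::pow(b1, epoch).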
@@ -544,7 +536,6 @@ void MLPPLinReg::adam(real_t learning_rate, int max_epoch, int mini_batch_size,
void MLPPLinReg::adamax(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool ui) {
ERR_FAIL_COND(!_initialized);

MLPPLinAlg alg;
MLPPReg regularization;
real_t cost_prev = 0;
int epoch = 1;
@@ -553,8 +544,8 @@ void MLPPLinReg::adamax(real_t learning_rate, int max_epoch, int mini_batch_size
int n_mini_batch = _n / mini_batch_size;
MLPPUtilities::CreateMiniBatchMVBatch batches = MLPPUtilities::create_mini_batchesmv(_input_set, _output_set, n_mini_batch);

-Ref<MLPPVector> m = alg.zerovecnv(_weights->size());
-Ref<MLPPVector> u = alg.zerovecnv(_weights->size());
+Ref<MLPPVector> m = MLPPVector::create_vec_zero(_weights->size());
+Ref<MLPPVector> u = MLPPVector::create_vec_zero(_weights->size());

while (true) {
for (int i = 0; i < n_mini_batch; i++) {
@@ -564,22 +555,21 @@ void MLPPLinReg::adamax(real_t learning_rate, int max_epoch, int mini_batch_size
Ref<MLPPVector> y_hat = evaluatem(current_input_mini_batch);
cost_prev = cost(y_hat, current_output_mini_batch);

-Ref<MLPPVector> error = alg.subtractionnv(y_hat, current_output_mini_batch);
+Ref<MLPPVector> error = y_hat->subn(current_output_mini_batch);

// Calculating the weight gradients
-Ref<MLPPVector> gradient = alg.scalar_multiplynv(1 / current_output_mini_batch->size(), alg.mat_vec_multnv(alg.transposenm(current_input_mini_batch), error));
+Ref<MLPPVector> gradient = current_input_mini_batch->transposen()->mult_vec(error)->scalar_multiplyn(1 / current_output_mini_batch->size());
Ref<MLPPVector> reg_deriv_term = regularization.reg_deriv_termv(_weights, _lambda, _alpha, _reg);
-Ref<MLPPVector> weight_grad = alg.additionnv(gradient, reg_deriv_term); // Weight_grad_final
+Ref<MLPPVector> weight_grad = gradient->addn(reg_deriv_term); // Weight_grad_final

-m = alg.additionnv(alg.scalar_multiplynv(b1, m), alg.scalar_multiplynv(1 - b1, weight_grad));
-u = alg.maxnvv(alg.scalar_multiplynv(b2, u), alg.absv(weight_grad));
+m = m->scalar_multiplyn(b1)->addn(weight_grad->scalar_multiplyn(1 - b1));
+u = u->scalar_multiplyn(b2)->maxn(weight_grad->absn());

-Ref<MLPPVector> m_hat = alg.scalar_multiplynv(1 / (1 - Math::pow(b1, epoch)), m);

-_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate, alg.division_element_wisenv(m_hat, u)));
+Ref<MLPPVector> m_hat = m->scalar_multiplyn(1 / (1 - Math::pow(b1, epoch)));
+_weights->sub(m_hat->division_element_wisen(u)->scalar_multiplyn(learning_rate));

// Calculating the bias gradients
-_bias -= learning_rate * alg.sum_elementsv(error) / current_output_mini_batch->size(); // As normal
+_bias -= learning_rate * error->sum_elements() / current_output_mini_batch->size(); // As normal
y_hat = evaluatem(current_input_mini_batch);

if (ui) {
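Adamax replaces the second-moment accumulator with an infinity-norm one:

\[
m \leftarrow b_1 m + (1 - b_1) g, \qquad
u \leftarrow \max(b_2 u, |g|), \qquad
\hat{m} = \frac{m}{1 - b_1^{t}}, \qquad
w \leftarrow w - \eta\, \frac{\hat{m}}{u}.
\]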
@@ -601,7 +591,6 @@ void MLPPLinReg::adamax(real_t learning_rate, int max_epoch, int mini_batch_size
void MLPPLinReg::nadam(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool ui) {
ERR_FAIL_COND(!_initialized);

MLPPLinAlg alg;
MLPPReg regularization;
real_t cost_prev = 0;
int epoch = 1;
@@ -611,9 +600,9 @@ void MLPPLinReg::nadam(real_t learning_rate, int max_epoch, int mini_batch_size,
MLPPUtilities::CreateMiniBatchMVBatch batches = MLPPUtilities::create_mini_batchesmv(_input_set, _output_set, n_mini_batch);

// Initializing necessary components for Adam.
-Ref<MLPPVector> m = alg.zerovecnv(_weights->size());
-Ref<MLPPVector> v = alg.zerovecnv(_weights->size());
-Ref<MLPPVector> m_final = alg.zerovecnv(_weights->size());
+Ref<MLPPVector> m = MLPPVector::create_vec_zero(_weights->size());
+Ref<MLPPVector> v = MLPPVector::create_vec_zero(_weights->size());
+Ref<MLPPVector> m_final = MLPPVector::create_vec_zero(_weights->size());

while (true) {
for (int i = 0; i < n_mini_batch; i++) {
@@ -623,24 +612,25 @@ void MLPPLinReg::nadam(real_t learning_rate, int max_epoch, int mini_batch_size,
Ref<MLPPVector> y_hat = evaluatem(current_input_mini_batch);
cost_prev = cost(y_hat, current_output_mini_batch);

-Ref<MLPPVector> error = alg.subtractionnv(y_hat, current_output_mini_batch);
+Ref<MLPPVector> error = y_hat->subn(current_output_mini_batch);

// Calculating the weight gradients
-Ref<MLPPVector> gradient = alg.scalar_multiplynv(1 / current_output_mini_batch->size(), alg.mat_vec_multnv(alg.transposenm(current_input_mini_batch), error));
+Ref<MLPPVector> gradient = current_input_mini_batch->transposen()->mult_vec(error)->scalar_multiplyn(1 / current_output_mini_batch->size());
Ref<MLPPVector> reg_deriv_term = regularization.reg_deriv_termv(_weights, _lambda, _alpha, _reg);
-Ref<MLPPVector> weight_grad = alg.additionnv(gradient, reg_deriv_term); // Weight_grad_final
+Ref<MLPPVector> weight_grad = gradient->addn(reg_deriv_term); // Weight_grad_final

-m = alg.additionnv(alg.scalar_multiplynv(b1, m), alg.scalar_multiplynv(1 - b1, weight_grad));
-v = alg.additionnv(alg.scalar_multiplynv(b2, v), alg.scalar_multiplynv(1 - b2, alg.exponentiatenv(weight_grad, 2)));
-m_final = alg.additionnv(alg.scalar_multiplynv(b1, m), alg.scalar_multiplynv((1 - b1) / (1 - Math::pow(b1, epoch)), weight_grad));
+m = m->scalar_multiplyn(b1)->addn(weight_grad->scalar_multiplyn(1 - b1));
+v = v->scalar_multiplyn(b2)->addn(weight_grad->exponentiaten(2)->scalar_multiplyn(1 - b2));

-Ref<MLPPVector> m_hat = alg.scalar_multiplynv(1 / (1 - Math::pow(b1, epoch)), m);
-Ref<MLPPVector> v_hat = alg.scalar_multiplynv(1 / (1 - Math::pow(b2, epoch)), v);
+m_final = m->scalar_multiplyn(b1)->addn(weight_grad->scalar_multiplyn((1 - b1) / (1 - Math::pow(b1, epoch))));

-_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate, alg.division_element_wisenv(m_final, alg.scalar_addnv(e, alg.sqrtnv(v_hat)))));
+Ref<MLPPVector> m_hat = m->scalar_multiplyn(1 / (1 - Math::pow(b1, epoch)));
+Ref<MLPPVector> v_hat = v->scalar_multiplyn(1 / (1 - Math::pow(b2, epoch)));

+_weights->sub(m_final->division_element_wisen(v_hat->sqrtn()->scalar_addn(e))->scalar_multiplyn(learning_rate));

// Calculating the bias gradients
-_bias -= learning_rate * alg.sum_elementsv(error) / current_output_mini_batch->size(); // As normal
+_bias -= learning_rate * error->sum_elements() / current_output_mini_batch->size(); // As normal
y_hat = evaluatem(current_input_mini_batch);

if (ui) {
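Nadam blends the current momentum with the fresh gradient in a Nesterov-style term before the update:

\[
m \leftarrow b_1 m + (1 - b_1) g, \qquad
v \leftarrow b_2 v + (1 - b_2) g^{2}, \qquad
m_{\text{final}} = b_1 m + \frac{(1 - b_1)\, g}{1 - b_1^{t}},
\]
\[
\hat{v} = \frac{v}{1 - b_2^{t}}, \qquad
w \leftarrow w - \eta\, \frac{m_{\text{final}}}{\sqrt{\hat{v}} + e}.
\]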
@@ -662,10 +652,9 @@ void MLPPLinReg::nadam(real_t learning_rate, int max_epoch, int mini_batch_size,
void MLPPLinReg::normal_equation() {
ERR_FAIL_COND(!_initialized);

MLPPLinAlg alg;
MLPPStat stat;

-Ref<MLPPMatrix> input_set_t = alg.transposenm(_input_set);
+Ref<MLPPMatrix> input_set_t = _input_set->transposen();

Ref<MLPPVector> input_set_t_row_tmp;
input_set_t_row_tmp.instance();
@@ -683,17 +672,18 @@ void MLPPLinReg::normal_equation() {

Ref<MLPPVector> temp;
//temp.resize(_k);
-temp = alg.mat_vec_multnv(alg.inversenm(alg.matmultnm(alg.transposenm(_input_set), _input_set)), alg.mat_vec_multnv(alg.transposenm(_input_set), _output_set));

+temp = _input_set->transposen()->multn(_input_set)->inverse()->mult_vec(_input_set->transposen()->mult_vec(_output_set));

ERR_FAIL_COND_MSG(Math::is_nan(temp->element_get(0)), "ERR: Resulting matrix was noninvertible/degenerate, and so the normal equation could not be performed. Try utilizing gradient descent.");

if (_reg == MLPPReg::REGULARIZATION_TYPE_RIDGE) {
-_weights = alg.mat_vec_multnv(alg.inversenm(alg.additionnm(alg.matmultnm(alg.transposenm(_input_set), _input_set), alg.scalar_multiplynm(_lambda, alg.identitym(_k)))), alg.mat_vec_multnv(alg.transposenm(_input_set), _output_set));
+_weights = _input_set->transposen()->multn(_input_set)->addn(MLPPMatrix::create_identity_mat(_k)->scalar_multiplyn(_lambda))->inverse()->mult_vec(_input_set->transposen()->mult_vec(_output_set));
} else {
-_weights = alg.mat_vec_multnv(alg.inversenm(alg.matmultnm(alg.transposenm(_input_set), _input_set)), alg.mat_vec_multnv(alg.transposenm(_input_set), _output_set));
+_weights = _input_set->transposen()->multn(_input_set)->inverse()->mult_vec(_input_set->transposen()->mult_vec(_output_set));
}

-_bias = stat.meanv(_output_set) - alg.dotnv(_weights, x_means);
+_bias = stat.meanv(_output_set) - _weights->dot(x_means);

forward_pass();
}
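The normal-equation hunk is the closed-form least-squares solution, with the ridge branch adding the lambda*I term before inversion:

\[
w = (X^{\top}X)^{-1} X^{\top} y, \qquad
w_{\text{ridge}} = (X^{\top}X + \lambda I)^{-1} X^{\top} y, \qquad
b = \operatorname{mean}(y) - w^{\top}\bar{x},
\]

where \(\bar{x}\) is the vector of per-column input means held in x_means.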
@@ -764,15 +754,11 @@ real_t MLPPLinReg::cost(const Ref<MLPPVector> &y_hat, const Ref<MLPPVector> &y)
}

real_t MLPPLinReg::evaluatev(const Ref<MLPPVector> &x) {
MLPPLinAlg alg;

-return alg.dotnv(_weights, x) + _bias;
+return _weights->dot(x) + _bias;
}

Ref<MLPPVector> MLPPLinReg::evaluatem(const Ref<MLPPMatrix> &X) {
MLPPLinAlg alg;

-return alg.scalar_addnv(_bias, alg.mat_vec_multnv(X, _weights));
+return X->mult_vec(_weights)->scalar_addn(_bias);
}

// wTx + b
@@ -8,7 +8,6 @@

#include "../activation/activation.h"
#include "../cost/cost.h"
#include "../lin_alg/lin_alg.h"
#include "../regularization/reg.h"
#include "../utilities/utilities.h"

@@ -77,7 +76,6 @@ real_t MLPPLogReg::model_test(const Ref<MLPPVector> &x) {
void MLPPLogReg::gradient_descent(real_t learning_rate, int max_epoch, bool ui) {
ERR_FAIL_COND(!_initialized);

MLPPLinAlg alg;
MLPPReg regularization;

real_t cost_prev = 0;
@@ -88,14 +86,14 @@ void MLPPLogReg::gradient_descent(real_t learning_rate, int max_epoch, bool ui)
while (true) {
cost_prev = cost(_y_hat, _output_set);

-Ref<MLPPVector> error = alg.subtractionnv(_y_hat, _output_set);
+Ref<MLPPVector> error = _y_hat->subn(_output_set);

// Calculating the weight gradients
-_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate / _n, alg.mat_vec_multnv(alg.transposenm(_input_set), error)));
+_weights->sub(_input_set->transposen()->mult_vec(error)->scalar_multiplyn(learning_rate / _n));
_weights = regularization.reg_weightsv(_weights, _lambda, _alpha, _reg);

// Calculating the bias gradients
-_bias -= learning_rate * alg.sum_elementsv(error) / _n;
+_bias -= learning_rate * error->sum_elements() / _n;

forward_pass();

@@ -115,7 +113,6 @@ void MLPPLogReg::gradient_descent(real_t learning_rate, int max_epoch, bool ui)
void MLPPLogReg::mle(real_t learning_rate, int max_epoch, bool ui) {
ERR_FAIL_COND(!_initialized);

MLPPLinAlg alg;
MLPPReg regularization;

real_t cost_prev = 0;
@@ -126,14 +123,14 @@ void MLPPLogReg::mle(real_t learning_rate, int max_epoch, bool ui) {
while (true) {
cost_prev = cost(_y_hat, _output_set);

-Ref<MLPPVector> error = alg.subtractionnv(_output_set, _y_hat);
+Ref<MLPPVector> error = _output_set->subn(_y_hat);

// Calculating the weight gradients
-_weights = alg.additionnv(_weights, alg.scalar_multiplynv(learning_rate / _n, alg.mat_vec_multnv(alg.transposenm(_input_set), error)));
+_weights->add(_input_set->transposen()->mult_vec(error)->scalar_multiplyn(learning_rate / _n));
_weights = regularization.reg_weightsv(_weights, _lambda, _alpha, _reg);

// Calculating the bias gradients
-_bias += learning_rate * alg.sum_elementsv(error) / _n;
+_bias += learning_rate * error->sum_elements() / _n;

forward_pass();

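mle() is gradient ascent on the log-likelihood rather than descent on the cost, which is why the error is formed as y minus y_hat and the updates use add()/+= where gradient_descent() uses sub()/-=:

\[
w \leftarrow w + \frac{\eta}{n} X^{\top}(y - \hat{y}), \qquad
b \leftarrow b + \frac{\eta}{n}\sum_i (y_i - \hat{y}_i).
\]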
@@ -153,7 +150,6 @@ void MLPPLogReg::mle(real_t learning_rate, int max_epoch, bool ui) {
void MLPPLogReg::sgd(real_t learning_rate, int max_epoch, bool ui) {
ERR_FAIL_COND(!_initialized);

MLPPLinAlg alg;
MLPPReg regularization;
real_t cost_prev = 0;
int epoch = 1;
@@ -189,7 +185,7 @@ void MLPPLogReg::sgd(real_t learning_rate, int max_epoch, bool ui) {
real_t error = y_hat - output_element_set;

// Weight updation
-_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate * error, input_row_tmp));
+_weights->sub(input_row_tmp->scalar_multiplyn(learning_rate * error));
_weights = regularization.reg_weightsv(_weights, _lambda, _alpha, _reg);

// Bias updation
@@ -215,7 +211,6 @@ void MLPPLogReg::sgd(real_t learning_rate, int max_epoch, bool ui) {
void MLPPLogReg::mbgd(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI) {
ERR_FAIL_COND(!_initialized);

MLPPLinAlg alg;
MLPPReg regularization;
real_t cost_prev = 0;
int epoch = 1;
@@ -232,14 +227,14 @@ void MLPPLogReg::mbgd(real_t learning_rate, int max_epoch, int mini_batch_size,
Ref<MLPPVector> y_hat = evaluatem(current_mini_batch_input_entry);
cost_prev = cost(y_hat, current_mini_batch_output_entry);

-Ref<MLPPVector> error = alg.subtractionnv(y_hat, current_mini_batch_output_entry);
+Ref<MLPPVector> error = y_hat->subn(current_mini_batch_output_entry);

// Calculating the weight gradients
-_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate / current_mini_batch_output_entry->size(), alg.mat_vec_multnv(alg.transposenm(current_mini_batch_input_entry), error)));
+_weights->sub(current_mini_batch_input_entry->transposen()->mult_vec(error)->scalar_multiplyn(learning_rate / current_mini_batch_output_entry->size()));
_weights = regularization.reg_weightsv(_weights, _lambda, _alpha, _reg);

// Calculating the bias gradients
-_bias -= learning_rate * alg.sum_elementsv(error) / current_mini_batch_output_entry->size();
+_bias -= learning_rate * error->sum_elements() / current_mini_batch_output_entry->size();
y_hat = evaluatem(current_mini_batch_input_entry);

if (UI) {
@@ -322,17 +317,15 @@ real_t MLPPLogReg::cost(const Ref<MLPPVector> &y_hat, const Ref<MLPPVector> &y)
}

real_t MLPPLogReg::evaluatev(const Ref<MLPPVector> &x) {
MLPPLinAlg alg;
MLPPActivation avn;

-return avn.sigmoid_normr(alg.dotnv(_weights, x) + _bias);
+return avn.sigmoid_normr(_weights->dot(x) + _bias);
}

Ref<MLPPVector> MLPPLogReg::evaluatem(const Ref<MLPPMatrix> &X) {
MLPPLinAlg alg;
MLPPActivation avn;

-return avn.sigmoid_normv(alg.scalar_addnv(_bias, alg.mat_vec_multnv(X, _weights)));
+return avn.sigmoid_normv(X->mult_vec(_weights)->scalar_addn(_bias));
}

// sigmoid ( wTx + b )
@@ -10,7 +10,6 @@

#include "../activation/activation.h"
#include "../cost/cost.h"
#include "../lin_alg/lin_alg.h"
#include "../regularization/reg.h"
#include "../utilities/utilities.h"

@@ -89,7 +88,6 @@ void MLPPMANN::gradient_descent(real_t learning_rate, int max_epoch, bool ui) {

MLPPCost mlpp_cost;
MLPPActivation avn;
MLPPLinAlg alg;
MLPPReg regularization;

real_t cost_prev = 0;
@@ -101,39 +99,45 @@ void MLPPMANN::gradient_descent(real_t learning_rate, int max_epoch, bool ui) {
cost_prev = cost(_y_hat, _output_set);

if (_output_layer->get_activation() == MLPPActivation::ACTIVATION_FUNCTION_SOFTMAX) {
-_output_layer->set_delta(alg.subtractionnm(_y_hat, _output_set));
+_output_layer->set_delta(_y_hat->subn(_output_set));
} else {
-_output_layer->set_delta(alg.hadamard_productnm(mlpp_cost.run_cost_deriv_matrix(_output_layer->get_cost(), _y_hat, _output_set), avn.run_activation_deriv_matrix(_output_layer->get_activation(), _output_layer->get_z())));
+_output_layer->set_delta(
+		mlpp_cost.run_cost_deriv_matrix(_output_layer->get_cost(), _y_hat, _output_set)->hadamard_productn(avn.run_activation_deriv_matrix(_output_layer->get_activation(), _output_layer->get_z())));
}

-Ref<MLPPMatrix> output_w_grad = alg.matmultnm(alg.transposenm(_output_layer->get_input()), _output_layer->get_delta());
+Ref<MLPPMatrix> output_w_grad = _output_layer->get_input()->transposen()->multn(_output_layer->get_delta());

-_output_layer->set_weights(alg.subtractionnm(_output_layer->get_weights(), alg.scalar_multiplynm(learning_rate / _n, output_w_grad)));
+_output_layer->set_weights(_output_layer->get_weights()->subn(output_w_grad->scalar_multiplyn(learning_rate / _n)));
_output_layer->set_weights(regularization.reg_weightsm(_output_layer->get_weights(), _output_layer->get_lambda(), _output_layer->get_alpha(), _output_layer->get_reg()));
-_output_layer->set_bias(alg.subtract_matrix_rowsnv(_output_layer->get_bias(), alg.scalar_multiplynm(learning_rate / _n, _output_layer->get_delta())));
+_output_layer->set_bias(_output_layer->get_bias()->subtract_matrix_rowsn(_output_layer->get_delta()->scalar_multiplyn(learning_rate / _n)));

if (!_network.empty()) {
Ref<MLPPHiddenLayer> layer = _network[_network.size() - 1];

//auto hiddenLayerAvn = layer.activation_map[layer.activation];

-layer->set_delta(alg.hadamard_productnm(alg.matmultnm(_output_layer->get_delta(), alg.transposenm(_output_layer->get_weights())), avn.run_activation_deriv_matrix(layer->get_activation(), layer->get_z())));
-Ref<MLPPMatrix> hidden_layer_w_grad = alg.matmultnm(alg.transposenm(layer->get_input()), layer->get_delta());
+layer->set_delta(_output_layer->get_delta()->multn(_output_layer->get_weights()->transposen())->hadamard_productn(avn.run_activation_deriv_matrix(layer->get_activation(), layer->get_z())));

-layer->set_weights(alg.subtractionnm(layer->get_weights(), alg.scalar_multiplynm(learning_rate / _n, hidden_layer_w_grad)));
+Ref<MLPPMatrix> hidden_layer_w_grad = layer->get_input()->transposen()->multn(layer->get_delta());

+layer->set_weights(layer->get_weights()->subn(hidden_layer_w_grad->scalar_multiplyn(learning_rate / _n)));
layer->set_weights(regularization.reg_weightsm(layer->get_weights(), layer->get_lambda(), layer->get_alpha(), layer->get_reg()));
-layer->set_bias(alg.subtract_matrix_rowsnv(layer->get_bias(), alg.scalar_multiplynm(learning_rate / _n, layer->get_delta())));

+layer->set_bias(layer->get_bias()->subtract_matrix_rowsn(layer->get_delta()->scalar_multiplyn(learning_rate / _n)));

for (int i = _network.size() - 2; i >= 0; i--) {
layer = _network[i];
Ref<MLPPHiddenLayer> next_layer = _network[i + 1];

//hiddenLayerAvn = layer.activation_map[layer.activation];
-layer->set_delta(alg.hadamard_productnm(alg.matmultnm(next_layer->get_delta(), next_layer->get_weights()), avn.run_activation_deriv_matrix(layer->get_activation(), layer->get_z())));
-hidden_layer_w_grad = alg.matmultnm(alg.transposenm(layer->get_input()), layer->get_delta());
-layer->set_weights(alg.subtractionnm(layer->get_weights(), alg.scalar_multiplynm(learning_rate / _n, hidden_layer_w_grad)));

+layer->set_delta(next_layer->get_delta()->multn(next_layer->get_weights())->hadamard_productn(avn.run_activation_deriv_matrix(layer->get_activation(), layer->get_z())));

+hidden_layer_w_grad = layer->get_input()->transposen()->multn(layer->get_delta());

+layer->set_weights(layer->get_weights()->subn(hidden_layer_w_grad->scalar_multiplyn(learning_rate / _n)));
layer->set_weights(regularization.reg_weightsm(layer->get_weights(), layer->get_lambda(), layer->get_alpha(), layer->get_reg()));
-layer->set_bias(alg.subtract_matrix_rowsnv(layer->get_bias(), alg.scalar_multiplynm(learning_rate / _n, layer->get_delta())));
+layer->set_bias(layer->get_bias()->subtract_matrix_rowsn(layer->get_delta()->scalar_multiplyn(learning_rate / _n)));
}
}
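The MLPPMANN hunk is plain backpropagation. The output delta is y_hat minus y for softmax outputs (the usual softmax-with-cross-entropy simplification) and C'(y_hat, y) Hadamard sigma'(z) otherwise; the delta of the last hidden layer and the per-layer updates are:

\[
\delta_{\text{hidden}} = \left(\delta_{\text{out}} W_{\text{out}}^{\top}\right) \odot \sigma'(z), \qquad
W \leftarrow W - \frac{\eta}{n}\, a^{\top} \delta, \qquad
b \leftarrow b - \frac{\eta}{n} \textstyle\sum_{\text{rows}} \delta,
\]

where a is the layer's input activation (get_input()) and the row reduction corresponds to the subtract_matrix_rowsn() call.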
@@ -10,7 +10,6 @@

#include "../activation/activation.h"
#include "../cost/cost.h"
#include "../lin_alg/lin_alg.h"
#include "../regularization/reg.h"
#include "../utilities/utilities.h"

@@ -83,7 +82,6 @@ void MLPPMLP::gradient_descent(real_t learning_rate, int max_epoch, bool UI) {
ERR_FAIL_COND(!_initialized);

MLPPActivation avn;
MLPPLinAlg alg;
MLPPReg regularization;
real_t cost_prev = 0;
int epoch = 1;
@@ -96,29 +94,29 @@ void MLPPMLP::gradient_descent(real_t learning_rate, int max_epoch, bool UI) {
cost_prev = cost(_y_hat, _output_set);

// Calculating the errors
-Ref<MLPPVector> error = alg.subtractionnv(_y_hat, _output_set);
+Ref<MLPPVector> error = _y_hat->subn(_output_set);

// Calculating the weight/bias gradients for layer 2

-Ref<MLPPVector> D2_1 = alg.mat_vec_multnv(alg.transposenm(_a2), error);
+Ref<MLPPVector> D2_1 = _a2->transposen()->mult_vec(error);

// weights and bias updation for layer 2
-_weights2->set_from_mlpp_vector(alg.subtractionnv(_weights2, alg.scalar_multiplynv(learning_rate / static_cast<real_t>(_n), D2_1)));
+_weights2->sub(D2_1->scalar_multiplyn(learning_rate / static_cast<real_t>(_n)));
_weights2->set_from_mlpp_vector(regularization.reg_weightsv(_weights2, _lambda, _alpha, _reg));

-_bias2 -= learning_rate * alg.sum_elementsv(error) / static_cast<real_t>(_n);
+_bias2 -= learning_rate * error->sum_elements() / static_cast<real_t>(_n);

// Calculating the weight/bias for layer 1

-Ref<MLPPMatrix> D1_1 = alg.outer_product(error, _weights2);
-Ref<MLPPMatrix> D1_2 = alg.hadamard_productnm(alg.transposenm(D1_1), avn.sigmoid_derivm(_z2));
-Ref<MLPPMatrix> D1_3 = alg.matmultnm(alg.transposenm(_input_set), D1_2);
+Ref<MLPPMatrix> D1_1 = error->outer_product(_weights2);
+Ref<MLPPMatrix> D1_2 = D1_1->transposen()->hadamard_productn(avn.sigmoid_derivm(_z2));
+Ref<MLPPMatrix> D1_3 = _input_set->transposen()->multn(D1_2);

// weight an bias updation for layer 1
-_weights1->set_from_mlpp_matrix(alg.subtractionnm(_weights1, alg.scalar_multiplynm(learning_rate / _n, D1_3)));
+_weights1->sub(D1_3->scalar_multiplyn(learning_rate / _n));
_weights1->set_from_mlpp_matrix(regularization.reg_weightsm(_weights1, _lambda, _alpha, _reg));

-_bias1->set_from_mlpp_vector(alg.subtract_matrix_rowsnv(_bias1, alg.scalar_multiplynm(learning_rate / _n, D1_2)));
+_bias1->subtract_matrix_rows(D1_2->scalar_multiplyn(learning_rate / _n));

forward_pass();

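For the two-layer MLP, with a2 = sigma(z2) the hidden activations and e = y_hat - y, the hunk forms the layer-2 and layer-1 gradients as

\[
\nabla_{w_2} = \frac{1}{n}\, a_2^{\top} e, \qquad
\nabla_{W_1} = \frac{1}{n}\, X^{\top}\big[(e \otimes w_2) \odot \sigma'(z_2)\big],
\]

which is what D2_1, D1_1, D1_2, and D1_3 hold before the learning-rate scaling (up to the transpose-layout convention used by outer_product).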
@@ -143,7 +141,6 @@ void MLPPMLP::sgd(real_t learning_rate, int max_epoch, bool UI) {
ERR_FAIL_COND(!_initialized);

MLPPActivation avn;
MLPPLinAlg alg;
MLPPReg regularization;
real_t cost_prev = 0;
int epoch = 1;
@@ -183,24 +180,24 @@ void MLPPMLP::sgd(real_t learning_rate, int max_epoch, bool UI) {
real_t error = ly_hat - output_element;

// Weight updation for layer 2
-Ref<MLPPVector> D2_1 = alg.scalar_multiplynv(error, la2);
+Ref<MLPPVector> D2_1 = la2->scalar_multiplyn(error);

-_weights2->set_from_mlpp_vector(alg.subtractionnv(_weights2, alg.scalar_multiplynv(learning_rate, D2_1)));
+_weights2->sub(D2_1->scalar_multiplyn(learning_rate));
_weights2->set_from_mlpp_vector(regularization.reg_weightsv(_weights2, _lambda, _alpha, _reg));

// Bias updation for layer 2
_bias2 -= learning_rate * error;

// Weight updation for layer 1
-Ref<MLPPVector> D1_1 = alg.scalar_multiplynv(error, _weights2);
-Ref<MLPPVector> D1_2 = alg.hadamard_productnv(D1_1, avn.sigmoid_derivv(lz2));
-Ref<MLPPMatrix> D1_3 = alg.outer_product(input_set_row_tmp, D1_2);
+Ref<MLPPVector> D1_1 = _weights2->scalar_multiplyn(error);
+Ref<MLPPVector> D1_2 = D1_1->hadamard_productn(avn.sigmoid_derivv(lz2));
+Ref<MLPPMatrix> D1_3 = input_set_row_tmp->outer_product(D1_2);

-_weights1->set_from_mlpp_matrix(alg.subtractionnm(_weights1, alg.scalar_multiplynm(learning_rate, D1_3)));
+_weights1->sub(D1_3->scalar_multiplyn(learning_rate));
_weights1->set_from_mlpp_matrix(regularization.reg_weightsm(_weights1, _lambda, _alpha, _reg));
// Bias updation for layer 1

-_bias1->set_from_mlpp_vector(alg.subtractionnv(_bias1, alg.scalar_multiplynv(learning_rate, D1_2)));
+_bias1->sub(D1_2->scalar_multiplyn(learning_rate));

ly_hat = evaluatev(input_set_row_tmp);

@@ -226,7 +223,6 @@ void MLPPMLP::mbgd(real_t learning_rate, int max_epoch, int mini_batch_size, boo
ERR_FAIL_COND(!_initialized);

MLPPActivation avn;
MLPPLinAlg alg;
MLPPReg regularization;
real_t cost_prev = 0;
int epoch = 1;
@@ -251,33 +247,33 @@ void MLPPMLP::mbgd(real_t learning_rate, int max_epoch, int mini_batch_size, boo
cost_prev = cost(ly_hat, current_output);

// Calculating the errors
-Ref<MLPPVector> error = alg.subtractionnv(ly_hat, current_output);
+Ref<MLPPVector> error = ly_hat->subn(current_output);

// Calculating the weight/bias gradients for layer 2
-Ref<MLPPVector> D2_1 = alg.mat_vec_multnv(alg.transposenm(la2), error);
+Ref<MLPPVector> D2_1 = la2->transposen()->mult_vec(error);

real_t lr_d_cos = learning_rate / static_cast<real_t>(current_output->size());

// weights and bias updation for layser 2
-_weights2->set_from_mlpp_vector(alg.subtractionnv(_weights2, alg.scalar_multiplynv(lr_d_cos, D2_1)));
+_weights2->sub(D2_1->scalar_multiplyn(lr_d_cos));
_weights2->set_from_mlpp_vector(regularization.reg_weightsv(_weights2, _lambda, _alpha, _reg));

// Calculating the bias gradients for layer 2
-real_t b_gradient = alg.sum_elementsv(error);
+real_t b_gradient = error->sum_elements();

// Bias Updation for layer 2
_bias2 -= learning_rate * b_gradient / current_output->size();

//Calculating the weight/bias for layer 1
-Ref<MLPPMatrix> D1_1 = alg.outer_product(error, _weights2);
-Ref<MLPPMatrix> D1_2 = alg.hadamard_productnm(D1_1, avn.sigmoid_derivm(lz2));
-Ref<MLPPMatrix> D1_3 = alg.matmultnm(alg.transposenm(current_input), D1_2);
+Ref<MLPPMatrix> D1_1 = error->outer_product(_weights2);
+Ref<MLPPMatrix> D1_2 = D1_1->hadamard_productn(avn.sigmoid_derivm(lz2));
+Ref<MLPPMatrix> D1_3 = current_input->transposen()->multn(D1_2);

// weight an bias updation for layer 1
-_weights1->set_from_mlpp_matrix(alg.subtractionnm(_weights1, alg.scalar_multiplynm(lr_d_cos, D1_3)));
+_weights1->sub(D1_3->scalar_multiplyn(lr_d_cos));
_weights1->set_from_mlpp_matrix(regularization.reg_weightsm(_weights1, _lambda, _alpha, _reg));

-_bias1->set_from_mlpp_vector(alg.subtract_matrix_rowsnv(_bias1, alg.scalar_multiplynm(lr_d_cos, D1_2)));
+_bias1->subtract_matrix_rows(D1_2->scalar_multiplyn(lr_d_cos));

_y_hat = evaluatem(current_input);

@@ -356,49 +352,44 @@ real_t MLPPMLP::cost(const Ref<MLPPVector> &p_y_hat, const Ref<MLPPVector> &p_y)
}

Ref<MLPPVector> MLPPMLP::evaluatem(const Ref<MLPPMatrix> &X) {
MLPPLinAlg alg;
MLPPActivation avn;

-Ref<MLPPMatrix> pz2 = alg.mat_vec_addnm(alg.matmultnm(X, _weights1), _bias1);
+Ref<MLPPMatrix> pz2 = X->multn(_weights1)->add_vecn(_bias1);
Ref<MLPPMatrix> pa2 = avn.sigmoid_normm(pz2);

-return avn.sigmoid_normv(alg.scalar_addnv(_bias2, alg.mat_vec_multnv(pa2, _weights2)));
+return avn.sigmoid_normv(pa2->mult_vec(_weights2)->scalar_addn(_bias2));
}

void MLPPMLP::propagatem(const Ref<MLPPMatrix> &X, Ref<MLPPMatrix> z2_out, Ref<MLPPMatrix> a2_out) {
MLPPLinAlg alg;
MLPPActivation avn;

-z2_out->set_from_mlpp_matrix(alg.mat_vec_addnm(alg.matmultnm(X, _weights1), _bias1));
+z2_out->set_from_mlpp_matrix(X->multn(_weights1)->add_vecn(_bias1));
a2_out->set_from_mlpp_matrix(avn.sigmoid_normm(z2_out));
}

real_t MLPPMLP::evaluatev(const Ref<MLPPVector> &x) {
MLPPLinAlg alg;
MLPPActivation avn;

-Ref<MLPPVector> pz2 = alg.additionnv(alg.mat_vec_multnv(alg.transposenm(_weights1), x), _bias1);
+Ref<MLPPVector> pz2 = _weights1->transposen()->mult_vec(x)->addn(_bias1);
Ref<MLPPVector> pa2 = avn.sigmoid_normv(pz2);

-return avn.sigmoid_normr(alg.dotnv(_weights2, pa2) + _bias2);
+return avn.sigmoid_normr(_weights2->dot(pa2) + _bias2);
}

void MLPPMLP::propagatev(const Ref<MLPPVector> &x, Ref<MLPPVector> z2_out, Ref<MLPPVector> a2_out) {
MLPPLinAlg alg;
MLPPActivation avn;

-z2_out->set_from_mlpp_vector(alg.additionnv(alg.mat_vec_multnv(alg.transposenm(_weights1), x), _bias1));
+z2_out->set_from_mlpp_vector(_weights1->transposen()->mult_vec(x)->addn(_bias1));
a2_out->set_from_mlpp_vector(avn.sigmoid_normv(z2_out));
}

void MLPPMLP::forward_pass() {
MLPPLinAlg alg;
MLPPActivation avn;

-_z2->set_from_mlpp_matrix(alg.mat_vec_addnm(alg.matmultnm(_input_set, _weights1), _bias1));
+_z2->set_from_mlpp_matrix(_input_set->multn(_weights1)->add_vecn(_bias1));
_a2->set_from_mlpp_matrix(avn.sigmoid_normm(_z2));

-_y_hat->set_from_mlpp_vector(avn.sigmoid_normv(alg.scalar_addnv(_bias2, alg.mat_vec_multnv(_a2, _weights2))));
+_y_hat->set_from_mlpp_vector(avn.sigmoid_normv(_a2->mult_vec(_weights2)->scalar_addn(_bias2)));
}

MLPPMLP::MLPPMLP(const Ref<MLPPMatrix> &p_input_set, const Ref<MLPPVector> &p_output_set, int p_n_hidden, MLPPReg::RegularizationType p_reg, real_t p_lambda, real_t p_alpha) {
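The forward pass at the end of this hunk matches the evaluate functions above it:

\[
Z_2 = X W_1 + b_1, \qquad
A_2 = \sigma(Z_2), \qquad
\hat{y} = \sigma(A_2 w_2 + b_2),
\]

with the single-sample evaluatev() using \(z_2 = W_1^{\top} x + b_1\) instead.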