Initial cleanups to MLP.

This commit is contained in:
Relintai 2023-02-04 16:48:31 +01:00
parent a875cc9e70
commit 5f63aebc99
2 changed files with 77 additions and 54 deletions

View File

@ -15,36 +15,25 @@
#include <iostream> #include <iostream>
#include <random> #include <random>
std::vector<real_t> MLPPMLP::model_set_test(std::vector<std::vector<real_t>> X) {
MLPPMLP::MLPPMLP(std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet, int n_hidden, std::string reg, real_t lambda, real_t alpha) : return evaluate(X);
inputSet(inputSet), outputSet(outputSet), n_hidden(n_hidden), n(inputSet.size()), k(inputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha) {
MLPPActivation avn;
y_hat.resize(n);
weights1 = MLPPUtilities::weightInitialization(k, n_hidden);
weights2 = MLPPUtilities::weightInitialization(n_hidden);
bias1 = MLPPUtilities::biasInitialization(n_hidden);
bias2 = MLPPUtilities::biasInitialization();
} }
std::vector<real_t> MLPPMLP::modelSetTest(std::vector<std::vector<real_t>> X) { real_t MLPPMLP::model_test(std::vector<real_t> x) {
return Evaluate(X); return evaluate(x);
} }
real_t MLPPMLP::modelTest(std::vector<real_t> x) { void MLPPMLP::gradient_descent(real_t learning_rate, int max_epoch, bool UI) {
return Evaluate(x);
}
void MLPPMLP::gradientDescent(real_t learning_rate, int max_epoch, bool UI) {
MLPPActivation avn; MLPPActivation avn;
MLPPLinAlg alg; MLPPLinAlg alg;
MLPPReg regularization; MLPPReg regularization;
real_t cost_prev = 0; real_t cost_prev = 0;
int epoch = 1; int epoch = 1;
forwardPass();
forward_pass();
while (true) { while (true) {
cost_prev = Cost(y_hat, outputSet); cost_prev = cost(y_hat, outputSet);
// Calculating the errors // Calculating the errors
std::vector<real_t> error = alg.subtraction(y_hat, outputSet); std::vector<real_t> error = alg.subtraction(y_hat, outputSet);
@ -76,11 +65,11 @@ void MLPPMLP::gradientDescent(real_t learning_rate, int max_epoch, bool UI) {
bias1 = alg.subtractMatrixRows(bias1, alg.scalarMultiply(learning_rate / n, D1_2)); bias1 = alg.subtractMatrixRows(bias1, alg.scalarMultiply(learning_rate / n, D1_2));
forwardPass(); forward_pass();
// UI PORTION // UI PORTION
if (UI) { if (UI) {
MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); MLPPUtilities::CostInfo(epoch, cost_prev, cost(y_hat, outputSet));
std::cout << "Layer 1:" << std::endl; std::cout << "Layer 1:" << std::endl;
MLPPUtilities::UI(weights1, bias1); MLPPUtilities::UI(weights1, bias1);
std::cout << "Layer 2:" << std::endl; std::cout << "Layer 2:" << std::endl;
@ -94,7 +83,7 @@ void MLPPMLP::gradientDescent(real_t learning_rate, int max_epoch, bool UI) {
} }
} }
void MLPPMLP::SGD(real_t learning_rate, int max_epoch, bool UI) { void MLPPMLP::sgd(real_t learning_rate, int max_epoch, bool UI) {
MLPPActivation avn; MLPPActivation avn;
MLPPLinAlg alg; MLPPLinAlg alg;
MLPPReg regularization; MLPPReg regularization;
@ -107,9 +96,9 @@ void MLPPMLP::SGD(real_t learning_rate, int max_epoch, bool UI) {
std::uniform_int_distribution<int> distribution(0, int(n - 1)); std::uniform_int_distribution<int> distribution(0, int(n - 1));
int outputIndex = distribution(generator); int outputIndex = distribution(generator);
real_t y_hat = Evaluate(inputSet[outputIndex]); real_t y_hat = evaluate(inputSet[outputIndex]);
auto [z2, a2] = propagate(inputSet[outputIndex]); auto [z2, a2] = propagate(inputSet[outputIndex]);
cost_prev = Cost({ y_hat }, { outputSet[outputIndex] }); cost_prev = cost({ y_hat }, { outputSet[outputIndex] });
real_t error = y_hat - outputSet[outputIndex]; real_t error = y_hat - outputSet[outputIndex];
// Weight updation for layer 2 // Weight updation for layer 2
@ -131,9 +120,9 @@ void MLPPMLP::SGD(real_t learning_rate, int max_epoch, bool UI) {
bias1 = alg.subtraction(bias1, alg.scalarMultiply(learning_rate, D1_2)); bias1 = alg.subtraction(bias1, alg.scalarMultiply(learning_rate, D1_2));
y_hat = Evaluate(inputSet[outputIndex]); y_hat = evaluate(inputSet[outputIndex]);
if (UI) { if (UI) {
MLPPUtilities::CostInfo(epoch, cost_prev, Cost({ y_hat }, { outputSet[outputIndex] })); MLPPUtilities::CostInfo(epoch, cost_prev, cost({ y_hat }, { outputSet[outputIndex] }));
std::cout << "Layer 1:" << std::endl; std::cout << "Layer 1:" << std::endl;
MLPPUtilities::UI(weights1, bias1); MLPPUtilities::UI(weights1, bias1);
std::cout << "Layer 2:" << std::endl; std::cout << "Layer 2:" << std::endl;
@ -145,10 +134,11 @@ void MLPPMLP::SGD(real_t learning_rate, int max_epoch, bool UI) {
break; break;
} }
} }
forwardPass();
forward_pass();
} }
void MLPPMLP::MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI) { void MLPPMLP::mbgd(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI) {
MLPPActivation avn; MLPPActivation avn;
MLPPLinAlg alg; MLPPLinAlg alg;
MLPPReg regularization; MLPPReg regularization;
@ -161,9 +151,9 @@ void MLPPMLP::MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, boo
while (true) { while (true) {
for (int i = 0; i < n_mini_batch; i++) { for (int i = 0; i < n_mini_batch; i++) {
std::vector<real_t> y_hat = Evaluate(inputMiniBatches[i]); std::vector<real_t> y_hat = evaluate(inputMiniBatches[i]);
auto [z2, a2] = propagate(inputMiniBatches[i]); auto [z2, a2] = propagate(inputMiniBatches[i]);
cost_prev = Cost(y_hat, outputMiniBatches[i]); cost_prev = cost(y_hat, outputMiniBatches[i]);
// Calculating the errors // Calculating the errors
std::vector<real_t> error = alg.subtraction(y_hat, outputMiniBatches[i]); std::vector<real_t> error = alg.subtraction(y_hat, outputMiniBatches[i]);
@ -196,22 +186,25 @@ void MLPPMLP::MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, boo
bias1 = alg.subtractMatrixRows(bias1, alg.scalarMultiply(learning_rate / outputMiniBatches[i].size(), D1_2)); bias1 = alg.subtractMatrixRows(bias1, alg.scalarMultiply(learning_rate / outputMiniBatches[i].size(), D1_2));
y_hat = Evaluate(inputMiniBatches[i]); y_hat = evaluate(inputMiniBatches[i]);
if (UI) { if (UI) {
MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i])); MLPPUtilities::CostInfo(epoch, cost_prev, cost(y_hat, outputMiniBatches[i]));
std::cout << "Layer 1:" << std::endl; std::cout << "Layer 1:" << std::endl;
MLPPUtilities::UI(weights1, bias1); MLPPUtilities::UI(weights1, bias1);
std::cout << "Layer 2:" << std::endl; std::cout << "Layer 2:" << std::endl;
MLPPUtilities::UI(weights2, bias2); MLPPUtilities::UI(weights2, bias2);
} }
} }
epoch++; epoch++;
if (epoch > max_epoch) { if (epoch > max_epoch) {
break; break;
} }
} }
forwardPass();
forward_pass();
} }
real_t MLPPMLP::score() { real_t MLPPMLP::score() {
@ -225,13 +218,13 @@ void MLPPMLP::save(std::string fileName) {
util.saveParameters(fileName, weights2, bias2, 1, 2); util.saveParameters(fileName, weights2, bias2, 1, 2);
} }
real_t MLPPMLP::Cost(std::vector<real_t> y_hat, std::vector<real_t> y) { real_t MLPPMLP::cost(std::vector<real_t> y_hat, std::vector<real_t> y) {
MLPPReg regularization; MLPPReg regularization;
class MLPPCost cost; class MLPPCost cost;
return cost.LogLoss(y_hat, y) + regularization.regTerm(weights2, lambda, alpha, reg) + regularization.regTerm(weights1, lambda, alpha, reg); return cost.LogLoss(y_hat, y) + regularization.regTerm(weights2, lambda, alpha, reg) + regularization.regTerm(weights1, lambda, alpha, reg);
} }
std::vector<real_t> MLPPMLP::Evaluate(std::vector<std::vector<real_t>> X) { std::vector<real_t> MLPPMLP::evaluate(std::vector<std::vector<real_t>> X) {
MLPPLinAlg alg; MLPPLinAlg alg;
MLPPActivation avn; MLPPActivation avn;
std::vector<std::vector<real_t>> z2 = alg.mat_vec_add(alg.matmult(X, weights1), bias1); std::vector<std::vector<real_t>> z2 = alg.mat_vec_add(alg.matmult(X, weights1), bias1);
@ -247,7 +240,7 @@ std::tuple<std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>> M
return { z2, a2 }; return { z2, a2 };
} }
real_t MLPPMLP::Evaluate(std::vector<real_t> x) { real_t MLPPMLP::evaluate(std::vector<real_t> x) {
MLPPLinAlg alg; MLPPLinAlg alg;
MLPPActivation avn; MLPPActivation avn;
std::vector<real_t> z2 = alg.addition(alg.mat_vec_mult(alg.transpose(weights1), x), bias1); std::vector<real_t> z2 = alg.addition(alg.mat_vec_mult(alg.transpose(weights1), x), bias1);
@ -263,7 +256,7 @@ std::tuple<std::vector<real_t>, std::vector<real_t>> MLPPMLP::propagate(std::vec
return { z2, a2 }; return { z2, a2 };
} }
void MLPPMLP::forwardPass() { void MLPPMLP::forward_pass() {
MLPPLinAlg alg; MLPPLinAlg alg;
MLPPActivation avn; MLPPActivation avn;
z2 = alg.mat_vec_add(alg.matmult(inputSet, weights1), bias1); z2 = alg.mat_vec_add(alg.matmult(inputSet, weights1), bias1);
@ -271,7 +264,21 @@ void MLPPMLP::forwardPass() {
y_hat = avn.sigmoid(alg.scalarAdd(bias2, alg.mat_vec_mult(a2, weights2))); y_hat = avn.sigmoid(alg.scalarAdd(bias2, alg.mat_vec_mult(a2, weights2)));
} }
// Builds a network with one hidden layer from a training set.
//
// inputSet  - training samples; the row count is stored as n and the length
//             of the first row as k.
// outputSet - target values stored alongside inputSet.
// n_hidden  - size passed to the weight / bias initializers for the hidden layer.
// reg, lambda, alpha - stored and later forwarded to the regularization term
//             computed in cost().
//
// An empty inputSet yields k == 0 instead of indexing a non-existent first
// row (which was undefined behaviour).
// NOTE(review): whether the initializers and the training methods tolerate
// n == 0 / k == 0 is not visible here - confirm before relying on it.
MLPPMLP::MLPPMLP(std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet, int n_hidden, std::string reg, real_t lambda, real_t alpha) :
		inputSet(inputSet), outputSet(outputSet), n_hidden(n_hidden), n(inputSet.size()), k(inputSet.empty() ? 0 : inputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha) {
	// One prediction slot per training sample.
	y_hat.resize(n);

	// Layer 1 maps k inputs to n_hidden units; layer 2 maps n_hidden units to a single output.
	weights1 = MLPPUtilities::weightInitialization(k, n_hidden);
	weights2 = MLPPUtilities::weightInitialization(n_hidden);
	bias1 = MLPPUtilities::biasInitialization(n_hidden);
	bias2 = MLPPUtilities::biasInitialization();
}
// Default constructor: creates an empty, untrained model.
//
// The scalar members are given defined values so that an instance created
// without training data never exposes indeterminate state. The regularization
// defaults mirror the default arguments of the parameterized constructor
// ("None", 0.5, 0.5).
MLPPMLP::MLPPMLP() {
	n = 0;
	k = 0;
	n_hidden = 0;

	reg = "None";
	lambda = 0.5;
	alpha = 0.5;

	// NOTE(review): bias2 is used as a scalar in forward_pass(); assumed to be an arithmetic type - confirm against the header.
	bias2 = 0;
}
// Destructor: no manual cleanup is performed; members release their own storage.
MLPPMLP::~MLPPMLP() = default;
// ======= OLD ======= // ======= OLD =======
@ -529,4 +536,3 @@ void MLPPMLPOld::forwardPass() {
a2 = avn.sigmoid(z2); a2 = avn.sigmoid(z2);
y_hat = avn.sigmoid(alg.scalarAdd(bias2, alg.mat_vec_mult(a2, weights2))); y_hat = avn.sigmoid(alg.scalarAdd(bias2, alg.mat_vec_mult(a2, weights2)));
} }

View File

@ -8,31 +8,48 @@
// Created by Marc Melikyan on 11/4/20. // Created by Marc Melikyan on 11/4/20.
// //
#include "core/containers/vector.h"
#include "core/math/math_defs.h" #include "core/math/math_defs.h"
#include "core/string/ustring.h"
#include "core/variant/variant.h"
#include "core/object/reference.h"
#include "../lin_alg/mlpp_matrix.h"
#include "../lin_alg/mlpp_vector.h"
#include <map> #include <map>
#include <string> #include <string>
#include <vector> #include <vector>
class MLPPMLP { class MLPPMLP : public Reference {
GDCLASS(MLPPMLP, Reference);
public: public:
MLPPMLP(std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet, int n_hidden, std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5); std::vector<real_t> model_set_test(std::vector<std::vector<real_t>> X);
std::vector<real_t> modelSetTest(std::vector<std::vector<real_t>> X); real_t model_test(std::vector<real_t> x);
real_t modelTest(std::vector<real_t> x);
void gradientDescent(real_t learning_rate, int max_epoch, bool UI = false); void gradient_descent(real_t learning_rate, int max_epoch, bool UI = false);
void SGD(real_t learning_rate, int max_epoch, bool UI = false); void sgd(real_t learning_rate, int max_epoch, bool UI = false);
void MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI = false); void mbgd(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI = false);
real_t score(); real_t score();
void save(std::string fileName); void save(std::string fileName);
private: MLPPMLP(std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet, int n_hidden, std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5);
real_t Cost(std::vector<real_t> y_hat, std::vector<real_t> y);
std::vector<real_t> Evaluate(std::vector<std::vector<real_t>> X); MLPPMLP();
~MLPPMLP();
private:
real_t cost(std::vector<real_t> y_hat, std::vector<real_t> y);
std::vector<real_t> evaluate(std::vector<std::vector<real_t>> X);
std::tuple<std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>> propagate(std::vector<std::vector<real_t>> X); std::tuple<std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>> propagate(std::vector<std::vector<real_t>> X);
real_t Evaluate(std::vector<real_t> x); real_t evaluate(std::vector<real_t> x);
std::tuple<std::vector<real_t>, std::vector<real_t>> propagate(std::vector<real_t> x); std::tuple<std::vector<real_t>, std::vector<real_t>> propagate(std::vector<real_t> x);
void forwardPass();
void forward_pass();
std::vector<std::vector<real_t>> inputSet; std::vector<std::vector<real_t>> inputSet;
std::vector<real_t> outputSet; std::vector<real_t> outputSet;