Initial cleanup pass on MLPPANN.

Relintai 2023-02-12 15:07:26 +01:00
parent 5f8e35c58f
commit a9b30ef75f
3 changed files with 247 additions and 186 deletions
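The cleanup renames MLPPANN's camelCase public methods to snake_case. A minimal, hypothetical usage sketch of the renamed API (method names and the XOR data come from the hunks below; the include path and function wrapper are assumptions, not part of this commit):

// Hedged sketch of the post-commit MLPPANN API; old names noted in comments.
#include "mlpp/ann/ann.h" // assumed header path

#include <iostream>
#include <vector>

void xor_example() {
	std::vector<std::vector<real_t>> input_set = { { 0, 0 }, { 0, 1 }, { 1, 0 }, { 1, 1 } };
	std::vector<real_t> output_set = { 0, 1, 1, 0 };

	MLPPANN ann(input_set, output_set);
	ann.add_layer(5, "Sigmoid");                // was addLayer()
	ann.add_output_layer("Sigmoid", "LogLoss"); // was addOutputLayer()
	ann.gradient_descent(1, 20000);             // was gradientDescent()

	std::vector<real_t> preds = ann.model_set_test(input_set); // was modelSetTest()
	std::cout << "ACCURACY: " << 100 * ann.score() << "%" << std::endl;
}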

View File

@@ -15,22 +15,7 @@
 #include <iostream>
 #include <random>
-MLPPANN::MLPPANN(std::vector<std::vector<real_t>> p_inputSet, std::vector<real_t> p_outputSet) {
-	inputSet = p_inputSet;
-	outputSet = p_outputSet;
-	n = inputSet.size();
-	k = inputSet[0].size();
-	lrScheduler = "None";
-	decayConstant = 0;
-	dropRate = 0;
-}
-MLPPANN::~MLPPANN() {
-	delete outputLayer;
-}
-std::vector<real_t> MLPPANN::modelSetTest(std::vector<std::vector<real_t>> X) {
+std::vector<real_t> MLPPANN::model_set_test(std::vector<std::vector<real_t>> X) {
 	if (!network.empty()) {
 		network[0].input = X;
 		network[0].forwardPass();
@@ -43,11 +28,13 @@ std::vector<real_t> MLPPANN::modelSetTest(std::vector<std::vector<real_t>> X) {
 	} else {
 		outputLayer->input = X;
 	}
 	outputLayer->forwardPass();
 	return outputLayer->a;
 }
-real_t MLPPANN::modelTest(std::vector<real_t> x) {
+real_t MLPPANN::model_test(std::vector<real_t> x) {
 	if (!network.empty()) {
 		network[0].Test(x);
 		for (uint32_t i = 1; i < network.size(); i++) {
@@ -60,33 +47,36 @@ real_t MLPPANN::modelTest(std::vector<real_t> x) {
 	return outputLayer->a_test;
 }
-void MLPPANN::gradientDescent(real_t learning_rate, int max_epoch, bool UI) {
-	class MLPPCost cost;
+void MLPPANN::gradient_descent(real_t learning_rate, int max_epoch, bool ui) {
+	MLPPCost mlpp_cost;
 	MLPPLinAlg alg;
 	real_t cost_prev = 0;
 	int epoch = 1;
-	forwardPass();
+	forward_pass();
 	real_t initial_learning_rate = learning_rate;
 	alg.printMatrix(network[network.size() - 1].weights);
 	while (true) {
-		learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
-		cost_prev = Cost(y_hat, outputSet);
-		auto grads = computeGradients(y_hat, outputSet);
+		learning_rate = apply_learning_rate_scheduler(initial_learning_rate, decayConstant, epoch, dropRate);
+		cost_prev = cost(y_hat, outputSet);
+		auto grads = compute_gradients(y_hat, outputSet);
 		auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
 		auto outputWGrad = std::get<1>(grads);
 		cumulativeHiddenLayerWGrad = alg.scalarMultiply(learning_rate / n, cumulativeHiddenLayerWGrad);
 		outputWGrad = alg.scalarMultiply(learning_rate / n, outputWGrad);
-		updateParameters(cumulativeHiddenLayerWGrad, outputWGrad, learning_rate); // subject to change. may want bias to have this matrix too.
+		update_parameters(cumulativeHiddenLayerWGrad, outputWGrad, learning_rate); // subject to change. may want bias to have this matrix too.
 		std::cout << learning_rate << std::endl;
-		forwardPass();
-		if (UI) {
-			MLPPANN::UI(epoch, cost_prev, y_hat, outputSet);
+		forward_pass();
+		if (ui) {
+			print_ui(epoch, cost_prev, y_hat, outputSet);
 		}
 		epoch++;
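Within this loop, compute_gradients returns the (regularized) cost gradients, which are scaled by learning_rate / n before update_parameters subtracts them from the weights, i.e. plain batch gradient descent. As a LaTeX sketch (theta = the layer weights, eta = learning_rate, J = the cost computed by cost()):

\theta \leftarrow \theta - \frac{\eta}{n} \nabla_\theta J(\theta)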
@@ -96,8 +86,8 @@ void MLPPANN::gradientDescent(real_t learning_rate, int max_epoch, bool UI) {
 	}
 }
-void MLPPANN::SGD(real_t learning_rate, int max_epoch, bool UI) {
-	class MLPPCost cost;
+void MLPPANN::sgd(real_t learning_rate, int max_epoch, bool ui) {
+	MLPPCost mlpp_cost;
 	MLPPLinAlg alg;
 	real_t cost_prev = 0;
@@ -105,28 +95,28 @@ void MLPPANN::SGD(real_t learning_rate, int max_epoch, bool UI) {
 	real_t initial_learning_rate = learning_rate;
 	while (true) {
-		learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
+		learning_rate = apply_learning_rate_scheduler(initial_learning_rate, decayConstant, epoch, dropRate);
 		std::random_device rd;
 		std::default_random_engine generator(rd());
 		std::uniform_int_distribution<int> distribution(0, int(n - 1));
 		int outputIndex = distribution(generator);
-		std::vector<real_t> y_hat = modelSetTest({ inputSet[outputIndex] });
-		cost_prev = Cost({ y_hat }, { outputSet[outputIndex] });
-		auto grads = computeGradients(y_hat, { outputSet[outputIndex] });
+		std::vector<real_t> y_hat = model_set_test({ inputSet[outputIndex] });
+		cost_prev = cost({ y_hat }, { outputSet[outputIndex] });
+		auto grads = compute_gradients(y_hat, { outputSet[outputIndex] });
 		auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
 		auto outputWGrad = std::get<1>(grads);
 		cumulativeHiddenLayerWGrad = alg.scalarMultiply(learning_rate / n, cumulativeHiddenLayerWGrad);
 		outputWGrad = alg.scalarMultiply(learning_rate / n, outputWGrad);
-		updateParameters(cumulativeHiddenLayerWGrad, outputWGrad, learning_rate); // subject to change. may want bias to have this matrix too.
-		y_hat = modelSetTest({ inputSet[outputIndex] });
-		if (UI) {
-			MLPPANN::UI(epoch, cost_prev, y_hat, { outputSet[outputIndex] });
+		update_parameters(cumulativeHiddenLayerWGrad, outputWGrad, learning_rate); // subject to change. may want bias to have this matrix too.
+		y_hat = model_set_test({ inputSet[outputIndex] });
+		if (ui) {
+			print_ui(epoch, cost_prev, y_hat, { outputSet[outputIndex] });
 		}
 		epoch++;
@@ -134,11 +124,12 @@ void MLPPANN::SGD(real_t learning_rate, int max_epoch, bool UI) {
 			break;
 		}
 	}
-	forwardPass();
+	forward_pass();
 }
-void MLPPANN::MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI) {
-	class MLPPCost cost;
+void MLPPANN::mbgd(real_t learning_rate, int max_epoch, int mini_batch_size, bool ui) {
+	MLPPCost mlpp_cost;
 	MLPPLinAlg alg;
 	real_t cost_prev = 0;
@@ -155,35 +146,39 @@ void MLPPANN::MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, boo
 	auto outputMiniBatches = std::get<1>(batches);
 	while (true) {
-		learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
-		for (int i = 0; i < n_mini_batch; i++) {
-			std::vector<real_t> y_hat = modelSetTest(inputMiniBatches[i]);
-			cost_prev = Cost(y_hat, outputMiniBatches[i]);
-			auto grads = computeGradients(y_hat, outputMiniBatches[i]);
+		learning_rate = apply_learning_rate_scheduler(initial_learning_rate, decayConstant, epoch, dropRate);
+		for (int i = 0; i < n_mini_batch; i++) {
+			std::vector<real_t> y_hat = model_set_test(inputMiniBatches[i]);
+			cost_prev = cost(y_hat, outputMiniBatches[i]);
+			auto grads = compute_gradients(y_hat, outputMiniBatches[i]);
 			auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
 			auto outputWGrad = std::get<1>(grads);
 			cumulativeHiddenLayerWGrad = alg.scalarMultiply(learning_rate / n, cumulativeHiddenLayerWGrad);
 			outputWGrad = alg.scalarMultiply(learning_rate / n, outputWGrad);
-			updateParameters(cumulativeHiddenLayerWGrad, outputWGrad, learning_rate); // subject to change. may want bias to have this matrix too.
-			y_hat = modelSetTest(inputMiniBatches[i]);
-			if (UI) {
-				MLPPANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]);
+			update_parameters(cumulativeHiddenLayerWGrad, outputWGrad, learning_rate); // subject to change. may want bias to have this matrix too.
+			y_hat = model_set_test(inputMiniBatches[i]);
+			if (ui) {
+				print_ui(epoch, cost_prev, y_hat, outputMiniBatches[i]);
 			}
 		}
 		epoch++;
 		if (epoch > max_epoch) {
 			break;
 		}
 	}
-	forwardPass();
+	forward_pass();
 }
-void MLPPANN::Momentum(real_t learning_rate, int max_epoch, int mini_batch_size, real_t gamma, bool NAG, bool UI) {
-	class MLPPCost cost;
+void MLPPANN::momentum(real_t learning_rate, int max_epoch, int mini_batch_size, real_t gamma, bool nag, bool ui) {
+	class MLPPCost mlpp_cost;
 	MLPPLinAlg alg;
 	real_t cost_prev = 0;
@@ -204,12 +199,13 @@ void MLPPANN::Momentum(real_t learning_rate, int max_epoch, int mini_batch_size,
 	std::vector<real_t> v_output;
 	while (true) {
-		learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
-		for (int i = 0; i < n_mini_batch; i++) {
-			std::vector<real_t> y_hat = modelSetTest(inputMiniBatches[i]);
-			cost_prev = Cost(y_hat, outputMiniBatches[i]);
-			auto grads = computeGradients(y_hat, outputMiniBatches[i]);
+		learning_rate = apply_learning_rate_scheduler(initial_learning_rate, decayConstant, epoch, dropRate);
+		for (int i = 0; i < n_mini_batch; i++) {
+			std::vector<real_t> y_hat = model_set_test(inputMiniBatches[i]);
+			cost_prev = cost(y_hat, outputMiniBatches[i]);
+			auto grads = compute_gradients(y_hat, outputMiniBatches[i]);
 			auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
 			auto outputWGrad = std::get<1>(grads);
@@ -221,31 +217,34 @@ void MLPPANN::Momentum(real_t learning_rate, int max_epoch, int mini_batch_size,
 				v_output.resize(outputWGrad.size());
 			}
-			if (NAG) { // "Aposterori" calculation
-				updateParameters(v_hidden, v_output, 0); // DON'T update bias.
+			if (nag) { // "Aposterori" calculation
+				update_parameters(v_hidden, v_output, 0); // DON'T update bias.
 			}
 			v_hidden = alg.addition(alg.scalarMultiply(gamma, v_hidden), alg.scalarMultiply(learning_rate / n, cumulativeHiddenLayerWGrad));
 			v_output = alg.addition(alg.scalarMultiply(gamma, v_output), alg.scalarMultiply(learning_rate / n, outputWGrad));
-			updateParameters(v_hidden, v_output, learning_rate); // subject to change. may want bias to have this matrix too.
-			y_hat = modelSetTest(inputMiniBatches[i]);
-			if (UI) {
-				MLPPANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]);
+			update_parameters(v_hidden, v_output, learning_rate); // subject to change. may want bias to have this matrix too.
+			y_hat = model_set_test(inputMiniBatches[i]);
+			if (ui) {
+				print_ui(epoch, cost_prev, y_hat, outputMiniBatches[i]);
 			}
 		}
 		epoch++;
 		if (epoch > max_epoch) {
 			break;
 		}
 	}
-	forwardPass();
+	forward_pass();
 }
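The v_hidden / v_output recurrences above are classical momentum, with the nag branch applying the previous velocity to the weights first (the "Aposterori" look-ahead noted in the comment). As a LaTeX sketch, with gamma = gamma and eta = learning_rate:

v_t = \gamma \, v_{t-1} + \frac{\eta}{n} \nabla_\theta J(\theta), \qquad \theta \leftarrow \theta - v_t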
-void MLPPANN::Adagrad(real_t learning_rate, int max_epoch, int mini_batch_size, real_t e, bool UI) {
-	class MLPPCost cost;
+void MLPPANN::adagrad(real_t learning_rate, int max_epoch, int mini_batch_size, real_t e, bool ui) {
+	MLPPCost mlpp_cost;
 	MLPPLinAlg alg;
 	real_t cost_prev = 0;
@@ -266,12 +265,13 @@ void MLPPANN::Adagrad(real_t learning_rate, int max_epoch, int mini_batch_size,
 	std::vector<real_t> v_output;
 	while (true) {
-		learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
-		for (int i = 0; i < n_mini_batch; i++) {
-			std::vector<real_t> y_hat = modelSetTest(inputMiniBatches[i]);
-			cost_prev = Cost(y_hat, outputMiniBatches[i]);
-			auto grads = computeGradients(y_hat, outputMiniBatches[i]);
+		learning_rate = apply_learning_rate_scheduler(initial_learning_rate, decayConstant, epoch, dropRate);
+		for (int i = 0; i < n_mini_batch; i++) {
+			std::vector<real_t> y_hat = model_set_test(inputMiniBatches[i]);
+			cost_prev = cost(y_hat, outputMiniBatches[i]);
+			auto grads = compute_gradients(y_hat, outputMiniBatches[i]);
 			auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
 			auto outputWGrad = std::get<1>(grads);
@@ -290,11 +290,11 @@ void MLPPANN::Adagrad(real_t learning_rate, int max_epoch, int mini_batch_size,
 			std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(cumulativeHiddenLayerWGrad, alg.scalarAdd(e, alg.sqrt(v_hidden))));
 			std::vector<real_t> outputLayerUpdation = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(outputWGrad, alg.scalarAdd(e, alg.sqrt(v_output))));
-			updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
-			y_hat = modelSetTest(inputMiniBatches[i]);
-			if (UI) {
-				MLPPANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]);
+			update_parameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
+			y_hat = model_set_test(inputMiniBatches[i]);
+			if (ui) {
+				print_ui(epoch, cost_prev, y_hat, outputMiniBatches[i]);
 			}
 		}
 		epoch++;
@@ -302,11 +302,12 @@ void MLPPANN::Adagrad(real_t learning_rate, int max_epoch, int mini_batch_size,
 			break;
 		}
 	}
-	forwardPass();
+	forward_pass();
 }
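The hiddenLayerUpdations / outputLayerUpdation expressions above are the Adagrad step. The accumulation of squared gradients into v_hidden / v_output happens in a part of the function this hunk does not show, so the first recurrence below is an assumption about that elided code (g = gradient, epsilon = e):

v \leftarrow v + g \odot g, \qquad \theta \leftarrow \theta - \frac{\eta}{n} \cdot \frac{g}{\epsilon + \sqrt{v}}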
-void MLPPANN::Adadelta(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t e, bool UI) {
-	class MLPPCost cost;
+void MLPPANN::adadelta(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t e, bool ui) {
+	MLPPCost mlpp_cost;
 	MLPPLinAlg alg;
 	real_t cost_prev = 0;
@@ -327,12 +328,12 @@ void MLPPANN::Adadelta(real_t learning_rate, int max_epoch, int mini_batch_size,
 	std::vector<real_t> v_output;
 	while (true) {
-		learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
+		learning_rate = apply_learning_rate_scheduler(initial_learning_rate, decayConstant, epoch, dropRate);
 		for (int i = 0; i < n_mini_batch; i++) {
-			std::vector<real_t> y_hat = modelSetTest(inputMiniBatches[i]);
-			cost_prev = Cost(y_hat, outputMiniBatches[i]);
-			auto grads = computeGradients(y_hat, outputMiniBatches[i]);
+			std::vector<real_t> y_hat = model_set_test(inputMiniBatches[i]);
+			cost_prev = cost(y_hat, outputMiniBatches[i]);
+			auto grads = compute_gradients(y_hat, outputMiniBatches[i]);
 			auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
 			auto outputWGrad = std::get<1>(grads);
@@ -351,11 +352,11 @@ void MLPPANN::Adadelta(real_t learning_rate, int max_epoch, int mini_batch_size,
 			std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(cumulativeHiddenLayerWGrad, alg.scalarAdd(e, alg.sqrt(v_hidden))));
 			std::vector<real_t> outputLayerUpdation = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(outputWGrad, alg.scalarAdd(e, alg.sqrt(v_output))));
-			updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
-			y_hat = modelSetTest(inputMiniBatches[i]);
-			if (UI) {
-				MLPPANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]);
+			update_parameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
+			y_hat = model_set_test(inputMiniBatches[i]);
+			if (ui) {
+				print_ui(epoch, cost_prev, y_hat, outputMiniBatches[i]);
 			}
 		}
 		epoch++;
@@ -363,11 +364,11 @@ void MLPPANN::Adadelta(real_t learning_rate, int max_epoch, int mini_batch_size,
 			break;
 		}
 	}
-	forwardPass();
+	forward_pass();
 }
-void MLPPANN::Adam(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI) {
-	class MLPPCost cost;
+void MLPPANN::adam(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool ui) {
+	MLPPCost mlpp_cost;
 	MLPPLinAlg alg;
 	real_t cost_prev = 0;
@@ -390,12 +391,12 @@ void MLPPANN::Adam(real_t learning_rate, int max_epoch, int mini_batch_size, rea
 	std::vector<real_t> m_output;
 	std::vector<real_t> v_output;
 	while (true) {
-		learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
+		learning_rate = apply_learning_rate_scheduler(initial_learning_rate, decayConstant, epoch, dropRate);
 		for (int i = 0; i < n_mini_batch; i++) {
-			std::vector<real_t> y_hat = modelSetTest(inputMiniBatches[i]);
-			cost_prev = Cost(y_hat, outputMiniBatches[i]);
-			auto grads = computeGradients(y_hat, outputMiniBatches[i]);
+			std::vector<real_t> y_hat = model_set_test(inputMiniBatches[i]);
+			cost_prev = cost(y_hat, outputMiniBatches[i]);
+			auto grads = compute_gradients(y_hat, outputMiniBatches[i]);
 			auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
 			auto outputWGrad = std::get<1>(grads);
@@ -424,23 +425,25 @@ void MLPPANN::Adam(real_t learning_rate, int max_epoch, int mini_batch_size, rea
 			std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_hidden_hat, alg.scalarAdd(e, alg.sqrt(v_hidden_hat))));
 			std::vector<real_t> outputLayerUpdation = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_output_hat, alg.scalarAdd(e, alg.sqrt(v_output_hat))));
-			updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
-			y_hat = modelSetTest(inputMiniBatches[i]);
-			if (UI) {
-				MLPPANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]);
+			update_parameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
+			y_hat = model_set_test(inputMiniBatches[i]);
+			if (ui) {
+				print_ui(epoch, cost_prev, y_hat, outputMiniBatches[i]);
 			}
 		}
 		epoch++;
 		if (epoch > max_epoch) {
 			break;
 		}
 	}
-	forwardPass();
+	forward_pass();
 }
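The m_*_hat / v_*_hat terms above are the bias-corrected Adam moment estimates; the moment updates themselves lie outside this hunk, so the first line below states the standard Adam recurrences assumed to match the elided code (beta_1 = b1, beta_2 = b2, epsilon = e, t = epoch):

m \leftarrow \beta_1 m + (1 - \beta_1) g, \qquad v \leftarrow \beta_2 v + (1 - \beta_2) g \odot g
\hat{m} = \frac{m}{1 - \beta_1^t}, \qquad \hat{v} = \frac{v}{1 - \beta_2^t}, \qquad \theta \leftarrow \theta - \frac{\eta}{n} \cdot \frac{\hat{m}}{\epsilon + \sqrt{\hat{v}}}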
-void MLPPANN::Adamax(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI) {
-	class MLPPCost cost;
+void MLPPANN::adamax(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool ui) {
+	MLPPCost mlpp_cost;
 	MLPPLinAlg alg;
 	real_t cost_prev = 0;
@@ -463,12 +466,12 @@ void MLPPANN::Adamax(real_t learning_rate, int max_epoch, int mini_batch_size, r
 	std::vector<real_t> m_output;
 	std::vector<real_t> u_output;
 	while (true) {
-		learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
+		learning_rate = apply_learning_rate_scheduler(initial_learning_rate, decayConstant, epoch, dropRate);
 		for (int i = 0; i < n_mini_batch; i++) {
-			std::vector<real_t> y_hat = modelSetTest(inputMiniBatches[i]);
-			cost_prev = Cost(y_hat, outputMiniBatches[i]);
-			auto grads = computeGradients(y_hat, outputMiniBatches[i]);
+			std::vector<real_t> y_hat = model_set_test(inputMiniBatches[i]);
+			cost_prev = cost(y_hat, outputMiniBatches[i]);
+			auto grads = compute_gradients(y_hat, outputMiniBatches[i]);
 			auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
 			auto outputWGrad = std::get<1>(grads);
@@ -495,23 +498,25 @@ void MLPPANN::Adamax(real_t learning_rate, int max_epoch, int mini_batch_size, r
 			std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_hidden_hat, alg.scalarAdd(e, u_hidden)));
 			std::vector<real_t> outputLayerUpdation = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_output_hat, alg.scalarAdd(e, u_output)));
-			updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
-			y_hat = modelSetTest(inputMiniBatches[i]);
-			if (UI) {
-				MLPPANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]);
+			update_parameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
+			y_hat = model_set_test(inputMiniBatches[i]);
+			if (ui) {
+				print_ui(epoch, cost_prev, y_hat, outputMiniBatches[i]);
 			}
 		}
 		epoch++;
 		if (epoch > max_epoch) {
 			break;
 		}
 	}
-	forwardPass();
+	forward_pass();
 }
-void MLPPANN::Nadam(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI) {
-	class MLPPCost cost;
+void MLPPANN::nadam(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool ui) {
+	MLPPCost mlpp_cost;
 	MLPPLinAlg alg;
 	real_t cost_prev = 0;
@@ -534,12 +539,12 @@ void MLPPANN::Nadam(real_t learning_rate, int max_epoch, int mini_batch_size, re
 	std::vector<real_t> m_output;
 	std::vector<real_t> v_output;
 	while (true) {
-		learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
+		learning_rate = apply_learning_rate_scheduler(initial_learning_rate, decayConstant, epoch, dropRate);
 		for (int i = 0; i < n_mini_batch; i++) {
-			std::vector<real_t> y_hat = modelSetTest(inputMiniBatches[i]);
-			cost_prev = Cost(y_hat, outputMiniBatches[i]);
-			auto grads = computeGradients(y_hat, outputMiniBatches[i]);
+			std::vector<real_t> y_hat = model_set_test(inputMiniBatches[i]);
+			cost_prev = cost(y_hat, outputMiniBatches[i]);
+			auto grads = compute_gradients(y_hat, outputMiniBatches[i]);
 			auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
 			auto outputWGrad = std::get<1>(grads);
@@ -570,23 +575,26 @@ void MLPPANN::Nadam(real_t learning_rate, int max_epoch, int mini_batch_size, re
 			std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_hidden_final, alg.scalarAdd(e, alg.sqrt(v_hidden_hat))));
 			std::vector<real_t> outputLayerUpdation = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_output_final, alg.scalarAdd(e, alg.sqrt(v_output_hat))));
-			updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
-			y_hat = modelSetTest(inputMiniBatches[i]);
-			if (UI) {
-				MLPPANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]);
+			update_parameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
+			y_hat = model_set_test(inputMiniBatches[i]);
+			if (ui) {
+				print_ui(epoch, cost_prev, y_hat, outputMiniBatches[i]);
 			}
 		}
 		epoch++;
 		if (epoch > max_epoch) {
 			break;
 		}
 	}
-	forwardPass();
+	forward_pass();
 }
-void MLPPANN::AMSGrad(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI) {
-	class MLPPCost cost;
+void MLPPANN::amsgrad(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool ui) {
+	MLPPCost mlpp_cost;
 	MLPPLinAlg alg;
 	real_t cost_prev = 0;
@@ -613,12 +621,12 @@ void MLPPANN::AMSGrad(real_t learning_rate, int max_epoch, int mini_batch_size,
 	std::vector<real_t> v_output_hat;
 	while (true) {
-		learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
+		learning_rate = apply_learning_rate_scheduler(initial_learning_rate, decayConstant, epoch, dropRate);
 		for (int i = 0; i < n_mini_batch; i++) {
-			std::vector<real_t> y_hat = modelSetTest(inputMiniBatches[i]);
-			cost_prev = Cost(y_hat, outputMiniBatches[i]);
-			auto grads = computeGradients(y_hat, outputMiniBatches[i]);
+			std::vector<real_t> y_hat = model_set_test(inputMiniBatches[i]);
+			cost_prev = cost(y_hat, outputMiniBatches[i]);
+			auto grads = compute_gradients(y_hat, outputMiniBatches[i]);
 			auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
 			auto outputWGrad = std::get<1>(grads);
@@ -647,24 +655,27 @@ void MLPPANN::AMSGrad(real_t learning_rate, int max_epoch, int mini_batch_size,
 			std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_hidden, alg.scalarAdd(e, alg.sqrt(v_hidden_hat))));
 			std::vector<real_t> outputLayerUpdation = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_output, alg.scalarAdd(e, alg.sqrt(v_output_hat))));
-			updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
-			y_hat = modelSetTest(inputMiniBatches[i]);
-			if (UI) {
-				MLPPANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]);
+			update_parameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
+			y_hat = model_set_test(inputMiniBatches[i]);
+			if (ui) {
+				print_ui(epoch, cost_prev, y_hat, outputMiniBatches[i]);
 			}
 		}
 		epoch++;
 		if (epoch > max_epoch) {
 			break;
 		}
 	}
-	forwardPass();
+	forward_pass();
 }
 real_t MLPPANN::score() {
 	MLPPUtilities util;
-	forwardPass();
+	forward_pass();
 	return util.performance(y_hat, outputSet);
 }
@@ -681,12 +692,12 @@ void MLPPANN::save(std::string fileName) {
 	}
 }
-void MLPPANN::setLearningRateScheduler(std::string type, real_t decayConstant) {
+void MLPPANN::set_learning_rate_scheduler(std::string type, real_t decayConstant) {
 	lrScheduler = type;
 	MLPPANN::decayConstant = decayConstant;
 }
-void MLPPANN::setLearningRateScheduler(std::string type, real_t decayConstant, real_t dropRate) {
+void MLPPANN::set_learning_rate_scheduler_drop(std::string type, real_t decayConstant, real_t dropRate) {
 	lrScheduler = type;
 	MLPPANN::decayConstant = decayConstant;
 	MLPPANN::dropRate = dropRate;
@@ -694,7 +705,7 @@ void MLPPANN::setLearningRateScheduler(std::string type, real_t decayConstant, r
 // https://en.wikipedia.org/wiki/Learning_rate
 // Learning Rate Decay (C2W2L09) - Andrew Ng - Deep Learning Specialization
-real_t MLPPANN::applyLearningRateScheduler(real_t learningRate, real_t decayConstant, real_t epoch, real_t dropRate) {
+real_t MLPPANN::apply_learning_rate_scheduler(real_t learningRate, real_t decayConstant, real_t epoch, real_t dropRate) {
 	if (lrScheduler == "Time") {
 		return learningRate / (1 + decayConstant * epoch);
 	} else if (lrScheduler == "Epoch") {
@@ -707,7 +718,7 @@ real_t MLPPANN::applyLearningRateScheduler(real_t learningRate, real_t decayCons
 	return learningRate;
 }
-void MLPPANN::addLayer(int n_hidden, std::string activation, std::string weightInit, std::string reg, real_t lambda, real_t alpha) {
+void MLPPANN::add_layer(int n_hidden, std::string activation, std::string weightInit, std::string reg, real_t lambda, real_t alpha) {
 	if (network.empty()) {
 		network.push_back(MLPPOldHiddenLayer(n_hidden, activation, inputSet, weightInit, reg, lambda, alpha));
 		network[0].forwardPass();
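For reference on apply_learning_rate_scheduler above: the "Time" branch visible in the hunk is inverse time decay, and the step-drop form below (the shape described in the cited C2W2L09 lecture) is what the "Step" branch is assumed to implement; the "Epoch" and "Step" bodies are not shown in this diff, so that part is an assumption rather than a quote of the code:

\eta_{\text{Time}}(t) = \frac{\eta_0}{1 + k\,t} \qquad (k = \text{decayConstant},\ t = \text{epoch})
\eta_{\text{Step}}(t) = \eta_0 \, d^{\lfloor t / r \rfloor} \qquad (d = \text{decayConstant},\ r = \text{dropRate};\ \text{assumed})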
@@ -717,7 +728,7 @@ void MLPPANN::addLayer(int n_hidden, std::string activation, std::string weightI
 	}
 }
-void MLPPANN::addOutputLayer(std::string activation, std::string loss, std::string weightInit, std::string reg, real_t lambda, real_t alpha) {
+void MLPPANN::add_output_layer(std::string activation, std::string loss, std::string weightInit, std::string reg, real_t lambda, real_t alpha) {
 	if (!network.empty()) {
 		outputLayer = new MLPPOldOutputLayer(network[network.size() - 1].n_hidden, activation, loss, network[network.size() - 1].a, weightInit, reg, lambda, alpha);
 	} else {
@@ -725,21 +736,41 @@ void MLPPANN::addOutputLayer(std::string activation, std::string loss, std::stri
 	}
 }
-real_t MLPPANN::Cost(std::vector<real_t> y_hat, std::vector<real_t> y) {
+MLPPANN::MLPPANN(std::vector<std::vector<real_t>> p_inputSet, std::vector<real_t> p_outputSet) {
+	inputSet = p_inputSet;
+	outputSet = p_outputSet;
+	n = inputSet.size();
+	k = inputSet[0].size();
+	lrScheduler = "None";
+	decayConstant = 0;
+	dropRate = 0;
+}
+MLPPANN::MLPPANN() {
+}
+MLPPANN::~MLPPANN() {
+	delete outputLayer;
+}
+real_t MLPPANN::cost(std::vector<real_t> y_hat, std::vector<real_t> y) {
 	MLPPReg regularization;
-	class MLPPCost cost;
+	MLPPCost mlpp_cost;
 	real_t totalRegTerm = 0;
 	auto cost_function = outputLayer->cost_map[outputLayer->cost];
 	if (!network.empty()) {
 		for (uint32_t i = 0; i < network.size() - 1; i++) {
 			totalRegTerm += regularization.regTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg);
 		}
 	}
-	return (cost.*cost_function)(y_hat, y) + totalRegTerm + regularization.regTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg);
+	return (mlpp_cost.*cost_function)(y_hat, y) + totalRegTerm + regularization.regTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg);
 }
-void MLPPANN::forwardPass() {
+void MLPPANN::forward_pass() {
 	if (!network.empty()) {
 		network[0].input = inputSet;
 		network[0].forwardPass();
@@ -752,11 +783,12 @@ void MLPPANN::forwardPass() {
 	} else {
 		outputLayer->input = inputSet;
 	}
 	outputLayer->forwardPass();
 	y_hat = outputLayer->a;
 }
-void MLPPANN::updateParameters(std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations, std::vector<real_t> outputLayerUpdation, real_t learning_rate) {
+void MLPPANN::update_parameters(std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations, std::vector<real_t> outputLayerUpdation, real_t learning_rate) {
 	MLPPLinAlg alg;
 	outputLayer->weights = alg.subtraction(outputLayer->weights, outputLayerUpdation);
@@ -773,9 +805,9 @@ void MLPPANN::updateParameters(std::vector<std::vector<std::vector<real_t>>> hid
 	}
 }
-std::tuple<std::vector<std::vector<std::vector<real_t>>>, std::vector<real_t>> MLPPANN::computeGradients(std::vector<real_t> y_hat, std::vector<real_t> outputSet) {
+std::tuple<std::vector<std::vector<std::vector<real_t>>>, std::vector<real_t>> MLPPANN::compute_gradients(std::vector<real_t> y_hat, std::vector<real_t> outputSet) {
 	// std::cout << "BEGIN" << std::endl;
-	class MLPPCost cost;
+	MLPPCost mlpp_cost;
 	MLPPActivation avn;
 	MLPPLinAlg alg;
 	MLPPReg regularization;
@@ -784,7 +816,7 @@ std::tuple<std::vector<std::vector<std::vector<real_t>>>, std::vector<real_t>> M
 	auto costDeriv = outputLayer->costDeriv_map[outputLayer->cost];
 	auto outputAvn = outputLayer->activation_map[outputLayer->activation];
-	outputLayer->delta = alg.hadamard_product((cost.*costDeriv)(y_hat, outputSet), (avn.*outputAvn)(outputLayer->z, 1));
+	outputLayer->delta = alg.hadamard_product((mlpp_cost.*costDeriv)(y_hat, outputSet), (avn.*outputAvn)(outputLayer->z, 1));
 	std::vector<real_t> outputWGrad = alg.mat_vec_mult(alg.transpose(outputLayer->input), outputLayer->delta);
 	outputWGrad = alg.addition(outputWGrad, regularization.regDerivTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg));
@@ -805,8 +837,8 @@ std::tuple<std::vector<std::vector<std::vector<real_t>>>, std::vector<real_t>> M
 	return { cumulativeHiddenLayerWGrad, outputWGrad };
 }
-void MLPPANN::UI(int epoch, real_t cost_prev, std::vector<real_t> y_hat, std::vector<real_t> outputSet) {
-	MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
+void MLPPANN::print_ui(int epoch, real_t cost_prev, std::vector<real_t> y_hat, std::vector<real_t> outputSet) {
+	MLPPUtilities::CostInfo(epoch, cost_prev, cost(y_hat, outputSet));
 	std::cout << "Layer " << network.size() + 1 << ": " << std::endl;
 	MLPPUtilities::UI(outputLayer->weights, outputLayer->bias);
 	if (!network.empty()) {

View File

@@ -21,39 +21,44 @@
 class MLPPANN {
 public:
-	MLPPANN(std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet);
-	~MLPPANN();
-	std::vector<real_t> modelSetTest(std::vector<std::vector<real_t>> X);
-	real_t modelTest(std::vector<real_t> x);
-	void gradientDescent(real_t learning_rate, int max_epoch, bool UI = false);
-	void SGD(real_t learning_rate, int max_epoch, bool UI = false);
-	void MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI = false);
-	void Momentum(real_t learning_rate, int max_epoch, int mini_batch_size, real_t gamma, bool NAG, bool UI = false);
-	void Adagrad(real_t learning_rate, int max_epoch, int mini_batch_size, real_t e, bool UI = false);
-	void Adadelta(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t e, bool UI = false);
-	void Adam(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI = false);
-	void Adamax(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI = false);
-	void Nadam(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI = false);
-	void AMSGrad(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI = false);
+	std::vector<real_t> model_set_test(std::vector<std::vector<real_t>> X);
+	real_t model_test(std::vector<real_t> x);
+	void gradient_descent(real_t learning_rate, int max_epoch, bool ui = false);
+	void sgd(real_t learning_rate, int max_epoch, bool ui = false);
+	void mbgd(real_t learning_rate, int max_epoch, int mini_batch_size, bool ui = false);
+	void momentum(real_t learning_rate, int max_epoch, int mini_batch_size, real_t gamma, bool nag, bool ui = false);
+	void adagrad(real_t learning_rate, int max_epoch, int mini_batch_size, real_t e, bool ui = false);
+	void adadelta(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t e, bool ui = false);
+	void adam(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool ui = false);
+	void adamax(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool ui = false);
+	void nadam(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool ui = false);
+	void amsgrad(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool ui = false);
 	real_t score();
-	void save(std::string fileName);
-	void setLearningRateScheduler(std::string type, real_t decayConstant);
-	void setLearningRateScheduler(std::string type, real_t decayConstant, real_t dropRate);
-	void addLayer(int n_hidden, std::string activation, std::string weightInit = "Default", std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5);
-	void addOutputLayer(std::string activation, std::string loss, std::string weightInit = "Default", std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5);
+	void save(std::string file_name);
+	void set_learning_rate_scheduler(std::string type, real_t decay_constant);
+	void set_learning_rate_scheduler_drop(std::string type, real_t decay_constant, real_t drop_rate);
+	void add_layer(int n_hidden, std::string activation, std::string weight_init = "Default", std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5);
+	void add_output_layer(std::string activation, std::string loss, std::string weight_init = "Default", std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5);
+	MLPPANN(std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet);
+	MLPPANN();
+	~MLPPANN();
 private:
-	real_t applyLearningRateScheduler(real_t learningRate, real_t decayConstant, real_t epoch, real_t dropRate);
-	real_t Cost(std::vector<real_t> y_hat, std::vector<real_t> y);
-	void forwardPass();
-	void updateParameters(std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations, std::vector<real_t> outputLayerUpdation, real_t learning_rate);
-	std::tuple<std::vector<std::vector<std::vector<real_t>>>, std::vector<real_t>> computeGradients(std::vector<real_t> y_hat, std::vector<real_t> outputSet);
-	void UI(int epoch, real_t cost_prev, std::vector<real_t> y_hat, std::vector<real_t> outputSet);
+	real_t apply_learning_rate_scheduler(real_t learningRate, real_t decayConstant, real_t epoch, real_t dropRate);
+	real_t cost(std::vector<real_t> y_hat, std::vector<real_t> y);
+	void forward_pass();
+	void update_parameters(std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations, std::vector<real_t> outputLayerUpdation, real_t learning_rate);
+	std::tuple<std::vector<std::vector<std::vector<real_t>>>, std::vector<real_t>> compute_gradients(std::vector<real_t> y_hat, std::vector<real_t> outputSet);
+	void print_ui(int epoch, real_t cost_prev, std::vector<real_t> y_hat, std::vector<real_t> outputSet);
 	std::vector<std::vector<real_t>> inputSet;
 	std::vector<real_t> outputSet;

View File

@@ -591,17 +591,31 @@ void MLPPTests::test_dynamically_sized_ann(bool ui) {
 	// Possible Loss Functions: MSE, RMSE, MBE, LogLoss, CrossEntropy, HingeLoss
 	std::vector<std::vector<real_t>> inputSet = { { 0, 0, 1, 1 }, { 0, 1, 0, 1 } };
 	std::vector<real_t> outputSet = { 0, 1, 1, 0 };
+	MLPPANNOld ann_old(alg.transpose(inputSet), outputSet);
+	ann_old.addLayer(2, "Cosh");
+	ann_old.addOutputLayer("Sigmoid", "LogLoss");
+	ann_old.AMSGrad(0.1, 10000, 1, 0.9, 0.999, 0.000001, ui);
+	ann_old.Adadelta(1, 1000, 2, 0.9, 0.000001, ui);
+	ann_old.Momentum(0.1, 8000, 2, 0.9, true, ui);
+	ann_old.setLearningRateScheduler("Step", 0.5, 1000);
+	ann_old.gradientDescent(0.01, 30000);
+	alg.printVector(ann_old.modelSetTest(alg.transpose(inputSet)));
+	std::cout << "ACCURACY: " << 100 * ann_old.score() << "%" << std::endl;
 	MLPPANN ann(alg.transpose(inputSet), outputSet);
-	ann.addLayer(2, "Cosh");
-	ann.addOutputLayer("Sigmoid", "LogLoss");
-	ann.AMSGrad(0.1, 10000, 1, 0.9, 0.999, 0.000001, ui);
-	ann.Adadelta(1, 1000, 2, 0.9, 0.000001, ui);
-	ann.Momentum(0.1, 8000, 2, 0.9, true, ui);
-	ann.setLearningRateScheduler("Step", 0.5, 1000);
-	ann.gradientDescent(0.01, 30000);
-	alg.printVector(ann.modelSetTest(alg.transpose(inputSet)));
+	ann.add_layer(2, "Cosh");
+	ann.add_output_layer("Sigmoid", "LogLoss");
+	ann.amsgrad(0.1, 10000, 1, 0.9, 0.999, 0.000001, ui);
+	ann.adadelta(1, 1000, 2, 0.9, 0.000001, ui);
+	ann.momentum(0.1, 8000, 2, 0.9, true, ui);
+	ann.set_learning_rate_scheduler_drop("Step", 0.5, 1000);
+	ann.gradient_descent(0.01, 30000);
+	alg.printVector(ann.model_set_test(alg.transpose(inputSet)));
 	std::cout << "ACCURACY: " << 100 * ann.score() << "%" << std::endl;
 }
 void MLPPTests::test_wgan_old(bool ui) {
@@ -660,13 +674,23 @@ void MLPPTests::test_ann(bool ui) {
 	std::vector<std::vector<real_t>> inputSet = { { 0, 0 }, { 0, 1 }, { 1, 0 }, { 1, 1 } }; // XOR
 	std::vector<real_t> outputSet = { 0, 1, 1, 0 };
-	MLPPANN ann(inputSet, outputSet);
-	ann.addLayer(5, "Sigmoid");
-	ann.addLayer(8, "Sigmoid"); // Add more layers as needed.
-	ann.addOutputLayer("Sigmoid", "LogLoss");
-	ann.gradientDescent(1, 20000, ui);
-	std::vector<real_t> predictions = ann.modelSetTest(inputSet);
+	MLPPANNOld ann_old(inputSet, outputSet);
+	ann_old.addLayer(5, "Sigmoid");
+	ann_old.addLayer(8, "Sigmoid"); // Add more layers as needed.
+	ann_old.addOutputLayer("Sigmoid", "LogLoss");
+	ann_old.gradientDescent(1, 20000, ui);
+	std::vector<real_t> predictions_old = ann_old.modelSetTest(inputSet);
+	alg.printVector(predictions_old); // Testing out the model's preds for train set.
+	std::cout << "ACCURACY: " << 100 * ann_old.score() << "%" << std::endl; // Accuracy.
+	MLPPANN ann(inputSet, outputSet);
+	ann.add_layer(5, "Sigmoid");
+	ann.add_layer(8, "Sigmoid"); // Add more layers as needed.
+	ann.add_output_layer("Sigmoid", "LogLoss");
+	ann.gradient_descent(1, 20000, ui);
+	std::vector<real_t> predictions = ann.model_set_test(inputSet);
 	alg.printVector(predictions); // Testing out the model's preds for train set.
 	std::cout << "ACCURACY: " << 100 * ann.score() << "%" << std::endl; // Accuracy.
 }