mirror of
https://github.com/Relintai/pmlpp.git
synced 2024-11-13 13:57:19 +01:00
Initial cleanup pass on MLPPANN.
This commit is contained in:
parent
5f8e35c58f
commit
a9b30ef75f
326
mlpp/ann/ann.cpp
326
mlpp/ann/ann.cpp
@ -15,22 +15,7 @@
|
|||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <random>
|
#include <random>
|
||||||
|
|
||||||
MLPPANN::MLPPANN(std::vector<std::vector<real_t>> p_inputSet, std::vector<real_t> p_outputSet) {
|
std::vector<real_t> MLPPANN::model_set_test(std::vector<std::vector<real_t>> X) {
|
||||||
inputSet = p_inputSet;
|
|
||||||
outputSet = p_outputSet;
|
|
||||||
|
|
||||||
n = inputSet.size();
|
|
||||||
k = inputSet[0].size();
|
|
||||||
lrScheduler = "None";
|
|
||||||
decayConstant = 0;
|
|
||||||
dropRate = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
MLPPANN::~MLPPANN() {
|
|
||||||
delete outputLayer;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPANN::modelSetTest(std::vector<std::vector<real_t>> X) {
|
|
||||||
if (!network.empty()) {
|
if (!network.empty()) {
|
||||||
network[0].input = X;
|
network[0].input = X;
|
||||||
network[0].forwardPass();
|
network[0].forwardPass();
|
||||||
@ -43,11 +28,13 @@ std::vector<real_t> MLPPANN::modelSetTest(std::vector<std::vector<real_t>> X) {
|
|||||||
} else {
|
} else {
|
||||||
outputLayer->input = X;
|
outputLayer->input = X;
|
||||||
}
|
}
|
||||||
|
|
||||||
outputLayer->forwardPass();
|
outputLayer->forwardPass();
|
||||||
|
|
||||||
return outputLayer->a;
|
return outputLayer->a;
|
||||||
}
|
}
|
||||||
|
|
||||||
real_t MLPPANN::modelTest(std::vector<real_t> x) {
|
real_t MLPPANN::model_test(std::vector<real_t> x) {
|
||||||
if (!network.empty()) {
|
if (!network.empty()) {
|
||||||
network[0].Test(x);
|
network[0].Test(x);
|
||||||
for (uint32_t i = 1; i < network.size(); i++) {
|
for (uint32_t i = 1; i < network.size(); i++) {
|
||||||
@ -60,33 +47,36 @@ real_t MLPPANN::modelTest(std::vector<real_t> x) {
|
|||||||
return outputLayer->a_test;
|
return outputLayer->a_test;
|
||||||
}
|
}
|
||||||
|
|
||||||
void MLPPANN::gradientDescent(real_t learning_rate, int max_epoch, bool UI) {
|
void MLPPANN::gradient_descent(real_t learning_rate, int max_epoch, bool ui) {
|
||||||
class MLPPCost cost;
|
MLPPCost mlpp_cost;
|
||||||
MLPPLinAlg alg;
|
MLPPLinAlg alg;
|
||||||
real_t cost_prev = 0;
|
real_t cost_prev = 0;
|
||||||
int epoch = 1;
|
int epoch = 1;
|
||||||
forwardPass();
|
|
||||||
|
forward_pass();
|
||||||
|
|
||||||
real_t initial_learning_rate = learning_rate;
|
real_t initial_learning_rate = learning_rate;
|
||||||
|
|
||||||
alg.printMatrix(network[network.size() - 1].weights);
|
alg.printMatrix(network[network.size() - 1].weights);
|
||||||
while (true) {
|
while (true) {
|
||||||
learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
|
learning_rate = apply_learning_rate_scheduler(initial_learning_rate, decayConstant, epoch, dropRate);
|
||||||
cost_prev = Cost(y_hat, outputSet);
|
|
||||||
|
|
||||||
auto grads = computeGradients(y_hat, outputSet);
|
cost_prev = cost(y_hat, outputSet);
|
||||||
|
|
||||||
|
auto grads = compute_gradients(y_hat, outputSet);
|
||||||
auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
|
auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
|
||||||
auto outputWGrad = std::get<1>(grads);
|
auto outputWGrad = std::get<1>(grads);
|
||||||
|
|
||||||
cumulativeHiddenLayerWGrad = alg.scalarMultiply(learning_rate / n, cumulativeHiddenLayerWGrad);
|
cumulativeHiddenLayerWGrad = alg.scalarMultiply(learning_rate / n, cumulativeHiddenLayerWGrad);
|
||||||
outputWGrad = alg.scalarMultiply(learning_rate / n, outputWGrad);
|
outputWGrad = alg.scalarMultiply(learning_rate / n, outputWGrad);
|
||||||
updateParameters(cumulativeHiddenLayerWGrad, outputWGrad, learning_rate); // subject to change. may want bias to have this matrix too.
|
update_parameters(cumulativeHiddenLayerWGrad, outputWGrad, learning_rate); // subject to change. may want bias to have this matrix too.
|
||||||
|
|
||||||
std::cout << learning_rate << std::endl;
|
std::cout << learning_rate << std::endl;
|
||||||
|
|
||||||
forwardPass();
|
forward_pass();
|
||||||
|
|
||||||
if (UI) {
|
if (ui) {
|
||||||
MLPPANN::UI(epoch, cost_prev, y_hat, outputSet);
|
print_ui(epoch, cost_prev, y_hat, outputSet);
|
||||||
}
|
}
|
||||||
|
|
||||||
epoch++;
|
epoch++;
|
||||||
@ -96,8 +86,8 @@ void MLPPANN::gradientDescent(real_t learning_rate, int max_epoch, bool UI) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void MLPPANN::SGD(real_t learning_rate, int max_epoch, bool UI) {
|
void MLPPANN::sgd(real_t learning_rate, int max_epoch, bool ui) {
|
||||||
class MLPPCost cost;
|
MLPPCost mlpp_cost;
|
||||||
MLPPLinAlg alg;
|
MLPPLinAlg alg;
|
||||||
|
|
||||||
real_t cost_prev = 0;
|
real_t cost_prev = 0;
|
||||||
@ -105,28 +95,28 @@ void MLPPANN::SGD(real_t learning_rate, int max_epoch, bool UI) {
|
|||||||
real_t initial_learning_rate = learning_rate;
|
real_t initial_learning_rate = learning_rate;
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
|
learning_rate = apply_learning_rate_scheduler(initial_learning_rate, decayConstant, epoch, dropRate);
|
||||||
|
|
||||||
std::random_device rd;
|
std::random_device rd;
|
||||||
std::default_random_engine generator(rd());
|
std::default_random_engine generator(rd());
|
||||||
std::uniform_int_distribution<int> distribution(0, int(n - 1));
|
std::uniform_int_distribution<int> distribution(0, int(n - 1));
|
||||||
int outputIndex = distribution(generator);
|
int outputIndex = distribution(generator);
|
||||||
|
|
||||||
std::vector<real_t> y_hat = modelSetTest({ inputSet[outputIndex] });
|
std::vector<real_t> y_hat = model_set_test({ inputSet[outputIndex] });
|
||||||
cost_prev = Cost({ y_hat }, { outputSet[outputIndex] });
|
cost_prev = cost({ y_hat }, { outputSet[outputIndex] });
|
||||||
|
|
||||||
auto grads = computeGradients(y_hat, { outputSet[outputIndex] });
|
auto grads = compute_gradients(y_hat, { outputSet[outputIndex] });
|
||||||
auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
|
auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
|
||||||
auto outputWGrad = std::get<1>(grads);
|
auto outputWGrad = std::get<1>(grads);
|
||||||
|
|
||||||
cumulativeHiddenLayerWGrad = alg.scalarMultiply(learning_rate / n, cumulativeHiddenLayerWGrad);
|
cumulativeHiddenLayerWGrad = alg.scalarMultiply(learning_rate / n, cumulativeHiddenLayerWGrad);
|
||||||
outputWGrad = alg.scalarMultiply(learning_rate / n, outputWGrad);
|
outputWGrad = alg.scalarMultiply(learning_rate / n, outputWGrad);
|
||||||
|
|
||||||
updateParameters(cumulativeHiddenLayerWGrad, outputWGrad, learning_rate); // subject to change. may want bias to have this matrix too.
|
update_parameters(cumulativeHiddenLayerWGrad, outputWGrad, learning_rate); // subject to change. may want bias to have this matrix too.
|
||||||
y_hat = modelSetTest({ inputSet[outputIndex] });
|
y_hat = model_set_test({ inputSet[outputIndex] });
|
||||||
|
|
||||||
if (UI) {
|
if (ui) {
|
||||||
MLPPANN::UI(epoch, cost_prev, y_hat, { outputSet[outputIndex] });
|
print_ui(epoch, cost_prev, y_hat, { outputSet[outputIndex] });
|
||||||
}
|
}
|
||||||
|
|
||||||
epoch++;
|
epoch++;
|
||||||
@ -134,11 +124,12 @@ void MLPPANN::SGD(real_t learning_rate, int max_epoch, bool UI) {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
forwardPass();
|
|
||||||
|
forward_pass();
|
||||||
}
|
}
|
||||||
|
|
||||||
void MLPPANN::MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI) {
|
void MLPPANN::mbgd(real_t learning_rate, int max_epoch, int mini_batch_size, bool ui) {
|
||||||
class MLPPCost cost;
|
MLPPCost mlpp_cost;
|
||||||
MLPPLinAlg alg;
|
MLPPLinAlg alg;
|
||||||
|
|
||||||
real_t cost_prev = 0;
|
real_t cost_prev = 0;
|
||||||
@ -155,35 +146,39 @@ void MLPPANN::MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, boo
|
|||||||
auto outputMiniBatches = std::get<1>(batches);
|
auto outputMiniBatches = std::get<1>(batches);
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
|
learning_rate = apply_learning_rate_scheduler(initial_learning_rate, decayConstant, epoch, dropRate);
|
||||||
for (int i = 0; i < n_mini_batch; i++) {
|
|
||||||
std::vector<real_t> y_hat = modelSetTest(inputMiniBatches[i]);
|
|
||||||
cost_prev = Cost(y_hat, outputMiniBatches[i]);
|
|
||||||
|
|
||||||
auto grads = computeGradients(y_hat, outputMiniBatches[i]);
|
for (int i = 0; i < n_mini_batch; i++) {
|
||||||
|
std::vector<real_t> y_hat = model_set_test(inputMiniBatches[i]);
|
||||||
|
cost_prev = cost(y_hat, outputMiniBatches[i]);
|
||||||
|
|
||||||
|
auto grads = compute_gradients(y_hat, outputMiniBatches[i]);
|
||||||
auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
|
auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
|
||||||
auto outputWGrad = std::get<1>(grads);
|
auto outputWGrad = std::get<1>(grads);
|
||||||
|
|
||||||
cumulativeHiddenLayerWGrad = alg.scalarMultiply(learning_rate / n, cumulativeHiddenLayerWGrad);
|
cumulativeHiddenLayerWGrad = alg.scalarMultiply(learning_rate / n, cumulativeHiddenLayerWGrad);
|
||||||
outputWGrad = alg.scalarMultiply(learning_rate / n, outputWGrad);
|
outputWGrad = alg.scalarMultiply(learning_rate / n, outputWGrad);
|
||||||
|
|
||||||
updateParameters(cumulativeHiddenLayerWGrad, outputWGrad, learning_rate); // subject to change. may want bias to have this matrix too.
|
update_parameters(cumulativeHiddenLayerWGrad, outputWGrad, learning_rate); // subject to change. may want bias to have this matrix too.
|
||||||
y_hat = modelSetTest(inputMiniBatches[i]);
|
y_hat = model_set_test(inputMiniBatches[i]);
|
||||||
|
|
||||||
if (UI) {
|
if (ui) {
|
||||||
MLPPANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]);
|
print_ui(epoch, cost_prev, y_hat, outputMiniBatches[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
epoch++;
|
epoch++;
|
||||||
|
|
||||||
if (epoch > max_epoch) {
|
if (epoch > max_epoch) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
forwardPass();
|
|
||||||
|
forward_pass();
|
||||||
}
|
}
|
||||||
|
|
||||||
void MLPPANN::Momentum(real_t learning_rate, int max_epoch, int mini_batch_size, real_t gamma, bool NAG, bool UI) {
|
void MLPPANN::momentum(real_t learning_rate, int max_epoch, int mini_batch_size, real_t gamma, bool nag, bool ui) {
|
||||||
class MLPPCost cost;
|
class MLPPCost mlpp_cost;
|
||||||
MLPPLinAlg alg;
|
MLPPLinAlg alg;
|
||||||
|
|
||||||
real_t cost_prev = 0;
|
real_t cost_prev = 0;
|
||||||
@ -204,12 +199,13 @@ void MLPPANN::Momentum(real_t learning_rate, int max_epoch, int mini_batch_size,
|
|||||||
|
|
||||||
std::vector<real_t> v_output;
|
std::vector<real_t> v_output;
|
||||||
while (true) {
|
while (true) {
|
||||||
learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
|
learning_rate = apply_learning_rate_scheduler(initial_learning_rate, decayConstant, epoch, dropRate);
|
||||||
for (int i = 0; i < n_mini_batch; i++) {
|
|
||||||
std::vector<real_t> y_hat = modelSetTest(inputMiniBatches[i]);
|
|
||||||
cost_prev = Cost(y_hat, outputMiniBatches[i]);
|
|
||||||
|
|
||||||
auto grads = computeGradients(y_hat, outputMiniBatches[i]);
|
for (int i = 0; i < n_mini_batch; i++) {
|
||||||
|
std::vector<real_t> y_hat = model_set_test(inputMiniBatches[i]);
|
||||||
|
cost_prev = cost(y_hat, outputMiniBatches[i]);
|
||||||
|
|
||||||
|
auto grads = compute_gradients(y_hat, outputMiniBatches[i]);
|
||||||
auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
|
auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
|
||||||
auto outputWGrad = std::get<1>(grads);
|
auto outputWGrad = std::get<1>(grads);
|
||||||
|
|
||||||
@ -221,31 +217,34 @@ void MLPPANN::Momentum(real_t learning_rate, int max_epoch, int mini_batch_size,
|
|||||||
v_output.resize(outputWGrad.size());
|
v_output.resize(outputWGrad.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (NAG) { // "Aposterori" calculation
|
if (nag) { // "Aposterori" calculation
|
||||||
updateParameters(v_hidden, v_output, 0); // DON'T update bias.
|
update_parameters(v_hidden, v_output, 0); // DON'T update bias.
|
||||||
}
|
}
|
||||||
|
|
||||||
v_hidden = alg.addition(alg.scalarMultiply(gamma, v_hidden), alg.scalarMultiply(learning_rate / n, cumulativeHiddenLayerWGrad));
|
v_hidden = alg.addition(alg.scalarMultiply(gamma, v_hidden), alg.scalarMultiply(learning_rate / n, cumulativeHiddenLayerWGrad));
|
||||||
|
|
||||||
v_output = alg.addition(alg.scalarMultiply(gamma, v_output), alg.scalarMultiply(learning_rate / n, outputWGrad));
|
v_output = alg.addition(alg.scalarMultiply(gamma, v_output), alg.scalarMultiply(learning_rate / n, outputWGrad));
|
||||||
|
|
||||||
updateParameters(v_hidden, v_output, learning_rate); // subject to change. may want bias to have this matrix too.
|
update_parameters(v_hidden, v_output, learning_rate); // subject to change. may want bias to have this matrix too.
|
||||||
y_hat = modelSetTest(inputMiniBatches[i]);
|
y_hat = model_set_test(inputMiniBatches[i]);
|
||||||
|
|
||||||
if (UI) {
|
if (ui) {
|
||||||
MLPPANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]);
|
print_ui(epoch, cost_prev, y_hat, outputMiniBatches[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
epoch++;
|
epoch++;
|
||||||
|
|
||||||
if (epoch > max_epoch) {
|
if (epoch > max_epoch) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
forwardPass();
|
|
||||||
|
forward_pass();
|
||||||
}
|
}
|
||||||
|
|
||||||
void MLPPANN::Adagrad(real_t learning_rate, int max_epoch, int mini_batch_size, real_t e, bool UI) {
|
void MLPPANN::adagrad(real_t learning_rate, int max_epoch, int mini_batch_size, real_t e, bool ui) {
|
||||||
class MLPPCost cost;
|
MLPPCost mlpp_cost;
|
||||||
MLPPLinAlg alg;
|
MLPPLinAlg alg;
|
||||||
|
|
||||||
real_t cost_prev = 0;
|
real_t cost_prev = 0;
|
||||||
@ -266,12 +265,13 @@ void MLPPANN::Adagrad(real_t learning_rate, int max_epoch, int mini_batch_size,
|
|||||||
|
|
||||||
std::vector<real_t> v_output;
|
std::vector<real_t> v_output;
|
||||||
while (true) {
|
while (true) {
|
||||||
learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
|
learning_rate = apply_learning_rate_scheduler(initial_learning_rate, decayConstant, epoch, dropRate);
|
||||||
for (int i = 0; i < n_mini_batch; i++) {
|
|
||||||
std::vector<real_t> y_hat = modelSetTest(inputMiniBatches[i]);
|
|
||||||
cost_prev = Cost(y_hat, outputMiniBatches[i]);
|
|
||||||
|
|
||||||
auto grads = computeGradients(y_hat, outputMiniBatches[i]);
|
for (int i = 0; i < n_mini_batch; i++) {
|
||||||
|
std::vector<real_t> y_hat = model_set_test(inputMiniBatches[i]);
|
||||||
|
cost_prev = cost(y_hat, outputMiniBatches[i]);
|
||||||
|
|
||||||
|
auto grads = compute_gradients(y_hat, outputMiniBatches[i]);
|
||||||
auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
|
auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
|
||||||
auto outputWGrad = std::get<1>(grads);
|
auto outputWGrad = std::get<1>(grads);
|
||||||
|
|
||||||
@ -290,11 +290,11 @@ void MLPPANN::Adagrad(real_t learning_rate, int max_epoch, int mini_batch_size,
|
|||||||
std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(cumulativeHiddenLayerWGrad, alg.scalarAdd(e, alg.sqrt(v_hidden))));
|
std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(cumulativeHiddenLayerWGrad, alg.scalarAdd(e, alg.sqrt(v_hidden))));
|
||||||
std::vector<real_t> outputLayerUpdation = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(outputWGrad, alg.scalarAdd(e, alg.sqrt(v_output))));
|
std::vector<real_t> outputLayerUpdation = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(outputWGrad, alg.scalarAdd(e, alg.sqrt(v_output))));
|
||||||
|
|
||||||
updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
|
update_parameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
|
||||||
y_hat = modelSetTest(inputMiniBatches[i]);
|
y_hat = model_set_test(inputMiniBatches[i]);
|
||||||
|
|
||||||
if (UI) {
|
if (ui) {
|
||||||
MLPPANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]);
|
print_ui(epoch, cost_prev, y_hat, outputMiniBatches[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
epoch++;
|
epoch++;
|
||||||
@ -302,11 +302,12 @@ void MLPPANN::Adagrad(real_t learning_rate, int max_epoch, int mini_batch_size,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
forwardPass();
|
|
||||||
|
forward_pass();
|
||||||
}
|
}
|
||||||
|
|
||||||
void MLPPANN::Adadelta(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t e, bool UI) {
|
void MLPPANN::adadelta(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t e, bool ui) {
|
||||||
class MLPPCost cost;
|
MLPPCost mlpp_cost;
|
||||||
MLPPLinAlg alg;
|
MLPPLinAlg alg;
|
||||||
|
|
||||||
real_t cost_prev = 0;
|
real_t cost_prev = 0;
|
||||||
@ -327,12 +328,12 @@ void MLPPANN::Adadelta(real_t learning_rate, int max_epoch, int mini_batch_size,
|
|||||||
|
|
||||||
std::vector<real_t> v_output;
|
std::vector<real_t> v_output;
|
||||||
while (true) {
|
while (true) {
|
||||||
learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
|
learning_rate = apply_learning_rate_scheduler(initial_learning_rate, decayConstant, epoch, dropRate);
|
||||||
for (int i = 0; i < n_mini_batch; i++) {
|
for (int i = 0; i < n_mini_batch; i++) {
|
||||||
std::vector<real_t> y_hat = modelSetTest(inputMiniBatches[i]);
|
std::vector<real_t> y_hat = model_set_test(inputMiniBatches[i]);
|
||||||
cost_prev = Cost(y_hat, outputMiniBatches[i]);
|
cost_prev = cost(y_hat, outputMiniBatches[i]);
|
||||||
|
|
||||||
auto grads = computeGradients(y_hat, outputMiniBatches[i]);
|
auto grads = compute_gradients(y_hat, outputMiniBatches[i]);
|
||||||
auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
|
auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
|
||||||
auto outputWGrad = std::get<1>(grads);
|
auto outputWGrad = std::get<1>(grads);
|
||||||
|
|
||||||
@ -351,11 +352,11 @@ void MLPPANN::Adadelta(real_t learning_rate, int max_epoch, int mini_batch_size,
|
|||||||
std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(cumulativeHiddenLayerWGrad, alg.scalarAdd(e, alg.sqrt(v_hidden))));
|
std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(cumulativeHiddenLayerWGrad, alg.scalarAdd(e, alg.sqrt(v_hidden))));
|
||||||
std::vector<real_t> outputLayerUpdation = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(outputWGrad, alg.scalarAdd(e, alg.sqrt(v_output))));
|
std::vector<real_t> outputLayerUpdation = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(outputWGrad, alg.scalarAdd(e, alg.sqrt(v_output))));
|
||||||
|
|
||||||
updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
|
update_parameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
|
||||||
y_hat = modelSetTest(inputMiniBatches[i]);
|
y_hat = model_set_test(inputMiniBatches[i]);
|
||||||
|
|
||||||
if (UI) {
|
if (ui) {
|
||||||
MLPPANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]);
|
print_ui(epoch, cost_prev, y_hat, outputMiniBatches[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
epoch++;
|
epoch++;
|
||||||
@ -363,11 +364,11 @@ void MLPPANN::Adadelta(real_t learning_rate, int max_epoch, int mini_batch_size,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
forwardPass();
|
forward_pass();
|
||||||
}
|
}
|
||||||
|
|
||||||
void MLPPANN::Adam(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI) {
|
void MLPPANN::adam(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool ui) {
|
||||||
class MLPPCost cost;
|
MLPPCost mlpp_cost;
|
||||||
MLPPLinAlg alg;
|
MLPPLinAlg alg;
|
||||||
|
|
||||||
real_t cost_prev = 0;
|
real_t cost_prev = 0;
|
||||||
@ -390,12 +391,12 @@ void MLPPANN::Adam(real_t learning_rate, int max_epoch, int mini_batch_size, rea
|
|||||||
std::vector<real_t> m_output;
|
std::vector<real_t> m_output;
|
||||||
std::vector<real_t> v_output;
|
std::vector<real_t> v_output;
|
||||||
while (true) {
|
while (true) {
|
||||||
learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
|
learning_rate = apply_learning_rate_scheduler(initial_learning_rate, decayConstant, epoch, dropRate);
|
||||||
for (int i = 0; i < n_mini_batch; i++) {
|
for (int i = 0; i < n_mini_batch; i++) {
|
||||||
std::vector<real_t> y_hat = modelSetTest(inputMiniBatches[i]);
|
std::vector<real_t> y_hat = model_set_test(inputMiniBatches[i]);
|
||||||
cost_prev = Cost(y_hat, outputMiniBatches[i]);
|
cost_prev = cost(y_hat, outputMiniBatches[i]);
|
||||||
|
|
||||||
auto grads = computeGradients(y_hat, outputMiniBatches[i]);
|
auto grads = compute_gradients(y_hat, outputMiniBatches[i]);
|
||||||
auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
|
auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
|
||||||
auto outputWGrad = std::get<1>(grads);
|
auto outputWGrad = std::get<1>(grads);
|
||||||
|
|
||||||
@ -424,23 +425,25 @@ void MLPPANN::Adam(real_t learning_rate, int max_epoch, int mini_batch_size, rea
|
|||||||
std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_hidden_hat, alg.scalarAdd(e, alg.sqrt(v_hidden_hat))));
|
std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_hidden_hat, alg.scalarAdd(e, alg.sqrt(v_hidden_hat))));
|
||||||
std::vector<real_t> outputLayerUpdation = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_output_hat, alg.scalarAdd(e, alg.sqrt(v_output_hat))));
|
std::vector<real_t> outputLayerUpdation = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_output_hat, alg.scalarAdd(e, alg.sqrt(v_output_hat))));
|
||||||
|
|
||||||
updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
|
update_parameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
|
||||||
y_hat = modelSetTest(inputMiniBatches[i]);
|
y_hat = model_set_test(inputMiniBatches[i]);
|
||||||
|
|
||||||
if (UI) {
|
if (ui) {
|
||||||
MLPPANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]);
|
print_ui(epoch, cost_prev, y_hat, outputMiniBatches[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
epoch++;
|
epoch++;
|
||||||
|
|
||||||
if (epoch > max_epoch) {
|
if (epoch > max_epoch) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
forwardPass();
|
forward_pass();
|
||||||
}
|
}
|
||||||
|
|
||||||
void MLPPANN::Adamax(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI) {
|
void MLPPANN::adamax(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool ui) {
|
||||||
class MLPPCost cost;
|
MLPPCost mlpp_cost;
|
||||||
MLPPLinAlg alg;
|
MLPPLinAlg alg;
|
||||||
|
|
||||||
real_t cost_prev = 0;
|
real_t cost_prev = 0;
|
||||||
@ -463,12 +466,12 @@ void MLPPANN::Adamax(real_t learning_rate, int max_epoch, int mini_batch_size, r
|
|||||||
std::vector<real_t> m_output;
|
std::vector<real_t> m_output;
|
||||||
std::vector<real_t> u_output;
|
std::vector<real_t> u_output;
|
||||||
while (true) {
|
while (true) {
|
||||||
learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
|
learning_rate = apply_learning_rate_scheduler(initial_learning_rate, decayConstant, epoch, dropRate);
|
||||||
for (int i = 0; i < n_mini_batch; i++) {
|
for (int i = 0; i < n_mini_batch; i++) {
|
||||||
std::vector<real_t> y_hat = modelSetTest(inputMiniBatches[i]);
|
std::vector<real_t> y_hat = model_set_test(inputMiniBatches[i]);
|
||||||
cost_prev = Cost(y_hat, outputMiniBatches[i]);
|
cost_prev = cost(y_hat, outputMiniBatches[i]);
|
||||||
|
|
||||||
auto grads = computeGradients(y_hat, outputMiniBatches[i]);
|
auto grads = compute_gradients(y_hat, outputMiniBatches[i]);
|
||||||
auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
|
auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
|
||||||
auto outputWGrad = std::get<1>(grads);
|
auto outputWGrad = std::get<1>(grads);
|
||||||
|
|
||||||
@ -495,23 +498,25 @@ void MLPPANN::Adamax(real_t learning_rate, int max_epoch, int mini_batch_size, r
|
|||||||
std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_hidden_hat, alg.scalarAdd(e, u_hidden)));
|
std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_hidden_hat, alg.scalarAdd(e, u_hidden)));
|
||||||
std::vector<real_t> outputLayerUpdation = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_output_hat, alg.scalarAdd(e, u_output)));
|
std::vector<real_t> outputLayerUpdation = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_output_hat, alg.scalarAdd(e, u_output)));
|
||||||
|
|
||||||
updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
|
update_parameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
|
||||||
y_hat = modelSetTest(inputMiniBatches[i]);
|
y_hat = model_set_test(inputMiniBatches[i]);
|
||||||
|
|
||||||
if (UI) {
|
if (ui) {
|
||||||
MLPPANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]);
|
print_ui(epoch, cost_prev, y_hat, outputMiniBatches[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
epoch++;
|
epoch++;
|
||||||
|
|
||||||
if (epoch > max_epoch) {
|
if (epoch > max_epoch) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
forwardPass();
|
forward_pass();
|
||||||
}
|
}
|
||||||
|
|
||||||
void MLPPANN::Nadam(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI) {
|
void MLPPANN::nadam(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool ui) {
|
||||||
class MLPPCost cost;
|
MLPPCost mlpp_cost;
|
||||||
MLPPLinAlg alg;
|
MLPPLinAlg alg;
|
||||||
|
|
||||||
real_t cost_prev = 0;
|
real_t cost_prev = 0;
|
||||||
@ -534,12 +539,12 @@ void MLPPANN::Nadam(real_t learning_rate, int max_epoch, int mini_batch_size, re
|
|||||||
std::vector<real_t> m_output;
|
std::vector<real_t> m_output;
|
||||||
std::vector<real_t> v_output;
|
std::vector<real_t> v_output;
|
||||||
while (true) {
|
while (true) {
|
||||||
learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
|
learning_rate = apply_learning_rate_scheduler(initial_learning_rate, decayConstant, epoch, dropRate);
|
||||||
for (int i = 0; i < n_mini_batch; i++) {
|
for (int i = 0; i < n_mini_batch; i++) {
|
||||||
std::vector<real_t> y_hat = modelSetTest(inputMiniBatches[i]);
|
std::vector<real_t> y_hat = model_set_test(inputMiniBatches[i]);
|
||||||
cost_prev = Cost(y_hat, outputMiniBatches[i]);
|
cost_prev = cost(y_hat, outputMiniBatches[i]);
|
||||||
|
|
||||||
auto grads = computeGradients(y_hat, outputMiniBatches[i]);
|
auto grads = compute_gradients(y_hat, outputMiniBatches[i]);
|
||||||
auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
|
auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
|
||||||
auto outputWGrad = std::get<1>(grads);
|
auto outputWGrad = std::get<1>(grads);
|
||||||
|
|
||||||
@ -570,23 +575,26 @@ void MLPPANN::Nadam(real_t learning_rate, int max_epoch, int mini_batch_size, re
|
|||||||
std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_hidden_final, alg.scalarAdd(e, alg.sqrt(v_hidden_hat))));
|
std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_hidden_final, alg.scalarAdd(e, alg.sqrt(v_hidden_hat))));
|
||||||
std::vector<real_t> outputLayerUpdation = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_output_final, alg.scalarAdd(e, alg.sqrt(v_output_hat))));
|
std::vector<real_t> outputLayerUpdation = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_output_final, alg.scalarAdd(e, alg.sqrt(v_output_hat))));
|
||||||
|
|
||||||
updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
|
update_parameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
|
||||||
y_hat = modelSetTest(inputMiniBatches[i]);
|
y_hat = model_set_test(inputMiniBatches[i]);
|
||||||
|
|
||||||
if (UI) {
|
if (ui) {
|
||||||
MLPPANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]);
|
print_ui(epoch, cost_prev, y_hat, outputMiniBatches[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
epoch++;
|
epoch++;
|
||||||
|
|
||||||
if (epoch > max_epoch) {
|
if (epoch > max_epoch) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
forwardPass();
|
|
||||||
|
forward_pass();
|
||||||
}
|
}
|
||||||
|
|
||||||
void MLPPANN::AMSGrad(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI) {
|
void MLPPANN::amsgrad(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool ui) {
|
||||||
class MLPPCost cost;
|
MLPPCost mlpp_cost;
|
||||||
MLPPLinAlg alg;
|
MLPPLinAlg alg;
|
||||||
|
|
||||||
real_t cost_prev = 0;
|
real_t cost_prev = 0;
|
||||||
@ -613,12 +621,12 @@ void MLPPANN::AMSGrad(real_t learning_rate, int max_epoch, int mini_batch_size,
|
|||||||
|
|
||||||
std::vector<real_t> v_output_hat;
|
std::vector<real_t> v_output_hat;
|
||||||
while (true) {
|
while (true) {
|
||||||
learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
|
learning_rate = apply_learning_rate_scheduler(initial_learning_rate, decayConstant, epoch, dropRate);
|
||||||
for (int i = 0; i < n_mini_batch; i++) {
|
for (int i = 0; i < n_mini_batch; i++) {
|
||||||
std::vector<real_t> y_hat = modelSetTest(inputMiniBatches[i]);
|
std::vector<real_t> y_hat = model_set_test(inputMiniBatches[i]);
|
||||||
cost_prev = Cost(y_hat, outputMiniBatches[i]);
|
cost_prev = cost(y_hat, outputMiniBatches[i]);
|
||||||
|
|
||||||
auto grads = computeGradients(y_hat, outputMiniBatches[i]);
|
auto grads = compute_gradients(y_hat, outputMiniBatches[i]);
|
||||||
auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
|
auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
|
||||||
auto outputWGrad = std::get<1>(grads);
|
auto outputWGrad = std::get<1>(grads);
|
||||||
|
|
||||||
@ -647,24 +655,27 @@ void MLPPANN::AMSGrad(real_t learning_rate, int max_epoch, int mini_batch_size,
|
|||||||
std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_hidden, alg.scalarAdd(e, alg.sqrt(v_hidden_hat))));
|
std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_hidden, alg.scalarAdd(e, alg.sqrt(v_hidden_hat))));
|
||||||
std::vector<real_t> outputLayerUpdation = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_output, alg.scalarAdd(e, alg.sqrt(v_output_hat))));
|
std::vector<real_t> outputLayerUpdation = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_output, alg.scalarAdd(e, alg.sqrt(v_output_hat))));
|
||||||
|
|
||||||
updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
|
update_parameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
|
||||||
y_hat = modelSetTest(inputMiniBatches[i]);
|
y_hat = model_set_test(inputMiniBatches[i]);
|
||||||
|
|
||||||
if (UI) {
|
if (ui) {
|
||||||
MLPPANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]);
|
print_ui(epoch, cost_prev, y_hat, outputMiniBatches[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
epoch++;
|
epoch++;
|
||||||
|
|
||||||
if (epoch > max_epoch) {
|
if (epoch > max_epoch) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
forwardPass();
|
|
||||||
|
forward_pass();
|
||||||
}
|
}
|
||||||
|
|
||||||
real_t MLPPANN::score() {
|
real_t MLPPANN::score() {
|
||||||
MLPPUtilities util;
|
MLPPUtilities util;
|
||||||
forwardPass();
|
forward_pass();
|
||||||
return util.performance(y_hat, outputSet);
|
return util.performance(y_hat, outputSet);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -681,12 +692,12 @@ void MLPPANN::save(std::string fileName) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void MLPPANN::setLearningRateScheduler(std::string type, real_t decayConstant) {
|
void MLPPANN::set_learning_rate_scheduler(std::string type, real_t decayConstant) {
|
||||||
lrScheduler = type;
|
lrScheduler = type;
|
||||||
MLPPANN::decayConstant = decayConstant;
|
MLPPANN::decayConstant = decayConstant;
|
||||||
}
|
}
|
||||||
|
|
||||||
void MLPPANN::setLearningRateScheduler(std::string type, real_t decayConstant, real_t dropRate) {
|
void MLPPANN::set_learning_rate_scheduler_drop(std::string type, real_t decayConstant, real_t dropRate) {
|
||||||
lrScheduler = type;
|
lrScheduler = type;
|
||||||
MLPPANN::decayConstant = decayConstant;
|
MLPPANN::decayConstant = decayConstant;
|
||||||
MLPPANN::dropRate = dropRate;
|
MLPPANN::dropRate = dropRate;
|
||||||
@ -694,7 +705,7 @@ void MLPPANN::setLearningRateScheduler(std::string type, real_t decayConstant, r
|
|||||||
|
|
||||||
// https://en.wikipedia.org/wiki/Learning_rate
|
// https://en.wikipedia.org/wiki/Learning_rate
|
||||||
// Learning Rate Decay (C2W2L09) - Andrew Ng - Deep Learning Specialization
|
// Learning Rate Decay (C2W2L09) - Andrew Ng - Deep Learning Specialization
|
||||||
real_t MLPPANN::applyLearningRateScheduler(real_t learningRate, real_t decayConstant, real_t epoch, real_t dropRate) {
|
real_t MLPPANN::apply_learning_rate_scheduler(real_t learningRate, real_t decayConstant, real_t epoch, real_t dropRate) {
|
||||||
if (lrScheduler == "Time") {
|
if (lrScheduler == "Time") {
|
||||||
return learningRate / (1 + decayConstant * epoch);
|
return learningRate / (1 + decayConstant * epoch);
|
||||||
} else if (lrScheduler == "Epoch") {
|
} else if (lrScheduler == "Epoch") {
|
||||||
@ -707,7 +718,7 @@ real_t MLPPANN::applyLearningRateScheduler(real_t learningRate, real_t decayCons
|
|||||||
return learningRate;
|
return learningRate;
|
||||||
}
|
}
|
||||||
|
|
||||||
void MLPPANN::addLayer(int n_hidden, std::string activation, std::string weightInit, std::string reg, real_t lambda, real_t alpha) {
|
void MLPPANN::add_layer(int n_hidden, std::string activation, std::string weightInit, std::string reg, real_t lambda, real_t alpha) {
|
||||||
if (network.empty()) {
|
if (network.empty()) {
|
||||||
network.push_back(MLPPOldHiddenLayer(n_hidden, activation, inputSet, weightInit, reg, lambda, alpha));
|
network.push_back(MLPPOldHiddenLayer(n_hidden, activation, inputSet, weightInit, reg, lambda, alpha));
|
||||||
network[0].forwardPass();
|
network[0].forwardPass();
|
||||||
@ -717,7 +728,7 @@ void MLPPANN::addLayer(int n_hidden, std::string activation, std::string weightI
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void MLPPANN::addOutputLayer(std::string activation, std::string loss, std::string weightInit, std::string reg, real_t lambda, real_t alpha) {
|
void MLPPANN::add_output_layer(std::string activation, std::string loss, std::string weightInit, std::string reg, real_t lambda, real_t alpha) {
|
||||||
if (!network.empty()) {
|
if (!network.empty()) {
|
||||||
outputLayer = new MLPPOldOutputLayer(network[network.size() - 1].n_hidden, activation, loss, network[network.size() - 1].a, weightInit, reg, lambda, alpha);
|
outputLayer = new MLPPOldOutputLayer(network[network.size() - 1].n_hidden, activation, loss, network[network.size() - 1].a, weightInit, reg, lambda, alpha);
|
||||||
} else {
|
} else {
|
||||||
@ -725,21 +736,41 @@ void MLPPANN::addOutputLayer(std::string activation, std::string loss, std::stri
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
real_t MLPPANN::Cost(std::vector<real_t> y_hat, std::vector<real_t> y) {
|
MLPPANN::MLPPANN(std::vector<std::vector<real_t>> p_inputSet, std::vector<real_t> p_outputSet) {
|
||||||
|
inputSet = p_inputSet;
|
||||||
|
outputSet = p_outputSet;
|
||||||
|
|
||||||
|
n = inputSet.size();
|
||||||
|
k = inputSet[0].size();
|
||||||
|
lrScheduler = "None";
|
||||||
|
decayConstant = 0;
|
||||||
|
dropRate = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
MLPPANN::MLPPANN() {
|
||||||
|
}
|
||||||
|
|
||||||
|
MLPPANN::~MLPPANN() {
|
||||||
|
delete outputLayer;
|
||||||
|
}
|
||||||
|
|
||||||
|
real_t MLPPANN::cost(std::vector<real_t> y_hat, std::vector<real_t> y) {
|
||||||
MLPPReg regularization;
|
MLPPReg regularization;
|
||||||
class MLPPCost cost;
|
MLPPCost mlpp_cost;
|
||||||
real_t totalRegTerm = 0;
|
real_t totalRegTerm = 0;
|
||||||
|
|
||||||
auto cost_function = outputLayer->cost_map[outputLayer->cost];
|
auto cost_function = outputLayer->cost_map[outputLayer->cost];
|
||||||
|
|
||||||
if (!network.empty()) {
|
if (!network.empty()) {
|
||||||
for (uint32_t i = 0; i < network.size() - 1; i++) {
|
for (uint32_t i = 0; i < network.size() - 1; i++) {
|
||||||
totalRegTerm += regularization.regTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg);
|
totalRegTerm += regularization.regTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return (cost.*cost_function)(y_hat, y) + totalRegTerm + regularization.regTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg);
|
|
||||||
|
return (mlpp_cost.*cost_function)(y_hat, y) + totalRegTerm + regularization.regTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg);
|
||||||
}
|
}
|
||||||
|
|
||||||
void MLPPANN::forwardPass() {
|
void MLPPANN::forward_pass() {
|
||||||
if (!network.empty()) {
|
if (!network.empty()) {
|
||||||
network[0].input = inputSet;
|
network[0].input = inputSet;
|
||||||
network[0].forwardPass();
|
network[0].forwardPass();
|
||||||
@ -752,11 +783,12 @@ void MLPPANN::forwardPass() {
|
|||||||
} else {
|
} else {
|
||||||
outputLayer->input = inputSet;
|
outputLayer->input = inputSet;
|
||||||
}
|
}
|
||||||
|
|
||||||
outputLayer->forwardPass();
|
outputLayer->forwardPass();
|
||||||
y_hat = outputLayer->a;
|
y_hat = outputLayer->a;
|
||||||
}
|
}
|
||||||
|
|
||||||
void MLPPANN::updateParameters(std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations, std::vector<real_t> outputLayerUpdation, real_t learning_rate) {
|
void MLPPANN::update_parameters(std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations, std::vector<real_t> outputLayerUpdation, real_t learning_rate) {
|
||||||
MLPPLinAlg alg;
|
MLPPLinAlg alg;
|
||||||
|
|
||||||
outputLayer->weights = alg.subtraction(outputLayer->weights, outputLayerUpdation);
|
outputLayer->weights = alg.subtraction(outputLayer->weights, outputLayerUpdation);
|
||||||
@ -773,9 +805,9 @@ void MLPPANN::updateParameters(std::vector<std::vector<std::vector<real_t>>> hid
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::tuple<std::vector<std::vector<std::vector<real_t>>>, std::vector<real_t>> MLPPANN::computeGradients(std::vector<real_t> y_hat, std::vector<real_t> outputSet) {
|
std::tuple<std::vector<std::vector<std::vector<real_t>>>, std::vector<real_t>> MLPPANN::compute_gradients(std::vector<real_t> y_hat, std::vector<real_t> outputSet) {
|
||||||
// std::cout << "BEGIN" << std::endl;
|
// std::cout << "BEGIN" << std::endl;
|
||||||
class MLPPCost cost;
|
MLPPCost mlpp_cost;
|
||||||
MLPPActivation avn;
|
MLPPActivation avn;
|
||||||
MLPPLinAlg alg;
|
MLPPLinAlg alg;
|
||||||
MLPPReg regularization;
|
MLPPReg regularization;
|
||||||
@ -784,7 +816,7 @@ std::tuple<std::vector<std::vector<std::vector<real_t>>>, std::vector<real_t>> M
|
|||||||
|
|
||||||
auto costDeriv = outputLayer->costDeriv_map[outputLayer->cost];
|
auto costDeriv = outputLayer->costDeriv_map[outputLayer->cost];
|
||||||
auto outputAvn = outputLayer->activation_map[outputLayer->activation];
|
auto outputAvn = outputLayer->activation_map[outputLayer->activation];
|
||||||
outputLayer->delta = alg.hadamard_product((cost.*costDeriv)(y_hat, outputSet), (avn.*outputAvn)(outputLayer->z, 1));
|
outputLayer->delta = alg.hadamard_product((mlpp_cost.*costDeriv)(y_hat, outputSet), (avn.*outputAvn)(outputLayer->z, 1));
|
||||||
std::vector<real_t> outputWGrad = alg.mat_vec_mult(alg.transpose(outputLayer->input), outputLayer->delta);
|
std::vector<real_t> outputWGrad = alg.mat_vec_mult(alg.transpose(outputLayer->input), outputLayer->delta);
|
||||||
outputWGrad = alg.addition(outputWGrad, regularization.regDerivTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg));
|
outputWGrad = alg.addition(outputWGrad, regularization.regDerivTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg));
|
||||||
|
|
||||||
@ -805,8 +837,8 @@ std::tuple<std::vector<std::vector<std::vector<real_t>>>, std::vector<real_t>> M
|
|||||||
return { cumulativeHiddenLayerWGrad, outputWGrad };
|
return { cumulativeHiddenLayerWGrad, outputWGrad };
|
||||||
}
|
}
|
||||||
|
|
||||||
void MLPPANN::UI(int epoch, real_t cost_prev, std::vector<real_t> y_hat, std::vector<real_t> outputSet) {
|
void MLPPANN::print_ui(int epoch, real_t cost_prev, std::vector<real_t> y_hat, std::vector<real_t> outputSet) {
|
||||||
MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
|
MLPPUtilities::CostInfo(epoch, cost_prev, cost(y_hat, outputSet));
|
||||||
std::cout << "Layer " << network.size() + 1 << ": " << std::endl;
|
std::cout << "Layer " << network.size() + 1 << ": " << std::endl;
|
||||||
MLPPUtilities::UI(outputLayer->weights, outputLayer->bias);
|
MLPPUtilities::UI(outputLayer->weights, outputLayer->bias);
|
||||||
if (!network.empty()) {
|
if (!network.empty()) {
|
||||||
|
@ -21,39 +21,44 @@
|
|||||||
|
|
||||||
class MLPPANN {
|
class MLPPANN {
|
||||||
public:
|
public:
|
||||||
MLPPANN(std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet);
|
std::vector<real_t> model_set_test(std::vector<std::vector<real_t>> X);
|
||||||
~MLPPANN();
|
real_t model_test(std::vector<real_t> x);
|
||||||
std::vector<real_t> modelSetTest(std::vector<std::vector<real_t>> X);
|
|
||||||
real_t modelTest(std::vector<real_t> x);
|
void gradient_descent(real_t learning_rate, int max_epoch, bool ui = false);
|
||||||
void gradientDescent(real_t learning_rate, int max_epoch, bool UI = false);
|
void sgd(real_t learning_rate, int max_epoch, bool ui = false);
|
||||||
void SGD(real_t learning_rate, int max_epoch, bool UI = false);
|
void mbgd(real_t learning_rate, int max_epoch, int mini_batch_size, bool ui = false);
|
||||||
void MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI = false);
|
void momentum(real_t learning_rate, int max_epoch, int mini_batch_size, real_t gamma, bool nag, bool ui = false);
|
||||||
void Momentum(real_t learning_rate, int max_epoch, int mini_batch_size, real_t gamma, bool NAG, bool UI = false);
|
void adagrad(real_t learning_rate, int max_epoch, int mini_batch_size, real_t e, bool ui = false);
|
||||||
void Adagrad(real_t learning_rate, int max_epoch, int mini_batch_size, real_t e, bool UI = false);
|
void adadelta(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t e, bool ui = false);
|
||||||
void Adadelta(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t e, bool UI = false);
|
void adam(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool ui = false);
|
||||||
void Adam(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI = false);
|
void adamax(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool ui = false);
|
||||||
void Adamax(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI = false);
|
void nadam(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool ui = false);
|
||||||
void Nadam(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI = false);
|
void amsgrad(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool ui = false);
|
||||||
void AMSGrad(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI = false);
|
|
||||||
real_t score();
|
real_t score();
|
||||||
void save(std::string fileName);
|
void save(std::string file_name);
|
||||||
|
|
||||||
void setLearningRateScheduler(std::string type, real_t decayConstant);
|
void set_learning_rate_scheduler(std::string type, real_t decay_constant);
|
||||||
void setLearningRateScheduler(std::string type, real_t decayConstant, real_t dropRate);
|
void set_learning_rate_scheduler_drop(std::string type, real_t decay_constant, real_t drop_rate);
|
||||||
|
|
||||||
void addLayer(int n_hidden, std::string activation, std::string weightInit = "Default", std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5);
|
void add_layer(int n_hidden, std::string activation, std::string weight_init = "Default", std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5);
|
||||||
void addOutputLayer(std::string activation, std::string loss, std::string weightInit = "Default", std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5);
|
void add_output_layer(std::string activation, std::string loss, std::string weight_init = "Default", std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5);
|
||||||
|
|
||||||
|
MLPPANN(std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet);
|
||||||
|
|
||||||
|
MLPPANN();
|
||||||
|
~MLPPANN();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
real_t applyLearningRateScheduler(real_t learningRate, real_t decayConstant, real_t epoch, real_t dropRate);
|
real_t apply_learning_rate_scheduler(real_t learningRate, real_t decayConstant, real_t epoch, real_t dropRate);
|
||||||
|
|
||||||
real_t Cost(std::vector<real_t> y_hat, std::vector<real_t> y);
|
real_t cost(std::vector<real_t> y_hat, std::vector<real_t> y);
|
||||||
|
|
||||||
void forwardPass();
|
void forward_pass();
|
||||||
void updateParameters(std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations, std::vector<real_t> outputLayerUpdation, real_t learning_rate);
|
void update_parameters(std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations, std::vector<real_t> outputLayerUpdation, real_t learning_rate);
|
||||||
std::tuple<std::vector<std::vector<std::vector<real_t>>>, std::vector<real_t>> computeGradients(std::vector<real_t> y_hat, std::vector<real_t> outputSet);
|
std::tuple<std::vector<std::vector<std::vector<real_t>>>, std::vector<real_t>> compute_gradients(std::vector<real_t> y_hat, std::vector<real_t> outputSet);
|
||||||
|
|
||||||
void UI(int epoch, real_t cost_prev, std::vector<real_t> y_hat, std::vector<real_t> outputSet);
|
void print_ui(int epoch, real_t cost_prev, std::vector<real_t> y_hat, std::vector<real_t> outputSet);
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> inputSet;
|
std::vector<std::vector<real_t>> inputSet;
|
||||||
std::vector<real_t> outputSet;
|
std::vector<real_t> outputSet;
|
||||||
|
@ -591,17 +591,31 @@ void MLPPTests::test_dynamically_sized_ann(bool ui) {
|
|||||||
// Possible Loss Functions: MSE, RMSE, MBE, LogLoss, CrossEntropy, HingeLoss
|
// Possible Loss Functions: MSE, RMSE, MBE, LogLoss, CrossEntropy, HingeLoss
|
||||||
std::vector<std::vector<real_t>> inputSet = { { 0, 0, 1, 1 }, { 0, 1, 0, 1 } };
|
std::vector<std::vector<real_t>> inputSet = { { 0, 0, 1, 1 }, { 0, 1, 0, 1 } };
|
||||||
std::vector<real_t> outputSet = { 0, 1, 1, 0 };
|
std::vector<real_t> outputSet = { 0, 1, 1, 0 };
|
||||||
|
|
||||||
|
MLPPANNOld ann_old(alg.transpose(inputSet), outputSet);
|
||||||
|
ann_old.addLayer(2, "Cosh");
|
||||||
|
ann_old.addOutputLayer("Sigmoid", "LogLoss");
|
||||||
|
|
||||||
|
ann_old.AMSGrad(0.1, 10000, 1, 0.9, 0.999, 0.000001, ui);
|
||||||
|
ann_old.Adadelta(1, 1000, 2, 0.9, 0.000001, ui);
|
||||||
|
ann_old.Momentum(0.1, 8000, 2, 0.9, true, ui);
|
||||||
|
|
||||||
|
ann_old.setLearningRateScheduler("Step", 0.5, 1000);
|
||||||
|
ann_old.gradientDescent(0.01, 30000);
|
||||||
|
alg.printVector(ann_old.modelSetTest(alg.transpose(inputSet)));
|
||||||
|
std::cout << "ACCURACY: " << 100 * ann_old.score() << "%" << std::endl;
|
||||||
|
|
||||||
MLPPANN ann(alg.transpose(inputSet), outputSet);
|
MLPPANN ann(alg.transpose(inputSet), outputSet);
|
||||||
ann.addLayer(2, "Cosh");
|
ann.add_layer(2, "Cosh");
|
||||||
ann.addOutputLayer("Sigmoid", "LogLoss");
|
ann.add_output_layer("Sigmoid", "LogLoss");
|
||||||
|
|
||||||
ann.AMSGrad(0.1, 10000, 1, 0.9, 0.999, 0.000001, ui);
|
ann.amsgrad(0.1, 10000, 1, 0.9, 0.999, 0.000001, ui);
|
||||||
ann.Adadelta(1, 1000, 2, 0.9, 0.000001, ui);
|
ann.adadelta(1, 1000, 2, 0.9, 0.000001, ui);
|
||||||
ann.Momentum(0.1, 8000, 2, 0.9, true, ui);
|
ann.momentum(0.1, 8000, 2, 0.9, true, ui);
|
||||||
|
|
||||||
ann.setLearningRateScheduler("Step", 0.5, 1000);
|
ann.set_learning_rate_scheduler_drop("Step", 0.5, 1000);
|
||||||
ann.gradientDescent(0.01, 30000);
|
ann.gradient_descent(0.01, 30000);
|
||||||
alg.printVector(ann.modelSetTest(alg.transpose(inputSet)));
|
alg.printVector(ann.model_set_test(alg.transpose(inputSet)));
|
||||||
std::cout << "ACCURACY: " << 100 * ann.score() << "%" << std::endl;
|
std::cout << "ACCURACY: " << 100 * ann.score() << "%" << std::endl;
|
||||||
}
|
}
|
||||||
void MLPPTests::test_wgan_old(bool ui) {
|
void MLPPTests::test_wgan_old(bool ui) {
|
||||||
@ -660,13 +674,23 @@ void MLPPTests::test_ann(bool ui) {
|
|||||||
std::vector<std::vector<real_t>> inputSet = { { 0, 0 }, { 0, 1 }, { 1, 0 }, { 1, 1 } }; // XOR
|
std::vector<std::vector<real_t>> inputSet = { { 0, 0 }, { 0, 1 }, { 1, 0 }, { 1, 1 } }; // XOR
|
||||||
std::vector<real_t> outputSet = { 0, 1, 1, 0 };
|
std::vector<real_t> outputSet = { 0, 1, 1, 0 };
|
||||||
|
|
||||||
MLPPANN ann(inputSet, outputSet);
|
MLPPANNOld ann_old(inputSet, outputSet);
|
||||||
ann.addLayer(5, "Sigmoid");
|
ann_old.addLayer(5, "Sigmoid");
|
||||||
ann.addLayer(8, "Sigmoid"); // Add more layers as needed.
|
ann_old.addLayer(8, "Sigmoid"); // Add more layers as needed.
|
||||||
ann.addOutputLayer("Sigmoid", "LogLoss");
|
ann_old.addOutputLayer("Sigmoid", "LogLoss");
|
||||||
ann.gradientDescent(1, 20000, ui);
|
ann_old.gradientDescent(1, 20000, ui);
|
||||||
|
|
||||||
std::vector<real_t> predictions = ann.modelSetTest(inputSet);
|
std::vector<real_t> predictions_old = ann_old.modelSetTest(inputSet);
|
||||||
|
alg.printVector(predictions_old); // Testing out the model's preds for train set.
|
||||||
|
std::cout << "ACCURACY: " << 100 * ann_old.score() << "%" << std::endl; // Accuracy.
|
||||||
|
|
||||||
|
MLPPANN ann(inputSet, outputSet);
|
||||||
|
ann.add_layer(5, "Sigmoid");
|
||||||
|
ann.add_layer(8, "Sigmoid"); // Add more layers as needed.
|
||||||
|
ann.add_output_layer("Sigmoid", "LogLoss");
|
||||||
|
ann.gradient_descent(1, 20000, ui);
|
||||||
|
|
||||||
|
std::vector<real_t> predictions = ann.model_set_test(inputSet);
|
||||||
alg.printVector(predictions); // Testing out the model's preds for train set.
|
alg.printVector(predictions); // Testing out the model's preds for train set.
|
||||||
std::cout << "ACCURACY: " << 100 * ann.score() << "%" << std::endl; // Accuracy.
|
std::cout << "ACCURACY: " << 100 * ann.score() << "%" << std::endl; // Accuracy.
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user