Mirror of https://github.com/Relintai/MLPP.git
Added new optimizers for Neural Nets.
This commit is contained in:
parent a4c36293f9
commit 2c83feb410

MLPP/ANN/ANN.cpp (358 changed lines)
@@ -114,7 +114,158 @@ namespace MLPP {
         forwardPass();
     }
 
+    void ANN::Momentum(double learning_rate, int max_epoch, int mini_batch_size, double gamma, bool NAG, bool UI){
+        class Cost cost;
+        LinAlg alg;
+
+        double cost_prev = 0;
+        int epoch = 1;
+
+        // Creating the mini-batches
+        int n_mini_batch = n/mini_batch_size;
+        // always evaluate the result
+        // always do forward pass only ONCE at end.
+        auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
+
+        // Initializing necessary components for Momentum.
+        std::vector<std::vector<std::vector<double>>> v_hidden;
+        std::vector<double> v_output;
+
+        while(true){
+            for(int i = 0; i < n_mini_batch; i++){
+                std::vector<double> y_hat = modelSetTest(inputMiniBatches[i]);
+                cost_prev = Cost(y_hat, outputMiniBatches[i]);
+
+                auto [cumulativeHiddenLayerWGrad, outputWGrad] = computeGradients(y_hat, outputMiniBatches[i]);
+
+                if(!network.empty() && v_hidden.empty()){ // Initing our tensor
+                    v_hidden = alg.resize(v_hidden, cumulativeHiddenLayerWGrad);
+                }
+
+                if(v_output.empty()){
+                    v_output.resize(outputWGrad.size());
+                }
+
+                if(NAG){ // "A posteriori" calculation
+                    updateParameters(v_hidden, v_output, 0); // DON'T update bias.
+                }
+
+                v_hidden = alg.addition(alg.scalarMultiply(gamma, v_hidden), alg.scalarMultiply(learning_rate/n, cumulativeHiddenLayerWGrad));
+                v_output = alg.addition(alg.scalarMultiply(gamma, v_output), alg.scalarMultiply(learning_rate/n, outputWGrad));
+
+                updateParameters(v_hidden, v_output, learning_rate); // subject to change. may want bias to have this matrix too.
+                y_hat = modelSetTest(inputMiniBatches[i]);
+
+                if(UI) { ANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]); }
+            }
+            epoch++;
+            if(epoch > max_epoch) { break; }
+        }
+        forwardPass();
+    }
+
+    void ANN::Adagrad(double learning_rate, int max_epoch, int mini_batch_size, double e, bool UI){
+        class Cost cost;
+        LinAlg alg;
+
+        double cost_prev = 0;
+        int epoch = 1;
+
+        // Creating the mini-batches
+        int n_mini_batch = n/mini_batch_size;
+        // always evaluate the result
+        // always do forward pass only ONCE at end.
+        auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
+
+        // Initializing necessary components for Adagrad.
+        std::vector<std::vector<std::vector<double>>> v_hidden;
+        std::vector<double> v_output;
+
+        while(true){
+            for(int i = 0; i < n_mini_batch; i++){
+                std::vector<double> y_hat = modelSetTest(inputMiniBatches[i]);
+                cost_prev = Cost(y_hat, outputMiniBatches[i]);
+
+                auto [cumulativeHiddenLayerWGrad, outputWGrad] = computeGradients(y_hat, outputMiniBatches[i]);
+
+                if(!network.empty() && v_hidden.empty()){ // Initing our tensor
+                    v_hidden = alg.resize(v_hidden, cumulativeHiddenLayerWGrad);
+                }
+
+                if(v_output.empty()){
+                    v_output.resize(outputWGrad.size());
+                }
+
+                v_hidden = alg.addition(v_hidden, alg.exponentiate(cumulativeHiddenLayerWGrad, 2));
+                v_output = alg.addition(v_output, alg.exponentiate(outputWGrad, 2));
+
+                std::vector<std::vector<std::vector<double>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(cumulativeHiddenLayerWGrad, alg.scalarAdd(e, alg.sqrt(v_hidden))));
+                std::vector<double> outputLayerUpdation = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(outputWGrad, alg.scalarAdd(e, alg.sqrt(v_output))));
+
+                updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
+                y_hat = modelSetTest(inputMiniBatches[i]);
+
+                if(UI) { ANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]); }
+            }
+            epoch++;
+            if(epoch > max_epoch) { break; }
+        }
+        forwardPass();
+    }
+
+    void ANN::Adadelta(double learning_rate, int max_epoch, int mini_batch_size, double b1, double e, bool UI){
+        class Cost cost;
+        LinAlg alg;
+
+        double cost_prev = 0;
+        int epoch = 1;
+
+        // Creating the mini-batches
+        int n_mini_batch = n/mini_batch_size;
+        // always evaluate the result
+        // always do forward pass only ONCE at end.
+        auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
+
+        // Initializing necessary components for Adadelta.
+        std::vector<std::vector<std::vector<double>>> v_hidden;
+        std::vector<double> v_output;
+
+        while(true){
+            for(int i = 0; i < n_mini_batch; i++){
+                std::vector<double> y_hat = modelSetTest(inputMiniBatches[i]);
+                cost_prev = Cost(y_hat, outputMiniBatches[i]);
+
+                auto [cumulativeHiddenLayerWGrad, outputWGrad] = computeGradients(y_hat, outputMiniBatches[i]);
+
+                if(!network.empty() && v_hidden.empty()){ // Initing our tensor
+                    v_hidden = alg.resize(v_hidden, cumulativeHiddenLayerWGrad);
+                }
+
+                if(v_output.empty()){
+                    v_output.resize(outputWGrad.size());
+                }
+
+                v_hidden = alg.addition(alg.scalarMultiply(1 - b1, v_hidden), alg.scalarMultiply(b1, alg.exponentiate(cumulativeHiddenLayerWGrad, 2)));
+                v_output = alg.addition(alg.scalarMultiply(1 - b1, v_output), alg.scalarMultiply(b1, alg.exponentiate(outputWGrad, 2))); // same running average as the hidden layers
+
+                std::vector<std::vector<std::vector<double>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(cumulativeHiddenLayerWGrad, alg.scalarAdd(e, alg.sqrt(v_hidden))));
+                std::vector<double> outputLayerUpdation = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(outputWGrad, alg.scalarAdd(e, alg.sqrt(v_output))));
+
+                updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
+                y_hat = modelSetTest(inputMiniBatches[i]);
+
+                if(UI) { ANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]); }
+            }
+            epoch++;
+            if(epoch > max_epoch) { break; }
+        }
+        forwardPass();
+    }
+
     void ANN::Adam(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI){
         class Cost cost;
         LinAlg alg;
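For reference, the per-mini-batch updates the three routines above implement (a textbook sketch, not part of the diff; g is a layer's weight gradient, alpha the learning_rate, n the dataset size, and the products/divisions are element-wise):

```latex
% Momentum (with NAG, the look-ahead step -\gamma v is applied before g is computed):
v \leftarrow \gamma\, v + \tfrac{\alpha}{n}\, g, \qquad \theta \leftarrow \theta - v
% Adagrad: accumulate squared gradients, scale each weight's step:
G \leftarrow G + g \odot g, \qquad \theta \leftarrow \theta - \tfrac{\alpha}{n}\, g \oslash \left(\sqrt{G} + \epsilon\right)
% Adadelta as written here (an exponentially weighted average, RMSProp-style):
G \leftarrow (1-b_1)\, G + b_1\, g \odot g, \qquad \theta \leftarrow \theta - \tfrac{\alpha}{n}\, g \oslash \left(\sqrt{G} + \epsilon\right)
```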
@@ -139,18 +290,9 @@ namespace MLPP {
             cost_prev = Cost(y_hat, outputMiniBatches[i]);
 
             auto [cumulativeHiddenLayerWGrad, outputWGrad] = computeGradients(y_hat, outputMiniBatches[i]);
 
             if(!network.empty() && m_hidden.empty() && v_hidden.empty()){ // Initing our tensor
-                m_hidden.resize(cumulativeHiddenLayerWGrad.size());
-                v_hidden.resize(cumulativeHiddenLayerWGrad.size());
-                for(int i = 0; i < cumulativeHiddenLayerWGrad.size(); i++){
-                    m_hidden[i].resize(cumulativeHiddenLayerWGrad[i].size());
-                    v_hidden[i].resize(cumulativeHiddenLayerWGrad[i].size());
-                    for(int j = 0; j < cumulativeHiddenLayerWGrad[i].size(); j++){
-                        m_hidden[i][j].resize(cumulativeHiddenLayerWGrad[i][j].size());
-                        v_hidden[i][j].resize(cumulativeHiddenLayerWGrad[i][j].size());
-                    }
-                }
+                m_hidden = alg.resize(m_hidden, cumulativeHiddenLayerWGrad);
+                v_hidden = alg.resize(v_hidden, cumulativeHiddenLayerWGrad);
             }
 
             if(m_output.empty() && v_output.empty()){
@@ -185,6 +327,198 @@ namespace MLPP {
         forwardPass();
     }
 
+    void ANN::Adamax(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI){
+        class Cost cost;
+        LinAlg alg;
+
+        double cost_prev = 0;
+        int epoch = 1;
+
+        // Creating the mini-batches
+        int n_mini_batch = n/mini_batch_size;
+        // always evaluate the result
+        // always do forward pass only ONCE at end.
+        auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
+
+        // Initializing necessary components for Adamax.
+        std::vector<std::vector<std::vector<double>>> m_hidden;
+        std::vector<std::vector<std::vector<double>>> u_hidden;
+
+        std::vector<double> m_output;
+        std::vector<double> u_output;
+
+        while(true){
+            for(int i = 0; i < n_mini_batch; i++){
+                std::vector<double> y_hat = modelSetTest(inputMiniBatches[i]);
+                cost_prev = Cost(y_hat, outputMiniBatches[i]);
+
+                auto [cumulativeHiddenLayerWGrad, outputWGrad] = computeGradients(y_hat, outputMiniBatches[i]);
+                if(!network.empty() && m_hidden.empty() && u_hidden.empty()){ // Initing our tensor
+                    m_hidden = alg.resize(m_hidden, cumulativeHiddenLayerWGrad);
+                    u_hidden = alg.resize(u_hidden, cumulativeHiddenLayerWGrad);
+                }
+
+                if(m_output.empty() && u_output.empty()){
+                    m_output.resize(outputWGrad.size());
+                    u_output.resize(outputWGrad.size());
+                }
+
+                m_hidden = alg.addition(alg.scalarMultiply(b1, m_hidden), alg.scalarMultiply(1 - b1, cumulativeHiddenLayerWGrad));
+                u_hidden = alg.max(alg.scalarMultiply(b2, u_hidden), alg.abs(cumulativeHiddenLayerWGrad));
+
+                m_output = alg.addition(alg.scalarMultiply(b1, m_output), alg.scalarMultiply(1 - b1, outputWGrad));
+                u_output = alg.max(alg.scalarMultiply(b2, u_output), alg.abs(outputWGrad));
+
+                std::vector<std::vector<std::vector<double>>> m_hidden_hat = alg.scalarMultiply(1/(1 - pow(b1, epoch)), m_hidden);
+                std::vector<double> m_output_hat = alg.scalarMultiply(1/(1 - pow(b1, epoch)), m_output);
+
+                std::vector<std::vector<std::vector<double>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(m_hidden_hat, alg.scalarAdd(e, u_hidden)));
+                std::vector<double> outputLayerUpdation = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(m_output_hat, alg.scalarAdd(e, u_output)));
+
+                updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
+                y_hat = modelSetTest(inputMiniBatches[i]);
+
+                if(UI) { ANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]); }
+            }
+            epoch++;
+            if(epoch > max_epoch) { break; }
+        }
+        forwardPass();
+    }
+
+    void ANN::Nadam(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI){
+        class Cost cost;
+        LinAlg alg;
+
+        double cost_prev = 0;
+        int epoch = 1;
+
+        // Creating the mini-batches
+        int n_mini_batch = n/mini_batch_size;
+        // always evaluate the result
+        // always do forward pass only ONCE at end.
+        auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
+
+        // Initializing necessary components for Nadam.
+        std::vector<std::vector<std::vector<double>>> m_hidden;
+        std::vector<std::vector<std::vector<double>>> v_hidden;
+        std::vector<std::vector<std::vector<double>>> m_hidden_final;
+
+        std::vector<double> m_output;
+        std::vector<double> v_output;
+
+        while(true){
+            for(int i = 0; i < n_mini_batch; i++){
+                std::vector<double> y_hat = modelSetTest(inputMiniBatches[i]);
+                cost_prev = Cost(y_hat, outputMiniBatches[i]);
+
+                auto [cumulativeHiddenLayerWGrad, outputWGrad] = computeGradients(y_hat, outputMiniBatches[i]);
+                if(!network.empty() && m_hidden.empty() && v_hidden.empty()){ // Initing our tensor
+                    m_hidden = alg.resize(m_hidden, cumulativeHiddenLayerWGrad);
+                    v_hidden = alg.resize(v_hidden, cumulativeHiddenLayerWGrad);
+                }
+
+                if(m_output.empty() && v_output.empty()){
+                    m_output.resize(outputWGrad.size());
+                    v_output.resize(outputWGrad.size());
+                }
+
+                m_hidden = alg.addition(alg.scalarMultiply(b1, m_hidden), alg.scalarMultiply(1 - b1, cumulativeHiddenLayerWGrad));
+                v_hidden = alg.addition(alg.scalarMultiply(b2, v_hidden), alg.scalarMultiply(1 - b2, alg.exponentiate(cumulativeHiddenLayerWGrad, 2)));
+
+                m_output = alg.addition(alg.scalarMultiply(b1, m_output), alg.scalarMultiply(1 - b1, outputWGrad));
+                v_output = alg.addition(alg.scalarMultiply(b2, v_output), alg.scalarMultiply(1 - b2, alg.exponentiate(outputWGrad, 2)));
+
+                std::vector<std::vector<std::vector<double>>> m_hidden_hat = alg.scalarMultiply(1/(1 - pow(b1, epoch)), m_hidden);
+                std::vector<std::vector<std::vector<double>>> v_hidden_hat = alg.scalarMultiply(1/(1 - pow(b2, epoch)), v_hidden);
+                std::vector<std::vector<std::vector<double>>> m_hidden_final = alg.addition(alg.scalarMultiply(b1, m_hidden_hat), alg.scalarMultiply((1 - b1)/(1 - pow(b1, epoch)), cumulativeHiddenLayerWGrad));
+
+                std::vector<double> m_output_hat = alg.scalarMultiply(1/(1 - pow(b1, epoch)), m_output);
+                std::vector<double> v_output_hat = alg.scalarMultiply(1/(1 - pow(b2, epoch)), v_output);
+                std::vector<double> m_output_final = alg.addition(alg.scalarMultiply(b1, m_output_hat), alg.scalarMultiply((1 - b1)/(1 - pow(b1, epoch)), outputWGrad));
+
+                std::vector<std::vector<std::vector<double>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(m_hidden_final, alg.scalarAdd(e, alg.sqrt(v_hidden_hat))));
+                std::vector<double> outputLayerUpdation = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(m_output_final, alg.scalarAdd(e, alg.sqrt(v_output_hat))));
+
+                updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
+                y_hat = modelSetTest(inputMiniBatches[i]);
+
+                if(UI) { ANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]); }
+            }
+            epoch++;
+            if(epoch > max_epoch) { break; }
+        }
+        forwardPass();
+    }
+
+    void ANN::AMSGrad(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI){
+        class Cost cost;
+        LinAlg alg;
+
+        double cost_prev = 0;
+        int epoch = 1;
+
+        // Creating the mini-batches
+        int n_mini_batch = n/mini_batch_size;
+        // always evaluate the result
+        // always do forward pass only ONCE at end.
+        auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
+
+        // Initializing necessary components for AMSGrad.
+        std::vector<std::vector<std::vector<double>>> m_hidden;
+        std::vector<std::vector<std::vector<double>>> v_hidden;
+        std::vector<std::vector<std::vector<double>>> v_hidden_hat;
+
+        std::vector<double> m_output;
+        std::vector<double> v_output;
+        std::vector<double> v_output_hat;
+
+        while(true){
+            for(int i = 0; i < n_mini_batch; i++){
+                std::vector<double> y_hat = modelSetTest(inputMiniBatches[i]);
+                cost_prev = Cost(y_hat, outputMiniBatches[i]);
+
+                auto [cumulativeHiddenLayerWGrad, outputWGrad] = computeGradients(y_hat, outputMiniBatches[i]);
+                if(!network.empty() && m_hidden.empty() && v_hidden.empty()){ // Initing our tensor
+                    m_hidden = alg.resize(m_hidden, cumulativeHiddenLayerWGrad);
+                    v_hidden = alg.resize(v_hidden, cumulativeHiddenLayerWGrad);
+                    v_hidden_hat = alg.resize(v_hidden_hat, cumulativeHiddenLayerWGrad);
+                }
+
+                if(m_output.empty() && v_output.empty()){
+                    m_output.resize(outputWGrad.size());
+                    v_output.resize(outputWGrad.size());
+                    v_output_hat.resize(outputWGrad.size());
+                }
+
+                m_hidden = alg.addition(alg.scalarMultiply(b1, m_hidden), alg.scalarMultiply(1 - b1, cumulativeHiddenLayerWGrad));
+                v_hidden = alg.addition(alg.scalarMultiply(b2, v_hidden), alg.scalarMultiply(1 - b2, alg.exponentiate(cumulativeHiddenLayerWGrad, 2)));
+
+                m_output = alg.addition(alg.scalarMultiply(b1, m_output), alg.scalarMultiply(1 - b1, outputWGrad));
+                v_output = alg.addition(alg.scalarMultiply(b2, v_output), alg.scalarMultiply(1 - b2, alg.exponentiate(outputWGrad, 2)));
+
+                v_hidden_hat = alg.max(v_hidden_hat, v_hidden);
+                v_output_hat = alg.max(v_output_hat, v_output);
+
+                std::vector<std::vector<std::vector<double>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(m_hidden, alg.scalarAdd(e, alg.sqrt(v_hidden_hat))));
+                std::vector<double> outputLayerUpdation = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(m_output, alg.scalarAdd(e, alg.sqrt(v_output_hat))));
+
+                updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
+                y_hat = modelSetTest(inputMiniBatches[i]);
+
+                if(UI) { ANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]); }
+            }
+            epoch++;
+            if(epoch > max_epoch) { break; }
+        }
+        forwardPass();
+    }
+
     double ANN::score(){
         Utilities util;
         forwardPass();
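For reference, the updates the Adamax, Nadam and AMSGrad routines above apply, with m and v (or u) the first and second moment accumulators, t the epoch counter used for bias correction, and alpha, n as before (a sketch for orientation, not part of the diff):

```latex
% Adamax: infinity-norm second moment
m \leftarrow b_1 m + (1-b_1) g, \quad u \leftarrow \max(b_2 u,\, |g|), \quad
\theta \leftarrow \theta - \tfrac{\alpha}{n}\, \hat{m} \oslash (u + \epsilon), \quad \hat{m} = \tfrac{m}{1-b_1^{t}}
% Nadam: Adam with a Nesterov-style correction of the first moment
\bar{m} = b_1 \hat{m} + \tfrac{1-b_1}{1-b_1^{t}}\, g, \quad
\theta \leftarrow \theta - \tfrac{\alpha}{n}\, \bar{m} \oslash \left(\sqrt{\hat{v}} + \epsilon\right)
% AMSGrad: keep the running maximum of v; no bias correction, as in the code
\hat{v} \leftarrow \max(\hat{v},\, v), \quad
\theta \leftarrow \theta - \tfrac{\alpha}{n}\, m \oslash \left(\sqrt{\hat{v}} + \epsilon\right)
```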
@@ -24,7 +24,13 @@ class ANN{
         double modelTest(std::vector<double> x);
         void gradientDescent(double learning_rate, int max_epoch, bool UI = 1);
         void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1);
+        void Momentum(double learning_rate, int max_epoch, int mini_batch_size, double gamma, bool NAG, bool UI = 1);
+        void Adagrad(double learning_rate, int max_epoch, int mini_batch_size, double e, bool UI = 1);
+        void Adadelta(double learning_rate, int max_epoch, int mini_batch_size, double b1, double e, bool UI = 1);
         void Adam(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI = 1);
+        void Adamax(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI = 1);
+        void Nadam(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI = 1);
+        void AMSGrad(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI = 1);
         double score();
         void save(std::string fileName);
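With these declarations in place, a minimal usage sketch, modelled on the main.cpp change further below; the include paths and hyperparameter values are illustrative assumptions, not part of the diff:

```cpp
// Hypothetical driver: train the XOR toy problem with the new ANN optimizers.
#include "MLPP/ANN/ANN.hpp"        // assumed header locations
#include "MLPP/LinAlg/LinAlg.hpp"
#include <vector>

int main(){
    MLPP::LinAlg alg;
    std::vector<std::vector<double>> inputSet = {{0,0,1,1}, {0,1,0,1}};
    std::vector<double> outputSet = {0,1,1,0};

    MLPP::ANN ann(alg.transpose(inputSet), outputSet);
    ann.addLayer(10, "Sigmoid");
    ann.addOutputLayer("Sigmoid", "LogLoss");

    // Signature: learning_rate, max_epoch, mini_batch_size, gamma, NAG, UI
    ann.Momentum(0.1, 8000, 2, 0.9, true, 1);
    // Alternatives added in this commit (example values only):
    // ann.Adagrad(0.1, 1000, 2, 1e-8, 1);
    // ann.AMSGrad(0.1, 10000, 1, 0.9, 0.999, 1e-6, 1);
    return 0;
}
```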
@@ -1130,4 +1130,29 @@ namespace MLPP{
         }
         return A;
     }
 
+    std::vector<std::vector<std::vector<double>>> LinAlg::resize(std::vector<std::vector<std::vector<double>>> A, std::vector<std::vector<std::vector<double>>> B){
+        A.resize(B.size());
+        for(int i = 0; i < B.size(); i++){
+            A[i].resize(B[i].size());
+            for(int j = 0; j < B[i].size(); j++){
+                A[i][j].resize(B[i][j].size());
+            }
+        }
+        return A;
+    }
+
+    std::vector<std::vector<std::vector<double>>> LinAlg::max(std::vector<std::vector<std::vector<double>>> A, std::vector<std::vector<std::vector<double>>> B){
+        for(int i = 0; i < A.size(); i++){
+            A[i] = max(A[i], B[i]);
+        }
+        return A;
+    }
+
+    std::vector<std::vector<std::vector<double>>> LinAlg::abs(std::vector<std::vector<std::vector<double>>> A){
+        for(int i = 0; i < A.size(); i++){
+            A[i] = abs(A[i]);
+        }
+        return A;
+    }
 }
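A small sketch of what the new rank-3 tensor helpers do (include path and values are assumptions for illustration only):

```cpp
#include "MLPP/LinAlg/LinAlg.hpp"   // assumed location
#include <vector>

int main(){
    MLPP::LinAlg alg;
    std::vector<std::vector<std::vector<double>>> A;                        // empty accumulator
    std::vector<std::vector<std::vector<double>>> B = {{{1, -2}, {3, -4}}}; // shape 1x2x2

    A = alg.resize(A, B);   // A takes B's shape; new entries are value-initialized to 0.0
    auto C = alg.abs(B);    // element-wise absolute value: {{{1, 2}, {3, 4}}}
    auto D = alg.max(A, C); // element-wise maximum; equals C here since A is all zeros
    return 0;
}
```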
@@ -210,6 +210,12 @@ namespace MLPP{
         std::vector<std::vector<std::vector<double>>> resize(std::vector<std::vector<std::vector<double>>> A, std::vector<std::vector<std::vector<double>>> B);
 
+        std::vector<std::vector<std::vector<double>>> hadamard_product(std::vector<std::vector<std::vector<double>>> A, std::vector<std::vector<std::vector<double>>> B);
+
+        std::vector<std::vector<std::vector<double>>> max(std::vector<std::vector<std::vector<double>>> A, std::vector<std::vector<std::vector<double>>> B);
+
+        std::vector<std::vector<std::vector<double>>> abs(std::vector<std::vector<std::vector<double>>> A);
+
     private:
 };
@@ -166,327 +166,6 @@ namespace MLPP{
         forwardPass();
     }
 
-    void LinReg::Momentum(double learning_rate, int max_epoch, int mini_batch_size, double gamma, bool UI){
-        LinAlg alg;
-        Reg regularization;
-        double cost_prev = 0;
-        int epoch = 1;
-
-        // Creating the mini-batches
-        int n_mini_batch = n/mini_batch_size;
-        auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
-
-        // Initializing necessary components for Momentum.
-        std::vector<double> v = alg.zerovec(weights.size());
-        while(true){
-            for(int i = 0; i < n_mini_batch; i++){
-                std::vector<double> y_hat = Evaluate(inputMiniBatches[i]);
-                cost_prev = Cost(y_hat, outputMiniBatches[i]);
-
-                std::vector<double> error = alg.subtraction(y_hat, outputMiniBatches[i]);
-
-                // Calculating the weight gradients
-                std::vector<double> gradient = alg.scalarMultiply(1/outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error));
-                std::vector<double> RegDerivTerm = regularization.regDerivTerm(weights, lambda, alpha, reg);
-                std::vector<double> weight_grad = alg.addition(gradient, RegDerivTerm); // Weight_grad_final
-
-                v = alg.addition(alg.scalarMultiply(gamma, v), alg.scalarMultiply(learning_rate, weight_grad));
-                weights = alg.subtraction(weights, v);
-
-                // Calculating the bias gradients
-                bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); // As normal
-                y_hat = Evaluate(inputMiniBatches[i]);
-
-                if(UI) {
-                    Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
-                    Utilities::UI(weights, bias);
-                }
-            }
-            epoch++;
-            if(epoch > max_epoch) { break; }
-        }
-        forwardPass();
-    }
-
-    void LinReg::NAG(double learning_rate, int max_epoch, int mini_batch_size, double gamma, bool UI){
-        LinAlg alg;
-        Reg regularization;
-        double cost_prev = 0;
-        int epoch = 1;
-
-        // Creating the mini-batches
-        int n_mini_batch = n/mini_batch_size;
-        auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
-
-        // Initializing necessary components for Momentum.
-        std::vector<double> v = alg.zerovec(weights.size());
-        while(true){
-            for(int i = 0; i < n_mini_batch; i++){
-                weights = alg.subtraction(weights, alg.scalarMultiply(gamma, v)); // "Aposterori" calculation
-
-                std::vector<double> y_hat = Evaluate(inputMiniBatches[i]);
-                cost_prev = Cost(y_hat, outputMiniBatches[i]);
-
-                std::vector<double> error = alg.subtraction(y_hat, outputMiniBatches[i]);
-
-                // Calculating the weight gradients
-                std::vector<double> gradient = alg.scalarMultiply(1/outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error));
-                std::vector<double> RegDerivTerm = regularization.regDerivTerm(weights, lambda, alpha, reg);
-                std::vector<double> weight_grad = alg.addition(gradient, RegDerivTerm); // Weight_grad_final
-
-                v = alg.addition(alg.scalarMultiply(gamma, v), alg.scalarMultiply(learning_rate, weight_grad));
-                weights = alg.subtraction(weights, v);
-
-                // Calculating the bias gradients
-                bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); // As normal
-                y_hat = Evaluate(inputMiniBatches[i]);
-
-                if(UI) {
-                    Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
-                    Utilities::UI(weights, bias);
-                }
-            }
-            epoch++;
-            if(epoch > max_epoch) { break; }
-        }
-        forwardPass();
-    }
-
-    void LinReg::Adagrad(double learning_rate, int max_epoch, int mini_batch_size, double e, bool UI){
-        LinAlg alg;
-        Reg regularization;
-        double cost_prev = 0;
-        int epoch = 1;
-
-        // Creating the mini-batches
-        int n_mini_batch = n/mini_batch_size;
-        auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
-
-        // Initializing necessary components for Adagrad.
-        std::vector<double> v = alg.zerovec(weights.size());
-        while(true){
-            for(int i = 0; i < n_mini_batch; i++){
-                std::vector<double> y_hat = Evaluate(inputMiniBatches[i]);
-                cost_prev = Cost(y_hat, outputMiniBatches[i]);
-
-                std::vector<double> error = alg.subtraction(y_hat, outputMiniBatches[i]);
-
-                // Calculating the weight gradients
-                std::vector<double> gradient = alg.scalarMultiply(1/outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error));
-                std::vector<double> RegDerivTerm = regularization.regDerivTerm(weights, lambda, alpha, reg);
-                std::vector<double> weight_grad = alg.addition(gradient, RegDerivTerm); // Weight_grad_final
-
-                v = alg.hadamard_product(weight_grad, weight_grad);
-                weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, alg.elementWiseDivision(weight_grad, alg.sqrt(alg.scalarAdd(e, v)))));
-
-                // Calculating the bias gradients
-                bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); // As normal
-                y_hat = Evaluate(inputMiniBatches[i]);
-
-                if(UI) {
-                    Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
-                    Utilities::UI(weights, bias);
-                }
-            }
-            epoch++;
-            if(epoch > max_epoch) { break; }
-        }
-        forwardPass();
-    }
-
-    void LinReg::Adadelta(double learning_rate, int max_epoch, int mini_batch_size, double b1, double e, bool UI){
-        // Adagrad upgrade. Momentum is applied.
-        LinAlg alg;
-        Reg regularization;
-        double cost_prev = 0;
-        int epoch = 1;
-
-        // Creating the mini-batches
-        int n_mini_batch = n/mini_batch_size;
-        auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
-
-        // Initializing necessary components for Adagrad.
-        std::vector<double> v = alg.zerovec(weights.size());
-        while(true){
-            for(int i = 0; i < n_mini_batch; i++){
-                std::vector<double> y_hat = Evaluate(inputMiniBatches[i]);
-                cost_prev = Cost(y_hat, outputMiniBatches[i]);
-
-                std::vector<double> error = alg.subtraction(y_hat, outputMiniBatches[i]);
-
-                // Calculating the weight gradients
-                std::vector<double> gradient = alg.scalarMultiply(1/outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error));
-                std::vector<double> RegDerivTerm = regularization.regDerivTerm(weights, lambda, alpha, reg);
-                std::vector<double> weight_grad = alg.addition(gradient, RegDerivTerm); // Weight_grad_final
-
-                v = alg.addition(alg.scalarMultiply(b1, v), alg.scalarMultiply(1 - b1, alg.hadamard_product(weight_grad, weight_grad)));
-                weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, alg.elementWiseDivision(weight_grad, alg.sqrt(alg.scalarAdd(e, v)))));
-
-                // Calculating the bias gradients
-                bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); // As normal
-                y_hat = Evaluate(inputMiniBatches[i]);
-
-                if(UI) {
-                    Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
-                    Utilities::UI(weights, bias);
-                }
-            }
-            epoch++;
-            if(epoch > max_epoch) { break; }
-        }
-        forwardPass();
-    }
-
-    void LinReg::Adam(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI){
-        LinAlg alg;
-        Reg regularization;
-        double cost_prev = 0;
-        int epoch = 1;
-
-        // Creating the mini-batches
-        int n_mini_batch = n/mini_batch_size;
-        auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
-
-        // Initializing necessary components for Adam.
-        std::vector<double> m = alg.zerovec(weights.size());
-        std::vector<double> v = alg.zerovec(weights.size());
-        while(true){
-            for(int i = 0; i < n_mini_batch; i++){
-                std::vector<double> y_hat = Evaluate(inputMiniBatches[i]);
-                cost_prev = Cost(y_hat, outputMiniBatches[i]);
-
-                std::vector<double> error = alg.subtraction(y_hat, outputMiniBatches[i]);
-
-                // Calculating the weight gradients
-                std::vector<double> gradient = alg.scalarMultiply(1/outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error));
-                std::vector<double> RegDerivTerm = regularization.regDerivTerm(weights, lambda, alpha, reg);
-                std::vector<double> weight_grad = alg.addition(gradient, RegDerivTerm); // Weight_grad_final
-
-                m = alg.addition(alg.scalarMultiply(b1, m), alg.scalarMultiply(1 - b1, weight_grad));
-                v = alg.addition(alg.scalarMultiply(b2, v), alg.scalarMultiply(1 - b2, alg.exponentiate(weight_grad, 2)));
-
-                std::vector<double> m_hat = alg.scalarMultiply(1/(1 - pow(b1, epoch)), m);
-                std::vector<double> v_hat = alg.scalarMultiply(1/(1 - pow(b2, epoch)), v);
-
-                weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, alg.elementWiseDivision(m_hat, alg.scalarAdd(e, alg.sqrt(v_hat)))));
-
-                // Calculating the bias gradients
-                bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); // As normal
-                y_hat = Evaluate(inputMiniBatches[i]);
-
-                if(UI) {
-                    Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
-                    Utilities::UI(weights, bias);
-                }
-            }
-            epoch++;
-            if(epoch > max_epoch) { break; }
-        }
-        forwardPass();
-    }
-
-    void LinReg::Adamax(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI){
-        LinAlg alg;
-        Reg regularization;
-        double cost_prev = 0;
-        int epoch = 1;
-
-        // Creating the mini-batches
-        int n_mini_batch = n/mini_batch_size;
-        auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
-
-        std::vector<double> m = alg.zerovec(weights.size());
-        std::vector<double> u = alg.zerovec(weights.size());
-        while(true){
-            for(int i = 0; i < n_mini_batch; i++){
-                std::vector<double> y_hat = Evaluate(inputMiniBatches[i]);
-                cost_prev = Cost(y_hat, outputMiniBatches[i]);
-
-                std::vector<double> error = alg.subtraction(y_hat, outputMiniBatches[i]);
-
-                // Calculating the weight gradients
-                std::vector<double> gradient = alg.scalarMultiply(1/outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error));
-                std::vector<double> RegDerivTerm = regularization.regDerivTerm(weights, lambda, alpha, reg);
-                std::vector<double> weight_grad = alg.addition(gradient, RegDerivTerm); // Weight_grad_final
-
-                m = alg.addition(alg.scalarMultiply(b1, m), alg.scalarMultiply(1 - b1, weight_grad));
-                u = alg.max(alg.scalarMultiply(b2, u), alg.abs(weight_grad));
-
-                std::vector<double> m_hat = alg.scalarMultiply(1/(1 - pow(b1, epoch)), m);
-
-                weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, alg.elementWiseDivision(m_hat, u)));
-
-                // Calculating the bias gradients
-                bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); // As normal
-                y_hat = Evaluate(inputMiniBatches[i]);
-
-                if(UI) {
-                    Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
-                    Utilities::UI(weights, bias);
-                }
-            }
-            epoch++;
-            if(epoch > max_epoch) { break; }
-        }
-        forwardPass();
-    }
-
-    void LinReg::Nadam(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI){
-        LinAlg alg;
-        Reg regularization;
-        double cost_prev = 0;
-        int epoch = 1;
-
-        // Creating the mini-batches
-        int n_mini_batch = n/mini_batch_size;
-        auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
-
-        // Initializing necessary components for Adam.
-        std::vector<double> m = alg.zerovec(weights.size());
-        std::vector<double> v = alg.zerovec(weights.size());
-        std::vector<double> m_final = alg.zerovec(weights.size());
-        while(true){
-            for(int i = 0; i < n_mini_batch; i++){
-                std::vector<double> y_hat = Evaluate(inputMiniBatches[i]);
-                cost_prev = Cost(y_hat, outputMiniBatches[i]);
-
-                std::vector<double> error = alg.subtraction(y_hat, outputMiniBatches[i]);
-
-                // Calculating the weight gradients
-                std::vector<double> gradient = alg.scalarMultiply(1/outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error));
-                std::vector<double> RegDerivTerm = regularization.regDerivTerm(weights, lambda, alpha, reg);
-                std::vector<double> weight_grad = alg.addition(gradient, RegDerivTerm); // Weight_grad_final
-
-                m = alg.addition(alg.scalarMultiply(b1, m), alg.scalarMultiply(1 - b1, weight_grad));
-                v = alg.addition(alg.scalarMultiply(b2, v), alg.scalarMultiply(1 - b2, alg.exponentiate(weight_grad, 2)));
-                m_final = alg.addition(alg.scalarMultiply(b1, m), alg.scalarMultiply((1 - b1)/(1 - pow(b1, epoch)), weight_grad));
-
-                std::vector<double> m_hat = alg.scalarMultiply(1/(1 - pow(b1, epoch)), m);
-                std::vector<double> v_hat = alg.scalarMultiply(1/(1 - pow(b2, epoch)), v);
-
-                weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, alg.elementWiseDivision(m_final, alg.scalarAdd(e, alg.sqrt(v_hat)))));
-
-                // Calculating the bias gradients
-                bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); // As normal
-                y_hat = Evaluate(inputMiniBatches[i]);
-
-                if(UI) {
-                    Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
-                    Utilities::UI(weights, bias);
-                }
-            }
-            epoch++;
-            if(epoch > max_epoch) { break; }
-        }
-        forwardPass();
-    }
-
     void LinReg::normalEquation(){
         LinAlg alg;
         Stat stat;
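All of the removed LinReg routines compute the same regularized mini-batch gradient before applying their particular update; for reference (X is the mini-batch design matrix, m its size, R the regularization term, alpha the learning rate):

```latex
\nabla_w J = \tfrac{1}{m}\, X^{\top}\!\left(\hat{y} - y\right) + \partial_w R(w),
\qquad b \leftarrow b - \alpha \cdot \tfrac{1}{m}\sum_i \left(\hat{y}_i - y_i\right)
```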
@@ -20,14 +20,7 @@ namespace MLPP{
         void NewtonRaphson(double learning_rate, int max_epoch, bool UI);
         void gradientDescent(double learning_rate, int max_epoch, bool UI = 1);
         void SGD(double learning_rate, int max_epoch, bool UI = 1);
-        // void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1);
+        void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1);
-        // void Momentum(double learning_rate, int max_epoch, int mini_batch_size, double gamma, bool UI = 1);
-        // void NAG(double learning_rate, int max_epoch, int mini_batch_size, double gamma, bool UI = 1);
-        // void Adagrad(double learning_rate, int max_epoch, int mini_batch_size, double e, bool UI = 1);
-        // void Adadelta(double learning_rate, int max_epoch, int mini_batch_size, double b1, double e, bool UI = 1);
-        // void Adam(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI = 1);
-        // void Adamax(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI = 1);
-        // void Nadam(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI = 1);
         void normalEquation();
         double score();
         void save(std::string fileName);
README.md (22 changed lines)

@@ -91,7 +91,23 @@ The result will be the model's predictions for the entire dataset.
     - Arcsch
     - Arsech
     - Arcoth
-2. Possible Loss Functions
+2. Possible Optimization Algorithms
+    - Batch Gradient Descent
+    - Mini-Batch Gradient Descent
+    - Stochastic Gradient Descent
+    - Gradient Descent with Momentum
+    - Nesterov Accelerated Gradient
+    - Adagrad Optimizer
+    - Adadelta Optimizer
+    - Adam Optimizer
+    - Adamax Optimizer
+    - Nadam Optimizer
+    - AMSGrad Optimizer
+    - 2nd Order Newton-Raphson Optimizer*
+    - Normal Equation*
+
+    * Only available for linear regression
+3. Possible Loss Functions
     - MSE
     - RMSE
     - MAE
@@ -99,11 +115,11 @@ The result will be the model's predictions for the entire dataset.
     - Log Loss
     - Cross Entropy
     - Hinge Loss
-3. Possible Regularization Methods
+4. Possible Regularization Methods
     - Lasso
     - Ridge
     - ElasticNet
-4. Possible Weight Initialization Methods
+5. Possible Weight Initialization Methods
     - Uniform
     - Xavier Normal
     - Xavier Uniform
main.cpp (8 changed lines)

@@ -364,10 +364,12 @@ int main() {
     std::vector<std::vector<double>> inputSet = {{0,0,1,1}, {0,1,0,1}};
     std::vector<double> outputSet = {0,1,1,0};
     ANN ann(alg.transpose(inputSet), outputSet);
-    //ann.addLayer(10, "RELU", "Default", "Ridge", 0.0001);
-    ann.addLayer(10, "RELU", "Default", "XavierNormal");
+    //ann.addLayer(10, "RELU");
+    ann.addLayer(10, "Sigmoid");
     ann.addOutputLayer("Sigmoid", "LogLoss");
-    ann.Adam(0.1, 800, 2, 0.9, 0.999, 1e-8, 1);
+    //ann.AMSGrad(0.1, 10000, 1, 0.9, 0.999, 0.000001, 1);
+    //ann.Adadelta(1, 1000, 2, 0.9, 0.000001, 1);
+    ann.Momentum(0.1, 8000, 2, 0.9, true, 1);
     //ann.MBGD(0.1, 1000, 2, 1);
     alg.printVector(ann.modelSetTest(alg.transpose(inputSet)));
     std::cout << "ACCURACY: " << 100 * ann.score() << "%" << std::endl;