mirror of https://github.com/Relintai/MLPP.git
synced 2024-11-12 10:15:01 +01:00

Added new optimizers for Neural Nets.

This commit is contained in:
parent a4c36293f9
commit 2c83feb410

358 MLPP/ANN/ANN.cpp
@@ -114,7 +114,158 @@ namespace MLPP {
    forwardPass();
}

void ANN::Momentum(double learning_rate, int max_epoch, int mini_batch_size, double gamma, bool NAG, bool UI){
    class Cost cost;
    LinAlg alg;

    double cost_prev = 0;
    int epoch = 1;

    // Creating the mini-batches
    int n_mini_batch = n/mini_batch_size;
    // always evaluate the result
    // always do forward pass only ONCE at end.
    auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);

    // Initializing necessary components for Momentum.
    std::vector<std::vector<std::vector<double>>> v_hidden;

    std::vector<double> v_output;
    while(true){
        for(int i = 0; i < n_mini_batch; i++){
            std::vector<double> y_hat = modelSetTest(inputMiniBatches[i]);
            cost_prev = Cost(y_hat, outputMiniBatches[i]);

            auto [cumulativeHiddenLayerWGrad, outputWGrad] = computeGradients(y_hat, outputMiniBatches[i]);

            if(!network.empty() && v_hidden.empty()){ // Initializing our tensor
                v_hidden = alg.resize(v_hidden, cumulativeHiddenLayerWGrad);
            }

            if(v_output.empty()){
                v_output.resize(outputWGrad.size());
            }

            if(NAG){ // "A posteriori" calculation
                updateParameters(v_hidden, v_output, 0); // DON'T update bias.
            }

            v_hidden = alg.addition(alg.scalarMultiply(gamma, v_hidden), alg.scalarMultiply(learning_rate/n, cumulativeHiddenLayerWGrad));

            v_output = alg.addition(alg.scalarMultiply(gamma, v_output), alg.scalarMultiply(learning_rate/n, outputWGrad));

            updateParameters(v_hidden, v_output, learning_rate); // subject to change. may want bias to have this matrix too.
            y_hat = modelSetTest(inputMiniBatches[i]);

            if(UI) { ANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]); }
        }
        epoch++;
        if(epoch > max_epoch) { break; }
    }
    forwardPass();
}
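
For reference, the weight update implemented by ANN::Momentum above is the classical momentum scheme (with an extra velocity-based look-ahead applied when NAG is true). Writing g_t for the mini-batch weight gradient, eta for learning_rate and n for the value used in the learning_rate/n scaling, a sketch of the rule is:

    v_t = \gamma v_{t-1} + \frac{\eta}{n} g_t, \qquad W_t = W_{t-1} - v_t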

void ANN::Adagrad(double learning_rate, int max_epoch, int mini_batch_size, double e, bool UI){
    class Cost cost;
    LinAlg alg;

    double cost_prev = 0;
    int epoch = 1;

    // Creating the mini-batches
    int n_mini_batch = n/mini_batch_size;
    // always evaluate the result
    // always do forward pass only ONCE at end.
    auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);

    // Initializing necessary components for Adagrad.
    std::vector<std::vector<std::vector<double>>> v_hidden;

    std::vector<double> v_output;
    while(true){
        for(int i = 0; i < n_mini_batch; i++){
            std::vector<double> y_hat = modelSetTest(inputMiniBatches[i]);
            cost_prev = Cost(y_hat, outputMiniBatches[i]);

            auto [cumulativeHiddenLayerWGrad, outputWGrad] = computeGradients(y_hat, outputMiniBatches[i]);

            if(!network.empty() && v_hidden.empty()){ // Initializing our tensor
                v_hidden = alg.resize(v_hidden, cumulativeHiddenLayerWGrad);
            }

            if(v_output.empty()){
                v_output.resize(outputWGrad.size());
            }

            v_hidden = alg.addition(v_hidden, alg.exponentiate(cumulativeHiddenLayerWGrad, 2));

            v_output = alg.addition(v_output, alg.exponentiate(outputWGrad, 2));

            std::vector<std::vector<std::vector<double>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(cumulativeHiddenLayerWGrad, alg.scalarAdd(e, alg.sqrt(v_hidden))));
            std::vector<double> outputLayerUpdation = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(outputWGrad, alg.scalarAdd(e, alg.sqrt(v_output))));

            updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
            y_hat = modelSetTest(inputMiniBatches[i]);

            if(UI) { ANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]); }
        }
        epoch++;
        if(epoch > max_epoch) { break; }
    }
    forwardPass();
}
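
ANN::Adagrad keeps a running sum of squared gradients and scales each coordinate of the step by it; in the same notation as before, the update sketched here is:

    G_t = G_{t-1} + g_t^{\,2}, \qquad W_t = W_{t-1} - \frac{\eta}{n} \cdot \frac{g_t}{\sqrt{G_t} + \epsilon}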

void ANN::Adadelta(double learning_rate, int max_epoch, int mini_batch_size, double b1, double e, bool UI){
    class Cost cost;
    LinAlg alg;

    double cost_prev = 0;
    int epoch = 1;

    // Creating the mini-batches
    int n_mini_batch = n/mini_batch_size;
    // always evaluate the result
    // always do forward pass only ONCE at end.
    auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);

    // Initializing necessary components for Adadelta.
    std::vector<std::vector<std::vector<double>>> v_hidden;

    std::vector<double> v_output;
    while(true){
        for(int i = 0; i < n_mini_batch; i++){
            std::vector<double> y_hat = modelSetTest(inputMiniBatches[i]);
            cost_prev = Cost(y_hat, outputMiniBatches[i]);

            auto [cumulativeHiddenLayerWGrad, outputWGrad] = computeGradients(y_hat, outputMiniBatches[i]);

            if(!network.empty() && v_hidden.empty()){ // Initializing our tensor
                v_hidden = alg.resize(v_hidden, cumulativeHiddenLayerWGrad);
            }

            if(v_output.empty()){
                v_output.resize(outputWGrad.size());
            }

            v_hidden = alg.addition(alg.scalarMultiply(1 - b1, v_hidden), alg.scalarMultiply(b1, alg.exponentiate(cumulativeHiddenLayerWGrad, 2)));

            v_output = alg.addition(v_output, alg.exponentiate(outputWGrad, 2));

            std::vector<std::vector<std::vector<double>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(cumulativeHiddenLayerWGrad, alg.scalarAdd(e, alg.sqrt(v_hidden))));
            std::vector<double> outputLayerUpdation = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(outputWGrad, alg.scalarAdd(e, alg.sqrt(v_output))));

            updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
            y_hat = modelSetTest(inputMiniBatches[i]);

            if(UI) { ANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]); }
        }
        epoch++;
        if(epoch > max_epoch) { break; }
    }
    forwardPass();
}
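
As committed, ANN::Adadelta replaces the Adagrad sum for the hidden layers with an exponentially weighted average of squared gradients (v_output still uses the plain running sum), so the step is effectively an RMSProp-style update rather than full Adadelta, which would also track an average of squared parameter updates; sketched:

    E[g^2]_t = (1 - \beta_1)\,E[g^2]_{t-1} + \beta_1 g_t^{\,2}, \qquad W_t = W_{t-1} - \frac{\eta}{n} \cdot \frac{g_t}{\sqrt{E[g^2]_t} + \epsilon}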

void ANN::Adam(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI){
    class Cost cost;
    LinAlg alg;

@@ -139,18 +290,9 @@ namespace MLPP {
            cost_prev = Cost(y_hat, outputMiniBatches[i]);

            auto [cumulativeHiddenLayerWGrad, outputWGrad] = computeGradients(y_hat, outputMiniBatches[i]);

            if(!network.empty() && m_hidden.empty() && v_hidden.empty()){ // Initializing our tensor
                m_hidden.resize(cumulativeHiddenLayerWGrad.size());
                v_hidden.resize(cumulativeHiddenLayerWGrad.size());
                for(int i = 0; i < cumulativeHiddenLayerWGrad.size(); i++){
                    m_hidden[i].resize(cumulativeHiddenLayerWGrad[i].size());
                    v_hidden[i].resize(cumulativeHiddenLayerWGrad[i].size());
                    for(int j = 0; j < cumulativeHiddenLayerWGrad[i].size(); j++){
                        m_hidden[i][j].resize(cumulativeHiddenLayerWGrad[i][j].size());
                        v_hidden[i][j].resize(cumulativeHiddenLayerWGrad[i][j].size());
                    }
                }
                m_hidden = alg.resize(m_hidden, cumulativeHiddenLayerWGrad);
                v_hidden = alg.resize(v_hidden, cumulativeHiddenLayerWGrad);
            }

            if(m_output.empty() && v_output.empty()){
@@ -185,6 +327,198 @@ namespace MLPP {
    forwardPass();
}

void ANN::Adamax(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI){
    class Cost cost;
    LinAlg alg;

    double cost_prev = 0;
    int epoch = 1;

    // Creating the mini-batches
    int n_mini_batch = n/mini_batch_size;
    // always evaluate the result
    // always do forward pass only ONCE at end.
    auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);

    // Initializing necessary components for Adamax.
    std::vector<std::vector<std::vector<double>>> m_hidden;
    std::vector<std::vector<std::vector<double>>> u_hidden;

    std::vector<double> m_output;
    std::vector<double> u_output;
    while(true){
        for(int i = 0; i < n_mini_batch; i++){
            std::vector<double> y_hat = modelSetTest(inputMiniBatches[i]);
            cost_prev = Cost(y_hat, outputMiniBatches[i]);

            auto [cumulativeHiddenLayerWGrad, outputWGrad] = computeGradients(y_hat, outputMiniBatches[i]);
            if(!network.empty() && m_hidden.empty() && u_hidden.empty()){ // Initializing our tensor
                m_hidden = alg.resize(m_hidden, cumulativeHiddenLayerWGrad);
                u_hidden = alg.resize(u_hidden, cumulativeHiddenLayerWGrad);
            }

            if(m_output.empty() && u_output.empty()){
                m_output.resize(outputWGrad.size());
                u_output.resize(outputWGrad.size());
            }

            m_hidden = alg.addition(alg.scalarMultiply(b1, m_hidden), alg.scalarMultiply(1 - b1, cumulativeHiddenLayerWGrad));
            u_hidden = alg.max(alg.scalarMultiply(b2, u_hidden), alg.abs(cumulativeHiddenLayerWGrad));

            m_output = alg.addition(alg.scalarMultiply(b1, m_output), alg.scalarMultiply(1 - b1, outputWGrad));
            u_output = alg.max(alg.scalarMultiply(b2, u_output), alg.abs(outputWGrad));

            std::vector<std::vector<std::vector<double>>> m_hidden_hat = alg.scalarMultiply(1/(1 - pow(b1, epoch)), m_hidden);

            std::vector<double> m_output_hat = alg.scalarMultiply(1/(1 - pow(b1, epoch)), m_output);

            std::vector<std::vector<std::vector<double>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(m_hidden_hat, alg.scalarAdd(e, u_hidden)));
            std::vector<double> outputLayerUpdation = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(m_output_hat, alg.scalarAdd(e, u_output)));

            updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
            y_hat = modelSetTest(inputMiniBatches[i]);

            if(UI) { ANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]); }
        }
        epoch++;
        if(epoch > max_epoch) { break; }
    }
    forwardPass();
}
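
ANN::Adamax follows Adam's first-moment estimate but replaces the second moment with an infinity-norm accumulator; with t the epoch counter used for bias correction in the code, the update sketched here is:

    m_t = \beta_1 m_{t-1} + (1 - \beta_1) g_t, \qquad u_t = \max(\beta_2 u_{t-1}, |g_t|), \qquad \hat{m}_t = \frac{m_t}{1 - \beta_1^t}, \qquad W_t = W_{t-1} - \frac{\eta}{n} \cdot \frac{\hat{m}_t}{u_t + \epsilon}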

void ANN::Nadam(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI){
    class Cost cost;
    LinAlg alg;

    double cost_prev = 0;
    int epoch = 1;

    // Creating the mini-batches
    int n_mini_batch = n/mini_batch_size;
    // always evaluate the result
    // always do forward pass only ONCE at end.
    auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);

    // Initializing necessary components for Nadam.
    std::vector<std::vector<std::vector<double>>> m_hidden;
    std::vector<std::vector<std::vector<double>>> v_hidden;
    std::vector<std::vector<std::vector<double>>> m_hidden_final;

    std::vector<double> m_output;
    std::vector<double> v_output;
    while(true){
        for(int i = 0; i < n_mini_batch; i++){
            std::vector<double> y_hat = modelSetTest(inputMiniBatches[i]);
            cost_prev = Cost(y_hat, outputMiniBatches[i]);

            auto [cumulativeHiddenLayerWGrad, outputWGrad] = computeGradients(y_hat, outputMiniBatches[i]);
            if(!network.empty() && m_hidden.empty() && v_hidden.empty()){ // Initializing our tensor
                m_hidden = alg.resize(m_hidden, cumulativeHiddenLayerWGrad);
                v_hidden = alg.resize(v_hidden, cumulativeHiddenLayerWGrad);
            }

            if(m_output.empty() && v_output.empty()){
                m_output.resize(outputWGrad.size());
                v_output.resize(outputWGrad.size());
            }

            m_hidden = alg.addition(alg.scalarMultiply(b1, m_hidden), alg.scalarMultiply(1 - b1, cumulativeHiddenLayerWGrad));
            v_hidden = alg.addition(alg.scalarMultiply(b2, v_hidden), alg.scalarMultiply(1 - b2, alg.exponentiate(cumulativeHiddenLayerWGrad, 2)));

            m_output = alg.addition(alg.scalarMultiply(b1, m_output), alg.scalarMultiply(1 - b1, outputWGrad));
            v_output = alg.addition(alg.scalarMultiply(b2, v_output), alg.scalarMultiply(1 - b2, alg.exponentiate(outputWGrad, 2)));

            std::vector<std::vector<std::vector<double>>> m_hidden_hat = alg.scalarMultiply(1/(1 - pow(b1, epoch)), m_hidden);
            std::vector<std::vector<std::vector<double>>> v_hidden_hat = alg.scalarMultiply(1/(1 - pow(b2, epoch)), v_hidden);
            std::vector<std::vector<std::vector<double>>> m_hidden_final = alg.addition(alg.scalarMultiply(b1, m_hidden_hat), alg.scalarMultiply((1 - b1)/(1 - pow(b1, epoch)), cumulativeHiddenLayerWGrad));

            std::vector<double> m_output_hat = alg.scalarMultiply(1/(1 - pow(b1, epoch)), m_output);
            std::vector<double> v_output_hat = alg.scalarMultiply(1/(1 - pow(b2, epoch)), v_output);
            std::vector<double> m_output_final = alg.addition(alg.scalarMultiply(b1, m_output_hat), alg.scalarMultiply((1 - b1)/(1 - pow(b1, epoch)), outputWGrad));

            std::vector<std::vector<std::vector<double>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(m_hidden_final, alg.scalarAdd(e, alg.sqrt(v_hidden_hat))));
            std::vector<double> outputLayerUpdation = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(m_output_final, alg.scalarAdd(e, alg.sqrt(v_output_hat))));

            updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
            y_hat = modelSetTest(inputMiniBatches[i]);

            if(UI) { ANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]); }
        }
        epoch++;
        if(epoch > max_epoch) { break; }
    }
    forwardPass();
}
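
ANN::Nadam combines the bias-corrected Adam moments with a Nesterov-style correction of the first moment; in the notation used above:

    m_t = \beta_1 m_{t-1} + (1 - \beta_1) g_t, \qquad v_t = \beta_2 v_{t-1} + (1 - \beta_2) g_t^{\,2}
    \hat{m}_t = \frac{m_t}{1 - \beta_1^t}, \qquad \hat{v}_t = \frac{v_t}{1 - \beta_2^t}, \qquad \bar{m}_t = \beta_1 \hat{m}_t + \frac{1 - \beta_1}{1 - \beta_1^t}\, g_t
    W_t = W_{t-1} - \frac{\eta}{n} \cdot \frac{\bar{m}_t}{\sqrt{\hat{v}_t} + \epsilon}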

void ANN::AMSGrad(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI){
    class Cost cost;
    LinAlg alg;

    double cost_prev = 0;
    int epoch = 1;

    // Creating the mini-batches
    int n_mini_batch = n/mini_batch_size;
    // always evaluate the result
    // always do forward pass only ONCE at end.
    auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);

    // Initializing necessary components for AMSGrad.
    std::vector<std::vector<std::vector<double>>> m_hidden;
    std::vector<std::vector<std::vector<double>>> v_hidden;

    std::vector<std::vector<std::vector<double>>> v_hidden_hat;

    std::vector<double> m_output;
    std::vector<double> v_output;

    std::vector<double> v_output_hat;
    while(true){
        for(int i = 0; i < n_mini_batch; i++){
            std::vector<double> y_hat = modelSetTest(inputMiniBatches[i]);
            cost_prev = Cost(y_hat, outputMiniBatches[i]);

            auto [cumulativeHiddenLayerWGrad, outputWGrad] = computeGradients(y_hat, outputMiniBatches[i]);
            if(!network.empty() && m_hidden.empty() && v_hidden.empty()){ // Initializing our tensor
                m_hidden = alg.resize(m_hidden, cumulativeHiddenLayerWGrad);
                v_hidden = alg.resize(v_hidden, cumulativeHiddenLayerWGrad);
                v_hidden_hat = alg.resize(v_hidden_hat, cumulativeHiddenLayerWGrad);
            }

            if(m_output.empty() && v_output.empty()){
                m_output.resize(outputWGrad.size());
                v_output.resize(outputWGrad.size());
                v_output_hat.resize(outputWGrad.size());
            }

            m_hidden = alg.addition(alg.scalarMultiply(b1, m_hidden), alg.scalarMultiply(1 - b1, cumulativeHiddenLayerWGrad));
            v_hidden = alg.addition(alg.scalarMultiply(b2, v_hidden), alg.scalarMultiply(1 - b2, alg.exponentiate(cumulativeHiddenLayerWGrad, 2)));

            m_output = alg.addition(alg.scalarMultiply(b1, m_output), alg.scalarMultiply(1 - b1, outputWGrad));
            v_output = alg.addition(alg.scalarMultiply(b2, v_output), alg.scalarMultiply(1 - b2, alg.exponentiate(outputWGrad, 2)));

            v_hidden_hat = alg.max(v_hidden_hat, v_hidden);

            v_output_hat = alg.max(v_output_hat, v_output);

            std::vector<std::vector<std::vector<double>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(m_hidden, alg.scalarAdd(e, alg.sqrt(v_hidden_hat))));
            std::vector<double> outputLayerUpdation = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(m_output, alg.scalarAdd(e, alg.sqrt(v_output_hat))));

            updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
            y_hat = modelSetTest(inputMiniBatches[i]);

            if(UI) { ANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]); }
        }
        epoch++;
        if(epoch > max_epoch) { break; }
    }
    forwardPass();
}
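
ANN::AMSGrad keeps the Adam moments but divides by the running maximum of the second moment (and, as committed, applies no bias correction to the first moment); sketched:

    \hat{v}_t = \max(\hat{v}_{t-1}, v_t), \qquad W_t = W_{t-1} - \frac{\eta}{n} \cdot \frac{m_t}{\sqrt{\hat{v}_t} + \epsilon}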

double ANN::score(){
    Utilities util;
    forwardPass();

MLPP/ANN/ANN.hpp
@@ -24,7 +24,13 @@ class ANN{
    double modelTest(std::vector<double> x);
    void gradientDescent(double learning_rate, int max_epoch, bool UI = 1);
    void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1);
    void Momentum(double learning_rate, int max_epoch, int mini_batch_size, double gamma, bool NAG, bool UI = 1);
    void Adagrad(double learning_rate, int max_epoch, int mini_batch_size, double e, bool UI = 1);
    void Adadelta(double learning_rate, int max_epoch, int mini_batch_size, double b1, double e, bool UI = 1);
    void Adam(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI = 1);
    void Adamax(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI = 1);
    void Nadam(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI = 1);
    void AMSGrad(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI = 1);
    double score();
    void save(std::string fileName);

MLPP/LinAlg/LinAlg.cpp
@@ -1130,4 +1130,29 @@ namespace MLPP{
    }
    return A;
}

std::vector<std::vector<std::vector<double>>> LinAlg::resize(std::vector<std::vector<std::vector<double>>> A, std::vector<std::vector<std::vector<double>>> B){
    A.resize(B.size());
    for(int i = 0; i < B.size(); i++){
        A[i].resize(B[i].size());
        for(int j = 0; j < B[i].size(); j++){
            A[i][j].resize(B[i][j].size());
        }
    }
    return A;
}

std::vector<std::vector<std::vector<double>>> LinAlg::max(std::vector<std::vector<std::vector<double>>> A, std::vector<std::vector<std::vector<double>>> B){
    for(int i = 0; i < A.size(); i++){
        A[i] = max(A[i], B[i]);
    }
    return A;
}

std::vector<std::vector<std::vector<double>>> LinAlg::abs(std::vector<std::vector<std::vector<double>>> A){
    for(int i = 0; i < A.size(); i++){
        A[i] = abs(A[i]);
    }
    return A;
}
}
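
A minimal standalone sketch of how the new rank-3 tensor helpers compose; the include path follows the one used by the repo's main.cpp, and the tensor values here are made up purely for illustration:

    #include <vector>
    #include "MLPP/LinAlg/LinAlg.hpp"

    int main(){
        MLPP::LinAlg alg;
        // A hypothetical 1x2x2 tensor standing in for a cumulative weight-gradient tensor.
        std::vector<std::vector<std::vector<double>>> grad = {{{1.0, -2.0}, {3.0, -4.0}}};
        std::vector<std::vector<std::vector<double>>> v; // starts empty, like v_hidden in ANN.cpp
        v = alg.resize(v, grad);      // now a zero tensor with the same shape as grad
        v = alg.max(v, alg.abs(grad)); // element-wise max against the absolute gradients
        return 0;
    }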

MLPP/LinAlg/LinAlg.hpp
@@ -210,6 +210,12 @@ namespace MLPP{

    std::vector<std::vector<std::vector<double>>> resize(std::vector<std::vector<std::vector<double>>> A, std::vector<std::vector<std::vector<double>>> B);

    std::vector<std::vector<std::vector<double>>> hadamard_product(std::vector<std::vector<std::vector<double>>> A, std::vector<std::vector<std::vector<double>>> B);

    std::vector<std::vector<std::vector<double>>> max(std::vector<std::vector<std::vector<double>>> A, std::vector<std::vector<std::vector<double>>> B);

    std::vector<std::vector<std::vector<double>>> abs(std::vector<std::vector<std::vector<double>>> A);

    private:
};

MLPP/LinReg/LinReg.cpp
@@ -166,327 +166,6 @@ namespace MLPP{
    forwardPass();
}

void LinReg::Momentum(double learning_rate, int max_epoch, int mini_batch_size, double gamma, bool UI){
    LinAlg alg;
    Reg regularization;
    double cost_prev = 0;
    int epoch = 1;

    // Creating the mini-batches
    int n_mini_batch = n/mini_batch_size;
    auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);

    // Initializing necessary components for Momentum.
    std::vector<double> v = alg.zerovec(weights.size());
    while(true){
        for(int i = 0; i < n_mini_batch; i++){
            std::vector<double> y_hat = Evaluate(inputMiniBatches[i]);
            cost_prev = Cost(y_hat, outputMiniBatches[i]);

            std::vector<double> error = alg.subtraction(y_hat, outputMiniBatches[i]);

            // Calculating the weight gradients
            std::vector<double> gradient = alg.scalarMultiply(1/outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error));
            std::vector<double> RegDerivTerm = regularization.regDerivTerm(weights, lambda, alpha, reg);
            std::vector<double> weight_grad = alg.addition(gradient, RegDerivTerm); // Weight_grad_final

            v = alg.addition(alg.scalarMultiply(gamma, v), alg.scalarMultiply(learning_rate, weight_grad));

            weights = alg.subtraction(weights, v);

            // Calculating the bias gradients
            bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); // As normal
            y_hat = Evaluate(inputMiniBatches[i]);

            if(UI) {
                Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
                Utilities::UI(weights, bias);
            }
        }
        epoch++;
        if(epoch > max_epoch) { break; }
    }
    forwardPass();
}

void LinReg::NAG(double learning_rate, int max_epoch, int mini_batch_size, double gamma, bool UI){
    LinAlg alg;
    Reg regularization;
    double cost_prev = 0;
    int epoch = 1;

    // Creating the mini-batches
    int n_mini_batch = n/mini_batch_size;
    auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);

    // Initializing necessary components for Momentum.
    std::vector<double> v = alg.zerovec(weights.size());
    while(true){
        for(int i = 0; i < n_mini_batch; i++){
            weights = alg.subtraction(weights, alg.scalarMultiply(gamma, v)); // "A posteriori" calculation

            std::vector<double> y_hat = Evaluate(inputMiniBatches[i]);
            cost_prev = Cost(y_hat, outputMiniBatches[i]);

            std::vector<double> error = alg.subtraction(y_hat, outputMiniBatches[i]);

            // Calculating the weight gradients
            std::vector<double> gradient = alg.scalarMultiply(1/outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error));
            std::vector<double> RegDerivTerm = regularization.regDerivTerm(weights, lambda, alpha, reg);
            std::vector<double> weight_grad = alg.addition(gradient, RegDerivTerm); // Weight_grad_final

            v = alg.addition(alg.scalarMultiply(gamma, v), alg.scalarMultiply(learning_rate, weight_grad));

            weights = alg.subtraction(weights, v);

            // Calculating the bias gradients
            bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); // As normal
            y_hat = Evaluate(inputMiniBatches[i]);

            if(UI) {
                Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
                Utilities::UI(weights, bias);
            }
        }
        epoch++;
        if(epoch > max_epoch) { break; }
    }
    forwardPass();
}

void LinReg::Adagrad(double learning_rate, int max_epoch, int mini_batch_size, double e, bool UI){
    LinAlg alg;
    Reg regularization;
    double cost_prev = 0;
    int epoch = 1;

    // Creating the mini-batches
    int n_mini_batch = n/mini_batch_size;
    auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);

    // Initializing necessary components for Adagrad.
    std::vector<double> v = alg.zerovec(weights.size());
    while(true){
        for(int i = 0; i < n_mini_batch; i++){
            std::vector<double> y_hat = Evaluate(inputMiniBatches[i]);
            cost_prev = Cost(y_hat, outputMiniBatches[i]);

            std::vector<double> error = alg.subtraction(y_hat, outputMiniBatches[i]);

            // Calculating the weight gradients
            std::vector<double> gradient = alg.scalarMultiply(1/outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error));
            std::vector<double> RegDerivTerm = regularization.regDerivTerm(weights, lambda, alpha, reg);
            std::vector<double> weight_grad = alg.addition(gradient, RegDerivTerm); // Weight_grad_final

            v = alg.hadamard_product(weight_grad, weight_grad);

            weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, alg.elementWiseDivision(weight_grad, alg.sqrt(alg.scalarAdd(e, v)))));

            // Calculating the bias gradients
            bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); // As normal
            y_hat = Evaluate(inputMiniBatches[i]);

            if(UI) {
                Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
                Utilities::UI(weights, bias);
            }
        }
        epoch++;
        if(epoch > max_epoch) { break; }
    }
    forwardPass();
}

void LinReg::Adadelta(double learning_rate, int max_epoch, int mini_batch_size, double b1, double e, bool UI){
    // Adagrad upgrade. Momentum is applied.
    LinAlg alg;
    Reg regularization;
    double cost_prev = 0;
    int epoch = 1;

    // Creating the mini-batches
    int n_mini_batch = n/mini_batch_size;
    auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);

    // Initializing necessary components for Adadelta.
    std::vector<double> v = alg.zerovec(weights.size());
    while(true){
        for(int i = 0; i < n_mini_batch; i++){
            std::vector<double> y_hat = Evaluate(inputMiniBatches[i]);
            cost_prev = Cost(y_hat, outputMiniBatches[i]);

            std::vector<double> error = alg.subtraction(y_hat, outputMiniBatches[i]);

            // Calculating the weight gradients
            std::vector<double> gradient = alg.scalarMultiply(1/outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error));
            std::vector<double> RegDerivTerm = regularization.regDerivTerm(weights, lambda, alpha, reg);
            std::vector<double> weight_grad = alg.addition(gradient, RegDerivTerm); // Weight_grad_final

            v = alg.addition(alg.scalarMultiply(b1, v), alg.scalarMultiply(1 - b1, alg.hadamard_product(weight_grad, weight_grad)));

            weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, alg.elementWiseDivision(weight_grad, alg.sqrt(alg.scalarAdd(e, v)))));

            // Calculating the bias gradients
            bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); // As normal
            y_hat = Evaluate(inputMiniBatches[i]);

            if(UI) {
                Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
                Utilities::UI(weights, bias);
            }
        }
        epoch++;
        if(epoch > max_epoch) { break; }
    }
    forwardPass();
}

void LinReg::Adam(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI){
    LinAlg alg;
    Reg regularization;
    double cost_prev = 0;
    int epoch = 1;

    // Creating the mini-batches
    int n_mini_batch = n/mini_batch_size;
    auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);

    // Initializing necessary components for Adam.
    std::vector<double> m = alg.zerovec(weights.size());

    std::vector<double> v = alg.zerovec(weights.size());
    while(true){
        for(int i = 0; i < n_mini_batch; i++){
            std::vector<double> y_hat = Evaluate(inputMiniBatches[i]);
            cost_prev = Cost(y_hat, outputMiniBatches[i]);

            std::vector<double> error = alg.subtraction(y_hat, outputMiniBatches[i]);

            // Calculating the weight gradients
            std::vector<double> gradient = alg.scalarMultiply(1/outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error));
            std::vector<double> RegDerivTerm = regularization.regDerivTerm(weights, lambda, alpha, reg);
            std::vector<double> weight_grad = alg.addition(gradient, RegDerivTerm); // Weight_grad_final

            m = alg.addition(alg.scalarMultiply(b1, m), alg.scalarMultiply(1 - b1, weight_grad));
            v = alg.addition(alg.scalarMultiply(b2, v), alg.scalarMultiply(1 - b2, alg.exponentiate(weight_grad, 2)));

            std::vector<double> m_hat = alg.scalarMultiply(1/(1 - pow(b1, epoch)), m);
            std::vector<double> v_hat = alg.scalarMultiply(1/(1 - pow(b2, epoch)), v);

            weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, alg.elementWiseDivision(m_hat, alg.scalarAdd(e, alg.sqrt(v_hat)))));

            // Calculating the bias gradients
            bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); // As normal
            y_hat = Evaluate(inputMiniBatches[i]);

            if(UI) {
                Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
                Utilities::UI(weights, bias);
            }
        }
        epoch++;
        if(epoch > max_epoch) { break; }
    }
    forwardPass();
}

void LinReg::Adamax(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI){
    LinAlg alg;
    Reg regularization;
    double cost_prev = 0;
    int epoch = 1;

    // Creating the mini-batches
    int n_mini_batch = n/mini_batch_size;
    auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);

    std::vector<double> m = alg.zerovec(weights.size());

    std::vector<double> u = alg.zerovec(weights.size());
    while(true){
        for(int i = 0; i < n_mini_batch; i++){
            std::vector<double> y_hat = Evaluate(inputMiniBatches[i]);
            cost_prev = Cost(y_hat, outputMiniBatches[i]);

            std::vector<double> error = alg.subtraction(y_hat, outputMiniBatches[i]);

            // Calculating the weight gradients
            std::vector<double> gradient = alg.scalarMultiply(1/outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error));
            std::vector<double> RegDerivTerm = regularization.regDerivTerm(weights, lambda, alpha, reg);
            std::vector<double> weight_grad = alg.addition(gradient, RegDerivTerm); // Weight_grad_final

            m = alg.addition(alg.scalarMultiply(b1, m), alg.scalarMultiply(1 - b1, weight_grad));
            u = alg.max(alg.scalarMultiply(b2, u), alg.abs(weight_grad));

            std::vector<double> m_hat = alg.scalarMultiply(1/(1 - pow(b1, epoch)), m);

            weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, alg.elementWiseDivision(m_hat, u)));

            // Calculating the bias gradients
            bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); // As normal
            y_hat = Evaluate(inputMiniBatches[i]);

            if(UI) {
                Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
                Utilities::UI(weights, bias);
            }
        }
        epoch++;
        if(epoch > max_epoch) { break; }
    }
    forwardPass();
}

void LinReg::Nadam(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI){
    LinAlg alg;
    Reg regularization;
    double cost_prev = 0;
    int epoch = 1;

    // Creating the mini-batches
    int n_mini_batch = n/mini_batch_size;
    auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);

    // Initializing necessary components for Nadam.
    std::vector<double> m = alg.zerovec(weights.size());
    std::vector<double> v = alg.zerovec(weights.size());
    std::vector<double> m_final = alg.zerovec(weights.size());
    while(true){
        for(int i = 0; i < n_mini_batch; i++){
            std::vector<double> y_hat = Evaluate(inputMiniBatches[i]);
            cost_prev = Cost(y_hat, outputMiniBatches[i]);

            std::vector<double> error = alg.subtraction(y_hat, outputMiniBatches[i]);

            // Calculating the weight gradients
            std::vector<double> gradient = alg.scalarMultiply(1/outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error));
            std::vector<double> RegDerivTerm = regularization.regDerivTerm(weights, lambda, alpha, reg);
            std::vector<double> weight_grad = alg.addition(gradient, RegDerivTerm); // Weight_grad_final

            m = alg.addition(alg.scalarMultiply(b1, m), alg.scalarMultiply(1 - b1, weight_grad));
            v = alg.addition(alg.scalarMultiply(b2, v), alg.scalarMultiply(1 - b2, alg.exponentiate(weight_grad, 2)));
            m_final = alg.addition(alg.scalarMultiply(b1, m), alg.scalarMultiply((1 - b1)/(1 - pow(b1, epoch)), weight_grad));

            std::vector<double> m_hat = alg.scalarMultiply(1/(1 - pow(b1, epoch)), m);
            std::vector<double> v_hat = alg.scalarMultiply(1/(1 - pow(b2, epoch)), v);

            weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, alg.elementWiseDivision(m_final, alg.scalarAdd(e, alg.sqrt(v_hat)))));

            // Calculating the bias gradients
            bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); // As normal
            y_hat = Evaluate(inputMiniBatches[i]);

            if(UI) {
                Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
                Utilities::UI(weights, bias);
            }
        }
        epoch++;
        if(epoch > max_epoch) { break; }
    }
    forwardPass();
}

void LinReg::normalEquation(){
    LinAlg alg;
    Stat stat;

MLPP/LinReg/LinReg.hpp
@@ -20,14 +20,7 @@ namespace MLPP{
    void NewtonRaphson(double learning_rate, int max_epoch, bool UI);
    void gradientDescent(double learning_rate, int max_epoch, bool UI = 1);
    void SGD(double learning_rate, int max_epoch, bool UI = 1);
    // void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1);
    // void Momentum(double learning_rate, int max_epoch, int mini_batch_size, double gamma, bool UI = 1);
    // void NAG(double learning_rate, int max_epoch, int mini_batch_size, double gamma, bool UI = 1);
    // void Adagrad(double learning_rate, int max_epoch, int mini_batch_size, double e, bool UI = 1);
    // void Adadelta(double learning_rate, int max_epoch, int mini_batch_size, double b1, double e, bool UI = 1);
    // void Adam(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI = 1);
    // void Adamax(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI = 1);
    // void Nadam(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI = 1);
    void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1);
    void normalEquation();
    double score();
    void save(std::string fileName);

22 README.md
@@ -91,7 +91,23 @@ The result will be the model's predictions for the entire dataset.
    - Arcsch
    - Arsech
    - Arcoth
2. Possible Optimization Algorithms
    - Batch Gradient Descent
    - Mini-Batch Gradient Descent
    - Stochastic Gradient Descent
    - Gradient Descent with Momentum
    - Nesterov Accelerated Gradient
    - Adagrad Optimizer
    - Adadelta Optimizer
    - Adam Optimizer
    - Adamax Optimizer
    - Nadam Optimizer
    - AMSGrad Optimizer
    - 2nd Order Newton-Raphson Optimizer*
    - Normal Equation*

    * Only available for linear regression
3. Possible Loss Functions
    - MSE
    - RMSE
    - MAE
@@ -99,11 +115,11 @@ The result will be the model's predictions for the entire dataset.
    - Log Loss
    - Cross Entropy
    - Hinge Loss
4. Possible Regularization Methods
    - Lasso
    - Ridge
    - ElasticNet
5. Possible Weight Initialization Methods
    - Uniform
    - Xavier Normal
    - Xavier Uniform
8 main.cpp
@@ -364,10 +364,12 @@ int main() {
    std::vector<std::vector<double>> inputSet = {{0,0,1,1}, {0,1,0,1}};
    std::vector<double> outputSet = {0,1,1,0};
    ANN ann(alg.transpose(inputSet), outputSet);
    //ann.addLayer(10, "RELU", "Default", "Ridge", 0.0001);
    ann.addLayer(10, "RELU", "Default", "XavierNormal");
    //ann.addLayer(10, "RELU");
    ann.addLayer(10, "Sigmoid");
    ann.addOutputLayer("Sigmoid", "LogLoss");
    ann.Adam(0.1, 800, 2, 0.9, 0.999, 1e-8, 1);
    //ann.AMSGrad(0.1, 10000, 1, 0.9, 0.999, 0.000001, 1);
    //ann.Adadelta(1, 1000, 2, 0.9, 0.000001, 1);
    ann.Momentum(0.1, 8000, 2, 0.9, true, 1);
    //ann.MBGD(0.1, 1000, 2, 1);
    alg.printVector(ann.modelSetTest(alg.transpose(inputSet)));
    std::cout << "ACCURACY: " << 100 * ann.score() << "%" << std::endl;
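
The other new ANN optimizers are called the same way; the hyperparameter values below are illustrative only (they are not part of this commit) and simply exercise the signatures declared in ANN.hpp:

    // Hypothetical settings, shown only to illustrate the new signatures.
    //ann.Adagrad(0.1, 1000, 2, 1e-8, 1);             // learning_rate, max_epoch, mini_batch_size, e, UI
    //ann.Adamax(0.1, 1000, 2, 0.9, 0.999, 1e-8, 1);  // ..., b1, b2, e, UI
    //ann.Nadam(0.1, 1000, 2, 0.9, 0.999, 1e-8, 1);
    //ann.AMSGrad(0.1, 1000, 2, 0.9, 0.999, 1e-8, 1);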