Added step-based learning rate decay

novak_99 2022-02-04 21:52:00 -08:00
parent 5c69713503
commit 559e55fd89
5 changed files with 58 additions and 36 deletions

View File

@ -16,7 +16,7 @@
namespace MLPP {
ANN::ANN(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet)
- : inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), lrScheduler("None"), decayConstant(0)
+ : inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), lrScheduler("None"), decayConstant(0), dropRate(0)
{
}
@ -63,10 +63,11 @@ namespace MLPP {
double cost_prev = 0;
int epoch = 1;
forwardPass();
+ double initial_learning_rate = learning_rate;
alg.printMatrix(network[network.size() - 1].weights);
while(true){
- learning_rate = applyLearningRateScheduler(learning_rate, decayConstant, epoch);
+ learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
cost_prev = Cost(y_hat, outputSet);
auto [cumulativeHiddenLayerWGrad, outputWGrad] = computeGradients(y_hat, outputSet);
@ -90,6 +91,7 @@ namespace MLPP {
double cost_prev = 0;
int epoch = 1;
+ double initial_learning_rate = learning_rate;
// Creating the mini-batches
int n_mini_batch = n/mini_batch_size;
@ -97,7 +99,7 @@ namespace MLPP {
// always do forward pass only ONCE at end.
auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
while(true){
- learning_rate = applyLearningRateScheduler(learning_rate, decayConstant, epoch);
+ learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
for(int i = 0; i < n_mini_batch; i++){
std::vector<double> y_hat = modelSetTest(inputMiniBatches[i]);
cost_prev = Cost(y_hat, outputMiniBatches[i]);
@ -123,6 +125,7 @@ namespace MLPP {
double cost_prev = 0;
int epoch = 1;
+ double initial_learning_rate = learning_rate;
// Creating the mini-batches
int n_mini_batch = n/mini_batch_size;
@ -135,7 +138,7 @@ namespace MLPP {
std::vector<double> v_output;
while(true){
- learning_rate = applyLearningRateScheduler(learning_rate, decayConstant, epoch);
+ learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
for(int i = 0; i < n_mini_batch; i++){
std::vector<double> y_hat = modelSetTest(inputMiniBatches[i]);
cost_prev = Cost(y_hat, outputMiniBatches[i]);
@ -175,6 +178,7 @@ namespace MLPP {
double cost_prev = 0;
int epoch = 1;
+ double initial_learning_rate = learning_rate;
// Creating the mini-batches
int n_mini_batch = n/mini_batch_size;
@ -187,7 +191,7 @@ namespace MLPP {
std::vector<double> v_output;
while(true){
- learning_rate = applyLearningRateScheduler(learning_rate, decayConstant, epoch);
+ learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
for(int i = 0; i < n_mini_batch; i++){
std::vector<double> y_hat = modelSetTest(inputMiniBatches[i]);
cost_prev = Cost(y_hat, outputMiniBatches[i]);
@ -226,6 +230,7 @@ namespace MLPP {
double cost_prev = 0;
int epoch = 1;
+ double initial_learning_rate = learning_rate;
// Creating the mini-batches
int n_mini_batch = n/mini_batch_size;
@ -238,7 +243,7 @@ namespace MLPP {
std::vector<double> v_output;
while(true){
- learning_rate = applyLearningRateScheduler(learning_rate, decayConstant, epoch);
+ learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
for(int i = 0; i < n_mini_batch; i++){
std::vector<double> y_hat = modelSetTest(inputMiniBatches[i]);
cost_prev = Cost(y_hat, outputMiniBatches[i]);
@ -277,6 +282,7 @@ void ANN::Adam(double learning_rate, int max_epoch, int mini_batch_size, double
double cost_prev = 0;
int epoch = 1;
+ double initial_learning_rate = learning_rate;
// Creating the mini-batches
int n_mini_batch = n/mini_batch_size;
@ -291,7 +297,7 @@ void ANN::Adam(double learning_rate, int max_epoch, int mini_batch_size, double
std::vector<double> m_output;
std::vector<double> v_output;
while(true){
- learning_rate = applyLearningRateScheduler(learning_rate, decayConstant, epoch);
+ learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
for(int i = 0; i < n_mini_batch; i++){
std::vector<double> y_hat = modelSetTest(inputMiniBatches[i]);
cost_prev = Cost(y_hat, outputMiniBatches[i]);
@ -340,6 +346,7 @@ void ANN::Adam(double learning_rate, int max_epoch, int mini_batch_size, double
double cost_prev = 0;
int epoch = 1;
+ double initial_learning_rate = learning_rate;
// Creating the mini-batches
int n_mini_batch = n/mini_batch_size;
@ -354,7 +361,7 @@ void ANN::Adam(double learning_rate, int max_epoch, int mini_batch_size, double
std::vector<double> m_output;
std::vector<double> u_output;
while(true){
- learning_rate = applyLearningRateScheduler(learning_rate, decayConstant, epoch);
+ learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
for(int i = 0; i < n_mini_batch; i++){
std::vector<double> y_hat = modelSetTest(inputMiniBatches[i]);
cost_prev = Cost(y_hat, outputMiniBatches[i]);
@ -401,6 +408,7 @@ void ANN::Adam(double learning_rate, int max_epoch, int mini_batch_size, double
double cost_prev = 0;
int epoch = 1;
+ double initial_learning_rate = learning_rate;
// Creating the mini-batches
int n_mini_batch = n/mini_batch_size;
@ -416,7 +424,7 @@ void ANN::Adam(double learning_rate, int max_epoch, int mini_batch_size, double
std::vector<double> m_output;
std::vector<double> v_output;
while(true){
- learning_rate = applyLearningRateScheduler(learning_rate, decayConstant, epoch);
+ learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
for(int i = 0; i < n_mini_batch; i++){
std::vector<double> y_hat = modelSetTest(inputMiniBatches[i]);
cost_prev = Cost(y_hat, outputMiniBatches[i]);
@ -468,6 +476,7 @@ void ANN::Adam(double learning_rate, int max_epoch, int mini_batch_size, double
double cost_prev = 0;
int epoch = 1;
+ double initial_learning_rate = learning_rate;
// Creating the mini-batches
int n_mini_batch = n/mini_batch_size;
@ -486,7 +495,7 @@ void ANN::Adam(double learning_rate, int max_epoch, int mini_batch_size, double
std::vector<double> v_output_hat;
while(true){
- learning_rate = applyLearningRateScheduler(learning_rate, decayConstant, epoch);
+ learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
for(int i = 0; i < n_mini_batch; i++){
std::vector<double> y_hat = modelSetTest(inputMiniBatches[i]);
cost_prev = Cost(y_hat, outputMiniBatches[i]);
@ -554,18 +563,27 @@ void ANN::Adam(double learning_rate, int max_epoch, int mini_batch_size, double
ANN::decayConstant = decayConstant;
}
+ void ANN::setLearningRateScheduler(std::string type, double decayConstant, double dropRate){
+ lrScheduler = type;
+ ANN::decayConstant = decayConstant;
+ ANN::dropRate = dropRate;
+ }
// https://en.wikipedia.org/wiki/Learning_rate
// Learning Rate Decay (C2W2L09) - Andrew Ng - Deep Learning Specialization
- double ANN::applyLearningRateScheduler(double learningRate, double decayConstant, double epoch){
+ double ANN::applyLearningRateScheduler(double learningRate, double decayConstant, double epoch, double dropRate){
if(lrScheduler == "Time"){
return learningRate / (1 + decayConstant * epoch);
}
- else if(lrScheduler == "Exponential"){
- return learningRate * std::exp(-decayConstant * epoch);
- }
else if(lrScheduler == "Epoch"){
return learningRate * (decayConstant / std::sqrt(epoch));
}
+ else if(lrScheduler == "Step"){
+ return learningRate * std::pow(decayConstant, int((1 + epoch)/dropRate)); // Utilizing an explicit int conversion implicitly takes the floor.
+ }
+ else if(lrScheduler == "Exponential"){
+ return learningRate * std::exp(-decayConstant * epoch);
+ }
return learningRate;
}
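For reference, each scheduler above recomputes the rate from the epoch number and the initial rate (hence the new initial_learning_rate locals in the optimizers) rather than from the previously decayed rate. The following is a minimal standalone sketch of the same four formulas; the free function scheduledLearningRate and the constants in main() are illustrative only and not part of the library, but the Step values mirror the example configured in main.cpp below (base rate 0.1, decay factor 0.5, drop every 1000 epochs):

// Standalone sketch of the Time / Epoch / Step / Exponential schedules from ANN.cpp.
// scheduledLearningRate is a hypothetical helper, not an MLPP API.
#include <cmath>
#include <iostream>
#include <string>

double scheduledLearningRate(const std::string& scheduler, double initialRate,
                             double decayConstant, double epoch, double dropRate){
    if(scheduler == "Time"){ return initialRate / (1 + decayConstant * epoch); }
    if(scheduler == "Epoch"){ return initialRate * (decayConstant / std::sqrt(epoch)); }
    if(scheduler == "Step"){
        // Drops the rate by a factor of decayConstant once every dropRate epochs;
        // the int cast floors the quotient, as in the commit.
        return initialRate * std::pow(decayConstant, int((1 + epoch) / dropRate));
    }
    if(scheduler == "Exponential"){ return initialRate * std::exp(-decayConstant * epoch); }
    return initialRate; // "None": constant rate
}

int main(){
    const double epochs[] = {1, 999, 1000, 2000, 3000};
    for(double epoch : epochs){
        std::cout << "epoch " << epoch << ": "
                  << scheduledLearningRate("Step", 0.1, 0.5, epoch, 1000) << "\n";
    }
}

With these values the Step schedule keeps the rate at 0.1 through epoch 998, then halves it every 1000 epochs: 0.05 from epoch 999, 0.025 from epoch 1999, and so on.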

View File

@ -34,13 +34,15 @@ class ANN{
double score();
void save(std::string fileName);
- void setLearningRateScheduler(std::string type, double k);
- double applyLearningRateScheduler(double learningRate, double decayConstant, double epoch);
+ void setLearningRateScheduler(std::string type, double decayConstant);
+ void setLearningRateScheduler(std::string type, double decayConstant, double dropRate);
void addLayer(int n_hidden, std::string activation, std::string weightInit = "Default", std::string reg = "None", double lambda = 0.5, double alpha = 0.5);
void addOutputLayer(std::string activation, std::string loss, std::string weightInit = "Default", std::string reg = "None", double lambda = 0.5, double alpha = 0.5);
private:
+ double applyLearningRateScheduler(double learningRate, double decayConstant, double epoch, double dropRate);
double Cost(std::vector<double> y_hat, std::vector<double> y);
void forwardPass();
@ -62,6 +64,7 @@ class ANN{
std::string lrScheduler;
double decayConstant;
+ double dropRate;
};
}

View File

@ -133,8 +133,9 @@ The result will be the model's predictions for the entire dataset.
  - LeCun Uniform
6. Possible Learning Rate Schedulers
  - Time Based
-   - Exponential
  - Epoch Based
+   - Step Based
+   - Exponential
3. ***Prebuilt Neural Networks***
  1. Multilayer Peceptron
  2. Autoencoder
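For the schedulers listed in the README excerpt above, the formulas implemented in ANN.cpp are, with initial rate $\eta_0$, decay constant $k$, drop rate $d$, and epoch $t$ (a summary derived from the code, not text that appears in the README):

$$
\eta_t = \frac{\eta_0}{1 + k t} \;(\text{Time}), \qquad
\eta_t = \eta_0 \cdot \frac{k}{\sqrt{t}} \;(\text{Epoch}), \qquad
\eta_t = \eta_0 \cdot k^{\lfloor (1+t)/d \rfloor} \;(\text{Step}), \qquad
\eta_t = \eta_0 \, e^{-k t} \;(\text{Exponential})
$$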

BIN
a.out

Binary file not shown.

View File

@ -363,19 +363,19 @@ int main() {
// Possible Weight Init Methods: Default, Uniform, HeNormal, HeUniform, XavierNormal, XavierUniform
// Possible Activations: Linear, Sigmoid, Swish, Softplus, Softsign, CLogLog, Ar{Sinh, Cosh, Tanh, Csch, Sech, Coth}, GaussianCDF, GELU, UnitStep
// Possible Loss Functions: MSE, RMSE, MBE, LogLoss, CrossEntropy, HingeLoss
- // std::vector<std::vector<double>> inputSet = {{0,0,1,1}, {0,1,0,1}};
- // std::vector<double> outputSet = {0,1,1,0};
- // ANN ann(alg.transpose(inputSet), outputSet);
- // ann.addLayer(2, "Sigmoid");
- // ann.addLayer(2, "Sigmoid");
- // ann.addOutputLayer("Sigmoid", "LogLoss");
- // //ann.AMSGrad(0.1, 10000, 1, 0.9, 0.999, 0.000001, 1);
- // //ann.Adadelta(1, 1000, 2, 0.9, 0.000001, 1);
- // //ann.Momentum(0.1, 8000, 2, 0.9, true, 1);
- // ann.setLearningRateScheduler("Time", 0.000000000001);
- // ann.gradientDescent(0.1, 20000, 1);
- // alg.printVector(ann.modelSetTest(alg.transpose(inputSet)));
- // std::cout << "ACCURACY: " << 100 * ann.score() << "%" << std::endl;
+ std::vector<std::vector<double>> inputSet = {{0,0,1,1}, {0,1,0,1}};
+ std::vector<double> outputSet = {0,1,1,0};
+ ANN ann(alg.transpose(inputSet), outputSet);
+ ann.addLayer(2, "Sigmoid");
+ ann.addLayer(2, "Sigmoid");
+ ann.addOutputLayer("Sigmoid", "LogLoss");
+ //ann.AMSGrad(0.1, 10000, 1, 0.9, 0.999, 0.000001, 1);
+ //ann.Adadelta(1, 1000, 2, 0.9, 0.000001, 1);
+ //ann.Momentum(0.1, 8000, 2, 0.9, true, 1);
+ ann.setLearningRateScheduler("Step", 0.5, 1000);
+ ann.gradientDescent(0.1, 20000, 1);
+ alg.printVector(ann.modelSetTest(alg.transpose(inputSet)));
+ std::cout << "ACCURACY: " << 100 * ann.score() << "%" << std::endl;
//std::vector<std::vector<double>> outputSet = {{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20},
// {2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40}};
@ -697,14 +697,14 @@ int main() {
// DualSVC kernelSVM(inputSet, outputSet, 1000);
// kernelSVM.gradientDescent(0.0001, 20, 1);
- std::vector<std::vector<double>> linearlyDependentMat =
- {
- {1,2,3,4},
- {234538495,4444,6111,55}
- };
- std::cout << "True of false: linearly independent?: " << std::boolalpha << alg.linearIndependenceChecker(linearlyDependentMat) << std::endl;
+ // std::vector<std::vector<double>> linearlyIndependentMat =
+ // {
+ // {1,2,3,4},
+ // {234538495,4444,6111,55}
+ // };
+ // std::cout << "True of false: linearly independent?: " << std::boolalpha << alg.linearIndependenceChecker(linearlyIndependentMat) << std::endl;
return 0;
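A side effect of passing initial_learning_rate instead of learning_rate (visible in every optimizer hunk above) is that the scheduled rate is now a pure function of the epoch number; previously the scheduler was applied to the already decayed rate on each pass through the training loop, so the decay compounded. Below is a small self-contained sketch of the difference under the Time schedule, with illustrative constants that do not come from the repository:

#include <iostream>

int main(){
    const double initial = 0.1, decayConstant = 0.01;
    double compounded = initial; // pre-commit behaviour: scheduler applied to the previous rate
    for(int epoch = 1; epoch <= 5; epoch++){
        compounded = compounded / (1 + decayConstant * epoch);
        double fromInitial = initial / (1 + decayConstant * epoch); // post-commit behaviour
        std::cout << "epoch " << epoch
                  << "  compounded: " << compounded
                  << "  from initial: " << fromInitial << "\n";
    }
}

After five epochs the compounded rate has already fallen to about 0.086, while the value computed from the initial rate is about 0.095, and the gap keeps widening as training continues.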