mirror of https://github.com/Relintai/MLPP.git
synced 2025-02-10 16:10:06 +01:00
Added learning rate schedulers and decay for neural nets.
This commit is contained in:
parent e1e8c251e4
commit a13e0e344b

@@ -16,7 +16,7 @@
 namespace MLPP {
 ANN::ANN(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet)
-: inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size())
+: inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), lrScheduler("None"), decayConstant(0)
 {
 
 }

@@ -66,6 +66,7 @@ namespace MLPP {
 
 alg.printMatrix(network[network.size() - 1].weights);
 while(true){
+    learning_rate = applyLearningRateScheduler(learning_rate, decayConstant, epoch);
     cost_prev = Cost(y_hat, outputSet);
 
     auto [cumulativeHiddenLayerWGrad, outputWGrad] = computeGradients(y_hat, outputSet);
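
The same one-line change is repeated at the top of every training loop below (plain gradient descent, the mini-batch variants, and the adaptive optimizers such as Adam): before each epoch's forward and backward passes, the current learning rate is run through the scheduler. The following is a minimal standalone sketch, not MLPP code; it reimplements the three decay rules from this commit as a free function and prints how the rate evolves when it is reassigned in place each epoch, which is how the loops in this diff use it.

// Sketch only: the decay rules added by this commit, applied the way the
// optimizer loops apply them (the decayed value is written back into
// learning_rate, so each epoch decays the previous epoch's already-decayed rate).
#include <cmath>
#include <cstdio>
#include <string>

double applyLearningRateScheduler(const std::string& scheduler, double learningRate,
                                  double decayConstant, double epoch) {
    if (scheduler == "Time")        return learningRate / (1 + decayConstant * epoch);
    if (scheduler == "Exponential") return learningRate * std::exp(-decayConstant * epoch);
    if (scheduler == "Epoch")       return learningRate * (decayConstant / std::sqrt(epoch));
    return learningRate; // "None": leave the rate unchanged (sketch-only default)
}

int main() {
    double learning_rate = 0.1;   // base rate, as in ann.gradientDescent(0.1, ...)
    double decayConstant = 0.05;  // illustrative value, not taken from the commit
    for (int epoch = 1; epoch <= 5; epoch++) {
        learning_rate = applyLearningRateScheduler("Time", learning_rate, decayConstant, epoch);
        std::printf("epoch %d: learning rate %.6f\n", epoch, learning_rate);
    }
}
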

@@ -96,6 +97,7 @@ namespace MLPP {
 // always do forward pass only ONCE at end.
 auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
 while(true){
+    learning_rate = applyLearningRateScheduler(learning_rate, decayConstant, epoch);
     for(int i = 0; i < n_mini_batch; i++){
         std::vector<double> y_hat = modelSetTest(inputMiniBatches[i]);
         cost_prev = Cost(y_hat, outputMiniBatches[i]);

@@ -133,6 +135,7 @@ namespace MLPP {
 
 std::vector<double> v_output;
 while(true){
+    learning_rate = applyLearningRateScheduler(learning_rate, decayConstant, epoch);
     for(int i = 0; i < n_mini_batch; i++){
         std::vector<double> y_hat = modelSetTest(inputMiniBatches[i]);
         cost_prev = Cost(y_hat, outputMiniBatches[i]);

@@ -184,6 +187,7 @@ namespace MLPP {
 
 std::vector<double> v_output;
 while(true){
+    learning_rate = applyLearningRateScheduler(learning_rate, decayConstant, epoch);
     for(int i = 0; i < n_mini_batch; i++){
         std::vector<double> y_hat = modelSetTest(inputMiniBatches[i]);
         cost_prev = Cost(y_hat, outputMiniBatches[i]);

@@ -234,6 +238,7 @@ namespace MLPP {
 
 std::vector<double> v_output;
 while(true){
+    learning_rate = applyLearningRateScheduler(learning_rate, decayConstant, epoch);
     for(int i = 0; i < n_mini_batch; i++){
         std::vector<double> y_hat = modelSetTest(inputMiniBatches[i]);
         cost_prev = Cost(y_hat, outputMiniBatches[i]);

@@ -286,6 +291,7 @@ void ANN::Adam(double learning_rate, int max_epoch, int mini_batch_size, double
 std::vector<double> m_output;
 std::vector<double> v_output;
 while(true){
+    learning_rate = applyLearningRateScheduler(learning_rate, decayConstant, epoch);
     for(int i = 0; i < n_mini_batch; i++){
         std::vector<double> y_hat = modelSetTest(inputMiniBatches[i]);
         cost_prev = Cost(y_hat, outputMiniBatches[i]);

@@ -348,6 +354,7 @@ void ANN::Adam(double learning_rate, int max_epoch, int mini_batch_size, double
 std::vector<double> m_output;
 std::vector<double> u_output;
 while(true){
+    learning_rate = applyLearningRateScheduler(learning_rate, decayConstant, epoch);
     for(int i = 0; i < n_mini_batch; i++){
         std::vector<double> y_hat = modelSetTest(inputMiniBatches[i]);
         cost_prev = Cost(y_hat, outputMiniBatches[i]);

@@ -409,6 +416,7 @@ void ANN::Adam(double learning_rate, int max_epoch, int mini_batch_size, double
 std::vector<double> m_output;
 std::vector<double> v_output;
 while(true){
+    learning_rate = applyLearningRateScheduler(learning_rate, decayConstant, epoch);
     for(int i = 0; i < n_mini_batch; i++){
         std::vector<double> y_hat = modelSetTest(inputMiniBatches[i]);
         cost_prev = Cost(y_hat, outputMiniBatches[i]);

@@ -478,6 +486,7 @@ void ANN::Adam(double learning_rate, int max_epoch, int mini_batch_size, double
 
 std::vector<double> v_output_hat;
 while(true){
+    learning_rate = applyLearningRateScheduler(learning_rate, decayConstant, epoch);
     for(int i = 0; i < n_mini_batch; i++){
         std::vector<double> y_hat = modelSetTest(inputMiniBatches[i]);
         cost_prev = Cost(y_hat, outputMiniBatches[i]);

@@ -540,6 +549,25 @@ void ANN::Adam(double learning_rate, int max_epoch, int mini_batch_size, double
     }
 }
 
+void ANN::setLearningRateScheduler(std::string type, double decayConstant){
+    lrScheduler = type;
+    ANN::decayConstant = decayConstant;
+}
+
+// https://en.wikipedia.org/wiki/Learning_rate
+// Learning Rate Decay (C2W2L09) - Andrew Ng - Deep Learning Specialization
+double ANN::applyLearningRateScheduler(double learningRate, double decayConstant, double epoch){
+    if(lrScheduler == "Time"){
+        return learningRate / (1 + decayConstant * epoch);
+    }
+    else if(lrScheduler == "Exponential"){
+        return learningRate * std::exp(-decayConstant * epoch);
+    }
+    else if(lrScheduler == "Epoch"){
+        return learningRate * (decayConstant / std::sqrt(epoch));
+    }
+}
 
 void ANN::addLayer(int n_hidden, std::string activation, std::string weightInit, std::string reg, double lambda, double alpha){
     if(network.empty()){
         network.push_back(HiddenLayer(n_hidden, activation, inputSet, weightInit, reg, lambda, alpha));
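
For orientation, applying each rule once to a base rate of 0.1 with decayConstant = 0.01 at epoch 50 gives roughly: Time 0.1 / (1 + 0.01 * 50) ≈ 0.067, Exponential 0.1 * exp(-0.01 * 50) ≈ 0.061, and Epoch 0.1 * (0.01 / sqrt(50)) ≈ 0.00014; the Epoch rule multiplies by decayConstant / sqrt(epoch), so it typically needs a decay constant on the order of 1 to keep the rate usable. These figures assume a single application to the base rate, not the in-place reassignment used in the training loops above.
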

@@ -612,8 +640,7 @@ void ANN::Adam(double learning_rate, int max_epoch, int mini_batch_size, double
 }
 
 std::tuple<std::vector<std::vector<std::vector<double>>>, std::vector<double>> ANN::computeGradients(std::vector<double> y_hat, std::vector<double> outputSet){
-    std::cout << "BEGIN" << std::endl;
-    std::cout << k << std::endl;
+    // std::cout << "BEGIN" << std::endl;
     class Cost cost;
     Activation avn;
     LinAlg alg;

@@ -34,6 +34,9 @@ class ANN{
     double score();
     void save(std::string fileName);
 
+    void setLearningRateScheduler(std::string type, double k);
+    double applyLearningRateScheduler(double learningRate, double decayConstant, double epoch);
+
     void addLayer(int n_hidden, std::string activation, std::string weightInit = "Default", std::string reg = "None", double lambda = 0.5, double alpha = 0.5);
     void addOutputLayer(std::string activation, std::string loss, std::string weightInit = "Default", std::string reg = "None", double lambda = 0.5, double alpha = 0.5);
 
@@ -56,6 +59,9 @@ class ANN{
 
     int n;
     int k;
 
+    std::string lrScheduler;
+    double decayConstant;
+
 };
 }
 

@@ -21,7 +21,6 @@ class GAN{
     GAN(double k, std::vector<std::vector<double>> outputSet);
     ~GAN();
     std::vector<std::vector<double>> generateExample(int n);
-    double modelTest(std::vector<double> x);
     void gradientDescent(double learning_rate, int max_epoch, bool UI = 1);
     double score();
     void save(std::string fileName);

@@ -131,6 +131,10 @@ The result will be the model's predictions for the entire dataset.
         - He Uniform
         - LeCun Normal
         - LeCun Uniform
+    6. Possible Learning Rate Schedulers
+        - Time Based
+        - Exponential
+        - Epoch Based
 3. ***Prebuilt Neural Networks***
     1. Multilayer Peceptron
     2. Autoencoder
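
The README list above names the three schedulers; wired to the new ANN API they would be selected roughly as in the sketch below. The include path and the toy XOR data are assumptions for illustration, not code from this commit.

// Hypothetical driver; adjust the ANN include path to your MLPP checkout.
#include "MLPP/ANN/ANN.hpp"   // assumed path
#include <vector>

int main() {
    // XOR-style toy data, one row per example (so no transpose is needed).
    std::vector<std::vector<double>> inputSet = {{0,0}, {0,1}, {1,0}, {1,1}};
    std::vector<double> outputSet = {0, 1, 1, 0};

    MLPP::ANN ann(inputSet, outputSet);
    ann.addLayer(2, "Sigmoid");
    ann.addOutputLayer("Sigmoid", "LogLoss");

    // Pick one of the schedulers introduced by this commit:
    ann.setLearningRateScheduler("Time", 0.001);           // lr / (1 + k * epoch)
    // ann.setLearningRateScheduler("Exponential", 0.001); // lr * exp(-k * epoch)
    // ann.setLearningRateScheduler("Epoch", 1.0);         // lr * (k / sqrt(epoch))

    ann.gradientDescent(0.1, 1000, 0);
    return 0;
}
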
main.cpp (45 lines changed)
@@ -363,30 +363,31 @@ int main() {
     // Possible Weight Init Methods: Default, Uniform, HeNormal, HeUniform, XavierNormal, XavierUniform
     // Possible Activations: Linear, Sigmoid, Swish, Softplus, Softsign, CLogLog, Ar{Sinh, Cosh, Tanh, Csch, Sech, Coth}, GaussianCDF, GELU, UnitStep
     // Possible Loss Functions: MSE, RMSE, MBE, LogLoss, CrossEntropy, HingeLoss
-    // std::vector<std::vector<double>> inputSet = {{0,0,1,1}, {0,1,0,1}};
-    // std::vector<double> outputSet = {0,1,1,0};
-    // ANN ann(alg.transpose(inputSet), outputSet);
-    // //ann.addLayer(10, "Sigmoid");
-    // ann.addLayer(10, "Sigmoid");
-    // ann.addOutputLayer("Sigmoid", "LogLoss");
-    // //ann.AMSGrad(0.1, 10000, 1, 0.9, 0.999, 0.000001, 1);
-    // //ann.Adadelta(1, 1000, 2, 0.9, 0.000001, 1);
-    // ann.Momentum(0.1, 8000, 2, 0.9, true, 1);
-    // //ann.MBGD(0.1, 1000, 2, 1);
-    // alg.printVector(ann.modelSetTest(alg.transpose(inputSet)));
-    // std::cout << "ACCURACY: " << 100 * ann.score() << "%" << std::endl;
+    std::vector<std::vector<double>> inputSet = {{0,0,1,1}, {0,1,0,1}};
+    std::vector<double> outputSet = {0,1,1,0};
+    ANN ann(alg.transpose(inputSet), outputSet);
+    ann.addLayer(2, "Sigmoid");
+    ann.addLayer(2, "Sigmoid");
+    ann.addOutputLayer("Sigmoid", "LogLoss");
+    //ann.AMSGrad(0.1, 10000, 1, 0.9, 0.999, 0.000001, 1);
+    //ann.Adadelta(1, 1000, 2, 0.9, 0.000001, 1);
+    //ann.Momentum(0.1, 8000, 2, 0.9, true, 1);
+    ann.setLearningRateScheduler("Time", 0.000000000001);
+    ann.gradientDescent(0.1, 20000, 1);
+    alg.printVector(ann.modelSetTest(alg.transpose(inputSet)));
+    std::cout << "ACCURACY: " << 100 * ann.score() << "%" << std::endl;
 
-    std::vector<std::vector<double>> outputSet = {{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20},
-    {2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40}};
+    //std::vector<std::vector<double>> outputSet = {{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20},
+    // {2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40}};
     //Vector outputSet = {0,1,1,0};
-    GAN gan(2, alg.transpose(outputSet));
-    gan.addLayer(5, "Sigmoid");
-    gan.addLayer(2, "RELU");
-    gan.addLayer(5, "Sigmoid");
-    gan.addOutputLayer("Sigmoid", "LogLoss");
-    gan.gradientDescent(0.1, 25000, 0);
-    std::cout << "GENERATED INPUT: (Gaussian-sampled noise):" << std::endl;
-    alg.printMatrix(gan.generateExample(5));
+    // GAN gan(2, alg.transpose(outputSet));
+    // gan.addLayer(5, "Sigmoid");
+    // gan.addLayer(2, "RELU");
+    // gan.addLayer(5, "Sigmoid");
+    // gan.addOutputLayer("Sigmoid", "LogLoss");
+    // gan.gradientDescent(0.1, 25000, 0);
+    // std::cout << "GENERATED INPUT: (Gaussian-sampled noise):" << std::endl;
+    // alg.printMatrix(gan.generateExample(100));
 
 
     // typedef std::vector<std::vector<double>> Matrix;
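
With the values now enabled in main.cpp, the "Time" scheduler is nearly a no-op: with decayConstant = 1e-12 the per-epoch divisor 1 + 1e-12 * epoch never exceeds about 1.00000002 over 20000 epochs, so the learning rate stays essentially at the base 0.1 and the example presumably just exercises the new code path without materially changing training.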