diff --git a/MLPP/ANN/ANN.cpp b/MLPP/ANN/ANN.cpp
index 6fddae1..d924e2f 100644
--- a/MLPP/ANN/ANN.cpp
+++ b/MLPP/ANN/ANN.cpp
@@ -25,25 +25,34 @@ namespace MLPP {
     }
 
     std::vector<double> ANN::modelSetTest(std::vector<std::vector<double>> X){
-        network[0].input = X;
-        network[0].forwardPass();
+        if(!network.empty()){
+            network[0].input = X;
+            network[0].forwardPass();
 
-        for(int i = 1; i < network.size(); i++){
-            network[i].input = network[i - 1].a;
-            network[i].forwardPass();
+            for(int i = 1; i < network.size(); i++){
+                network[i].input = network[i - 1].a;
+                network[i].forwardPass();
+            }
+            outputLayer->input = network[network.size() - 1].a;
+        }
+        else{
+            outputLayer->input = X;
         }
-        outputLayer->input = network[network.size() - 1].a;
         outputLayer->forwardPass();
         return outputLayer->a;
     }
 
     double ANN::modelTest(std::vector<double> x){
-
-        network[0].Test(x);
-        for(int i = 1; i < network.size(); i++){
-            network[i].Test(network[i - 1].a_test);
+        if(!network.empty()){
+            network[0].Test(x);
+            for(int i = 1; i < network.size(); i++){
+                network[i].Test(network[i - 1].a_test);
+            }
+            outputLayer->Test(network[network.size() - 1].a_test);
+        }
+        else{
+            outputLayer->Test(x);
         }
-        outputLayer->Test(network[network.size() - 1].a_test);
         return outputLayer->a_test;
     }
 
@@ -69,21 +78,23 @@ namespace MLPP {
             outputLayer->weights = regularization.regWeights(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg);
             outputLayer->bias -= learning_rate * alg.sum_elements(outputLayer->delta) / n;
 
-            auto hiddenLayerAvn = network[network.size() - 1].activation_map[network[network.size() - 1].activation];
-            network[network.size() - 1].delta = alg.hadamard_product(alg.outerProduct(outputLayer->delta, outputLayer->weights), (avn.*hiddenLayerAvn)(network[network.size() - 1].z, 1));
-            std::vector<std::vector<double>> hiddenLayerWGrad = alg.matmult(alg.transpose(network[network.size() - 1].input), network[network.size() - 1].delta);
-
-            network[network.size() - 1].weights = alg.subtraction(network[network.size() - 1].weights, alg.scalarMultiply(learning_rate/n, hiddenLayerWGrad));
-            network[network.size() - 1].weights = regularization.regWeights(network[network.size() - 1].weights, network[network.size() - 1].lambda, network[network.size() - 1].alpha, network[network.size() - 1].reg);
-            network[network.size() - 1].bias = alg.subtractMatrixRows(network[network.size() - 1].bias, alg.scalarMultiply(learning_rate/n, network[network.size() - 1].delta));
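+            // Hidden-layer updates only run when hidden layers exist; an ANN may now consist of just the output layer.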
+            if(!network.empty()){
+                auto hiddenLayerAvn = network[network.size() - 1].activation_map[network[network.size() - 1].activation];
+                network[network.size() - 1].delta = alg.hadamard_product(alg.outerProduct(outputLayer->delta, outputLayer->weights), (avn.*hiddenLayerAvn)(network[network.size() - 1].z, 1));
+                std::vector<std::vector<double>> hiddenLayerWGrad = alg.matmult(alg.transpose(network[network.size() - 1].input), network[network.size() - 1].delta);
+
+                network[network.size() - 1].weights = alg.subtraction(network[network.size() - 1].weights, alg.scalarMultiply(learning_rate/n, hiddenLayerWGrad));
+                network[network.size() - 1].weights = regularization.regWeights(network[network.size() - 1].weights, network[network.size() - 1].lambda, network[network.size() - 1].alpha, network[network.size() - 1].reg);
+                network[network.size() - 1].bias = alg.subtractMatrixRows(network[network.size() - 1].bias, alg.scalarMultiply(learning_rate/n, network[network.size() - 1].delta));
 
-            for(int i = network.size() - 2; i >= 0; i--){
-                auto hiddenLayerAvn = network[i].activation_map[network[i].activation];
-                network[i].delta = alg.hadamard_product(alg.matmult(network[i + 1].delta, network[i + 1].weights), (avn.*hiddenLayerAvn)(network[i].z, 1));
-                std::vector<std::vector<double>> hiddenLayerWGrad = alg.matmult(alg.transpose(network[i].input), network[i].delta);
-                network[i].weights = alg.subtraction(network[i].weights, alg.scalarMultiply(learning_rate/n, hiddenLayerWGrad));
-                network[i].weights = regularization.regWeights(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg);
-                network[i].bias = alg.subtractMatrixRows(network[i].bias, alg.scalarMultiply(learning_rate/n, network[i].delta));
+                for(int i = network.size() - 2; i >= 0; i--){
+                    auto hiddenLayerAvn = network[i].activation_map[network[i].activation];
+                    network[i].delta = alg.hadamard_product(alg.matmult(network[i + 1].delta, network[i + 1].weights), (avn.*hiddenLayerAvn)(network[i].z, 1));
+                    std::vector<std::vector<double>> hiddenLayerWGrad = alg.matmult(alg.transpose(network[i].input), network[i].delta);
+                    network[i].weights = alg.subtraction(network[i].weights, alg.scalarMultiply(learning_rate/n, hiddenLayerWGrad));
+                    network[i].weights = regularization.regWeights(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg);
+                    network[i].bias = alg.subtractMatrixRows(network[i].bias, alg.scalarMultiply(learning_rate/n, network[i].delta));
+                }
             }
 
             forwardPass();
@@ -92,11 +103,11 @@ namespace MLPP {
                 Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
                 std::cout << "Layer " << network.size() + 1 << ": " << std::endl;
                 Utilities::UI(outputLayer->weights, outputLayer->bias);
-                std::cout << "Layer " << network.size() << ": " << std::endl;
-                Utilities::UI(network[network.size() - 1].weights, network[network.size() - 1].bias);
-                for(int i = network.size() - 2; i >= 0; i--){
-                    std::cout << "Layer " << i + 1 << ": " << std::endl;
-                    Utilities::UI(network[i].weights, network[i].bias);
+                if(!network.empty()){
+                    for(int i = network.size() - 1; i >= 0; i--){
+                        std::cout << "Layer " << i + 1 << ": " << std::endl;
+                        Utilities::UI(network[i].weights, network[i].bias);
+                    }
                 }
             }
 
@@ -113,11 +124,16 @@ namespace MLPP {
 
     void ANN::save(std::string fileName){
         Utilities util;
-        util.saveParameters(fileName, network[0].weights, network[0].bias, 0, 1);
-        for(int i = 1; i < network.size(); i++){
-            util.saveParameters(fileName, network[i].weights, network[i].bias, 1, i + 1);
+        if(!network.empty()){
+            util.saveParameters(fileName, network[0].weights, network[0].bias, 0, 1);
+            for(int i = 1; i < network.size(); i++){
+                util.saveParameters(fileName, network[i].weights, network[i].bias, 1, i + 1);
+            }
+            util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, 1, network.size() + 1);
+        }
+        else{
+            util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, 0, network.size() + 1);
         }
-        util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, 1, network.size() + 1);
     }
 
     void ANN::addLayer(int n_hidden, std::string activation, std::string weightInit, std::string reg, double lambda, double alpha){
@@ -132,7 +148,12 @@ namespace MLPP {
     }
 
     void ANN::addOutputLayer(std::string activation, std::string loss, std::string weightInit, std::string reg, double lambda, double alpha){
-        outputLayer = new OutputLayer(network[0].n_hidden, activation, loss, network[network.size() - 1].a, weightInit, reg, lambda, alpha);
+        if(!network.empty()){
+            outputLayer = new OutputLayer(network[0].n_hidden, activation, loss, network[network.size() - 1].a, weightInit, reg, lambda, alpha);
+        }
+        else{
+            outputLayer = new OutputLayer(k, activation, loss, inputSet, weightInit, reg, lambda, alpha);
+        }
     }
 
     double ANN::Cost(std::vector<double> y_hat, std::vector<double> y){
@@ -141,21 +162,28 @@ namespace MLPP {
         double totalRegTerm = 0;
 
         auto cost_function = outputLayer->cost_map[outputLayer->cost];
-        for(int i = 0; i < network.size() - 1; i++){
-            totalRegTerm += regularization.regTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg);
+        if(!network.empty()){
+            for(int i = 0; i < network.size() - 1; i++){
+                totalRegTerm += regularization.regTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg);
+            }
         }
         return (cost.*cost_function)(y_hat, y) + totalRegTerm + regularization.regTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg);
     }
 
     void ANN::forwardPass(){
-        network[0].input = inputSet;
-        network[0].forwardPass();
+        if(!network.empty()){
+            network[0].input = inputSet;
+            network[0].forwardPass();
 
-        for(int i = 1; i < network.size(); i++){
-            network[i].input = network[i - 1].a;
-            network[i].forwardPass();
+            for(int i = 1; i < network.size(); i++){
+                network[i].input = network[i - 1].a;
+                network[i].forwardPass();
+            }
+            outputLayer->input = network[network.size() - 1].a;
+        }
+        else{
+            outputLayer->input = inputSet;
         }
-        outputLayer->input = network[network.size() - 1].a;
         outputLayer->forwardPass();
         y_hat = outputLayer->a;
     }
diff --git a/MLPP/Activation/Activation.cpp b/MLPP/Activation/Activation.cpp
index d6e34c7..772251c 100644
--- a/MLPP/Activation/Activation.cpp
+++ b/MLPP/Activation/Activation.cpp
@@ -7,6 +7,7 @@
 #include <iostream>
 #include "LinAlg/LinAlg.hpp"
 #include "Activation.hpp"
+#include <cmath>
 
 namespace MLPP{
 
@@ -283,6 +284,53 @@ namespace MLPP{
         return alg.hadamard_product(z, sigmoid(z));
     }
 
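+    // Mish activation: mish(z) = z * tanh(softplus(z)).
+    // The derivative used below is sech^2(softplus(z)) * z * sigmoid(z) + mish(z)/z, where mish(z)/z = tanh(softplus(z)).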
+    double Activation::mish(double z, bool deriv){
+        if(deriv){
+            return sech(softplus(z)) * sech(softplus(z)) * z * sigmoid(z) + mish(z)/z;
+        }
+        return z * tanh(softplus(z));
+    }
+
+    std::vector<double> Activation::mish(std::vector<double> z, bool deriv){
+        LinAlg alg;
+        if(deriv){
+            return alg.addition(alg.hadamard_product(alg.hadamard_product(alg.hadamard_product(sech(softplus(z)), sech(softplus(z))), z), sigmoid(z)), alg.elementWiseDivision(mish(z), z));
+        }
+        return alg.hadamard_product(z, tanh(softplus(z)));
+    }
+
+    std::vector<std::vector<double>> Activation::mish(std::vector<std::vector<double>> z, bool deriv){
+        LinAlg alg;
+        if(deriv){
+            return alg.addition(alg.hadamard_product(alg.hadamard_product(alg.hadamard_product(sech(softplus(z)), sech(softplus(z))), z), sigmoid(z)), alg.elementWiseDivision(mish(z), z));
+        }
+        return alg.hadamard_product(z, tanh(softplus(z)));
+    }
+
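+    // Cardinal sine: sinc(z) = sin(z)/z, with derivative (z*cos(z) - sin(z))/z^2; z = 0 is not special-cased.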
+    double Activation::sinc(double z, bool deriv){
+        if(deriv){
+            return (z * std::cos(z) - std::sin(z)) / (z * z);
+        }
+        return std::sin(z)/z;
+    }
+
+    std::vector<double> Activation::sinc(std::vector<double> z, bool deriv){
+        LinAlg alg;
+        if(deriv){
+            return alg.elementWiseDivision(alg.subtraction(alg.hadamard_product(z, alg.cos(z)), alg.sin(z)), alg.hadamard_product(z, z));
+        }
+        return alg.elementWiseDivision(alg.sin(z), z);
+    }
+
+    std::vector<std::vector<double>> Activation::sinc(std::vector<std::vector<double>> z, bool deriv){
+        LinAlg alg;
+        if(deriv){
+            return alg.elementWiseDivision(alg.subtraction(alg.hadamard_product(z, alg.cos(z)), alg.sin(z)), alg.hadamard_product(z, z));
+        }
+        return alg.elementWiseDivision(alg.sin(z), z);
+    }
+
 
     double Activation::RELU(double z, bool deriv){
         if (deriv){
             if(z <= 0){
diff --git a/MLPP/Activation/Activation.hpp b/MLPP/Activation/Activation.hpp
index 5a42a5a..9925064 100644
--- a/MLPP/Activation/Activation.hpp
+++ b/MLPP/Activation/Activation.hpp
@@ -53,6 +53,14 @@ namespace MLPP{
         std::vector<double> swish(std::vector<double> z, bool deriv = 0);
         std::vector<std::vector<double>> swish(std::vector<std::vector<double>> z, bool deriv = 0);
 
+        double mish(double z, bool deriv = 0);
+        std::vector<double> mish(std::vector<double> z, bool deriv = 0);
+        std::vector<std::vector<double>> mish(std::vector<std::vector<double>> z, bool deriv = 0);
+
+        double sinc(double z, bool deriv = 0);
+        std::vector<double> sinc(std::vector<double> z, bool deriv = 0);
+        std::vector<std::vector<double>> sinc(std::vector<std::vector<double>> z, bool deriv = 0);
+
         double RELU(double z, bool deriv = 0);
         std::vector<double> RELU(std::vector<double> z, bool deriv = 0);
         std::vector<std::vector<double>> RELU(std::vector<std::vector<double>> z, bool deriv = 0);
diff --git a/MLPP/HiddenLayer/HiddenLayer.cpp b/MLPP/HiddenLayer/HiddenLayer.cpp
index 6ce42b3..d60c873 100644
--- a/MLPP/HiddenLayer/HiddenLayer.cpp
+++ b/MLPP/HiddenLayer/HiddenLayer.cpp
@@ -28,6 +28,12 @@ namespace MLPP {
         activation_map["Swish"] = &Activation::swish;
         activationTest_map["Swish"] = &Activation::swish;
 
+        activation_map["Mish"] = &Activation::mish;
+        activationTest_map["Mish"] = &Activation::mish;
+
+        activation_map["SinC"] = &Activation::sinc;
+        activationTest_map["SinC"] = &Activation::sinc;
+
         activation_map["Softplus"] = &Activation::softplus;
         activationTest_map["Softplus"] = &Activation::softplus;
diff --git a/MLPP/LinAlg/LinAlg.cpp b/MLPP/LinAlg/LinAlg.cpp
index 1b1fa1e..196f923 100644
--- a/MLPP/LinAlg/LinAlg.cpp
+++ b/MLPP/LinAlg/LinAlg.cpp
@@ -390,6 +390,34 @@ namespace MLPP{
         return full;
     }
 
+    std::vector<std::vector<double>> LinAlg::sin(std::vector<std::vector<double>> A){
+        std::vector<std::vector<double>> B;
+        B.resize(A.size());
+        for(int i = 0; i < B.size(); i++){
+            B[i].resize(A[0].size());
+        }
+        for(int i = 0; i < A.size(); i++){
+            for(int j = 0; j < A[i].size(); j++){
+                B[i][j] = std::sin(A[i][j]);
+            }
+        }
+        return B;
+    }
+
+    std::vector<std::vector<double>> LinAlg::cos(std::vector<std::vector<double>> A){
+        std::vector<std::vector<double>> B;
+        B.resize(A.size());
+        for(int i = 0; i < B.size(); i++){
+            B[i].resize(A[0].size());
+        }
+        for(int i = 0; i < A.size(); i++){
+            for(int j = 0; j < A[i].size(); j++){
+                B[i][j] = std::cos(A[i][j]);
+            }
+        }
+        return B;
+    }
+
     double LinAlg::max(std::vector<std::vector<double>> A){
         return max(flatten(A));
     }
@@ -490,10 +518,10 @@ namespace MLPP{
         }
 
         std::vector<std::vector<double>> P = identity(A.size());
-        P[sub_i][sub_j] = -sin(theta);
-        P[sub_i][sub_i] = cos(theta);
-        P[sub_j][sub_j] = cos(theta);
-        P[sub_j][sub_i] = sin(theta);
+        P[sub_i][sub_j] = -std::sin(theta);
+        P[sub_i][sub_i] = std::cos(theta);
+        P[sub_j][sub_j] = std::cos(theta);
+        P[sub_j][sub_i] = std::sin(theta);
 
         a_new = matmult(matmult(inverse(P), A), P);
 
@@ -782,6 +810,24 @@ namespace MLPP{
         return full;
     }
 
+    std::vector<double> LinAlg::sin(std::vector<double> a){
+        std::vector<double> b;
+        b.resize(a.size());
+        for(int i = 0; i < a.size(); i++){
+            b[i] = std::sin(a[i]);
+        }
+        return b;
+    }
+
+    std::vector<double> LinAlg::cos(std::vector<double> a){
+        std::vector<double> b;
+        b.resize(a.size());
+        for(int i = 0; i < a.size(); i++){
+            b[i] = std::cos(a[i]);
+        }
+        return b;
+    }
+
     double LinAlg::max(std::vector<double> a){
         int max = a[0];
         for(int i = 0; i < a.size(); i++){
diff --git a/MLPP/LinAlg/LinAlg.hpp b/MLPP/LinAlg/LinAlg.hpp
index 786a963..a230e64 100644
--- a/MLPP/LinAlg/LinAlg.hpp
+++ b/MLPP/LinAlg/LinAlg.hpp
@@ -70,6 +70,10 @@ namespace MLPP{
 
         std::vector<std::vector<double>> full(int n, int m, int k);
 
+        std::vector<std::vector<double>> sin(std::vector<std::vector<double>> A);
+
+        std::vector<std::vector<double>> cos(std::vector<std::vector<double>> A);
+
         double max(std::vector<std::vector<double>> A);
 
         double min(std::vector<std::vector<double>> A);
 
@@ -136,6 +140,10 @@ namespace MLPP{
 
         std::vector<double> full(int n, int k);
 
+        std::vector<double> sin(std::vector<double> a);
+
+        std::vector<double> cos(std::vector<double> a);
+
         double max(std::vector<double> a);
 
         double min(std::vector<double> a);
diff --git a/MLPP/MANN/MANN.cpp b/MLPP/MANN/MANN.cpp
index 390f971..e8c8491 100644
--- a/MLPP/MANN/MANN.cpp
+++ b/MLPP/MANN/MANN.cpp
@@ -25,25 +25,34 @@ namespace MLPP {
     }
 
     std::vector<std::vector<double>> MANN::modelSetTest(std::vector<std::vector<double>> X){
-        network[0].input = X;
-        network[0].forwardPass();
+        if(!network.empty()){
+            network[0].input = X;
+            network[0].forwardPass();
 
-        for(int i = 1; i < network.size(); i++){
-            network[i].input = network[i - 1].a;
-            network[i].forwardPass();
+            for(int i = 1; i < network.size(); i++){
+                network[i].input = network[i - 1].a;
+                network[i].forwardPass();
+            }
+            outputLayer->input = network[network.size() - 1].a;
+        }
+        else {
+            outputLayer->input = X;
         }
-        outputLayer->input = network[network.size() - 1].a;
         outputLayer->forwardPass();
         return outputLayer->a;
     }
 
     std::vector<double> MANN::modelTest(std::vector<double> x){
-
-        network[0].Test(x);
-        for(int i = 1; i < network.size(); i++){
-            network[i].Test(network[i - 1].a_test);
+        if(!network.empty()){
+            network[0].Test(x);
+            for(int i = 1; i < network.size(); i++){
+                network[i].Test(network[i - 1].a_test);
+            }
+            outputLayer->Test(network[network.size() - 1].a_test);
+        }
+        else{
+            outputLayer->Test(x);
         }
-        outputLayer->Test(network[network.size() - 1].a_test);
         return outputLayer->a_test;
     }
 
@@ -75,21 +84,23 @@ namespace MLPP {
             outputLayer->weights = regularization.regWeights(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg);
             outputLayer->bias = alg.subtractMatrixRows(outputLayer->bias, alg.scalarMultiply(learning_rate/n, outputLayer->delta));
 
-            auto hiddenLayerAvn = network[network.size() - 1].activation_map[network[network.size() - 1].activation];
-            network[network.size() - 1].delta = alg.hadamard_product(alg.matmult(outputLayer->delta, alg.transpose(outputLayer->weights)), (avn.*hiddenLayerAvn)(network[network.size() - 1].z, 1));
-            std::vector<std::vector<double>> hiddenLayerWGrad = alg.matmult(alg.transpose(network[network.size() - 1].input), network[network.size() - 1].delta);
-
-            network[network.size() - 1].weights = alg.subtraction(network[network.size() - 1].weights, alg.scalarMultiply(learning_rate/n, hiddenLayerWGrad));
-            network[network.size() - 1].weights = regularization.regWeights(network[network.size() - 1].weights, network[network.size() - 1].lambda, network[network.size() - 1].alpha, network[network.size() - 1].reg);
-            network[network.size() - 1].bias = alg.subtractMatrixRows(network[network.size() - 1].bias, alg.scalarMultiply(learning_rate/n, network[network.size() - 1].delta));
+            if(!network.empty()){
+                auto hiddenLayerAvn = network[network.size() - 1].activation_map[network[network.size() - 1].activation];
+                network[network.size() - 1].delta = alg.hadamard_product(alg.matmult(outputLayer->delta, alg.transpose(outputLayer->weights)), (avn.*hiddenLayerAvn)(network[network.size() - 1].z, 1));
+                std::vector<std::vector<double>> hiddenLayerWGrad = alg.matmult(alg.transpose(network[network.size() - 1].input), network[network.size() - 1].delta);
+
+                network[network.size() - 1].weights = alg.subtraction(network[network.size() - 1].weights, alg.scalarMultiply(learning_rate/n, hiddenLayerWGrad));
+                network[network.size() - 1].weights = regularization.regWeights(network[network.size() - 1].weights, network[network.size() - 1].lambda, network[network.size() - 1].alpha, network[network.size() - 1].reg);
+                network[network.size() - 1].bias = alg.subtractMatrixRows(network[network.size() - 1].bias, alg.scalarMultiply(learning_rate/n, network[network.size() - 1].delta));
 
-            for(int i = network.size() - 2; i >= 0; i--){
-                auto hiddenLayerAvn = network[i].activation_map[network[i].activation];
-                network[i].delta = alg.hadamard_product(alg.matmult(network[i + 1].delta, network[i + 1].weights), (avn.*hiddenLayerAvn)(network[i].z, 1));
-                std::vector<std::vector<double>> hiddenLayerWGrad = alg.matmult(alg.transpose(network[i].input), network[i].delta);
-                network[i].weights = alg.subtraction(network[i].weights, alg.scalarMultiply(learning_rate/n, hiddenLayerWGrad));
-                network[i].weights = regularization.regWeights(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg);
-                network[i].bias = alg.subtractMatrixRows(network[i].bias, alg.scalarMultiply(learning_rate/n, network[i].delta));
+                for(int i = network.size() - 2; i >= 0; i--){
+                    auto hiddenLayerAvn = network[i].activation_map[network[i].activation];
+                    network[i].delta = alg.hadamard_product(alg.matmult(network[i + 1].delta, network[i + 1].weights), (avn.*hiddenLayerAvn)(network[i].z, 1));
+                    std::vector<std::vector<double>> hiddenLayerWGrad = alg.matmult(alg.transpose(network[i].input), network[i].delta);
+                    network[i].weights = alg.subtraction(network[i].weights, alg.scalarMultiply(learning_rate/n, hiddenLayerWGrad));
+                    network[i].weights = regularization.regWeights(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg);
+                    network[i].bias = alg.subtractMatrixRows(network[i].bias, alg.scalarMultiply(learning_rate/n, network[i].delta));
+                }
             }
 
             forwardPass();
@@ -98,11 +109,12 @@ namespace MLPP {
                 Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
                 std::cout << "Layer " << network.size() + 1 << ": " << std::endl;
                 Utilities::UI(outputLayer->weights, outputLayer->bias);
-                std::cout << "Layer " << network.size() << ": " << std::endl;
-                Utilities::UI(network[network.size() - 1].weights, network[network.size() - 1].bias);
-                for(int i = network.size() - 2; i >= 0; i--){
-                    std::cout << "Layer " << i + 1 << ": " << std::endl;
-                    Utilities::UI(network[i].weights, network[i].bias);
+                if(!network.empty()){
+                    std::cout << "Layer " << network.size() << ": " << std::endl;
+                    for(int i = network.size() - 1; i >= 0; i--){
+                        std::cout << "Layer " << i + 1 << ": " << std::endl;
+                        Utilities::UI(network[i].weights, network[i].bias);
+                    }
                 }
             }
 
@@ -119,11 +131,16 @@ namespace MLPP {
 
     void MANN::save(std::string fileName){
         Utilities util;
-        util.saveParameters(fileName, network[0].weights, network[0].bias, 0, 1);
-        for(int i = 1; i < network.size(); i++){
-            util.saveParameters(fileName, network[i].weights, network[i].bias, 1, i + 1);
+        if(!network.empty()){
+            util.saveParameters(fileName, network[0].weights, network[0].bias, 0, 1);
+            for(int i = 1; i < network.size(); i++){
+                util.saveParameters(fileName, network[i].weights, network[i].bias, 1, i + 1);
+            }
+            util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, 1, network.size() + 1);
+        }
+        else{
+            util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, 0, network.size() + 1);
         }
-        util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, 1, network.size() + 1);
     }
 
     void MANN::addLayer(int n_hidden, std::string activation, std::string weightInit, std::string reg, double lambda, double alpha){
@@ -138,7 +155,12 @@ namespace MLPP {
     }
 
     void MANN::addOutputLayer(std::string activation, std::string loss, std::string weightInit, std::string reg, double lambda, double alpha){
-        outputLayer = new MultiOutputLayer(n_output, network[0].n_hidden, activation, loss, network[network.size() - 1].a, weightInit, reg, lambda, alpha);
+        if(!network.empty()){
+            outputLayer = new MultiOutputLayer(n_output, network[0].n_hidden, activation, loss, network[network.size() - 1].a, weightInit, reg, lambda, alpha);
+        }
+        else{
+            outputLayer = new MultiOutputLayer(n_output, k, activation, loss, inputSet, weightInit, reg, lambda, alpha);
+        }
     }
 
     double MANN::Cost(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y){
@@ -147,21 +169,28 @@ namespace MLPP {
         double totalRegTerm = 0;
 
         auto cost_function = outputLayer->cost_map[outputLayer->cost];
-        for(int i = 0; i < network.size() - 1; i++){
-            totalRegTerm += regularization.regTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg);
+        if(!network.empty()){
+            for(int i = 0; i < network.size() - 1; i++){
+                totalRegTerm += regularization.regTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg);
+            }
         }
         return (cost.*cost_function)(y_hat, y) + totalRegTerm + regularization.regTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg);
     }
 
     void MANN::forwardPass(){
-        network[0].input = inputSet;
-        network[0].forwardPass();
+        if(!network.empty()){
+            network[0].input = inputSet;
+            network[0].forwardPass();
 
-        for(int i = 1; i < network.size(); i++){
-            network[i].input = network[i - 1].a;
-            network[i].forwardPass();
+            for(int i = 1; i < network.size(); i++){
+                network[i].input = network[i - 1].a;
+                network[i].forwardPass();
+            }
+            outputLayer->input = network[network.size() - 1].a;
+        }
+        else{
+            outputLayer->input = inputSet;
         }
-        outputLayer->input = network[network.size() - 1].a;
         outputLayer->forwardPass();
         y_hat = outputLayer->a;
     }
diff --git a/MLPP/MultiOutputLayer/MultiOutputLayer.cpp b/MLPP/MultiOutputLayer/MultiOutputLayer.cpp
index 63f6dac..977c9bd 100644
--- a/MLPP/MultiOutputLayer/MultiOutputLayer.cpp
+++ b/MLPP/MultiOutputLayer/MultiOutputLayer.cpp
@@ -30,6 +30,12 @@ namespace MLPP {
         activation_map["Swish"] = &Activation::swish;
         activationTest_map["Swish"] = &Activation::swish;
 
+        activation_map["Mish"] = &Activation::mish;
+        activationTest_map["Mish"] = &Activation::mish;
+
+        activation_map["SinC"] = &Activation::sinc;
+        activationTest_map["SinC"] = &Activation::sinc;
+
         activation_map["Softplus"] = &Activation::softplus;
         activationTest_map["Softplus"] = &Activation::softplus;
diff --git a/MLPP/OutputLayer/OutputLayer.cpp b/MLPP/OutputLayer/OutputLayer.cpp
index d5a2603..9436e54 100644
--- a/MLPP/OutputLayer/OutputLayer.cpp
+++ b/MLPP/OutputLayer/OutputLayer.cpp
@@ -27,6 +27,12 @@ namespace MLPP {
         activation_map["Swish"] = &Activation::swish;
         activationTest_map["Swish"] = &Activation::swish;
 
+        activation_map["Mish"] = &Activation::mish;
+        activationTest_map["Mish"] = &Activation::mish;
+
+        activation_map["SinC"] = &Activation::sinc;
+        activationTest_map["SinC"] = &Activation::sinc;
+
         activation_map["Softplus"] = &Activation::softplus;
         activationTest_map["Softplus"] = &Activation::softplus;
diff --git a/MLPP/SVC/SVC.cpp b/MLPP/SVC/SVC.cpp
new file mode 100644
index 0000000..6406624
--- /dev/null
+++ b/MLPP/SVC/SVC.cpp
@@ -0,0 +1,170 @@
+//
+//  SVC.cpp
+//
+//  Created by Marc Melikyan on 10/2/20.
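+//
+//  Linear classifier of the form w^T x + b (see the sign(wTx + b) note above forwardPass),
+//  trained here by gradient descent on an MSE cost plus an optional regularization term.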
+//
+
+#include "SVC.hpp"
+#include "LinAlg/LinAlg.hpp"
+#include "Stat/Stat.hpp"
+#include "Regularization/Reg.hpp"
+#include "Utilities/Utilities.hpp"
+#include "Cost/Cost.hpp"
+
+#include <iostream>
+#include <cmath>
+#include <random>
+
+namespace MLPP{
+
+    SVC::SVC(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, std::string reg, double lambda, double alpha)
+    : inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha)
+    {
+        y_hat.resize(n);
+
+        weights = Utilities::weightInitialization(k);
+        bias = Utilities::biasInitialization();
+    }
+
+    std::vector<double> SVC::modelSetTest(std::vector<std::vector<double>> X){
+        return Evaluate(X);
+    }
+
+    double SVC::modelTest(std::vector<double> x){
+        return Evaluate(x);
+    }
+
+    void SVC::gradientDescent(double learning_rate, int max_epoch, bool UI){
+        LinAlg alg;
+        Reg regularization;
+        double cost_prev = 0;
+        int epoch = 1;
+        forwardPass();
+
+        while(true){
+            cost_prev = Cost(y_hat, outputSet);
+
+            std::vector<double> error = alg.subtraction(y_hat, outputSet);
+
+            // Calculating the weight gradients
+            weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputSet), error)));
+            weights = regularization.regWeights(weights, lambda, alpha, reg);
+
+            // Calculating the bias gradients
+            bias -= learning_rate * alg.sum_elements(error) / n;
+            forwardPass();
+
+            if(UI) {
+                Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
+                Utilities::UI(weights, bias);
+            }
+            epoch++;
+
+            if(epoch > max_epoch) { break; }
+        }
+    }
+
+    void SVC::SGD(double learning_rate, int max_epoch, bool UI){
+        LinAlg alg;
+        Reg regularization;
+        double cost_prev = 0;
+        int epoch = 1;
+
+        while(true){
+            std::random_device rd;
+            std::default_random_engine generator(rd());
+            std::uniform_int_distribution<int> distribution(0, int(n - 1));
+            int outputIndex = distribution(generator);
+
+            double y_hat = Evaluate(inputSet[outputIndex]);
+            cost_prev = Cost({y_hat}, {outputSet[outputIndex]});
+
+            double error = y_hat - outputSet[outputIndex];
+
+            // Weight updation
+            weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate * error, inputSet[outputIndex]));
+            weights = regularization.regWeights(weights, lambda, alpha, reg);
+
+            // Bias updation
+            bias -= learning_rate * error;
+
+            y_hat = Evaluate(inputSet[outputIndex]);
+
+            if(UI) {
+                Utilities::CostInfo(epoch, cost_prev, Cost({y_hat}, {outputSet[outputIndex]}));
+                Utilities::UI(weights, bias);
+            }
+            epoch++;
+
+            if(epoch > max_epoch) { break; }
+        }
+        forwardPass();
+    }
+
+    void SVC::MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI){
+        LinAlg alg;
+        Reg regularization;
+        double cost_prev = 0;
+        int epoch = 1;
+
+        // Creating the mini-batches
+        int n_mini_batch = n/mini_batch_size;
+        auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
+
+        while(true){
+            for(int i = 0; i < n_mini_batch; i++){
+                std::vector<double> y_hat = Evaluate(inputMiniBatches[i]);
+                cost_prev = Cost(y_hat, outputMiniBatches[i]);
+
+                std::vector<double> error = alg.subtraction(y_hat, outputMiniBatches[i]);
+
+                // Calculating the weight gradients
+                weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error)));
+                weights = regularization.regWeights(weights, lambda, alpha, reg);
+
+                // Calculating the bias gradients
+                bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size();
+                y_hat = Evaluate(inputMiniBatches[i]);
+
+                if(UI) {
+                    Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
+                    Utilities::UI(weights, bias);
+                }
+            }
+            epoch++;
+            if(epoch > max_epoch) { break; }
+        }
+        forwardPass();
+    }
+
+    double SVC::score(){
+        Utilities util;
+        return util.performance(y_hat, outputSet);
+    }
+
+    void SVC::save(std::string fileName){
+        Utilities util;
+        util.saveParameters(fileName, weights, bias);
+    }
+
+    double SVC::Cost(std::vector<double> y_hat, std::vector<double> y){
+        Reg regularization;
+        class Cost cost;
+        return cost.MSE(y_hat, y) + regularization.regTerm(weights, lambda, alpha, reg);
+    }
+
+    std::vector<double> SVC::Evaluate(std::vector<std::vector<double>> X){
+        LinAlg alg;
+        return alg.scalarAdd(bias, alg.mat_vec_mult(X, weights));
+    }
+
+    double SVC::Evaluate(std::vector<double> x){
+        LinAlg alg;
+        return alg.dot(weights, x) + bias;
+    }
+
+    // sign(wTx + b)
+    void SVC::forwardPass(){
+        y_hat = Evaluate(inputSet);
+    }
+}
\ No newline at end of file
diff --git a/MLPP/SVC/SVC.hpp b/MLPP/SVC/SVC.hpp
new file mode 100644
index 0000000..432520e
--- /dev/null
+++ b/MLPP/SVC/SVC.hpp
@@ -0,0 +1,51 @@
+//
+//  SVC.hpp
+//
+//  Created by Marc Melikyan on 9/10/21.
+//
+
+#ifndef SVC_hpp
+#define SVC_hpp
+
+#include <vector>
+#include <string>
+
+namespace MLPP{
+    class SVC{
+
+        public:
+            SVC(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, std::string reg = "None", double lambda = 0.5, double alpha = 0.5);
+            std::vector<double> modelSetTest(std::vector<std::vector<double>> X);
+            double modelTest(std::vector<double> x);
+            void gradientDescent(double learning_rate, int max_epoch, bool UI = 1);
+            void SGD(double learning_rate, int max_epoch, bool UI = 1);
+            void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1);
+            double score();
+            void save(std::string fileName);
+        private:
+
+            double Cost(std::vector<double> y_hat, std::vector<double> y);
+
+            std::vector<double> Evaluate(std::vector<std::vector<double>> X);
+            double Evaluate(std::vector<double> x);
+            void forwardPass();
+
+            std::vector<std::vector<double>> inputSet;
+            std::vector<double> outputSet;
+            std::vector<double> y_hat;
+            std::vector<double> weights;
+            double bias;
+
+            int n;
+            int k;
+
+            // Regularization Params
+            std::string reg;
+            double lambda;
+            double alpha; /* This is the controlling param for Elastic Net*/
+
+
+    };
+}
+
+#endif /* SVC_hpp */
diff --git a/a.out b/a.out
new file mode 100755
index 0000000..593deed
Binary files /dev/null and b/a.out differ
diff --git a/main.cpp b/main.cpp
index f01d1b5..ab1453e 100644
--- a/main.cpp
+++ b/main.cpp
@@ -8,7 +8,6 @@
 // THINGS CURRENTLY TO DO:
 // POLYMORPHIC IMPLEMENTATION OF REGRESSION CLASSES
 // EXTEND SGD/MBGD SUPPORT FOR DYN. SIZED ANN
-// STANDARDIZE ACTIVATIONS/OPTIMIZATIONS
 // ADD LEAKYRELU, ELU, SELU TO ANN
 // HYPOTHESIS TESTING CLASS
 
@@ -212,11 +211,11 @@ int main() {
     // // SOFTMAX NETWORK
     // std::vector<std::vector<double>> inputSet;
     // std::vector<double> tempOutputSet;
-    // data.setData(4, "/Users/marcmelikyan/Desktop/Data/Iris.csv", inputSet, tempOutputSet);
+    // data.setData(13, "/Users/marcmelikyan/Desktop/Data/Wine.csv", inputSet, tempOutputSet);
     // std::vector<std::vector<double>> outputSet = data.oneHotRep(tempOutputSet, 3);
 
-    // SoftmaxNet model(inputSet, outputSet, 2);
-    // model.gradientDescent(0.001, 10000, 0);
+    // SoftmaxNet model(inputSet, outputSet, 5);
+    // model.SGD(0.1, 500000, 0);
     // alg.printMatrix(model.modelSetTest(inputSet));
     // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl;
 
@@ -227,10 +226,10 @@ int main() {
     // alg.printMatrix(model.modelSetTest(alg.transpose(inputSet)));
     // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl;
 
-    // // DYNAMICALLY SIZED ANN
-    // // Possible Weight Init Methods: Default, Uniform, HeNormal, HeUniform, XavierNormal, XavierUniform
-    // // Possible Activations: Linear, Sigmoid, Swish, Softplus, Softsign, CLogLog, Ar{Sinh, Cosh, Tanh, Csch, Sech, Coth}, GaussianCDF, GELU, UnitStep
-    // // Possible Loss Functions: MSE, RMSE, MBE, LogLoss, CrossEntropy, HingeLoss
+    // DYNAMICALLY SIZED ANN
+    // Possible Weight Init Methods: Default, Uniform, HeNormal, HeUniform, XavierNormal, XavierUniform
+    // Possible Activations: Linear, Sigmoid, Swish, Softplus, Softsign, CLogLog, Ar{Sinh, Cosh, Tanh, Csch, Sech, Coth}, GaussianCDF, GELU, UnitStep
+    // Possible Loss Functions: MSE, RMSE, MBE, LogLoss, CrossEntropy, HingeLoss
     // std::vector<std::vector<double>> inputSet = {{0,0,1,1}, {0,1,0,1}};
     // std::vector<double> outputSet = {0,1,1,0};
     // ANN ann(alg.transpose(inputSet), outputSet);
@@ -241,24 +240,43 @@ int main() {
     // alg.printVector(ann.modelSetTest(alg.transpose(inputSet)));
     // std::cout << "ACCURACY: " << 100 * ann.score() << "%" << std::endl;
 
+    // std::vector<std::vector<double>> inputSet = {{0,0,1,1}, {0,1,0,1}};
+    // std::vector<double> outputSet = {0,1,1,0};
+    // ANN ann(alg.transpose(inputSet), outputSet);
+    // ann.addLayer(10, "Sigmoid");
+    // ann.addLayer(10, "Sigmoid");
+    // ann.addLayer(10, "Sigmoid");
+    // ann.addLayer(10, "Sigmoid");
+    // ann.addOutputLayer("Sigmoid", "LogLoss");
+    // ann.gradientDescent(0.1, 80000, 0);
+    // alg.printVector(ann.modelSetTest(alg.transpose(inputSet)));
+    // std::cout << "ACCURACY: " << 100 * ann.score() << "%" << std::endl;
+
     // // DYNAMICALLY SIZED MANN (Multidimensional Output ANN)
     // std::vector<std::vector<double>> inputSet = {{1,2,3},{2,4,6},{3,6,9},{4,8,12}};
     // std::vector<std::vector<double>> outputSet = {{1,5}, {2,10}, {3,15}, {4,20}};
 
+    // MANN mann(inputSet, outputSet);
+    // mann.addOutputLayer("Linear", "MSE");
+    // mann.gradientDescent(0.001, 80000, 0);
+    // alg.printMatrix(mann.modelSetTest(inputSet));
+    // std::cout << "ACCURACY: " << 100 * mann.score() << "%" << std::endl;
+
     // std::vector<std::vector<double>> inputSet;
     // std::vector<double> tempOutputSet;
     // data.setData(4, "/Users/marcmelikyan/Desktop/Data/Iris.csv", inputSet, tempOutputSet);
+    // std::vector<std::vector<double>> outputSet = data.oneHotRep(tempOutputSet, 3);
 
     // std::vector<std::vector<double>> inputSet;
     // std::vector<double> tempOutputSet;
-    // data.setData(784, "/Users/marcmelikyan/Desktop/Data/mnist_train.csv", inputSet, tempOutputSet);
+    // data.setData(784, "mini_mnist.csv", inputSet, tempOutputSet);
     // std::vector<std::vector<double>> outputSet = data.oneHotRep(tempOutputSet, 10);
 
     // MANN mann(inputSet, outputSet);
-    // mann.addLayer(128, "RELU");
-    // mann.addLayer(128, "RELU");
+    // mann.addLayer(2, "RELU");
+    // mann.addLayer(2, "RELU");
     // mann.addOutputLayer("Softmax", "CrossEntropy");
-    // mann.gradientDescent(0.001, 1, 1);
+    // mann.gradientDescent(0.001, 80000, 1);
     // alg.printMatrix(mann.modelSetTest(inputSet));
     // std::cout << "ACCURACY: " << 100 * mann.score() << "%" << std::endl;
 
@@ -374,18 +392,18 @@ int main() {
     // OutlierFinder outlierFinder(2); // Any datapoint outside of 2 stds from the mean is marked as an outlier.
     // alg.printVector(outlierFinder.modelTest(inputSet));
 
-    // // Testing new Functions
-    // double z_s = 0.001;
-    // std::cout << avn.softsign(z_s) << std::endl;
-    // std::cout << avn.softsign(z_s, 1) << std::endl;
+    // Testing new Functions
+    double z_s = 0.001;
+    std::cout << avn.sinc(z_s) << std::endl;
+    std::cout << avn.sinc(z_s, 1) << std::endl;
 
-    // std::vector<double> z_v = {0.001, 5};
-    // alg.printVector(avn.softsign(z_v));
-    // alg.printVector(avn.softsign(z_v, 1));
+    std::vector<double> z_v = {0.001, 5};
+    alg.printVector(avn.sinc(z_v));
+    alg.printVector(avn.sinc(z_v, 1));
 
-    // std::vector<std::vector<double>> Z_m = {{0.001, 5}};
-    // alg.printMatrix(avn.softsign(Z_m));
-    // alg.printMatrix(avn.softsign(Z_m, 1));
+    std::vector<std::vector<double>> Z_m = {{0.001, 5}};
+    alg.printMatrix(avn.sinc(Z_m));
+    alg.printMatrix(avn.sinc(Z_m, 1));
 
     // std::cout << alg.trace({{1,2}, {3,4}}) << std::endl;
     // alg.printMatrix(alg.pinverse({{1,2}, {3,4}}));
@@ -401,5 +419,9 @@ int main() {
     // std::cout << alg.max({{1,2,3,4,5}, {6,5,3,4,1}, {9,9,9,9,9}}) << std::endl;
     // std::cout << alg.min({{1,2,3,4,5}, {6,5,3,4,1}, {9,9,9,9,9}}) << std::endl;
 
+    // std::vector<double> chicken;
+    // data.getImage("../../Data/apple.jpeg", chicken);
+    // alg.printVector(chicken);
+
     return 0;
 }
\ No newline at end of file