From 51765e87adab6c1331251e0fd3cad9434e0a1d9e Mon Sep 17 00:00:00 2001 From: Relintai Date: Mon, 23 Jan 2023 21:13:26 +0100 Subject: [PATCH] Added https://github.com/novak-99/MLPP as a base, without the included datasets. --- .gitignore | 2 + LICENSE | 21 + MLPP/ANN/ANN.cpp | 742 ++++++++++ MLPP/ANN/ANN.hpp | 72 + MLPP/Activation/Activation.cpp | 884 ++++++++++++ MLPP/Activation/Activation.hpp | 146 ++ MLPP/AutoEncoder/AutoEncoder.cpp | 253 ++++ MLPP/AutoEncoder/AutoEncoder.hpp | 54 + MLPP/BernoulliNB/BernoulliNB.cpp | 182 +++ MLPP/BernoulliNB/BernoulliNB.hpp | 47 + MLPP/CLogLogReg/CLogLogReg.cpp | 219 +++ MLPP/CLogLogReg/CLogLogReg.hpp | 58 + MLPP/Convolutions/Convolutions.cpp | 402 ++++++ MLPP/Convolutions/Convolutions.hpp | 51 + MLPP/Cost/Cost.cpp | 422 ++++++ MLPP/Cost/Cost.hpp | 86 ++ MLPP/Data/Data.cpp | 773 +++++++++++ MLPP/Data/Data.hpp | 99 ++ MLPP/DualSVC/DualSVC.cpp | 241 ++++ MLPP/DualSVC/DualSVC.hpp | 71 + MLPP/ExpReg/ExpReg.cpp | 240 ++++ MLPP/ExpReg/ExpReg.hpp | 51 + MLPP/GAN/GAN.cpp | 290 ++++ MLPP/GAN/GAN.hpp | 56 + .../GaussMarkovChecker/GaussMarkovChecker.cpp | 59 + .../GaussMarkovChecker/GaussMarkovChecker.hpp | 27 + MLPP/GaussianNB/GaussianNB.cpp | 92 ++ MLPP/GaussianNB/GaussianNB.hpp | 42 + MLPP/HiddenLayer/HiddenLayer.cpp | 114 ++ MLPP/HiddenLayer/HiddenLayer.hpp | 52 + MLPP/HypothesisTesting/HypothesisTesting.cpp | 19 + MLPP/HypothesisTesting/HypothesisTesting.hpp | 24 + MLPP/KMeans/KMeans.cpp | 235 ++++ MLPP/KMeans/KMeans.hpp | 45 + MLPP/LinAlg/LinAlg.cpp | 1231 +++++++++++++++++ MLPP/LinAlg/LinAlg.hpp | 236 ++++ MLPP/LinReg/LinReg.cpp | 233 ++++ MLPP/LinReg/LinReg.hpp | 53 + MLPP/LogReg/LogReg.cpp | 200 +++ MLPP/LogReg/LogReg.hpp | 53 + MLPP/MANN/MANN.cpp | 197 +++ MLPP/MANN/MANN.hpp | 48 + MLPP/MLP/MLP.cpp | 270 ++++ MLPP/MLP/MLP.hpp | 61 + MLPP/MultiOutputLayer/MultiOutputLayer.cpp | 133 ++ MLPP/MultiOutputLayer/MultiOutputLayer.hpp | 58 + MLPP/MultinomialNB/MultinomialNB.cpp | 121 ++ MLPP/MultinomialNB/MultinomialNB.hpp | 45 + MLPP/NumericalAnalysis/NumericalAnalysis.cpp | 305 ++++ MLPP/NumericalAnalysis/NumericalAnalysis.hpp | 57 + MLPP/OutlierFinder/OutlierFinder.cpp | 43 + MLPP/OutlierFinder/OutlierFinder.hpp | 27 + MLPP/OutputLayer/OutputLayer.cpp | 130 ++ MLPP/OutputLayer/OutputLayer.hpp | 56 + MLPP/PCA/PCA.cpp | 56 + MLPP/PCA/PCA.hpp | 28 + MLPP/ProbitReg/ProbitReg.cpp | 239 ++++ MLPP/ProbitReg/ProbitReg.hpp | 57 + MLPP/Regularization/Reg.cpp | 177 +++ MLPP/Regularization/Reg.hpp | 31 + MLPP/SVC/SVC.cpp | 195 +++ MLPP/SVC/SVC.hpp | 56 + MLPP/SoftmaxNet/SoftmaxNet.cpp | 290 ++++ MLPP/SoftmaxNet/SoftmaxNet.hpp | 66 + MLPP/SoftmaxReg/SoftmaxReg.cpp | 192 +++ MLPP/SoftmaxReg/SoftmaxReg.hpp | 54 + MLPP/Stat/Stat.cpp | 219 +++ MLPP/Stat/Stat.hpp | 54 + MLPP/TanhReg/TanhReg.cpp | 193 +++ MLPP/TanhReg/TanhReg.hpp | 59 + MLPP/Transforms/Transforms.cpp | 59 + MLPP/Transforms/Transforms.hpp | 20 + MLPP/UniLinReg/UniLinReg.cpp | 37 + MLPP/UniLinReg/UniLinReg.hpp | 30 + MLPP/Utilities/Utilities.cpp | 397 ++++++ MLPP/Utilities/Utilities.hpp | 54 + MLPP/WGAN/WGAN.cpp | 300 ++++ MLPP/WGAN/WGAN.hpp | 56 + MLPP/kNN/kNN.cpp | 87 ++ MLPP/kNN/kNN.hpp | 35 + README.md | 244 ++++ main.cpp | 722 ++++++++++ 82 files changed, 13735 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 MLPP/ANN/ANN.cpp create mode 100644 MLPP/ANN/ANN.hpp create mode 100644 MLPP/Activation/Activation.cpp create mode 100644 MLPP/Activation/Activation.hpp create mode 100644 MLPP/AutoEncoder/AutoEncoder.cpp create mode 100644 MLPP/AutoEncoder/AutoEncoder.hpp create 
mode 100644 MLPP/BernoulliNB/BernoulliNB.cpp create mode 100644 MLPP/BernoulliNB/BernoulliNB.hpp create mode 100644 MLPP/CLogLogReg/CLogLogReg.cpp create mode 100644 MLPP/CLogLogReg/CLogLogReg.hpp create mode 100644 MLPP/Convolutions/Convolutions.cpp create mode 100644 MLPP/Convolutions/Convolutions.hpp create mode 100644 MLPP/Cost/Cost.cpp create mode 100644 MLPP/Cost/Cost.hpp create mode 100644 MLPP/Data/Data.cpp create mode 100644 MLPP/Data/Data.hpp create mode 100644 MLPP/DualSVC/DualSVC.cpp create mode 100644 MLPP/DualSVC/DualSVC.hpp create mode 100644 MLPP/ExpReg/ExpReg.cpp create mode 100644 MLPP/ExpReg/ExpReg.hpp create mode 100644 MLPP/GAN/GAN.cpp create mode 100644 MLPP/GAN/GAN.hpp create mode 100644 MLPP/GaussMarkovChecker/GaussMarkovChecker.cpp create mode 100644 MLPP/GaussMarkovChecker/GaussMarkovChecker.hpp create mode 100644 MLPP/GaussianNB/GaussianNB.cpp create mode 100644 MLPP/GaussianNB/GaussianNB.hpp create mode 100644 MLPP/HiddenLayer/HiddenLayer.cpp create mode 100644 MLPP/HiddenLayer/HiddenLayer.hpp create mode 100644 MLPP/HypothesisTesting/HypothesisTesting.cpp create mode 100644 MLPP/HypothesisTesting/HypothesisTesting.hpp create mode 100644 MLPP/KMeans/KMeans.cpp create mode 100644 MLPP/KMeans/KMeans.hpp create mode 100644 MLPP/LinAlg/LinAlg.cpp create mode 100644 MLPP/LinAlg/LinAlg.hpp create mode 100644 MLPP/LinReg/LinReg.cpp create mode 100644 MLPP/LinReg/LinReg.hpp create mode 100644 MLPP/LogReg/LogReg.cpp create mode 100644 MLPP/LogReg/LogReg.hpp create mode 100644 MLPP/MANN/MANN.cpp create mode 100644 MLPP/MANN/MANN.hpp create mode 100644 MLPP/MLP/MLP.cpp create mode 100644 MLPP/MLP/MLP.hpp create mode 100644 MLPP/MultiOutputLayer/MultiOutputLayer.cpp create mode 100644 MLPP/MultiOutputLayer/MultiOutputLayer.hpp create mode 100644 MLPP/MultinomialNB/MultinomialNB.cpp create mode 100644 MLPP/MultinomialNB/MultinomialNB.hpp create mode 100644 MLPP/NumericalAnalysis/NumericalAnalysis.cpp create mode 100644 MLPP/NumericalAnalysis/NumericalAnalysis.hpp create mode 100644 MLPP/OutlierFinder/OutlierFinder.cpp create mode 100644 MLPP/OutlierFinder/OutlierFinder.hpp create mode 100644 MLPP/OutputLayer/OutputLayer.cpp create mode 100644 MLPP/OutputLayer/OutputLayer.hpp create mode 100644 MLPP/PCA/PCA.cpp create mode 100644 MLPP/PCA/PCA.hpp create mode 100644 MLPP/ProbitReg/ProbitReg.cpp create mode 100644 MLPP/ProbitReg/ProbitReg.hpp create mode 100644 MLPP/Regularization/Reg.cpp create mode 100644 MLPP/Regularization/Reg.hpp create mode 100644 MLPP/SVC/SVC.cpp create mode 100644 MLPP/SVC/SVC.hpp create mode 100644 MLPP/SoftmaxNet/SoftmaxNet.cpp create mode 100644 MLPP/SoftmaxNet/SoftmaxNet.hpp create mode 100644 MLPP/SoftmaxReg/SoftmaxReg.cpp create mode 100644 MLPP/SoftmaxReg/SoftmaxReg.hpp create mode 100644 MLPP/Stat/Stat.cpp create mode 100644 MLPP/Stat/Stat.hpp create mode 100644 MLPP/TanhReg/TanhReg.cpp create mode 100644 MLPP/TanhReg/TanhReg.hpp create mode 100644 MLPP/Transforms/Transforms.cpp create mode 100644 MLPP/Transforms/Transforms.hpp create mode 100644 MLPP/UniLinReg/UniLinReg.cpp create mode 100644 MLPP/UniLinReg/UniLinReg.hpp create mode 100644 MLPP/Utilities/Utilities.cpp create mode 100644 MLPP/Utilities/Utilities.hpp create mode 100644 MLPP/WGAN/WGAN.cpp create mode 100644 MLPP/WGAN/WGAN.hpp create mode 100644 MLPP/kNN/kNN.cpp create mode 100644 MLPP/kNN/kNN.hpp create mode 100644 README.md create mode 100644 main.cpp diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9cac8a5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ 
+a.out +.DS_Store \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..f89a601 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 Marc Melikyan + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/MLPP/ANN/ANN.cpp b/MLPP/ANN/ANN.cpp new file mode 100644 index 0000000..50399b5 --- /dev/null +++ b/MLPP/ANN/ANN.cpp @@ -0,0 +1,742 @@ +// +// ANN.cpp +// +// Created by Marc Melikyan on 11/4/20. +// + +#include "ANN.hpp" +#include "Activation/Activation.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Regularization/Reg.hpp" +#include "Utilities/Utilities.hpp" +#include "Cost/Cost.hpp" + +#include +#include +#include + +namespace MLPP { + ANN::ANN(std::vector> inputSet, std::vector outputSet) + : inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), lrScheduler("None"), decayConstant(0), dropRate(0) + { + + } + + ANN::~ANN(){ + delete outputLayer; + } + + std::vector ANN::modelSetTest(std::vector> X){ + if(!network.empty()){ + network[0].input = X; + network[0].forwardPass(); + + for(int i = 1; i < network.size(); i++){ + network[i].input = network[i - 1].a; + network[i].forwardPass(); + } + outputLayer->input = network[network.size() - 1].a; + } + else{ + outputLayer->input = X; + } + outputLayer->forwardPass(); + return outputLayer->a; + } + + double ANN::modelTest(std::vector x){ + if(!network.empty()){ + network[0].Test(x); + for(int i = 1; i < network.size(); i++){ + network[i].Test(network[i - 1].a_test); + } + outputLayer->Test(network[network.size() - 1].a_test); + } + else{ + outputLayer->Test(x); + } + return outputLayer->a_test; + } + + void ANN::gradientDescent(double learning_rate, int max_epoch, bool UI){ + class Cost cost; + LinAlg alg; + double cost_prev = 0; + int epoch = 1; + forwardPass(); + double initial_learning_rate = learning_rate; + + alg.printMatrix(network[network.size() - 1].weights); + while(true){ + learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate); + cost_prev = Cost(y_hat, outputSet); + + auto [cumulativeHiddenLayerWGrad, outputWGrad] = computeGradients(y_hat, outputSet); + + cumulativeHiddenLayerWGrad = alg.scalarMultiply(learning_rate/n, cumulativeHiddenLayerWGrad); + outputWGrad = alg.scalarMultiply(learning_rate/n, outputWGrad); + updateParameters(cumulativeHiddenLayerWGrad, outputWGrad, learning_rate); // subject to change. may want bias to have this matrix too. 
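+                // The step above is plain batch gradient descent: each epoch the learning
+                // rate is first passed through the configured scheduler (Time / Epoch /
+                // Step / Exponential decay, see applyLearningRateScheduler), then every
+                // layer is updated as W := W - (learning_rate / n) * dJ/dW, with the biases
+                // updated from the mean of each layer's delta inside updateParameters().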
+ + std::cout << learning_rate << std::endl; + + forwardPass(); + + if(UI) { ANN::UI(epoch, cost_prev, y_hat, outputSet); } + + epoch++; + if(epoch > max_epoch) { break; } + } + } + + void ANN::SGD(double learning_rate, int max_epoch, bool UI){ + class Cost cost; + LinAlg alg; + + double cost_prev = 0; + int epoch = 1; + double initial_learning_rate = learning_rate; + + while(true){ + learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate); + + std::random_device rd; + std::default_random_engine generator(rd()); + std::uniform_int_distribution distribution(0, int(n - 1)); + int outputIndex = distribution(generator); + + std::vector y_hat = modelSetTest({inputSet[outputIndex]}); + cost_prev = Cost({y_hat}, {outputSet[outputIndex]}); + + auto [cumulativeHiddenLayerWGrad, outputWGrad] = computeGradients(y_hat, {outputSet[outputIndex]}); + cumulativeHiddenLayerWGrad = alg.scalarMultiply(learning_rate/n, cumulativeHiddenLayerWGrad); + outputWGrad = alg.scalarMultiply(learning_rate/n, outputWGrad); + + updateParameters(cumulativeHiddenLayerWGrad, outputWGrad, learning_rate); // subject to change. may want bias to have this matrix too. + y_hat = modelSetTest({inputSet[outputIndex]}); + + if(UI) { ANN::UI(epoch, cost_prev, y_hat, {outputSet[outputIndex]}); } + + epoch++; + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + void ANN::MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI){ + class Cost cost; + LinAlg alg; + + double cost_prev = 0; + int epoch = 1; + double initial_learning_rate = learning_rate; + + // Creating the mini-batches + int n_mini_batch = n/mini_batch_size; + // always evaluate the result + // always do forward pass only ONCE at end. + auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch); + while(true){ + learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate); + for(int i = 0; i < n_mini_batch; i++){ + std::vector y_hat = modelSetTest(inputMiniBatches[i]); + cost_prev = Cost(y_hat, outputMiniBatches[i]); + + auto [cumulativeHiddenLayerWGrad, outputWGrad] = computeGradients(y_hat, outputMiniBatches[i]); + cumulativeHiddenLayerWGrad = alg.scalarMultiply(learning_rate/n, cumulativeHiddenLayerWGrad); + outputWGrad = alg.scalarMultiply(learning_rate/n, outputWGrad); + + updateParameters(cumulativeHiddenLayerWGrad, outputWGrad, learning_rate); // subject to change. may want bias to have this matrix too. + y_hat = modelSetTest(inputMiniBatches[i]); + + if(UI) { ANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]); } + } + epoch++; + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + void ANN::Momentum(double learning_rate, int max_epoch, int mini_batch_size, double gamma, bool NAG, bool UI){ + class Cost cost; + LinAlg alg; + + double cost_prev = 0; + int epoch = 1; + double initial_learning_rate = learning_rate; + + // Creating the mini-batches + int n_mini_batch = n/mini_batch_size; + // always evaluate the result + // always do forward pass only ONCE at end. + auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch); + + // Initializing necessary components for Adam. 
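+            // Velocity terms for momentum: v := gamma * v + (learning_rate / n) * dJ/dW,
+            // applied as W := W - v. When NAG is enabled, the previous velocity is applied
+            // first as a look-ahead step before the new velocity is formed (see the NAG
+            // branch below).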
+ std::vector>> v_hidden; + + std::vector v_output; + while(true){ + learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate); + for(int i = 0; i < n_mini_batch; i++){ + std::vector y_hat = modelSetTest(inputMiniBatches[i]); + cost_prev = Cost(y_hat, outputMiniBatches[i]); + + auto [cumulativeHiddenLayerWGrad, outputWGrad] = computeGradients(y_hat, outputMiniBatches[i]); + + if(!network.empty() && v_hidden.empty()){ // Initing our tensor + v_hidden = alg.resize(v_hidden, cumulativeHiddenLayerWGrad); + } + + if(v_output.empty()){ + v_output.resize(outputWGrad.size()); + } + + if(NAG){ // "Aposterori" calculation + updateParameters(v_hidden, v_output, 0); // DON'T update bias. + } + + v_hidden = alg.addition(alg.scalarMultiply(gamma, v_hidden), alg.scalarMultiply(learning_rate/n, cumulativeHiddenLayerWGrad)); + + v_output = alg.addition(alg.scalarMultiply(gamma, v_output), alg.scalarMultiply(learning_rate/n, outputWGrad)); + + updateParameters(v_hidden, v_output, learning_rate); // subject to change. may want bias to have this matrix too. + y_hat = modelSetTest(inputMiniBatches[i]); + + if(UI) { ANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]); } + } + epoch++; + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + void ANN::Adagrad(double learning_rate, int max_epoch, int mini_batch_size, double e, bool UI){ + class Cost cost; + LinAlg alg; + + double cost_prev = 0; + int epoch = 1; + double initial_learning_rate = learning_rate; + + // Creating the mini-batches + int n_mini_batch = n/mini_batch_size; + // always evaluate the result + // always do forward pass only ONCE at end. + auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch); + + // Initializing necessary components for Adam. + std::vector>> v_hidden; + + std::vector v_output; + while(true){ + learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate); + for(int i = 0; i < n_mini_batch; i++){ + std::vector y_hat = modelSetTest(inputMiniBatches[i]); + cost_prev = Cost(y_hat, outputMiniBatches[i]); + + auto [cumulativeHiddenLayerWGrad, outputWGrad] = computeGradients(y_hat, outputMiniBatches[i]); + + if(!network.empty() && v_hidden.empty()){ // Initing our tensor + v_hidden = alg.resize(v_hidden, cumulativeHiddenLayerWGrad); + } + + if(v_output.empty()){ + v_output.resize(outputWGrad.size()); + } + + v_hidden = alg.addition(v_hidden, alg.exponentiate(cumulativeHiddenLayerWGrad, 2)); + + v_output = alg.addition(v_output, alg.exponentiate(outputWGrad, 2)); + + std::vector>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(cumulativeHiddenLayerWGrad, alg.scalarAdd(e, alg.sqrt(v_hidden)))); + std::vector outputLayerUpdation = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(outputWGrad, alg.scalarAdd(e, alg.sqrt(v_output)))); + + updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too. 
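+                // Adagrad: v accumulates the squared gradients (v := v + g^2) and each
+                // parameter's step is scaled by 1 / (e + sqrt(v)), so weights that keep
+                // receiving large gradients take progressively smaller steps.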
+ y_hat = modelSetTest(inputMiniBatches[i]); + + if(UI) { ANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]); } + } + epoch++; + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + void ANN::Adadelta(double learning_rate, int max_epoch, int mini_batch_size, double b1, double e, bool UI){ + class Cost cost; + LinAlg alg; + + double cost_prev = 0; + int epoch = 1; + double initial_learning_rate = learning_rate; + + // Creating the mini-batches + int n_mini_batch = n/mini_batch_size; + // always evaluate the result + // always do forward pass only ONCE at end. + auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch); + + // Initializing necessary components for Adam. + std::vector>> v_hidden; + + std::vector v_output; + while(true){ + learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate); + for(int i = 0; i < n_mini_batch; i++){ + std::vector y_hat = modelSetTest(inputMiniBatches[i]); + cost_prev = Cost(y_hat, outputMiniBatches[i]); + + auto [cumulativeHiddenLayerWGrad, outputWGrad] = computeGradients(y_hat, outputMiniBatches[i]); + + if(!network.empty() && v_hidden.empty()){ // Initing our tensor + v_hidden = alg.resize(v_hidden, cumulativeHiddenLayerWGrad); + } + + if(v_output.empty()){ + v_output.resize(outputWGrad.size()); + } + + v_hidden = alg.addition(alg.scalarMultiply(1 - b1, v_hidden), alg.scalarMultiply(b1, alg.exponentiate(cumulativeHiddenLayerWGrad, 2))); + + v_output = alg.addition(v_output, alg.exponentiate(outputWGrad, 2)); + + std::vector>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(cumulativeHiddenLayerWGrad, alg.scalarAdd(e, alg.sqrt(v_hidden)))); + std::vector outputLayerUpdation = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(outputWGrad, alg.scalarAdd(e, alg.sqrt(v_output)))); + + updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too. + y_hat = modelSetTest(inputMiniBatches[i]); + + if(UI) { ANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]); } + } + epoch++; + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + +void ANN::Adam(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI){ + class Cost cost; + LinAlg alg; + + double cost_prev = 0; + int epoch = 1; + double initial_learning_rate = learning_rate; + + // Creating the mini-batches + int n_mini_batch = n/mini_batch_size; + // always evaluate the result + // always do forward pass only ONCE at end. + auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch); + + // Initializing necessary components for Adam. 
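+        // Adam moment estimates, as used below:
+        //   m := b1 * m + (1 - b1) * g          (first moment)
+        //   v := b2 * v + (1 - b2) * g^2        (second moment)
+        //   m_hat := m / (1 - b1^t),  v_hat := v / (1 - b2^t)   (bias correction)
+        //   W := W - (learning_rate / n) * m_hat / (e + sqrt(v_hat))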
+ std::vector>> m_hidden; + std::vector>> v_hidden; + + std::vector m_output; + std::vector v_output; + while(true){ + learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate); + for(int i = 0; i < n_mini_batch; i++){ + std::vector y_hat = modelSetTest(inputMiniBatches[i]); + cost_prev = Cost(y_hat, outputMiniBatches[i]); + + auto [cumulativeHiddenLayerWGrad, outputWGrad] = computeGradients(y_hat, outputMiniBatches[i]); + if(!network.empty() && m_hidden.empty() && v_hidden.empty()){ // Initing our tensor + m_hidden = alg.resize(m_hidden, cumulativeHiddenLayerWGrad); + v_hidden = alg.resize(v_hidden, cumulativeHiddenLayerWGrad); + } + + if(m_output.empty() && v_output.empty()){ + m_output.resize(outputWGrad.size()); + v_output.resize(outputWGrad.size()); + } + + m_hidden = alg.addition(alg.scalarMultiply(b1, m_hidden), alg.scalarMultiply(1 - b1, cumulativeHiddenLayerWGrad)); + v_hidden = alg.addition(alg.scalarMultiply(b2, v_hidden), alg.scalarMultiply(1 - b2, alg.exponentiate(cumulativeHiddenLayerWGrad, 2))); + + m_output = alg.addition(alg.scalarMultiply(b1, m_output), alg.scalarMultiply(1 - b1, outputWGrad)); + v_output = alg.addition(alg.scalarMultiply(b2, v_output), alg.scalarMultiply(1 - b2, alg.exponentiate(outputWGrad, 2))); + + std::vector>> m_hidden_hat = alg.scalarMultiply(1/(1 - std::pow(b1, epoch)), m_hidden); + std::vector>> v_hidden_hat = alg.scalarMultiply(1/(1 - std::pow(b2, epoch)), v_hidden); + + std::vector m_output_hat = alg.scalarMultiply(1/(1 - std::pow(b1, epoch)), m_output); + std::vector v_output_hat = alg.scalarMultiply(1/(1 - std::pow(b2, epoch)), v_output); + + std::vector>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(m_hidden_hat, alg.scalarAdd(e, alg.sqrt(v_hidden_hat)))); + std::vector outputLayerUpdation = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(m_output_hat, alg.scalarAdd(e, alg.sqrt(v_output_hat)))); + + + updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too. + y_hat = modelSetTest(inputMiniBatches[i]); + + if(UI) { ANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]); } + } + epoch++; + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + void ANN::Adamax(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI){ + class Cost cost; + LinAlg alg; + + double cost_prev = 0; + int epoch = 1; + double initial_learning_rate = learning_rate; + + // Creating the mini-batches + int n_mini_batch = n/mini_batch_size; + // always evaluate the result + // always do forward pass only ONCE at end. + auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch); + + // Initializing necessary components for Adam. 
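+        // Adamax replaces Adam's second moment with an infinity-norm estimate:
+        //   u := max(b2 * u, |g|),  W := W - (learning_rate / n) * m_hat / (e + u)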
+ std::vector>> m_hidden; + std::vector>> u_hidden; + + std::vector m_output; + std::vector u_output; + while(true){ + learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate); + for(int i = 0; i < n_mini_batch; i++){ + std::vector y_hat = modelSetTest(inputMiniBatches[i]); + cost_prev = Cost(y_hat, outputMiniBatches[i]); + + auto [cumulativeHiddenLayerWGrad, outputWGrad] = computeGradients(y_hat, outputMiniBatches[i]); + if(!network.empty() && m_hidden.empty() && u_hidden.empty()){ // Initing our tensor + m_hidden = alg.resize(m_hidden, cumulativeHiddenLayerWGrad); + u_hidden = alg.resize(u_hidden, cumulativeHiddenLayerWGrad); + } + + if(m_output.empty() && u_output.empty()){ + m_output.resize(outputWGrad.size()); + u_output.resize(outputWGrad.size()); + } + + m_hidden = alg.addition(alg.scalarMultiply(b1, m_hidden), alg.scalarMultiply(1 - b1, cumulativeHiddenLayerWGrad)); + u_hidden = alg.max(alg.scalarMultiply(b2, u_hidden), alg.abs(cumulativeHiddenLayerWGrad)); + + m_output = alg.addition(alg.scalarMultiply(b1, m_output), alg.scalarMultiply(1 - b1, outputWGrad)); + u_output = alg.max(alg.scalarMultiply(b2, u_output), alg.abs(outputWGrad)); + + std::vector>> m_hidden_hat = alg.scalarMultiply(1/(1 - std::pow(b1, epoch)), m_hidden); + + std::vector m_output_hat = alg.scalarMultiply(1/(1 - std::pow(b1, epoch)), m_output); + + std::vector>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(m_hidden_hat, alg.scalarAdd(e, u_hidden))); + std::vector outputLayerUpdation = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(m_output_hat, alg.scalarAdd(e, u_output))); + + + updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too. + y_hat = modelSetTest(inputMiniBatches[i]); + + if(UI) { ANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]); } + } + epoch++; + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + void ANN::Nadam(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI){ + class Cost cost; + LinAlg alg; + + double cost_prev = 0; + int epoch = 1; + double initial_learning_rate = learning_rate; + + // Creating the mini-batches + int n_mini_batch = n/mini_batch_size; + // always evaluate the result + // always do forward pass only ONCE at end. + auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch); + + // Initializing necessary components for Adam. 
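+        // Nadam is Adam with a Nesterov-style lookahead on the first moment:
+        //   m_final := b1 * m_hat + ((1 - b1) / (1 - b1^t)) * g
+        // and the step below uses m_final in place of Adam's m_hat.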
+ std::vector>> m_hidden; + std::vector>> v_hidden; + std::vector>> m_hidden_final; + + std::vector m_output; + std::vector v_output; + while(true){ + learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate); + for(int i = 0; i < n_mini_batch; i++){ + std::vector y_hat = modelSetTest(inputMiniBatches[i]); + cost_prev = Cost(y_hat, outputMiniBatches[i]); + + auto [cumulativeHiddenLayerWGrad, outputWGrad] = computeGradients(y_hat, outputMiniBatches[i]); + if(!network.empty() && m_hidden.empty() && v_hidden.empty()){ // Initing our tensor + m_hidden = alg.resize(m_hidden, cumulativeHiddenLayerWGrad); + v_hidden = alg.resize(v_hidden, cumulativeHiddenLayerWGrad); + } + + if(m_output.empty() && v_output.empty()){ + m_output.resize(outputWGrad.size()); + v_output.resize(outputWGrad.size()); + } + + m_hidden = alg.addition(alg.scalarMultiply(b1, m_hidden), alg.scalarMultiply(1 - b1, cumulativeHiddenLayerWGrad)); + v_hidden = alg.addition(alg.scalarMultiply(b2, v_hidden), alg.scalarMultiply(1 - b2, alg.exponentiate(cumulativeHiddenLayerWGrad, 2))); + + + m_output = alg.addition(alg.scalarMultiply(b1, m_output), alg.scalarMultiply(1 - b1, outputWGrad)); + v_output = alg.addition(alg.scalarMultiply(b2, v_output), alg.scalarMultiply(1 - b2, alg.exponentiate(outputWGrad, 2))); + + std::vector>> m_hidden_hat = alg.scalarMultiply(1/(1 - std::pow(b1, epoch)), m_hidden); + std::vector>> v_hidden_hat = alg.scalarMultiply(1/(1 - std::pow(b2, epoch)), v_hidden); + std::vector>> m_hidden_final = alg.addition(alg.scalarMultiply(b1, m_hidden_hat), alg.scalarMultiply((1 - b1)/(1 - std::pow(b1, epoch)), cumulativeHiddenLayerWGrad)); + + std::vector m_output_hat = alg.scalarMultiply(1/(1 - std::pow(b1, epoch)), m_output); + std::vector v_output_hat = alg.scalarMultiply(1/(1 - std::pow(b2, epoch)), v_output); + std::vector m_output_final = alg.addition(alg.scalarMultiply(b1, m_output_hat), alg.scalarMultiply((1 - b1)/(1 - std::pow(b1, epoch)), outputWGrad)); + + std::vector>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(m_hidden_final, alg.scalarAdd(e, alg.sqrt(v_hidden_hat)))); + std::vector outputLayerUpdation = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(m_output_final, alg.scalarAdd(e, alg.sqrt(v_output_hat)))); + + + updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too. + y_hat = modelSetTest(inputMiniBatches[i]); + + if(UI) { ANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]); } + } + epoch++; + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + void ANN::AMSGrad(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI){ + class Cost cost; + LinAlg alg; + + double cost_prev = 0; + int epoch = 1; + double initial_learning_rate = learning_rate; + + // Creating the mini-batches + int n_mini_batch = n/mini_batch_size; + // always evaluate the result + // always do forward pass only ONCE at end. + auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch); + + // Initializing necessary components for Adam. 
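+        // AMSGrad keeps the elementwise maximum of all past second-moment estimates,
+        //   v_hat := max(v_hat, v),
+        // so the effective step size can only shrink; note that this implementation
+        // divides the raw m (no bias correction) by (e + sqrt(v_hat)).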
+ std::vector>> m_hidden; + std::vector>> v_hidden; + + std::vector>> v_hidden_hat; + + std::vector m_output; + std::vector v_output; + + std::vector v_output_hat; + while(true){ + learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate); + for(int i = 0; i < n_mini_batch; i++){ + std::vector y_hat = modelSetTest(inputMiniBatches[i]); + cost_prev = Cost(y_hat, outputMiniBatches[i]); + + auto [cumulativeHiddenLayerWGrad, outputWGrad] = computeGradients(y_hat, outputMiniBatches[i]); + if(!network.empty() && m_hidden.empty() && v_hidden.empty()){ // Initing our tensor + m_hidden = alg.resize(m_hidden, cumulativeHiddenLayerWGrad); + v_hidden = alg.resize(v_hidden, cumulativeHiddenLayerWGrad); + v_hidden_hat = alg.resize(v_hidden_hat, cumulativeHiddenLayerWGrad); + } + + if(m_output.empty() && v_output.empty()){ + m_output.resize(outputWGrad.size()); + v_output.resize(outputWGrad.size()); + v_output_hat.resize(outputWGrad.size()); + } + + m_hidden = alg.addition(alg.scalarMultiply(b1, m_hidden), alg.scalarMultiply(1 - b1, cumulativeHiddenLayerWGrad)); + v_hidden = alg.addition(alg.scalarMultiply(b2, v_hidden), alg.scalarMultiply(1 - b2, alg.exponentiate(cumulativeHiddenLayerWGrad, 2))); + + m_output = alg.addition(alg.scalarMultiply(b1, m_output), alg.scalarMultiply(1 - b1, outputWGrad)); + v_output = alg.addition(alg.scalarMultiply(b2, v_output), alg.scalarMultiply(1 - b2, alg.exponentiate(outputWGrad, 2))); + + v_hidden_hat = alg.max(v_hidden_hat, v_hidden); + + v_output_hat = alg.max(v_output_hat, v_output); + + std::vector>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(m_hidden, alg.scalarAdd(e, alg.sqrt(v_hidden_hat)))); + std::vector outputLayerUpdation = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(m_output, alg.scalarAdd(e, alg.sqrt(v_output_hat)))); + + + updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too. 
+ y_hat = modelSetTest(inputMiniBatches[i]); + + if(UI) { ANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]); } + } + epoch++; + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + double ANN::score(){ + Utilities util; + forwardPass(); + return util.performance(y_hat, outputSet); + } + + void ANN::save(std::string fileName){ + Utilities util; + if(!network.empty()){ + util.saveParameters(fileName, network[0].weights, network[0].bias, 0, 1); + for(int i = 1; i < network.size(); i++){ + util.saveParameters(fileName, network[i].weights, network[i].bias, 1, i + 1); + } + util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, 1, network.size() + 1); + } + else{ + util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, 0, network.size() + 1); + } + } + + void ANN::setLearningRateScheduler(std::string type, double decayConstant){ + lrScheduler = type; + ANN::decayConstant = decayConstant; + } + + void ANN::setLearningRateScheduler(std::string type, double decayConstant, double dropRate){ + lrScheduler = type; + ANN::decayConstant = decayConstant; + ANN::dropRate = dropRate; + } + + // https://en.wikipedia.org/wiki/Learning_rate + // Learning Rate Decay (C2W2L09) - Andrew Ng - Deep Learning Specialization + double ANN::applyLearningRateScheduler(double learningRate, double decayConstant, double epoch, double dropRate){ + if(lrScheduler == "Time"){ + return learningRate / (1 + decayConstant * epoch); + } + else if(lrScheduler == "Epoch"){ + return learningRate * (decayConstant / std::sqrt(epoch)); + } + else if(lrScheduler == "Step"){ + return learningRate * std::pow(decayConstant, int((1 + epoch)/dropRate)); // Utilizing an explicit int conversion implicitly takes the floor. + } + else if(lrScheduler == "Exponential"){ + return learningRate * std::exp(-decayConstant * epoch); + } + return learningRate; + } + + void ANN::addLayer(int n_hidden, std::string activation, std::string weightInit, std::string reg, double lambda, double alpha){ + if(network.empty()){ + network.push_back(HiddenLayer(n_hidden, activation, inputSet, weightInit, reg, lambda, alpha)); + network[0].forwardPass(); + } + else{ + network.push_back(HiddenLayer(n_hidden, activation, network[network.size() - 1].a, weightInit, reg, lambda, alpha)); + network[network.size() - 1].forwardPass(); + } + } + + void ANN::addOutputLayer(std::string activation, std::string loss, std::string weightInit, std::string reg, double lambda, double alpha){ + LinAlg alg; + if(!network.empty()){ + outputLayer = new OutputLayer(network[network.size() - 1].n_hidden, activation, loss, network[network.size() - 1].a, weightInit, reg, lambda, alpha); + } + else{ + outputLayer = new OutputLayer(k, activation, loss, inputSet, weightInit, reg, lambda, alpha); + } + } + + double ANN::Cost(std::vector y_hat, std::vector y){ + Reg regularization; + class Cost cost; + double totalRegTerm = 0; + + auto cost_function = outputLayer->cost_map[outputLayer->cost]; + if(!network.empty()){ + for(int i = 0; i < network.size() - 1; i++){ + totalRegTerm += regularization.regTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg); + } + } + return (cost.*cost_function)(y_hat, y) + totalRegTerm + regularization.regTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg); + } + + void ANN::forwardPass(){ + if(!network.empty()){ + network[0].input = inputSet; + network[0].forwardPass(); + + for(int i = 1; i < network.size(); i++){ + network[i].input = network[i - 1].a; + 
network[i].forwardPass(); + } + outputLayer->input = network[network.size() - 1].a; + } + else{ + outputLayer->input = inputSet; + } + outputLayer->forwardPass(); + y_hat = outputLayer->a; + } + + void ANN::updateParameters(std::vector>> hiddenLayerUpdations, std::vector outputLayerUpdation, double learning_rate){ + LinAlg alg; + + outputLayer->weights = alg.subtraction(outputLayer->weights, outputLayerUpdation); + outputLayer->bias -= learning_rate * alg.sum_elements(outputLayer->delta) / n; + + if(!network.empty()){ + + network[network.size() - 1].weights = alg.subtraction(network[network.size() - 1].weights, hiddenLayerUpdations[0]); + network[network.size() - 1].bias = alg.subtractMatrixRows(network[network.size() - 1].bias, alg.scalarMultiply(learning_rate/n, network[network.size() - 1].delta)); + + for(int i = network.size() - 2; i >= 0; i--){ + network[i].weights = alg.subtraction(network[i].weights, hiddenLayerUpdations[(network.size() - 2) - i + 1]); + network[i].bias = alg.subtractMatrixRows(network[i].bias, alg.scalarMultiply(learning_rate/n, network[i].delta)); + } + } + } + + std::tuple>>, std::vector> ANN::computeGradients(std::vector y_hat, std::vector outputSet){ + // std::cout << "BEGIN" << std::endl; + class Cost cost; + Activation avn; + LinAlg alg; + Reg regularization; + + std::vector>> cumulativeHiddenLayerWGrad; // Tensor containing ALL hidden grads. + + auto costDeriv = outputLayer->costDeriv_map[outputLayer->cost]; + auto outputAvn = outputLayer->activation_map[outputLayer->activation]; + outputLayer->delta = alg.hadamard_product((cost.*costDeriv)(y_hat, outputSet), (avn.*outputAvn)(outputLayer->z, 1)); + std::vector outputWGrad = alg.mat_vec_mult(alg.transpose(outputLayer->input), outputLayer->delta); + outputWGrad = alg.addition(outputWGrad, regularization.regDerivTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg)); + + if(!network.empty()){ + auto hiddenLayerAvn = network[network.size() - 1].activation_map[network[network.size() - 1].activation]; + network[network.size() - 1].delta = alg.hadamard_product(alg.outerProduct(outputLayer->delta, outputLayer->weights), (avn.*hiddenLayerAvn)(network[network.size() - 1].z, 1)); + std::vector> hiddenLayerWGrad = alg.matmult(alg.transpose(network[network.size() - 1].input), network[network.size() - 1].delta); + + cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[network.size() - 1].weights, network[network.size() - 1].lambda, network[network.size() - 1].alpha, network[network.size() - 1].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well. + + for(int i = network.size() - 2; i >= 0; i--){ + auto hiddenLayerAvn = network[i].activation_map[network[i].activation]; + network[i].delta = alg.hadamard_product(alg.matmult(network[i + 1].delta, alg.transpose(network[i + 1].weights)), (avn.*hiddenLayerAvn)(network[i].z, 1)); + std::vector> hiddenLayerWGrad = alg.matmult(alg.transpose(network[i].input), network[i].delta); + cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well. 
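+                // Standard backpropagation recursion: delta_i is the propagated error
+                // (delta_{i+1} * W_{i+1}^T) multiplied elementwise with f'(z_i), and
+                // dJ/dW_i = input_i^T * delta_i plus the regularization derivative.
+                // Gradients are pushed back from the last hidden layer to the first,
+                // which is why updateParameters() walks hiddenLayerUpdations in reverse.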
+ + } + } + return {cumulativeHiddenLayerWGrad, outputWGrad}; + } + + void ANN::UI(int epoch, double cost_prev, std::vector y_hat, std::vector outputSet){ + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); + std::cout << "Layer " << network.size() + 1 << ": " << std::endl; + Utilities::UI(outputLayer->weights, outputLayer->bias); + if(!network.empty()){ + for(int i = network.size() - 1; i >= 0; i--){ + std::cout << "Layer " << i + 1 << ": " << std::endl; + Utilities::UI(network[i].weights, network[i].bias); + } + } + } +} \ No newline at end of file diff --git a/MLPP/ANN/ANN.hpp b/MLPP/ANN/ANN.hpp new file mode 100644 index 0000000..3370596 --- /dev/null +++ b/MLPP/ANN/ANN.hpp @@ -0,0 +1,72 @@ +// +// ANN.hpp +// +// Created by Marc Melikyan on 11/4/20. +// + +#ifndef ANN_hpp +#define ANN_hpp + +#include "HiddenLayer/HiddenLayer.hpp" +#include "OutputLayer/OutputLayer.hpp" + +#include +#include +#include + +namespace MLPP{ + +class ANN{ + public: + ANN(std::vector> inputSet, std::vector outputSet); + ~ANN(); + std::vector modelSetTest(std::vector> X); + double modelTest(std::vector x); + void gradientDescent(double learning_rate, int max_epoch, bool UI = 1); + void SGD(double learning_rate, int max_epoch, bool UI = 1); + void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1); + void Momentum(double learning_rate, int max_epoch, int mini_batch_size, double gamma, bool NAG, bool UI = 1); + void Adagrad(double learning_rate, int max_epoch, int mini_batch_size, double e, bool UI = 1); + void Adadelta(double learning_rate, int max_epoch, int mini_batch_size, double b1, double e, bool UI = 1); + void Adam(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI = 1); + void Adamax(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI = 1); + void Nadam(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI = 1); + void AMSGrad(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI = 1); + double score(); + void save(std::string fileName); + + void setLearningRateScheduler(std::string type, double decayConstant); + void setLearningRateScheduler(std::string type, double decayConstant, double dropRate); + + void addLayer(int n_hidden, std::string activation, std::string weightInit = "Default", std::string reg = "None", double lambda = 0.5, double alpha = 0.5); + void addOutputLayer(std::string activation, std::string loss, std::string weightInit = "Default", std::string reg = "None", double lambda = 0.5, double alpha = 0.5); + + private: + double applyLearningRateScheduler(double learningRate, double decayConstant, double epoch, double dropRate); + + double Cost(std::vector y_hat, std::vector y); + + void forwardPass(); + void updateParameters(std::vector>> hiddenLayerUpdations, std::vector outputLayerUpdation, double learning_rate); + std::tuple>>, std::vector> computeGradients(std::vector y_hat, std::vector outputSet); + + void UI(int epoch, double cost_prev, std::vector y_hat, std::vector outputSet); + + + std::vector> inputSet; + std::vector outputSet; + std::vector y_hat; + + std::vector network; + OutputLayer *outputLayer; + + int n; + int k; + + std::string lrScheduler; + double decayConstant; + double dropRate; + }; +} + +#endif /* ANN_hpp */ \ No newline at end of file diff --git a/MLPP/Activation/Activation.cpp b/MLPP/Activation/Activation.cpp new file mode 100644 index 
0000000..6e15fff --- /dev/null +++ b/MLPP/Activation/Activation.cpp @@ -0,0 +1,884 @@ +// +// Activation.cpp +// +// Created by Marc Melikyan on 1/16/21. +// + +#include +#include "LinAlg/LinAlg.hpp" +#include "Activation.hpp" +#include +#include + +namespace MLPP{ + + double Activation::linear(double z, bool deriv){ + if(deriv){ return 1; } + return z; + } + + std::vector Activation::linear(std::vector z, bool deriv){ + if(deriv) { + LinAlg alg; + return alg.onevec(z.size()); + } + return z; + + } + + std::vector> Activation::linear(std::vector> z, bool deriv){ + if(deriv){ + LinAlg alg; + return alg.onemat(z.size(), z[0].size()); + } + return z; + } + + double Activation::sigmoid(double z, bool deriv){ + if(deriv) { return sigmoid(z) * (1 - sigmoid(z)); } + return 1 / (1 + exp(-z)); + } + + std::vector Activation::sigmoid(std::vector z, bool deriv){ + LinAlg alg; + if(deriv) { return alg.subtraction(sigmoid(z), alg.hadamard_product(sigmoid(z), sigmoid(z))); } + return alg.elementWiseDivision(alg.onevec(z.size()), alg.addition(alg.onevec(z.size()), alg.exp(alg.scalarMultiply(-1, z)))); + } + + std::vector> Activation::sigmoid(std::vector> z, bool deriv){ + LinAlg alg; + if(deriv) { return alg.subtraction(sigmoid(z), alg.hadamard_product(sigmoid(z), sigmoid(z))); } + return alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), alg.addition(alg.onemat(z.size(), z[0].size()), alg.exp(alg.scalarMultiply(-1, z)))); + } + + std::vector Activation::softmax(std::vector z, bool deriv){ + LinAlg alg; + std::vector a; + a.resize(z.size()); + std::vector expZ = alg.exp(z); + double sum = 0; + + for(int i = 0; i < z.size(); i++){ + sum += expZ[i]; + } + for(int i = 0; i < z.size(); i++){ + a[i] = expZ[i] / sum; + } + return a; + } + + std::vector> Activation::softmax(std::vector> z, bool deriv){ + LinAlg alg; + std::vector> a; + a.resize(z.size()); + + for(int i = 0; i < z.size(); i++){ + a[i] = softmax(z[i]); + } + return a; + } + + std::vector Activation::adjSoftmax(std::vector z){ + LinAlg alg; + std::vector a; + double C = -*std::max_element(z.begin(), z.end()); + z = alg.scalarAdd(C, z); + + return softmax(z); + } + + std::vector> Activation::adjSoftmax(std::vector> z){ + LinAlg alg; + std::vector> a; + a.resize(z.size()); + + for(int i = 0; i < z.size(); i++){ + a[i] = adjSoftmax(z[i]); + } + return a; + } + + std::vector> Activation::softmaxDeriv(std::vector z){ + LinAlg alg; + std::vector> deriv; + std::vector a = softmax(z); + deriv.resize(a.size()); + for(int i = 0; i < deriv.size(); i++){ + deriv[i].resize(a.size()); + } + for(int i = 0; i < a.size(); i++){ + for(int j = 0; j < z.size(); j++){ + if(i == j){ + deriv[i][j] = a[i] * (1 - a[i]); + } + else{ + deriv[i][j] = -a[i] * a[j]; + } + } + } + return deriv; + } + + std::vector>> Activation::softmaxDeriv(std::vector> z){ + LinAlg alg; + std::vector>> deriv; + std::vector> a = softmax(z); + + deriv.resize(a.size()); + for(int i = 0; i < deriv.size(); i++){ + deriv[i].resize(a.size()); + } + for(int i = 0; i < a.size(); i++){ + for(int j = 0; j < z.size(); j++){ + if(i == j){ + deriv[i][j] = alg.subtraction(a[i], alg.hadamard_product(a[i], a[i])); + } + else{ + deriv[i][j] = alg.scalarMultiply(-1, alg.hadamard_product(a[i], a[j])); + } + } + } + return deriv; + } + + double Activation::softplus(double z, bool deriv){ + if(deriv){ return sigmoid(z); } + return std::log(1 + exp(z)); + } + + std::vector Activation::softplus(std::vector z, bool deriv){ + if(deriv) { return sigmoid(z); } + LinAlg alg; + return 
alg.log(alg.addition(alg.onevec(z.size()), alg.exp(z))); + } + + std::vector> Activation::softplus(std::vector> z, bool deriv){ + if(deriv) { return sigmoid(z); } + LinAlg alg; + return alg.log(alg.addition(alg.onemat(z.size(), z[0].size()), alg.exp(z))); + } + + double Activation::softsign(double z, bool deriv){ + if(deriv){ return 1/((1 + abs(z)) * (1 + abs(z))); } + return z/(1 + abs(z)); + } + + std::vector Activation::softsign(std::vector z, bool deriv){ + LinAlg alg; + if(deriv) { return alg.elementWiseDivision(alg.onevec(z.size()), alg.exponentiate(alg.addition(alg.onevec(z.size()), alg.abs(z)), 2)); } + return alg.elementWiseDivision(z, alg.addition(alg.onevec(z.size()), alg.abs(z))); + } + + std::vector> Activation::softsign(std::vector> z, bool deriv){ + LinAlg alg; + if(deriv) { return alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), alg.exponentiate(alg.addition(alg.onemat(z.size(), z[0].size()), alg.abs(z)), 2)); } + return alg.elementWiseDivision(z, alg.addition(alg.onemat(z.size(), z[0].size()), alg.abs(z))); + } + + double Activation::gaussianCDF(double z, bool deriv){ + if(deriv) { + return (1 / sqrt(2 * M_PI)) * exp(-z * z / 2); + } + return 0.5 * (1 + erf(z / sqrt(2))); + } + + std::vector Activation::gaussianCDF(std::vector z, bool deriv){ + LinAlg alg; + if(deriv) { + return alg.scalarMultiply(1 / sqrt(2 * M_PI), alg.exp(alg.scalarMultiply(-1/2, alg.hadamard_product(z, z)))); + } + return alg.scalarMultiply(0.5, alg.addition(alg.onevec(z.size()), alg.erf(alg.scalarMultiply(1/sqrt(2), z)))); + } + + std::vector> Activation::gaussianCDF(std::vector> z, bool deriv){ + LinAlg alg; + if(deriv) { + return alg.scalarMultiply(1 / sqrt(2 * M_PI), alg.exp(alg.scalarMultiply(-1/2, alg.hadamard_product(z, z)))); + } + return alg.scalarMultiply(0.5, alg.addition(alg.onemat(z.size(), z[0].size()), alg.erf(alg.scalarMultiply(1/sqrt(2), z)))); + } + + double Activation::cloglog(double z, bool deriv){ + if(deriv) { return exp(z-exp(z)); } + return 1 - exp(-exp(z)); + } + + std::vector Activation::cloglog(std::vector z, bool deriv){ + LinAlg alg; + if(deriv) { + return alg.exp(alg.scalarMultiply(-1, alg.exp(z))); + } + return alg.scalarMultiply(-1, alg.scalarAdd(-1, alg.exp(alg.scalarMultiply(-1, alg.exp(z))))); + } + + std::vector> Activation::cloglog(std::vector> z, bool deriv){ + LinAlg alg; + if(deriv) { + return alg.exp(alg.scalarMultiply(-1, alg.exp(z))); + } + return alg.scalarMultiply(-1, alg.scalarAdd(-1, alg.exp(alg.scalarMultiply(-1, alg.exp(z))))); + } + + double Activation::logit(double z, bool deriv){ + if(deriv) { return 1/z - 1/(z-1); } + return std::log(z / (1 - z)); + } + + std::vector Activation::logit(std::vector z, bool deriv){ + LinAlg alg; + if(deriv) { + return alg.subtraction(alg.elementWiseDivision(alg.onevec(z.size()), z), alg.elementWiseDivision(alg.onevec(z.size()), alg.subtraction(z, alg.onevec(z.size())))); + } + return alg.log(alg.elementWiseDivision(z, alg.subtraction(alg.onevec(z.size()), z))); + } + + std::vector> Activation::logit(std::vector> z, bool deriv){ + LinAlg alg; + if(deriv) { + return alg.subtraction(alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), z), alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), alg.subtraction(z, alg.onemat(z.size(), z[0].size())))); + } + return alg.log(alg.elementWiseDivision(z, alg.subtraction(alg.onemat(z.size(), z[0].size()), z))); + } + + double Activation::unitStep(double z, bool deriv){ + if(deriv) { + return 0; + } + return z < 0 ? 
0 : 1; + } + + std::vector Activation::unitStep(std::vector z, bool deriv){ + if(deriv){ + std::vector deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = unitStep(z[i], 1); + } + return deriv; + } + std::vector a; + a.resize(z.size()); + + for(int i = 0; i < a.size(); i++){ + a[i] = unitStep(z[i]); + } + return a; + } + + std::vector> Activation::unitStep(std::vector> z, bool deriv){ + if(deriv){ + std::vector> deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = unitStep(z[i], 1); + } + return deriv; + } + std::vector> a; + a.resize(z.size()); + + for(int i = 0; i < a.size(); i++){ + a[i] = unitStep(z[i]); + } + return a; + } + + double Activation::swish(double z, bool deriv){ + if(deriv){ + return swish(z) + sigmoid(z) * (1 - swish(z)); + } + return z * sigmoid(z); + } + + std::vector Activation::swish(std::vector z, bool deriv){ + LinAlg alg; + if(deriv){ + alg.addition(swish(z), alg.subtraction(sigmoid(z), alg.hadamard_product(sigmoid(z), swish(z)))); + } + return alg.hadamard_product(z, sigmoid(z)); + } + + std::vector> Activation::swish(std::vector> z, bool deriv){ + LinAlg alg; + if(deriv){ + alg.addition(swish(z), alg.subtraction(sigmoid(z), alg.hadamard_product(sigmoid(z), swish(z)))); + } + return alg.hadamard_product(z, sigmoid(z)); + } + + double Activation::mish(double z, bool deriv){ + if(deriv){ + return sech(softplus(z)) * sech(softplus(z)) * z * sigmoid(z) + mish(z)/z; + } + return z * tanh(softplus(z)); + } + + std::vector Activation::mish(std::vector z, bool deriv){ + LinAlg alg; + if(deriv){ + return alg.addition(alg.hadamard_product(alg.hadamard_product(alg.hadamard_product(sech(softplus(z)), sech(softplus(z))), z), sigmoid(z)), alg.elementWiseDivision(mish(z), z)); + } + return alg.hadamard_product(z, tanh(softplus(z))); + } + + std::vector> Activation::mish(std::vector> z, bool deriv){ + LinAlg alg; + if(deriv){ + return alg.addition(alg.hadamard_product(alg.hadamard_product(alg.hadamard_product(sech(softplus(z)), sech(softplus(z))), z), sigmoid(z)), alg.elementWiseDivision(mish(z), z)); + } + return alg.hadamard_product(z, tanh(softplus(z))); + } + + double Activation::sinc(double z, bool deriv){ + if(deriv){ + return (z * std::cos(z) - std::sin(z)) / (z * z); + } + return std::sin(z)/z; + } + + std::vector Activation::sinc(std::vector z, bool deriv){ + LinAlg alg; + if(deriv){ + return alg.elementWiseDivision(alg.subtraction(alg.hadamard_product(z, alg.cos(z)), alg.sin(z)), alg.hadamard_product(z, z)); + } + return alg.elementWiseDivision(alg.sin(z), z); + } + + std::vector> Activation::sinc(std::vector> z, bool deriv){ + LinAlg alg; + if(deriv){ + return alg.elementWiseDivision(alg.subtraction(alg.hadamard_product(z, alg.cos(z)), alg.sin(z)), alg.hadamard_product(z, z)); + } + return alg.elementWiseDivision(alg.sin(z), z); + } + + + double Activation::RELU(double z, bool deriv){ + if (deriv){ + if(z <= 0){ + return 0; + } + else { + return 1; + } + } + return fmax(0, z); + } + + std::vector Activation::RELU(std::vector z, bool deriv){ + if(deriv){ + std::vector deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = RELU(z[i], 1); + } + return deriv; + } + std::vector a; + a.resize(z.size()); + + for(int i = 0; i < a.size(); i++){ + a[i] = RELU(z[i]); + } + return a; + } + + std::vector> Activation::RELU(std::vector> z, bool deriv){ + if(deriv){ + std::vector> deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = RELU(z[i], 1); + } + return deriv; 
+ } + std::vector> a; + a.resize(z.size()); + + for(int i = 0; i < a.size(); i++){ + a[i] = RELU(z[i]); + } + return a; + } + + double Activation::leakyReLU(double z, double c, bool deriv){ + if (deriv){ + if(z <= 0){ + return c; + } + else { + return 1; + } + } + return fmax(c * z, z); + } + + std::vector Activation::leakyReLU(std::vector z, double c, bool deriv){ + if(deriv){ + std::vector deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = leakyReLU(z[i], c, 1); + } + return deriv; + } + std::vector a; + a.resize(z.size()); + + for(int i = 0; i < a.size(); i++){ + a[i] = leakyReLU(z[i], c); + } + return a; + } + + std::vector> Activation::leakyReLU(std::vector> z, double c, bool deriv){ + if(deriv){ + std::vector> deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = leakyReLU(z[i], c, 1); + } + return deriv; + } + std::vector> a; + a.resize(z.size()); + + for(int i = 0; i < a.size(); i++){ + a[i] = leakyReLU(z[i], c); + } + return a; + } + + double Activation::ELU(double z, double c, bool deriv){ + if (deriv){ + if(z <= 0){ + return c * exp(z); + } + else { + return 1; + } + } + if(z >= 0){ + return z; + } + else{ + return c * (exp(z) - 1); + } + } + + std::vector Activation::ELU(std::vector z, double c, bool deriv){ + if(deriv){ + std::vector deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = ELU(z[i], c, 1); + } + return deriv; + } + std::vector a; + a.resize(z.size()); + + for(int i = 0; i < a.size(); i++){ + a[i] = ELU(z[i], c); + } + return a; + } + + std::vector> Activation::ELU(std::vector> z, double c, bool deriv){ + if(deriv){ + std::vector> deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = ELU(z[i], c, 1); + } + return deriv; + } + std::vector> a; + a.resize(z.size()); + + for(int i = 0; i < a.size(); i++){ + a[i] = ELU(z[i], c); + } + return a; + } + + double Activation::SELU(double z, double lambda, double c, bool deriv){ + if (deriv){ + return ELU(z, c, 1); + } + return lambda * ELU(z, c); + } + + std::vector Activation::SELU(std::vector z, double lambda, double c, bool deriv){ + if(deriv){ + std::vector deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = SELU(z[i], lambda, c, 1); + } + return deriv; + } + std::vector a; + a.resize(z.size()); + + for(int i = 0; i < a.size(); i++){ + a[i] = SELU(z[i], lambda, c); + } + return a; + } + + std::vector> Activation::SELU(std::vector> z, double lambda, double c, bool deriv){ + if(deriv){ + std::vector> deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = SELU(z[i], lambda, c, 1); + } + return deriv; + } + std::vector> a; + a.resize(z.size()); + + for(int i = 0; i < a.size(); i++){ + a[i] = SELU(z[i], lambda, c); + } + return a; + } + + double Activation::GELU(double z, bool deriv){ + if (deriv){ + return 0.5 * tanh(0.0356774 * std::pow(z, 3) + 0.797885 * z) + (0.0535161 * std::pow(z, 3) + 0.398942 * z) * std::pow(sech(0.0356774 * std::pow(z, 3) + 0.797885 * z), 2) + 0.5; + } + return 0.5 * z * (1 + tanh(sqrt(2/M_PI) * (z + 0.044715 * std::pow(z, 3)))); + } + + std::vector Activation::GELU(std::vector z, bool deriv){ + if(deriv){ + std::vector deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = GELU(z[i], 1); + } + return deriv; + } + std::vector a; + a.resize(z.size()); + + for(int i = 0; i < a.size(); i++){ + a[i] = GELU(z[i]); + } + return a; + } + + std::vector> Activation::GELU(std::vector> z, bool deriv){ + 
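+        // GELU here uses the common tanh approximation
+        //   GELU(z) ~ 0.5 * z * (1 + tanh(sqrt(2/pi) * (z + 0.044715 * z^3)));
+        // the constants 0.797885 ~ sqrt(2/pi) and 0.0356774 ~ 0.044715 * sqrt(2/pi)
+        // in the scalar overload above come from differentiating this form.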
if(deriv){ + std::vector> deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = GELU(z[i], 1); + } + return deriv; + } + std::vector> a; + a.resize(z.size()); + + for(int i = 0; i < a.size(); i++){ + a[i] = GELU(z[i]); + } + return a; + } + + double Activation::sign(double z, bool deriv){ + if(deriv){ + return 0; + } + if(z < 0){ + return -1; + } + else if(z == 0){ + return 0; + } + else{ + return 1; + } + } + + std::vector Activation::sign(std::vector z, bool deriv){ + if(deriv){ + std::vector deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = sign(z[i], 1); + } + return deriv; + } + std::vector a; + a.resize(z.size()); + + for(int i = 0; i < a.size(); i++){ + a[i] = sign(z[i]); + } + return a; + } + + std::vector> Activation::sign(std::vector> z, bool deriv){ + if(deriv){ + std::vector> deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = sign(z[i], 1); + } + return deriv; + } + std::vector> a; + a.resize(z.size()); + + for(int i = 0; i < a.size(); i++){ + a[i] = sign(z[i]); + } + return a; + } + + double Activation::sinh(double z, bool deriv){ + if(deriv){ return cosh(z); } + return 0.5 * (exp(z) - exp(-z)); + } + + std::vector Activation::sinh(std::vector z, bool deriv){ + if(deriv){ return cosh(z); } + LinAlg alg; + return alg.scalarMultiply(0.5, alg.subtraction(alg.exp(z), alg.exp(alg.scalarMultiply(-1, z)))); + } + + std::vector> Activation::sinh(std::vector> z, bool deriv){ + if(deriv){ return cosh(z); } + LinAlg alg; + return alg.scalarMultiply(0.5, alg.subtraction(alg.exp(z), alg.exp(alg.scalarMultiply(-1, z)))); + } + + double Activation::cosh(double z, bool deriv){ + if(deriv){ return sinh(z); } + return 0.5 * (exp(z) + exp(-z)); + } + + std::vector Activation::cosh(std::vector z, bool deriv){ + if(deriv){ return sinh(z); } + LinAlg alg; + return alg.scalarMultiply(0.5, alg.addition(alg.exp(z), alg.exp(alg.scalarMultiply(-1, z)))); + } + + std::vector> Activation::cosh(std::vector> z, bool deriv){ + if(deriv){ return sinh(z); } + LinAlg alg; + return alg.scalarMultiply(0.5, alg.addition(alg.exp(z), alg.exp(alg.scalarMultiply(-1, z)))); + } + + double Activation::tanh(double z, bool deriv){ + if(deriv){ return 1 - tanh(z) * tanh(z); } + return (exp(z) - exp(-z)) / (exp(z) + exp(-z)); + } + + std::vector Activation::tanh(std::vector z, bool deriv){ + LinAlg alg; + if(deriv){ + return alg.scalarMultiply(-1, alg.scalarAdd(-1, alg.hadamard_product(tanh(z), tanh(z)))); + } + return alg.elementWiseDivision(alg.subtraction(alg.exp(z), alg.exp(alg.scalarMultiply(-1, z))), alg.addition(alg.exp(z), alg.exp(alg.scalarMultiply(-1, z)))); + } + + std::vector> Activation::tanh(std::vector> z, bool deriv){ + LinAlg alg; + if(deriv){ + return alg.scalarMultiply(-1, alg.scalarAdd(-1, alg.hadamard_product(tanh(z), tanh(z)))); + } + + return alg.elementWiseDivision(alg.subtraction(alg.exp(z), alg.exp(alg.scalarMultiply(-1, z))), alg.addition(alg.exp(z), alg.exp(alg.scalarMultiply(-1, z)))); + } + + double Activation::csch(double z, bool deriv){ + if(deriv){ return -csch(z) * coth(z); } + return 1 / sinh(z); + } + + std::vector Activation::csch(std::vector z, bool deriv){ + LinAlg alg; + if(deriv){ return alg.hadamard_product(alg.scalarMultiply(-1, csch(z)), coth(z)); } + return alg.elementWiseDivision(alg.onevec(z.size()), sinh(z)); + } + + std::vector> Activation::csch(std::vector> z, bool deriv){ + LinAlg alg; + if(deriv){ return alg.hadamard_product(alg.scalarMultiply(-1, csch(z)), coth(z)); } + return 
alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), sinh(z)); + } + + double Activation::sech(double z, bool deriv){ + if(deriv){ return -sech(z) * tanh(z); } + return 1 / cosh(z); + } + + std::vector Activation::sech(std::vector z, bool deriv){ + LinAlg alg; + if(deriv){ return alg.hadamard_product(alg.scalarMultiply(-1, sech(z)), tanh(z)); } + return alg.elementWiseDivision(alg.onevec(z.size()), cosh(z)); + + // return activation(z, deriv, static_cast(&sech)); + } + + std::vector> Activation::sech(std::vector> z, bool deriv){ + LinAlg alg; + if(deriv){ return alg.hadamard_product(alg.scalarMultiply(-1, sech(z)), tanh(z)); } + return alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), cosh(z)); + + // return activation(z, deriv, static_cast(&sech)); + } + + + double Activation::coth(double z, bool deriv){ + if(deriv){ return -csch(z) * csch(z); } + return 1 / tanh(z); + } + + std::vector Activation::coth(std::vector z, bool deriv){ + LinAlg alg; + if(deriv){ return alg.hadamard_product(alg.scalarMultiply(-1, csch(z)), csch(z)); } + return alg.elementWiseDivision(alg.onevec(z.size()), tanh(z)); + } + + std::vector> Activation::coth(std::vector> z, bool deriv){ + LinAlg alg; + if(deriv){ return alg.hadamard_product(alg.scalarMultiply(-1, csch(z)), csch(z)); } + return alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), tanh(z)); + } + + double Activation::arsinh(double z, bool deriv){ + if(deriv){ return 1 / sqrt(z * z + 1); } + return std::log(z + sqrt(z * z + 1)); + } + + std::vector Activation::arsinh(std::vector z, bool deriv){ + LinAlg alg; + if(deriv){ return alg.elementWiseDivision(alg.onevec(z.size()), alg.sqrt(alg.addition(alg.hadamard_product(z, z), alg.onevec(z.size())))); } + return alg.log(alg.addition(z, alg.sqrt(alg.addition(alg.hadamard_product(z, z), alg.onevec(z.size()))))); + } + + std::vector> Activation::arsinh(std::vector> z, bool deriv){ + LinAlg alg; + if(deriv){ return alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), alg.sqrt(alg.addition(alg.hadamard_product(z, z), alg.onemat(z.size(), z[0].size())))); } + return alg.log(alg.addition(z, alg.sqrt(alg.addition(alg.hadamard_product(z, z), alg.onemat(z.size(), z[0].size()))))); + } + + double Activation::arcosh(double z, bool deriv){ + if(deriv){ + return 1/sqrt(z * z - 1); + } + return std::log(z + sqrt(z * z - 1)); + } + + std::vector Activation::arcosh(std::vector z, bool deriv){ + LinAlg alg; + if(deriv){ return alg.elementWiseDivision(alg.onevec(z.size()), alg.sqrt(alg.subtraction(alg.hadamard_product(z, z), alg.onevec(z.size())))); } + return alg.log(alg.addition(z, alg.sqrt(alg.subtraction(alg.hadamard_product(z, z), alg.onevec(z.size()))))); + } + + std::vector> Activation::arcosh(std::vector> z, bool deriv){ + LinAlg alg; + if(deriv){ return alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), alg.sqrt(alg.subtraction(alg.hadamard_product(z, z), alg.onemat(z.size(), z[0].size())))); } + return alg.log(alg.addition(z, alg.sqrt(alg.subtraction(alg.hadamard_product(z, z), alg.onemat(z.size(), z[0].size()))))); + } + + double Activation::artanh(double z, bool deriv){ + if(deriv){ + return 1/(1 - z * z); + } + return 0.5 * std::log((1 + z)/(1 - z)); + } + + std::vector Activation::artanh(std::vector z, bool deriv){ + LinAlg alg; + if(deriv){ return alg.elementWiseDivision(alg.onevec(z.size()), alg.subtraction(alg.onevec(z.size()), alg.hadamard_product(z, z))); } + return alg.scalarMultiply(0.5, alg.log(alg.elementWiseDivision(alg.addition(alg.onevec(z.size()), z), 
alg.subtraction(alg.onevec(z.size()), z)))); + } + + std::vector> Activation::artanh(std::vector> z, bool deriv){ + LinAlg alg; + if(deriv){ return alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), alg.subtraction(alg.onemat(z.size(), z[0].size()), alg.hadamard_product(z, z))); } + return alg.scalarMultiply(0.5, alg.log(alg.elementWiseDivision(alg.addition(alg.onemat(z.size(), z[0].size()), z), alg.subtraction(alg.onemat(z.size(), z[0].size()), z)))); + } + + double Activation::arcsch(double z, bool deriv){ + if(deriv){ + return -1/((z * z) * sqrt(1 + (1/(z * z)))); + } + return std::log(sqrt(1 + (1 / (z * z))) + (1/z)); + } + + std::vector Activation::arcsch(std::vector z, bool deriv){ + LinAlg alg; + if(deriv){ return alg.elementWiseDivision(alg.full(z.size(), -1), alg.hadamard_product(alg.hadamard_product(z, z), alg.sqrt(alg.addition(alg.onevec(z.size()), alg.elementWiseDivision(alg.onevec(z.size()), alg.hadamard_product(z, z)))))); } + return alg.log(alg.addition(alg.sqrt(alg.addition(alg.onevec(z.size()), alg.elementWiseDivision(alg.onevec(z.size()), alg.hadamard_product(z, z)))), alg.elementWiseDivision(alg.onevec(z.size()), z))); + } + + std::vector> Activation::arcsch(std::vector> z, bool deriv){ + LinAlg alg; + if(deriv){ return alg.elementWiseDivision(alg.full(z.size(), z[0].size(), -1), alg.hadamard_product(alg.hadamard_product(z, z), alg.sqrt(alg.addition(alg.onemat(z.size(), z[0].size()), alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), alg.hadamard_product(z, z)))))); } + return alg.log(alg.addition(alg.sqrt(alg.addition(alg.onemat(z.size(), z[0].size()), alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), alg.hadamard_product(z, z)))), alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), z))); + } + + + double Activation::arsech(double z, bool deriv){ + if(deriv){ + return -1/(z * sqrt(1 - z * z)); + } + return std::log((1/z) + ((1/z) + 1) * ((1/z) - 1)); + } + + std::vector Activation::arsech(std::vector z, bool deriv){ + LinAlg alg; + if(deriv){ return alg.elementWiseDivision(alg.full(z.size(), -1), alg.hadamard_product(z, alg.sqrt(alg.subtraction(alg.onevec(z.size()), alg.hadamard_product(z, z))))); } + return alg.log(alg.addition(alg.elementWiseDivision(alg.onevec(z.size()), z), alg.hadamard_product(alg.addition(alg.elementWiseDivision(alg.onevec(z.size()), z), alg.onevec(z.size())), alg.subtraction(alg.elementWiseDivision(alg.onevec(z.size()), z), alg.onevec(z.size()))))); + } + + std::vector> Activation::arsech(std::vector> z, bool deriv){ + LinAlg alg; + if(deriv){ return alg.elementWiseDivision(alg.full(z.size(), z[0].size(), -1), alg.hadamard_product(z, alg.sqrt(alg.subtraction(alg.onemat(z.size(), z[0].size()), alg.hadamard_product(z, z))))); } + return alg.log(alg.addition(alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), z), alg.hadamard_product(alg.addition(alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), z), alg.onemat(z.size(), z[0].size())), alg.subtraction(alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), z), alg.onemat(z.size(), z[0].size()))))); + } + + double Activation::arcoth(double z, bool deriv){ + if(deriv){ + return 1/(1 - z * z); + } + return 0.5 * std::log((1 + z)/(z - 1)); + } + + std::vector Activation::arcoth(std::vector z, bool deriv){ + LinAlg alg; + if(deriv){ return alg.elementWiseDivision(alg.onevec(z.size()), alg.subtraction(alg.onevec(z.size()), alg.hadamard_product(z, z))); } + return alg.scalarMultiply(0.5, alg.log(alg.elementWiseDivision(alg.addition(alg.onevec(z.size()), z), 
alg.subtraction(z, alg.onevec(z.size()))))); + } + + std::vector> Activation::arcoth(std::vector> z, bool deriv){ + LinAlg alg; + if(deriv){ return alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), alg.subtraction(alg.onemat(z.size(), z[0].size()), alg.hadamard_product(z, z))); } + return alg.scalarMultiply(0.5, alg.log(alg.elementWiseDivision(alg.addition(alg.onemat(z.size(), z[0].size()), z), alg.subtraction(z, alg.onemat(z.size(), z[0].size()))))); + } + + // TO DO: Implement this template activation + std::vector Activation::activation(std::vector z, bool deriv, double(*function)(double, bool)){ + if(deriv){ + std::vector deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = function(z[i], 1); + } + return deriv; + } + std::vector a; + a.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + a[i] = function(z[i], deriv); + } + return a; + } +} diff --git a/MLPP/Activation/Activation.hpp b/MLPP/Activation/Activation.hpp new file mode 100644 index 0000000..aa0bbbc --- /dev/null +++ b/MLPP/Activation/Activation.hpp @@ -0,0 +1,146 @@ +// +// Activation.hpp +// +// Created by Marc Melikyan on 1/16/21. +// + +#ifndef Activation_hpp +#define Activation_hpp + +#include + +namespace MLPP{ + class Activation{ + public: + double linear(double z, bool deriv = 0); + std::vector linear(std::vector z, bool deriv = 0); + std::vector> linear(std::vector> z, bool deriv = 0); + + double sigmoid(double z, bool deriv = 0); + std::vector sigmoid(std::vector z, bool deriv = 0); + std::vector> sigmoid(std::vector> z, bool deriv = 0); + + std::vector softmax(std::vector z, bool deriv = 0); + std::vector> softmax(std::vector> z, bool deriv = 0); + + std::vector adjSoftmax(std::vector z); + std::vector> adjSoftmax(std::vector> z); + + std::vector> softmaxDeriv(std::vector z); + std::vector>> softmaxDeriv(std::vector> z); + + double softplus(double z, bool deriv = 0); + std::vector softplus(std::vector z, bool deriv = 0); + std::vector> softplus(std::vector> z, bool deriv = 0); + + double softsign(double z, bool deriv = 0); + std::vector softsign(std::vector z, bool deriv = 0); + std::vector> softsign(std::vector> z, bool deriv = 0); + + double gaussianCDF(double z, bool deriv = 0); + std::vector gaussianCDF(std::vector z, bool deriv = 0); + std::vector> gaussianCDF(std::vector> z, bool deriv = 0); + + double cloglog(double z, bool deriv = 0); + std::vector cloglog(std::vector z, bool deriv = 0); + std::vector> cloglog(std::vector> z, bool deriv = 0); + + double logit(double z, bool deriv = 0); + std::vector logit(std::vector z, bool deriv = 0); + std::vector> logit(std::vector> z, bool deriv = 0); + + double unitStep(double z, bool deriv = 0); + std::vector unitStep(std::vector z, bool deriv = 0); + std::vector> unitStep(std::vector> z, bool deriv = 0); + + double swish(double z, bool deriv = 0); + std::vector swish(std::vector z, bool deriv = 0); + std::vector> swish(std::vector> z, bool deriv = 0); + + double mish(double z, bool deriv = 0); + std::vector mish(std::vector z, bool deriv = 0); + std::vector> mish(std::vector> z, bool deriv = 0); + + double sinc(double z, bool deriv = 0); + std::vector sinc(std::vector z, bool deriv = 0); + std::vector> sinc(std::vector> z, bool deriv = 0); + + double RELU(double z, bool deriv = 0); + std::vector RELU(std::vector z, bool deriv = 0); + std::vector> RELU(std::vector> z, bool deriv = 0); + + double leakyReLU(double z, double c, bool deriv = 0); + std::vector leakyReLU(std::vector z, double c, bool deriv = 0); + std::vector> 
leakyReLU(std::vector> z, double c, bool deriv = 0); + + double ELU(double z, double c, bool deriv = 0); + std::vector ELU(std::vector z, double c, bool deriv = 0); + std::vector> ELU(std::vector> z, double c, bool deriv = 0); + + double SELU(double z, double lambda, double c, bool deriv = 0); + std::vector SELU(std::vector z, double lambda, double c, bool deriv = 0); + std::vector> SELU(std::vector>, double lambda, double c, bool deriv = 0); + + double GELU(double z, bool deriv = 0); + std::vector GELU(std::vector z, bool deriv = 0); + std::vector> GELU(std::vector> z, bool deriv = 0); + + double sign(double z, bool deriv = 0); + std::vector sign(std::vector z, bool deriv = 0); + std::vector> sign(std::vector> z, bool deriv = 0); + + double sinh(double z, bool deriv = 0); + std::vector sinh(std::vector z, bool deriv = 0); + std::vector> sinh(std::vector> z, bool deriv = 0); + + double cosh(double z, bool deriv = 0); + std::vector cosh(std::vector z, bool deriv = 0); + std::vector> cosh(std::vector> z, bool deriv = 0); + + double tanh(double z, bool deriv = 0); + std::vector tanh(std::vector z, bool deriv = 0); + std::vector> tanh(std::vector> z, bool deriv = 0); + + double csch(double z, bool deriv = 0); + std::vector csch(std::vector z, bool deriv = 0); + std::vector> csch( std::vector> z, bool deriv = 0); + + double sech(double z, bool deriv = 0); + std::vector sech(std::vector z, bool deriv = 0); + std::vector> sech(std::vector> z, bool deriv = 0); + + double coth(double z, bool deriv = 0); + std::vector coth(std::vector z, bool deriv = 0); + std::vector> coth(std::vector> z, bool deriv = 0); + + double arsinh(double z, bool deriv = 0); + std::vector arsinh(std::vector z, bool deriv = 0); + std::vector> arsinh(std::vector> z, bool deriv = 0); + + double arcosh(double z, bool deriv = 0); + std::vector arcosh(std::vector z, bool deriv = 0); + std::vector> arcosh(std::vector> z, bool deriv = 0); + + double artanh(double z, bool deriv = 0); + std::vector artanh(std::vector z, bool deriv = 0); + std::vector> artanh(std::vector> z, bool deriv = 0); + + double arcsch(double z, bool deriv = 0); + std::vector arcsch(std::vector z, bool deriv = 0); + std::vector> arcsch(std::vector> z, bool deriv = 0); + + double arsech(double z, bool deriv = 0); + std::vector arsech(std::vector z, bool deriv = 0); + std::vector> arsech(std::vector> z, bool deriv = 0); + + double arcoth(double z, bool deriv = 0); + std::vector arcoth(std::vector z, bool deriv = 0); + std::vector> arcoth(std::vector> z, bool deriv = 0); + + std::vector activation(std::vector z, bool deriv, double(*function)(double, bool)); + + private: + }; +} + +#endif /* Activation_hpp */ diff --git a/MLPP/AutoEncoder/AutoEncoder.cpp b/MLPP/AutoEncoder/AutoEncoder.cpp new file mode 100644 index 0000000..71bc0f9 --- /dev/null +++ b/MLPP/AutoEncoder/AutoEncoder.cpp @@ -0,0 +1,253 @@ +// +// AutoEncoder.cpp +// +// Created by Marc Melikyan on 11/4/20. 
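The Activation class declared above provides scalar, vector, and matrix overloads of each function, with the trailing bool selecting the derivative. A minimal usage sketch, assuming the MLPP/ directory is on the include path (data values here are illustrative only):

#include "MLPP/Activation/Activation.hpp"
#include <iostream>
#include <vector>

int main(){
    MLPP::Activation avn;
    std::vector<double> z = {-2.0, -0.5, 0.0, 1.5};
    std::vector<double> a  = avn.leakyReLU(z, 0.01);     // forward values, leak c = 0.01
    std::vector<double> da = avn.leakyReLU(z, 0.01, 1);  // elementwise derivatives
    for(int i = 0; i < a.size(); i++){
        std::cout << z[i] << " -> " << a[i] << " (deriv " << da[i] << ")" << std::endl;
    }
    return 0;
}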
+// + +#include "AutoEncoder.hpp" +#include "Activation/Activation.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Utilities/Utilities.hpp" +#include "Cost/Cost.hpp" + +#include +#include + +namespace MLPP { + AutoEncoder::AutoEncoder(std::vector> inputSet, int n_hidden) + : inputSet(inputSet), n_hidden(n_hidden), n(inputSet.size()), k(inputSet[0].size()) + { + Activation avn; + y_hat.resize(inputSet.size()); + + weights1 = Utilities::weightInitialization(k, n_hidden); + weights2 = Utilities::weightInitialization(n_hidden, k); + bias1 = Utilities::biasInitialization(n_hidden); + bias2 = Utilities::biasInitialization(k); + } + + std::vector> AutoEncoder::modelSetTest(std::vector> X){ + return Evaluate(X); + } + + std::vector AutoEncoder::modelTest(std::vector x){ + return Evaluate(x); + } + + void AutoEncoder::gradientDescent(double learning_rate, int max_epoch, bool UI){ + Activation avn; + LinAlg alg; + double cost_prev = 0; + int epoch = 1; + forwardPass(); + + while(true){ + cost_prev = Cost(y_hat, inputSet); + + // Calculating the errors + std::vector> error = alg.subtraction(y_hat, inputSet); + + // Calculating the weight/bias gradients for layer 2 + std::vector> D2_1 = alg.matmult(alg.transpose(a2), error); + + // weights and bias updation for layer 2 + weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate/n, D2_1)); + + // Calculating the bias gradients for layer 2 + bias2 = alg.subtractMatrixRows(bias2, alg.scalarMultiply(learning_rate, error)); + + //Calculating the weight/bias for layer 1 + + std::vector> D1_1 = alg.matmult(error, alg.transpose(weights2)); + + std::vector> D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, 1)); + + std::vector> D1_3 = alg.matmult(alg.transpose(inputSet), D1_2); + + + // weight an bias updation for layer 1 + weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate/n, D1_3)); + + bias1 = alg.subtractMatrixRows(bias1, alg.scalarMultiply(learning_rate/n, D1_2)); + + forwardPass(); + + // UI PORTION + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, inputSet)); + std::cout << "Layer 1:" << std::endl; + Utilities::UI(weights1, bias1); + std::cout << "Layer 2:" << std::endl; + Utilities::UI(weights2, bias2); + } + epoch++; + + if(epoch > max_epoch) { break; } + } + + } + + void AutoEncoder::SGD(double learning_rate, int max_epoch, bool UI){ + Activation avn; + LinAlg alg; + double cost_prev = 0; + int epoch = 1; + + while(true){ + std::random_device rd; + std::default_random_engine generator(rd()); + std::uniform_int_distribution distribution(0, int(n - 1)); + int outputIndex = distribution(generator); + + std::vector y_hat = Evaluate(inputSet[outputIndex]); + auto [z2, a2] = propagate(inputSet[outputIndex]); + cost_prev = Cost({y_hat}, {inputSet[outputIndex]}); + std::vector error = alg.subtraction(y_hat, inputSet[outputIndex]); + + // Weight updation for layer 2 + std::vector> D2_1 = alg.outerProduct(error, a2); + weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate, alg.transpose(D2_1))); + + // Bias updation for layer 2 + bias2 = alg.subtraction(bias2, alg.scalarMultiply(learning_rate, error)); + + // Weight updation for layer 1 + std::vector D1_1 = alg.mat_vec_mult(weights2, error); + std::vector D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, 1)); + std::vector> D1_3 = alg.outerProduct(inputSet[outputIndex], D1_2); + + weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate, D1_3)); + // Bias updation for layer 1 + + bias1 = alg.subtraction(bias1, 
alg.scalarMultiply(learning_rate, D1_2)); + + y_hat = Evaluate(inputSet[outputIndex]); + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost({y_hat}, {inputSet[outputIndex]})); + std::cout << "Layer 1:" << std::endl; + Utilities::UI(weights1, bias1); + std::cout << "Layer 2:" << std::endl; + Utilities::UI(weights2, bias2); + } + epoch++; + + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + void AutoEncoder::MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI){ + Activation avn; + LinAlg alg; + double cost_prev = 0; + int epoch = 1; + + // Creating the mini-batches + int n_mini_batch = n/mini_batch_size; + std::vector>> inputMiniBatches = Utilities::createMiniBatches(inputSet, n_mini_batch); + + while(true){ + for(int i = 0; i < n_mini_batch; i++){ + std::vector> y_hat = Evaluate(inputMiniBatches[i]); + auto [z2, a2] = propagate(inputMiniBatches[i]); + cost_prev = Cost(y_hat, inputMiniBatches[i]); + + // Calculating the errors + std::vector> error = alg.subtraction(y_hat, inputMiniBatches[i]); + + // Calculating the weight/bias gradients for layer 2 + + std::vector> D2_1 = alg.matmult(alg.transpose(a2), error); + + // weights and bias updation for layer 2 + weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate/inputMiniBatches[i].size(), D2_1)); + + // Bias Updation for layer 2 + bias2 = alg.subtractMatrixRows(bias2, alg.scalarMultiply(learning_rate, error)); + + //Calculating the weight/bias for layer 1 + + std::vector> D1_1 = alg.matmult(error, alg.transpose(weights2)); + + std::vector> D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, 1)); + + std::vector> D1_3 = alg.matmult(alg.transpose(inputMiniBatches[i]), D1_2); + + + // weight an bias updation for layer 1 + weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate/inputMiniBatches[i].size(), D1_3)); + + bias1 = alg.subtractMatrixRows(bias1, alg.scalarMultiply(learning_rate/inputMiniBatches[i].size(), D1_2)); + + y_hat = Evaluate(inputMiniBatches[i]); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, inputMiniBatches[i])); + std::cout << "Layer 1:" << std::endl; + Utilities::UI(weights1, bias1); + std::cout << "Layer 2:" << std::endl; + Utilities::UI(weights2, bias2); + } + } + epoch++; + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + double AutoEncoder::score(){ + Utilities util; + return util.performance(y_hat, inputSet); + } + + void AutoEncoder::save(std::string fileName){ + Utilities util; + util.saveParameters(fileName, weights1, bias1, 0, 1); + util.saveParameters(fileName, weights2, bias2, 1, 2); + } + + double AutoEncoder::Cost(std::vector> y_hat, std::vector> y){ + class Cost cost; + return cost.MSE(y_hat, inputSet); + } + + std::vector> AutoEncoder::Evaluate(std::vector> X){ + LinAlg alg; + Activation avn; + std::vector> z2 = alg.mat_vec_add(alg.matmult(X, weights1), bias1); + std::vector> a2 = avn.sigmoid(z2); + return alg.mat_vec_add(alg.matmult(a2, weights2), bias2); + } + + std::tuple>, std::vector>> AutoEncoder::propagate(std::vector> X){ + LinAlg alg; + Activation avn; + std::vector> z2 = alg.mat_vec_add(alg.matmult(X, weights1), bias1); + std::vector> a2 = avn.sigmoid(z2); + return {z2, a2}; + } + + std::vector AutoEncoder::Evaluate(std::vector x){ + LinAlg alg; + Activation avn; + std::vector z2 = alg.addition(alg.mat_vec_mult(alg.transpose(weights1), x), bias1); + std::vector a2 = avn.sigmoid(z2); + return alg.addition(alg.mat_vec_mult(alg.transpose(weights2), a2), bias2); + } + + std::tuple, std::vector> 
AutoEncoder::propagate(std::vector x){ + LinAlg alg; + Activation avn; + std::vector z2 = alg.addition(alg.mat_vec_mult(alg.transpose(weights1), x), bias1); + std::vector a2 = avn.sigmoid(z2); + return {z2, a2}; + } + + void AutoEncoder::forwardPass(){ + LinAlg alg; + Activation avn; + z2 = alg.mat_vec_add(alg.matmult(inputSet, weights1), bias1); + a2 = avn.sigmoid(z2); + y_hat = alg.mat_vec_add(alg.matmult(a2, weights2), bias2); + } +} \ No newline at end of file diff --git a/MLPP/AutoEncoder/AutoEncoder.hpp b/MLPP/AutoEncoder/AutoEncoder.hpp new file mode 100644 index 0000000..c370a2a --- /dev/null +++ b/MLPP/AutoEncoder/AutoEncoder.hpp @@ -0,0 +1,54 @@ +// +// AutoEncoder.hpp +// +// Created by Marc Melikyan on 11/4/20. +// + +#ifndef AutoEncoder_hpp +#define AutoEncoder_hpp + +#include +#include +#include + +namespace MLPP { + +class AutoEncoder{ + public: + AutoEncoder(std::vector> inputSet, int n_hidden); + std::vector> modelSetTest(std::vector> X); + std::vector modelTest(std::vector x); + void gradientDescent(double learning_rate, int max_epoch, bool UI = 1); + void SGD(double learning_rate, int max_epoch, bool UI = 1); + void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1); + double score(); + void save(std::string fileName); + + private: + double Cost(std::vector> y_hat, std::vector> y); + + std::vector> Evaluate(std::vector> X); + std::tuple>, std::vector>> propagate(std::vector> X); + std::vector Evaluate(std::vector x); + std::tuple, std::vector> propagate(std::vector x); + void forwardPass(); + + std::vector> inputSet; + std::vector> y_hat; + + std::vector> weights1; + std::vector> weights2; + + std::vector bias1; + std::vector bias2; + + std::vector> z2; + std::vector> a2; + + int n; + int k; + int n_hidden; + }; +} + +#endif /* AutoEncoder_hpp */ diff --git a/MLPP/BernoulliNB/BernoulliNB.cpp b/MLPP/BernoulliNB/BernoulliNB.cpp new file mode 100644 index 0000000..3fd4a4a --- /dev/null +++ b/MLPP/BernoulliNB/BernoulliNB.cpp @@ -0,0 +1,182 @@ +// +// BernoulliNB.cpp +// +// Created by Marc Melikyan on 1/17/21. 
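The AutoEncoder interface just declared can be driven end to end as follows; this is a minimal sketch, with toy data and hyperparameters chosen only for illustration and the include path assumed:

#include "MLPP/AutoEncoder/AutoEncoder.hpp"
#include <iostream>
#include <vector>

int main(){
    std::vector<std::vector<double>> X = {
        {1, 0, 0}, {0, 1, 0}, {0, 0, 1}, {1, 1, 0}
    };
    MLPP::AutoEncoder ae(X, 2);                 // 2 hidden units
    ae.gradientDescent(0.1, 1000, false);       // learning rate, max epochs, UI off
    std::vector<std::vector<double>> X_hat = ae.modelSetTest(X); // reconstructions
    std::cout << "score: " << ae.score() << std::endl;
    return 0;
}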
+// + +#include "BernoulliNB.hpp" +#include "Utilities/Utilities.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Data/Data.hpp" + +#include +#include + +namespace MLPP{ + BernoulliNB::BernoulliNB(std::vector> inputSet, std::vector outputSet) + : inputSet(inputSet), outputSet(outputSet), class_num(2) + { + y_hat.resize(outputSet.size()); + Evaluate(); + } + + std::vector BernoulliNB::modelSetTest(std::vector> X){ + std::vector y_hat; + for(int i = 0; i < X.size(); i++){ + y_hat.push_back(modelTest(X[i])); + } + return y_hat; + } + + double BernoulliNB::modelTest(std::vector x){ + double score_0 = 1; + double score_1 = 1; + + std::vector foundIndices; + + for(int j = 0; j < x.size(); j++){ + for(int k = 0; k < vocab.size(); k++){ + if(x[j] == vocab[k]){ + score_0 *= theta[0][vocab[k]]; + score_1 *= theta[1][vocab[k]]; + + foundIndices.push_back(k); + } + } + } + + for(int i = 0; i < vocab.size(); i++){ + bool found = false; + for(int j = 0; j < foundIndices.size(); j++){ + if(vocab[i] == vocab[foundIndices[j]]){ + found = true; + } + } + if(!found){ + score_0 *= 1 - theta[0][vocab[i]]; + score_1 *= 1 - theta[1][vocab[i]]; + } + } + + score_0 *= prior_0; + score_1 *= prior_1; + + // Assigning the traning example to a class + + if(score_0 > score_1){ + return 0; + } + else{ + return 1; + } + } + + double BernoulliNB::score(){ + Utilities util; + return util.performance(y_hat, outputSet); + } + + void BernoulliNB::computeVocab(){ + LinAlg alg; + Data data; + vocab = data.vecToSet(alg.flatten(inputSet)); + } + + void BernoulliNB::computeTheta(){ + + // Resizing theta for the sake of ease & proper access of the elements. + theta.resize(class_num); + + // Setting all values in the hasmap by default to 0. + for(int i = class_num - 1; i >= 0; i--){ + for(int j = 0; j < vocab.size(); j++){ + theta[i][vocab[j]] = 0; + } + } + + for(int i = 0; i < inputSet.size(); i++){ + for(int j = 0; j < inputSet[0].size(); j++){ + theta[outputSet[i]][inputSet[i][j]]++; + } + } + + for(int i = 0; i < theta.size(); i++){ + for(int j = 0; j < theta[i].size(); j++){ + if(i == 0){ + theta[i][j] /= prior_0 * y_hat.size(); + } + else{ + theta[i][j] /= prior_1 * y_hat.size(); + } + } + } + } + + void BernoulliNB::Evaluate(){ + for(int i = 0; i < outputSet.size(); i++){ + // Pr(B | A) * Pr(A) + double score_0 = 1; + double score_1 = 1; + + + double sum = 0; + for(int i = 0; i < outputSet.size(); i++){ + if(outputSet[i] == 1){ sum += outputSet[i]; } + } + + // Easy computation of priors, i.e. Pr(C_k) + prior_1 = sum / y_hat.size(); + prior_0 = 1 - prior_1; + + // Evaluating Theta... + computeTheta(); + + // Evaluating the vocab set... 
+ computeVocab(); + + std::vector foundIndices; + + for(int j = 0; j < inputSet.size(); j++){ + for(int k = 0; k < vocab.size(); k++){ + if(inputSet[i][j] == vocab[k]){ + score_0 += std::log(theta[0][vocab[k]]); + score_1 += std::log(theta[1][vocab[k]]); + + foundIndices.push_back(k); + } + } + } + + for(int i = 0; i < vocab.size(); i++){ + bool found = false; + for(int j = 0; j < foundIndices.size(); j++){ + if(vocab[i] == vocab[foundIndices[j]]){ + found = true; + } + } + if(!found){ + score_0 += std::log(1 - theta[0][vocab[i]]); + score_1 += std::log(1 - theta[1][vocab[i]]); + } + } + + score_0 += std::log(prior_0); + score_1 += std::log(prior_1); + + score_0 = exp(score_0); + score_1 = exp(score_1); + + std::cout << score_0 << std::endl; + std::cout << score_1 << std::endl; + + // Assigning the traning example to a class + + if(score_0 > score_1){ + y_hat[i] = 0; + } + else{ + y_hat[i] = 1; + } + } + } +} \ No newline at end of file diff --git a/MLPP/BernoulliNB/BernoulliNB.hpp b/MLPP/BernoulliNB/BernoulliNB.hpp new file mode 100644 index 0000000..dd10ec2 --- /dev/null +++ b/MLPP/BernoulliNB/BernoulliNB.hpp @@ -0,0 +1,47 @@ +// +// BernoulliNB.hpp +// +// Created by Marc Melikyan on 1/17/21. +// + +#ifndef BernoulliNB_hpp +#define BernoulliNB_hpp + +#include +#include + +namespace MLPP{ + class BernoulliNB{ + + public: + BernoulliNB(std::vector> inputSet, std::vector outputSet); + std::vector modelSetTest(std::vector> X); + double modelTest(std::vector x); + double score(); + + private: + + void computeVocab(); + void computeTheta(); + void Evaluate(); + + // Model Params + double prior_1 = 0; + double prior_0 = 0; + + std::vector> theta; + std::vector vocab; + int class_num; + + // Datasets + std::vector> inputSet; + std::vector outputSet; + std::vector y_hat; + + + + + }; + + #endif /* BernoulliNB_hpp */ +} \ No newline at end of file diff --git a/MLPP/CLogLogReg/CLogLogReg.cpp b/MLPP/CLogLogReg/CLogLogReg.cpp new file mode 100644 index 0000000..d34c9df --- /dev/null +++ b/MLPP/CLogLogReg/CLogLogReg.cpp @@ -0,0 +1,219 @@ +// +// CLogLogReg.cpp +// +// Created by Marc Melikyan on 10/2/20. 
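A minimal sketch of the BernoulliNB interface above on a toy binary feature set (data values illustrative, include path assumed); the constructor estimates the priors and theta immediately:

#include "MLPP/BernoulliNB/BernoulliNB.hpp"
#include <iostream>
#include <vector>

int main(){
    std::vector<std::vector<double>> X = {
        {1, 0, 1}, {1, 1, 0}, {0, 0, 1}, {0, 1, 1}
    };
    std::vector<double> y = {1, 1, 0, 0};
    MLPP::BernoulliNB model(X, y);
    std::cout << "class of {1, 0, 0}: " << model.modelTest({1, 0, 0}) << std::endl;
    std::cout << "training accuracy:  " << model.score() << std::endl;
    return 0;
}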
+// + +#include "CLogLogReg.hpp" +#include "Activation/Activation.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Regularization/Reg.hpp" +#include "Utilities/Utilities.hpp" +#include "Cost/Cost.hpp" + +#include +#include + +namespace MLPP{ + CLogLogReg::CLogLogReg(std::vector> inputSet, std::vector outputSet, std::string reg, double lambda, double alpha) + : inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha) + { + y_hat.resize(n); + weights = Utilities::weightInitialization(k); + bias = Utilities::biasInitialization(); + } + + std::vector CLogLogReg::modelSetTest(std::vector> X){ + return Evaluate(X); + } + + double CLogLogReg::modelTest(std::vector x){ + return Evaluate(x); + } + + void CLogLogReg::gradientDescent(double learning_rate, int max_epoch, bool UI){ + Activation avn; + LinAlg alg; + Reg regularization; + double cost_prev = 0; + int epoch = 1; + forwardPass(); + + while(true){ + cost_prev = Cost(y_hat, outputSet); + + std::vector error = alg.subtraction(y_hat, outputSet); + + + // Calculating the weight gradients + weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputSet), alg.hadamard_product(error, avn.cloglog(z, 1))))); + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Calculating the bias gradients + bias -= learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.cloglog(z, 1))) / n; + + forwardPass(); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); + Utilities::UI(weights, bias); + } + epoch++; + + if(epoch > max_epoch) { break; } + } + } + + void CLogLogReg::MLE(double learning_rate, int max_epoch, bool UI){ + Activation avn; + LinAlg alg; + Reg regularization; + double cost_prev = 0; + int epoch = 1; + forwardPass(); + + while(true){ + cost_prev = Cost(y_hat, outputSet); + + std::vector error = alg.subtraction(y_hat, outputSet); + + weights = alg.addition(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputSet), alg.hadamard_product(error, avn.cloglog(z, 1))))); + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Calculating the bias gradients + bias += learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.cloglog(z, 1))) / n; + forwardPass(); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); + Utilities::UI(weights, bias); + } + epoch++; + + if(epoch > max_epoch) { break; } + + } + } + + void CLogLogReg::SGD(double learning_rate, int max_epoch, bool UI){ + LinAlg alg; + Reg regularization; + double cost_prev = 0; + int epoch = 1; + forwardPass(); + + while(true){ + std::random_device rd; + std::default_random_engine generator(rd()); + std::uniform_int_distribution distribution(0, int(n - 1)); + int outputIndex = distribution(generator); + + double y_hat = Evaluate(inputSet[outputIndex]); + double z = propagate(inputSet[outputIndex]); + cost_prev = Cost({y_hat}, {outputSet[outputIndex]}); + + double error = y_hat - outputSet[outputIndex]; + + // Weight Updation + weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate * error * exp(z-exp(z)), inputSet[outputIndex])); + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Bias updation + bias -= learning_rate * error * exp(z-exp(z)); + + y_hat = Evaluate({inputSet[outputIndex]}); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost({y_hat}, {outputSet[outputIndex]})); + Utilities::UI(weights, bias); + } + 
epoch++; + + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + void CLogLogReg::MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI){ + Activation avn; + LinAlg alg; + Reg regularization; + double cost_prev = 0; + int epoch = 1; + + // Creating the mini-batches + int n_mini_batch = n/mini_batch_size; + auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch); + + while(true){ + for(int i = 0; i < n_mini_batch; i++){ + std::vector y_hat = Evaluate(inputMiniBatches[i]); + std::vector z = propagate(inputMiniBatches[i]); + cost_prev = Cost(y_hat, outputMiniBatches[i]); + + std::vector error = alg.subtraction(y_hat, outputMiniBatches[i]); + + // Calculating the weight gradients + weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), alg.hadamard_product(error, avn.cloglog(z, 1))))); + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Calculating the bias gradients + bias -= learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.cloglog(z, 1))) / n; + + forwardPass(); + + y_hat = Evaluate(inputMiniBatches[i]); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i])); + Utilities::UI(weights, bias); + } + } + epoch++; + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + double CLogLogReg::score(){ + Utilities util; + return util.performance(y_hat, outputSet); + } + + double CLogLogReg::Cost(std::vector y_hat, std::vector y){ + Reg regularization; + class Cost cost; + return cost.MSE(y_hat, y) + regularization.regTerm(weights, lambda, alpha, reg); + } + + std::vector CLogLogReg::Evaluate(std::vector> X){ + LinAlg alg; + Activation avn; + return avn.cloglog(alg.scalarAdd(bias, alg.mat_vec_mult(X, weights))); + } + + std::vectorCLogLogReg::propagate(std::vector> X){ + LinAlg alg; + return alg.scalarAdd(bias, alg.mat_vec_mult(X, weights)); + } + + double CLogLogReg::Evaluate(std::vector x){ + LinAlg alg; + Activation avn; + return avn.cloglog(alg.dot(weights, x) + bias); + } + + double CLogLogReg::propagate(std::vector x){ + LinAlg alg; + return alg.dot(weights, x) + bias; + } + + // cloglog ( wTx + b ) + void CLogLogReg::forwardPass(){ + LinAlg alg; + Activation avn; + + z = propagate(inputSet); + y_hat = avn.cloglog(z); + } +} \ No newline at end of file diff --git a/MLPP/CLogLogReg/CLogLogReg.hpp b/MLPP/CLogLogReg/CLogLogReg.hpp new file mode 100644 index 0000000..5635328 --- /dev/null +++ b/MLPP/CLogLogReg/CLogLogReg.hpp @@ -0,0 +1,58 @@ +// +// CLogLogReg.hpp +// +// Created by Marc Melikyan on 10/2/20. 
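The regressor above models Pr(y = 1 | x) through a complementary log-log link, conventionally p = 1 - exp(-exp(w^T x + b)); its derivative exp(z - exp(z)) is the factor the SGD update multiplies the error by. A minimal usage sketch against the interface declared below (toy data, include path assumed):

#include "MLPP/CLogLogReg/CLogLogReg.hpp"
#include <iostream>
#include <vector>

int main(){
    std::vector<std::vector<double>> X = {{0.2, 1.0}, {0.6, 0.1}, {1.5, 0.3}, {2.0, 1.2}};
    std::vector<double> y = {0, 0, 1, 1};
    MLPP::CLogLogReg model(X, y);              // reg = "None", lambda = 0.5, alpha = 0.5 defaults
    model.gradientDescent(0.05, 5000, false);
    std::cout << "accuracy: " << model.score() << std::endl;
    return 0;
}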
+// + +#ifndef CLogLogReg_hpp +#define CLogLogReg_hpp + + +#include +#include + +namespace MLPP { + + class CLogLogReg{ + + public: + CLogLogReg(std::vector> inputSet, std::vector outputSet, std::string reg = "None", double lambda = 0.5, double alpha = 0.5); + std::vector modelSetTest(std::vector> X); + double modelTest(std::vector x); + void gradientDescent(double learning_rate, int max_epoch, bool UI = 1); + void MLE(double learning_rate, int max_epoch, bool UI = 1); + void SGD(double learning_rate, int max_epoch, bool UI = 1); + void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1); + double score(); + private: + + void weightInitialization(int k); + void biasInitialization(); + double Cost(std::vector y_hat, std::vector y); + + std::vector Evaluate(std::vector> X); + std::vector propagate(std::vector> X); + double Evaluate(std::vector x); + double propagate(std::vector x); + void forwardPass(); + + std::vector> inputSet; + std::vector outputSet; + std::vector y_hat; + std::vector z; + std::vector weights; + double bias; + + int n; + int k; + + // Regularization Params + std::string reg; + double lambda; + double alpha; /* This is the controlling param for Elastic Net*/ + + + }; +} + +#endif /* CLogLogReg_hpp */ diff --git a/MLPP/Convolutions/Convolutions.cpp b/MLPP/Convolutions/Convolutions.cpp new file mode 100644 index 0000000..0b3f4e4 --- /dev/null +++ b/MLPP/Convolutions/Convolutions.cpp @@ -0,0 +1,402 @@ +// +// Convolutions.cpp +// +// Created by Marc Melikyan on 4/6/21. +// + +#include +#include "Convolutions/Convolutions.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Stat/Stat.hpp" +#include + +namespace MLPP{ + + Convolutions::Convolutions() + : prewittHorizontal({{1,1,1}, {0,0,0}, {-1,-1,-1}}), prewittVertical({{1,0,-1}, {1,0,-1}, {1,0,-1}}), + sobelHorizontal({{1,2,1}, {0,0,0}, {-1,-2,-1}}), sobelVertical({{-1,0,1}, {-2,0,2}, {-1,0,1}}), + scharrHorizontal({{3,10,3}, {0,0,0}, {-3,-10,-3}}), scharrVertical({{3,0,-3}, {10,0,-10}, {3,0,-3}}), + robertsHorizontal({{0,1}, {-1,0}}), robertsVertical({{1,0}, {0,-1}}) + { + + } + + std::vector> Convolutions::convolve(std::vector> input, std::vector> filter, int S, int P){ + LinAlg alg; + std::vector> featureMap; + int N = input.size(); + int F = filter.size(); + int mapSize = (N - F + 2*P) / S + 1; // This is computed as ⌊mapSize⌋ by def- thanks C++! 
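+ // e.g. N = 5, F = 3, S = 1, P = 0 gives (5 - 3 + 0)/1 + 1 = 3, i.e. a 3x3 feature map; fractional results are truncated by the integer division.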
+ + if(P != 0){ + std::vector> paddedInput; + paddedInput.resize(N + 2*P); + for(int i = 0; i < paddedInput.size(); i++){ + paddedInput[i].resize(N + 2*P); + } + for(int i = 0; i < paddedInput.size(); i++){ + for(int j = 0; j < paddedInput[i].size(); j++){ + if(i - P < 0 || j - P < 0 || i - P > input.size() - 1 || j - P > input[0].size() - 1){ + paddedInput[i][j] = 0; + } + else{ + paddedInput[i][j] = input[i - P][j - P]; + } + } + } + input.resize(paddedInput.size()); + for(int i = 0; i < paddedInput.size(); i++){ + input[i].resize(paddedInput[i].size()); + } + input = paddedInput; + } + + featureMap.resize(mapSize); + for(int i = 0; i < mapSize; i++){ + featureMap[i].resize(mapSize); + } + + for(int i = 0; i < mapSize; i++){ + for(int j = 0; j < mapSize; j++){ + std::vector convolvingInput; + for(int k = 0; k < F; k++){ + for(int p = 0; p < F; p++){ + if(i == 0 && j == 0){ + convolvingInput.push_back(input[i + k][j + p]); + } + else if(i == 0){ + convolvingInput.push_back(input[i + k][j + (S - 1) + p]); + } + else if(j == 0){ + convolvingInput.push_back(input[i + (S - 1) + k][j + p]); + } + else{ + convolvingInput.push_back(input[i + (S - 1) + k][j + (S - 1) + p]); + } + } + } + featureMap[i][j] = alg.dot(convolvingInput, alg.flatten(filter)); + } + } + return featureMap; + } + + std::vector>> Convolutions::convolve(std::vector>> input, std::vector>> filter, int S, int P){ + LinAlg alg; + std::vector>> featureMap; + int N = input[0].size(); + int F = filter[0].size(); + int C = filter.size() / input.size(); + int mapSize = (N - F + 2*P) / S + 1; // This is computed as ⌊mapSize⌋ by def. + + if(P != 0){ + for(int c = 0; c < input.size(); c++){ + std::vector> paddedInput; + paddedInput.resize(N + 2*P); + for(int i = 0; i < paddedInput.size(); i++){ + paddedInput[i].resize(N + 2*P); + } + for(int i = 0; i < paddedInput.size(); i++){ + for(int j = 0; j < paddedInput[i].size(); j++){ + if(i - P < 0 || j - P < 0 || i - P > input[c].size() - 1 || j - P > input[c][0].size() - 1){ + paddedInput[i][j] = 0; + } + else{ + paddedInput[i][j] = input[c][i - P][j - P]; + } + } + } + input[c].resize(paddedInput.size()); + for(int i = 0; i < paddedInput.size(); i++){ + input[c][i].resize(paddedInput[i].size()); + } + input[c] = paddedInput; + } + } + + featureMap.resize(C); + for(int i = 0; i < featureMap.size(); i++){ + featureMap[i].resize(mapSize); + for(int j = 0; j < featureMap[i].size(); j++){ + featureMap[i][j].resize(mapSize); + } + } + + for(int c = 0; c < C; c++){ + for(int i = 0; i < mapSize; i++){ + for(int j = 0; j < mapSize; j++){ + std::vector convolvingInput; + for(int t = 0; t < input.size(); t++){ + for(int k = 0; k < F; k++){ + for(int p = 0; p < F; p++){ + if(i == 0 && j == 0){ + convolvingInput.push_back(input[t][i + k][j + p]); + } + else if(i == 0){ + convolvingInput.push_back(input[t][i + k][j + (S - 1) + p]); + } + else if(j == 0){ + convolvingInput.push_back(input[t][i + (S - 1) + k][j + p]); + } + else{ + convolvingInput.push_back(input[t][i + (S - 1) + k][j + (S - 1) + p]); + } + } + } + } + featureMap[c][i][j] = alg.dot(convolvingInput, alg.flatten(filter)); + } + } + } + return featureMap; + } + + std::vector> Convolutions::pool(std::vector> input, int F, int S, std::string type){ + LinAlg alg; + std::vector> pooledMap; + int N = input.size(); + int mapSize = floor((N - F) / S + 1); + + pooledMap.resize(mapSize); + for(int i = 0; i < mapSize; i++){ + pooledMap[i].resize(mapSize); + } + + for(int i = 0; i < mapSize; i++){ + for(int j = 0; j < mapSize; j++){ + std::vector 
poolingInput; + for(int k = 0; k < F; k++){ + for(int p = 0; p < F; p++){ + if(i == 0 && j == 0){ + poolingInput.push_back(input[i + k][j + p]); + } + else if(i == 0){ + poolingInput.push_back(input[i + k][j + (S - 1) + p]); + } + else if(j == 0){ + poolingInput.push_back(input[i + (S - 1) + k][j + p]); + } + else{ + poolingInput.push_back(input[i + (S - 1) + k][j + (S - 1) + p]); + } + } + } + if(type == "Average"){ + Stat stat; + pooledMap[i][j] = stat.mean(poolingInput); + } + else if(type == "Min"){ + pooledMap[i][j] = alg.min(poolingInput); + } + else{ + pooledMap[i][j] = alg.max(poolingInput); + } + } + } + return pooledMap; + } + + std::vector>> Convolutions::pool(std::vector>> input, int F, int S, std::string type){ + std::vector>> pooledMap; + for(int i = 0; i < input.size(); i++){ + pooledMap.push_back(pool(input[i], F, S, type)); + } + return pooledMap; + } + + double Convolutions::globalPool(std::vector> input, std::string type){ + LinAlg alg; + if(type == "Average"){ + Stat stat; + return stat.mean(alg.flatten(input)); + } + else if(type == "Min"){ + return alg.min(alg.flatten(input)); + } + else{ + return alg.max(alg.flatten(input)); + } + } + + std::vector Convolutions::globalPool(std::vector>> input, std::string type){ + std::vector pooledMap; + for(int i = 0; i < input.size(); i++){ + pooledMap.push_back(globalPool(input[i], type)); + } + return pooledMap; + } + + double Convolutions::gaussian2D(double x, double y, double std){ + double std_sq = std * std; + return 1/(2 * M_PI * std_sq) * std::exp(-(x * x + y * y)/2 * std_sq); + } + + std::vector> Convolutions::gaussianFilter2D(int size, double std){ + std::vector> filter; + filter.resize(size); + for(int i = 0; i < filter.size(); i++){ + filter[i].resize(size); + } + for(int i = 0; i < size; i++){ + for(int j = 0; j < size; j++){ + filter[i][j] = gaussian2D(i - (size-1)/2, (size-1)/2 - j, std); + } + } + return filter; + } + + /* + Indeed a filter could have been used for this purpose, but I decided that it would've just + been easier to carry out the calculation explicitly, mainly because it is more informative, + and also because my convolution algorithm is only built for filters with equally sized + heights and widths. + */ + std::vector> Convolutions::dx(std::vector> input){ + std::vector> deriv; // We assume a gray scale image. 
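+ // Horizontal gradient by central difference: interior pixels use input[i][j + 1] - input[i][j - 1]; the left/right borders fall back to a one-sided difference with implicit zero-padding.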
+ deriv.resize(input.size()); + for(int i = 0; i < deriv.size(); i++){ + deriv[i].resize(input[i].size()); + } + + for(int i = 0; i < input.size(); i++){ + for(int j = 0; j < input[i].size(); j++){ + if(j != 0 && j != input.size() - 1){ + deriv[i][j] = input[i][j + 1] - input[i][j - 1]; + } + else if(j == 0){ + deriv[i][j] = input[i][j + 1] - 0; // Implicit zero-padding + } + else{ + deriv[i][j] = 0 - input[i][j - 1]; // Implicit zero-padding + } + } + } + return deriv; + } + + std::vector> Convolutions::dy(std::vector> input){ + std::vector> deriv; + deriv.resize(input.size()); + for(int i = 0; i < deriv.size(); i++){ + deriv[i].resize(input[i].size()); + } + + for(int i = 0; i < input.size(); i++){ + for(int j = 0; j < input[i].size(); j++){ + if(i != 0 && i != input.size() - 1){ + deriv[i][j] = input[i - 1][j] - input[i + 1][j]; + } + else if(i == 0){ + deriv[i][j] = 0 - input[i + 1][j]; // Implicit zero-padding + } + else{ + deriv[i][j] = input[i - 1][j] - 0; // Implicit zero-padding + } + } + } + return deriv; + } + + std::vector> Convolutions::gradMagnitude(std::vector> input){ + LinAlg alg; + std::vector> xDeriv_2 = alg.hadamard_product(dx(input), dx(input)); + std::vector> yDeriv_2 = alg.hadamard_product(dy(input), dy(input)); + return alg.sqrt(alg.addition(xDeriv_2, yDeriv_2)); + } + + std::vector> Convolutions::gradOrientation(std::vector> input){ + std::vector> deriv; + deriv.resize(input.size()); + for(int i = 0; i < deriv.size(); i++){ + deriv[i].resize(input[i].size()); + } + + std::vector> xDeriv = dx(input); + std::vector> yDeriv = dy(input); + for(int i = 0; i < deriv.size(); i++){ + for(int j = 0; j < deriv[i].size(); j++){ + deriv[i][j] = std::atan2(yDeriv[i][j], xDeriv[i][j]); + } + } + return deriv; + } + + std::vector>> Convolutions::computeM(std::vector> input){ + double const SIGMA = 1; + double const GAUSSIAN_SIZE = 3; + + double const GAUSSIAN_PADDING = ( (input.size() - 1) + GAUSSIAN_SIZE - input.size() ) / 2; // Convs must be same. + std::cout << GAUSSIAN_PADDING << std::endl; + LinAlg alg; + std::vector> xDeriv = dx(input); + std::vector> yDeriv = dy(input); + + std::vector> gaussianFilter = gaussianFilter2D(GAUSSIAN_SIZE, SIGMA); // Sigma of 1, size of 3. + std::vector> xxDeriv = convolve(alg.hadamard_product(xDeriv, xDeriv), gaussianFilter, 1, GAUSSIAN_PADDING); + std::vector> yyDeriv = convolve(alg.hadamard_product(yDeriv, yDeriv), gaussianFilter, 1, GAUSSIAN_PADDING); + std::vector> xyDeriv = convolve(alg.hadamard_product(xDeriv, yDeriv), gaussianFilter, 1, GAUSSIAN_PADDING); + + std::vector>> M = {xxDeriv, yyDeriv, xyDeriv}; + return M; + } + std::vector> Convolutions::harrisCornerDetection(std::vector> input){ + double const k = 0.05; // Empirically determined wherein k -> [0.04, 0.06], though conventionally 0.05 is typically used as well. + LinAlg alg; + std::vector>> M = computeM(input); + std::vector> det = alg.subtraction(alg.hadamard_product(M[0], M[1]), alg.hadamard_product(M[2], M[2])); + std::vector> trace = alg.addition(M[0], M[1]); + + // The reason this is not a scalar is because xxDeriv, xyDeriv, yxDeriv, and yyDeriv are not scalars. 
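+ // Harris response per pixel: R = det(M) - k * trace(M)^2. Below, R > 0 is labelled a corner ("C"), R < 0 an edge ("E"), and R == 0 neither ("N").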
+ std::vector> r = alg.subtraction(det, alg.scalarMultiply(k, alg.hadamard_product(trace, trace))); + std::vector> imageTypes; + imageTypes.resize(r.size()); + alg.printMatrix(r); + for(int i = 0; i < r.size(); i++){ + imageTypes[i].resize(r[i].size()); + for(int j = 0; j < r[i].size(); j++){ + if(r[i][j] > 0){ + imageTypes[i][j] = "C"; + } + else if (r[i][j] < 0){ + imageTypes[i][j] = "E"; + } + else{ + imageTypes[i][j] = "N"; + } + } + } + return imageTypes; + } + + std::vector> Convolutions::getPrewittHorizontal(){ + return prewittHorizontal; + } + + std::vector> Convolutions::getPrewittVertical(){ + return prewittVertical; + } + + std::vector> Convolutions::getSobelHorizontal(){ + return sobelHorizontal; + } + + std::vector> Convolutions::getSobelVertical(){ + return sobelVertical; + } + + std::vector> Convolutions::getScharrHorizontal(){ + return scharrHorizontal; + } + + std::vector> Convolutions::getScharrVertical(){ + return scharrVertical; + } + + std::vector> Convolutions::getRobertsHorizontal(){ + return robertsHorizontal; + } + + std::vector> Convolutions::getRobertsVertical(){ + return robertsVertical; + } +} \ No newline at end of file diff --git a/MLPP/Convolutions/Convolutions.hpp b/MLPP/Convolutions/Convolutions.hpp new file mode 100644 index 0000000..f4b5e66 --- /dev/null +++ b/MLPP/Convolutions/Convolutions.hpp @@ -0,0 +1,51 @@ +#ifndef Convolutions_hpp +#define Convolutions_hpp + +#include + +namespace MLPP{ + class Convolutions{ + public: + Convolutions(); + std::vector> convolve(std::vector> input, std::vector> filter, int S, int P = 0); + std::vector>> convolve(std::vector>> input, std::vector>> filter, int S, int P = 0); + std::vector> pool(std::vector> input, int F, int S, std::string type); + std::vector>> pool(std::vector>> input, int F, int S, std::string type); + double globalPool(std::vector> input, std::string type); + std::vector globalPool(std::vector>> input, std::string type); + + double gaussian2D(double x, double y, double std); + std::vector> gaussianFilter2D(int size, double std); + + std::vector> dx(std::vector> input); + std::vector> dy(std::vector> input); + + std::vector> gradMagnitude(std::vector> input); + std::vector> gradOrientation(std::vector> input); + + std::vector>> computeM(std::vector> input); + std::vector> harrisCornerDetection(std::vector> input); + + std::vector> getPrewittHorizontal(); + std::vector> getPrewittVertical(); + std::vector> getSobelHorizontal(); + std::vector> getSobelVertical(); + std::vector> getScharrHorizontal(); + std::vector> getScharrVertical(); + std::vector> getRobertsHorizontal(); + std::vector> getRobertsVertical(); + + private: + std::vector> prewittHorizontal; + std::vector> prewittVertical; + std::vector> sobelHorizontal; + std::vector> sobelVertical; + std::vector> scharrHorizontal; + std::vector> scharrVertical; + std::vector> robertsHorizontal; + std::vector> robertsVertical; + + }; +} + +#endif // Convolutions_hpp \ No newline at end of file diff --git a/MLPP/Cost/Cost.cpp b/MLPP/Cost/Cost.cpp new file mode 100644 index 0000000..2974748 --- /dev/null +++ b/MLPP/Cost/Cost.cpp @@ -0,0 +1,422 @@ +// +// Reg.cpp +// +// Created by Marc Melikyan on 1/16/21. 
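A minimal sketch of the Convolutions API above, running one of the built-in Sobel filters over a small grayscale patch (toy pixel values, include path assumed); with a 3x3 filter, stride 1 and no padding, the 5x5 input yields a 3x3 feature map:

#include "MLPP/Convolutions/Convolutions.hpp"
#include <iostream>
#include <vector>

int main(){
    MLPP::Convolutions conv;
    std::vector<std::vector<double>> img = {
        {0, 0, 10, 10, 10},
        {0, 0, 10, 10, 10},
        {0, 0, 10, 10, 10},
        {0, 0, 10, 10, 10},
        {0, 0, 10, 10, 10}
    };
    std::vector<std::vector<double>> edges = conv.convolve(img, conv.getSobelVertical(), 1);
    for(int i = 0; i < edges.size(); i++){
        for(int j = 0; j < edges[i].size(); j++){ std::cout << edges[i][j] << " "; }
        std::cout << std::endl;
    }
    return 0;
}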
+// + +#include +#include +#include "Cost.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Regularization/Reg.hpp" + +namespace MLPP{ + double Cost::MSE(std::vector y_hat, std::vector y){ + double sum = 0; + for(int i = 0; i < y_hat.size(); i++){ + sum += (y_hat[i] - y[i]) * (y_hat[i] - y[i]); + } + return sum / 2 * y_hat.size(); + } + + double Cost::MSE(std::vector> y_hat, std::vector> y){ + double sum = 0; + for(int i = 0; i < y_hat.size(); i++){ + for(int j = 0; j < y_hat[i].size(); j++){ + sum += (y_hat[i][j] - y[i][j]) * (y_hat[i][j] - y[i][j]); + } + } + return sum / 2 * y_hat.size(); + } + + std::vector Cost::MSEDeriv(std::vector y_hat, std::vector y){ + LinAlg alg; + return alg.subtraction(y_hat, y); + } + + std::vector> Cost::MSEDeriv(std::vector> y_hat, std::vector> y){ + LinAlg alg; + return alg.subtraction(y_hat, y); + } + + double Cost::RMSE(std::vector y_hat, std::vector y){ + double sum = 0; + for(int i = 0; i < y_hat.size(); i++){ + sum += (y_hat[i] - y[i]) * (y_hat[i] - y[i]); + } + return sqrt(sum / y_hat.size()); + } + + double Cost::RMSE(std::vector> y_hat, std::vector> y){ + double sum = 0; + for(int i = 0; i < y_hat.size(); i++){ + for(int j = 0; j < y_hat[i].size(); j++){ + sum += (y_hat[i][j] - y[i][j]) * (y_hat[i][j] - y[i][j]); + } + } + return sqrt(sum / y_hat.size()); + } + + std::vector Cost::RMSEDeriv(std::vector y_hat, std::vector y){ + LinAlg alg; + return alg.scalarMultiply(1/(2*sqrt(MSE(y_hat, y))), MSEDeriv(y_hat, y)); + } + + std::vector> Cost::RMSEDeriv(std::vector> y_hat, std::vector> y){ + LinAlg alg; + return alg.scalarMultiply(1/(2/sqrt(MSE(y_hat, y))), MSEDeriv(y_hat, y)); + } + + double Cost::MAE(std::vector y_hat, std::vector y){ + double sum = 0; + for(int i = 0; i < y_hat.size(); i++){ + sum += abs((y_hat[i] - y[i])); + } + return sum / y_hat.size(); + } + + double Cost::MAE(std::vector> y_hat, std::vector> y){ + double sum = 0; + for(int i = 0; i < y_hat.size(); i++){ + for(int j = 0; j < y_hat[i].size(); j++){ + sum += abs((y_hat[i][j] - y[i][j])); + } + } + return sum / y_hat.size(); + } + + std::vector Cost::MAEDeriv(std::vector y_hat, std::vector y){ + std::vector deriv; + deriv.resize(y_hat.size()); + for(int i = 0; i < deriv.size(); i++){ + if(y_hat[i] < 0){ + deriv[i] = -1; + } + else if(y_hat[i] == 0){ + deriv[i] = 0; + } + else{ + deriv[i] = 1; + + } + } + return deriv; + } + + std::vector> Cost::MAEDeriv(std::vector> y_hat, std::vector> y){ + std::vector> deriv; + deriv.resize(y_hat.size()); + for(int i = 0; i < deriv.size(); i++){ + deriv.resize(y_hat[i].size()); + } + for(int i = 0; i < deriv.size(); i++){ + for(int j = 0; j < deriv[i].size(); j++){ + if(y_hat[i][j] < 0){ + deriv[i][j] = -1; + } + else if(y_hat[i][j] == 0){ + deriv[i][j] = 0; + } + else{ + deriv[i][j] = 1; + + } + } + } + return deriv; + } + + double Cost::MBE(std::vector y_hat, std::vector y){ + double sum = 0; + for(int i = 0; i < y_hat.size(); i++){ + sum += (y_hat[i] - y[i]); + } + return sum / y_hat.size(); + } + + double Cost::MBE(std::vector> y_hat, std::vector> y){ + double sum = 0; + for(int i = 0; i < y_hat.size(); i++){ + for(int j = 0; j < y_hat[i].size(); j++){ + sum += (y_hat[i][j] - y[i][j]); + } + } + return sum / y_hat.size(); + } + + std::vector Cost::MBEDeriv(std::vector y_hat, std::vector y){ + LinAlg alg; + return alg.onevec(y_hat.size()); + } + + std::vector> Cost::MBEDeriv(std::vector> y_hat, std::vector> y){ + LinAlg alg; + return alg.onemat(y_hat.size(), y_hat[0].size()); + } + + double Cost::LogLoss(std::vector y_hat, std::vector y){ + double 
sum = 0; + double eps = 1e-8; + for(int i = 0; i < y_hat.size(); i++){ + sum += -(y[i] * std::log(y_hat[i] + eps) + (1 - y[i]) * std::log(1 - y_hat[i] + eps)); + } + + return sum / y_hat.size(); + } + + double Cost::LogLoss(std::vector > y_hat, std::vector > y){ + double sum = 0; + double eps = 1e-8; + for(int i = 0; i < y_hat.size(); i++){ + for(int j = 0; j < y_hat[i].size(); j++){ + sum += -(y[i][j] * std::log(y_hat[i][j] + eps) + (1 - y[i][j]) * std::log(1 - y_hat[i][j] + eps)); + } + } + + return sum / y_hat.size(); + } + + std::vector Cost::LogLossDeriv(std::vector y_hat, std::vector y){ + LinAlg alg; + return alg.addition(alg.scalarMultiply(-1, alg.elementWiseDivision(y, y_hat)), alg.elementWiseDivision(alg.scalarMultiply(-1, alg.scalarAdd(-1, y)), alg.scalarMultiply(-1, alg.scalarAdd(-1, y_hat)))); + } + + std::vector> Cost::LogLossDeriv(std::vector> y_hat, std::vector> y){ + LinAlg alg; + return alg.addition(alg.scalarMultiply(-1, alg.elementWiseDivision(y, y_hat)), alg.elementWiseDivision(alg.scalarMultiply(-1, alg.scalarAdd(-1, y)), alg.scalarMultiply(-1, alg.scalarAdd(-1, y_hat)))); + } + + double Cost::CrossEntropy(std::vector y_hat, std::vector y){ + double sum = 0; + for(int i = 0; i < y_hat.size(); i++){ + sum += y[i] * std::log(y_hat[i]); + } + + return -1 * sum; + } + + double Cost::CrossEntropy(std::vector> y_hat, std::vector> y){ + double sum = 0; + for(int i = 0; i < y_hat.size(); i++){ + for(int j = 0; j < y_hat[i].size(); j++){ + sum += y[i][j] * std::log(y_hat[i][j]); + } + } + + return -1 * sum; + } + + std::vector Cost::CrossEntropyDeriv(std::vector y_hat, std::vector y){ + LinAlg alg; + return alg.scalarMultiply(-1, alg.elementWiseDivision(y, y_hat)); + } + + std::vector> Cost::CrossEntropyDeriv(std::vector> y_hat, std::vector> y){ + LinAlg alg; + return alg.scalarMultiply(-1, alg.elementWiseDivision(y, y_hat)); + } + + double Cost::HuberLoss(std::vector y_hat, std::vector y, double delta){ + LinAlg alg; + double sum = 0; + for(int i = 0; i < y_hat.size(); i++){ + if(abs(y[i] - y_hat[i]) <= delta){ + sum += (y[i] - y_hat[i]) * (y[i] - y_hat[i]); + } + else{ + sum += 2 * delta * abs(y[i] - y_hat[i]) - delta * delta; + } + } + return sum; + } + + double Cost::HuberLoss(std::vector> y_hat, std::vector> y, double delta){ + LinAlg alg; + double sum = 0; + for(int i = 0; i < y_hat.size(); i++){ + for(int j = 0; j < y_hat[i].size(); j++){ + if(abs(y[i][j] - y_hat[i][j]) <= delta){ + sum += (y[i][j] - y_hat[i][j]) * (y[i][j] - y_hat[i][j]); + } + else{ + sum += 2 * delta * abs(y[i][j] - y_hat[i][j]) - delta * delta; + } + } + } + return sum; + } + + std::vector Cost::HuberLossDeriv(std::vector y_hat, std::vector y, double delta){ + LinAlg alg; + double sum = 0; + std::vector deriv; + deriv.resize(y_hat.size()); + + for(int i = 0; i < y_hat.size(); i++){ + if(abs(y[i] - y_hat[i]) <= delta){ + deriv.push_back(-(y[i] - y_hat[i])); + } + else{ + if(y_hat[i] > 0 || y_hat[i] < 0){ + deriv.push_back(2 * delta * (y_hat[i]/abs(y_hat[i]))); + } + else{ + deriv.push_back(0); + } + } + } + return deriv; + } + + std::vector> Cost::HuberLossDeriv(std::vector> y_hat, std::vector> y, double delta){ + LinAlg alg; + double sum = 0; + std::vector> deriv; + deriv.resize(y_hat.size()); + for(int i = 0; i < deriv.size(); i++){ + deriv[i].resize(y_hat[i].size()); + } + + for(int i = 0; i < y_hat.size(); i++){ + for(int j = 0; j < y_hat[i].size(); j++){ + if(abs(y[i][j] - y_hat[i][j]) <= delta){ + deriv[i].push_back(-(y[i][j] - y_hat[i][j])); + } + else{ + if(y_hat[i][j] > 0 || y_hat[i][j] < 
0){ + deriv[i].push_back(2 * delta * (y_hat[i][j]/abs(y_hat[i][j]))); + } + else{ + deriv[i].push_back(0); + } + } + } + } + return deriv; + } + + double Cost::HingeLoss(std::vector y_hat, std::vector y){ + double sum = 0; + for(int i = 0; i < y_hat.size(); i++){ + sum += fmax(0, 1 - y[i] * y_hat[i]); + } + + return sum / y_hat.size(); + } + + double Cost::HingeLoss(std::vector> y_hat, std::vector> y){ + double sum = 0; + for(int i = 0; i < y_hat.size(); i++){ + for(int j = 0; j < y_hat[i].size(); j++){ + sum += fmax(0, 1 - y[i][j] * y_hat[i][j]); + } + } + + return sum / y_hat.size(); + } + + std::vector Cost::HingeLossDeriv(std::vector y_hat, std::vector y){ + std::vector deriv; + deriv.resize(y_hat.size()); + for(int i = 0; i < y_hat.size(); i++){ + if(1 - y[i] * y_hat[i] > 0){ + deriv[i] = -y[i]; + } + else{ + deriv[i] = 0; + } + } + return deriv; + } + + std::vector> Cost::HingeLossDeriv(std::vector> y_hat, std::vector> y){ + std::vector> deriv; + for(int i = 0; i < y_hat.size(); i++){ + for(int j = 0; j < y_hat[i].size(); j++){ + if(1 - y[i][j] * y_hat[i][j] > 0){ + deriv[i][j] = -y[i][j]; + } + else{ + deriv[i][j] = 0; + } + } + } + return deriv; + } + + double Cost::WassersteinLoss(std::vector y_hat, std::vector y){ + double sum = 0; + for(int i = 0; i < y_hat.size(); i++){ + sum += y_hat[i] * y[i]; + } + return -sum / y_hat.size(); + } + + double Cost::WassersteinLoss(std::vector> y_hat, std::vector> y){ + double sum = 0; + for(int i = 0; i < y_hat.size(); i++){ + for(int j = 0; j < y_hat[i].size(); j++){ + sum += y_hat[i][j] * y[i][j]; + } + } + return -sum / y_hat.size(); + } + + std::vector Cost::WassersteinLossDeriv(std::vector y_hat, std::vector y){ + LinAlg alg; + return alg.scalarMultiply(-1, y); // Simple. + } + + std::vector> Cost::WassersteinLossDeriv(std::vector> y_hat, std::vector> y){ + LinAlg alg; + return alg.scalarMultiply(-1, y); // Simple. + } + + + double Cost::HingeLoss(std::vector y_hat, std::vector y, std::vector weights, double C){ + LinAlg alg; + Reg regularization; + return C * HingeLoss(y_hat, y) + regularization.regTerm(weights, 1, 0, "Ridge"); + } + double Cost::HingeLoss(std::vector> y_hat, std::vector> y, std::vector> weights, double C){ + LinAlg alg; + Reg regularization; + return C * HingeLoss(y_hat, y) + regularization.regTerm(weights, 1, 0, "Ridge"); + } + + std::vector Cost::HingeLossDeriv(std::vector y_hat, std::vector y, double C){ + LinAlg alg; + Reg regularization; + return alg.scalarMultiply(C, HingeLossDeriv(y_hat, y)); + } + std::vector> Cost::HingeLossDeriv(std::vector> y_hat, std::vector> y, double C){ + LinAlg alg; + Reg regularization; + return alg.scalarMultiply(C, HingeLossDeriv(y_hat, y)); + } + + double Cost::dualFormSVM(std::vector alpha, std::vector> X, std::vector y){ + LinAlg alg; + std::vector> Y = alg.diag(y); // Y is a diagnoal matrix. Y[i][j] = y[i] if i = i, else Y[i][j] = 0. Yt = Y. + std::vector> K = alg.matmult(X, alg.transpose(X)); // TO DO: DON'T forget to add non-linear kernelizations. + std::vector> Q = alg.matmult(alg.matmult(alg.transpose(Y), K), Y); + double alphaQ = alg.matmult(alg.matmult({alpha}, Q), alg.transpose({alpha}))[0][0]; + std::vector one = alg.onevec(alpha.size()); + + return -alg.dot(one, alpha) + 0.5 * alphaQ; + } + + std::vector Cost::dualFormSVMDeriv(std::vector alpha, std::vector> X, std::vector y){ + LinAlg alg; + std::vector> Y = alg.zeromat(y.size(), y.size()); + for(int i = 0; i < y.size(); i++){ + Y[i][i] = y[i]; // Y is a diagnoal matrix. Y[i][j] = y[i] if i = i, else Y[i][j] = 0. Yt = Y. 
+ } + std::vector> K = alg.matmult(X, alg.transpose(X)); // TO DO: DON'T forget to add non-linear kernelizations. + std::vector> Q = alg.matmult(alg.matmult(alg.transpose(Y), K), Y); + std::vector alphaQDeriv = alg.mat_vec_mult(Q, alpha); + std::vector one = alg.onevec(alpha.size()); + + return alg.subtraction(alphaQDeriv, one); + } +} \ No newline at end of file diff --git a/MLPP/Cost/Cost.hpp b/MLPP/Cost/Cost.hpp new file mode 100644 index 0000000..d6c8fb0 --- /dev/null +++ b/MLPP/Cost/Cost.hpp @@ -0,0 +1,86 @@ +// +// Cost.hpp +// +// Created by Marc Melikyan on 1/16/21. +// + +#ifndef Cost_hpp +#define Cost_hpp + +#include + +namespace MLPP{ + class Cost{ + public: + // Regression Costs + double MSE(std::vector y_hat, std::vector y); + double MSE(std::vector> y_hat, std::vector> y); + + std::vector MSEDeriv(std::vector y_hat, std::vector y); + std::vector> MSEDeriv(std::vector> y_hat, std::vector> y); + + double RMSE(std::vector y_hat, std::vector y); + double RMSE(std::vector> y_hat, std::vector> y); + + std::vector RMSEDeriv(std::vector y_hat, std::vector y); + std::vector> RMSEDeriv(std::vector> y_hat, std::vector> y); + + double MAE(std::vector y_hat, std::vector y); + double MAE(std::vector> y_hat, std::vector> y); + + std::vector MAEDeriv(std::vector y_hat, std::vector y); + std::vector> MAEDeriv(std::vector> y_hat, std::vector> y); + + double MBE(std::vector y_hat, std::vector y); + double MBE(std::vector> y_hat, std::vector> y); + + std::vector MBEDeriv(std::vector y_hat, std::vector y); + std::vector> MBEDeriv(std::vector> y_hat, std::vector> y); + + // Classification Costs + double LogLoss(std::vector y_hat, std::vector y); + double LogLoss(std::vector> y_hat, std::vector> y); + + std::vector LogLossDeriv(std::vector y_hat, std::vector y); + std::vector> LogLossDeriv(std::vector> y_hat, std::vector> y); + + double CrossEntropy(std::vector y_hat, std::vector y); + double CrossEntropy(std::vector> y_hat, std::vector> y); + + std::vector CrossEntropyDeriv(std::vector y_hat, std::vector y); + std::vector> CrossEntropyDeriv(std::vector> y_hat, std::vector> y); + + double HuberLoss(std::vector y_hat, std::vector y, double delta); + double HuberLoss(std::vector> y_hat, std::vector> y, double delta); + + std::vector HuberLossDeriv(std::vector y_hat, std::vector y, double delta); + std::vector> HuberLossDeriv(std::vector> y_hat, std::vector> y, double delta); + + double HingeLoss(std::vector y_hat, std::vector y); + double HingeLoss(std::vector> y_hat, std::vector> y); + + std::vector HingeLossDeriv(std::vector y_hat, std::vector y); + std::vector> HingeLossDeriv(std::vector> y_hat, std::vector> y); + + double HingeLoss(std::vector y_hat, std::vector y, std::vector weights, double C); + double HingeLoss(std::vector> y_hat, std::vector> y, std::vector> weights, double C); + + std::vector HingeLossDeriv(std::vector y_hat, std::vector y, double C); + std::vector> HingeLossDeriv(std::vector> y_hat, std::vector> y, double C); + + double WassersteinLoss(std::vector y_hat, std::vector y); + double WassersteinLoss(std::vector> y_hat, std::vector> y); + + std::vector WassersteinLossDeriv(std::vector y_hat, std::vector y); + std::vector> WassersteinLossDeriv(std::vector> y_hat, std::vector> y); + + double dualFormSVM(std::vector alpha, std::vector> X, std::vector y); // TO DO: DON'T forget to add non-linear kernelizations. 
+ + std::vector dualFormSVMDeriv(std::vector alpha, std::vector> X, std::vector y); + + + private: + }; +} + +#endif /* Cost_hpp */ diff --git a/MLPP/Data/Data.cpp b/MLPP/Data/Data.cpp new file mode 100644 index 0000000..e475002 --- /dev/null +++ b/MLPP/Data/Data.cpp @@ -0,0 +1,773 @@ +// +// Data.cpp +// MLP +// +// Created by Marc Melikyan on 11/4/20. +// + +#include "Data.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Stat/Stat.hpp" +#include "SoftmaxNet/SoftmaxNet.hpp" +#include +#include +#include +#include +#include +#include + + +namespace MLPP{ + // Loading Datasets + std::tuple>, std::vector> Data::loadBreastCancer(){ + const int BREAST_CANCER_SIZE = 30; // k = 30 + std::vector> inputSet; + std::vector outputSet; + + setData(BREAST_CANCER_SIZE, "MLPP/Data/Datasets/BreastCancer.csv", inputSet, outputSet); + return {inputSet, outputSet}; + } + + std::tuple>, std::vector> Data::loadBreastCancerSVC(){ + const int BREAST_CANCER_SIZE = 30; // k = 30 + std::vector> inputSet; + std::vector outputSet; + + setData(BREAST_CANCER_SIZE, "MLPP/Data/Datasets/BreastCancerSVM.csv", inputSet, outputSet); + return {inputSet, outputSet}; + } + + std::tuple>, std::vector>> Data::loadIris(){ + const int IRIS_SIZE = 4; + const int ONE_HOT_NUM = 3; + std::vector> inputSet; + std::vector tempOutputSet; + + setData(IRIS_SIZE, "/Users/marcmelikyan/Desktop/Data/Iris.csv", inputSet, tempOutputSet); + std::vector> outputSet = oneHotRep(tempOutputSet, ONE_HOT_NUM); + return {inputSet, outputSet}; + } + + std::tuple>, std::vector>> Data::loadWine(){ + const int WINE_SIZE = 4; + const int ONE_HOT_NUM = 3; + std::vector> inputSet; + std::vector tempOutputSet; + + setData(WINE_SIZE, "MLPP/Data/Datasets/Iris.csv", inputSet, tempOutputSet); + std::vector> outputSet = oneHotRep(tempOutputSet, ONE_HOT_NUM); + return {inputSet, outputSet}; + } + + std::tuple>, std::vector>> Data::loadMnistTrain(){ + const int MNIST_SIZE = 784; + const int ONE_HOT_NUM = 10; + std::vector> inputSet; + std::vector tempOutputSet; + + setData(MNIST_SIZE, "MLPP/Data/Datasets/MnistTrain.csv", inputSet, tempOutputSet); + std::vector> outputSet = oneHotRep(tempOutputSet, ONE_HOT_NUM); + return {inputSet, outputSet}; + } + + std::tuple>, std::vector>> Data::loadMnistTest(){ + const int MNIST_SIZE = 784; + const int ONE_HOT_NUM = 10; + std::vector> inputSet; + std::vector tempOutputSet; + + setData(MNIST_SIZE, "MLPP/Data/Datasets/MnistTest.csv", inputSet, tempOutputSet); + std::vector> outputSet = oneHotRep(tempOutputSet, ONE_HOT_NUM); + return {inputSet, outputSet}; + } + + std::tuple>, std::vector> Data::loadCaliforniaHousing(){ + const int CALIFORNIA_HOUSING_SIZE = 13; // k = 30 + std::vector> inputSet; + std::vector outputSet; + + setData(CALIFORNIA_HOUSING_SIZE, "MLPP/Data/Datasets/CaliforniaHousing.csv", inputSet, outputSet); + return {inputSet, outputSet}; + } + + std::tuple, std::vector> Data::loadFiresAndCrime(){ + std::vector inputSet; // k is implicitly 1. 
+ std::vector outputSet; + + setData("MLPP/Data/Datasets/FiresAndCrime.csv", inputSet, outputSet); + return {inputSet, outputSet}; + } + + std::tuple>, std::vector>, std::vector>, std::vector>> Data::trainTestSplit(std::vector> inputSet, std::vector> outputSet, double testSize){ + std::random_device rd; + std::default_random_engine generator(rd()); + + std::shuffle(inputSet.begin(), inputSet.end(), generator); // inputSet random shuffle + std::shuffle(outputSet.begin(), outputSet.end(), generator); // outputSet random shuffle) + + std::vector> inputTestSet; + std::vector> outputTestSet; + + int testInputNumber = testSize * inputSet.size(); // implicit usage of floor + int testOutputNumber = testSize * outputSet.size(); // implicit usage of floor + + for(int i = 0; i < testInputNumber; i++){ + inputTestSet.push_back(inputSet[i]); + inputSet.erase(inputSet.begin()); + } + + for(int i = 0; i < testOutputNumber; i++){ + outputTestSet.push_back(outputSet[i]); + outputSet.erase(outputSet.begin()); + } + + return {inputSet, outputSet, inputTestSet, outputTestSet}; + + } + + // MULTIVARIATE SUPERVISED + + void Data::setData(int k, std::string fileName, std::vector>& inputSet, std::vector& outputSet){ + LinAlg alg; + std::string inputTemp; + std::string outputTemp; + + inputSet.resize(k); + + std::ifstream dataFile(fileName); + if(!dataFile.is_open()){ + std::cout << fileName << " failed to open." << std::endl; + } + + std::string line; + while(std::getline(dataFile, line)){ + std::stringstream ss(line); + + for(int i = 0; i < k; i++){ + std::getline(ss, inputTemp, ','); + inputSet[i].push_back(std::stod(inputTemp)); + + } + + std::getline(ss, outputTemp, ','); + outputSet.push_back(std::stod(outputTemp)); + } + inputSet = alg.transpose(inputSet); + dataFile.close(); + } + + void Data::printData(std::vector inputName, std::string outputName, std::vector> inputSet, std::vector outputSet){ + LinAlg alg; + inputSet = alg.transpose(inputSet); + for(int i = 0; i < inputSet.size(); i++){ + std::cout << inputName[i] << std::endl; + for(int j = 0; j < inputSet[i].size(); j++){ + std::cout << inputSet[i][j] << std::endl; + } + } + + std::cout << outputName << std::endl; + for(int i = 0; i < outputSet.size(); i++){ + std::cout << outputSet[i] << std::endl; + } + } + + // UNSUPERVISED + + void Data::setData(int k, std::string fileName, std::vector>& inputSet){ + LinAlg alg; + std::string inputTemp; + + inputSet.resize(k); + + std::ifstream dataFile(fileName); + if(!dataFile.is_open()){ + std::cout << fileName << " failed to open." << std::endl; + } + + std::string line; + while(std::getline(dataFile, line)){ + std::stringstream ss(line); + + for(int i = 0; i < k; i++){ + std::getline(ss, inputTemp, ','); + inputSet[i].push_back(std::stod(inputTemp)); + + } + } + inputSet = alg.transpose(inputSet); + dataFile.close(); + } + + void Data::printData(std::vector inputName, std::vector> inputSet){ + LinAlg alg; + inputSet = alg.transpose(inputSet); + for(int i = 0; i < inputSet.size(); i++){ + std::cout << inputName[i] << std::endl; + for(int j = 0; j < inputSet[i].size(); j++){ + std::cout << inputSet[i][j] << std::endl; + } + } + } + + // SIMPLE + + void Data::setData(std::string fileName, std::vector & inputSet, std::vector & outputSet){ + std::string inputTemp, outputTemp; + + std::ifstream dataFile(fileName); + if(!dataFile.is_open()){ + std::cout << "The file failed to open." 
<< std::endl; + } + + std::string line; + + + while(std::getline(dataFile, line)){ + std::stringstream ss(line); + + std::getline(ss, inputTemp, ','); + std::getline(ss, outputTemp, ','); + + inputSet.push_back(std::stod(inputTemp)); + outputSet.push_back(std::stod(outputTemp)); + } + + dataFile.close(); + } + + void Data::printData(std::string& inputName, std::string& outputName, std::vector & inputSet, std::vector & outputSet){ + std::cout << inputName << std::endl; + for(int i = 0; i < inputSet.size(); i++){ + std::cout << inputSet[i] << std::endl; + } + + std::cout << outputName << std::endl; + for(int i = 0; i < inputSet.size(); i++){ + std::cout << outputSet[i] << std::endl; + } + } + + // Images + std::vector> Data::rgb2gray(std::vector>> input){ + std::vector> grayScale; + grayScale.resize(input[0].size()); + for(int i = 0; i < grayScale.size(); i++){ + grayScale[i].resize(input[0][i].size()); + } + for(int i = 0; i < grayScale.size(); i++){ + for(int j = 0; j < grayScale[i].size(); j++){ + grayScale[i][j] = 0.299 * input[0][i][j] + 0.587 * input[1][i][j] + 0.114 * input[2][i][j]; + } + } + return grayScale; + } + + std::vector>> Data::rgb2ycbcr(std::vector>> input){ + LinAlg alg; + std::vector>> YCbCr; + YCbCr = alg.resize(YCbCr, input); + for(int i = 0; i < YCbCr[0].size(); i++){ + for(int j = 0; j < YCbCr[0][i].size(); j++){ + YCbCr[0][i][j] = 0.299 * input[0][i][j] + 0.587 * input[1][i][j] + 0.114 * input[2][i][j]; + YCbCr[1][i][j] = -0.169 * input[0][i][j] - 0.331 * input[1][i][j] + 0.500 * input[2][i][j]; + YCbCr[2][i][j] = 0.500 * input[0][i][j] - 0.419 * input[1][i][j] - 0.081 * input[2][i][j]; + } + } + return YCbCr; + } + + // Conversion formulas available here: + // https://www.rapidtables.com/convert/color/rgb-to-hsv.html + std::vector>> Data::rgb2hsv(std::vector>> input){ + LinAlg alg; + std::vector>> HSV; + HSV = alg.resize(HSV, input); + for(int i = 0; i < HSV[0].size(); i++){ + for(int j = 0; j < HSV[0][i].size(); j++){ + double rPrime = input[0][i][j] / 255; + double gPrime = input[1][i][j] / 255; + double bPrime = input[2][i][j] / 255; + + double cMax = alg.max({rPrime, gPrime, bPrime}); + double cMin = alg.min({rPrime, gPrime, bPrime}); + double delta = cMax - cMin; + + // H calculation. + if(delta == 0){ + HSV[0][i][j] = 0; + } + else{ + if(cMax == rPrime){ + HSV[0][i][j] = 60 * fmod(((gPrime - bPrime) / delta), 6); + } + else if(cMax == gPrime){ + HSV[0][i][j] = 60 * ( (bPrime - rPrime) / delta + 2); + } + else{ // cMax == bPrime + HSV[0][i][j] = 60 * ( (rPrime - gPrime) / delta + 6); + } + } + + // S calculation. + if(cMax == 0){ + HSV[1][i][j] = 0; + } + else{ HSV[1][i][j] = delta/cMax; } + + // V calculation. 
+ HSV[2][i][j] = cMax; + } + } + return HSV; + } + + // http://machinethatsees.blogspot.com/2013/07/how-to-convert-rgb-to-xyz-or-vice-versa.html + std::vector>> Data::rgb2xyz(std::vector>> input){ + LinAlg alg; + std::vector>> XYZ; + XYZ = alg.resize(XYZ, input); + std::vector> RGB2XYZ = {{0.4124564, 0.3575761, 0.1804375}, {0.2126726, 0.7151522, 0.0721750}, {0.0193339, 0.1191920, 0.9503041}}; + return alg.vector_wise_tensor_product(input, RGB2XYZ); + } + + std::vector>> Data::xyz2rgb(std::vector>> input){ + LinAlg alg; + std::vector>> XYZ; + XYZ = alg.resize(XYZ, input); + std::vector> RGB2XYZ = alg.inverse({{0.4124564, 0.3575761, 0.1804375}, {0.2126726, 0.7151522, 0.0721750}, {0.0193339, 0.1191920, 0.9503041}}); + return alg.vector_wise_tensor_product(input, RGB2XYZ); + } + + // TEXT-BASED & NLP + std::string Data::toLower(std::string text){ + for(int i = 0; i < text.size(); i++){ + text[i] = tolower(text[i]); + } + return text; + } + + std::vector Data::split(std::string text){ + std::vector split_data; + for(int i = 0; i < text.size(); i++){ + split_data.push_back(text[i]); + } + return split_data; + } + + std::vector Data::splitSentences(std::string data){ + std::vector sentences; + std::string currentStr = ""; + + for(int i = 0; i < data.length(); i++){ + currentStr.push_back(data[i]); + if(data[i] == '.' && data[i + 1] != '.'){ + sentences.push_back(currentStr); + currentStr = ""; + i++; + } + } + return sentences; + } + + std::vector Data::removeSpaces(std::vector data){ + for(int i = 0; i < data.size(); i++){ + auto it = data[i].begin(); + for(int j = 0; j < data[i].length(); j++){ + if(data[i][j] == ' '){ + data[i].erase(it); + } + it++; + } + } + return data; + } + + std::vector Data::removeNullByte(std::vector data){ + for(int i = 0; i < data.size(); i++){ + if(data[i] == "\0"){ + data.erase(data.begin() + i); + } + } + return data; + } + + std::vector Data::segment(std::string text){ + std::vector segmented_data; + int prev_delim = 0; + for(int i = 0; i < text.length(); i++){ + if(text[i] == ' '){ + segmented_data.push_back(text.substr(prev_delim, i - prev_delim)); + prev_delim = i + 1; + } + else if(text[i] == ',' || text[i] == '!' || text[i] == '.' 
|| text[i] == '-'){ + segmented_data.push_back(text.substr(prev_delim, i - prev_delim)); + std::string punc; + punc.push_back(text[i]); + segmented_data.push_back(punc); + prev_delim = i + 2; + i++; + } + else if(i == text.length() - 1){ + segmented_data.push_back(text.substr(prev_delim, text.length() - prev_delim)); // hehe oops- forgot this + } + } + + return segmented_data; + } + + std::vector Data::tokenize(std::string text){ + int max_num = 0; + bool new_num = true; + std::vector segmented_data = segment(text); + std::vector tokenized_data; + tokenized_data.resize(segmented_data.size()); + for(int i = 0; i < segmented_data.size(); i++){ + for(int j = i - 1; j >= 0; j--){ + if(segmented_data[i] == segmented_data[j]){ + tokenized_data[i] = tokenized_data[j]; + new_num = false; + } + } + if(!new_num){ + new_num = true; + } + else{ + max_num++; + tokenized_data[i] = max_num; + } + } + return tokenized_data; + } + + std::vector Data::removeStopWords(std::string text){ + std::vector stopWords = {"i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", "yours", "yourself", "yourselves", "he", "him", "his", "himself", "she", "her", "hers", "herself", "it", "its", "itself", "they", "them", "their", "theirs", "themselves", "what", "which", "who", "whom", "this", "that", "these", "those", "am", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had", "having", "do", "does", "did", "doing", "a", "an", "the", "and", "but", "if", "or", "because", "as", "until", "while", "of", "at", "by", "for", "with", "about", "against", "between", "into", "through", "during", "before", "after", "above", "below", "to", "from", "up", "down", "in", "out", "on", "off", "over", "under", "again", "further", "then", "once", "here", "there", "when", "where", "why", "how", "all", "any", "both", "each", "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so", "than", "too", "very", "s", "t", "can", "will", "just", "don", "should", "now"}; + std::vector segmented_data = removeSpaces(segment(toLower(text))); + + for(int i = 0; i < stopWords.size(); i++){ + for(int j = 0; j < segmented_data.size(); j++){ + if(segmented_data[j] == stopWords[i]){ + segmented_data.erase(segmented_data.begin() + j); + } + } + } + return segmented_data; + } + + std::vector Data::removeStopWords(std::vector segmented_data){ + std::vector stopWords = {"i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", "yours", "yourself", "yourselves", "he", "him", "his", "himself", "she", "her", "hers", "herself", "it", "its", "itself", "they", "them", "their", "theirs", "themselves", "what", "which", "who", "whom", "this", "that", "these", "those", "am", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had", "having", "do", "does", "did", "doing", "a", "an", "the", "and", "but", "if", "or", "because", "as", "until", "while", "of", "at", "by", "for", "with", "about", "against", "between", "into", "through", "during", "before", "after", "above", "below", "to", "from", "up", "down", "in", "out", "on", "off", "over", "under", "again", "further", "then", "once", "here", "there", "when", "where", "why", "how", "all", "any", "both", "each", "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so", "than", "too", "very", "s", "t", "can", "will", "just", "don", "should", "now"}; + for(int i = 0; i < segmented_data.size(); i++){ + for(int j = 0; j < stopWords.size(); j++){ + if(segmented_data[i] == stopWords[j]){ + 
segmented_data.erase(segmented_data.begin() + i); + } + } + } + return segmented_data; + } + + std::string Data::stemming(std::string text){ + + // Our list of suffixes which we use to compare against + std::vector suffixes = {"eer", "er", "ion", "ity", "ment", "ness", "or", "sion", "ship", "th", "able", "ible", "al", "ant", "ary", "ful", "ic", "ious", "ous", "ive", "less", "y", "ed", "en", "ing", "ize", "ise", "ly", "ward", "wise"}; + int padding_size = 4; + char padding = ' '; // our padding + + for(int i = 0; i < padding_size; i++){ + text[text.length() + i] = padding; // ' ' will be our padding value + } + + + for(int i = 0; i < text.size(); i++){ + for(int j = 0; j < suffixes.size(); j++){ + if(text.substr(i, suffixes[j].length()) == suffixes[j] && (text[i + suffixes[j].length()] == ' ' || text[i + suffixes[j].length()] == ',' || text[i + suffixes[j].length()] == '-' || text[i + suffixes[j].length()] == '.' || text[i + suffixes[j].length()] == '!')){ + text.erase(i, suffixes[j].length()); + } + } + } + + return text; + } + + std::vector> Data::BOW(std::vector sentences, std::string type){ + /* + STEPS OF BOW: + 1) To lowercase (done by removeStopWords function by def) + 2) Removing stop words + 3) Obtain a list of the used words + 4) Create a one hot encoded vector of the words and sentences + 5) Sentence.size() x list.size() matrix + */ + + std::vector wordList = removeNullByte(removeStopWords(createWordList(sentences))); + + std::vector> segmented_sentences; + segmented_sentences.resize(sentences.size()); + + for(int i = 0; i < sentences.size(); i++){ + segmented_sentences[i] = removeStopWords(sentences[i]); + } + + std::vector> bow; + + bow.resize(sentences.size()); + for(int i = 0; i < bow.size(); i++){ + bow[i].resize(wordList.size()); + } + + + for(int i = 0; i < segmented_sentences.size(); i++){ + for(int j = 0; j < segmented_sentences[i].size(); j++){ + for(int k = 0; k < wordList.size(); k++){ + if(segmented_sentences[i][j] == wordList[k]){ + if(type == "Binary"){ + bow[i][k] = 1; + } + else{ + bow[i][k]++; + } + } + } + } + } + return bow; + } + + std::vector> Data::TFIDF(std::vector sentences){ + LinAlg alg; + std::vector wordList = removeNullByte(removeStopWords(createWordList(sentences))); + + std::vector> segmented_sentences; + segmented_sentences.resize(sentences.size()); + + for(int i = 0; i < sentences.size(); i++){ + segmented_sentences[i] = removeStopWords(sentences[i]); + } + + std::vector> TF; + std::vector frequency; + frequency.resize(wordList.size()); + TF.resize(segmented_sentences.size()); + for(int i = 0; i < TF.size(); i++){ + TF[i].resize(wordList.size()); + } + for(int i = 0; i < segmented_sentences.size(); i++){ + std::vector present(wordList.size(), 0); + for(int j = 0; j < segmented_sentences[i].size(); j++){ + for(int k = 0; k < wordList.size(); k++){ + if(segmented_sentences[i][j] == wordList[k]){ + TF[i][k]++; + if(!present[k]){ + frequency[k]++; + present[k] = true; + } + } + } + } + TF[i] = alg.scalarMultiply(double(1) / double(segmented_sentences[i].size()), TF[i]); + } + + std::vector IDF; + IDF.resize(frequency.size()); + + for(int i = 0; i < IDF.size(); i++){ + IDF[i] = std::log((double)segmented_sentences.size() / (double)frequency[i]); + } + + std::vector> TFIDF; + TFIDF.resize(segmented_sentences.size()); + for(int i = 0; i < TFIDF.size(); i++){ + TFIDF[i].resize(wordList.size()); + } + + for(int i = 0; i < TFIDF.size(); i++){ + for(int j = 0; j < TFIDF[i].size(); j++){ + TFIDF[i][j] = TF[i][j] * IDF[j]; + } + } + + return TFIDF; + } + + 
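+ // The weighting computed above is the usual one: tf(t, d) = count(t, d) / |d| over the
+ // stop-word-filtered tokens of sentence d, idf(t) = log(N / df(t)) with N sentences and
+ // df(t) the number of sentences containing t, and tfidf(t, d) = tf(t, d) * idf(t).
+ // A minimal usage sketch (the sentences are made up for illustration):
+ //   Data data;
+ //   std::vector<std::string> sentences = {"the cat sat on the mat", "a dog sat down"};
+ //   std::vector<std::vector<double>> weights = data.TFIDF(sentences); // rows: sentences, columns: word list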
std::tuple>, std::vector> Data::word2Vec(std::vector sentences, std::string type, int windowSize, int dimension, double learning_rate, int max_epoch){ + std::vector wordList = removeNullByte(removeStopWords(createWordList(sentences))); + + std::vector> segmented_sentences; + segmented_sentences.resize(sentences.size()); + + for(int i = 0; i < sentences.size(); i++){ + segmented_sentences[i] = removeStopWords(sentences[i]); + } + + std::vector inputStrings; + std::vector outputStrings; + + for(int i = 0; i < segmented_sentences.size(); i++){ + for(int j = 0; j < segmented_sentences[i].size(); j++){ + for(int k = windowSize; k > 0; k--){ + if(j - k >= 0){ + inputStrings.push_back(segmented_sentences[i][j]); + + outputStrings.push_back(segmented_sentences[i][j - k]); + } + if(j + k <= segmented_sentences[i].size() - 1){ + inputStrings.push_back(segmented_sentences[i][j]); + outputStrings.push_back(segmented_sentences[i][j + k]); + } + } + } + } + + int inputSize = inputStrings.size(); + + inputStrings.insert(inputStrings.end(), outputStrings.begin(), outputStrings.end()); + + std::vector> BOW = Data::BOW(inputStrings, "Binary"); + + std::vector> inputSet; + std::vector> outputSet; + + for(int i = 0; i < inputSize; i++){ + inputSet.push_back(BOW[i]); + } + + for(int i = inputSize; i < BOW.size(); i++){ + outputSet.push_back(BOW[i]); + } + LinAlg alg; + SoftmaxNet* model; + if(type == "Skipgram"){ + model = new SoftmaxNet(outputSet, inputSet, dimension); + } + else { // else = CBOW. We maintain it is a default. + model = new SoftmaxNet(inputSet, outputSet, dimension); + } + model->gradientDescent(learning_rate, max_epoch, 1); + + std::vector> wordEmbeddings = model->getEmbeddings(); + delete model; + return {wordEmbeddings, wordList}; + } + + std::vector> Data::LSA(std::vector sentences, int dim){ + LinAlg alg; + std::vector> docWordData = BOW(sentences, "Binary"); + + auto [U, S, Vt] = alg.SVD(docWordData); + std::vector> S_trunc = alg.zeromat(dim, dim); + std::vector> Vt_trunc; + for(int i = 0; i < dim; i++){ + S_trunc[i][i] = S[i][i]; + Vt_trunc.push_back(Vt[i]); + } + + std::vector> embeddings = alg.matmult(S_trunc, Vt_trunc); + return embeddings; + } + + std::vector Data::createWordList(std::vector sentences){ + std::string combinedText = ""; + for(int i = 0; i < sentences.size(); i++){ + if(i != 0){ combinedText += " "; } + combinedText += sentences[i]; + } + + return removeSpaces(vecToSet(removeStopWords(combinedText))); + } + + // EXTRA + void Data::setInputNames(std::string fileName, std::vector& inputNames){ + std::string inputNameTemp; + std::ifstream dataFile(fileName); + if(!dataFile.is_open()){ + std::cout << fileName << " failed to open." 
<< std::endl; + } + + while (std::getline(dataFile, inputNameTemp)) + { + inputNames.push_back(inputNameTemp); + } + + dataFile.close(); + } + + std::vector> Data::featureScaling(std::vector> X){ + LinAlg alg; + X = alg.transpose(X); + std::vector max_elements, min_elements; + max_elements.resize(X.size()); + min_elements.resize(X.size()); + + for(int i = 0; i < X.size(); i++){ + max_elements[i] = alg.max(X[i]); + min_elements[i] = alg.min(X[i]); + } + + for(int i = 0; i < X.size(); i++){ + for(int j = 0; j < X[i].size(); j++){ + X[i][j] = (X[i][j] - min_elements[i]) / (max_elements[i] - min_elements[i]); + } + } + return alg.transpose(X); + } + + + std::vector> Data::meanNormalization(std::vector> X){ + LinAlg alg; + Stat stat; + // (X_j - mu_j) / std_j, for every j + + X = meanCentering(X); + for(int i = 0; i < X.size(); i++){ + X[i] = alg.scalarMultiply(1/stat.standardDeviation(X[i]), X[i]); + } + return X; + } + + std::vector> Data::meanCentering(std::vector> X){ + LinAlg alg; + Stat stat; + for(int i = 0; i < X.size(); i++){ + double mean_i = stat.mean(X[i]); + for(int j = 0; j < X[i].size(); j++){ + X[i][j] -= mean_i; + } + } + return X; + } + + std::vector> Data::oneHotRep(std::vector tempOutputSet, int n_class){ + std::vector> outputSet; + outputSet.resize(tempOutputSet.size()); + for(int i = 0; i < tempOutputSet.size(); i++){ + for(int j = 0; j <= n_class - 1; j++){ + if(tempOutputSet[i] == j){ + outputSet[i].push_back(1); + } + else{ + outputSet[i].push_back(0); + } + } + } + return outputSet; + } + + std::vector Data::reverseOneHot(std::vector> tempOutputSet){ + std::vector outputSet; + int n_class = tempOutputSet[0].size(); + for(int i = 0; i < tempOutputSet.size(); i++){ + int current_class = 1; + for(int j = 0; j < tempOutputSet[i].size(); j++){ + if(tempOutputSet[i][j] == 1){ + break; + } + else{ + current_class++; + } + } + outputSet.push_back(current_class); + } + + return outputSet; + } +} diff --git a/MLPP/Data/Data.hpp b/MLPP/Data/Data.hpp new file mode 100644 index 0000000..65dee7a --- /dev/null +++ b/MLPP/Data/Data.hpp @@ -0,0 +1,99 @@ +// +// Data.hpp +// MLP +// +// Created by Marc Melikyan on 11/4/20. 
+// + +#ifndef Data_hpp +#define Data_hpp + +#include +#include +#include + + +namespace MLPP{ +class Data{ + public: + // Load Datasets + std::tuple>, std::vector> loadBreastCancer(); + std::tuple>, std::vector> loadBreastCancerSVC(); + std::tuple>, std::vector>> loadIris(); + std::tuple>, std::vector>> loadWine(); + std::tuple>, std::vector>> loadMnistTrain(); + std::tuple>, std::vector>> loadMnistTest(); + std::tuple>, std::vector> loadCaliforniaHousing(); + std::tuple, std::vector> loadFiresAndCrime(); + + std::tuple>, std::vector>, std::vector>, std::vector>> trainTestSplit(std::vector> inputSet, std::vector> outputSet, double testSize); + + // Supervised + void setData(int k, std::string fileName, std::vector>& inputSet, std::vector& outputSet); + void printData(std::vector inputName, std::string outputName, std::vector> inputSet, std::vector outputSet); + + // Unsupervised + void setData(int k, std::string fileName, std::vector>& inputSet); + void printData(std::vector inputName, std::vector> inputSet); + + // Simple + void setData(std::string fileName, std::vector & inputSet, std::vector & outputSet); + void printData(std::string& inputName, std::string& outputName, std::vector & inputSet, std::vector & outputSet); + + // Images + std::vector> rgb2gray(std::vector>> input); + std::vector>> rgb2ycbcr(std::vector>> input); + std::vector>> rgb2hsv(std::vector>> input); + std::vector>> rgb2xyz(std::vector>> input); + std::vector>> xyz2rgb(std::vector>> input); + + // Text-Based & NLP + std::string toLower(std::string text); + std::vector split(std::string text); + std::vector splitSentences(std::string data); + std::vector removeSpaces(std::vector data); + std::vector removeNullByte(std::vector data); + std::vector segment(std::string text); + std::vector tokenize(std::string text); + std::vector removeStopWords(std::string text); + std::vector removeStopWords(std::vector segmented_data); + + std::string stemming(std::string text); + + std::vector> BOW(std::vector sentences, std::string = "Default"); + std::vector> TFIDF(std::vector sentences); + std::tuple>, std::vector> word2Vec(std::vector sentences, std::string type, int windowSize, int dimension, double learning_rate, int max_epoch); + std::vector> LSA(std::vector sentences, int dim); + + std::vector createWordList(std::vector sentences); + + // Extra + void setInputNames(std::string fileName, std::vector& inputNames); + std::vector> featureScaling(std::vector> X); + std::vector> meanNormalization(std::vector> X); + std::vector> meanCentering(std::vector> X); + std::vector> oneHotRep (std::vector tempOutputSet, int n_class); + std::vector reverseOneHot(std::vector> tempOutputSet); + + template + std::vector vecToSet(std::vector inputSet){ + std::vector setInputSet; + for(int i = 0; i < inputSet.size(); i++){ + bool new_element = true; + for(int j = 0; j < setInputSet.size(); j++){ + if(setInputSet[j] == inputSet[i]){ + new_element = false; + } + } + if(new_element){ + setInputSet.push_back(inputSet[i]); + } + } + return setInputSet; + } + + private: + }; +} + +#endif /* Data_hpp */ diff --git a/MLPP/DualSVC/DualSVC.cpp b/MLPP/DualSVC/DualSVC.cpp new file mode 100644 index 0000000..09a8b52 --- /dev/null +++ b/MLPP/DualSVC/DualSVC.cpp @@ -0,0 +1,241 @@ +// +// DualSVC.cpp +// +// Created by Marc Melikyan on 10/2/20. 
+// + +#include "DualSVC.hpp" +#include "Activation/Activation.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Regularization/Reg.hpp" +#include "Utilities/Utilities.hpp" +#include "Cost/Cost.hpp" + +#include +#include + +namespace MLPP{ + DualSVC::DualSVC(std::vector> inputSet, std::vector outputSet, double C, std::string kernel) + : inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), C(C), kernel(kernel) + { + y_hat.resize(n); + bias = Utilities::biasInitialization(); + alpha = Utilities::weightInitialization(n); // One alpha for all training examples, as per the lagrangian multipliers. + K = kernelFunction(inputSet, inputSet, kernel); // For now this is unused. When non-linear kernels are added, the K will be manipulated. + } + + std::vector DualSVC::modelSetTest(std::vector> X){ + return Evaluate(X); + } + + double DualSVC::modelTest(std::vector x){ + return Evaluate(x); + } + + void DualSVC::gradientDescent(double learning_rate, int max_epoch, bool UI){ + class Cost cost; + Activation avn; + LinAlg alg; + Reg regularization; + double cost_prev = 0; + int epoch = 1; + forwardPass(); + + while(true){ + cost_prev = Cost(alpha, inputSet, outputSet); + + alpha = alg.subtraction(alpha, alg.scalarMultiply(learning_rate, cost.dualFormSVMDeriv(alpha, inputSet, outputSet))); + + alphaProjection(); + + // Calculating the bias + double biasGradient = 0; + for(int i = 0; i < alpha.size(); i++){ + double sum = 0; + if(alpha[i] < C && alpha[i] > 0){ + for(int j = 0; j < alpha.size(); j++){ + if(alpha[j] > 0){ + sum += alpha[j] * outputSet[j] * alg.dot(inputSet[j], inputSet[i]); // TO DO: DON'T forget to add non-linear kernelizations. + } + } + } + biasGradient = (1 - outputSet[i] * sum) / outputSet[i]; + break; + } + bias -= biasGradient * learning_rate; + + forwardPass(); + + // UI PORTION + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(alpha, inputSet, outputSet)); + Utilities::UI(alpha, bias); + std::cout << score() << std::endl; // TO DO: DELETE THIS. 
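+ // Assuming the labels are encoded as +/-1 (the usual SVC convention; not stated explicitly here),
+ // (1 - y_i * sum) / y_i equals y_i - sum, i.e. the textbook bias b = y_i - sum_j alpha_j y_j <x_j, x_i>
+ // evaluated at the first free support vector (0 < alpha_i < C); the bias step above uses that
+ // quantity as the gradient for its update rather than assigning it directly.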
+ } + epoch++; + + if(epoch > max_epoch) { break; } + + } + } + + // void DualSVC::SGD(double learning_rate, int max_epoch, bool UI){ + // class Cost cost; + // Activation avn; + // LinAlg alg; + // Reg regularization; + + // double cost_prev = 0; + // int epoch = 1; + + // while(true){ + // std::random_device rd; + // std::default_random_engine generator(rd()); + // std::uniform_int_distribution distribution(0, int(n - 1)); + // int outputIndex = distribution(generator); + + // cost_prev = Cost(alpha, inputSet[outputIndex], outputSet[outputIndex]); + + // // Bias updation + // bias -= learning_rate * costDeriv; + + // y_hat = Evaluate({inputSet[outputIndex]}); + + // if(UI) { + // Utilities::CostInfo(epoch, cost_prev, Cost(alpha)); + // Utilities::UI(weights, bias); + // } + // epoch++; + + // if(epoch > max_epoch) { break; } + // } + // forwardPass(); + // } + + // void DualSVC::MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI){ + // class Cost cost; + // Activation avn; + // LinAlg alg; + // Reg regularization; + // double cost_prev = 0; + // int epoch = 1; + + // // Creating the mini-batches + // int n_mini_batch = n/mini_batch_size; + // auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch); + + // while(true){ + // for(int i = 0; i < n_mini_batch; i++){ + // std::vector y_hat = Evaluate(inputMiniBatches[i]); + // std::vector z = propagate(inputMiniBatches[i]); + // cost_prev = Cost(z, outputMiniBatches[i], weights, C); + + // // Calculating the weight gradients + // weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), cost.HingeLossDeriv(z, outputMiniBatches[i], C)))); + // weights = regularization.regWeights(weights, learning_rate/n, 0, "Ridge"); + + + // // Calculating the bias gradients + // bias -= learning_rate * alg.sum_elements(cost.HingeLossDeriv(y_hat, outputMiniBatches[i], C)) / n; + + // forwardPass(); + + // y_hat = Evaluate(inputMiniBatches[i]); + + // if(UI) { + // Utilities::CostInfo(epoch, cost_prev, Cost(z, outputMiniBatches[i], weights, C)); + // Utilities::UI(weights, bias); + // } + // } + // epoch++; + // if(epoch > max_epoch) { break; } + // } + // forwardPass(); + // } + + double DualSVC::score(){ + Utilities util; + return util.performance(y_hat, outputSet); + } + + void DualSVC::save(std::string fileName){ + Utilities util; + util.saveParameters(fileName, alpha, bias); + } + + double DualSVC::Cost(std::vector alpha, std::vector> X, std::vector y){ + class Cost cost; + return cost.dualFormSVM(alpha, X, y); + } + + std::vector DualSVC::Evaluate(std::vector> X){ + Activation avn; + return avn.sign(propagate(X)); + } + + std::vector DualSVC::propagate(std::vector> X){ + LinAlg alg; + std::vector z; + for(int i = 0; i < X.size(); i++){ + double sum = 0; + for(int j = 0; j < alpha.size(); j++){ + if(alpha[j] != 0){ + sum += alpha[j] * outputSet[j] * alg.dot(inputSet[j], X[i]); // TO DO: DON'T forget to add non-linear kernelizations. + } + } + sum += bias; + z.push_back(sum); + } + return z; + } + + double DualSVC::Evaluate(std::vector x){ + Activation avn; + return avn.sign(propagate(x)); + } + + double DualSVC::propagate(std::vector x){ + LinAlg alg; + double z = 0; + for(int j = 0; j < alpha.size(); j++){ + if(alpha[j] != 0){ + z += alpha[j] * outputSet[j] * alg.dot(inputSet[j], x); // TO DO: DON'T forget to add non-linear kernelizations. 
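+ // This is the (currently linear-kernel) decision value f(x) = sum_{j : alpha_j > 0} alpha_j y_j <x_j, x> + b,
+ // with the bias added just after the loop; Evaluate then thresholds it through the sign activation.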
+ } + } + z += bias; + return z; + } + + void DualSVC::forwardPass(){ + LinAlg alg; + Activation avn; + + z = propagate(inputSet); + y_hat = avn.sign(z); + } + + void DualSVC::alphaProjection(){ + for(int i = 0; i < alpha.size(); i++){ + if(alpha[i] > C){ + alpha[i] = C; + } + else if(alpha[i] < 0){ + alpha[i] = 0; + } + } + } + + double DualSVC::kernelFunction(std::vector u, std::vector v, std::string kernel){ + LinAlg alg; + if(kernel == "Linear"){ + return alg.dot(u, v); + } // warning: non-void function does not return a value in all control paths [-Wreturn-type] + } + + std::vector> DualSVC::kernelFunction(std::vector> A, std::vector> B, std::string kernel){ + LinAlg alg; + if(kernel == "Linear"){ + return alg.matmult(inputSet, alg.transpose(inputSet)); + } // warning: non-void function does not return a value in all control paths [-Wreturn-type] + } +} \ No newline at end of file diff --git a/MLPP/DualSVC/DualSVC.hpp b/MLPP/DualSVC/DualSVC.hpp new file mode 100644 index 0000000..d3a34dd --- /dev/null +++ b/MLPP/DualSVC/DualSVC.hpp @@ -0,0 +1,71 @@ +// +// DualSVC.hpp +// +// Created by Marc Melikyan on 10/2/20. +// +// http://disp.ee.ntu.edu.tw/~pujols/Support%20Vector%20Machine.pdf +// http://ciml.info/dl/v0_99/ciml-v0_99-ch11.pdf +// Were excellent for the practical intution behind the dual formulation. + +#ifndef DualSVC_hpp +#define DualSVC_hpp + + +#include +#include + +namespace MLPP { + + class DualSVC{ + + public: + DualSVC(std::vector> inputSet, std::vector outputSet, double C, std::string kernel = "Linear"); + DualSVC(std::vector> inputSet, std::vector outputSet, double C, std::string kernel, double p, double c); + + std::vector modelSetTest(std::vector> X); + double modelTest(std::vector x); + void gradientDescent(double learning_rate, int max_epoch, bool UI = 1); + void SGD(double learning_rate, int max_epoch, bool UI = 1); + void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1); + double score(); + void save(std::string fileName); + private: + + void init(); + + double Cost(std::vector alpha, std::vector> X, std::vector y); + + std::vector Evaluate(std::vector> X); + std::vector propagate(std::vector> X); + double Evaluate(std::vector x); + double propagate(std::vector x); + void forwardPass(); + + void alphaProjection(); + + double kernelFunction(std::vector v, std::vector u, std::string kernel); + std::vector> kernelFunction(std::vector> U, std::vector> V, std::string kernel); + + std::vector> inputSet; + std::vector outputSet; + std::vector z; + std::vector y_hat; + double bias; + + std::vector alpha; + std::vector> K; + + double C; + int n; + int k; + + std::string kernel; + double p; // Poly + double c; // Poly + + // UI Portion + void UI(int epoch, double cost_prev); + }; +} + +#endif /* DualSVC_hpp */ diff --git a/MLPP/ExpReg/ExpReg.cpp b/MLPP/ExpReg/ExpReg.cpp new file mode 100644 index 0000000..a363136 --- /dev/null +++ b/MLPP/ExpReg/ExpReg.cpp @@ -0,0 +1,240 @@ +// +// ExpReg.cpp +// +// Created by Marc Melikyan on 10/2/20. 
+// + +#include "ExpReg.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Stat/Stat.hpp" +#include "Regularization/Reg.hpp" +#include "Utilities/Utilities.hpp" +#include "Cost/Cost.hpp" + +#include +#include + +namespace MLPP{ + ExpReg::ExpReg(std::vector> inputSet, std::vector outputSet, std::string reg, double lambda, double alpha) + : inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha) + { + y_hat.resize(n); + weights = Utilities::weightInitialization(k); + initial = Utilities::weightInitialization(k); + bias = Utilities::biasInitialization(); + } + + std::vector ExpReg::modelSetTest(std::vector> X){ + return Evaluate(X); + } + + double ExpReg::modelTest(std::vector x){ + return Evaluate(x); + } + + void ExpReg::gradientDescent(double learning_rate, int max_epoch, bool UI){ + LinAlg alg; + Reg regularization; + double cost_prev = 0; + int epoch = 1; + forwardPass(); + + while(true){ + cost_prev = Cost(y_hat, outputSet); + + std::vector error = alg.subtraction(y_hat, outputSet); + + for(int i = 0; i < k; i++){ + + // Calculating the weight gradient + double sum = 0; + for(int j = 0; j < n; j++){ + sum += error[j] * inputSet[j][i] * std::pow(weights[i], inputSet[j][i] - 1); + } + double w_gradient = sum / n; + + // Calculating the initial gradient + double sum2 = 0; + for(int j = 0; j < n; j++){ + sum2 += error[j] * std::pow(weights[i], inputSet[j][i]); + } + + + double i_gradient = sum2 / n; + + // Weight/initial updation + weights[i] -= learning_rate * w_gradient; + initial[i] -= learning_rate * i_gradient; + + } + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Calculating the bias gradient + double sum = 0; + for(int j = 0; j < n; j++){ + sum += (y_hat[j] - outputSet[j]); + } + double b_gradient = sum / n; + + // bias updation + bias -= learning_rate * b_gradient; + forwardPass(); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); + Utilities::UI(weights, bias); + } + epoch++; + + if(epoch > max_epoch) { break; } + + } + } + + void ExpReg::SGD(double learning_rate, int max_epoch, bool UI){ + Reg regularization; + double cost_prev = 0; + int epoch = 1; + + while(true){ + std::random_device rd; + std::default_random_engine generator(rd()); + std::uniform_int_distribution distribution(0, int(n - 1)); + int outputIndex = distribution(generator); + + double y_hat = Evaluate(inputSet[outputIndex]); + cost_prev = Cost({y_hat}, {outputSet[outputIndex]}); + + + for(int i = 0; i < k; i++){ + + // Calculating the weight gradients + + double w_gradient = (y_hat - outputSet[outputIndex]) * inputSet[outputIndex][i] * std::pow(weights[i], inputSet[outputIndex][i] - 1); + double i_gradient = (y_hat - outputSet[outputIndex]) * std::pow(weights[i], inputSet[outputIndex][i]); + + // Weight/initial updation + weights[i] -= learning_rate * w_gradient; + initial[i] -= learning_rate * i_gradient; + } + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Calculating the bias gradients + double b_gradient = (y_hat - outputSet[outputIndex]); + + // Bias updation + bias -= learning_rate * b_gradient; + y_hat = Evaluate({inputSet[outputIndex]}); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost({y_hat}, {outputSet[outputIndex]})); + Utilities::UI(weights, bias); + } + epoch++; + + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + void ExpReg::MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI){ + LinAlg alg; + Reg regularization; + 
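+ // The fitted model is y_hat = sum_j initial_j * weights_j^{x_j} + bias (see Evaluate below); this
+ // routine repeats the gradientDescent updates one mini-batch at a time. As in gradientDescent, the
+ // weight gradient uses x_j * weights_j^{x_j - 1} without the initial_j factor that the exact
+ // derivative of initial_j * weights_j^{x_j} would carry.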
double cost_prev = 0; + int epoch = 1; + + // Creating the mini-batches + int n_mini_batch = n/mini_batch_size; + auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch); + + while(true){ + for(int i = 0; i < n_mini_batch; i++){ + std::vector y_hat = Evaluate(inputMiniBatches[i]); + cost_prev = Cost(y_hat, outputMiniBatches[i]); + std::vector error = alg.subtraction(y_hat, outputMiniBatches[i]); + + for(int j = 0; j < k; j++){ + // Calculating the weight gradient + double sum = 0; + for(int k = 0; k < outputMiniBatches[i].size(); k++){ + sum += error[k] * inputMiniBatches[i][k][j] * std::pow(weights[j], inputMiniBatches[i][k][j] - 1); + } + double w_gradient = sum / outputMiniBatches[i].size(); + + // Calculating the initial gradient + double sum2 = 0; + for(int k = 0; k < outputMiniBatches[i].size(); k++){ + sum2 += error[k] * std::pow(weights[j], inputMiniBatches[i][k][j]); + } + + + double i_gradient = sum2 / outputMiniBatches[i].size(); + + // Weight/initial updation + weights[j] -= learning_rate * w_gradient; + initial[j] -= learning_rate * i_gradient; + } + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Calculating the bias gradient + double sum = 0; + for(int j = 0; j < outputMiniBatches[i].size(); j++){ + sum += (y_hat[j] - outputMiniBatches[i][j]); + } + double b_gradient = sum / outputMiniBatches[i].size(); + y_hat = Evaluate(inputMiniBatches[i]); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i])); + Utilities::UI(weights, bias); + } + } + epoch++; + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + double ExpReg::score(){ + Utilities util; + return util.performance(y_hat, outputSet); + } + + void ExpReg::save(std::string fileName){ + Utilities util; + util.saveParameters(fileName, weights, initial, bias); + } + + double ExpReg::Cost(std::vector y_hat, std::vector y){ + Reg regularization; + class Cost cost; + return cost.MSE(y_hat, y) + regularization.regTerm(weights, lambda, alpha, reg); + } + + std::vector ExpReg::Evaluate(std::vector> X){ + std::vector y_hat; + y_hat.resize(X.size()); + for(int i = 0; i < X.size(); i++){ + y_hat[i] = 0; + for(int j = 0; j < X[i].size(); j++){ + y_hat[i] += initial[j] * std::pow(weights[j], X[i][j]); + } + y_hat[i] += bias; + } + return y_hat; + } + + double ExpReg::Evaluate(std::vector x){ + double y_hat = 0; + for(int i = 0; i < x.size(); i++){ + y_hat += initial[i] * std::pow(weights[i], x[i]); + } + + return y_hat + bias; + } + + // a * w^x + b + void ExpReg::forwardPass(){ + y_hat = Evaluate(inputSet); + } +} \ No newline at end of file diff --git a/MLPP/ExpReg/ExpReg.hpp b/MLPP/ExpReg/ExpReg.hpp new file mode 100644 index 0000000..eeea633 --- /dev/null +++ b/MLPP/ExpReg/ExpReg.hpp @@ -0,0 +1,51 @@ +// +// ExpReg.hpp +// +// Created by Marc Melikyan on 10/2/20. 
+// + +#ifndef ExpReg_hpp +#define ExpReg_hpp + +#include +#include + +namespace MLPP{ + class ExpReg{ + + public: + ExpReg(std::vector> inputSet, std::vector outputSet, std::string reg = "None", double lambda = 0.5, double alpha = 0.5); + std::vector modelSetTest(std::vector> X); + double modelTest(std::vector x); + void gradientDescent(double learning_rate, int max_epoch, bool UI = 1); + void SGD(double learning_rate, int max_epoch, bool UI = 1); + void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1); + double score(); + void save(std::string fileName); + private: + + double Cost(std::vector y_hat, std::vector y); + + std::vector Evaluate(std::vector> X); + double Evaluate(std::vector x); + void forwardPass(); + + std::vector> inputSet; + std::vector outputSet; + std::vector y_hat; + std::vector weights; + std::vector initial; + double bias; + + int n; + int k; + + // Regularization Params + std::string reg; + double lambda; + double alpha; /* This is the controlling param for Elastic Net*/ + + }; +} + +#endif /* ExpReg_hpp */ diff --git a/MLPP/GAN/GAN.cpp b/MLPP/GAN/GAN.cpp new file mode 100644 index 0000000..9308330 --- /dev/null +++ b/MLPP/GAN/GAN.cpp @@ -0,0 +1,290 @@ +// +// GAN.cpp +// +// Created by Marc Melikyan on 11/4/20. +// + +#include "GAN.hpp" +#include "Activation/Activation.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Regularization/Reg.hpp" +#include "Utilities/Utilities.hpp" +#include "Cost/Cost.hpp" + +#include +#include + +namespace MLPP { + GAN::GAN(double k, std::vector> outputSet) + : outputSet(outputSet), n(outputSet.size()), k(k) + { + + } + + GAN::~GAN(){ + delete outputLayer; + } + + std::vector> GAN::generateExample(int n){ + LinAlg alg; + return modelSetTestGenerator(alg.gaussianNoise(n, k)); + } + + void GAN::gradientDescent(double learning_rate, int max_epoch, bool UI){ + class Cost cost; + LinAlg alg; + double cost_prev = 0; + int epoch = 1; + forwardPass(); + + while(true){ + cost_prev = Cost(y_hat, alg.onevec(n)); + + // Training of the discriminator. + + std::vector> generatorInputSet = alg.gaussianNoise(n, k); + std::vector> discriminatorInputSet = modelSetTestGenerator(generatorInputSet); + discriminatorInputSet.insert(discriminatorInputSet.end(), outputSet.begin(), outputSet.end()); // Fake + real inputs. + + std::vector y_hat = modelSetTestDiscriminator(discriminatorInputSet); + std::vector outputSet = alg.zerovec(n); + std::vector outputSetReal = alg.onevec(n); + outputSet.insert(outputSet.end(), outputSetReal.begin(), outputSetReal.end()); // Fake + real output scores. + + auto [cumulativeDiscriminatorHiddenLayerWGrad, outputDiscriminatorWGrad] = computeDiscriminatorGradients(y_hat, outputSet); + cumulativeDiscriminatorHiddenLayerWGrad = alg.scalarMultiply(learning_rate/n, cumulativeDiscriminatorHiddenLayerWGrad); + outputDiscriminatorWGrad = alg.scalarMultiply(learning_rate/n, outputDiscriminatorWGrad); + updateDiscriminatorParameters(cumulativeDiscriminatorHiddenLayerWGrad, outputDiscriminatorWGrad, learning_rate); + + // Training of the generator. 
+ generatorInputSet = alg.gaussianNoise(n, k); + discriminatorInputSet = modelSetTestGenerator(generatorInputSet); + y_hat = modelSetTestDiscriminator(discriminatorInputSet); + outputSet = alg.onevec(n); + + std::vector>> cumulativeGeneratorHiddenLayerWGrad = computeGeneratorGradients(y_hat, outputSet); + cumulativeGeneratorHiddenLayerWGrad = alg.scalarMultiply(learning_rate/n, cumulativeGeneratorHiddenLayerWGrad); + updateGeneratorParameters(cumulativeGeneratorHiddenLayerWGrad, learning_rate); + + forwardPass(); + if(UI) { GAN::UI(epoch, cost_prev, GAN::y_hat, alg.onevec(n)); } + + epoch++; + if(epoch > max_epoch) { break; } + } + } + + double GAN::score(){ + LinAlg alg; + Utilities util; + forwardPass(); + return util.performance(y_hat, alg.onevec(n)); + } + + void GAN::save(std::string fileName){ + Utilities util; + if(!network.empty()){ + util.saveParameters(fileName, network[0].weights, network[0].bias, 0, 1); + for(int i = 1; i < network.size(); i++){ + util.saveParameters(fileName, network[i].weights, network[i].bias, 1, i + 1); + } + util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, 1, network.size() + 1); + } + else{ + util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, 0, network.size() + 1); + } + } + + void GAN::addLayer(int n_hidden, std::string activation, std::string weightInit, std::string reg, double lambda, double alpha){ + LinAlg alg; + if(network.empty()){ + network.push_back(HiddenLayer(n_hidden, activation, alg.gaussianNoise(n, k), weightInit, reg, lambda, alpha)); + network[0].forwardPass(); + } + else{ + network.push_back(HiddenLayer(n_hidden, activation, network[network.size() - 1].a, weightInit, reg, lambda, alpha)); + network[network.size() - 1].forwardPass(); + } + } + + void GAN::addOutputLayer(std::string weightInit, std::string reg, double lambda, double alpha){ + LinAlg alg; + if(!network.empty()){ + outputLayer = new OutputLayer(network[network.size() - 1].n_hidden, "Sigmoid", "LogLoss", network[network.size() - 1].a, weightInit, reg, lambda, alpha); + } + else{ + outputLayer = new OutputLayer(k, "Sigmoid", "LogLoss", alg.gaussianNoise(n, k), weightInit, reg, lambda, alpha); + } + } + + std::vector> GAN::modelSetTestGenerator(std::vector> X){ + if(!network.empty()){ + network[0].input = X; + network[0].forwardPass(); + + for(int i = 1; i <= network.size()/2; i++){ + network[i].input = network[i - 1].a; + network[i].forwardPass(); + } + } + return network[network.size()/2].a; + } + + std::vector GAN::modelSetTestDiscriminator(std::vector> X){ + if(!network.empty()){ + for(int i = network.size()/2 + 1; i < network.size(); i++){ + if(i == network.size()/2 + 1){ + network[i].input = X; + } + else { network[i].input = network[i - 1].a; } + network[i].forwardPass(); + } + outputLayer->input = network[network.size() - 1].a; + } + outputLayer->forwardPass(); + return outputLayer->a; + } + + double GAN::Cost(std::vector y_hat, std::vector y){ + Reg regularization; + class Cost cost; + double totalRegTerm = 0; + + auto cost_function = outputLayer->cost_map[outputLayer->cost]; + if(!network.empty()){ + for(int i = 0; i < network.size() - 1; i++){ + totalRegTerm += regularization.regTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg); + } + } + return (cost.*cost_function)(y_hat, y) + totalRegTerm + regularization.regTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg); + } + + void GAN::forwardPass(){ + LinAlg alg; + if(!network.empty()){ + network[0].input = 
alg.gaussianNoise(n, k); + network[0].forwardPass(); + + for(int i = 1; i < network.size(); i++){ + network[i].input = network[i - 1].a; + network[i].forwardPass(); + } + outputLayer->input = network[network.size() - 1].a; + } + else{ // Should never happen, though. + outputLayer->input = alg.gaussianNoise(n, k); + } + outputLayer->forwardPass(); + y_hat = outputLayer->a; + } + + void GAN::updateDiscriminatorParameters(std::vector>> hiddenLayerUpdations, std::vector outputLayerUpdation, double learning_rate){ + LinAlg alg; + + outputLayer->weights = alg.subtraction(outputLayer->weights, outputLayerUpdation); + outputLayer->bias -= learning_rate * alg.sum_elements(outputLayer->delta) / n; + + if(!network.empty()){ + network[network.size() - 1].weights = alg.subtraction(network[network.size() - 1].weights, hiddenLayerUpdations[0]); + network[network.size() - 1].bias = alg.subtractMatrixRows(network[network.size() - 1].bias, alg.scalarMultiply(learning_rate/n, network[network.size() - 1].delta)); + + for(int i = network.size() - 2; i > network.size()/2; i--){ + network[i].weights = alg.subtraction(network[i].weights, hiddenLayerUpdations[(network.size() - 2) - i + 1]); + network[i].bias = alg.subtractMatrixRows(network[i].bias, alg.scalarMultiply(learning_rate/n, network[i].delta)); + } + } + } + + void GAN::updateGeneratorParameters(std::vector>> hiddenLayerUpdations, double learning_rate){ + LinAlg alg; + + if(!network.empty()){ + + for(int i = network.size()/2; i >= 0; i--){ + //std::cout << network[i].weights.size() << "x" << network[i].weights[0].size() << std::endl; + //std::cout << hiddenLayerUpdations[(network.size() - 2) - i + 1].size() << "x" << hiddenLayerUpdations[(network.size() - 2) - i + 1][0].size() << std::endl; + network[i].weights = alg.subtraction(network[i].weights, hiddenLayerUpdations[(network.size() - 2) - i + 1]); + network[i].bias = alg.subtractMatrixRows(network[i].bias, alg.scalarMultiply(learning_rate/n, network[i].delta)); + } + } + } + + std::tuple>>, std::vector> GAN::computeDiscriminatorGradients(std::vector y_hat, std::vector outputSet){ + class Cost cost; + Activation avn; + LinAlg alg; + Reg regularization; + + std::vector>> cumulativeHiddenLayerWGrad; // Tensor containing ALL hidden grads. + + auto costDeriv = outputLayer->costDeriv_map[outputLayer->cost]; + auto outputAvn = outputLayer->activation_map[outputLayer->activation]; + outputLayer->delta = alg.hadamard_product((cost.*costDeriv)(y_hat, outputSet), (avn.*outputAvn)(outputLayer->z, 1)); + std::vector outputWGrad = alg.mat_vec_mult(alg.transpose(outputLayer->input), outputLayer->delta); + outputWGrad = alg.addition(outputWGrad, regularization.regDerivTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg)); + + + if(!network.empty()){ + auto hiddenLayerAvn = network[network.size() - 1].activation_map[network[network.size() - 1].activation]; + + network[network.size() - 1].delta = alg.hadamard_product(alg.outerProduct(outputLayer->delta, outputLayer->weights), (avn.*hiddenLayerAvn)(network[network.size() - 1].z, 1)); + std::vector> hiddenLayerWGrad = alg.matmult(alg.transpose(network[network.size() - 1].input), network[network.size() - 1].delta); + + cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[network.size() - 1].weights, network[network.size() - 1].lambda, network[network.size() - 1].alpha, network[network.size() - 1].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well. 
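+ // Standard backpropagation recurrences, used for both gradient passes in this file:
+ // delta_out = dC/dy_hat (elementwise *) f'(z_out), delta_l = (delta_{l+1} W_{l+1}^T) (elementwise *) f'(z_l),
+ // and the weight gradient for layer l is input_l^T delta_l, with the regularization derivative added on.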
+ + //std::cout << "HIDDENLAYER FIRST:" << hiddenLayerWGrad.size() << "x" << hiddenLayerWGrad[0].size() << std::endl; + //std::cout << "WEIGHTS SECOND:" << network[network.size() - 1].weights.size() << "x" << network[network.size() - 1].weights[0].size() << std::endl; + + for(int i = network.size() - 2; i > network.size()/2; i--){ + auto hiddenLayerAvn = network[i].activation_map[network[i].activation]; + network[i].delta = alg.hadamard_product(alg.matmult(network[i + 1].delta, alg.transpose(network[i + 1].weights)), (avn.*hiddenLayerAvn)(network[i].z, 1)); + std::vector> hiddenLayerWGrad = alg.matmult(alg.transpose(network[i].input), network[i].delta); + + cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well. + + } + } + return {cumulativeHiddenLayerWGrad, outputWGrad}; + } + + std::vector>> GAN::computeGeneratorGradients(std::vector y_hat, std::vector outputSet){ + class Cost cost; + Activation avn; + LinAlg alg; + Reg regularization; + + std::vector>> cumulativeHiddenLayerWGrad; // Tensor containing ALL hidden grads. + + auto costDeriv = outputLayer->costDeriv_map[outputLayer->cost]; + auto outputAvn = outputLayer->activation_map[outputLayer->activation]; + outputLayer->delta = alg.hadamard_product((cost.*costDeriv)(y_hat, outputSet), (avn.*outputAvn)(outputLayer->z, 1)); + std::vector outputWGrad = alg.mat_vec_mult(alg.transpose(outputLayer->input), outputLayer->delta); + outputWGrad = alg.addition(outputWGrad, regularization.regDerivTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg)); + if(!network.empty()){ + auto hiddenLayerAvn = network[network.size() - 1].activation_map[network[network.size() - 1].activation]; + network[network.size() - 1].delta = alg.hadamard_product(alg.outerProduct(outputLayer->delta, outputLayer->weights), (avn.*hiddenLayerAvn)(network[network.size() - 1].z, 1)); + std::vector> hiddenLayerWGrad = alg.matmult(alg.transpose(network[network.size() - 1].input), network[network.size() - 1].delta); + cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[network.size() - 1].weights, network[network.size() - 1].lambda, network[network.size() - 1].alpha, network[network.size() - 1].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well. + + for(int i = network.size() - 2; i >= 0; i--){ + auto hiddenLayerAvn = network[i].activation_map[network[i].activation]; + network[i].delta = alg.hadamard_product(alg.matmult(network[i + 1].delta, alg.transpose(network[i + 1].weights)), (avn.*hiddenLayerAvn)(network[i].z, 1)); + std::vector> hiddenLayerWGrad = alg.matmult(alg.transpose(network[i].input), network[i].delta); + cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well. 
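+ // Unlike the discriminator pass, this loop runs all the way down to layer 0, so the error is carried
+ // through the discriminator's layers back into the generator's layers; updateGeneratorParameters then
+ // applies only the entries belonging to the generator half (layers 0 .. network.size()/2).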
+ } + } + return cumulativeHiddenLayerWGrad; + } + + void GAN::UI(int epoch, double cost_prev, std::vector y_hat, std::vector outputSet){ + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); + std::cout << "Layer " << network.size() + 1 << ": " << std::endl; + Utilities::UI(outputLayer->weights, outputLayer->bias); + if(!network.empty()){ + for(int i = network.size() - 1; i >= 0; i--){ + std::cout << "Layer " << i + 1 << ": " << std::endl; + Utilities::UI(network[i].weights, network[i].bias); + } + } + } +} \ No newline at end of file diff --git a/MLPP/GAN/GAN.hpp b/MLPP/GAN/GAN.hpp new file mode 100644 index 0000000..78f4b82 --- /dev/null +++ b/MLPP/GAN/GAN.hpp @@ -0,0 +1,56 @@ +// +// GAN.hpp +// +// Created by Marc Melikyan on 11/4/20. +// + +#ifndef GAN_hpp +#define GAN_hpp + +#include "HiddenLayer/HiddenLayer.hpp" +#include "OutputLayer/OutputLayer.hpp" + +#include +#include +#include + +namespace MLPP{ + +class GAN{ + public: + GAN(double k, std::vector> outputSet); + ~GAN(); + std::vector> generateExample(int n); + void gradientDescent(double learning_rate, int max_epoch, bool UI = 1); + double score(); + void save(std::string fileName); + + void addLayer(int n_hidden, std::string activation, std::string weightInit = "Default", std::string reg = "None", double lambda = 0.5, double alpha = 0.5); + void addOutputLayer(std::string weightInit = "Default", std::string reg = "None", double lambda = 0.5, double alpha = 0.5); + + private: + std::vector> modelSetTestGenerator(std::vector> X); // Evaluator for the generator of the gan. + std::vector modelSetTestDiscriminator(std::vector> X); // Evaluator for the discriminator of the gan. + + double Cost(std::vector y_hat, std::vector y); + + void forwardPass(); + void updateDiscriminatorParameters(std::vector>> hiddenLayerUpdations, std::vector outputLayerUpdation, double learning_rate); + void updateGeneratorParameters(std::vector>> hiddenLayerUpdations, double learning_rate); + std::tuple>>, std::vector> computeDiscriminatorGradients(std::vector y_hat, std::vector outputSet); + std::vector>> computeGeneratorGradients(std::vector y_hat, std::vector outputSet); + + void UI(int epoch, double cost_prev, std::vector y_hat, std::vector outputSet); + + std::vector> outputSet; + std::vector y_hat; + + std::vector network; + OutputLayer *outputLayer; + + int n; + int k; + }; +} + +#endif /* GAN_hpp */ \ No newline at end of file diff --git a/MLPP/GaussMarkovChecker/GaussMarkovChecker.cpp b/MLPP/GaussMarkovChecker/GaussMarkovChecker.cpp new file mode 100644 index 0000000..a037ce5 --- /dev/null +++ b/MLPP/GaussMarkovChecker/GaussMarkovChecker.cpp @@ -0,0 +1,59 @@ +// +// GaussMarkovChecker.cpp +// +// Created by Marc Melikyan on 11/13/20. +// + +#include "GaussMarkovChecker.hpp" +#include "Stat/Stat.hpp" +#include + + +namespace MLPP{ + void GaussMarkovChecker::checkGMConditions(std::vector eps){ + bool condition1 = arithmeticMean(eps); + bool condition2 = homoscedasticity(eps); + bool condition3 = exogeneity(eps); + + if(condition1 && condition2 && condition3){ + std::cout << "Gauss-Markov conditions were not violated. You may use OLS to obtain a BLUE estimator" << std::endl; + } + else{ + std::cout << "A test of the expected value of 0 of the error terms returned " << std::boolalpha << condition1 << ", a test of homoscedasticity has returned " << std::boolalpha << condition2 << ", and a test of exogenity has returned " << std::boolalpha << "." 
<< std::endl; + } + + } + + bool GaussMarkovChecker::arithmeticMean(std::vector eps){ + Stat stat; + if(stat.mean(eps) == 0) { + return 1; + } + else { return 0; } + } + + bool GaussMarkovChecker::homoscedasticity(std::vector eps){ + Stat stat; + double currentVar = (eps[0] - stat.mean(eps)) * (eps[0] - stat.mean(eps)) / eps.size(); + for(int i = 0; i < eps.size(); i++){ + if(currentVar != (eps[i] - stat.mean(eps)) * (eps[i] - stat.mean(eps)) / eps.size()){ + return 0; + } + } + return 1; + } + + bool GaussMarkovChecker::exogeneity(std::vector eps){ + Stat stat; + for(int i = 0; i < eps.size(); i++){ + for(int j = 0; j < eps.size(); j++){ + if(i != j){ + if((eps[i] - stat.mean(eps)) * (eps[j] - stat.mean(eps)) / eps.size() != 0){ + return 0; + } + } + } + } + return 1; + } +} diff --git a/MLPP/GaussMarkovChecker/GaussMarkovChecker.hpp b/MLPP/GaussMarkovChecker/GaussMarkovChecker.hpp new file mode 100644 index 0000000..4944d4d --- /dev/null +++ b/MLPP/GaussMarkovChecker/GaussMarkovChecker.hpp @@ -0,0 +1,27 @@ +// +// GaussMarkovChecker.hpp +// +// Created by Marc Melikyan on 11/13/20. +// + +#ifndef GaussMarkovChecker_hpp +#define GaussMarkovChecker_hpp + +#include +#include + +namespace MLPP{ + class GaussMarkovChecker{ + public: + void checkGMConditions(std::vector eps); + + // Independent, 3 Gauss-Markov Conditions + bool arithmeticMean(std::vector eps); // 1) Arithmetic Mean of 0. + bool homoscedasticity(std::vector eps); // 2) Homoscedasticity + bool exogeneity(std::vector eps); // 3) Cov of any 2 non-equal eps values = 0. + private: + + }; +} + +#endif /* GaussMarkovChecker_hpp */ diff --git a/MLPP/GaussianNB/GaussianNB.cpp b/MLPP/GaussianNB/GaussianNB.cpp new file mode 100644 index 0000000..00acbc7 --- /dev/null +++ b/MLPP/GaussianNB/GaussianNB.cpp @@ -0,0 +1,92 @@ +// +// GaussianNB.cpp +// +// Created by Marc Melikyan on 1/17/21. 
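// Reference: for Gaussian naive Bayes, the textbook per-class log-score is
//   log p(c) + sum_k log N(x_k ; mu_c, sigma_c^2),
// and the predicted class is the argmax over c. The implementation below keeps one
// mean/standard-deviation pair and one prior per class and takes the argmax of the
// exponentiated per-class scores.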
+// + +#include "GaussianNB.hpp" +#include "Stat/Stat.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Utilities/Utilities.hpp" + +#include +#include +#include + +namespace MLPP{ + GaussianNB::GaussianNB(std::vector> inputSet, std::vector outputSet, int class_num) + : inputSet(inputSet), outputSet(outputSet), class_num(class_num) + { + y_hat.resize(outputSet.size()); + Evaluate(); + LinAlg alg; + } + + std::vector GaussianNB::modelSetTest(std::vector> X){ + std::vector y_hat; + for(int i = 0; i < X.size(); i++){ + y_hat.push_back(modelTest(X[i])); + } + return y_hat; + } + + double GaussianNB::modelTest(std::vector x){ + Stat stat; + LinAlg alg; + + double score[class_num]; + double y_hat_i = 1; + for(int i = class_num - 1; i >= 0; i--){ + y_hat_i += std::log(priors[i] * (1 / sqrt(2 * M_PI * sigma[i] * sigma[i])) * exp(-(x[i] * mu[i]) * (x[i] * mu[i]) / (2 * sigma[i] * sigma[i]))); + score[i] = exp(y_hat_i); + } + return std::distance(score, std::max_element(score, score + sizeof(score) / sizeof(double))); + } + + double GaussianNB::score(){ + Utilities util; + return util.performance(y_hat, outputSet); + } + + void GaussianNB::Evaluate(){ + Stat stat; + LinAlg alg; + + // Computing mu_k_y and sigma_k_y + mu.resize(class_num); + sigma.resize(class_num); + for(int i = class_num - 1; i >= 0; i--){ + std::vector set; + for(int j = 0; j < inputSet.size(); j++){ + for(int k = 0; k < inputSet[j].size(); k++){ + if(outputSet[j] == i){ + set.push_back(inputSet[j][k]); + } + } + } + mu[i] = stat.mean(set); + sigma[i] = stat.standardDeviation(set); + } + + // Priors + priors.resize(class_num); + for(int i = 0; i < outputSet.size(); i++){ + priors[int(outputSet[i])]++; + } + priors = alg.scalarMultiply( double(1)/double(outputSet.size()), priors); + + for(int i = 0; i < outputSet.size(); i++){ + double score[class_num]; + double y_hat_i = 1; + for(int j = class_num - 1; j >= 0; j--){ + for(int k = 0; k < inputSet[i].size(); k++){ + y_hat_i += std::log(priors[j] * (1 / sqrt(2 * M_PI * sigma[j] * sigma[j])) * exp(-(inputSet[i][k] * mu[j]) * (inputSet[i][k] * mu[j]) / (2 * sigma[j] * sigma[j]))); + } + score[j] = exp(y_hat_i); + std::cout << score[j] << std::endl; + } + y_hat[i] = std::distance(score, std::max_element(score, score + sizeof(score) / sizeof(double))); + std::cout << std::distance(score, std::max_element(score, score + sizeof(score) / sizeof(double))) << std::endl; + } + } +} \ No newline at end of file diff --git a/MLPP/GaussianNB/GaussianNB.hpp b/MLPP/GaussianNB/GaussianNB.hpp new file mode 100644 index 0000000..636ed42 --- /dev/null +++ b/MLPP/GaussianNB/GaussianNB.hpp @@ -0,0 +1,42 @@ +// +// GaussianNB.hpp +// +// Created by Marc Melikyan on 1/17/21. +// + +#ifndef GaussianNB_hpp +#define GaussianNB_hpp + +#include + +namespace MLPP{ + class GaussianNB{ + + public: + GaussianNB(std::vector> inputSet, std::vector outputSet, int class_num); + std::vector modelSetTest(std::vector> X); + double modelTest(std::vector x); + double score(); + + private: + + void Evaluate(); + + int class_num; + + std::vector priors; + std::vector mu; + std::vector sigma; + + std::vector> inputSet; + std::vector outputSet; + + std::vector y_hat; + + + + + }; + + #endif /* GaussianNB_hpp */ +} \ No newline at end of file diff --git a/MLPP/HiddenLayer/HiddenLayer.cpp b/MLPP/HiddenLayer/HiddenLayer.cpp new file mode 100644 index 0000000..035d019 --- /dev/null +++ b/MLPP/HiddenLayer/HiddenLayer.cpp @@ -0,0 +1,114 @@ +// +// HiddenLayer.cpp +// +// Created by Marc Melikyan on 11/4/20. 
+// + +#include "HiddenLayer.hpp" +#include "Activation/Activation.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Utilities/Utilities.hpp" + +#include +#include + +namespace MLPP { + HiddenLayer::HiddenLayer(int n_hidden, std::string activation, std::vector> input, std::string weightInit, std::string reg, double lambda, double alpha) + : n_hidden(n_hidden), activation(activation), input(input), weightInit(weightInit), reg(reg), lambda(lambda), alpha(alpha) + { + weights = Utilities::weightInitialization(input[0].size(), n_hidden, weightInit); + bias = Utilities::biasInitialization(n_hidden); + + activation_map["Linear"] = &Activation::linear; + activationTest_map["Linear"] = &Activation::linear; + + activation_map["Sigmoid"] = &Activation::sigmoid; + activationTest_map["Sigmoid"] = &Activation::sigmoid; + + activation_map["Swish"] = &Activation::swish; + activationTest_map["Swish"] = &Activation::swish; + + activation_map["Mish"] = &Activation::mish; + activationTest_map["Mish"] = &Activation::mish; + + activation_map["SinC"] = &Activation::sinc; + activationTest_map["SinC"] = &Activation::sinc; + + activation_map["Softplus"] = &Activation::softplus; + activationTest_map["Softplus"] = &Activation::softplus; + + activation_map["Softsign"] = &Activation::softsign; + activationTest_map["Softsign"] = &Activation::softsign; + + activation_map["CLogLog"] = &Activation::cloglog; + activationTest_map["CLogLog"] = &Activation::cloglog; + + activation_map["Logit"] = &Activation::logit; + activationTest_map["Logit"] = &Activation::logit; + + activation_map["GaussianCDF"] = &Activation::gaussianCDF; + activationTest_map["GaussianCDF"] = &Activation::gaussianCDF; + + activation_map["RELU"] = &Activation::RELU; + activationTest_map["RELU"] = &Activation::RELU; + + activation_map["GELU"] = &Activation::GELU; + activationTest_map["GELU"] = &Activation::GELU; + + activation_map["Sign"] = &Activation::sign; + activationTest_map["Sign"] = &Activation::sign; + + activation_map["UnitStep"] = &Activation::unitStep; + activationTest_map["UnitStep"] = &Activation::unitStep; + + activation_map["Sinh"] = &Activation::sinh; + activationTest_map["Sinh"] = &Activation::sinh; + + activation_map["Cosh"] = &Activation::cosh; + activationTest_map["Cosh"] = &Activation::cosh; + + activation_map["Tanh"] = &Activation::tanh; + activationTest_map["Tanh"] = &Activation::tanh; + + activation_map["Csch"] = &Activation::csch; + activationTest_map["Csch"] = &Activation::csch; + + activation_map["Sech"] = &Activation::sech; + activationTest_map["Sech"] = &Activation::sech; + + activation_map["Coth"] = &Activation::coth; + activationTest_map["Coth"] = &Activation::coth; + + activation_map["Arsinh"] = &Activation::arsinh; + activationTest_map["Arsinh"] = &Activation::arsinh; + + activation_map["Arcosh"] = &Activation::arcosh; + activationTest_map["Arcosh"] = &Activation::arcosh; + + activation_map["Artanh"] = &Activation::artanh; + activationTest_map["Artanh"] = &Activation::artanh; + + activation_map["Arcsch"] = &Activation::arcsch; + activationTest_map["Arcsch"] = &Activation::arcsch; + + activation_map["Arsech"] = &Activation::arsech; + activationTest_map["Arsech"] = &Activation::arsech; + + activation_map["Arcoth"] = &Activation::arcoth; + activationTest_map["Arcoth"] = &Activation::arcoth; + } + + void HiddenLayer::forwardPass(){ + LinAlg alg; + Activation avn; + z = alg.mat_vec_add(alg.matmult(input, weights), bias); + a = (avn.*activation_map[activation])(z, 0); + } + + void HiddenLayer::Test(std::vector x){ + LinAlg alg; + 
Activation avn; + z_test = alg.addition(alg.mat_vec_mult(alg.transpose(weights), x), bias); + a_test = (avn.*activationTest_map[activation])(z_test, 0); + } +} \ No newline at end of file diff --git a/MLPP/HiddenLayer/HiddenLayer.hpp b/MLPP/HiddenLayer/HiddenLayer.hpp new file mode 100644 index 0000000..b243043 --- /dev/null +++ b/MLPP/HiddenLayer/HiddenLayer.hpp @@ -0,0 +1,52 @@ +// +// HiddenLayer.hpp +// +// Created by Marc Melikyan on 11/4/20. +// + +#ifndef HiddenLayer_hpp +#define HiddenLayer_hpp + +#include "Activation/Activation.hpp" + +#include +#include +#include + +namespace MLPP { + class HiddenLayer{ + public: + HiddenLayer(int n_hidden, std::string activation, std::vector> input, std::string weightInit, std::string reg, double lambda, double alpha); + + int n_hidden; + std::string activation; + + std::vector> input; + + std::vector> weights; + std::vector bias; + + std::vector> z; + std::vector> a; + + std::map> (Activation::*)(std::vector>, bool)> activation_map; + std::map (Activation::*)(std::vector, bool)> activationTest_map; + + std::vector z_test; + std::vector a_test; + + std::vector> delta; + + // Regularization Params + std::string reg; + double lambda; /* Regularization Parameter */ + double alpha; /* This is the controlling param for Elastic Net*/ + + std::string weightInit; + + void forwardPass(); + void Test(std::vector x); + }; +} + +#endif /* HiddenLayer_hpp */ \ No newline at end of file diff --git a/MLPP/HypothesisTesting/HypothesisTesting.cpp b/MLPP/HypothesisTesting/HypothesisTesting.cpp new file mode 100644 index 0000000..d0e4477 --- /dev/null +++ b/MLPP/HypothesisTesting/HypothesisTesting.cpp @@ -0,0 +1,19 @@ +// +// HypothesisTesting.cpp +// +// Created by Marc Melikyan on 3/10/21. +// + +#include "HypothesisTesting.hpp" + +namespace MLPP{ + + std::tuple HypothesisTesting::chiSquareTest(std::vector observed, std::vector expected){ + double df = observed.size() - 1; // These are our degrees of freedom + double sum = 0; + for(int i = 0; i < observed.size(); i++){ + sum += (observed[i] - expected[i]) * (observed[i] - expected[i]) / expected[i]; + } + } + +} \ No newline at end of file diff --git a/MLPP/HypothesisTesting/HypothesisTesting.hpp b/MLPP/HypothesisTesting/HypothesisTesting.hpp new file mode 100644 index 0000000..4764f62 --- /dev/null +++ b/MLPP/HypothesisTesting/HypothesisTesting.hpp @@ -0,0 +1,24 @@ +// +// HypothesisTesting.hpp +// +// Created by Marc Melikyan on 3/10/21. +// + +#ifndef HypothesisTesting_hpp +#define HypothesisTesting_hpp + +#include +#include + +namespace MLPP{ + class HypothesisTesting{ + + public: + std::tuple chiSquareTest(std::vector observed, std::vector expected); + + private: + + }; +} + +#endif /* HypothesisTesting_hpp */ diff --git a/MLPP/KMeans/KMeans.cpp b/MLPP/KMeans/KMeans.cpp new file mode 100644 index 0000000..2b0d4a8 --- /dev/null +++ b/MLPP/KMeans/KMeans.cpp @@ -0,0 +1,235 @@ +// +// KMeans.cpp +// +// Created by Marc Melikyan on 10/2/20. 
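// Reference: the objective minimized here is J = sum_n sum_k r_nk * ||x_n - mu_k||^2.
// Evaluate() assigns r_nk (1 for the nearest centroid, 0 otherwise), computeMu()
// re-estimates each mu_k as the mean of the points assigned to it, and Cost() returns J.
//
// Illustrative usage sketch (added for this review, not part of the original patch);
// `X` is assumed to be a std::vector<std::vector<double>> dataset:
//
//     KMeans kmeans(X, 3, "KMeans++");
//     kmeans.train(10, false);                        // run at most 10 update rounds
//     std::vector<double> scores = kmeans.silhouette_scores();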
+// + +#include "KMeans.hpp" +#include "Utilities/Utilities.hpp" +#include "LinAlg/LinAlg.hpp" + +#include +#include +#include + +namespace MLPP{ + KMeans::KMeans(std::vector> inputSet, int k, std::string init_type) + : inputSet(inputSet), k(k), init_type(init_type) + { + if(init_type == "KMeans++"){ + kmeansppInitialization(k); + } + else{ + centroidInitialization(k); + } + } + + std::vector> KMeans::modelSetTest(std::vector> X){ + LinAlg alg; + std::vector> closestCentroids; + for(int i = 0; i < inputSet.size(); i++){ + std::vector closestCentroid = mu[0]; + for(int j = 0; j < r[0].size(); j++){ + bool isCentroidCloser = alg.euclideanDistance(X[i], mu[j]) < alg.euclideanDistance(X[i], closestCentroid); + if(isCentroidCloser){ + closestCentroid = mu[j]; + } + } + closestCentroids.push_back(closestCentroid); + } + return closestCentroids; + } + + std::vector KMeans::modelTest(std::vector x){ + LinAlg alg; + std::vector closestCentroid = mu[0]; + for(int j = 0; j < mu.size(); j++){ + if(alg.euclideanDistance(x, mu[j]) < alg.euclideanDistance(x, closestCentroid)){ + closestCentroid = mu[j]; + } + } + return closestCentroid; + } + + void KMeans::train(int epoch_num, bool UI){ + double cost_prev = 0; + int epoch = 1; + + Evaluate(); + + while(true){ + + // STEPS OF THE ALGORITHM + // 1. DETERMINE r_nk + // 2. DETERMINE J + // 3. DETERMINE mu_k + + // STOP IF CONVERGED, ELSE REPEAT + + cost_prev = Cost(); + + computeMu(); + Evaluate(); + + // UI PORTION + if(UI) { Utilities::CostInfo(epoch, cost_prev, Cost()); } + epoch++; + + if(epoch > epoch_num) { break; } + + } + } + + double KMeans::score(){ + return Cost(); + } + + std::vector KMeans::silhouette_scores(){ + LinAlg alg; + std::vector> closestCentroids = modelSetTest(inputSet); + std::vector silhouette_scores; + for(int i = 0; i < inputSet.size(); i++){ + // COMPUTING a[i] + double a = 0; + for(int j = 0; j < inputSet.size(); j++){ + if(i != j && r[i] == r[j]){ + a += alg.euclideanDistance(inputSet[i], inputSet[j]); + } + } + // NORMALIZE a[i] + a /= closestCentroids[i].size() - 1; + + + // COMPUTING b[i] + double b = INT_MAX; + for(int j = 0; j < mu.size(); j++){ + if(closestCentroids[i] != mu[j]){ + double sum = 0; + for(int k = 0; k < inputSet.size(); k++){ + sum += alg.euclideanDistance(inputSet[i], inputSet[k]); + } + // NORMALIZE b[i] + double k_clusterSize = 0; + for(int k = 0; k < closestCentroids.size(); k++){ + if(closestCentroids[k] == mu[j]){ + k_clusterSize++; + } + } + if(sum / k_clusterSize < b) { b = sum / k_clusterSize; } + } + } + silhouette_scores.push_back((b - a)/fmax(a, b)); + // Or the expanded version: + // if(a < b) { + // silhouette_scores.push_back(1 - a/b); + // } + // else if(a == b){ + // silhouette_scores.push_back(0); + // } + // else{ + // silhouette_scores.push_back(b/a - 1); + // } + } + return silhouette_scores; + } + + // This simply computes r_nk + void KMeans::Evaluate(){ + LinAlg alg; + r.resize(inputSet.size()); + + for(int i = 0; i < r.size(); i++){ + r[i].resize(k); + } + + for(int i = 0; i < r.size(); i++){ + std::vector closestCentroid = mu[0]; + for(int j = 0; j < r[0].size(); j++){ + bool isCentroidCloser = alg.euclideanDistance(inputSet[i], mu[j]) < alg.euclideanDistance(inputSet[i], closestCentroid); + if(isCentroidCloser){ + closestCentroid = mu[j]; + } + } + for(int j = 0; j < r[0].size(); j++){ + if(mu[j] == closestCentroid) { + r[i][j] = 1; + } + else { r[i][j] = 0; } + } + } + + } + + // This simply computes or re-computes mu_k + void KMeans::computeMu(){ + LinAlg alg; + for(int i = 0; i 
< mu.size(); i++){ + std::vector num; + num.resize(r.size()); + + for(int i = 0; i < num.size(); i++){ + num[i] = 0; + } + + double den = 0; + for(int j = 0; j < r.size(); j++){ + num = alg.addition(num, alg.scalarMultiply(r[j][i], inputSet[j])); + } + for(int j = 0; j < r.size(); j++){ + den += r[j][i]; + } + mu[i] = alg.scalarMultiply(double(1)/double(den), num); + } + + } + + void KMeans::centroidInitialization(int k){ + mu.resize(k); + + for(int i = 0; i < k; i++){ + std::random_device rd; + std::default_random_engine generator(rd()); + std::uniform_int_distribution distribution(0, int(inputSet.size() - 1)); + + mu[i].resize(inputSet.size()); + mu[i] = inputSet[distribution(generator)]; + } + } + + void KMeans::kmeansppInitialization(int k){ + LinAlg alg; + std::random_device rd; + std::default_random_engine generator(rd()); + std::uniform_int_distribution distribution(0, int(inputSet.size() - 1)); + mu.push_back(inputSet[distribution(generator)]); + + for(int i = 0; i < k - 1; i++){ + std::vector farthestCentroid; + for(int j = 0; j < inputSet.size(); j++){ + double max_dist = 0; + /* SUM ALL THE SQUARED DISTANCES, CHOOSE THE ONE THAT'S FARTHEST + AS TO SPREAD OUT THE CLUSTER CENTROIDS. */ + double sum = 0; + for(int k = 0; k < mu.size(); k++){ + sum += alg.euclideanDistance(inputSet[j], mu[k]); + } + if(sum * sum > max_dist){ + farthestCentroid = inputSet[j]; + max_dist = sum * sum; + } + } + mu.push_back(farthestCentroid); + } + } + + double KMeans::Cost(){ + LinAlg alg; + double sum = 0; + for(int i = 0; i < r.size(); i++){ + for(int j = 0; j < r[0].size(); j++){ + sum += r[i][j] * alg.norm_sq(alg.subtraction(inputSet[i], mu[j])); + } + } + return sum; + } +} diff --git a/MLPP/KMeans/KMeans.hpp b/MLPP/KMeans/KMeans.hpp new file mode 100644 index 0000000..281bec8 --- /dev/null +++ b/MLPP/KMeans/KMeans.hpp @@ -0,0 +1,45 @@ +// +// KMeans.hpp +// +// Created by Marc Melikyan on 10/2/20. +// + +#ifndef KMeans_hpp +#define KMeans_hpp + +#include +#include + +namespace MLPP{ + class KMeans{ + + public: + KMeans(std::vector> inputSet, int k, std::string init_type = "Default"); + std::vector> modelSetTest(std::vector> X); + std::vector modelTest(std::vector x); + void train(int epoch_num, bool UI = 1); + double score(); + std::vector silhouette_scores(); + private: + + void Evaluate(); + void computeMu(); + + void centroidInitialization(int k); + void kmeansppInitialization(int k); + double Cost(); + + std::vector> inputSet; + std::vector> mu; + std::vector> r; + + double euclideanDistance(std::vector A, std::vector B); + + double accuracy_threshold; + int k; + + std::string init_type; + }; +} + +#endif /* KMeans_hpp */ diff --git a/MLPP/LinAlg/LinAlg.cpp b/MLPP/LinAlg/LinAlg.cpp new file mode 100644 index 0000000..2c9ab5a --- /dev/null +++ b/MLPP/LinAlg/LinAlg.cpp @@ -0,0 +1,1231 @@ +// +// LinAlg.cpp +// +// Created by Marc Melikyan on 1/8/21. 
+// + +#include "LinAlg.hpp" +#include "Stat/Stat.hpp" +#include +#include +#include +#include + +namespace MLPP{ + + std::vector> LinAlg::gramMatrix(std::vector> A){ + return matmult(transpose(A), A); // AtA + } + + bool LinAlg::linearIndependenceChecker(std::vector> A){ + if (det(gramMatrix(A), A.size()) == 0){ + return false; + } + return true; + } + + std::vector> LinAlg::gaussianNoise(int n, int m){ + std::random_device rd; + std::default_random_engine generator(rd()); + + std::vector> A; + A.resize(n); + for(int i = 0; i < n; i++){ + A[i].resize(m); + for(int j = 0; j < m; j++){ + std::normal_distribution distribution(0, 1); // Standard normal distribution. Mean of 0, std of 1. + A[i][j] = distribution(generator); + } + } + return A; + } + + std::vector> LinAlg::addition(std::vector> A, std::vector> B){ + std::vector> C; + C.resize(A.size()); + for(int i = 0; i < C.size(); i++){ + C[i].resize(A[0].size()); + } + + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A[0].size(); j++){ + C[i][j] = A[i][j] + B[i][j]; + } + } + return C; + } + + std::vector> LinAlg::subtraction(std::vector> A, std::vector> B){ + std::vector> C; + C.resize(A.size()); + for(int i = 0; i < C.size(); i++){ + C[i].resize(A[0].size()); + } + + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A[0].size(); j++){ + C[i][j] = A[i][j] - B[i][j]; + } + } + return C; + } + + std::vector> LinAlg::matmult(std::vector> A, std::vector> B){ + std::vector> C; + C.resize(A.size()); + for(int i = 0; i < C.size(); i++){ + C[i].resize(B[0].size()); + } + + for(int i = 0; i < A.size(); i++){ + for(int k = 0; k < B.size(); k++){ + for(int j = 0; j < B[0].size(); j++){ + C[i][j] += A[i][k] * B[k][j]; + } + } + } + return C; + } + + std::vector> LinAlg::hadamard_product(std::vector> A, std::vector> B){ + std::vector> C; + C.resize(A.size()); + for(int i = 0; i < C.size(); i++){ + C[i].resize(A[0].size()); + } + + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A[0].size(); j++){ + C[i][j] = A[i][j] * B[i][j]; + } + } + return C; + } + + std::vector> LinAlg::kronecker_product(std::vector> A, std::vector> B){ + std::vector> C; + + // [1,1,1,1] [1,2,3,4,5] + // [1,1,1,1] [1,2,3,4,5] + // [1,2,3,4,5] + + // [1,2,3,4,5] [1,2,3,4,5] [1,2,3,4,5] [1,2,3,4,5] + // [1,2,3,4,5] [1,2,3,4,5] [1,2,3,4,5] [1,2,3,4,5] + // [1,2,3,4,5] [1,2,3,4,5] [1,2,3,4,5] [1,2,3,4,5] + // [1,2,3,4,5] [1,2,3,4,5] [1,2,3,4,5] [1,2,3,4,5] + // [1,2,3,4,5] [1,2,3,4,5] [1,2,3,4,5] [1,2,3,4,5] + // [1,2,3,4,5] [1,2,3,4,5] [1,2,3,4,5] [1,2,3,4,5] + + // Resulting matrix: A.size() * B.size() + // A[0].size() * B[0].size() + + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < B.size(); j++){ + std::vector> row; + for(int k = 0; k < A[0].size(); k++){ + row.push_back(scalarMultiply(A[i][k], B[j])); + } + C.push_back(flatten(row)); + } + } + return C; + } + + std::vector> LinAlg::elementWiseDivision(std::vector> A, std::vector> B){ + std::vector> C; + C.resize(A.size()); + for(int i = 0; i < C.size(); i++){ + C[i].resize(A[0].size()); + } + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A[i].size(); j++){ + C[i][j] = A[i][j] / B[i][j]; + } + } + return C; + } + + std::vector> LinAlg::transpose(std::vector> A){ + std::vector> AT; + AT.resize(A[0].size()); + for(int i = 0; i < AT.size(); i++){ + AT[i].resize(A.size()); + } + + for(int i = 0; i < A[0].size(); i++){ + for(int j = 0; j < A.size(); j++){ + AT[i][j] = A[j][i]; + } + } + return AT; + } + + std::vector> LinAlg::scalarMultiply(double scalar, std::vector> A){ + for(int i = 0; i < 
A.size(); i++){ + for(int j = 0; j < A[i].size(); j++){ + A[i][j] *= scalar; + } + } + return A; + } + + std::vector> LinAlg::scalarAdd(double scalar, std::vector> A){ + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A[i].size(); j++){ + A[i][j] += scalar; + } + } + return A; + } + + std::vector> LinAlg::log(std::vector> A){ + std::vector> B; + B.resize(A.size()); + for(int i = 0; i < B.size(); i++){ + B[i].resize(A[0].size()); + } + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A[i].size(); j++){ + B[i][j] = std::log(A[i][j]); + } + } + return B; + } + + std::vector> LinAlg::log10(std::vector> A){ + std::vector> B; + B.resize(A.size()); + for(int i = 0; i < B.size(); i++){ + B[i].resize(A[0].size()); + } + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A[i].size(); j++){ + B[i][j] = std::log10(A[i][j]); + } + } + return B; + } + + std::vector> LinAlg::exp(std::vector> A){ + std::vector> B; + B.resize(A.size()); + for(int i = 0; i < B.size(); i++){ + B[i].resize(A[0].size()); + } + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A[i].size(); j++){ + B[i][j] = std::exp(A[i][j]); + } + } + return B; + } + + std::vector> LinAlg::erf(std::vector> A){ + std::vector> B; + B.resize(A.size()); + for(int i = 0; i < B.size(); i++){ + B[i].resize(A[0].size()); + } + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A[i].size(); j++){ + B[i][j] = std::erf(A[i][j]); + } + } + return B; + } + + std::vector> LinAlg::exponentiate(std::vector> A, double p){ + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A[i].size(); j++){ + A[i][j] = std::pow(A[i][j], p); + } + } + return A; + } + + std::vector> LinAlg::sqrt(std::vector> A){ + return exponentiate(A, 0.5); + } + + std::vector> LinAlg::cbrt(std::vector> A){ + return exponentiate(A, double(1)/double(3)); + } + + std::vector> LinAlg::matrixPower(std::vector> A, int n){ + std::vector> B = identity(A.size()); + if(n == 0){ + return identity(A.size()); + } + else if(n < 0){ + A = inverse(A); + } + for(int i = 0; i < std::abs(n); i++){ + B = matmult(B, A); + } + return B; + } + + std::vector> LinAlg::abs(std::vector> A){ + std::vector> B; + B.resize(A.size()); + for(int i = 0; i < B.size(); i++){ + B[i].resize(A[0].size()); + } + for(int i = 0; i < B.size(); i++){ + for(int j = 0; j < B[i].size(); j++){ + B[i][j] = std::abs(A[i][j]); + } + } + return B; + } + + double LinAlg::det(std::vector> A, int d){ + + double deter = 0; + std::vector> B; + B.resize(d); + for(int i = 0; i < d; i++){ + B[i].resize(d); + } + + /* This is the base case in which the input is a 2x2 square matrix. + Recursion is performed unless and until we reach this base case, + such that we recieve a scalar as the result. 
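       For reference, the expansion used below is the Laplace (cofactor) expansion along
       the first row: det(A) = sum_i (-1)^i * A[0][i] * det(M_0i), where M_0i is A with
       row 0 and column i removed. This is O(n!) in the matrix dimension, so it is only
       practical for small matrices.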
*/ + if(d == 2){ + return A[0][0] * A[1][1] - A[0][1] * A[1][0]; + } + + else{ + for(int i = 0; i < d; i++){ + int sub_i = 0; + for(int j = 1; j < d; j++){ + int sub_j = 0; + for(int k = 0; k < d; k++){ + if(k == i){ + continue; + } + B[sub_i][sub_j] = A[j][k]; + sub_j++; + } + sub_i++; + } + deter += std::pow(-1, i) * A[0][i] * det(B, d-1); + } + } + return deter; + } + + double LinAlg::trace(std::vector> A){ + double trace = 0; + for(int i = 0; i < A.size(); i++){ + trace += A[i][i]; + } + return trace; + } + + std::vector> LinAlg::cofactor(std::vector> A, int n, int i, int j){ + std::vector> cof; + cof.resize(A.size()); + for(int i = 0; i < cof.size(); i++){ + cof[i].resize(A.size()); + } + int sub_i = 0, sub_j = 0; + + for (int row = 0; row < n; row++){ + for (int col = 0; col < n; col++){ + if (row != i && col != j) { + cof[sub_i][sub_j++] = A[row][col]; + + if (sub_j == n - 1){ + sub_j = 0; + sub_i++; + } + } + } + } + return cof; + } + + std::vector> LinAlg::adjoint(std::vector> A){ + + //Resizing the initial adjoint matrix + std::vector> adj; + adj.resize(A.size()); + for(int i = 0; i < adj.size(); i++){ + adj[i].resize(A.size()); + } + + // Checking for the case where the given N x N matrix is a scalar + if(A.size() == 1){ + adj[0][0] = 1; + return adj; + } + + if(A.size() == 2){ + adj[0][0] = A[1][1]; + adj[1][1] = A[0][0]; + + adj[0][1] = -A[0][1]; + adj[1][0] = -A[1][0]; + return adj; + } + + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A.size(); j++){ + std::vector> cof = cofactor(A, int(A.size()), i, j); + // 1 if even, -1 if odd + int sign = (i + j) % 2 == 0 ? 1 : -1; + adj[j][i] = sign * det(cof, int(A.size()) - 1); + } + } + return adj; + } + + // The inverse can be computed as (1 / determinant(A)) * adjoint(A) + std::vector> LinAlg::inverse(std::vector> A){ + return scalarMultiply(1/det(A, int(A.size())), adjoint(A)); + } + + // This is simply the Moore-Penrose least squares approximation of the inverse. 
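    // For a matrix A with full column rank, pinverse(A) = (A^T A)^{-1} A^T, and
    // pinverse(A) * b is the least-squares solution of the overdetermined system A x ~= b.
    // The helper below is an illustrative sketch added for this review (not part of the
    // original patch); the name is hypothetical and it only demonstrates intended usage.
    inline std::vector<double> leastSquaresSketch(LinAlg& alg,
                                                  std::vector<std::vector<double>> A,
                                                  std::vector<double> b){
        // x = pinverse(A) * b minimizes ||A x - b||_2 whenever A^T A is invertible.
        return alg.mat_vec_mult(alg.pinverse(A), b);
    }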
+ std::vector> LinAlg::pinverse(std::vector> A){ + return matmult(inverse(matmult(transpose(A), A)), transpose(A)); + } + + std::vector> LinAlg::zeromat(int n, int m){ + std::vector> zeromat; + zeromat.resize(n); + for(int i = 0; i < zeromat.size(); i++){ + zeromat[i].resize(m); + } + return zeromat; + } + + std::vector> LinAlg::onemat(int n, int m){ + return full(n, m, 1); + } + + std::vector> LinAlg::full(int n, int m, int k){ + std::vector> full; + full.resize(n); + for(int i = 0; i < full.size(); i++){ + full[i].resize(m); + } + for(int i = 0; i < full.size(); i++){ + for(int j = 0; j < full[i].size(); j++){ + full[i][j] = k; + } + } + return full; + } + + std::vector> LinAlg::sin(std::vector> A){ + std::vector> B; + B.resize(A.size()); + for(int i = 0; i < B.size(); i++){ + B[i].resize(A[0].size()); + } + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A[i].size(); j++){ + B[i][j] = std::sin(A[i][j]); + } + } + return B; + } + + std::vector> LinAlg::cos(std::vector> A){ + std::vector> B; + B.resize(A.size()); + for(int i = 0; i < B.size(); i++){ + B[i].resize(A[0].size()); + } + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A[i].size(); j++){ + B[i][j] = std::cos(A[i][j]); + } + } + return B; + } + + std::vector LinAlg::max(std::vector a, std::vector b){ + std::vector c; + c.resize(a.size()); + for(int i = 0; i < c.size(); i++){ + if(a[i] >= b[i]) { + c[i] = a[i]; + } + else { c[i] = b[i]; } + } + return c; + } + + double LinAlg::max(std::vector> A){ + return max(flatten(A)); + } + + double LinAlg::min(std::vector> A){ + return min(flatten(A)); + } + + std::vector> LinAlg::round(std::vector> A){ + std::vector> B; + B.resize(A.size()); + for(int i = 0; i < B.size(); i++){ + B[i].resize(A[0].size()); + } + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A[i].size(); j++){ + B[i][j] = std::round(A[i][j]); + } + } + return B; + } + + double LinAlg::norm_2(std::vector> A){ + double sum = 0; + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A[i].size(); j++){ + sum += A[i][j] * A[i][j]; + } + } + return std::sqrt(sum); + } + + std::vector> LinAlg::identity(double d){ + std::vector> identityMat; + identityMat.resize(d); + for(int i = 0; i < identityMat.size(); i++){ + identityMat[i].resize(d); + } + for(int i = 0; i < identityMat.size(); i++){ + for(int j = 0; j < identityMat.size(); j++){ + if(i == j){ + identityMat[i][j] = 1; + } + else { identityMat[i][j] = 0; } + } + } + return identityMat; + } + + std::vector> LinAlg::cov(std::vector> A){ + Stat stat; + std::vector> covMat; + covMat.resize(A.size()); + for(int i = 0; i < covMat.size(); i++){ + covMat[i].resize(A.size()); + } + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A.size(); j++){ + covMat[i][j] = stat.covariance(A[i], A[j]); + } + } + return covMat; + } + + std::tuple>, std::vector>> LinAlg::eig(std::vector> A){ + /* + A (the entered parameter) in most use cases will be X'X, XX', etc. and must be symmetric. + That simply means that 1) X' = X and 2) X is a square matrix. This function that computes the + eigenvalues of a matrix is utilizing Jacobi's method. + */ + + double diagonal = true; // Perform the iterative Jacobi algorithm unless and until we reach a diagonal matrix which yields us the eigenvals. 
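        // Jacobi iteration, for reference: pick the largest off-diagonal |a_ij|, set
        //   theta = 0.5 * atan(2 * a_ij / (a_ii - a_jj))   (theta = pi/4 when a_ii == a_jj),
        // build the Givens rotation P from theta, update A <- P^{-1} A P, and accumulate
        // eigenvectors <- eigenvectors * P. Repeat until the off-diagonal entries vanish;
        // the diagonal of the final A then holds the eigenvalues.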
+ + std::map val_to_vec; + std::vector> a_new; + std::vector> eigenvectors = identity(A.size()); + do{ + double a_ij = A[0][1]; + double sub_i = 0; + double sub_j = 1; + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A[i].size(); j++){ + if(i != j && std::abs(A[i][j]) > a_ij){ + a_ij = A[i][j]; + sub_i = i; + sub_j = j; + } + else if(i != j && std::abs(A[i][j]) == a_ij){ + if(i < sub_i){ + a_ij = A[i][j]; + sub_i = i; + sub_j = j; + } + } + } + } + + double a_ii = A[sub_i][sub_i]; + double a_jj = A[sub_j][sub_j]; + double a_ji = A[sub_j][sub_i]; + double theta; + + if(a_ii == a_jj) { + theta = M_PI / 4; + } + else{ + theta = 0.5 * atan(2 * a_ij / (a_ii - a_jj)); + } + + std::vector> P = identity(A.size()); + P[sub_i][sub_j] = -std::sin(theta); + P[sub_i][sub_i] = std::cos(theta); + P[sub_j][sub_j] = std::cos(theta); + P[sub_j][sub_i] = std::sin(theta); + + a_new = matmult(matmult(inverse(P), A), P); + + for(int i = 0; i < a_new.size(); i++){ + for(int j = 0; j < a_new[i].size(); j++){ + if(i != j && std::round(a_new[i][j]) == 0){ + a_new[i][j] = 0; + } + } + } + + bool non_zero = false; + for(int i = 0; i < a_new.size(); i++){ + for(int j = 0; j < a_new[i].size(); j++){ + if(i != j && std::round(a_new[i][j]) != 0){ + non_zero = true; + } + } + } + + if(non_zero) { + diagonal = false; + } + else{ + diagonal = true; + } + + if(a_new == A){ + diagonal = true; + for(int i = 0; i < a_new.size(); i++){ + for(int j = 0; j < a_new[i].size(); j++){ + if(i != j){ + a_new[i][j] = 0; + } + } + } + } + + eigenvectors = matmult(eigenvectors, P); + A = a_new; + + } while(!diagonal); + + std::vector> a_new_prior = a_new; + + // Bubble Sort. Should change this later. + for(int i = 0; i < a_new.size() - 1; i++){ + for(int j = 0; j < a_new.size() - 1 - i; j++){ + if(a_new[j][j] < a_new[j + 1][j + 1]){ + double temp = a_new[j + 1][j + 1]; + a_new[j + 1][j + 1] = a_new[j][j]; + a_new[j][j] = temp; + } + } + } + + + for(int i = 0; i < a_new.size(); i++){ + for(int j = 0; j < a_new.size(); j++){ + if(a_new[i][i] == a_new_prior[j][j]){ + val_to_vec[i] = j; + } + } + } + + std::vector> eigen_temp = eigenvectors; + for(int i = 0; i < eigenvectors.size(); i++){ + for(int j = 0; j < eigenvectors[i].size(); j++){ + eigenvectors[i][j] = eigen_temp[i][val_to_vec[j]]; + } + } + return {eigenvectors, a_new}; + + } + + std::tuple>, std::vector>, std::vector>> LinAlg::SVD(std::vector> A){ + auto [left_eigenvecs, eigenvals] = eig(matmult(A, transpose(A))); + auto [right_eigenvecs, right_eigenvals] = eig(matmult(transpose(A), A)); + + std::vector> singularvals = sqrt(eigenvals); + std::vector> sigma = zeromat(A.size(), A[0].size()); + for(int i = 0; i < singularvals.size(); i++){ + for(int j = 0; j < singularvals[i].size(); j++){ + sigma[i][j] = singularvals[i][j]; + } + } + return {left_eigenvecs, sigma, right_eigenvecs}; + } + + std::vector LinAlg::vectorProjection(std::vector a, std::vector b){ + double product = dot(a, b)/dot(a, a); + return scalarMultiply(product, a); // Projection of vector a onto b. Denotated as proj_a(b). + } + + std::vector> LinAlg::gramSchmidtProcess(std::vector> A){ + A = transpose(A); // C++ vectors lack a mechanism to directly index columns. So, we transpose *a copy* of A for this purpose for ease of use. + std::vector> B; + B.resize(A.size()); + for(int i = 0; i < B.size(); i++){ + B[i].resize(A[0].size()); + } + + B[0] = A[0]; // We set a_1 = b_1 as an initial condition. 
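        // Classical Gram-Schmidt, for reference: b_i = a_i - sum_{j<i} proj_{b_j}(a_i),
        // with proj_u(v) = (<v,u> / <u,u>) * u, followed by normalization so the columns
        // of the result are orthonormal. QRD() below builds directly on this: Q comes
        // from this routine and R = Q^T A is upper triangular.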
+ B[0] = scalarMultiply(1/norm_2(B[0]), B[0]); + for(int i = 1; i < B.size(); i++){ + B[i] = A[i]; + for(int j = i-1; j >= 0; j--){ + B[i] = subtraction(B[i], vectorProjection(B[j], A[i])); + } + B[i] = scalarMultiply(1/norm_2(B[i]), B[i]); // Very simply multiply all elements of vec B[i] by 1/||B[i]||_2 + } + return transpose(B); // We re-transpose the marix. + } + + std::tuple>, std::vector>> LinAlg::QRD(std::vector> A){ + std::vector> Q = gramSchmidtProcess(A); + std::vector> R = matmult(transpose(Q), A); + return {Q, R}; + + } + + std::tuple>, std::vector>> LinAlg::chol(std::vector> A){ + std::vector> L = zeromat(A.size(), A[0].size()); + for(int j = 0; j < L.size(); j++){ // Matrices entered must be square. No problem here. + for(int i = j; i < L.size(); i++){ + if(i == j){ + double sum = 0; + for(int k = 0; k < j; k++){ + sum += L[i][k] * L[i][k]; + } + L[i][j] = std::sqrt(A[i][j] - sum); + } + else{ // That is, i!=j + double sum = 0; + for(int k = 0; k < j; k++){ + sum += L[i][k] * L[j][k]; + } + L[i][j] = (A[i][j] - sum)/L[j][j]; + } + } + } + return {L, transpose(L)}; // Indeed, L.T is our upper triangular matrix. + } + + double LinAlg::sum_elements(std::vector> A){ + double sum = 0; + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A[i].size(); j++){ + sum += A[i][j]; + } + } + return sum; + } + + std::vector LinAlg::flatten(std::vector> A){ + std::vector a; + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A[i].size(); j++){ + a.push_back(A[i][j]); + } + } + return a; + } + + std::vector LinAlg::solve(std::vector> A, std::vector b){ + return mat_vec_mult(inverse(A), b); + } + + bool LinAlg::positiveDefiniteChecker(std::vector> A){ + auto [eigenvectors, eigenvals] = eig(A); + std::vector eigenvals_vec; + for(int i = 0; i < eigenvals.size(); i++){ + eigenvals_vec.push_back(eigenvals[i][i]); + } + for(int i = 0; i < eigenvals_vec.size(); i++){ + if(eigenvals_vec[i] <= 0){ // Simply check to ensure all eigenvalues are positive. + return false; + } + } + return true; + } + + bool LinAlg::negativeDefiniteChecker(std::vector> A){ + auto [eigenvectors, eigenvals] = eig(A); + std::vector eigenvals_vec; + for(int i = 0; i < eigenvals.size(); i++){ + eigenvals_vec.push_back(eigenvals[i][i]); + } + for(int i = 0; i < eigenvals_vec.size(); i++){ + if(eigenvals_vec[i] >= 0){ // Simply check to ensure all eigenvalues are negative. 
+ return false; + } + } + return true; + } + + bool LinAlg::zeroEigenvalue(std::vector> A){ + auto [eigenvectors, eigenvals] = eig(A); + std::vector eigenvals_vec; + for(int i = 0; i < eigenvals.size(); i++){ + eigenvals_vec.push_back(eigenvals[i][i]); + } + for(int i = 0; i < eigenvals_vec.size(); i++){ + if(eigenvals_vec[i] == 0){ + return true; + } + } + return false; + } + + void LinAlg::printMatrix(std::vector> A){ + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A[i].size(); j++){ + std::cout << A[i][j] << " "; + } + std::cout << std::endl; + } + } + + std::vector> LinAlg::outerProduct(std::vector a, std::vector b){ + std::vector> C; + C.resize(a.size()); + for(int i = 0; i < C.size(); i++){ + C[i] = scalarMultiply(a[i], b); + } + return C; + } + + std::vector LinAlg::hadamard_product(std::vector a, std::vector b){ + std::vector c; + c.resize(a.size()); + + for(int i = 0; i < a.size(); i++){ + c[i] = a[i] * b[i]; + } + + return c; + } + + std::vector LinAlg::elementWiseDivision(std::vector a, std::vector b){ + std::vector c; + c.resize(a.size()); + + for(int i = 0; i < a.size(); i++){ + c[i] = a[i] / b[i]; + } + return c; + } + + std::vector LinAlg::scalarMultiply(double scalar, std::vector a){ + for(int i = 0; i < a.size(); i++){ + a[i] *= scalar; + } + return a; + } + + std::vector LinAlg::scalarAdd(double scalar, std::vector a){ + for(int i = 0; i < a.size(); i++){ + a[i] += scalar; + } + return a; + } + + std::vector LinAlg::addition(std::vector a, std::vector b){ + std::vector c; + c.resize(a.size()); + for(int i = 0; i < a.size(); i++){ + c[i] = a[i] + b[i]; + } + return c; + } + + std::vector LinAlg::subtraction(std::vector a, std::vector b){ + std::vector c; + c.resize(a.size()); + for(int i = 0; i < a.size(); i++){ + c[i] = a[i] - b[i]; + } + return c; + } + + std::vector LinAlg::subtractMatrixRows(std::vector a, std::vector> B){ + for(int i = 0; i < B.size(); i++){ + a = subtraction(a, B[i]); + } + return a; + } + + std::vector LinAlg::log(std::vector a){ + std::vector b; + b.resize(a.size()); + for(int i = 0; i < a.size(); i++){ + b[i] = std::log(a[i]); + } + return b; + } + + std::vector LinAlg::log10(std::vector a){ + std::vector b; + b.resize(a.size()); + for(int i = 0; i < a.size(); i++){ + b[i] = std::log10(a[i]); + } + return b; + } + + std::vector LinAlg::exp(std::vector a){ + std::vector b; + b.resize(a.size()); + for(int i = 0; i < a.size(); i++){ + b[i] = std::exp(a[i]); + } + return b; + } + + std::vector LinAlg::erf(std::vector a){ + std::vector b; + b.resize(a.size()); + for(int i = 0; i < a.size(); i++){ + b[i] = std::erf(a[i]); + } + return b; + } + + std::vector LinAlg::exponentiate(std::vector a, double p){ + std::vector b; + b.resize(a.size()); + for(int i = 0; i < b.size(); i++){ + b[i] = std::pow(a[i], p); + } + return b; + } + + std::vector LinAlg::sqrt(std::vector a){ + return exponentiate(a, 0.5); + } + + std::vector LinAlg::cbrt(std::vector a){ + return exponentiate(a, double(1)/double(3)); + } + + double LinAlg::dot(std::vector a, std::vector b){ + double c = 0; + for(int i = 0; i < a.size(); i++){ + c += a[i] * b[i]; + } + return c; + } + + std::vector LinAlg::cross(std::vector a, std::vector b){ + // Cross products exist in R^7 also. Though, I will limit it to R^3 as Wolfram does this. 
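        // Component form, for reference:
        //   cross(a, b) = (a_2*b_3 - a_3*b_2,  a_3*b_1 - a_1*b_3,  a_1*b_2 - a_2*b_1)
        // The three 2x2 determinants computed below are exactly these components
        // (cofactor expansion along the first row of the matrix [e; a; b]).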
+ std::vector> mat = {onevec(3), a, b}; + + double det1 = det({{a[1], a[2]}, {b[1], b[2]}}, 2); + double det2 = -det({{a[0], a[2]}, {b[0], b[2]}}, 2); + double det3 = det({{a[0], a[1]}, {b[0], b[1]}}, 2); + + return {det1, det2, det3}; + } + + std::vector LinAlg::abs(std::vector a){ + std::vector b; + b.resize(a.size()); + for(int i = 0; i < b.size(); i++){ + b[i] = std::abs(a[i]); + } + return b; + } + + std::vector LinAlg::zerovec(int n){ + std::vector zerovec; + zerovec.resize(n); + return zerovec; + } + + std::vector LinAlg::onevec(int n){ + return full(n, 1); + } + + std::vector> LinAlg::diag(std::vector a){ + std::vector> B = zeromat(a.size(), a.size()); + for(int i = 0; i < B.size(); i++){ + B[i][i] = a[i]; + } + return B; + } + + std::vector LinAlg::full(int n, int k){ + std::vector full; + full.resize(n); + for(int i = 0; i < full.size(); i++){ + full[i] = k; + } + return full; + } + + std::vector LinAlg::sin(std::vector a){ + std::vector b; + b.resize(a.size()); + for(int i = 0; i < a.size(); i++){ + b[i] = std::sin(a[i]); + } + return b; + } + + std::vector LinAlg::cos(std::vector a){ + std::vector b; + b.resize(a.size()); + for(int i = 0; i < a.size(); i++){ + b[i] = std::cos(a[i]); + } + return b; + } + + std::vector> LinAlg::rotate(std::vector> A, double theta, int axis){ + std::vector> rotationMatrix = {{std::cos(theta), -std::sin(theta)}, {std::sin(theta), std::cos(theta)}}; + if(axis == 0) {rotationMatrix = {{1, 0, 0}, {0, std::cos(theta), -std::sin(theta)}, {0, std::sin(theta), std::cos(theta)}};} + else if(axis == 1) {rotationMatrix = {{std::cos(theta), 0, std::sin(theta)}, {0, 1, 0}, {-std::sin(theta), 0, std::cos(theta)}};} + else if (axis == 2) {rotationMatrix = {{std::cos(theta), -std::sin(theta), 0}, {std::sin(theta), std::cos(theta), 0}, {1, 0, 0}};} + + return matmult(A, rotationMatrix); + } + + std::vector> LinAlg::max(std::vector> A, std::vector> B){ + std::vector> C; + C.resize(A.size()); + for(int i = 0; i < C.size(); i++){ + C[i].resize(A[0].size()); + } + for(int i = 0; i < A.size(); i++){ + C[i] = max(A[i], B[i]); + } + return C; + } + + double LinAlg::max(std::vector a){ + int max = a[0]; + for(int i = 0; i < a.size(); i++){ + if(a[i] > max){ + max = a[i]; + } + } + return max; + } + + double LinAlg::min(std::vector a){ + int min = a[0]; + for(int i = 0; i < a.size(); i++){ + if(a[i] < min){ + min = a[i]; + } + } + return min; + } + + std::vector LinAlg::round(std::vector a){ + std::vector b; + b.resize(a.size()); + for(int i = 0; i < a.size(); i++){ + b[i] = std::round(a[i]); + } + return b; + } + + // Multidimensional Euclidean Distance + double LinAlg::euclideanDistance(std::vector a, std::vector b){ + double dist = 0; + for(int i = 0; i < a.size(); i++){ + dist += (a[i] - b[i])*(a[i] - b[i]); + } + return std::sqrt(dist); + } + + double LinAlg::norm_2(std::vector a){ + return std::sqrt(norm_sq(a)); + } + + double LinAlg::norm_sq(std::vector a){ + double n_sq = 0; + for(int i = 0; i < a.size(); i++){ + n_sq += a[i] * a[i]; + } + return n_sq; + } + + double LinAlg::sum_elements(std::vector a){ + double sum = 0; + for(int i = 0; i < a.size(); i++){ + sum += a[i]; + } + return sum; + } + + double LinAlg::cosineSimilarity(std::vector a, std::vector b){ + return dot(a, b) / (norm_2(a) * norm_2(b)); + } + + void LinAlg::printVector(std::vector a){ + for(int i = 0; i < a.size(); i++){ + std::cout << a[i] << " "; + } + std::cout << std::endl; + } + + std::vector> LinAlg::mat_vec_add(std::vector> A, std::vector b){ + for(int i = 0; i < A.size(); i++){ + for(int 
j = 0; j < A[i].size(); j++){ + A[i][j] += b[j]; + } + } + return A; + } + + std::vector LinAlg::mat_vec_mult(std::vector> A, std::vector b){ + std::vector c; + c.resize(A.size()); + + for(int i = 0; i < A.size(); i++){ + for(int k = 0; k < b.size(); k++){ + c[i] += A[i][k] * b[k]; + } + } + return c; + } + + std::vector>> LinAlg::addition(std::vector>> A, std::vector>> B){ + for(int i = 0; i < A.size(); i++){ + A[i] = addition(A[i], B[i]); + } + return A; + } + + std::vector>> LinAlg::elementWiseDivision(std::vector>> A, std::vector>> B){ + for(int i = 0; i < A.size(); i++){ + A[i] = elementWiseDivision(A[i], B[i]); + } + return A; + } + + std::vector>> LinAlg::sqrt(std::vector>> A){ + for(int i = 0; i < A.size(); i++){ + A[i] = sqrt(A[i]); + } + return A; + } + + std::vector>> LinAlg::exponentiate(std::vector>> A, double p){ + for(int i = 0; i < A.size(); i++){ + A[i] = exponentiate(A[i], p); + } + return A; + } + + std::vector> LinAlg::tensor_vec_mult(std::vector>> A, std::vector b){ + std::vector> C; + C.resize(A.size()); + for(int i = 0; i < C.size(); i++){ + C[i].resize(A[0].size()); + } + for(int i = 0; i < C.size(); i++){ + for(int j = 0; j < C[i].size(); j++){ + C[i][j] = dot(A[i][j], b); + } + } + return C; + } + + std::vector LinAlg::flatten(std::vector>> A){ + std::vector c; + for(int i = 0; i < A.size(); i++){ + std::vector flattenedVec = flatten(A[i]); + c.insert(c.end(), flattenedVec.begin(), flattenedVec.end()); + } + return c; + } + + void LinAlg::printTensor(std::vector>> A){ + for(int i = 0; i < A.size(); i++){ + printMatrix(A[i]); + if(i != A.size() - 1) { std::cout << std::endl; } + } + } + + std::vector>> LinAlg::scalarMultiply(double scalar, std::vector>> A){ + for(int i = 0; i < A.size(); i++){ + A[i] = scalarMultiply(scalar, A[i]); + } + return A; + } + + std::vector>> LinAlg::scalarAdd(double scalar, std::vector>> A){ + for(int i = 0; i < A.size(); i++){ + A[i] = scalarAdd(scalar, A[i]); + } + return A; + } + + std::vector>> LinAlg::resize(std::vector>> A, std::vector>> B){ + A.resize(B.size()); + for(int i = 0; i < B.size(); i++){ + A[i].resize(B[i].size()); + for(int j = 0; j < B[i].size(); j++){ + A[i][j].resize(B[i][j].size()); + } + } + return A; + } + + std::vector>> LinAlg::max(std::vector>> A, std::vector>> B){ + for(int i = 0; i < A.size(); i++){ + A[i] = max(A[i], B[i]); + } + return A; + } + + std::vector>> LinAlg::abs(std::vector>> A){ + for(int i = 0; i < A.size(); i++){ + A[i] = abs(A[i]); + } + return A; + } + + double LinAlg::norm_2(std::vector>> A){ + double sum = 0; + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A[i].size(); j++){ + for(int k = 0; k < A[i][j].size(); k++){ + sum += A[i][j][k] * A[i][j][k]; + } + } + } + return std::sqrt(sum); + } + + // Bad implementation. Change this later. + std::vector>> LinAlg::vector_wise_tensor_product(std::vector>> A, std::vector> B){ + std::vector>> C; + C = resize(C, A); + for(int i = 0; i < A[0].size(); i++){ + for(int j = 0; j < A[0][i].size(); j++){ + std::vector currentVector; + currentVector.resize(A.size()); + + for(int k = 0; k < C.size(); k++){ + currentVector[k] = A[k][i][j]; + } + + currentVector = mat_vec_mult(B, currentVector); + + for(int k = 0; k < C.size(); k++){ + C[k][i][j] = currentVector[k]; + } + } + } + return C; + } +} \ No newline at end of file diff --git a/MLPP/LinAlg/LinAlg.hpp b/MLPP/LinAlg/LinAlg.hpp new file mode 100644 index 0000000..6ec64ae --- /dev/null +++ b/MLPP/LinAlg/LinAlg.hpp @@ -0,0 +1,236 @@ +// +// LinAlg.hpp +// +// Created by Marc Melikyan on 1/8/21. 
+// + +#ifndef LinAlg_hpp +#define LinAlg_hpp + +#include +#include + +namespace MLPP{ + class LinAlg{ + public: + + // MATRIX FUNCTIONS + + std::vector> gramMatrix(std::vector> A); + + bool linearIndependenceChecker(std::vector> A); + + std::vector> gaussianNoise(int n, int m); + + std::vector> addition(std::vector> A, std::vector> B); + + std::vector> subtraction(std::vector> A, std::vector> B); + + std::vector> matmult(std::vector> A, std::vector> B); + + std::vector> hadamard_product(std::vector> A, std::vector> B); + + std::vector> kronecker_product(std::vector> A, std::vector> B); + + std::vector> elementWiseDivision(std::vector> A, std::vector> B); + + std::vector> transpose(std::vector> A); + + std::vector> scalarMultiply(double scalar, std::vector> A); + + std::vector> scalarAdd(double scalar, std::vector> A); + + std::vector> log(std::vector> A); + + std::vector> log10(std::vector> A); + + std::vector> exp(std::vector> A); + + std::vector> erf(std::vector> A); + + std::vector> exponentiate(std::vector> A, double p); + + std::vector> sqrt(std::vector> A); + + std::vector> cbrt(std::vector> A); + + std::vector> matrixPower(std::vector> A, int n); + + std::vector> abs(std::vector> A); + + double det(std::vector> A, int d); + + double trace(std::vector> A); + + std::vector> cofactor(std::vector> A, int n, int i, int j); + + std::vector> adjoint(std::vector> A); + + std::vector> inverse(std::vector> A); + + std::vector> pinverse(std::vector> A); + + std::vector> zeromat(int n, int m); + + std::vector> onemat(int n, int m); + + std::vector> full(int n, int m, int k); + + std::vector> sin(std::vector> A); + + std::vector> cos(std::vector> A); + + std::vector> rotate(std::vector> A, double theta, int axis = -1); + + std::vector> max(std::vector> A, std::vector> B); + + double max(std::vector> A); + + double min(std::vector> A); + + std::vector> round(std::vector> A); + + double norm_2(std::vector> A); + + std::vector> identity(double d); + + std::vector> cov(std::vector> A); + + std::tuple>, std::vector>> eig(std::vector> A); + + std::tuple>, std::vector>, std::vector>> SVD(std::vector> A); + + std::vector vectorProjection(std::vector a, std::vector b); + + std::vector> gramSchmidtProcess(std::vector> A); + + std::tuple>, std::vector>> QRD(std::vector> A); + + std::tuple>, std::vector>> chol(std::vector> A); + + double sum_elements(std::vector> A); + + std::vector flatten(std::vector> A); + + std::vector solve(std::vector> A, std::vector b); + + bool positiveDefiniteChecker(std::vector> A); + + bool negativeDefiniteChecker(std::vector> A); + + bool zeroEigenvalue(std::vector> A); + + void printMatrix(std::vector> A); + + // VECTOR FUNCTIONS + + std::vector> outerProduct(std::vector a, std::vector b); // This multiplies a, bT + + std::vector hadamard_product(std::vector a, std::vector b); + + std::vector elementWiseDivision(std::vector a, std::vector b); + + std::vector scalarMultiply(double scalar, std::vector a); + + std::vector scalarAdd(double scalar, std::vector a); + + std::vector addition(std::vector a, std::vector b); + + std::vector subtraction(std::vector a, std::vector b); + + std::vector subtractMatrixRows(std::vector a, std::vector> B); + + std::vector log(std::vector a); + + std::vector log10(std::vector a); + + std::vector exp(std::vector a); + + std::vector erf(std::vector a); + + std::vector exponentiate(std::vector a, double p); + + std::vector sqrt(std::vector a); + + std::vector cbrt(std::vector a); + + double dot(std::vector a, std::vector b); + + std::vector 
cross(std::vector a, std::vector b); + + std::vector abs(std::vector a); + + std::vector zerovec(int n); + + std::vector onevec(int n); + + std::vector> diag(std::vector a); + + std::vector full(int n, int k); + + std::vector sin(std::vector a); + + std::vector cos(std::vector a); + + std::vector max(std::vector a, std::vector b); + + double max(std::vector a); + + double min(std::vector a); + + std::vector round(std::vector a); + + double euclideanDistance(std::vector a, std::vector b); + + double norm_2(std::vector a); + + double norm_sq(std::vector a); + + double sum_elements(std::vector a); + + double cosineSimilarity(std::vector a, std::vector b); + + void printVector(std::vector a); + + // MATRIX-VECTOR FUNCTIONS + std::vector> mat_vec_add(std::vector> A, std::vector b); + + std::vector mat_vec_mult(std::vector> A, std::vector b); + + // TENSOR FUNCTIONS + std::vector>> addition(std::vector>> A, std::vector>> B); + + std::vector>> elementWiseDivision(std::vector>> A, std::vector>> B); + + std::vector>> sqrt(std::vector>> A); + + std::vector>> exponentiate(std::vector>> A, double p); + + std::vector> tensor_vec_mult(std::vector>> A, std::vector b); + + std::vector flatten(std::vector>> A); + + void printTensor(std::vector>> A); + + std::vector>> scalarMultiply(double scalar, std::vector>> A); + + std::vector>> scalarAdd(double scalar, std::vector>> A); + + std::vector>> resize(std::vector>> A, std::vector>> B); + + std::vector>> hadamard_product(std::vector>> A, std::vector>> B); + + std::vector>> max(std::vector>> A, std::vector>> B); + + std::vector>> abs(std::vector>> A); + + double norm_2(std::vector>> A); + + std::vector>> vector_wise_tensor_product(std::vector>> A, std::vector> B); + + private: + }; + +} + +#endif /* LinAlg_hpp */ \ No newline at end of file diff --git a/MLPP/LinReg/LinReg.cpp b/MLPP/LinReg/LinReg.cpp new file mode 100644 index 0000000..ca80066 --- /dev/null +++ b/MLPP/LinReg/LinReg.cpp @@ -0,0 +1,233 @@ +// +// LinReg.cpp +// +// Created by Marc Melikyan on 10/2/20. 
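// Reference: with design matrix X (n x k), the model is y_hat = X w + b. The iterative
// trainers below (gradientDescent, SGD, MBGD, NewtonRaphson) minimize MSE plus the chosen
// regularization term, while normalEquation() uses the closed form w = (X^T X)^{-1} X^T y
// (or (X^T X + lambda I)^{-1} X^T y when reg == "Ridge").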
+// + +#include "LinReg.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Stat/Stat.hpp" +#include "Regularization/Reg.hpp" +#include "Utilities/Utilities.hpp" +#include "Cost/Cost.hpp" + +#include +#include +#include + +namespace MLPP{ + + LinReg::LinReg(std::vector> inputSet, std::vector outputSet, std::string reg, double lambda, double alpha) + : inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha) + { + y_hat.resize(n); + + weights = Utilities::weightInitialization(k); + bias = Utilities::biasInitialization(); + } + + std::vector LinReg::modelSetTest(std::vector> X){ + return Evaluate(X); + } + + double LinReg::modelTest(std::vector x){ + return Evaluate(x); + } + + void LinReg::NewtonRaphson(double learning_rate, int max_epoch, bool UI){ + LinAlg alg; + Reg regularization; + double cost_prev = 0; + int epoch = 1; + forwardPass(); + while(true){ + cost_prev = Cost(y_hat, outputSet); + + std::vector error = alg.subtraction(y_hat, outputSet); + + // Calculating the weight gradients (2nd derivative) + std::vector first_derivative = alg.mat_vec_mult(alg.transpose(inputSet), error); + std::vector> second_derivative = alg.matmult(alg.transpose(inputSet), inputSet); + weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(alg.inverse(second_derivative)), first_derivative))); + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Calculating the bias gradients (2nd derivative) + bias -= learning_rate * alg.sum_elements(error) / n; // We keep this the same. The 2nd derivative is just [1]. + forwardPass(); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); + Utilities::UI(weights, bias); + } + epoch++; + if(epoch > max_epoch) { break; } + } + } + + void LinReg::gradientDescent(double learning_rate, int max_epoch, bool UI){ + LinAlg alg; + Reg regularization; + double cost_prev = 0; + int epoch = 1; + forwardPass(); + + while(true){ + cost_prev = Cost(y_hat, outputSet); + + std::vector error = alg.subtraction(y_hat, outputSet); + + // Calculating the weight gradients + weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputSet), error))); + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Calculating the bias gradients + bias -= learning_rate * alg.sum_elements(error) / n; + forwardPass(); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); + Utilities::UI(weights, bias); + } + epoch++; + if(epoch > max_epoch) { break; } + } + } + + void LinReg::SGD(double learning_rate, int max_epoch, bool UI){ + LinAlg alg; + Reg regularization; + double cost_prev = 0; + int epoch = 1; + + while(true){ + std::random_device rd; + std::default_random_engine generator(rd()); + std::uniform_int_distribution distribution(0, int(n - 1)); + int outputIndex = distribution(generator); + + double y_hat = Evaluate(inputSet[outputIndex]); + cost_prev = Cost({y_hat}, {outputSet[outputIndex]}); + + double error = y_hat - outputSet[outputIndex]; + + // Weight updation + weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate * error, inputSet[outputIndex])); + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Bias updation + bias -= learning_rate * error; + + y_hat = Evaluate({inputSet[outputIndex]}); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost({y_hat}, {outputSet[outputIndex]})); + Utilities::UI(weights, bias); + } + 
epoch++; + + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + void LinReg::MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI){ + LinAlg alg; + Reg regularization; + double cost_prev = 0; + int epoch = 1; + + // Creating the mini-batches + int n_mini_batch = n/mini_batch_size; + auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch); + + while(true){ + for(int i = 0; i < n_mini_batch; i++){ + std::vector y_hat = Evaluate(inputMiniBatches[i]); + cost_prev = Cost(y_hat, outputMiniBatches[i]); + + std::vector error = alg.subtraction(y_hat, outputMiniBatches[i]); + + // Calculating the weight gradients + weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error))); + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Calculating the bias gradients + bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); + y_hat = Evaluate(inputMiniBatches[i]); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i])); + Utilities::UI(weights, bias); + } + } + epoch++; + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + void LinReg::normalEquation(){ + LinAlg alg; + Stat stat; + std::vector x_means; + std::vector> inputSetT = alg.transpose(inputSet); + + x_means.resize(inputSetT.size()); + for(int i = 0; i < inputSetT.size(); i++){ + x_means[i] = (stat.mean(inputSetT[i])); + } + + try{ + std::vector temp; + temp.resize(k); + temp = alg.mat_vec_mult(alg.inverse(alg.matmult(alg.transpose(inputSet), inputSet)), alg.mat_vec_mult(alg.transpose(inputSet), outputSet)); + if(std::isnan(temp[0])){ + throw 99; + } + else{ + if(reg == "Ridge") { + weights = alg.mat_vec_mult(alg.inverse(alg.addition(alg.matmult(alg.transpose(inputSet), inputSet), alg.scalarMultiply(lambda, alg.identity(k)))), alg.mat_vec_mult(alg.transpose(inputSet), outputSet)); + } + else{ weights = alg.mat_vec_mult(alg.inverse(alg.matmult(alg.transpose(inputSet), inputSet)), alg.mat_vec_mult(alg.transpose(inputSet), outputSet)); } + + bias = stat.mean(outputSet) - alg.dot(weights, x_means); + + forwardPass(); + } + } + catch(int err_num){ + std::cout << "ERR " << err_num << ": Resulting matrix was noninvertible/degenerate, and so the normal equation could not be performed. Try utilizing gradient descent." << std::endl; + } + } + + double LinReg::score(){ + Utilities util; + return util.performance(y_hat, outputSet); + } + + void LinReg::save(std::string fileName){ + Utilities util; + util.saveParameters(fileName, weights, bias); + } + + double LinReg::Cost(std::vector y_hat, std::vector y){ + Reg regularization; + class Cost cost; + return cost.MSE(y_hat, y) + regularization.regTerm(weights, lambda, alpha, reg); + } + + std::vector LinReg::Evaluate(std::vector> X){ + LinAlg alg; + return alg.scalarAdd(bias, alg.mat_vec_mult(X, weights)); + } + + double LinReg::Evaluate(std::vector x){ + LinAlg alg; + return alg.dot(weights, x) + bias; + } + + // wTx + b + void LinReg::forwardPass(){ + y_hat = Evaluate(inputSet); + } +} \ No newline at end of file diff --git a/MLPP/LinReg/LinReg.hpp b/MLPP/LinReg/LinReg.hpp new file mode 100644 index 0000000..56b5ef8 --- /dev/null +++ b/MLPP/LinReg/LinReg.hpp @@ -0,0 +1,53 @@ +// +// LinReg.hpp +// +// Created by Marc Melikyan on 10/2/20. 
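In the notation of the LinReg class above (X the n-by-k input matrix, y the targets, y_hat = Xw + b, eta the learning_rate), the updates performed by gradientDescent, NewtonRaphson, and normalEquation reduce to the following; this is a restatement of the code with the regularization terms omitted for brevity:

\[
\text{gradient descent:}\quad
w \leftarrow w - \frac{\eta}{n} X^\top(\hat{y} - y), \qquad
b \leftarrow b - \frac{\eta}{n} \sum_{i=1}^{n} (\hat{y}_i - y_i)
\]
\[
\text{Newton--Raphson:}\quad
w \leftarrow w - \frac{\eta}{n} \left(X^\top X\right)^{-1} X^\top(\hat{y} - y)
\]
\[
\text{normal equation:}\quad
w = \left(X^\top X\right)^{-1} X^\top y, \qquad
w_{\text{Ridge}} = \left(X^\top X + \lambda I\right)^{-1} X^\top y, \qquad
b = \bar{y} - w^\top \bar{x}
\]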
+// + +#ifndef LinReg_hpp +#define LinReg_hpp + +#include +#include + +namespace MLPP{ + class LinReg{ + + public: + LinReg(std::vector> inputSet, std::vector outputSet, std::string reg = "None", double lambda = 0.5, double alpha = 0.5); + std::vector modelSetTest(std::vector> X); + double modelTest(std::vector x); + void NewtonRaphson(double learning_rate, int max_epoch, bool UI); + void gradientDescent(double learning_rate, int max_epoch, bool UI = 1); + void SGD(double learning_rate, int max_epoch, bool UI = 1); + void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1); + void normalEquation(); + double score(); + void save(std::string fileName); + private: + + double Cost(std::vector y_hat, std::vector y); + + std::vector Evaluate(std::vector> X); + double Evaluate(std::vector x); + void forwardPass(); + + std::vector> inputSet; + std::vector outputSet; + std::vector y_hat; + std::vector weights; + double bias; + + int n; + int k; + + // Regularization Params + std::string reg; + int lambda; + int alpha; /* This is the controlling param for Elastic Net*/ + + + }; +} + +#endif /* LinReg_hpp */ diff --git a/MLPP/LogReg/LogReg.cpp b/MLPP/LogReg/LogReg.cpp new file mode 100644 index 0000000..4ca769d --- /dev/null +++ b/MLPP/LogReg/LogReg.cpp @@ -0,0 +1,200 @@ +// +// LogReg.cpp +// +// Created by Marc Melikyan on 10/2/20. +// + +#include "LogReg.hpp" +#include "Activation/Activation.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Regularization/Reg.hpp" +#include "Utilities/Utilities.hpp" +#include "Cost/Cost.hpp" + +#include +#include + +namespace MLPP{ + LogReg::LogReg(std::vector> inputSet, std::vector outputSet, std::string reg, double lambda, double alpha) + : inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha) + { + y_hat.resize(n); + weights = Utilities::weightInitialization(k); + bias = Utilities::biasInitialization(); + } + + std::vector LogReg::modelSetTest(std::vector> X){ + return Evaluate(X); + } + + double LogReg::modelTest(std::vector x){ + return Evaluate(x); + } + + void LogReg::gradientDescent(double learning_rate, int max_epoch, bool UI){ + LinAlg alg; + Reg regularization; + double cost_prev = 0; + int epoch = 1; + forwardPass(); + + while(true){ + cost_prev = Cost(y_hat, outputSet); + + std::vector error = alg.subtraction(y_hat, outputSet); + + // Calculating the weight gradients + weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputSet), error))); + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Calculating the bias gradients + bias -= learning_rate * alg.sum_elements(error) / n; + forwardPass(); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); + Utilities::UI(weights, bias); + } + epoch++; + + if(epoch > max_epoch) { break; } + + } + } + + void LogReg::MLE(double learning_rate, int max_epoch, bool UI){ + LinAlg alg; + Reg regularization; + double cost_prev = 0; + int epoch = 1; + forwardPass(); + + while(true){ + cost_prev = Cost(y_hat, outputSet); + + std::vector error = alg.subtraction(outputSet, y_hat); + + // Calculating the weight gradients + weights = alg.addition(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputSet), error))); + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Calculating the bias gradients + bias += learning_rate * alg.sum_elements(error) / n; + forwardPass(); + + if(UI) { + 
Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); + Utilities::UI(weights, bias); + } + epoch++; + if(epoch > max_epoch) { break; } + } + } + + void LogReg::SGD(double learning_rate, int max_epoch, bool UI){ + LinAlg alg; + Reg regularization; + double cost_prev = 0; + int epoch = 1; + + while(true){ + std::random_device rd; + std::default_random_engine generator(rd()); + std::uniform_int_distribution distribution(0, int(n - 1)); + int outputIndex = distribution(generator); + + double y_hat = Evaluate(inputSet[outputIndex]); + cost_prev = Cost({y_hat}, {outputSet[outputIndex]}); + + double error = y_hat - outputSet[outputIndex]; + + // Weight updation + weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate * error, inputSet[outputIndex])); + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Bias updation + bias -= learning_rate * error; + + y_hat = Evaluate({inputSet[outputIndex]}); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost({y_hat}, {outputSet[outputIndex]})); + Utilities::UI(weights, bias); + } + epoch++; + + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + void LogReg::MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI){ + LinAlg alg; + Reg regularization; + double cost_prev = 0; + int epoch = 1; + + // Creating the mini-batches + int n_mini_batch = n/mini_batch_size; + auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch); + + while(true){ + for(int i = 0; i < n_mini_batch; i++){ + std::vector y_hat = Evaluate(inputMiniBatches[i]); + cost_prev = Cost(y_hat, outputMiniBatches[i]); + + std::vector error = alg.subtraction(y_hat, outputMiniBatches[i]); + + // Calculating the weight gradients + weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error))); + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Calculating the bias gradients + bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); + y_hat = Evaluate(inputMiniBatches[i]); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i])); + Utilities::UI(weights, bias); + } + } + epoch++; + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + double LogReg::score(){ + Utilities util; + return util.performance(y_hat, outputSet); + } + + void LogReg::save(std::string fileName){ + Utilities util; + util.saveParameters(fileName, weights, bias); + } + + double LogReg::Cost(std::vector y_hat, std::vector y){ + Reg regularization; + class Cost cost; + return cost.LogLoss(y_hat, y) + regularization.regTerm(weights, lambda, alpha, reg); + } + + + std::vector LogReg::Evaluate(std::vector> X){ + LinAlg alg; + Activation avn; + return avn.sigmoid(alg.scalarAdd(bias, alg.mat_vec_mult(X, weights))); + } + + double LogReg::Evaluate(std::vector x){ + LinAlg alg; + Activation avn; + return avn.sigmoid(alg.dot(weights, x) + bias); + } + + // sigmoid ( wTx + b ) + void LogReg::forwardPass(){ + y_hat = Evaluate(inputSet); + } +} \ No newline at end of file diff --git a/MLPP/LogReg/LogReg.hpp b/MLPP/LogReg/LogReg.hpp new file mode 100644 index 0000000..ad12cb3 --- /dev/null +++ b/MLPP/LogReg/LogReg.hpp @@ -0,0 +1,53 @@ +// +// LogReg.hpp +// +// Created by Marc Melikyan on 10/2/20. 
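The logistic model above evaluates y_hat = sigmoid(Xw + b) and trains on log-loss; written out, the batch gradient used by gradientDescent is the one below. MLE performs gradient ascent on the log-likelihood, whose gradient is the same expression with the error negated, which is why the two methods apply mirror-image updates to the same effect.

\[
\hat{y} = \sigma(Xw + b\,\mathbf{1}), \qquad
\nabla_w J = \frac{1}{n} X^\top(\hat{y} - y), \qquad
\frac{\partial J}{\partial b} = \frac{1}{n}\sum_{i=1}^{n}(\hat{y}_i - y_i)
\]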
+// + +#ifndef LogReg_hpp +#define LogReg_hpp + + +#include +#include + +namespace MLPP { + + class LogReg{ + + public: + LogReg(std::vector> inputSet, std::vector outputSet, std::string reg = "None", double lambda = 0.5, double alpha = 0.5); + std::vector modelSetTest(std::vector> X); + double modelTest(std::vector x); + void gradientDescent(double learning_rate, int max_epoch, bool UI = 1); + void MLE(double learning_rate, int max_epoch, bool UI = 1); + void SGD(double learning_rate, int max_epoch, bool UI = 1); + void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1); + double score(); + void save(std::string fileName); + private: + + double Cost(std::vector y_hat, std::vector y); + + std::vector Evaluate(std::vector> X); + double Evaluate(std::vector x); + void forwardPass(); + + std::vector> inputSet; + std::vector outputSet; + std::vector y_hat; + std::vector weights; + double bias; + + int n; + int k; + double learning_rate; + + // Regularization Params + std::string reg; + double lambda; /* Regularization Parameter */ + double alpha; /* This is the controlling param for Elastic Net*/ + }; +} + +#endif /* LogReg_hpp */ diff --git a/MLPP/MANN/MANN.cpp b/MLPP/MANN/MANN.cpp new file mode 100644 index 0000000..e8c8491 --- /dev/null +++ b/MLPP/MANN/MANN.cpp @@ -0,0 +1,197 @@ +// +// MANN.cpp +// +// Created by Marc Melikyan on 11/4/20. +// + +#include "MANN.hpp" +#include "Activation/Activation.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Regularization/Reg.hpp" +#include "Utilities/Utilities.hpp" +#include "Cost/Cost.hpp" + +#include + +namespace MLPP { + MANN::MANN(std::vector> inputSet, std::vector> outputSet) + : inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), n_output(outputSet[0].size()) + { + + } + + MANN::~MANN(){ + delete outputLayer; + } + + std::vector> MANN::modelSetTest(std::vector> X){ + if(!network.empty()){ + network[0].input = X; + network[0].forwardPass(); + + for(int i = 1; i < network.size(); i++){ + network[i].input = network[i - 1].a; + network[i].forwardPass(); + } + outputLayer->input = network[network.size() - 1].a; + } + else { + outputLayer->input = X; + } + outputLayer->forwardPass(); + return outputLayer->a; + } + + std::vector MANN::modelTest(std::vector x){ + if(!network.empty()){ + network[0].Test(x); + for(int i = 1; i < network.size(); i++){ + network[i].Test(network[i - 1].a_test); + } + outputLayer->Test(network[network.size() - 1].a_test); + } + else{ + outputLayer->Test(x); + } + return outputLayer->a_test; + } + + void MANN::gradientDescent(double learning_rate, int max_epoch, bool UI){ + class Cost cost; + Activation avn; + LinAlg alg; + Reg regularization; + + double cost_prev = 0; + int epoch = 1; + forwardPass(); + + while(true){ + cost_prev = Cost(y_hat, outputSet); + + if(outputLayer->activation == "Softmax"){ + outputLayer->delta = alg.subtraction(y_hat, outputSet); + } + else{ + auto costDeriv = outputLayer->costDeriv_map[outputLayer->cost]; + auto outputAvn = outputLayer->activation_map[outputLayer->activation]; + outputLayer->delta = alg.hadamard_product((cost.*costDeriv)(y_hat, outputSet), (avn.*outputAvn)(outputLayer->z, 1)); + } + + std::vector> outputWGrad = alg.matmult(alg.transpose(outputLayer->input), outputLayer->delta); + + outputLayer->weights = alg.subtraction(outputLayer->weights, alg.scalarMultiply(learning_rate/n, outputWGrad)); + outputLayer->weights = regularization.regWeights(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg); + 
outputLayer->bias = alg.subtractMatrixRows(outputLayer->bias, alg.scalarMultiply(learning_rate/n, outputLayer->delta)); + + if(!network.empty()){ + auto hiddenLayerAvn = network[network.size() - 1].activation_map[network[network.size() - 1].activation]; + network[network.size() - 1].delta = alg.hadamard_product(alg.matmult(outputLayer->delta, alg.transpose(outputLayer->weights)), (avn.*hiddenLayerAvn)(network[network.size() - 1].z, 1)); + std::vector> hiddenLayerWGrad = alg.matmult(alg.transpose(network[network.size() - 1].input), network[network.size() - 1].delta); + + network[network.size() - 1].weights = alg.subtraction(network[network.size() - 1].weights, alg.scalarMultiply(learning_rate/n, hiddenLayerWGrad)); + network[network.size() - 1].weights = regularization.regWeights(network[network.size() - 1].weights, network[network.size() - 1].lambda, network[network.size() - 1].alpha, network[network.size() - 1].reg); + network[network.size() - 1].bias = alg.subtractMatrixRows(network[network.size() - 1].bias, alg.scalarMultiply(learning_rate/n, network[network.size() - 1].delta)); + + for(int i = network.size() - 2; i >= 0; i--){ + auto hiddenLayerAvn = network[i].activation_map[network[i].activation]; + network[i].delta = alg.hadamard_product(alg.matmult(network[i + 1].delta, network[i + 1].weights), (avn.*hiddenLayerAvn)(network[i].z, 1)); + std::vector> hiddenLayerWGrad = alg.matmult(alg.transpose(network[i].input), network[i].delta); + network[i].weights = alg.subtraction(network[i].weights, alg.scalarMultiply(learning_rate/n, hiddenLayerWGrad)); + network[i].weights = regularization.regWeights(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg); + network[i].bias = alg.subtractMatrixRows(network[i].bias, alg.scalarMultiply(learning_rate/n, network[i].delta)); + } + } + + forwardPass(); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); + std::cout << "Layer " << network.size() + 1 << ": " << std::endl; + Utilities::UI(outputLayer->weights, outputLayer->bias); + if(!network.empty()){ + std::cout << "Layer " << network.size() << ": " << std::endl; + for(int i = network.size() - 1; i >= 0; i--){ + std::cout << "Layer " << i + 1 << ": " << std::endl; + Utilities::UI(network[i].weights, network[i].bias); + } + } + } + + epoch++; + if(epoch > max_epoch) { break; } + } + } + + double MANN::score(){ + Utilities util; + forwardPass(); + return util.performance(y_hat, outputSet); + } + + void MANN::save(std::string fileName){ + Utilities util; + if(!network.empty()){ + util.saveParameters(fileName, network[0].weights, network[0].bias, 0, 1); + for(int i = 1; i < network.size(); i++){ + util.saveParameters(fileName, network[i].weights, network[i].bias, 1, i + 1); + } + util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, 1, network.size() + 1); + } + else{ + util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, 0, network.size() + 1); + } + } + + void MANN::addLayer(int n_hidden, std::string activation, std::string weightInit, std::string reg, double lambda, double alpha){ + if(network.empty()){ + network.push_back(HiddenLayer(n_hidden, activation, inputSet, weightInit, reg, lambda, alpha)); + network[0].forwardPass(); + } + else{ + network.push_back(HiddenLayer(n_hidden, activation, network[network.size() - 1].a, weightInit, reg, lambda, alpha)); + network[network.size() - 1].forwardPass(); + } + } + + void MANN::addOutputLayer(std::string activation, std::string loss, std::string weightInit, std::string reg, 
double lambda, double alpha){ + if(!network.empty()){ + outputLayer = new MultiOutputLayer(n_output, network[0].n_hidden, activation, loss, network[network.size() - 1].a, weightInit, reg, lambda, alpha); + } + else{ + outputLayer = new MultiOutputLayer(n_output, k, activation, loss, inputSet, weightInit, reg, lambda, alpha); + } + } + + double MANN::Cost(std::vector> y_hat, std::vector> y){ + Reg regularization; + class Cost cost; + double totalRegTerm = 0; + + auto cost_function = outputLayer->cost_map[outputLayer->cost]; + if(!network.empty()){ + for(int i = 0; i < network.size() - 1; i++){ + totalRegTerm += regularization.regTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg); + } + } + return (cost.*cost_function)(y_hat, y) + totalRegTerm + regularization.regTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg); + } + + void MANN::forwardPass(){ + if(!network.empty()){ + network[0].input = inputSet; + network[0].forwardPass(); + + for(int i = 1; i < network.size(); i++){ + network[i].input = network[i - 1].a; + network[i].forwardPass(); + } + outputLayer->input = network[network.size() - 1].a; + } + else{ + outputLayer->input = inputSet; + } + outputLayer->forwardPass(); + y_hat = outputLayer->a; + } +} \ No newline at end of file diff --git a/MLPP/MANN/MANN.hpp b/MLPP/MANN/MANN.hpp new file mode 100644 index 0000000..1edfd27 --- /dev/null +++ b/MLPP/MANN/MANN.hpp @@ -0,0 +1,48 @@ +// +// MANN.hpp +// +// Created by Marc Melikyan on 11/4/20. +// + +#ifndef MANN_hpp +#define MANN_hpp + +#include "HiddenLayer/HiddenLayer.hpp" +#include "MultiOutputLayer/MultiOutputLayer.hpp" + +#include +#include + +namespace MLPP{ + +class MANN{ + public: + MANN(std::vector> inputSet, std::vector> outputSet); + ~MANN(); + std::vector> modelSetTest(std::vector> X); + std::vector modelTest(std::vector x); + void gradientDescent(double learning_rate, int max_epoch, bool UI = 1); + double score(); + void save(std::string fileName); + + void addLayer(int n_hidden, std::string activation, std::string weightInit = "Default", std::string reg = "None", double lambda = 0.5, double alpha = 0.5); + void addOutputLayer(std::string activation, std::string loss, std::string weightInit = "Default", std::string reg = "None", double lambda = 0.5, double alpha = 0.5); + + private: + double Cost(std::vector> y_hat, std::vector> y); + void forwardPass(); + + std::vector> inputSet; + std::vector> outputSet; + std::vector> y_hat; + + std::vector network; + MultiOutputLayer *outputLayer; + + int n; + int k; + int n_output; + }; +} + +#endif /* MANN_hpp */ \ No newline at end of file diff --git a/MLPP/MLP/MLP.cpp b/MLPP/MLP/MLP.cpp new file mode 100644 index 0000000..c266806 --- /dev/null +++ b/MLPP/MLP/MLP.cpp @@ -0,0 +1,270 @@ +// +// MLP.cpp +// +// Created by Marc Melikyan on 11/4/20. 
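The layer-by-layer loop in MANN::gradientDescent above is backpropagation over the hidden-layer stack. With z_l = a_{l-1} W_l + b_l, a_l = f_l(z_l), and delta the per-layer error signal, the updates it applies correspond to (regularization omitted; the softmax output with cross-entropy uses the shortcut delta_L = y_hat - Y, as in the code):

\[
\delta_L = \frac{\partial J}{\partial \hat{Y}} \odot f_L'(z_L), \qquad
\delta_l = \left(\delta_{l+1} W_{l+1}^\top\right) \odot f_l'(z_l), \qquad
W_l \leftarrow W_l - \frac{\eta}{n}\, a_{l-1}^\top \delta_l
\]

A usage sketch follows. The toy data, layer sizes, learning rate, and include path are placeholders assumed for the example, not values taken from this patch; only the MANN member functions shown in MANN.hpp above are used.

#include <iostream>
#include <vector>
#include "MANN/MANN.hpp"   // assumed include path

int main() {
    // Placeholder data: 4 samples, 2 features, one-hot targets over 2 classes.
    std::vector<std::vector<double>> inputSet  = {{0, 0}, {0, 1}, {1, 0}, {1, 1}};
    std::vector<std::vector<double>> outputSet = {{1, 0}, {0, 1}, {0, 1}, {1, 0}};

    MLPP::MANN ann(inputSet, outputSet);
    ann.addLayer(8, "Sigmoid");                     // hidden layer
    ann.addOutputLayer("Softmax", "CrossEntropy");  // multi-output layer
    ann.gradientDescent(0.1, 1000, false);          // learning rate, max epochs, UI off
    std::cout << "ACCURACY: " << 100 * ann.score() << "%" << std::endl;
}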
+// + +#include "MLP.hpp" +#include "Activation/Activation.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Regularization/Reg.hpp" +#include "Utilities/Utilities.hpp" +#include "Cost/Cost.hpp" + +#include +#include + +namespace MLPP { + MLP::MLP(std::vector> inputSet, std::vector outputSet, int n_hidden, std::string reg, double lambda, double alpha) + : inputSet(inputSet), outputSet(outputSet), n_hidden(n_hidden), n(inputSet.size()), k(inputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha) + { + Activation avn; + y_hat.resize(n); + + weights1 = Utilities::weightInitialization(k, n_hidden); + weights2 = Utilities::weightInitialization(n_hidden); + bias1 = Utilities::biasInitialization(n_hidden); + bias2 = Utilities::biasInitialization(); + } + + std::vector MLP::modelSetTest(std::vector> X){ + return Evaluate(X); + } + + double MLP::modelTest(std::vector x){ + return Evaluate(x); + } + + void MLP::gradientDescent(double learning_rate, int max_epoch, bool UI){ + Activation avn; + LinAlg alg; + Reg regularization; + double cost_prev = 0; + int epoch = 1; + forwardPass(); + + while(true){ + cost_prev = Cost(y_hat, outputSet); + + // Calculating the errors + std::vector error = alg.subtraction(y_hat, outputSet); + + // Calculating the weight/bias gradients for layer 2 + + std::vector D2_1 = alg.mat_vec_mult(alg.transpose(a2), error); + + // weights and bias updation for layer 2 + weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate/n, D2_1)); + weights2 = regularization.regWeights(weights2, lambda, alpha, reg); + + bias2 -= learning_rate * alg.sum_elements(error) / n; + + // Calculating the weight/bias for layer 1 + + std::vector> D1_1; + D1_1.resize(n); + + D1_1 = alg.outerProduct(error, weights2); + + std::vector> D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, 1)); + + std::vector> D1_3 = alg.matmult(alg.transpose(inputSet), D1_2); + + + // weight an bias updation for layer 1 + weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate/n, D1_3)); + weights1 = regularization.regWeights(weights1, lambda, alpha, reg); + + bias1 = alg.subtractMatrixRows(bias1, alg.scalarMultiply(learning_rate/n, D1_2)); + + forwardPass(); + + // UI PORTION + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); + std::cout << "Layer 1:" << std::endl; + Utilities::UI(weights1, bias1); + std::cout << "Layer 2:" << std::endl; + Utilities::UI(weights2, bias2); + } + epoch++; + + if(epoch > max_epoch) { break; } + } + + } + + void MLP::SGD(double learning_rate, int max_epoch, bool UI){ + Activation avn; + LinAlg alg; + Reg regularization; + double cost_prev = 0; + int epoch = 1; + + while(true){ + std::random_device rd; + std::default_random_engine generator(rd()); + std::uniform_int_distribution distribution(0, int(n - 1)); + int outputIndex = distribution(generator); + + double y_hat = Evaluate(inputSet[outputIndex]); + auto [z2, a2] = propagate(inputSet[outputIndex]); + cost_prev = Cost({y_hat}, {outputSet[outputIndex]}); + double error = y_hat - outputSet[outputIndex]; + + // Weight updation for layer 2 + std::vector D2_1 = alg.scalarMultiply(error, a2); + weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate, D2_1)); + weights2 = regularization.regWeights(weights2, lambda, alpha, reg); + + // Bias updation for layer 2 + bias2 -= learning_rate * error; + + // Weight updation for layer 1 + std::vector D1_1 = alg.scalarMultiply(error, weights2); + std::vector D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, 1)); + std::vector> D1_3 = 
alg.outerProduct(inputSet[outputIndex], D1_2); + + weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate, D1_3)); + weights1 = regularization.regWeights(weights1, lambda, alpha, reg); + // Bias updation for layer 1 + + bias1 = alg.subtraction(bias1, alg.scalarMultiply(learning_rate, D1_2)); + + y_hat = Evaluate(inputSet[outputIndex]); + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost({y_hat}, {outputSet[outputIndex]})); + std::cout << "Layer 1:" << std::endl; + Utilities::UI(weights1, bias1); + std::cout << "Layer 2:" << std::endl; + Utilities::UI(weights2, bias2); + } + epoch++; + + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + void MLP::MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI){ + Activation avn; + LinAlg alg; + Reg regularization; + double cost_prev = 0; + int epoch = 1; + + // Creating the mini-batches + int n_mini_batch = n/mini_batch_size; + auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch); + + while(true){ + for(int i = 0; i < n_mini_batch; i++){ + std::vector y_hat = Evaluate(inputMiniBatches[i]); + auto [z2, a2] = propagate(inputMiniBatches[i]); + cost_prev = Cost(y_hat, outputMiniBatches[i]); + + // Calculating the errors + std::vector error = alg.subtraction(y_hat, outputMiniBatches[i]); + + // Calculating the weight/bias gradients for layer 2 + + std::vector D2_1 = alg.mat_vec_mult(alg.transpose(a2), error); + + // weights and bias updation for layser 2 + weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate/outputMiniBatches[i].size(), D2_1)); + weights2 = regularization.regWeights(weights2, lambda, alpha, reg); + + // Calculating the bias gradients for layer 2 + double b_gradient = alg.sum_elements(error); + + // Bias Updation for layer 2 + bias2 -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); + + //Calculating the weight/bias for layer 1 + + std::vector> D1_1 = alg.outerProduct(error, weights2); + + std::vector> D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, 1)); + + std::vector> D1_3 = alg.matmult(alg.transpose(inputMiniBatches[i]), D1_2); + + + // weight an bias updation for layer 1 + weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate/outputMiniBatches[i].size(), D1_3)); + weights1 = regularization.regWeights(weights1, lambda, alpha, reg); + + bias1 = alg.subtractMatrixRows(bias1, alg.scalarMultiply(learning_rate/outputMiniBatches[i].size(), D1_2)); + + y_hat = Evaluate(inputMiniBatches[i]); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i])); + std::cout << "Layer 1:" << std::endl; + Utilities::UI(weights1, bias1); + std::cout << "Layer 2:" << std::endl; + Utilities::UI(weights2, bias2); + } + } + epoch++; + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + double MLP::score(){ + Utilities util; + return util.performance(y_hat, outputSet); + } + + void MLP::save(std::string fileName){ + Utilities util; + util.saveParameters(fileName, weights1, bias1, 0, 1); + util.saveParameters(fileName, weights2, bias2, 1, 2); + } + + double MLP::Cost(std::vector y_hat, std::vector y){ + Reg regularization; + class Cost cost; + return cost.LogLoss(y_hat, y) + regularization.regTerm(weights2, lambda, alpha, reg) + regularization.regTerm(weights1, lambda, alpha, reg); + } + + std::vector MLP::Evaluate(std::vector> X){ + LinAlg alg; + Activation avn; + std::vector> z2 = alg.mat_vec_add(alg.matmult(X, weights1), bias1); + std::vector> a2 = 
avn.sigmoid(z2); + return avn.sigmoid(alg.scalarAdd(bias2, alg.mat_vec_mult(a2, weights2))); + } + + std::tuple>, std::vector>> MLP::propagate(std::vector> X){ + LinAlg alg; + Activation avn; + std::vector> z2 = alg.mat_vec_add(alg.matmult(X, weights1), bias1); + std::vector> a2 = avn.sigmoid(z2); + return {z2, a2}; + } + + double MLP::Evaluate(std::vector x){ + LinAlg alg; + Activation avn; + std::vector z2 = alg.addition(alg.mat_vec_mult(alg.transpose(weights1), x), bias1); + std::vector a2 = avn.sigmoid(z2); + return avn.sigmoid(alg.dot(weights2, a2) + bias2); + } + + std::tuple, std::vector> MLP::propagate(std::vector x){ + LinAlg alg; + Activation avn; + std::vector z2 = alg.addition(alg.mat_vec_mult(alg.transpose(weights1), x), bias1); + std::vector a2 = avn.sigmoid(z2); + return {z2, a2}; + } + + void MLP::forwardPass(){ + LinAlg alg; + Activation avn; + z2 = alg.mat_vec_add(alg.matmult(inputSet, weights1), bias1); + a2 = avn.sigmoid(z2); + y_hat = avn.sigmoid(alg.scalarAdd(bias2, alg.mat_vec_mult(a2, weights2))); + } +} diff --git a/MLPP/MLP/MLP.hpp b/MLPP/MLP/MLP.hpp new file mode 100644 index 0000000..935744d --- /dev/null +++ b/MLPP/MLP/MLP.hpp @@ -0,0 +1,61 @@ +// +// MLP.hpp +// +// Created by Marc Melikyan on 11/4/20. +// + +#ifndef MLP_hpp +#define MLP_hpp + +#include +#include +#include + +namespace MLPP { + +class MLP{ + public: + MLP(std::vector> inputSet, std::vector outputSet, int n_hidden, std::string reg = "None", double lambda = 0.5, double alpha = 0.5); + std::vector modelSetTest(std::vector> X); + double modelTest(std::vector x); + void gradientDescent(double learning_rate, int max_epoch, bool UI = 1); + void SGD(double learning_rate, int max_epoch, bool UI = 1); + void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1); + double score(); + void save(std::string fileName); + + private: + double Cost(std::vector y_hat, std::vector y); + + std::vector Evaluate(std::vector> X); + std::tuple>, std::vector>> propagate(std::vector> X); + double Evaluate(std::vector x); + std::tuple, std::vector> propagate(std::vector x); + void forwardPass(); + + std::vector> inputSet; + std::vector outputSet; + std::vector y_hat; + + std::vector> weights1; + std::vector weights2; + + std::vector bias1; + double bias2; + + std::vector> z2; + std::vector> a2; + + int n; + int k; + int n_hidden; + + + // Regularization Params + std::string reg; + double lambda; /* Regularization Parameter */ + double alpha; /* This is the controlling param for Elastic Net*/ + }; +} + +#endif /* MLP_hpp */ diff --git a/MLPP/MultiOutputLayer/MultiOutputLayer.cpp b/MLPP/MultiOutputLayer/MultiOutputLayer.cpp new file mode 100644 index 0000000..9e8f654 --- /dev/null +++ b/MLPP/MultiOutputLayer/MultiOutputLayer.cpp @@ -0,0 +1,133 @@ +// +// MultiOutputLayer.cpp +// +// Created by Marc Melikyan on 11/4/20. 
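MLP::Evaluate and MLP::forwardPass above compute a single-hidden-layer network with sigmoid activations at both levels; in matrix form, with the hidden bias broadcast across rows:

\[
Z_2 = X W_1 + \mathbf{1} b_1^\top, \qquad
A_2 = \sigma(Z_2), \qquad
\hat{y} = \sigma(A_2 w_2 + b_2 \mathbf{1})
\]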
+// + +#include "MultiOutputLayer.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Utilities/Utilities.hpp" + +#include +#include + +namespace MLPP { + MultiOutputLayer::MultiOutputLayer(int n_output, int n_hidden, std::string activation, std::string cost, std::vector> input, std::string weightInit, std::string reg, double lambda, double alpha) + : n_output(n_output), n_hidden(n_hidden), activation(activation), cost(cost), input(input), weightInit(weightInit), reg(reg), lambda(lambda), alpha(alpha) + { + weights = Utilities::weightInitialization(n_hidden, n_output, weightInit); + bias = Utilities::biasInitialization(n_output); + + activation_map["Linear"] = &Activation::linear; + activationTest_map["Linear"] = &Activation::linear; + + activation_map["Sigmoid"] = &Activation::sigmoid; + activationTest_map["Sigmoid"] = &Activation::sigmoid; + + activation_map["Softmax"] = &Activation::softmax; + activationTest_map["Softmax"] = &Activation::softmax; + + activation_map["Swish"] = &Activation::swish; + activationTest_map["Swish"] = &Activation::swish; + + activation_map["Mish"] = &Activation::mish; + activationTest_map["Mish"] = &Activation::mish; + + activation_map["SinC"] = &Activation::sinc; + activationTest_map["SinC"] = &Activation::sinc; + + activation_map["Softplus"] = &Activation::softplus; + activationTest_map["Softplus"] = &Activation::softplus; + + activation_map["Softsign"] = &Activation::softsign; + activationTest_map["Softsign"] = &Activation::softsign; + + activation_map["CLogLog"] = &Activation::cloglog; + activationTest_map["CLogLog"] = &Activation::cloglog; + + activation_map["Logit"] = &Activation::logit; + activationTest_map["Logit"] = &Activation::logit; + + activation_map["GaussianCDF"] = &Activation::gaussianCDF; + activationTest_map["GaussianCDF"] = &Activation::gaussianCDF; + + activation_map["RELU"] = &Activation::RELU; + activationTest_map["RELU"] = &Activation::RELU; + + activation_map["GELU"] = &Activation::GELU; + activationTest_map["GELU"] = &Activation::GELU; + + activation_map["Sign"] = &Activation::sign; + activationTest_map["Sign"] = &Activation::sign; + + activation_map["UnitStep"] = &Activation::unitStep; + activationTest_map["UnitStep"] = &Activation::unitStep; + + activation_map["Sinh"] = &Activation::sinh; + activationTest_map["Sinh"] = &Activation::sinh; + + activation_map["Cosh"] = &Activation::cosh; + activationTest_map["Cosh"] = &Activation::cosh; + + activation_map["Tanh"] = &Activation::tanh; + activationTest_map["Tanh"] = &Activation::tanh; + + activation_map["Csch"] = &Activation::csch; + activationTest_map["Csch"] = &Activation::csch; + + activation_map["Sech"] = &Activation::sech; + activationTest_map["Sech"] = &Activation::sech; + + activation_map["Coth"] = &Activation::coth; + activationTest_map["Coth"] = &Activation::coth; + + activation_map["Arsinh"] = &Activation::arsinh; + activationTest_map["Arsinh"] = &Activation::arsinh; + + activation_map["Arcosh"] = &Activation::arcosh; + activationTest_map["Arcosh"] = &Activation::arcosh; + + activation_map["Artanh"] = &Activation::artanh; + activationTest_map["Artanh"] = &Activation::artanh; + + activation_map["Arcsch"] = &Activation::arcsch; + activationTest_map["Arcsch"] = &Activation::arcsch; + + activation_map["Arsech"] = &Activation::arsech; + activationTest_map["Arsech"] = &Activation::arsech; + + activation_map["Arcoth"] = &Activation::arcoth; + activationTest_map["Arcoth"] = &Activation::arcoth; + + costDeriv_map["MSE"] = &Cost::MSEDeriv; + cost_map["MSE"] = &Cost::MSE; + costDeriv_map["RMSE"] 
= &Cost::RMSEDeriv; + cost_map["RMSE"] = &Cost::RMSE; + costDeriv_map["MAE"] = &Cost::MAEDeriv; + cost_map["MAE"] = &Cost::MAE; + costDeriv_map["MBE"] = &Cost::MBEDeriv; + cost_map["MBE"] = &Cost::MBE; + costDeriv_map["LogLoss"] = &Cost::LogLossDeriv; + cost_map["LogLoss"] = &Cost::LogLoss; + costDeriv_map["CrossEntropy"] = &Cost::CrossEntropyDeriv; + cost_map["CrossEntropy"] = &Cost::CrossEntropy; + costDeriv_map["HingeLoss"] = &Cost::HingeLossDeriv; + cost_map["HingeLoss"] = &Cost::HingeLoss; + costDeriv_map["WassersteinLoss"] = &Cost::HingeLossDeriv; + cost_map["WassersteinLoss"] = &Cost::HingeLoss; + } + + void MultiOutputLayer::forwardPass(){ + LinAlg alg; + Activation avn; + z = alg.mat_vec_add(alg.matmult(input, weights), bias); + a = (avn.*activation_map[activation])(z, 0); + } + + void MultiOutputLayer::Test(std::vector x){ + LinAlg alg; + Activation avn; + z_test = alg.addition(alg.mat_vec_mult(alg.transpose(weights), x), bias); + a_test = (avn.*activationTest_map[activation])(z_test, 0); + } +} \ No newline at end of file diff --git a/MLPP/MultiOutputLayer/MultiOutputLayer.hpp b/MLPP/MultiOutputLayer/MultiOutputLayer.hpp new file mode 100644 index 0000000..0158098 --- /dev/null +++ b/MLPP/MultiOutputLayer/MultiOutputLayer.hpp @@ -0,0 +1,58 @@ +// +// MultiOutputLayer.hpp +// +// Created by Marc Melikyan on 11/4/20. +// + +#ifndef MultiOutputLayer_hpp +#define MultiOutputLayer_hpp + +#include "Activation/Activation.hpp" +#include "Cost/Cost.hpp" + +#include +#include +#include + +namespace MLPP { + class MultiOutputLayer{ + public: + MultiOutputLayer(int n_output, int n_hidden, std::string activation, std::string cost, std::vector> input, std::string weightInit, std::string reg, double lambda, double alpha); + + int n_output; + int n_hidden; + std::string activation; + std::string cost; + + std::vector> input; + + std::vector> weights; + std::vector bias; + + std::vector> z; + std::vector> a; + + std::map> (Activation::*)(std::vector>, bool)> activation_map; + std::map (Activation::*)(std::vector, bool)> activationTest_map; + std::map>, std::vector>)> cost_map; + std::map> (Cost::*)(std::vector>, std::vector>)> costDeriv_map; + + std::vector z_test; + std::vector a_test; + + std::vector> delta; + + // Regularization Params + std::string reg; + double lambda; /* Regularization Parameter */ + double alpha; /* This is the controlling param for Elastic Net*/ + + std::string weightInit; + + void forwardPass(); + void Test(std::vector x); + }; +} + +#endif /* MultiOutputLayer_hpp */ + diff --git a/MLPP/MultinomialNB/MultinomialNB.cpp b/MLPP/MultinomialNB/MultinomialNB.cpp new file mode 100644 index 0000000..d1c68de --- /dev/null +++ b/MLPP/MultinomialNB/MultinomialNB.cpp @@ -0,0 +1,121 @@ +// +// MultinomialNB.cpp +// +// Created by Marc Melikyan on 1/17/21. 
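The activation_map and cost_map tables populated above hold pointers to member functions, so a layer can be configured by string name at construction time and dispatched without a switch statement. Below is a minimal standalone illustration of that same pattern; the small Activation struct here is a made-up stand-in for the real class in this patch.

#include <cmath>
#include <iostream>
#include <map>
#include <string>

// Hypothetical stand-in for the library's Activation class.
struct Activation {
    double sigmoid(double z, bool deriv) {
        double s = 1.0 / (1.0 + std::exp(-z));
        return deriv ? s * (1 - s) : s;
    }
    double linear(double z, bool deriv) { return deriv ? 1.0 : z; }
};

int main() {
    // Map from a configuration string to a pointer-to-member-function.
    std::map<std::string, double (Activation::*)(double, bool)> activation_map;
    activation_map["Sigmoid"] = &Activation::sigmoid;
    activation_map["Linear"]  = &Activation::linear;

    Activation avn;
    std::string activation = "Sigmoid";                         // chosen by name, as in the layers above
    double a = (avn.*activation_map[activation])(0.5, false);   // dispatch through the member pointer
    std::cout << a << std::endl;                                // ~0.622459
}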
+// + +#include "MultinomialNB.hpp" +#include "Utilities/Utilities.hpp" +#include "LinAlg/LinAlg.hpp" + +#include +#include +#include + +namespace MLPP{ + MultinomialNB::MultinomialNB(std::vector> inputSet, std::vector outputSet, int class_num) + : inputSet(inputSet), outputSet(outputSet), class_num(class_num) + { + y_hat.resize(outputSet.size()); + Evaluate(); + } + + std::vector MultinomialNB::modelSetTest(std::vector> X){ + std::vector y_hat; + for(int i = 0; i < X.size(); i++){ + y_hat.push_back(modelTest(X[i])); + } + return y_hat; + } + + double MultinomialNB::modelTest(std::vector x){ + double score[class_num]; + computeTheta(); + + for(int j = 0; j < x.size(); j++){ + for(int k = 0; k < vocab.size(); k++){ + if(x[j] == vocab[k]){ + for(int p = class_num - 1; p >= 0; p--){ + score[p] += std::log(theta[p][vocab[k]]); + } + } + } + } + + for(int i = 0; i < priors.size(); i++){ + score[i] += std::log(priors[i]); + } + + return std::distance(score, std::max_element(score, score + sizeof(score) / sizeof(double))); + } + + double MultinomialNB::score(){ + Utilities util; + return util.performance(y_hat, outputSet); + } + + void MultinomialNB::computeTheta(){ + + // Resizing theta for the sake of ease & proper access of the elements. + theta.resize(class_num); + + // Setting all values in the hasmap by default to 0. + for(int i = class_num - 1; i >= 0; i--){ + for(int j = 0; j < vocab.size(); j++){ + theta[i][vocab[j]] = 0; + } + } + + for(int i = 0; i < inputSet.size(); i++){ + for(int j = 0; j < inputSet[0].size(); j++){ + theta[outputSet[i]][inputSet[i][j]]++; + } + } + + for(int i = 0; i < theta.size(); i++){ + for(int j = 0; j < theta[i].size(); j++){ + theta[i][j] /= priors[i] * y_hat.size(); + } + } + } + + void MultinomialNB::Evaluate(){ + LinAlg alg; + for(int i = 0; i < outputSet.size(); i++){ + // Pr(B | A) * Pr(A) + double score[class_num]; + + // Easy computation of priors, i.e. Pr(C_k) + priors.resize(class_num); + for(int i = 0; i < outputSet.size(); i++){ + priors[int(outputSet[i])]++; + } + priors = alg.scalarMultiply( double(1)/double(outputSet.size()), priors); + + // Evaluating Theta... + computeTheta(); + + for(int j = 0; j < inputSet.size(); j++){ + for(int k = 0; k < vocab.size(); k++){ + if(inputSet[i][j] == vocab[k]){ + for(int p = class_num - 1; p >= 0; p--){ + score[p] += std::log(theta[i][vocab[k]]); + } + } + } + } + + for(int i = 0; i < priors.size(); i++){ + score[i] += std::log(priors[i]); + score[i] = exp(score[i]); + } + + for(int i = 0; i < 2; i++){ + std::cout << score[i] << std::endl; + } + + // Assigning the traning example's y_hat to a class + y_hat[i] = std::distance(score, std::max_element(score, score + sizeof(score) / sizeof(double))); + } + } +} \ No newline at end of file diff --git a/MLPP/MultinomialNB/MultinomialNB.hpp b/MLPP/MultinomialNB/MultinomialNB.hpp new file mode 100644 index 0000000..3fadcdf --- /dev/null +++ b/MLPP/MultinomialNB/MultinomialNB.hpp @@ -0,0 +1,45 @@ +// +// MultinomialNB.hpp +// +// Created by Marc Melikyan on 1/17/21. 
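Both modelTest and Evaluate above score each class by adding the log prior to the summed log of the per-token parameters and then taking the arg max, i.e. the usual multinomial naive Bayes decision rule, with the priors estimated as class frequencies:

\[
\hat{y}(x) = \arg\max_{c}\; \Big[ \log \Pr(C = c) + \sum_{j} \log \theta_{c,\,x_j} \Big],
\qquad
\Pr(C = c) = \frac{1}{n}\sum_{i=1}^{n} \mathbf{1}\{y_i = c\}
\]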
+// + +#ifndef MultinomialNB_hpp +#define MultinomialNB_hpp + +#include +#include + +namespace MLPP{ + class MultinomialNB{ + + public: + MultinomialNB(std::vector> inputSet, std::vector outputSet, int class_num); + std::vector modelSetTest(std::vector> X); + double modelTest(std::vector x); + double score(); + + private: + + void computeTheta(); + void Evaluate(); + + // Model Params + std::vector priors; + + std::vector> theta; + std::vector vocab; + int class_num; + + // Datasets + std::vector> inputSet; + std::vector outputSet; + std::vector y_hat; + + + + + }; + + #endif /* MultinomialNB_hpp */ +} \ No newline at end of file diff --git a/MLPP/NumericalAnalysis/NumericalAnalysis.cpp b/MLPP/NumericalAnalysis/NumericalAnalysis.cpp new file mode 100644 index 0000000..4762044 --- /dev/null +++ b/MLPP/NumericalAnalysis/NumericalAnalysis.cpp @@ -0,0 +1,305 @@ +// +// NumericalAnalysis.cpp +// +// Created by Marc Melikyan on 11/13/20. +// + +#include "NumericalAnalysis.hpp" +#include "LinAlg/LinAlg.hpp" +#include +#include +#include +#include + +namespace MLPP{ + + double NumericalAnalysis::numDiff(double(*function)(double), double x){ + double eps = 1e-10; + return (function(x + eps) - function(x)) / eps; // This is just the formal def. of the derivative. + } + + + double NumericalAnalysis::numDiff_2(double(*function)(double), double x){ + double eps = 1e-5; + return (function(x + 2 * eps) - 2 * function(x + eps) + function(x)) / (eps * eps); + } + + double NumericalAnalysis::numDiff_3(double(*function)(double), double x){ + double eps = 1e-5; + double t1 = function(x + 3 * eps) - 2 * function(x + 2 * eps) + function(x + eps); + double t2 = function(x + 2 * eps) - 2 * function(x + eps) + function(x); + return (t1 - t2)/(eps * eps * eps); + } + + double NumericalAnalysis::constantApproximation(double(*function)(double), double c){ + return function(c); + } + + double NumericalAnalysis::linearApproximation(double(*function)(double), double c, double x){ + return constantApproximation(function, c) + numDiff(function, c) * (x - c); + } + + double NumericalAnalysis::quadraticApproximation(double(*function)(double), double c, double x){ + return linearApproximation(function, c, x) + 0.5 * numDiff_2(function, c) * (x - c) * (x - c); + } + + double NumericalAnalysis::cubicApproximation(double(*function)(double), double c, double x){ + return quadraticApproximation(function, c, x) + (1/6) * numDiff_3(function, c) * (x - c) * (x - c) * (x - c); + } + + double NumericalAnalysis::numDiff(double(*function)(std::vector), std::vector x, int axis){ + // For multivariable function analysis. + // This will be used for calculating Jacobian vectors. + // Diffrentiate with respect to indicated axis. (0, 1, 2 ...) + double eps = 1e-10; + std::vector x_eps = x; + x_eps[axis] += eps; + + return (function(x_eps) - function(x)) / eps; + } + + double NumericalAnalysis::numDiff_2(double(*function)(std::vector), std::vector x, int axis1, int axis2){ + //For Hessians. + double eps = 1e-5; + + std::vector x_pp = x; + x_pp[axis1] += eps; + x_pp[axis2] += eps; + + std::vector x_np = x; + x_np[axis2] += eps; + + std::vector x_pn = x; + x_pn[axis1] += eps; + + return (function(x_pp) - function(x_np) - function(x_pn) + function(x))/(eps * eps); + } + + double NumericalAnalysis::numDiff_3(double(*function)(std::vector), std::vector x, int axis1, int axis2, int axis3){ + // For third order derivative tensors. + // NOTE: Approximations do not appear to be accurate for sinusodial functions... + // Should revisit this later. 
+ double eps = INT_MAX; + + std::vector x_ppp = x; + x_ppp[axis1] += eps; + x_ppp[axis2] += eps; + x_ppp[axis3] += eps; + + std::vector x_npp = x; + x_npp[axis2] += eps; + x_npp[axis3] += eps; + + std::vector x_pnp = x; + x_pnp[axis1] += eps; + x_pnp[axis3] += eps; + + std::vector x_nnp = x; + x_nnp[axis3] += eps; + + + std::vector x_ppn = x; + x_ppn[axis1] += eps; + x_ppn[axis2] += eps; + + std::vector x_npn = x; + x_npn[axis2] += eps; + + std::vector x_pnn = x; + x_pnn[axis1] += eps; + + double thirdAxis = function(x_ppp) - function(x_npp) - function(x_pnp) + function(x_nnp); + double noThirdAxis = function(x_ppn) - function(x_npn) - function(x_pnn) + function(x); + return (thirdAxis - noThirdAxis)/(eps * eps * eps); + } + + double NumericalAnalysis::newtonRaphsonMethod(double(*function)(double), double x_0, double epoch_num){ + double x = x_0; + for(int i = 0; i < epoch_num; i++){ + x -= function(x)/numDiff(function, x); + } + return x; + } + + double NumericalAnalysis::halleyMethod(double (*function)(double), double x_0, double epoch_num){ + double x = x_0; + for(int i = 0; i < epoch_num; i++){ + x -= ((2 * function(x) * numDiff(function, x))/(2 * numDiff(function, x) * numDiff(function, x) - function(x) * numDiff_2(function, x))); + } + return x; + } + + double NumericalAnalysis::invQuadraticInterpolation(double (*function)(double), std::vector x_0, double epoch_num){ + double x = 0; + std::vector currentThree = x_0; + for(int i = 0; i < epoch_num; i++){ + double t1 = ((function(currentThree[1]) * function(currentThree[2]))/( (function(currentThree[0]) - function(currentThree[1])) * (function(currentThree[0]) - function(currentThree[2])) ) ) * currentThree[0]; + double t2 = ((function(currentThree[0]) * function(currentThree[2]))/( (function(currentThree[1]) - function(currentThree[0])) * (function(currentThree[1]) - function(currentThree[2])) ) ) * currentThree[1]; + double t3 = ((function(currentThree[0]) * function(currentThree[1]))/( (function(currentThree[2]) - function(currentThree[0])) * (function(currentThree[2]) - function(currentThree[1])) ) ) * currentThree[2]; + x = t1 + t2 + t3; + + currentThree.erase(currentThree.begin()); + currentThree.push_back(x); + + } + return x; + } + + double NumericalAnalysis::eulerianMethod(double(*derivative)(double), std::vector q_0, double p, double h){ + double max_epoch = (p - q_0[0])/h; + double x = q_0[0]; + double y = q_0[1]; + for(int i = 0; i < max_epoch; i++){ + y = y + h * derivative(x); + x += h; + } + return y; + } + + double NumericalAnalysis::eulerianMethod(double(*derivative)(std::vector), std::vector q_0, double p, double h){ + double max_epoch = (p - q_0[0])/h; + double x = q_0[0]; + double y = q_0[1]; + for(int i = 0; i < max_epoch; i++){ + y = y + h * derivative({x, y}); + x += h; + } + return y; + } + + double NumericalAnalysis::growthMethod(double C, double k, double t){ + /* + dP/dt = kP + dP/P = kdt + integral(1/P)dP = integral(k) dt + ln|P| = kt + C_initial + |P| = e^(kt + C_initial) + |P| = e^(C_initial) * e^(kt) + P = +/- e^(C_initial) * e^(kt) + P = C * e^(kt) + */ + + // auto growthFunction = [&C, &k](double t) { return C * exp(k * t); }; + return C * std::exp(k * t); + } + + std::vector NumericalAnalysis::jacobian(double(*function)(std::vector), std::vector x){ + std::vector jacobian; + jacobian.resize(x.size()); + for(int i = 0; i < jacobian.size(); i++){ + jacobian[i] = numDiff(function, x, i); // Derivative w.r.t axis i evaluated at x. For all x_i. 
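newtonRaphsonMethod above iterates x <- x - f(x)/f'(x), with the derivative estimated by numDiff. The self-contained sketch below re-creates the same iteration for illustration; the forward-difference step and the toy function f(x) = x^2 - 2 (whose positive root is sqrt(2)) are chosen for this example and are not part of the patch.

#include <iostream>

// Forward-difference estimate of f'(x), mirroring the eps = 1e-10 definition above.
static double numDiff(double (*f)(double), double x) {
    const double eps = 1e-10;
    return (f(x + eps) - f(x)) / eps;
}

// Newton-Raphson: x_{k+1} = x_k - f(x_k) / f'(x_k).
static double newtonRaphson(double (*f)(double), double x0, int epochs) {
    double x = x0;
    for (int i = 0; i < epochs; i++) {
        x -= f(x) / numDiff(f, x);
    }
    return x;
}

static double f(double x) { return x * x - 2.0; }

int main() {
    std::cout << newtonRaphson(f, 1.0, 10) << std::endl; // ~1.41421
}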
+ } + return jacobian; + } + std::vector> NumericalAnalysis::hessian(double(*function)(std::vector), std::vector x){ + std::vector> hessian; + hessian.resize(x.size()); + for(int i = 0; i < hessian.size(); i++){ + hessian[i].resize(x.size()); + } + for(int i = 0; i < hessian.size(); i++){ + for(int j = 0; j < hessian[i].size(); j++){ + hessian[i][j] = numDiff_2(function, x, i, j); + } + } + return hessian; + } + + std::vector>> NumericalAnalysis::thirdOrderTensor(double(*function)(std::vector), std::vector x){ + std::vector>> tensor; + tensor.resize(x.size()); + for(int i = 0; i < tensor.size(); i++){ + tensor[i].resize(x.size()); + for(int j = 0; j < tensor[i].size(); j++){ + tensor[i][j].resize(x.size()); + } + } + for(int i = 0; i < tensor.size(); i++){ // O(n^3) time complexity :( + for(int j = 0; j < tensor[i].size(); j++){ + for(int k = 0; k < tensor[i][j].size(); k++) + tensor[i][j][k] = numDiff_3(function, x, i, j, k); + } + } + return tensor; + } + + double NumericalAnalysis::constantApproximation(double(*function)(std::vector), std::vector c){ + return function(c); + } + + double NumericalAnalysis::linearApproximation(double(*function)(std::vector), std::vector c, std::vector x){ + LinAlg alg; + return constantApproximation(function, c) + alg.matmult(alg.transpose({jacobian(function, c)}), {alg.subtraction(x, c)})[0][0]; + } + + double NumericalAnalysis::quadraticApproximation(double(*function)(std::vector), std::vector c, std::vector x){ + LinAlg alg; + return linearApproximation(function, c, x) + 0.5 * alg.matmult({(alg.subtraction(x, c))}, alg.matmult(hessian(function, c), alg.transpose({alg.subtraction(x, c)})))[0][0]; + } + + double NumericalAnalysis::cubicApproximation(double(*function)(std::vector), std::vector c, std::vector x){ + /* + Not completely sure as the literature seldom discusses the third order taylor approximation, + in particular for multivariate cases, but ostensibly, the matrix/tensor/vector multiplies + should look something like this: + + (N x N x N) (N x 1) [tensor vector mult] => (N x N x 1) => (N x N) + Perform remaining multiplies as done for the 2nd order approximation. + Result is a scalar. + */ + LinAlg alg; + std::vector> resultMat = alg.tensor_vec_mult(thirdOrderTensor(function, c), alg.subtraction(x, c)); + double resultScalar = alg.matmult({(alg.subtraction(x, c))}, alg.matmult(resultMat, alg.transpose({alg.subtraction(x, c)})))[0][0]; + + return quadraticApproximation(function, c, x) + (1/6) * resultScalar; + } + + double NumericalAnalysis::laplacian(double(*function)(std::vector), std::vector x){ + LinAlg alg; + std::vector> hessian_matrix = hessian(function, x); + double laplacian = 0; + for(int i = 0; i < hessian_matrix.size(); i++){ + laplacian += hessian_matrix[i][i]; // homogenous 2nd derivs w.r.t i, then i + } + return laplacian; + } + + std::string NumericalAnalysis::secondPartialDerivativeTest(double(*function)(std::vector), std::vector x){ + LinAlg alg; + std::vector> hessianMatrix = hessian(function, x); + /* + The reason we do this is because the 2nd partial derivative test is less conclusive for functions of variables greater than + 2, and the calculations specific to the bivariate case are less computationally intensive. 
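For reference, the multivariate helpers above correspond to the truncated Taylor expansion about c (constant, linear, quadratic terms), the Laplacian as the trace of the Hessian, and, in the bivariate branch of secondPartialDerivativeTest, the classical discriminant test:

\[
f(x) \approx f(c) + J_f(c)\,(x - c) + \tfrac{1}{2}(x - c)^\top H_f(c)\,(x - c)
\]
\[
\Delta f = \sum_{i} \frac{\partial^2 f}{\partial x_i^2} = \operatorname{tr} H_f, \qquad
D = f_{xx} f_{yy} - f_{xy}^2:\;
D > 0,\ f_{xx} > 0 \Rightarrow \text{min};\quad
D > 0,\ f_{xx} < 0 \Rightarrow \text{max};\quad
D < 0 \Rightarrow \text{saddle}
\]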
+ */ + if(x.size() == 2){ + double det = alg.det(hessianMatrix, hessianMatrix.size()); + double secondDerivative = numDiff_2(function, x, 0, 0); + if(secondDerivative > 0 && det > 0){ + return "min"; + } + else if(secondDerivative < 0 && det > 0){ + return "max"; + } + else if(det < 0){ + return "saddle"; + } + else{ + return "test was inconclusive"; + } + } + else { + if(alg.positiveDefiniteChecker(hessianMatrix)){ + return "min"; + } + else if(alg.negativeDefiniteChecker(hessianMatrix)){ + return "max"; + } + else if(!alg.zeroEigenvalue(hessianMatrix)){ + return "saddle"; + } + else{ + return "test was inconclusive"; + } + } + } +} \ No newline at end of file diff --git a/MLPP/NumericalAnalysis/NumericalAnalysis.hpp b/MLPP/NumericalAnalysis/NumericalAnalysis.hpp new file mode 100644 index 0000000..8899954 --- /dev/null +++ b/MLPP/NumericalAnalysis/NumericalAnalysis.hpp @@ -0,0 +1,57 @@ +// +// NumericalAnalysis.hpp +// +// + +#ifndef NumericalAnalysis_hpp +#define NumericalAnalysis_hpp + +#include +#include + +namespace MLPP{ + class NumericalAnalysis{ + public: + /* A numerical method for derivatives is used. This may be subject to change, + as an analytical method for calculating derivatives will most likely be used in + the future. + */ + double numDiff(double(*function)(double), double x); + double numDiff_2(double(*function)(double), double x); + double numDiff_3(double(*function)(double), double x); + + double constantApproximation(double(*function)(double), double c); + double linearApproximation(double(*function)(double), double c, double x); + double quadraticApproximation(double(*function)(double), double c, double x); + double cubicApproximation(double(*function)(double), double c, double x); + + double numDiff(double(*function)(std::vector), std::vector x, int axis); + double numDiff_2(double(*function)(std::vector), std::vector x, int axis1, int axis2); + double numDiff_3(double(*function)(std::vector), std::vector x, int axis1, int axis2, int axis3); + + double newtonRaphsonMethod(double(*function)(double), double x_0, double epoch_num); + double halleyMethod(double(*function)(double), double x_0, double epoch_num); + double invQuadraticInterpolation(double (*function)(double), std::vector x_0, double epoch_num); + + double eulerianMethod(double(*derivative)(double), std::vector q_0, double p, double h); // Euler's method for solving diffrential equations. + double eulerianMethod(double(*derivative)(std::vector), std::vector q_0, double p, double h); // Euler's method for solving diffrential equations. + + double growthMethod(double C, double k, double t); // General growth-based diffrential equations can be solved by seperation of variables. + + std::vector jacobian(double(*function)(std::vector), std::vector x); // Indeed, for functions with scalar outputs the Jacobians will be vectors. 
+ std::vector> hessian(double(*function)(std::vector), std::vector x); + std::vector>> thirdOrderTensor(double(*function)(std::vector), std::vector x); + + double constantApproximation(double(*function)(std::vector), std::vector c); + double linearApproximation(double(*function)(std::vector), std::vector c, std::vector x); + double quadraticApproximation(double(*function)(std::vector), std::vector c, std::vector x); + double cubicApproximation(double(*function)(std::vector), std::vector c, std::vector x); + + double laplacian(double(*function)(std::vector), std::vector x); // laplacian + + std::string secondPartialDerivativeTest(double(*function)(std::vector), std::vector x); + + }; +} + +#endif /* NumericalAnalysis_hpp */ diff --git a/MLPP/OutlierFinder/OutlierFinder.cpp b/MLPP/OutlierFinder/OutlierFinder.cpp new file mode 100644 index 0000000..29fa29e --- /dev/null +++ b/MLPP/OutlierFinder/OutlierFinder.cpp @@ -0,0 +1,43 @@ +// +// OutlierFinder.cpp +// +// Created by Marc Melikyan on 11/13/20. +// + +#include "OutlierFinder.hpp" +#include "Stat/Stat.hpp" +#include + +namespace MLPP{ + OutlierFinder::OutlierFinder(int threshold) + : threshold(threshold){ + + } + + std::vector> OutlierFinder::modelSetTest(std::vector> inputSet){ + Stat stat; + std::vector> outliers; + outliers.resize(inputSet.size()); + for(int i = 0; i < inputSet.size(); i++){ + for(int j = 0; j < inputSet[i].size(); j++){ + double z = (inputSet[i][j] - stat.mean(inputSet[i])) / stat.standardDeviation(inputSet[i]); + if(abs(z) > threshold){ + outliers[i].push_back(inputSet[i][j]); + } + } + } + return outliers; + } + + std::vector OutlierFinder::modelTest(std::vector inputSet){ + Stat stat; + std::vector outliers; + for(int i = 0; i < inputSet.size(); i++){ + double z = (inputSet[i] - stat.mean(inputSet)) / stat.standardDeviation(inputSet); + if(abs(z) > threshold){ + outliers.push_back(inputSet[i]); + } + } + return outliers; + } +} \ No newline at end of file diff --git a/MLPP/OutlierFinder/OutlierFinder.hpp b/MLPP/OutlierFinder/OutlierFinder.hpp new file mode 100644 index 0000000..eaaf648 --- /dev/null +++ b/MLPP/OutlierFinder/OutlierFinder.hpp @@ -0,0 +1,27 @@ +// +// OutlierFinder.hpp +// +// Created by Marc Melikyan on 11/13/20. +// + +#ifndef OutlierFinder_hpp +#define OutlierFinder_hpp + +#include + +namespace MLPP{ + class OutlierFinder{ + public: + // Cnstr + OutlierFinder(int threshold); + + std::vector> modelSetTest(std::vector> inputSet); + std::vector modelTest(std::vector inputSet); + + // Variables required + int threshold; + + }; +} + +#endif /* OutlierFinder_hpp */ diff --git a/MLPP/OutputLayer/OutputLayer.cpp b/MLPP/OutputLayer/OutputLayer.cpp new file mode 100644 index 0000000..0bfe799 --- /dev/null +++ b/MLPP/OutputLayer/OutputLayer.cpp @@ -0,0 +1,130 @@ +// +// OutputLayer.cpp +// +// Created by Marc Melikyan on 11/4/20. 
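OutlierFinder above flags a value as an outlier when its z-score, taken against its own row in modelSetTest or against the whole vector in modelTest, exceeds the configured threshold:

\[
z_{ij} = \frac{x_{ij} - \mu_i}{\sigma_i}, \qquad
x_{ij}\ \text{is reported as an outlier when}\ |z_{ij}| > \text{threshold}
\]

A threshold of 3 is a common default for this rule, though the constructor above leaves the choice to the caller.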
+// + +#include "OutputLayer.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Utilities/Utilities.hpp" + +#include +#include + +namespace MLPP { + OutputLayer::OutputLayer(int n_hidden, std::string activation, std::string cost, std::vector> input, std::string weightInit, std::string reg, double lambda, double alpha) + : n_hidden(n_hidden), activation(activation), cost(cost), input(input), weightInit(weightInit), reg(reg), lambda(lambda), alpha(alpha) + { + weights = Utilities::weightInitialization(n_hidden, weightInit); + bias = Utilities::biasInitialization(); + + activation_map["Linear"] = &Activation::linear; + activationTest_map["Linear"] = &Activation::linear; + + activation_map["Sigmoid"] = &Activation::sigmoid; + activationTest_map["Sigmoid"] = &Activation::sigmoid; + + activation_map["Swish"] = &Activation::swish; + activationTest_map["Swish"] = &Activation::swish; + + activation_map["Mish"] = &Activation::mish; + activationTest_map["Mish"] = &Activation::mish; + + activation_map["SinC"] = &Activation::sinc; + activationTest_map["SinC"] = &Activation::sinc; + + activation_map["Softplus"] = &Activation::softplus; + activationTest_map["Softplus"] = &Activation::softplus; + + activation_map["Softsign"] = &Activation::softsign; + activationTest_map["Softsign"] = &Activation::softsign; + + activation_map["CLogLog"] = &Activation::cloglog; + activationTest_map["CLogLog"] = &Activation::cloglog; + + activation_map["Logit"] = &Activation::logit; + activationTest_map["Logit"] = &Activation::logit; + + activation_map["GaussianCDF"] = &Activation::gaussianCDF; + activationTest_map["GaussianCDF"] = &Activation::gaussianCDF; + + activation_map["RELU"] = &Activation::RELU; + activationTest_map["RELU"] = &Activation::RELU; + + activation_map["GELU"] = &Activation::GELU; + activationTest_map["GELU"] = &Activation::GELU; + + activation_map["Sign"] = &Activation::sign; + activationTest_map["Sign"] = &Activation::sign; + + activation_map["UnitStep"] = &Activation::unitStep; + activationTest_map["UnitStep"] = &Activation::unitStep; + + activation_map["Sinh"] = &Activation::sinh; + activationTest_map["Sinh"] = &Activation::sinh; + + activation_map["Cosh"] = &Activation::cosh; + activationTest_map["Cosh"] = &Activation::cosh; + + activation_map["Tanh"] = &Activation::tanh; + activationTest_map["Tanh"] = &Activation::tanh; + + activation_map["Csch"] = &Activation::csch; + activationTest_map["Csch"] = &Activation::csch; + + activation_map["Sech"] = &Activation::sech; + activationTest_map["Sech"] = &Activation::sech; + + activation_map["Coth"] = &Activation::coth; + activationTest_map["Coth"] = &Activation::coth; + + activation_map["Arsinh"] = &Activation::arsinh; + activationTest_map["Arsinh"] = &Activation::arsinh; + + activation_map["Arcosh"] = &Activation::arcosh; + activationTest_map["Arcosh"] = &Activation::arcosh; + + activation_map["Artanh"] = &Activation::artanh; + activationTest_map["Artanh"] = &Activation::artanh; + + activation_map["Arcsch"] = &Activation::arcsch; + activationTest_map["Arcsch"] = &Activation::arcsch; + + activation_map["Arsech"] = &Activation::arsech; + activationTest_map["Arsech"] = &Activation::arsech; + + activation_map["Arcoth"] = &Activation::arcoth; + activationTest_map["Arcoth"] = &Activation::arcoth; + + costDeriv_map["MSE"] = &Cost::MSEDeriv; + cost_map["MSE"] = &Cost::MSE; + costDeriv_map["RMSE"] = &Cost::RMSEDeriv; + cost_map["RMSE"] = &Cost::RMSE; + costDeriv_map["MAE"] = &Cost::MAEDeriv; + cost_map["MAE"] = &Cost::MAE; + costDeriv_map["MBE"] = &Cost::MBEDeriv; + 
cost_map["MBE"] = &Cost::MBE; + costDeriv_map["LogLoss"] = &Cost::LogLossDeriv; + cost_map["LogLoss"] = &Cost::LogLoss; + costDeriv_map["CrossEntropy"] = &Cost::CrossEntropyDeriv; + cost_map["CrossEntropy"] = &Cost::CrossEntropy; + costDeriv_map["HingeLoss"] = &Cost::HingeLossDeriv; + cost_map["HingeLoss"] = &Cost::HingeLoss; + costDeriv_map["WassersteinLoss"] = &Cost::HingeLossDeriv; + cost_map["WassersteinLoss"] = &Cost::HingeLoss; + } + + void OutputLayer::forwardPass(){ + LinAlg alg; + Activation avn; + z = alg.scalarAdd(bias, alg.mat_vec_mult(input, weights)); + a = (avn.*activation_map[activation])(z, 0); + } + + void OutputLayer::Test(std::vector x){ + LinAlg alg; + Activation avn; + z_test = alg.dot(weights, x) + bias; + a_test = (avn.*activationTest_map[activation])(z_test, 0); + } +} \ No newline at end of file diff --git a/MLPP/OutputLayer/OutputLayer.hpp b/MLPP/OutputLayer/OutputLayer.hpp new file mode 100644 index 0000000..3b90069 --- /dev/null +++ b/MLPP/OutputLayer/OutputLayer.hpp @@ -0,0 +1,56 @@ +// +// OutputLayer.hpp +// +// Created by Marc Melikyan on 11/4/20. +// + +#ifndef OutputLayer_hpp +#define OutputLayer_hpp + +#include "Activation/Activation.hpp" +#include "Cost/Cost.hpp" + +#include +#include +#include + +namespace MLPP { + class OutputLayer{ + public: + OutputLayer(int n_hidden, std::string activation, std::string cost, std::vector> input, std::string weightInit, std::string reg, double lambda, double alpha); + + int n_hidden; + std::string activation; + std::string cost; + + std::vector> input; + + std::vector weights; + double bias; + + std::vector z; + std::vector a; + + std::map (Activation::*)(std::vector, bool)> activation_map; + std::map activationTest_map; + std::map, std::vector)> cost_map; + std::map (Cost::*)(std::vector, std::vector)> costDeriv_map; + + double z_test; + double a_test; + + std::vector delta; + + // Regularization Params + std::string reg; + double lambda; /* Regularization Parameter */ + double alpha; /* This is the controlling param for Elastic Net*/ + + std::string weightInit; + + void forwardPass(); + void Test(std::vector x); + }; +} + +#endif /* OutputLayer_hpp */ diff --git a/MLPP/PCA/PCA.cpp b/MLPP/PCA/PCA.cpp new file mode 100644 index 0000000..4e4e8a1 --- /dev/null +++ b/MLPP/PCA/PCA.cpp @@ -0,0 +1,56 @@ +// +// PCA.cpp +// +// Created by Marc Melikyan on 10/2/20. +// + +#include "PCA.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Data/Data.hpp" + +#include +#include + +namespace MLPP{ + + PCA::PCA(std::vector> inputSet, int k) + : inputSet(inputSet), k(k) + { + + } + + std::vector> PCA::principalComponents(){ + LinAlg alg; + Data data; + + auto [U, S, Vt] = alg.SVD(alg.cov(inputSet)); + X_normalized = data.meanCentering(inputSet); + U_reduce.resize(U.size()); + for(int i = 0; i < k; i++){ + for(int j = 0; j < U.size(); j++){ + U_reduce[j].push_back(U[j][i]); + } + } + Z = alg.matmult(alg.transpose(U_reduce), X_normalized); + return Z; + } + // Simply tells us the percentage of variance maintained. 
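// A minimal standalone sketch of the projection step performed by principalComponents()
// above: after the SVD of the covariance matrix, the first k columns of U span the principal
// subspace and the data are projected as Z = U_reduce^T * X. The orientation used below
// (features as rows, observations as columns) mirrors how Z is formed above and is an
// assumption for illustration; the function name project is hypothetical.
#include <vector>

using Matrix = std::vector<std::vector<double>>;

Matrix project(const Matrix& U, const Matrix& X, int k) {
    // U: d x d, columns are principal directions; X: d x n, mean-centered.
    int d = U.size();
    int n = X[0].size();
    Matrix Z(k, std::vector<double>(n, 0.0));
    for (int c = 0; c < k; c++) {            // c-th principal component
        for (int j = 0; j < n; j++) {        // j-th observation
            for (int i = 0; i < d; i++) {
                Z[c][j] += U[i][c] * X[i][j];   // (U_reduce^T X)[c][j]
            }
        }
    }
    return Z;
}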
+ double PCA::score(){ + LinAlg alg; + std::vector> X_approx = alg.matmult(U_reduce, Z); + double num, den = 0; + for(int i = 0; i < X_normalized.size(); i++){ + num += alg.norm_sq(alg.subtraction(X_normalized[i], X_approx[i])); + } + num /= X_normalized.size(); + for(int i = 0; i < X_normalized.size(); i++){ + den += alg.norm_sq(X_normalized[i]); + } + + den /= X_normalized.size(); + if(den == 0){ + den+=1e-10; // For numerical sanity as to not recieve a domain error + } + return 1 - num/den; + } +} diff --git a/MLPP/PCA/PCA.hpp b/MLPP/PCA/PCA.hpp new file mode 100644 index 0000000..59e1d75 --- /dev/null +++ b/MLPP/PCA/PCA.hpp @@ -0,0 +1,28 @@ +// +// PCA.hpp +// +// Created by Marc Melikyan on 10/2/20. +// + +#ifndef PCA_hpp +#define PCA_hpp + +#include + +namespace MLPP{ + class PCA{ + + public: + PCA(std::vector> inputSet, int k); + std::vector> principalComponents(); + double score(); + private: + std::vector> inputSet; + std::vector> X_normalized; + std::vector> U_reduce; + std::vector> Z; + int k; + }; +} + +#endif /* PCA_hpp */ diff --git a/MLPP/ProbitReg/ProbitReg.cpp b/MLPP/ProbitReg/ProbitReg.cpp new file mode 100644 index 0000000..1cf40cc --- /dev/null +++ b/MLPP/ProbitReg/ProbitReg.cpp @@ -0,0 +1,239 @@ +// +// ProbitReg.cpp +// +// Created by Marc Melikyan on 10/2/20. +// + +#include "ProbitReg.hpp" +#include "Activation/Activation.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Regularization/Reg.hpp" +#include "Utilities/Utilities.hpp" +#include "Cost/Cost.hpp" + +#include +#include + +namespace MLPP{ + ProbitReg::ProbitReg(std::vector> inputSet, std::vector outputSet, std::string reg, double lambda, double alpha) + : inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha) + { + y_hat.resize(n); + weights = Utilities::weightInitialization(k); + bias = Utilities::biasInitialization(); + } + + std::vector ProbitReg::modelSetTest(std::vector> X){ + return Evaluate(X); + } + + double ProbitReg::modelTest(std::vector x){ + return Evaluate(x); + } + + void ProbitReg::gradientDescent(double learning_rate, int max_epoch, bool UI){ + Activation avn; + LinAlg alg; + Reg regularization; + double cost_prev = 0; + int epoch = 1; + forwardPass(); + + while(true){ + cost_prev = Cost(y_hat, outputSet); + + std::vector error = alg.subtraction(y_hat, outputSet); + + // Calculating the weight gradients + weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputSet), alg.hadamard_product(error, avn.gaussianCDF(z, 1))))); + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Calculating the bias gradients + bias -= learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.gaussianCDF(z, 1))) / n; + forwardPass(); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); + Utilities::UI(weights, bias); + } + epoch++; + + if(epoch > max_epoch) { break; } + } + } + + void ProbitReg::MLE(double learning_rate, int max_epoch, bool UI){ + Activation avn; + LinAlg alg; + Reg regularization; + double cost_prev = 0; + int epoch = 1; + forwardPass(); + + while(true){ + cost_prev = Cost(y_hat, outputSet); + + std::vector error = alg.subtraction(outputSet, y_hat); + + // Calculating the weight gradients + weights = alg.addition(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputSet), alg.hadamard_product(error, avn.gaussianCDF(z, 1))))); + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // 
Calculating the bias gradients + bias += learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.gaussianCDF(z, 1))) / n; + forwardPass(); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); + Utilities::UI(weights, bias); + } + epoch++; + + if(epoch > max_epoch) { break; } + } + } + + void ProbitReg::SGD(double learning_rate, int max_epoch, bool UI){ + // NOTE: ∂y_hat/∂z is sparse + Activation avn; + LinAlg alg; + Reg regularization; + double cost_prev = 0; + int epoch = 1; + + while(true){ + std::random_device rd; + std::default_random_engine generator(rd()); + std::uniform_int_distribution distribution(0, int(n - 1)); + int outputIndex = distribution(generator); + + double y_hat = Evaluate(inputSet[outputIndex]); + double z = propagate(inputSet[outputIndex]); + cost_prev = Cost({y_hat}, {outputSet[outputIndex]}); + + double error = y_hat - outputSet[outputIndex]; + + // Weight Updation + weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate * error * ((1 / sqrt(2 * M_PI)) * exp(-z * z / 2)), inputSet[outputIndex])); + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Bias updation + bias -= learning_rate * error * ((1 / sqrt(2 * M_PI)) * exp(-z * z / 2)); + + y_hat = Evaluate({inputSet[outputIndex]}); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost({y_hat}, {outputSet[outputIndex]})); + Utilities::UI(weights, bias); + } + epoch++; + + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + void ProbitReg::MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI){ + Activation avn; + LinAlg alg; + Reg regularization; + double cost_prev = 0; + int epoch = 1; + + // Creating the mini-batches + int n_mini_batch = n/mini_batch_size; + auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch); + + // Creating the mini-batches + for(int i = 0; i < n_mini_batch; i++){ + std::vector> currentInputSet; + std::vector currentOutputSet; + for(int j = 0; j < n/n_mini_batch; j++){ + currentInputSet.push_back(inputSet[n/n_mini_batch * i + j]); + currentOutputSet.push_back(outputSet[n/n_mini_batch * i + j]); + } + inputMiniBatches.push_back(currentInputSet); + outputMiniBatches.push_back(currentOutputSet); + } + + if(double(n)/double(n_mini_batch) - int(n/n_mini_batch) != 0){ + for(int i = 0; i < n - n/n_mini_batch * n_mini_batch; i++){ + inputMiniBatches[n_mini_batch - 1].push_back(inputSet[n/n_mini_batch * n_mini_batch + i]); + outputMiniBatches[n_mini_batch - 1].push_back(outputSet[n/n_mini_batch * n_mini_batch + i]); + } + } + + while(true){ + for(int i = 0; i < n_mini_batch; i++){ + std::vector y_hat = Evaluate(inputMiniBatches[i]); + std::vector z = propagate(inputMiniBatches[i]); + cost_prev = Cost(y_hat, outputMiniBatches[i]); + + std::vector error = alg.subtraction(y_hat, outputMiniBatches[i]); + + // Calculating the weight gradients + weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/outputMiniBatches.size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), alg.hadamard_product(error, avn.gaussianCDF(z, 1))))); + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Calculating the bias gradients + bias -= learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.gaussianCDF(z, 1))) / outputMiniBatches.size(); + y_hat = Evaluate(inputMiniBatches[i]); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i])); + Utilities::UI(weights, bias); + } + } + epoch++; + 
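// A minimal standalone sketch of the single-sample update used by SGD above: the model is
// y_hat = Phi(w.x + b) with Phi the standard normal CDF, and by the chain rule
// dPhi/dz = phi(z) = (1 / sqrt(2*pi)) * exp(-z^2 / 2), which is the factor multiplying the
// error in the weight and bias updates. The squared-error cost and the helper name here are
// assumptions for illustration.
#include <cmath>
#include <vector>

void probitSgdStep(std::vector<double>& w, double& b,
                   const std::vector<double>& x, double y, double lr) {
    double z = b;
    for (size_t i = 0; i < w.size(); i++) z += w[i] * x[i];
    double y_hat = 0.5 * std::erfc(-z / std::sqrt(2.0));            // Phi(z)
    double pdf   = std::exp(-z * z / 2.0) / std::sqrt(2.0 * M_PI);  // phi(z)
    double grad  = (y_hat - y) * pdf;                               // d(0.5 * (y_hat - y)^2) / dz
    for (size_t i = 0; i < w.size(); i++) w[i] -= lr * grad * x[i];
    b -= lr * grad;
}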
if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + double ProbitReg::score(){ + Utilities util; + return util.performance(y_hat, outputSet); + } + + void ProbitReg::save(std::string fileName){ + Utilities util; + util.saveParameters(fileName, weights, bias); + } + + double ProbitReg::Cost(std::vector y_hat, std::vector y){ + Reg regularization; + class Cost cost; + return cost.MSE(y_hat, y) + regularization.regTerm(weights, lambda, alpha, reg); + } + + std::vector ProbitReg::Evaluate(std::vector> X){ + LinAlg alg; + Activation avn; + return avn.gaussianCDF(alg.scalarAdd(bias, alg.mat_vec_mult(X, weights))); + } + + std::vectorProbitReg::propagate(std::vector> X){ + LinAlg alg; + return alg.scalarAdd(bias, alg.mat_vec_mult(X, weights)); + } + + double ProbitReg::Evaluate(std::vector x){ + LinAlg alg; + Activation avn; + return avn.gaussianCDF(alg.dot(weights, x) + bias); + } + + double ProbitReg::propagate(std::vector x){ + LinAlg alg; + return alg.dot(weights, x) + bias; + } + + // gaussianCDF ( wTx + b ) + void ProbitReg::forwardPass(){ + LinAlg alg; + Activation avn; + + z = propagate(inputSet); + y_hat = avn.gaussianCDF(z); + } +} \ No newline at end of file diff --git a/MLPP/ProbitReg/ProbitReg.hpp b/MLPP/ProbitReg/ProbitReg.hpp new file mode 100644 index 0000000..b001e02 --- /dev/null +++ b/MLPP/ProbitReg/ProbitReg.hpp @@ -0,0 +1,57 @@ +// +// ProbitReg.hpp +// +// Created by Marc Melikyan on 10/2/20. +// + +#ifndef ProbitReg_hpp +#define ProbitReg_hpp + + +#include +#include + +namespace MLPP { + + class ProbitReg{ + + public: + ProbitReg(std::vector> inputSet, std::vector outputSet, std::string reg = "None", double lambda = 0.5, double alpha = 0.5); + std::vector modelSetTest(std::vector> X); + double modelTest(std::vector x); + void gradientDescent(double learning_rate, int max_epoch = 0, bool UI = 1); + void MLE(double learning_rate, int max_epoch = 0, bool UI = 1); + void SGD(double learning_rate, int max_epoch = 0, bool UI = 1); + void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1); + double score(); + void save(std::string fileName); + private: + + double Cost(std::vector y_hat, std::vector y); + + std::vector Evaluate(std::vector> X); + std::vector propagate(std::vector> X); + double Evaluate(std::vector x); + double propagate(std::vector x); + void forwardPass(); + + std::vector> inputSet; + std::vector outputSet; + std::vector z; + std::vector y_hat; + std::vector weights; + double bias; + + int n; + int k; + + // Regularization Params + std::string reg; + double lambda; + double alpha; /* This is the controlling param for Elastic Net*/ + + + }; +} + +#endif /* ProbitReg_hpp */ diff --git a/MLPP/Regularization/Reg.cpp b/MLPP/Regularization/Reg.cpp new file mode 100644 index 0000000..ad7b403 --- /dev/null +++ b/MLPP/Regularization/Reg.cpp @@ -0,0 +1,177 @@ +// +// Reg.cpp +// +// Created by Marc Melikyan on 1/16/21. 
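// A minimal standalone sketch of the contiguous mini-batch split used by the MBGD routine
// above: n samples become n_mini_batch = n / mini_batch_size batches, and when n is not
// evenly divisible the leftover samples are appended to the last batch. This mirrors the
// inline batching loop above; whether Utilities::createMiniBatches follows the exact same
// scheme is an assumption, and the index-based helper name is illustrative.
#include <vector>

std::vector<std::vector<int>> makeBatches(int n, int mini_batch_size) {
    int n_mini_batch = n / mini_batch_size;
    int per_batch = n / n_mini_batch;
    std::vector<std::vector<int>> batches(n_mini_batch);
    for (int i = 0; i < n_mini_batch; i++) {
        for (int j = 0; j < per_batch; j++) {
            batches[i].push_back(i * per_batch + j);
        }
    }
    for (int i = n_mini_batch * per_batch; i < n; i++) {
        batches[n_mini_batch - 1].push_back(i);   // remainder goes into the final batch
    }
    return batches;
}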
+// + +#include +#include +#include "Reg.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Activation/Activation.hpp" + +namespace MLPP{ + + double Reg::regTerm(std::vector weights, double lambda, double alpha, std::string reg){ + if(reg == "Ridge"){ + double reg = 0; + for(int i = 0; i < weights.size(); i++){ + reg += weights[i] * weights[i]; + } + return reg * lambda / 2; + } + else if(reg == "Lasso"){ + double reg = 0; + for(int i = 0; i < weights.size(); i++){ + reg += abs(weights[i]); + } + return reg * lambda; + } + else if(reg == "ElasticNet"){ + double reg = 0; + for(int i = 0; i < weights.size(); i++){ + reg += alpha * abs(weights[i]); // Lasso Reg + reg += ((1 - alpha) / 2) * weights[i] * weights[i]; // Ridge Reg + } + return reg * lambda; + } + return 0; + } + + double Reg::regTerm(std::vector> weights, double lambda, double alpha, std::string reg){ + if(reg == "Ridge"){ + double reg = 0; + for(int i = 0; i < weights.size(); i++){ + for(int j = 0; j < weights[i].size(); j++){ + reg += weights[i][j] * weights[i][j]; + } + } + return reg * lambda / 2; + } + else if(reg == "Lasso"){ + double reg = 0; + for(int i = 0; i < weights.size(); i++){ + for(int j = 0; j < weights[i].size(); j++){ + reg += abs(weights[i][j]); + } + } + return reg * lambda; + } + else if(reg == "ElasticNet"){ + double reg = 0; + for(int i = 0; i < weights.size(); i++){ + for(int j = 0; j < weights[i].size(); j++){ + reg += alpha * abs(weights[i][j]); // Lasso Reg + reg += ((1 - alpha) / 2) * weights[i][j] * weights[i][j]; // Ridge Reg + } + } + return reg * lambda; + } + return 0; + } + + std::vector Reg::regWeights(std::vector weights, double lambda, double alpha, std::string reg){ + LinAlg alg; + if(reg == "WeightClipping"){ return regDerivTerm(weights, lambda, alpha, reg); } + return alg.subtraction(weights, regDerivTerm(weights, lambda, alpha, reg)); + // for(int i = 0; i < weights.size(); i++){ + // weights[i] -= regDerivTerm(weights, lambda, alpha, reg, i); + // } + // return weights; + } + + std::vector> Reg::regWeights(std::vector> weights, double lambda, double alpha, std::string reg){ + LinAlg alg; + if(reg == "WeightClipping"){ return regDerivTerm(weights, lambda, alpha, reg); } + return alg.subtraction(weights, regDerivTerm(weights, lambda, alpha, reg)); + // for(int i = 0; i < weights.size(); i++){ + // for(int j = 0; j < weights[i].size(); j++){ + // weights[i][j] -= regDerivTerm(weights, lambda, alpha, reg, i, j); + // } + // } + // return weights; + } + + std::vector Reg::regDerivTerm(std::vector weights, double lambda, double alpha, std::string reg){ + std::vector regDeriv; + regDeriv.resize(weights.size()); + + for(int i = 0; i < regDeriv.size(); i++){ + regDeriv[i] = regDerivTerm(weights, lambda, alpha, reg, i); + } + return regDeriv; + } + + std::vector> Reg::regDerivTerm(std::vector> weights, double lambda, double alpha, std::string reg){ + std::vector> regDeriv; + regDeriv.resize(weights.size()); + for(int i = 0; i < regDeriv.size(); i++){ + regDeriv[i].resize(weights[0].size()); + } + + for(int i = 0; i < regDeriv.size(); i++){ + for(int j = 0; j < regDeriv[i].size(); j++){ + regDeriv[i][j] = regDerivTerm(weights, lambda, alpha, reg, i, j); + } + } + return regDeriv; + } + + double Reg::regDerivTerm(std::vector weights, double lambda, double alpha, std::string reg, int j){ + Activation act; + if(reg == "Ridge"){ + return lambda * weights[j]; + } + else if(reg == "Lasso"){ + return lambda * act.sign(weights[j]); + } + else if(reg == "ElasticNet"){ + return alpha * lambda * 
act.sign(weights[j]) + (1 - alpha) * lambda * weights[j]; + } + else if(reg == "WeightClipping"){ // Preparation for Wasserstein GANs. + // We assume lambda is the lower clipping threshold, while alpha is the higher clipping threshold. + // alpha > lambda. + if(weights[j] > alpha){ + return alpha; + } + else if(weights[j] < lambda){ + return lambda; + } + else{ + return weights[j]; + } + } + else { + return 0; + } + } + + double Reg::regDerivTerm(std::vector> weights, double lambda, double alpha, std::string reg, int i, int j){ + Activation act; + if(reg == "Ridge"){ + return lambda * weights[i][j]; + } + else if(reg == "Lasso"){ + return lambda * act.sign(weights[i][j]); + } + else if(reg == "ElasticNet"){ + return alpha * lambda * act.sign(weights[i][j]) + (1 - alpha) * lambda * weights[i][j]; + } + else if(reg == "WeightClipping"){ // Preparation for Wasserstein GANs. + // We assume lambda is the lower clipping threshold, while alpha is the higher clipping threshold. + // alpha > lambda. + if(weights[i][j] > alpha){ + return alpha; + } + else if(weights[i][j] < lambda){ + return lambda; + } + else{ + return weights[i][j]; + } + } + else { + return 0; + } + } +} diff --git a/MLPP/Regularization/Reg.hpp b/MLPP/Regularization/Reg.hpp new file mode 100644 index 0000000..e9f4979 --- /dev/null +++ b/MLPP/Regularization/Reg.hpp @@ -0,0 +1,31 @@ +// +// Reg.hpp +// +// Created by Marc Melikyan on 1/16/21. +// + +#ifndef Reg_hpp +#define Reg_hpp + +#include + +namespace MLPP{ + class Reg{ + public: + + double regTerm(std::vector weights, double lambda, double alpha, std::string reg); + double regTerm(std::vector> weights, double lambda, double alpha, std::string reg); + + std::vector regWeights(std::vector weights, double lambda, double alpha, std::string reg); + std::vector> regWeights(std::vector> weights, double lambda, double alpha, std::string reg); + + std::vector regDerivTerm(std::vector weights, double lambda, double alpha, std::string reg); + std::vector> regDerivTerm(std::vector>, double lambda, double alpha, std::string reg); + + private: + double regDerivTerm(std::vector weights, double lambda, double alpha, std::string reg, int j); + double regDerivTerm(std::vector> weights, double lambda, double alpha, std::string reg, int i, int j); + }; +} + +#endif /* Reg_hpp */ diff --git a/MLPP/SVC/SVC.cpp b/MLPP/SVC/SVC.cpp new file mode 100644 index 0000000..46e798c --- /dev/null +++ b/MLPP/SVC/SVC.cpp @@ -0,0 +1,195 @@ +// +// SVC.cpp +// +// Created by Marc Melikyan on 10/2/20. 
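// A compact standalone sketch of the per-weight rule implemented by Reg::regDerivTerm above:
//   Ridge:          lambda * w
//   Lasso:          lambda * sign(w)
//   ElasticNet:     alpha * lambda * sign(w) + (1 - alpha) * lambda * w
//   WeightClipping: w clamped to [lambda, alpha] (lower/upper clipping thresholds)
// regWeights() subtracts this term from the weights, except for WeightClipping, where the
// clipped weights are returned directly. The enum and function name here are illustrative.
#include <algorithm>

enum class Penalty { Ridge, Lasso, ElasticNet, WeightClipping };

double regDeriv(double w, double lambda, double alpha, Penalty p) {
    double sgn = (w > 0) ? 1.0 : ((w < 0) ? -1.0 : 0.0);
    switch (p) {
        case Penalty::Ridge:          return lambda * w;
        case Penalty::Lasso:          return lambda * sgn;
        case Penalty::ElasticNet:     return alpha * lambda * sgn + (1 - alpha) * lambda * w;
        case Penalty::WeightClipping: return std::clamp(w, lambda, alpha);
    }
    return 0.0;
}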
+// + +#include "SVC.hpp" +#include "Activation/Activation.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Regularization/Reg.hpp" +#include "Utilities/Utilities.hpp" +#include "Cost/Cost.hpp" + +#include +#include + +namespace MLPP{ + SVC::SVC(std::vector> inputSet, std::vector outputSet, double C) + : inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), C(C) + { + y_hat.resize(n); + weights = Utilities::weightInitialization(k); + bias = Utilities::biasInitialization(); + } + + std::vector SVC::modelSetTest(std::vector> X){ + return Evaluate(X); + } + + double SVC::modelTest(std::vector x){ + return Evaluate(x); + } + + void SVC::gradientDescent(double learning_rate, int max_epoch, bool UI){ + class Cost cost; + Activation avn; + LinAlg alg; + Reg regularization; + double cost_prev = 0; + int epoch = 1; + forwardPass(); + + while(true){ + cost_prev = Cost(y_hat, outputSet, weights, C); + + weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputSet), cost.HingeLossDeriv(z, outputSet, C)))); + weights = regularization.regWeights(weights, learning_rate/n, 0, "Ridge"); + + // Calculating the bias gradients + bias += learning_rate * alg.sum_elements(cost.HingeLossDeriv(y_hat, outputSet, C)) / n; + + forwardPass(); + + // UI PORTION + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet, weights, C)); + Utilities::UI(weights, bias); + } + epoch++; + + if(epoch > max_epoch) { break; } + + } + } + + void SVC::SGD(double learning_rate, int max_epoch, bool UI){ + class Cost cost; + Activation avn; + LinAlg alg; + Reg regularization; + + double cost_prev = 0; + int epoch = 1; + + while(true){ + std::random_device rd; + std::default_random_engine generator(rd()); + std::uniform_int_distribution distribution(0, int(n - 1)); + int outputIndex = distribution(generator); + + double y_hat = Evaluate(inputSet[outputIndex]); + double z = propagate(inputSet[outputIndex]); + cost_prev = Cost({z}, {outputSet[outputIndex]}, weights, C); + + double costDeriv = cost.HingeLossDeriv(std::vector({z}), std::vector({outputSet[outputIndex]}), C)[0]; // Explicit conversion to avoid ambiguity with overloaded function. Error occured on Ubuntu. 
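// A minimal standalone sketch of the hinge-loss subgradient driving the updates above: with
// labels y in {-1, +1} and score z = w.x + b, the loss max(0, 1 - y*z) has subgradient -y
// with respect to z when the margin is violated (y*z < 1) and 0 otherwise. How the library
// scales this by C and by the sample count inside Cost::HingeLossDeriv is not shown here and
// is left as an assumption; the helper name is illustrative.
#include <vector>

std::vector<double> hingeSubgradient(const std::vector<double>& z, const std::vector<double>& y) {
    std::vector<double> deriv(z.size(), 0.0);
    for (size_t i = 0; i < z.size(); i++) {
        if (y[i] * z[i] < 1) {
            deriv[i] = -y[i];   // margin violated: push z toward the correct side
        }
    }
    return deriv;
}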
+ + // Weight Updation + weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate * costDeriv, inputSet[outputIndex])); + weights = regularization.regWeights(weights, learning_rate, 0, "Ridge"); + + // Bias updation + bias -= learning_rate * costDeriv; + + y_hat = Evaluate({inputSet[outputIndex]}); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost({z}, {outputSet[outputIndex]}, weights, C)); + Utilities::UI(weights, bias); + } + epoch++; + + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + void SVC::MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI){ + class Cost cost; + Activation avn; + LinAlg alg; + Reg regularization; + double cost_prev = 0; + int epoch = 1; + + // Creating the mini-batches + int n_mini_batch = n/mini_batch_size; + auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch); + + while(true){ + for(int i = 0; i < n_mini_batch; i++){ + std::vector y_hat = Evaluate(inputMiniBatches[i]); + std::vector z = propagate(inputMiniBatches[i]); + cost_prev = Cost(z, outputMiniBatches[i], weights, C); + + // Calculating the weight gradients + weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), cost.HingeLossDeriv(z, outputMiniBatches[i], C)))); + weights = regularization.regWeights(weights, learning_rate/n, 0, "Ridge"); + + + // Calculating the bias gradients + bias -= learning_rate * alg.sum_elements(cost.HingeLossDeriv(y_hat, outputMiniBatches[i], C)) / n; + + forwardPass(); + + y_hat = Evaluate(inputMiniBatches[i]); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(z, outputMiniBatches[i], weights, C)); + Utilities::UI(weights, bias); + } + } + epoch++; + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + double SVC::score(){ + Utilities util; + return util.performance(y_hat, outputSet); + } + + void SVC::save(std::string fileName){ + Utilities util; + util.saveParameters(fileName, weights, bias); + } + + double SVC::Cost(std::vector z, std::vector y, std::vector weights, double C){ + class Cost cost; + return cost.HingeLoss(z, y, weights, C); + } + + std::vector SVC::Evaluate(std::vector> X){ + LinAlg alg; + Activation avn; + return avn.sign(alg.scalarAdd(bias, alg.mat_vec_mult(X, weights))); + } + + std::vectorSVC::propagate(std::vector> X){ + LinAlg alg; + Activation avn; + return alg.scalarAdd(bias, alg.mat_vec_mult(X, weights)); + } + + double SVC::Evaluate(std::vector x){ + LinAlg alg; + Activation avn; + return avn.sign(alg.dot(weights, x) + bias); + } + + double SVC::propagate(std::vector x){ + LinAlg alg; + Activation avn; + return alg.dot(weights, x) + bias; + } + + // sign ( wTx + b ) + void SVC::forwardPass(){ + LinAlg alg; + Activation avn; + + z = propagate(inputSet); + y_hat = avn.sign(z); + } +} \ No newline at end of file diff --git a/MLPP/SVC/SVC.hpp b/MLPP/SVC/SVC.hpp new file mode 100644 index 0000000..094f1ea --- /dev/null +++ b/MLPP/SVC/SVC.hpp @@ -0,0 +1,56 @@ +// +// SVC.hpp +// +// Created by Marc Melikyan on 10/2/20. +// + + +// https://towardsdatascience.com/svm-implementation-from-scratch-python-2db2fc52e5c2 +// Illustratd a practical definition of the Hinge Loss function and its gradient when optimizing with SGD. 
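// A minimal usage sketch for the class declared below, assuming -1/+1 label encoding (which
// the sign(wTx + b) decision rule above implies), made-up data values, and an include path
// relative to the MLPP directory.
#include <iostream>
#include <vector>
#include "SVC/SVC.hpp"

int main() {
    std::vector<std::vector<double>> X = {{0.0, 0.1}, {0.2, 0.3}, {2.0, 2.1}, {2.2, 2.4}};
    std::vector<double> y = {-1, -1, 1, 1};

    MLPP::SVC model(X, y, /*C=*/1.0);
    model.gradientDescent(/*learning_rate=*/0.01, /*max_epoch=*/1000, /*UI=*/false);
    std::cout << "Accuracy: " << model.score() << std::endl;
    return 0;
}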
+#ifndef SVC_hpp +#define SVC_hpp + + +#include +#include + +namespace MLPP { + + class SVC{ + + public: + SVC(std::vector> inputSet, std::vector outputSet, double C); + std::vector modelSetTest(std::vector> X); + double modelTest(std::vector x); + void gradientDescent(double learning_rate, int max_epoch, bool UI = 1); + void SGD(double learning_rate, int max_epoch, bool UI = 1); + void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1); + double score(); + void save(std::string fileName); + private: + + double Cost(std::vector y_hat, std::vector y, std::vector weights, double C); + + std::vector Evaluate(std::vector> X); + std::vector propagate(std::vector> X); + double Evaluate(std::vector x); + double propagate(std::vector x); + void forwardPass(); + + std::vector> inputSet; + std::vector outputSet; + std::vector z; + std::vector y_hat; + std::vector weights; + double bias; + + double C; + int n; + int k; + + // UI Portion + void UI(int epoch, double cost_prev); + }; +} + +#endif /* SVC_hpp */ diff --git a/MLPP/SoftmaxNet/SoftmaxNet.cpp b/MLPP/SoftmaxNet/SoftmaxNet.cpp new file mode 100644 index 0000000..15907a4 --- /dev/null +++ b/MLPP/SoftmaxNet/SoftmaxNet.cpp @@ -0,0 +1,290 @@ +// +// SoftmaxNet.cpp +// +// Created by Marc Melikyan on 10/2/20. +// + +#include "SoftmaxNet.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Data/Data.hpp" +#include "Regularization/Reg.hpp" +#include "Activation/Activation.hpp" +#include "Utilities/Utilities.hpp" +#include "Cost/Cost.hpp" + +#include +#include + +namespace MLPP{ + SoftmaxNet::SoftmaxNet(std::vector> inputSet, std::vector> outputSet, int n_hidden, std::string reg, double lambda, double alpha) + : inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), n_hidden(n_hidden), n_class(outputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha) + { + y_hat.resize(n); + + weights1 = Utilities::weightInitialization(k, n_hidden); + weights2 = Utilities::weightInitialization(n_hidden, n_class); + bias1 = Utilities::biasInitialization(n_hidden); + bias2 = Utilities::biasInitialization(n_class); + } + + std::vector SoftmaxNet::modelTest(std::vector x){ + return Evaluate(x); + } + + std::vector> SoftmaxNet::modelSetTest(std::vector> X){ + return Evaluate(X); + } + + void SoftmaxNet::gradientDescent(double learning_rate, int max_epoch, bool UI){ + Activation avn; + LinAlg alg; + Reg regularization; + double cost_prev = 0; + int epoch = 1; + forwardPass(); + + while(true){ + cost_prev = Cost(y_hat, outputSet); + + // Calculating the errors + std::vector> error = alg.subtraction(y_hat, outputSet); + + // Calculating the weight/bias gradients for layer 2 + + std::vector> D2_1 = alg.matmult(alg.transpose(a2), error); + + // weights and bias updation for layer 2 + weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate, D2_1)); + weights2 = regularization.regWeights(weights2, lambda, alpha, reg); + + bias2 = alg.subtractMatrixRows(bias2, alg.scalarMultiply(learning_rate, error)); + + //Calculating the weight/bias for layer 1 + + std::vector> D1_1 = alg.matmult(error, alg.transpose(weights2)); + + std::vector> D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, 1)); + + std::vector> D1_3 = alg.matmult(alg.transpose(inputSet), D1_2); + + + // weight an bias updation for layer 1 + weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate, D1_3)); + weights1 = regularization.regWeights(weights1, lambda, alpha, reg); + + bias1 = alg.subtractMatrixRows(bias1, 
alg.scalarMultiply(learning_rate, D1_2)); + + forwardPass(); + + // UI PORTION + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); + std::cout << "Layer 1:" << std::endl; + Utilities::UI(weights1, bias1); + std::cout << "Layer 2:" << std::endl; + Utilities::UI(weights2, bias2); + } + epoch++; + + if(epoch > max_epoch) { break; } + } + + } + + void SoftmaxNet::SGD(double learning_rate, int max_epoch, bool UI){ + Activation avn; + LinAlg alg; + Reg regularization; + double cost_prev = 0; + int epoch = 1; + + while(true){ + std::random_device rd; + std::default_random_engine generator(rd()); + std::uniform_int_distribution distribution(0, int(n - 1)); + int outputIndex = distribution(generator); + + std::vector y_hat = Evaluate(inputSet[outputIndex]); + auto [z2, a2] = propagate(inputSet[outputIndex]); + cost_prev = Cost({y_hat}, {outputSet[outputIndex]}); + std::vector error = alg.subtraction(y_hat, outputSet[outputIndex]); + + // Weight updation for layer 2 + std::vector> D2_1 = alg.outerProduct(error, a2); + weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate, alg.transpose(D2_1))); + weights2 = regularization.regWeights(weights2, lambda, alpha, reg); + + // Bias updation for layer 2 + bias2 = alg.subtraction(bias2, alg.scalarMultiply(learning_rate, error)); + + // Weight updation for layer 1 + std::vector D1_1 = alg.mat_vec_mult(weights2, error); + std::vector D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, 1)); + std::vector> D1_3 = alg.outerProduct(inputSet[outputIndex], D1_2); + + weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate, D1_3)); + weights1 = regularization.regWeights(weights1, lambda, alpha, reg); + // Bias updation for layer 1 + + bias1 = alg.subtraction(bias1, alg.scalarMultiply(learning_rate, D1_2)); + + y_hat = Evaluate(inputSet[outputIndex]); + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost({y_hat}, {outputSet[outputIndex]})); + std::cout << "Layer 1:" << std::endl; + Utilities::UI(weights1, bias1); + std::cout << "Layer 2:" << std::endl; + Utilities::UI(weights2, bias2); + } + epoch++; + + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + void SoftmaxNet::MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI){ + Activation avn; + LinAlg alg; + Reg regularization; + double cost_prev = 0; + int epoch = 1; + + // Creating the mini-batches + int n_mini_batch = n/mini_batch_size; + auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch); + + // Creating the mini-batches + for(int i = 0; i < n_mini_batch; i++){ + std::vector> currentInputSet; + std::vector> currentOutputSet; + for(int j = 0; j < n/n_mini_batch; j++){ + currentInputSet.push_back(inputSet[n/n_mini_batch * i + j]); + currentOutputSet.push_back(outputSet[n/n_mini_batch * i + j]); + } + inputMiniBatches.push_back(currentInputSet); + outputMiniBatches.push_back(currentOutputSet); + } + + if(double(n)/double(n_mini_batch) - int(n/n_mini_batch) != 0){ + for(int i = 0; i < n - n/n_mini_batch * n_mini_batch; i++){ + inputMiniBatches[n_mini_batch - 1].push_back(inputSet[n/n_mini_batch * n_mini_batch + i]); + outputMiniBatches[n_mini_batch - 1].push_back(outputSet[n/n_mini_batch * n_mini_batch + i]); + } + } + + while(true){ + for(int i = 0; i < n_mini_batch; i++){ + std::vector> y_hat = Evaluate(inputMiniBatches[i]); + auto [z2, a2] = propagate(inputMiniBatches[i]); + cost_prev = Cost(y_hat, outputMiniBatches[i]); + + // Calculating the errors + std::vector> error = 
alg.subtraction(y_hat, outputMiniBatches[i]); + + // Calculating the weight/bias gradients for layer 2 + + std::vector> D2_1 = alg.matmult(alg.transpose(a2), error); + + // weights and bias updation for layser 2 + weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate, D2_1)); + weights2 = regularization.regWeights(weights2, lambda, alpha, reg); + + // Bias Updation for layer 2 + bias2 = alg.subtractMatrixRows(bias2, alg.scalarMultiply(learning_rate, error)); + + //Calculating the weight/bias for layer 1 + + std::vector> D1_1 = alg.matmult(error, alg.transpose(weights2)); + + std::vector> D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, 1)); + + std::vector> D1_3 = alg.matmult(alg.transpose(inputMiniBatches[i]), D1_2); + + + // weight an bias updation for layer 1 + weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate, D1_3)); + weights1 = regularization.regWeights(weights1, lambda, alpha, reg); + + bias1 = alg.subtractMatrixRows(bias1, alg.scalarMultiply(learning_rate, D1_2)); + + y_hat = Evaluate(inputMiniBatches[i]); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i])); + std::cout << "Layer 1:" << std::endl; + Utilities::UI(weights1, bias1); + std::cout << "Layer 2:" << std::endl; + Utilities::UI(weights2, bias2); + } + } + epoch++; + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + double SoftmaxNet::score(){ + Utilities util; + return util.performance(y_hat, outputSet); + } + + void SoftmaxNet::save(std::string fileName){ + Utilities util; + util.saveParameters(fileName, weights1, bias1, 0, 1); + util.saveParameters(fileName, weights2, bias2, 1, 2); + + LinAlg alg; + } + + std::vector> SoftmaxNet::getEmbeddings(){ + return weights1; + } + + double SoftmaxNet::Cost(std::vector> y_hat, std::vector> y){ + Reg regularization; + Data data; + class Cost cost; + return cost.CrossEntropy(y_hat, y) + regularization.regTerm(weights1, lambda, alpha, reg) + regularization.regTerm(weights2, lambda, alpha, reg); + } + + std::vector> SoftmaxNet::Evaluate(std::vector> X){ + LinAlg alg; + Activation avn; + std::vector> z2 = alg.mat_vec_add(alg.matmult(X, weights1), bias1); + std::vector> a2 = avn.sigmoid(z2); + return avn.adjSoftmax(alg.mat_vec_add(alg.matmult(a2, weights2), bias2)); + } + + std::tuple>, std::vector>> SoftmaxNet::propagate(std::vector> X){ + LinAlg alg; + Activation avn; + std::vector> z2 = alg.mat_vec_add(alg.matmult(X, weights1), bias1); + std::vector> a2 = avn.sigmoid(z2); + return {z2, a2}; + } + + std::vector SoftmaxNet::Evaluate(std::vector x){ + LinAlg alg; + Activation avn; + std::vector z2 = alg.addition(alg.mat_vec_mult(alg.transpose(weights1), x), bias1); + std::vector a2 = avn.sigmoid(z2); + return avn.adjSoftmax(alg.addition(alg.mat_vec_mult(alg.transpose(weights2), a2), bias2)); + } + + std::tuple, std::vector> SoftmaxNet::propagate(std::vector x){ + LinAlg alg; + Activation avn; + std::vector z2 = alg.addition(alg.mat_vec_mult(alg.transpose(weights1), x), bias1); + std::vector a2 = avn.sigmoid(z2); + return {z2, a2}; + } + + void SoftmaxNet::forwardPass(){ + LinAlg alg; + Activation avn; + z2 = alg.mat_vec_add(alg.matmult(inputSet, weights1), bias1); + a2 = avn.sigmoid(z2); + y_hat = avn.adjSoftmax(alg.mat_vec_add(alg.matmult(a2, weights2), bias2)); + } +} \ No newline at end of file diff --git a/MLPP/SoftmaxNet/SoftmaxNet.hpp b/MLPP/SoftmaxNet/SoftmaxNet.hpp new file mode 100644 index 0000000..a606481 --- /dev/null +++ b/MLPP/SoftmaxNet/SoftmaxNet.hpp @@ -0,0 +1,66 @@ +// +// 
SoftmaxNet.hpp +// +// Created by Marc Melikyan on 10/2/20. +// + +#ifndef SoftmaxNet_hpp +#define SoftmaxNet_hpp + + +#include +#include + +namespace MLPP { + + class SoftmaxNet{ + + public: + SoftmaxNet(std::vector> inputSet, std::vector> outputSet, int n_hidden, std::string reg = "None", double lambda = 0.5, double alpha = 0.5); + std::vector modelTest(std::vector x); + std::vector> modelSetTest(std::vector> X); + void gradientDescent(double learning_rate, int max_epoch, bool UI = 1); + void SGD(double learning_rate, int max_epoch, bool UI = 1); + void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1); + double score(); + void save(std::string fileName); + + std::vector> getEmbeddings(); // This class is used (mostly) for word2Vec. This function returns our embeddings. + private: + + double Cost(std::vector> y_hat, std::vector> y); + + std::vector> Evaluate(std::vector> X); + std::tuple>, std::vector>> propagate(std::vector> X); + std::vector Evaluate(std::vector x); + std::tuple, std::vector> propagate(std::vector x); + void forwardPass(); + + std::vector> inputSet; + std::vector> outputSet; + std::vector> y_hat; + + std::vector> weights1; + std::vector> weights2; + + std::vector bias1; + std::vector bias2; + + std::vector> z2; + std::vector> a2; + + int n; + int k; + int n_class; + int n_hidden; + + // Regularization Params + std::string reg; + double lambda; + double alpha; /* This is the controlling param for Elastic Net*/ + + + }; +} + +#endif /* SoftmaxNet_hpp */ diff --git a/MLPP/SoftmaxReg/SoftmaxReg.cpp b/MLPP/SoftmaxReg/SoftmaxReg.cpp new file mode 100644 index 0000000..a3bac01 --- /dev/null +++ b/MLPP/SoftmaxReg/SoftmaxReg.cpp @@ -0,0 +1,192 @@ +// +// SoftmaxReg.cpp +// +// Created by Marc Melikyan on 10/2/20. 
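// A minimal standalone sketch of the softmax mapping used by SoftmaxNet above and SoftmaxReg
// below: softmax(z)_i = exp(z_i) / sum_j exp(z_j), turning a vector of logits into class
// probabilities. Subtracting the max logit first keeps the exponentials from overflowing;
// whether Activation::softmax does the same is an assumption, and the function name here is
// illustrative.
#include <algorithm>
#include <cmath>
#include <vector>

std::vector<double> stableSoftmax(const std::vector<double>& z) {
    double zmax = *std::max_element(z.begin(), z.end());
    std::vector<double> p(z.size());
    double sum = 0.0;
    for (size_t i = 0; i < z.size(); i++) {
        p[i] = std::exp(z[i] - zmax);   // shift by the max for numerical stability
        sum += p[i];
    }
    for (double& v : p) v /= sum;
    return p;
}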
+// + +#include "SoftmaxReg.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Regularization/Reg.hpp" +#include "Activation/Activation.hpp" +#include "Utilities/Utilities.hpp" +#include "Cost/Cost.hpp" + +#include +#include + +namespace MLPP{ + SoftmaxReg::SoftmaxReg(std::vector> inputSet, std::vector> outputSet, std::string reg, double lambda, double alpha) + : inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), n_class(outputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha) + { + y_hat.resize(n); + weights = Utilities::weightInitialization(k, n_class); + bias = Utilities::biasInitialization(n_class); + } + + std::vector SoftmaxReg::modelTest(std::vector x){ + return Evaluate(x); + + } + + std::vector> SoftmaxReg::modelSetTest(std::vector> X){ + return Evaluate(X); + } + + void SoftmaxReg::gradientDescent(double learning_rate, int max_epoch, bool UI){ + LinAlg alg; + Reg regularization; + double cost_prev = 0; + int epoch = 1; + forwardPass(); + + while(true){ + cost_prev = Cost(y_hat, outputSet); + std::vector> error = alg.subtraction(y_hat, outputSet); + + + //Calculating the weight gradients + std::vector> w_gradient = alg.matmult(alg.transpose(inputSet), error); + + //Weight updation + weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, w_gradient)); + weights = regularization.regWeights(weights, lambda, alpha, reg); + + + // Calculating the bias gradients + //double b_gradient = alg.sum_elements(error); + + // Bias Updation + bias = alg.subtractMatrixRows(bias, alg.scalarMultiply(learning_rate, error)); + + forwardPass(); + + // UI PORTION + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); + Utilities::UI(weights, bias); + } + epoch++; + + if(epoch > max_epoch) { break; } + } + } + + void SoftmaxReg::SGD(double learning_rate, int max_epoch, bool UI){ + LinAlg alg; + Reg regularization; + double cost_prev = 0; + int epoch = 1; + + while(true){ + std::random_device rd; + std::default_random_engine generator(rd()); + std::uniform_int_distribution distribution(0, int(n - 1)); + double outputIndex = distribution(generator); + + std::vector y_hat = Evaluate(inputSet[outputIndex]); + cost_prev = Cost({y_hat}, {outputSet[outputIndex]}); + + // Calculating the weight gradients + std::vector> w_gradient = alg.outerProduct(inputSet[outputIndex], alg.subtraction(y_hat, outputSet[outputIndex])); + + // Weight Updation + weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, w_gradient)); + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Calculating the bias gradients + std::vector b_gradient = alg.subtraction(y_hat, outputSet[outputIndex]); + + // Bias updation + bias = alg.subtraction(bias, alg.scalarMultiply(learning_rate, b_gradient)); + + y_hat = Evaluate({inputSet[outputIndex]}); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost({y_hat}, {outputSet[outputIndex]})); + Utilities::UI(weights, bias); + } + epoch++; + + if(epoch > max_epoch) { break; } + } + forwardPass(); + + } + + void SoftmaxReg::MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI){ + LinAlg alg; + Reg regularization; + double cost_prev = 0; + int epoch = 1; + + // Creating the mini-batches + int n_mini_batch = n/mini_batch_size; + auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch); + + while(true){ + for(int i = 0; i < n_mini_batch; i++){ + std::vector> y_hat = Evaluate(inputMiniBatches[i]); + cost_prev = Cost(y_hat, 
outputMiniBatches[i]); + + std::vector> error = alg.subtraction(y_hat, outputMiniBatches[i]); + + // Calculating the weight gradients + std::vector> w_gradient = alg.matmult(alg.transpose(inputMiniBatches[i]), error); + + //Weight updation + weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, w_gradient)); + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Calculating the bias gradients + bias = alg.subtractMatrixRows(bias, alg.scalarMultiply(learning_rate, error)); + y_hat = Evaluate(inputMiniBatches[i]); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i])); + Utilities::UI(weights, bias); + } + } + epoch++; + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + double SoftmaxReg::score(){ + Utilities util; + return util.performance(y_hat, outputSet); + } + + void SoftmaxReg::save(std::string fileName){ + Utilities util; + util.saveParameters(fileName, weights, bias); + } + + double SoftmaxReg::Cost(std::vector> y_hat, std::vector> y){ + Reg regularization; + class Cost cost; + return cost.CrossEntropy(y_hat, y) + regularization.regTerm(weights, lambda, alpha, reg); + } + + std::vector SoftmaxReg::Evaluate(std::vector x){ + LinAlg alg; + Activation avn; + return avn.softmax(alg.addition(bias, alg.mat_vec_mult(alg.transpose(weights), x))); + + } + + std::vector> SoftmaxReg::Evaluate(std::vector> X){ + LinAlg alg; + Activation avn; + + return avn.softmax(alg.mat_vec_add(alg.matmult(X, weights), bias)); + } + + // softmax ( wTx + b ) + void SoftmaxReg::forwardPass(){ + LinAlg alg; + Activation avn; + + y_hat = avn.softmax(alg.mat_vec_add(alg.matmult(inputSet, weights), bias)); + } +} \ No newline at end of file diff --git a/MLPP/SoftmaxReg/SoftmaxReg.hpp b/MLPP/SoftmaxReg/SoftmaxReg.hpp new file mode 100644 index 0000000..8e16bd4 --- /dev/null +++ b/MLPP/SoftmaxReg/SoftmaxReg.hpp @@ -0,0 +1,54 @@ +// +// SoftmaxReg.hpp +// +// Created by Marc Melikyan on 10/2/20. +// + +#ifndef SoftmaxReg_hpp +#define SoftmaxReg_hpp + + +#include +#include + +namespace MLPP { + + class SoftmaxReg{ + + public: + SoftmaxReg(std::vector> inputSet, std::vector> outputSet, std::string reg = "None", double lambda = 0.5, double alpha = 0.5); + std::vector modelTest(std::vector x); + std::vector> modelSetTest(std::vector> X); + void gradientDescent(double learning_rate, int max_epoch, bool UI = 1); + void SGD(double learning_rate, int max_epoch, bool UI = 1); + void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1); + double score(); + void save(std::string fileName); + private: + + double Cost(std::vector> y_hat, std::vector> y); + + std::vector> Evaluate(std::vector> X); + std::vector Evaluate(std::vector x); + void forwardPass(); + + std::vector> inputSet; + std::vector> outputSet; + std::vector> y_hat; + std::vector> weights; + std::vector bias; + + int n; + int k; + int n_class; + + // Regularization Params + std::string reg; + double lambda; + double alpha; /* This is the controlling param for Elastic Net*/ + + + }; +} + +#endif /* SoftmaxReg_hpp */ diff --git a/MLPP/Stat/Stat.cpp b/MLPP/Stat/Stat.cpp new file mode 100644 index 0000000..1fd8d1f --- /dev/null +++ b/MLPP/Stat/Stat.cpp @@ -0,0 +1,219 @@ +// +// Stat.cpp +// +// Created by Marc Melikyan on 9/29/20. 
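// A minimal standalone sketch of the full-batch step performed by gradientDescent above: for
// softmax outputs with cross-entropy, the gradients reduce to dW = X^T (Y_hat - Y) and
// db = column sums of (Y_hat - Y), which is why the code forms the error matrix once and
// reuses it for both updates. Plain nested vectors, no regularization; names are illustrative.
#include <vector>

using Matrix = std::vector<std::vector<double>>;

void softmaxRegStep(Matrix& W, std::vector<double>& b,
                    const Matrix& X, const Matrix& Y_hat, const Matrix& Y, double lr) {
    size_t n = X.size();        // samples
    size_t k = W.size();        // input features
    size_t c = b.size();        // classes
    for (size_t j = 0; j < k; j++) {
        for (size_t m = 0; m < c; m++) {
            double g = 0.0;
            for (size_t i = 0; i < n; i++) g += X[i][j] * (Y_hat[i][m] - Y[i][m]);
            W[j][m] -= lr * g;          // dW = X^T (Y_hat - Y)
        }
    }
    for (size_t m = 0; m < c; m++) {
        double g = 0.0;
        for (size_t i = 0; i < n; i++) g += Y_hat[i][m] - Y[i][m];
        b[m] -= lr * g;                 // db = column sums of the error
    }
}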
+// + +#include "Stat.hpp" +#include "Activation/Activation.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Data/Data.hpp" +#include +#include +#include + +#include + +namespace MLPP{ + double Stat::b0Estimation(const std::vector& x, const std::vector& y){ + return mean(y) - b1Estimation(x, y) * mean(x); + } + + double Stat::b1Estimation(const std::vector& x, const std::vector& y){ + return covariance(x, y) / variance(x); + } + + double Stat::mean(const std::vector& x){ + double sum = 0; + for(int i = 0; i < x.size(); i++){ + sum += x[i]; + } + return sum / x.size(); + } + + double Stat::median(std::vector x){ + double center = double(x.size())/double(2); + sort(x.begin(), x.end()); + if(x.size() % 2 == 0){ + return mean({x[center - 1], x[center]}); + } + else{ + return x[center - 1 + 0.5]; + } + } + + std::vector Stat::mode(const std::vector& x){ + Data data; + std::vector x_set = data.vecToSet(x); + std::map element_num; + for(int i = 0; i < x_set.size(); i++){ + element_num[x[i]] = 0; + } + for(int i = 0; i < x.size(); i++){ + element_num[x[i]]++; + } + std::vector modes; + double max_num = element_num[x_set[0]]; + for(int i = 0; i < x_set.size(); i++){ + if(element_num[x_set[i]] > max_num){ + max_num = element_num[x_set[i]]; + modes.clear(); + modes.push_back(x_set[i]); + } + else if(element_num[x_set[i]] == max_num){ + modes.push_back(x_set[i]); + } + } + return modes; + } + + double Stat::range(const std::vector& x){ + LinAlg alg; + return alg.max(x) - alg.min(x); + } + + double Stat::midrange(const std::vector& x){ + return range(x)/2; + } + + double Stat::absAvgDeviation(const std::vector& x){ + double sum = 0; + for(int i = 0; i < x.size(); i++){ + sum += std::abs(x[i] - mean(x)); + } + return sum / x.size(); + } + + double Stat::standardDeviation(const std::vector& x){ + return std::sqrt(variance(x)); + } + + double Stat::variance(const std::vector& x){ + double sum = 0; + for(int i = 0; i < x.size(); i++){ + sum += (x[i] - mean(x)) * (x[i] - mean(x)); + } + return sum / (x.size() - 1); + } + + double Stat::covariance(const std::vector& x, const std::vector& y){ + double sum = 0; + for(int i = 0; i < x.size(); i++){ + sum += (x[i] - mean(x)) * (y[i] - mean(y)); + } + return sum / (x.size() - 1); + } + + double Stat::correlation(const std::vector& x, const std::vector& y){ + return covariance(x, y) / (standardDeviation(x) * standardDeviation(y)); + } + + double Stat::R2(const std::vector& x, const std::vector& y){ + return correlation(x, y) * correlation(x, y); + } + + double Stat::chebyshevIneq(const double k){ + // X may or may not belong to a Gaussian Distribution + return 1 - 1 / (k * k); + } + + double Stat::weightedMean(const std::vector& x, const std::vector& weights){ + double sum = 0; + double weights_sum = 0; + for(int i = 0; i < x.size(); i++){ + sum += x[i] * weights[i]; + weights_sum += weights[i]; + } + return sum / weights_sum; + } + + double Stat::geometricMean(const std::vector& x){ + double product = 1; + for(int i = 0; i < x.size(); i++){ + product *= x[i]; + } + return std::pow(product, 1.0/x.size()); + } + + double Stat::harmonicMean(const std::vector& x){ + double sum = 0; + for(int i = 0; i < x.size(); i++){ + sum += 1/x[i]; + } + return x.size()/sum; + } + + double Stat::RMS(const std::vector& x){ + double sum = 0; + for(int i = 0; i < x.size(); i++){ + sum += x[i] * x[i]; + } + return sqrt(sum / x.size()); + } + + double Stat::powerMean(const std::vector& x, const double p){ + double sum = 0; + for(int i = 0; i < x.size(); i++){ + sum += std::pow(x[i], p); + 
} + return std::pow(sum / x.size(), 1/p); + } + + double Stat::lehmerMean(const std::vector& x, const double p){ + double num = 0; + double den = 0; + for(int i = 0; i < x.size(); i++){ + num += std::pow(x[i], p); + den += std::pow(x[i], p - 1); + } + return num/den; + } + + double Stat::weightedLehmerMean(const std::vector& x, const std::vector& weights, const double p){ + double num = 0; + double den = 0; + for(int i = 0; i < x.size(); i++){ + num += weights[i] * std::pow(x[i], p); + den += weights[i] * std::pow(x[i], p - 1); + } + return num/den; + } + + double Stat::heronianMean(const double A, const double B){ + return (A + sqrt(A * B) + B) / 3; + } + + double Stat::contraHarmonicMean(const std::vector& x){ + return lehmerMean(x, 2); + } + + double Stat::heinzMean(const double A, const double B, const double x){ + return (std::pow(A, x) * std::pow(B, 1 - x) + std::pow(A, 1 - x) * std::pow(B, x)) / 2; + } + + double Stat::neumanSandorMean(const double a, const double b){ + Activation avn; + return (a - b) / 2 * avn.arsinh((a - b)/(a + b)); + } + + double Stat::stolarskyMean(const double x, const double y, const double p){ + if(x == y){ + return x; + } + return std::pow((std::pow(x, p) - std::pow(y, p)) / (p * (x - y)), 1/(p - 1)); + } + + double Stat::identricMean(const double x, const double y){ + if(x == y){ + return x; + } + return (1/M_E) * std::pow(std::pow(x, x) / std::pow(y, y), 1/(x-y)); + } + + double Stat::logMean(const double x, const double y){ + if(x == y){ + return x; + } + return (y - x) / (log(y) - std::log(x)); + } +} \ No newline at end of file diff --git a/MLPP/Stat/Stat.hpp b/MLPP/Stat/Stat.hpp new file mode 100644 index 0000000..e6a305e --- /dev/null +++ b/MLPP/Stat/Stat.hpp @@ -0,0 +1,54 @@ +// +// Stat.hpp +// +// Created by Marc Melikyan on 9/29/20. +// + +#ifndef Stat_hpp +#define Stat_hpp + +#include + +namespace MLPP{ + class Stat{ + + public: + // These functions are for univariate lin reg module- not for users. 
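// A minimal standalone sketch of the estimators declared just below and implemented above:
// for the univariate model y ~ b0 + b1 * x, ordinary least squares gives
//   b1 = cov(x, y) / var(x)   and   b0 = mean(y) - b1 * mean(x).
// The helper name and returning the pair directly are illustrative choices.
#include <utility>
#include <vector>

std::pair<double, double> olsLine(const std::vector<double>& x, const std::vector<double>& y) {
    double mx = 0.0, my = 0.0;
    for (size_t i = 0; i < x.size(); i++) { mx += x[i]; my += y[i]; }
    mx /= x.size();
    my /= y.size();

    double sxy = 0.0, sxx = 0.0;
    for (size_t i = 0; i < x.size(); i++) {
        sxy += (x[i] - mx) * (y[i] - my);
        sxx += (x[i] - mx) * (x[i] - mx);
    }
    double b1 = sxy / sxx;        // slope
    double b0 = my - b1 * mx;     // intercept
    return {b0, b1};
}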
+ double b0Estimation(const std::vector& x, const std::vector& y); + double b1Estimation(const std::vector& x, const std::vector& y); + + // Statistical Functions + double mean(const std::vector & x); + double median(std::vector x); + std::vector mode(const std::vector& x); + double range(const std::vector& x); + double midrange(const std::vector& x); + double absAvgDeviation(const std::vector& x); + double standardDeviation(const std::vector& x); + double variance(const std::vector & x); + double covariance(const std::vector& x, const std::vector& y); + double correlation(const std::vector & x, const std::vector& y); + double R2(const std::vector& x, const std::vector& y); + double chebyshevIneq(const double k); + + + // Extras + double weightedMean(const std::vector& x, const std::vector& weights); + double geometricMean(const std::vector& x); + double harmonicMean(const std::vector& x); + double RMS(const std::vector& x); + double powerMean(const std::vector& x, const double p); + double lehmerMean(const std::vector& x, const double p); + double weightedLehmerMean(const std::vector& x, const std::vector& weights, const double p); + double contraHarmonicMean(const std::vector& x); + double heronianMean(const double A, const double B); + double heinzMean(const double A, const double B, const double x); + double neumanSandorMean(const double a, const double b); + double stolarskyMean(const double x, const double y, const double p); + double identricMean(const double x, const double y); + double logMean(const double x, const double y); + + }; +} + +#endif /* Stat_hpp */ diff --git a/MLPP/TanhReg/TanhReg.cpp b/MLPP/TanhReg/TanhReg.cpp new file mode 100644 index 0000000..16c3763 --- /dev/null +++ b/MLPP/TanhReg/TanhReg.cpp @@ -0,0 +1,193 @@ +// +// TanhReg.cpp +// +// Created by Marc Melikyan on 10/2/20. 
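// A short standalone note on the identity TanhReg below relies on: d/dz tanh(z) = 1 - tanh(z)^2,
// so the derivative can be recovered from the already-computed output y_hat = tanh(z). This is
// where the (1 - y_hat * y_hat) factor in its SGD update and the avn.tanh(z, 1) calls come from.
#include <cmath>

double tanhDeriv(double z) {
    double t = std::tanh(z);
    return 1.0 - t * t;   // equals 1 - y_hat^2 when y_hat = tanh(z)
}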
+// + +#include "TanhReg.hpp" +#include "Activation/Activation.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Regularization/Reg.hpp" +#include "Utilities/Utilities.hpp" +#include "Cost/Cost.hpp" + +#include +#include + +namespace MLPP{ + TanhReg::TanhReg(std::vector> inputSet, std::vector outputSet, std::string reg, double lambda, double alpha) + : inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha) + { + y_hat.resize(n); + weights = Utilities::weightInitialization(k); + bias = Utilities::biasInitialization(); + } + + std::vector TanhReg::modelSetTest(std::vector> X){ + return Evaluate(X); + } + + double TanhReg::modelTest(std::vector x){ + return Evaluate(x); + } + + void TanhReg::gradientDescent(double learning_rate, int max_epoch, bool UI){ + Activation avn; + LinAlg alg; + Reg regularization; + double cost_prev = 0; + int epoch = 1; + forwardPass(); + + while(true){ + cost_prev = Cost(y_hat, outputSet); + + std::vector error = alg.subtraction(y_hat, outputSet); + + weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputSet), alg.hadamard_product(error, avn.tanh(z, 1))))); + weights = regularization.regWeights(weights, lambda, alpha, reg); + + + // Calculating the bias gradients + bias -= learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.tanh(z, 1))) / n; + + forwardPass(); + + // UI PORTION + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); + Utilities::UI(weights, bias); + } + epoch++; + + if(epoch > max_epoch) { break; } + + } + } + + void TanhReg::SGD(double learning_rate, int max_epoch, bool UI){ + LinAlg alg; + Reg regularization; + double cost_prev = 0; + int epoch = 1; + + while(true){ + std::random_device rd; + std::default_random_engine generator(rd()); + std::uniform_int_distribution distribution(0, int(n - 1)); + int outputIndex = distribution(generator); + + double y_hat = Evaluate(inputSet[outputIndex]); + cost_prev = Cost({y_hat}, {outputSet[outputIndex]}); + + double error = y_hat - outputSet[outputIndex]; + + // Weight Updation + weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate * error * (1 - y_hat * y_hat), inputSet[outputIndex])); + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Bias updation + bias -= learning_rate * error * (1 - y_hat * y_hat); + + y_hat = Evaluate({inputSet[outputIndex]}); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost({y_hat}, {outputSet[outputIndex]})); + Utilities::UI(weights, bias); + } + epoch++; + + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + void TanhReg::MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI){ + Activation avn; + LinAlg alg; + Reg regularization; + double cost_prev = 0; + int epoch = 1; + + // Creating the mini-batches + int n_mini_batch = n/mini_batch_size; + auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch); + + while(true){ + for(int i = 0; i < n_mini_batch; i++){ + std::vector y_hat = Evaluate(inputMiniBatches[i]); + std::vector z = propagate(inputMiniBatches[i]); + cost_prev = Cost(y_hat, outputMiniBatches[i]); + + std::vector error = alg.subtraction(y_hat, outputMiniBatches[i]); + + // Calculating the weight gradients + weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), alg.hadamard_product(error, avn.tanh(z, 1))))); + weights = 
regularization.regWeights(weights, lambda, alpha, reg); + + + // Calculating the bias gradients + bias -= learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.tanh(z, 1))) / n; + + forwardPass(); + + y_hat = Evaluate(inputMiniBatches[i]); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i])); + Utilities::UI(weights, bias); + } + } + epoch++; + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + double TanhReg::score(){ + Utilities util; + return util.performance(y_hat, outputSet); + } + + void TanhReg::save(std::string fileName){ + Utilities util; + util.saveParameters(fileName, weights, bias); + } + + double TanhReg::Cost(std::vector y_hat, std::vector y){ + Reg regularization; + class Cost cost; + return cost.MSE(y_hat, y) + regularization.regTerm(weights, lambda, alpha, reg); + } + + std::vector TanhReg::Evaluate(std::vector> X){ + LinAlg alg; + Activation avn; + return avn.tanh(alg.scalarAdd(bias, alg.mat_vec_mult(X, weights))); + } + + std::vectorTanhReg::propagate(std::vector> X){ + LinAlg alg; + return alg.scalarAdd(bias, alg.mat_vec_mult(X, weights)); + } + + double TanhReg::Evaluate(std::vector x){ + LinAlg alg; + Activation avn; + return avn.tanh(alg.dot(weights, x) + bias); + } + + double TanhReg::propagate(std::vector x){ + LinAlg alg; + return alg.dot(weights, x) + bias; + } + + // Tanh ( wTx + b ) + void TanhReg::forwardPass(){ + LinAlg alg; + Activation avn; + + z = propagate(inputSet); + y_hat = avn.tanh(z); + } +} \ No newline at end of file diff --git a/MLPP/TanhReg/TanhReg.hpp b/MLPP/TanhReg/TanhReg.hpp new file mode 100644 index 0000000..1bbf5d9 --- /dev/null +++ b/MLPP/TanhReg/TanhReg.hpp @@ -0,0 +1,59 @@ +// +// TanhReg.hpp +// +// Created by Marc Melikyan on 10/2/20. +// + +#ifndef TanhReg_hpp +#define TanhReg_hpp + + +#include +#include + +namespace MLPP { + + class TanhReg{ + + public: + TanhReg(std::vector> inputSet, std::vector outputSet, std::string reg = "None", double lambda = 0.5, double alpha = 0.5); + std::vector modelSetTest(std::vector> X); + double modelTest(std::vector x); + void gradientDescent(double learning_rate, int max_epoch, bool UI = 1); + void SGD(double learning_rate, int max_epoch, bool UI = 1); + void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1); + double score(); + void save(std::string fileName); + private: + + double Cost(std::vector y_hat, std::vector y); + + std::vector Evaluate(std::vector> X); + std::vector propagate(std::vector> X); + double Evaluate(std::vector x); + double propagate(std::vector x); + void forwardPass(); + + std::vector> inputSet; + std::vector outputSet; + std::vector z; + std::vector y_hat; + std::vector weights; + double bias; + + int n; + int k; + + // UI Portion + void UI(int epoch, double cost_prev); + + // Regularization Params + std::string reg; + double lambda; + double alpha; /* This is the controlling param for Elastic Net*/ + + + }; +} + +#endif /* TanhReg_hpp */ diff --git a/MLPP/Transforms/Transforms.cpp b/MLPP/Transforms/Transforms.cpp new file mode 100644 index 0000000..d8ac75c --- /dev/null +++ b/MLPP/Transforms/Transforms.cpp @@ -0,0 +1,59 @@ +// +// Transforms.cpp +// +// Created by Marc Melikyan on 11/13/20. +// + +#include "Transforms.hpp" +#include "LinAlg/LinAlg.hpp" +#include +#include +#include + +namespace MLPP{ + + // DCT ii. 
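// The transform implemented below is the two-dimensional DCT-II: for an M x M block A,
//   B[i][j] = alpha_i * alpha_j * sum_k sum_f A[k][f]
//             * cos(pi * i * (2k + 1) / (2M)) * cos(pi * j * (2f + 1) / (2M)),
// with alpha_0 = 1 / sqrt(M) and alpha_i = sqrt(2 / M) for i > 0. The input is first centered
// around zero by subtracting 128, the usual convention for 8-bit image blocks.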
+ // https://www.mathworks.com/help/images/discrete-cosine-transform.html + std::vector> Transforms::discreteCosineTransform(std::vector> A){ + LinAlg alg; + A = alg.scalarAdd(-128, A); // Center around 0. + + std::vector> B; + B.resize(A.size()); + for(int i = 0; i < B.size(); i++){ + B[i].resize(A[i].size()); + } + + int M = A.size(); + + for(int i = 0; i < B.size(); i++){ + for(int j = 0; j < B[i].size(); j++){ + double sum = 0; + double alphaI; + if(i == 0){ + alphaI = 1/std::sqrt(M); + } + else{ + alphaI = std::sqrt(double(2)/double(M)); + } + double alphaJ; + if(j == 0){ + alphaJ = 1/std::sqrt(M); + } + else{ + alphaJ = std::sqrt(double(2)/double(M)); + } + + for(int k = 0; k < B.size(); k++){ + for(int f = 0; f < B[k].size(); f++){ + sum += A[k][f] * std::cos( (M_PI * i * (2 * k + 1)) / (2 * M)) * std::cos( (M_PI * j * (2 * f + 1)) / (2 * M)); + } + } + B[i][j] = sum; + B[i][j] *= alphaI * alphaJ; + + } + } + return B; + } +} \ No newline at end of file diff --git a/MLPP/Transforms/Transforms.hpp b/MLPP/Transforms/Transforms.hpp new file mode 100644 index 0000000..d8bbb53 --- /dev/null +++ b/MLPP/Transforms/Transforms.hpp @@ -0,0 +1,20 @@ +// +// Transforms.hpp +// +// + +#ifndef Transforms_hpp +#define Transforms_hpp + +#include +#include + +namespace MLPP{ + class Transforms{ + public: + std::vector> discreteCosineTransform(std::vector> A); + + }; +} + +#endif /* Transforms_hpp */ diff --git a/MLPP/UniLinReg/UniLinReg.cpp b/MLPP/UniLinReg/UniLinReg.cpp new file mode 100644 index 0000000..85f207f --- /dev/null +++ b/MLPP/UniLinReg/UniLinReg.cpp @@ -0,0 +1,37 @@ +// +// UniLinReg.cpp +// +// Created by Marc Melikyan on 9/29/20. +// + +#include "UniLinReg.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Stat/Stat.hpp" +#include + + +// General Multivariate Linear Regression Model +// ŷ = b0 + b1x1 + b2x2 + ... + bkxk + + +// Univariate Linear Regression Model +// ŷ = b0 + b1x1 + +namespace MLPP{ + UniLinReg::UniLinReg(std::vector x, std::vector y) + : inputSet(x), outputSet(y) + { + Stat estimator; + b1 = estimator.b1Estimation(inputSet, outputSet); + b0 = estimator.b0Estimation(inputSet, outputSet); + } + + std::vector UniLinReg::modelSetTest(std::vector x){ + LinAlg alg; + return alg.scalarAdd(b0, alg.scalarMultiply(b1, x)); + } + + double UniLinReg::modelTest(double input){ + return b0 + b1 * input; + } +} diff --git a/MLPP/UniLinReg/UniLinReg.hpp b/MLPP/UniLinReg/UniLinReg.hpp new file mode 100644 index 0000000..3ff7715 --- /dev/null +++ b/MLPP/UniLinReg/UniLinReg.hpp @@ -0,0 +1,30 @@ +// +// UniLinReg.hpp +// +// Created by Marc Melikyan on 9/29/20. +// + +#ifndef UniLinReg_hpp +#define UniLinReg_hpp + +#include + +namespace MLPP{ + class UniLinReg{ + + public: + UniLinReg(std::vector x, std::vector y); + std::vector modelSetTest(std::vector x); + double modelTest(double x); + + private: + std::vector inputSet; + std::vector outputSet; + + double b0; + double b1; + + }; +} + +#endif /* UniLinReg_hpp */ diff --git a/MLPP/Utilities/Utilities.cpp b/MLPP/Utilities/Utilities.cpp new file mode 100644 index 0000000..53b5962 --- /dev/null +++ b/MLPP/Utilities/Utilities.cpp @@ -0,0 +1,397 @@ +// +// Reg.cpp +// +// Created by Marc Melikyan on 1/16/21. 
+// + +#include +#include +#include +#include +#include "Utilities.hpp" + +namespace MLPP{ + + std::vector Utilities::weightInitialization(int n, std::string type){ + std::random_device rd; + std::default_random_engine generator(rd()); + + std::vector weights; + for(int i = 0; i < n; i++){ + if(type == "XavierNormal"){ + std::normal_distribution distribution(0, sqrt(2 / (n + 1))); + weights.push_back(distribution(generator)); + } + else if(type == "XavierUniform"){ + std::uniform_real_distribution distribution(-sqrt(6 / (n + 1)), sqrt(6 / (n + 1))); + weights.push_back(distribution(generator)); + } + else if(type == "HeNormal"){ + std::normal_distribution distribution(0, sqrt(2 / n)); + weights.push_back(distribution(generator)); + } + else if(type == "HeUniform"){ + std::uniform_real_distribution distribution(-sqrt(6 / n), sqrt(6 / n)); + weights.push_back(distribution(generator)); + } + else if(type == "LeCunNormal"){ + std::normal_distribution distribution(0, sqrt(1 / n)); + weights.push_back(distribution(generator)); + } + else if(type == "LeCunUniform"){ + std::uniform_real_distribution distribution(-sqrt(3/n), sqrt(3/n)); + weights.push_back(distribution(generator)); + } + else if(type == "Uniform"){ + std::uniform_real_distribution distribution(-1/sqrt(n), 1/sqrt(n)); + weights.push_back(distribution(generator)); + } + else{ + std::uniform_real_distribution distribution(0, 1); + weights.push_back(distribution(generator)); + } + } + return weights; + } + + double Utilities::biasInitialization(){ + std::random_device rd; + std::default_random_engine generator(rd()); + std::uniform_real_distribution distribution(0,1); + + return distribution(generator); + } + + std::vector> Utilities::weightInitialization(int n, int m, std::string type){ + std::random_device rd; + std::default_random_engine generator(rd()); + + std::vector> weights; + weights.resize(n); + + for(int i = 0; i < n; i++){ + for(int j = 0; j < m; j++){ + if(type == "XavierNormal"){ + std::normal_distribution distribution(0, sqrt(2 / (n + m))); + weights[i].push_back(distribution(generator)); + } + else if(type == "XavierUniform"){ + std::uniform_real_distribution distribution(-sqrt(6 / (n + m)), sqrt(6 / (n + m))); + weights[i].push_back(distribution(generator)); + } + else if(type == "HeNormal"){ + std::normal_distribution distribution(0, sqrt(2 / n)); + weights[i].push_back(distribution(generator)); + } + else if(type == "HeUniform"){ + std::uniform_real_distribution distribution(-sqrt(6 / n), sqrt(6 / n)); + weights[i].push_back(distribution(generator)); + } + else if(type == "LeCunNormal"){ + std::normal_distribution distribution(0, sqrt(1 / n)); + weights[i].push_back(distribution(generator)); + } + else if(type == "LeCunUniform"){ + std::uniform_real_distribution distribution(-sqrt(3/n), sqrt(3/n)); + weights[i].push_back(distribution(generator)); + } + else if(type == "Uniform"){ + std::uniform_real_distribution distribution(-1/sqrt(n), 1/sqrt(n)); + weights[i].push_back(distribution(generator)); + } + else{ + std::uniform_real_distribution distribution(0, 1); + weights[i].push_back(distribution(generator)); + } + } + } + return weights; + } + + std::vector Utilities::biasInitialization(int n){ + std::vector bias; + std::random_device rd; + std::default_random_engine generator(rd()); + std::uniform_real_distribution distribution(0,1); + + for(int i = 0; i < n; i++){ + bias.push_back(distribution(generator)); + } + return bias; + } + + double Utilities::performance(std::vector y_hat, std::vector outputSet){ + double 
correct = 0; + for(int i = 0; i < y_hat.size(); i++){ + if(std::round(y_hat[i]) == outputSet[i]){ + correct++; + } + } + return correct/y_hat.size(); + } + + double Utilities::performance(std::vector> y_hat, std::vector> y){ + double correct = 0; + for(int i = 0; i < y_hat.size(); i++){ + int sub_correct = 0; + for(int j = 0; j < y_hat[i].size(); j++){ + if(std::round(y_hat[i][j]) == y[i][j]){ + sub_correct++; + } + if(sub_correct == y_hat[0].size()){ + correct++; + } + } + } + return correct/y_hat.size(); + } + + void Utilities::saveParameters(std::string fileName, std::vector weights, double bias, bool app, int layer){ + std::string layer_info = ""; + std::ofstream saveFile; + + if(layer > -1){ + layer_info = " for layer " + std::to_string(layer); + } + + if(app){ + saveFile.open(fileName.c_str(), std::ios_base::app); + } + else { saveFile.open(fileName.c_str()); } + + if(!saveFile.is_open()){ + std::cout << fileName << " failed to open." << std::endl; + } + + saveFile << "Weight(s)" << layer_info << std::endl; + for(int i = 0; i < weights.size(); i++){ + saveFile << weights[i] << std::endl; + } + saveFile << "Bias" << layer_info << std::endl; + saveFile << bias << std::endl; + + saveFile.close(); + } + + void Utilities::saveParameters(std::string fileName, std::vector weights, std::vector initial, double bias, bool app, int layer){ + std::string layer_info = ""; + std::ofstream saveFile; + + if(layer > -1){ + layer_info = " for layer " + std::to_string(layer); + } + + if(app){ + saveFile.open(fileName.c_str(), std::ios_base::app); + } + else { saveFile.open(fileName.c_str()); } + + if(!saveFile.is_open()){ + std::cout << fileName << " failed to open." << std::endl; + } + + saveFile << "Weight(s)" << layer_info << std::endl; + for(int i = 0; i < weights.size(); i++){ + saveFile << weights[i] << std::endl; + } + + saveFile << "Initial(s)" << layer_info << std::endl; + for(int i = 0; i < initial.size(); i++){ + saveFile << initial[i] << std::endl; + } + + saveFile << "Bias" << layer_info << std::endl; + saveFile << bias << std::endl; + + saveFile.close(); + } + + void Utilities::saveParameters(std::string fileName, std::vector> weights, std::vector bias, bool app, int layer){ + std::string layer_info = ""; + std::ofstream saveFile; + + if(layer > -1){ + layer_info = " for layer " + std::to_string(layer); + } + + if(app){ + saveFile.open(fileName.c_str(), std::ios_base::app); + } + else { saveFile.open(fileName.c_str()); } + + if(!saveFile.is_open()){ + std::cout << fileName << " failed to open." 
<< std::endl; + } + + saveFile << "Weight(s)" << layer_info << std::endl; + for(int i = 0; i < weights.size(); i++){ + for(int j = 0; j < weights[i].size(); j++){ + saveFile << weights[i][j] << std::endl; + } + } + saveFile << "Bias(es)" << layer_info << std::endl; + for(int i = 0; i < bias.size(); i++){ + saveFile << bias[i] << std::endl; + } + + saveFile.close(); + } + + void Utilities::UI(std::vector weights, double bias){ + std::cout << "Values of the weight(s):" << std::endl; + for(int i = 0; i < weights.size(); i++){ + std::cout << weights[i] << std::endl; + } + std:: cout << "Value of the bias:" << std::endl; + std::cout << bias << std::endl; + } + + void Utilities::UI(std::vector> weights, std::vector bias){ + std::cout << "Values of the weight(s):" << std::endl; + for(int i = 0; i < weights.size(); i++){ + for(int j = 0; j < weights[i].size(); j++){ + std::cout << weights[i][j] << std::endl; + } + } + std::cout << "Value of the biases:" << std::endl; + for(int i = 0; i < bias.size(); i++){ + std::cout << bias[i] << std::endl; + } + } + + void Utilities::UI(std::vector weights, std::vector initial, double bias){ + std::cout << "Values of the weight(s):" << std::endl; + for(int i = 0; i < weights.size(); i++){ + std::cout << weights[i] << std::endl; + } + std::cout << "Values of the initial(s):" << std::endl; + for(int i = 0; i < initial.size(); i++){ + std::cout << initial[i] << std::endl; + } + std:: cout << "Value of the bias:" << std::endl; + std::cout << bias << std::endl; + } + + void Utilities::CostInfo(int epoch, double cost_prev, double Cost){ + std::cout << "-----------------------------------" << std::endl; + std::cout << "This is epoch: " << epoch << std::endl; + std::cout << "The cost function has been minimized by " << cost_prev - Cost << std::endl; + std::cout << "Current Cost:" << std::endl; + std::cout << Cost << std::endl; + } + + std::vector>> Utilities::createMiniBatches(std::vector> inputSet, int n_mini_batch){ + int n = inputSet.size(); + + std::vector>> inputMiniBatches; + + // Creating the mini-batches + for(int i = 0; i < n_mini_batch; i++){ + std::vector> currentInputSet; + for(int j = 0; j < n/n_mini_batch; j++){ + currentInputSet.push_back(inputSet[n/n_mini_batch * i + j]); + } + inputMiniBatches.push_back(currentInputSet); + } + + if(double(n)/double(n_mini_batch) - int(n/n_mini_batch) != 0){ + for(int i = 0; i < n - n/n_mini_batch * n_mini_batch; i++){ + inputMiniBatches[n_mini_batch - 1].push_back(inputSet[n/n_mini_batch * n_mini_batch + i]); + } + } + return inputMiniBatches; + } + + std::tuple>>, std::vector>> Utilities::createMiniBatches(std::vector> inputSet, std::vector outputSet, int n_mini_batch){ + int n = inputSet.size(); + + std::vector>> inputMiniBatches; + std::vector> outputMiniBatches; + + for(int i = 0; i < n_mini_batch; i++){ + std::vector> currentInputSet; + std::vector currentOutputSet; + for(int j = 0; j < n/n_mini_batch; j++){ + currentInputSet.push_back(inputSet[n/n_mini_batch * i + j]); + currentOutputSet.push_back(outputSet[n/n_mini_batch * i + j]); + } + inputMiniBatches.push_back(currentInputSet); + outputMiniBatches.push_back(currentOutputSet); + } + + if(double(n)/double(n_mini_batch) - int(n/n_mini_batch) != 0){ + for(int i = 0; i < n - n/n_mini_batch * n_mini_batch; i++){ + inputMiniBatches[n_mini_batch - 1].push_back(inputSet[n/n_mini_batch * n_mini_batch + i]); + outputMiniBatches[n_mini_batch - 1].push_back(outputSet[n/n_mini_batch * n_mini_batch + i]); + } + } + return {inputMiniBatches, outputMiniBatches}; + } + + 
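// Note: each of the n_mini_batch batches receives floor(n / n_mini_batch) examples, and any remainder is appended to the last batch + // (e.g. n = 10 with n_mini_batch = 3 yields batch sizes 3, 3, 4). The same scheme is used by all createMiniBatches overloads. + 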
std::tuple>>, std::vector>>> Utilities::createMiniBatches(std::vector> inputSet, std::vector> outputSet, int n_mini_batch){ + int n = inputSet.size(); + + std::vector>> inputMiniBatches; + std::vector>> outputMiniBatches; + + for(int i = 0; i < n_mini_batch; i++){ + std::vector> currentInputSet; + std::vector> currentOutputSet; + for(int j = 0; j < n/n_mini_batch; j++){ + currentInputSet.push_back(inputSet[n/n_mini_batch * i + j]); + currentOutputSet.push_back(outputSet[n/n_mini_batch * i + j]); + } + inputMiniBatches.push_back(currentInputSet); + outputMiniBatches.push_back(currentOutputSet); + } + + if(double(n)/double(n_mini_batch) - int(n/n_mini_batch) != 0){ + for(int i = 0; i < n - n/n_mini_batch * n_mini_batch; i++){ + inputMiniBatches[n_mini_batch - 1].push_back(inputSet[n/n_mini_batch * n_mini_batch + i]); + outputMiniBatches[n_mini_batch - 1].push_back(outputSet[n/n_mini_batch * n_mini_batch + i]); + } + } + return {inputMiniBatches, outputMiniBatches}; + } + + std::tuple Utilities::TF_PN(std::vector y_hat, std::vector y){ + double TP, FP, TN, FN = 0; + for(int i = 0; i < y_hat.size(); i++){ + if(y_hat[i] == y[i]){ + if(y_hat[i] == 1){ + TP++; + } + else{ + TN++; + } + } + else{ + if(y_hat[i] == 1){ + FP++; + } + else{ + FN++; + } + } + } + return {TP, FP, TN, FN}; + } + + double Utilities::recall(std::vector y_hat, std::vector y){ + auto [TP, FP, TN, FN] = TF_PN(y_hat, y); + return TP / (TP + FN); + } + + double Utilities::precision(std::vector y_hat, std::vector y){ + auto [TP, FP, TN, FN] = TF_PN(y_hat, y); + return TP / (TP + FP); + } + + double Utilities::accuracy(std::vector y_hat, std::vector y){ + auto [TP, FP, TN, FN] = TF_PN(y_hat, y); + return (TP + TN) / (TP + FP + FN + TN); + } + double Utilities::f1_score(std::vector y_hat, std::vector y){ + return 2 * precision(y_hat, y) * recall(y_hat, y) / (precision(y_hat, y) + recall(y_hat, y)); + } +} \ No newline at end of file diff --git a/MLPP/Utilities/Utilities.hpp b/MLPP/Utilities/Utilities.hpp new file mode 100644 index 0000000..8801e3e --- /dev/null +++ b/MLPP/Utilities/Utilities.hpp @@ -0,0 +1,54 @@ +// +// Utilities.hpp +// +// Created by Marc Melikyan on 1/16/21. 
+// + +#ifndef Utilities_hpp +#define Utilities_hpp + +#include +#include +#include + +namespace MLPP{ + class Utilities{ + public: + // Weight Init + static std::vector weightInitialization(int n, std::string type = "Default"); + static double biasInitialization(); + + static std::vector> weightInitialization(int n, int m, std::string type = "Default"); + static std::vector biasInitialization(int n); + + // Cost/Performance related Functions + double performance(std::vector y_hat, std::vector y); + double performance(std::vector> y_hat, std::vector> y); + + // Parameter Saving Functions + void saveParameters(std::string fileName, std::vector weights, double bias, bool app = 0, int layer = -1); + void saveParameters(std::string fileName, std::vector weights, std::vector initial, double bias, bool app = 0, int layer = -1); + void saveParameters(std::string fileName, std::vector> weights, std::vector bias, bool app = 0, int layer = -1); + + // Gradient Descent related + static void UI(std::vector weights, double bias); + static void UI(std::vector weights, std::vector initial, double bias); + static void UI(std::vector>, std::vector bias); + static void CostInfo(int epoch, double cost_prev, double Cost); + + static std::vector>> createMiniBatches(std::vector> inputSet, int n_mini_batch); + static std::tuple>>, std::vector>> createMiniBatches(std::vector> inputSet, std::vector outputSet, int n_mini_batch); + static std::tuple>>, std::vector>>> createMiniBatches(std::vector> inputSet, std::vector> outputSet, int n_mini_batch); + + // F1 score, Precision/Recall, TP, FP, TN, FN, etc. + std::tuple TF_PN(std::vector y_hat, std::vector y); //TF_PN = "True", "False", "Positive", "Negative" + double recall(std::vector y_hat, std::vector y); + double precision(std::vector y_hat, std::vector y); + double accuracy(std::vector y_hat, std::vector y); + double f1_score(std::vector y_hat, std::vector y); + + private: + }; +} + +#endif /* Utilities_hpp */ diff --git a/MLPP/WGAN/WGAN.cpp b/MLPP/WGAN/WGAN.cpp new file mode 100644 index 0000000..2cc56eb --- /dev/null +++ b/MLPP/WGAN/WGAN.cpp @@ -0,0 +1,300 @@ +// +// WGAN.cpp +// +// Created by Marc Melikyan on 11/4/20. +// + +#include "WGAN.hpp" +#include "Activation/Activation.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Regularization/Reg.hpp" +#include "Utilities/Utilities.hpp" +#include "Cost/Cost.hpp" + +#include +#include + +namespace MLPP { + WGAN::WGAN(double k, std::vector> outputSet) + : outputSet(outputSet), n(outputSet.size()), k(k) + { + + } + + WGAN::~WGAN(){ + delete outputLayer; + } + + std::vector> WGAN::generateExample(int n){ + LinAlg alg; + return modelSetTestGenerator(alg.gaussianNoise(n, k)); + } + + void WGAN::gradientDescent(double learning_rate, int max_epoch, bool UI){ + class Cost cost; + LinAlg alg; + double cost_prev = 0; + int epoch = 1; + forwardPass(); + + const int CRITIC_INTERATIONS = 5; // Wasserstein GAN specific parameter. + + while(true){ + cost_prev = Cost(y_hat, alg.onevec(n)); + + + std::vector> generatorInputSet; + std::vector> discriminatorInputSet; + + std::vector y_hat; + std::vector outputSet; + + // Training of the discriminator. + for(int i = 0; i < CRITIC_INTERATIONS; i++){ + generatorInputSet = alg.gaussianNoise(n, k); + discriminatorInputSet = modelSetTestGenerator(generatorInputSet); + discriminatorInputSet.insert(discriminatorInputSet.end(), WGAN::outputSet.begin(), WGAN::outputSet.end()); // Fake + real inputs. 
+ + y_hat = modelSetTestDiscriminator(discriminatorInputSet); + outputSet = alg.scalarMultiply(-1, alg.onevec(n)); // WGAN changes y_i = 1 and y_i = 0 to y_i = 1 and y_i = -1 + std::vector outputSetReal = alg.onevec(n); + outputSet.insert(outputSet.end(), outputSetReal.begin(), outputSetReal.end()); // Fake + real output scores. + + auto [cumulativeDiscriminatorHiddenLayerWGrad, outputDiscriminatorWGrad] = computeDiscriminatorGradients(y_hat, outputSet); + cumulativeDiscriminatorHiddenLayerWGrad = alg.scalarMultiply(learning_rate/n, cumulativeDiscriminatorHiddenLayerWGrad); + outputDiscriminatorWGrad = alg.scalarMultiply(learning_rate/n, outputDiscriminatorWGrad); + updateDiscriminatorParameters(cumulativeDiscriminatorHiddenLayerWGrad, outputDiscriminatorWGrad, learning_rate); + } + + // Training of the generator. + generatorInputSet = alg.gaussianNoise(n, k); + discriminatorInputSet = modelSetTestGenerator(generatorInputSet); + y_hat = modelSetTestDiscriminator(discriminatorInputSet); + outputSet = alg.onevec(n); + + std::vector>> cumulativeGeneratorHiddenLayerWGrad = computeGeneratorGradients(y_hat, outputSet); + cumulativeGeneratorHiddenLayerWGrad = alg.scalarMultiply(learning_rate/n, cumulativeGeneratorHiddenLayerWGrad); + updateGeneratorParameters(cumulativeGeneratorHiddenLayerWGrad, learning_rate); + + forwardPass(); + if(UI) { WGAN::UI(epoch, cost_prev, WGAN::y_hat, alg.onevec(n)); } + + epoch++; + if(epoch > max_epoch) { break; } + } + } + + double WGAN::score(){ + LinAlg alg; + Utilities util; + forwardPass(); + return util.performance(y_hat, alg.onevec(n)); + } + + void WGAN::save(std::string fileName){ + Utilities util; + if(!network.empty()){ + util.saveParameters(fileName, network[0].weights, network[0].bias, 0, 1); + for(int i = 1; i < network.size(); i++){ + util.saveParameters(fileName, network[i].weights, network[i].bias, 1, i + 1); + } + util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, 1, network.size() + 1); + } + else{ + util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, 0, network.size() + 1); + } + } + + void WGAN::addLayer(int n_hidden, std::string activation, std::string weightInit, std::string reg, double lambda, double alpha){ + LinAlg alg; + if(network.empty()){ + network.push_back(HiddenLayer(n_hidden, activation, alg.gaussianNoise(n, k), weightInit, reg, lambda, alpha)); + network[0].forwardPass(); + } + else{ + network.push_back(HiddenLayer(n_hidden, activation, network[network.size() - 1].a, weightInit, reg, lambda, alpha)); + network[network.size() - 1].forwardPass(); + } + } + + void WGAN::addOutputLayer(std::string weightInit, std::string reg, double lambda, double alpha){ + LinAlg alg; + if(!network.empty()){ + outputLayer = new OutputLayer(network[network.size() - 1].n_hidden, "Linear", "WassersteinLoss", network[network.size() - 1].a, weightInit, "WeightClipping", -0.01, 0.01); + } + else{ // Should never happen. 
+ outputLayer = new OutputLayer(k, "Linear", "WassersteinLoss", alg.gaussianNoise(n, k), weightInit, "WeightClipping", -0.01, 0.01); + } + } + + std::vector> WGAN::modelSetTestGenerator(std::vector> X){ + if(!network.empty()){ + network[0].input = X; + network[0].forwardPass(); + + for(int i = 1; i <= network.size()/2; i++){ + network[i].input = network[i - 1].a; + network[i].forwardPass(); + } + } + return network[network.size()/2].a; + } + + std::vector WGAN::modelSetTestDiscriminator(std::vector> X){ + if(!network.empty()){ + for(int i = network.size()/2 + 1; i < network.size(); i++){ + if(i == network.size()/2 + 1){ + network[i].input = X; + } + else { network[i].input = network[i - 1].a; } + network[i].forwardPass(); + } + outputLayer->input = network[network.size() - 1].a; + } + outputLayer->forwardPass(); + return outputLayer->a; + } + + double WGAN::Cost(std::vector y_hat, std::vector y){ + Reg regularization; + class Cost cost; + double totalRegTerm = 0; + + auto cost_function = outputLayer->cost_map[outputLayer->cost]; + if(!network.empty()){ + for(int i = 0; i < network.size() - 1; i++){ + totalRegTerm += regularization.regTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg); + } + } + return (cost.*cost_function)(y_hat, y) + totalRegTerm + regularization.regTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg); + } + + void WGAN::forwardPass(){ + LinAlg alg; + if(!network.empty()){ + network[0].input = alg.gaussianNoise(n, k); + network[0].forwardPass(); + + for(int i = 1; i < network.size(); i++){ + network[i].input = network[i - 1].a; + network[i].forwardPass(); + } + outputLayer->input = network[network.size() - 1].a; + } + else{ // Should never happen, though. + outputLayer->input = alg.gaussianNoise(n, k); + } + outputLayer->forwardPass(); + y_hat = outputLayer->a; + } + + void WGAN::updateDiscriminatorParameters(std::vector>> hiddenLayerUpdations, std::vector outputLayerUpdation, double learning_rate){ + LinAlg alg; + + outputLayer->weights = alg.subtraction(outputLayer->weights, outputLayerUpdation); + outputLayer->bias -= learning_rate * alg.sum_elements(outputLayer->delta) / n; + + if(!network.empty()){ + network[network.size() - 1].weights = alg.subtraction(network[network.size() - 1].weights, hiddenLayerUpdations[0]); + network[network.size() - 1].bias = alg.subtractMatrixRows(network[network.size() - 1].bias, alg.scalarMultiply(learning_rate/n, network[network.size() - 1].delta)); + + for(int i = network.size() - 2; i > network.size()/2; i--){ + network[i].weights = alg.subtraction(network[i].weights, hiddenLayerUpdations[(network.size() - 2) - i + 1]); + network[i].bias = alg.subtractMatrixRows(network[i].bias, alg.scalarMultiply(learning_rate/n, network[i].delta)); + } + } + } + + void WGAN::updateGeneratorParameters(std::vector>> hiddenLayerUpdations, double learning_rate){ + LinAlg alg; + + if(!network.empty()){ + + for(int i = network.size()/2; i >= 0; i--){ + //std::cout << network[i].weights.size() << "x" << network[i].weights[0].size() << std::endl; + //std::cout << hiddenLayerUpdations[(network.size() - 2) - i + 1].size() << "x" << hiddenLayerUpdations[(network.size() - 2) - i + 1][0].size() << std::endl; + network[i].weights = alg.subtraction(network[i].weights, hiddenLayerUpdations[(network.size() - 2) - i + 1]); + network[i].bias = alg.subtractMatrixRows(network[i].bias, alg.scalarMultiply(learning_rate/n, network[i].delta)); + } + } + } + + std::tuple>>, std::vector> 
WGAN::computeDiscriminatorGradients(std::vector y_hat, std::vector outputSet){ + class Cost cost; + Activation avn; + LinAlg alg; + Reg regularization; + + std::vector>> cumulativeHiddenLayerWGrad; // Tensor containing ALL hidden grads. + + auto costDeriv = outputLayer->costDeriv_map[outputLayer->cost]; + auto outputAvn = outputLayer->activation_map[outputLayer->activation]; + outputLayer->delta = alg.hadamard_product((cost.*costDeriv)(y_hat, outputSet), (avn.*outputAvn)(outputLayer->z, 1)); + std::vector outputWGrad = alg.mat_vec_mult(alg.transpose(outputLayer->input), outputLayer->delta); + outputWGrad = alg.addition(outputWGrad, regularization.regDerivTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg)); + + + if(!network.empty()){ + auto hiddenLayerAvn = network[network.size() - 1].activation_map[network[network.size() - 1].activation]; + + network[network.size() - 1].delta = alg.hadamard_product(alg.outerProduct(outputLayer->delta, outputLayer->weights), (avn.*hiddenLayerAvn)(network[network.size() - 1].z, 1)); + std::vector> hiddenLayerWGrad = alg.matmult(alg.transpose(network[network.size() - 1].input), network[network.size() - 1].delta); + + cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[network.size() - 1].weights, network[network.size() - 1].lambda, network[network.size() - 1].alpha, network[network.size() - 1].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well. + + //std::cout << "HIDDENLAYER FIRST:" << hiddenLayerWGrad.size() << "x" << hiddenLayerWGrad[0].size() << std::endl; + //std::cout << "WEIGHTS SECOND:" << network[network.size() - 1].weights.size() << "x" << network[network.size() - 1].weights[0].size() << std::endl; + + for(int i = network.size() - 2; i > network.size()/2; i--){ + auto hiddenLayerAvn = network[i].activation_map[network[i].activation]; + network[i].delta = alg.hadamard_product(alg.matmult(network[i + 1].delta, alg.transpose(network[i + 1].weights)), (avn.*hiddenLayerAvn)(network[i].z, 1)); + std::vector> hiddenLayerWGrad = alg.matmult(alg.transpose(network[i].input), network[i].delta); + + cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well. + + } + } + return {cumulativeHiddenLayerWGrad, outputWGrad}; + } + + std::vector>> WGAN::computeGeneratorGradients(std::vector y_hat, std::vector outputSet){ + class Cost cost; + Activation avn; + LinAlg alg; + Reg regularization; + + std::vector>> cumulativeHiddenLayerWGrad; // Tensor containing ALL hidden grads. 
+ + auto costDeriv = outputLayer->costDeriv_map[outputLayer->cost]; + auto outputAvn = outputLayer->activation_map[outputLayer->activation]; + outputLayer->delta = alg.hadamard_product((cost.*costDeriv)(y_hat, outputSet), (avn.*outputAvn)(outputLayer->z, 1)); + std::vector outputWGrad = alg.mat_vec_mult(alg.transpose(outputLayer->input), outputLayer->delta); + outputWGrad = alg.addition(outputWGrad, regularization.regDerivTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg)); + if(!network.empty()){ + auto hiddenLayerAvn = network[network.size() - 1].activation_map[network[network.size() - 1].activation]; + network[network.size() - 1].delta = alg.hadamard_product(alg.outerProduct(outputLayer->delta, outputLayer->weights), (avn.*hiddenLayerAvn)(network[network.size() - 1].z, 1)); + std::vector> hiddenLayerWGrad = alg.matmult(alg.transpose(network[network.size() - 1].input), network[network.size() - 1].delta); + cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[network.size() - 1].weights, network[network.size() - 1].lambda, network[network.size() - 1].alpha, network[network.size() - 1].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well. + + for(int i = network.size() - 2; i >= 0; i--){ + auto hiddenLayerAvn = network[i].activation_map[network[i].activation]; + network[i].delta = alg.hadamard_product(alg.matmult(network[i + 1].delta, alg.transpose(network[i + 1].weights)), (avn.*hiddenLayerAvn)(network[i].z, 1)); + std::vector> hiddenLayerWGrad = alg.matmult(alg.transpose(network[i].input), network[i].delta); + cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well. + } + } + return cumulativeHiddenLayerWGrad; + } + + void WGAN::UI(int epoch, double cost_prev, std::vector y_hat, std::vector outputSet){ + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); + std::cout << "Layer " << network.size() + 1 << ": " << std::endl; + Utilities::UI(outputLayer->weights, outputLayer->bias); + if(!network.empty()){ + for(int i = network.size() - 1; i >= 0; i--){ + std::cout << "Layer " << i + 1 << ": " << std::endl; + Utilities::UI(network[i].weights, network[i].bias); + } + } + } +} \ No newline at end of file diff --git a/MLPP/WGAN/WGAN.hpp b/MLPP/WGAN/WGAN.hpp new file mode 100644 index 0000000..2e948f1 --- /dev/null +++ b/MLPP/WGAN/WGAN.hpp @@ -0,0 +1,56 @@ +// +// WGAN.hpp +// +// Created by Marc Melikyan on 11/4/20. +// + +#ifndef WGAN_hpp +#define WGAN_hpp + +#include "HiddenLayer/HiddenLayer.hpp" +#include "OutputLayer/OutputLayer.hpp" + +#include +#include +#include + +namespace MLPP{ + +class WGAN{ + public: + WGAN(double k, std::vector> outputSet); + ~WGAN(); + std::vector> generateExample(int n); + void gradientDescent(double learning_rate, int max_epoch, bool UI = 1); + double score(); + void save(std::string fileName); + + void addLayer(int n_hidden, std::string activation, std::string weightInit = "Default", std::string reg = "None", double lambda = 0.5, double alpha = 0.5); + void addOutputLayer(std::string weightInit = "Default", std::string reg = "None", double lambda = 0.5, double alpha = 0.5); + + private: + std::vector> modelSetTestGenerator(std::vector> X); // Evaluator for the generator of the WGAN. 
+ std::vector modelSetTestDiscriminator(std::vector> X); // Evaluator for the discriminator of the WGAN. + + double Cost(std::vector y_hat, std::vector y); + + void forwardPass(); + void updateDiscriminatorParameters(std::vector>> hiddenLayerUpdations, std::vector outputLayerUpdation, double learning_rate); + void updateGeneratorParameters(std::vector>> hiddenLayerUpdations, double learning_rate); + std::tuple>>, std::vector> computeDiscriminatorGradients(std::vector y_hat, std::vector outputSet); + std::vector>> computeGeneratorGradients(std::vector y_hat, std::vector outputSet); + + void UI(int epoch, double cost_prev, std::vector y_hat, std::vector outputSet); + + std::vector> outputSet; + std::vector y_hat; + + std::vector network; + OutputLayer *outputLayer; + + int n; + int k; + }; +} + +#endif /* WGAN_hpp */ \ No newline at end of file diff --git a/MLPP/kNN/kNN.cpp b/MLPP/kNN/kNN.cpp new file mode 100644 index 0000000..feda69c --- /dev/null +++ b/MLPP/kNN/kNN.cpp @@ -0,0 +1,87 @@ +// +// kNN.cpp +// +// Created by Marc Melikyan on 10/2/20. +// + +#include "kNN.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Utilities/Utilities.hpp" + +#include +#include +#include + +namespace MLPP{ + kNN::kNN(std::vector> inputSet, std::vector outputSet, int k) + : inputSet(inputSet), outputSet(outputSet), k(k) + { + + } + + std::vector kNN::modelSetTest(std::vector> X){ + std::vector y_hat; + for(int i = 0; i < X.size(); i++){ + y_hat.push_back(modelTest(X[i])); + } + return y_hat; + } + + int kNN::modelTest(std::vector x){ + return determineClass(nearestNeighbors(x)); + } + + double kNN::score(){ + Utilities util; + return util.performance(modelSetTest(inputSet), outputSet); + } + + int kNN::determineClass(std::vector knn){ + std::map class_nums; + for(int i = 0; i < outputSet.size(); i++){ + class_nums[outputSet[i]] = 0; + } + for(int i = 0; i < knn.size(); i++){ + for(int j = 0; j < outputSet.size(); j++){ + if(knn[i] == outputSet[j]){ + class_nums[outputSet[j]]++; + } + } + } + int max = class_nums[outputSet[0]]; + int final_class = outputSet[0]; + + for(int i = 0; i < outputSet.size(); i++){ + if(class_nums[outputSet[i]] > max){ + max = class_nums[outputSet[i]]; + } + } + for(auto [c, v] : class_nums){ + if(v == max){ + final_class = c; + } + } + return final_class; + } + + std::vector kNN::nearestNeighbors(std::vector x){ + LinAlg alg; + // The nearest neighbors + std::vector knn; + + std::vector> inputUseSet = inputSet; + //Perfom this loop unless and until all k nearest neighbors are found, appended, and returned + for(int i = 0; i < k; i++){ + int neighbor = 0; + for(int j = 0; j < inputUseSet.size(); j++){ + bool isNeighborNearer = alg.euclideanDistance(x, inputUseSet[j]) < alg.euclideanDistance(x, inputUseSet[neighbor]); + if(isNeighborNearer){ + neighbor = j; + } + } + knn.push_back(neighbor); + inputUseSet.erase(inputUseSet.begin() + neighbor); // This is why we maintain an extra input"Use"Set + } + return knn; + } +} \ No newline at end of file diff --git a/MLPP/kNN/kNN.hpp b/MLPP/kNN/kNN.hpp new file mode 100644 index 0000000..740543e --- /dev/null +++ b/MLPP/kNN/kNN.hpp @@ -0,0 +1,35 @@ +// +// kNN.hpp +// +// Created by Marc Melikyan on 10/2/20. 
+// + +#ifndef kNN_hpp +#define kNN_hpp + +#include + +namespace MLPP{ + class kNN{ + + public: + kNN(std::vector> inputSet, std::vector outputSet, int k); + std::vector modelSetTest(std::vector> X); + int modelTest(std::vector x); + double score(); + + private: + + // Private Model Functions + std::vector nearestNeighbors(std::vector x); + int determineClass(std::vector knn); + + // Model Inputs and Parameters + std::vector> inputSet; + std::vector outputSet; + int k; + + }; +} + +#endif /* kNN_hpp */ diff --git a/README.md b/README.md new file mode 100644 index 0000000..a72ce89 --- /dev/null +++ b/README.md @@ -0,0 +1,244 @@ +# ML++ + +Machine learning is a vast and exciting discipline, garnering attention from specialists of many fields. Unfortunately, for C++ programmers and enthusiasts, there appears to be a lack of support in the field of machine learning. To fill that void and give C++ a true foothold in the ML sphere, this library was written. The intent of this library is to act as a crossroads between low-level developers and machine learning engineers. + +
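+A quick taste of the API, as a minimal illustrative sketch (the linear regression module shown here is covered in the Usage section below; the tiny dataset is made up purely for demonstration):
+```cpp
+#include <iostream>
+#include <vector>
+
+#include "MLPP/LinReg/LinReg.hpp" // ML++ linear regression module.
+
+int main(){
+    // Each inner vector is one training example; outputSet holds the corresponding targets.
+    std::vector<std::vector<double>> inputSet = {{1}, {2}, {3}, {4}, {5}};
+    std::vector<double> outputSet = {2, 4, 6, 8, 10};
+
+    MLPP::LinReg model(inputSet, outputSet);
+    model.gradientDescent(0.001, 1000, 0); // Learning rate, max epochs, UI panel off.
+
+    std::cout << model.modelTest({6}) << std::endl; // Prediction for a single new example.
+}
+```
+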

+ +

+ +## Installation +Begin by downloading the header files for the ML++ library. You can do this by cloning the repository and extracting the MLPP directory within it: +``` +git clone https://github.com/novak-99/MLPP +``` +Next, execute the "buildSO.sh" shell script: +``` +sudo ./buildSO.sh +``` +After doing so, maintain the ML++ source files in a local directory and include them in this fashion: +```cpp +#include "MLPP/Stat/Stat.hpp" // Including the ML++ statistics module. + +int main(){ +... +} +``` +Finally, after you have concluded creating a project, compile it using g++: +``` +g++ main.cpp /usr/local/lib/MLPP.so --std=c++17 +``` + +## Usage +Please note that ML++ uses the ```std::vector``` data type for emulating vectors, and the ```std::vector>``` data type for emulating matrices. + +Begin by including the respective header file of your choice. +```cpp +#include "MLPP/LinReg/LinReg.hpp" +``` +Next, instantiate an object of the class. Don't forget to pass the input set and output set as parameters. +```cpp +LinReg model(inputSet, outputSet); +``` +Afterwards, call the optimizer that you would like to use. For iterative optimizers such as gradient descent, include the learning rate, epoch number, and whether or not to utilize the UI panel. +```cpp +model.gradientDescent(0.001, 1000, 0); +``` +Great, you are now ready to test! To test a singular testing instance, utilize the following function: +```cpp +model.modelTest(testSetInstance); +``` +This will return the model's singular prediction for that example. + +To test an entire test set, use the following function: +```cpp +model.modelSetTest(testSet); +``` +The result will be the model's predictions for the entire dataset. + + +## Contents of the Library +1. ***Regression*** + 1. Linear Regression + 2. Logistic Regression + 3. Softmax Regression + 4. Exponential Regression + 5. Probit Regression + 6. CLogLog Regression + 7. Tanh Regression +2. ***Deep, Dynamically Sized Neural Networks*** + 1. Possible Activation Functions + - Linear + - Sigmoid + - Softmax + - Swish + - Mish + - SinC + - Softplus + - Softsign + - CLogLog + - Logit + - Gaussian CDF + - RELU + - GELU + - Sign + - Unit Step + - Sinh + - Cosh + - Tanh + - Csch + - Sech + - Coth + - Arsinh + - Arcosh + - Artanh + - Arcsch + - Arsech + - Arcoth + 2. Possible Optimization Algorithms + - Batch Gradient Descent + - Mini-Batch Gradient Descent + - Stochastic Gradient Descent + - Gradient Descent with Momentum + - Nesterov Accelerated Gradient + - Adagrad Optimizer + - Adadelta Optimizer + - Adam Optimizer + - Adamax Optimizer + - Nadam Optimizer + - AMSGrad Optimizer + - 2nd Order Newton-Raphson Optimizer* + - Normal Equation* +

+ *Only available for linear regression + 3. Possible Loss Functions + - MSE + - RMSE + - MAE + - MBE + - Log Loss + - Cross Entropy + - Hinge Loss + - Wasserstein Loss + 4. Possible Regularization Methods + - Lasso + - Ridge + - ElasticNet + - Weight Clipping + 5. Possible Weight Initialization Methods + - Uniform + - Xavier Normal + - Xavier Uniform + - He Normal + - He Uniform + - LeCun Normal + - LeCun Uniform + 6. Possible Learning Rate Schedulers + - Time Based + - Epoch Based + - Step Based + - Exponential +3. ***Prebuilt Neural Networks*** + 1. Multilayer Perceptron + 2. Autoencoder + 3. Softmax Network +4. ***Generative Modeling*** + 1. Tabular Generative Adversarial Networks + 2. Tabular Wasserstein Generative Adversarial Networks +5. ***Natural Language Processing*** + 1. Word2Vec (Continuous Bag of Words, Skip-Gram) + 2. Stemming + 3. Bag of Words + 4. TFIDF + 5. Tokenization + 6. Auxiliary Text Processing Functions +6. ***Computer Vision*** + 1. The Convolution Operation + 2. Max, Min, Average Pooling + 3. Global Max, Min, Average Pooling + 4. Prebuilt Feature Detectors + - Horizontal/Vertical Prewitt Filter + - Horizontal/Vertical Sobel Filter + - Horizontal/Vertical Scharr Filter + - Horizontal/Vertical Roberts Filter + - Gaussian Filter + - Harris Corner Detector +7. ***Principal Component Analysis*** +8. ***Naive Bayes Classifiers*** + 1. Multinomial Naive Bayes + 2. Bernoulli Naive Bayes + 3. Gaussian Naive Bayes +9. ***Support Vector Classification*** + 1. Primal Formulation (Hinge Loss Objective) + 2. Dual Formulation (Via Lagrangian Multipliers) +10. ***K-Means*** +11. ***k-Nearest Neighbors*** +12. ***Outlier Finder (Using z-scores)*** +13. ***Matrix Decompositions*** + 1. SVD Decomposition + 2. Cholesky Decomposition + - Positive Definiteness Checker + 3. QR Decomposition +14. ***Numerical Analysis*** + 1. Numerical Differentiation + - Univariate Functions + - Multivariate Functions + 2. Jacobian Vector Calculator + 3. Hessian Matrix Calculator + 4. Function Approximator + - Constant Approximation + - Linear Approximation + - Quadratic Approximation + - Cubic Approximation + 5. Differential Equations Solvers + - Euler's Method + - Growth Method +15. ***Mathematical Transforms*** + 1. Discrete Cosine Transform +16. ***Linear Algebra Module*** +17. ***Statistics Module*** +18. ***Data Processing Module*** + 1. Setting and Printing Datasets + 2. Available Datasets + 1. Wisconsin Breast Cancer Dataset + - Binary + - SVM + 2. MNIST Dataset + - Train + - Test + 3. Iris Flower Dataset + 4. Wine Dataset + 5. California Housing Dataset + 6. Fires and Crime Dataset (Chicago) + 3. Feature Scaling + 4. Mean Normalization + 5. One Hot Representation + 6. Reverse One Hot Representation + 7. Supported Color Space Conversions + - RGB to Grayscale + - RGB to HSV + - RGB to YCbCr + - RGB to XYZ + - XYZ to RGB +19. ***Utilities*** + 1. TP, FP, TN, FN function + 2. Precision + 3. Recall + 4. Accuracy + 5. F1 score + + +## What's in the Works? +ML++, like most frameworks, is dynamic and constantly changing. This is especially important in the world of ML, as new algorithms and techniques are being developed day by day. Here are a couple of things currently being developed for ML++: +

+ - Convolutional Neural Networks +

+

+ - Kernels for SVMs +

+

+ - Support Vector Regression +

+ +## Citations +Various different materials helped me along the way of creating ML++, and I would like to give credit to several of them here. [This](https://www.tutorialspoint.com/cplusplus-program-to-compute-determinant-of-a-matrix) article by TutorialsPoint was a big help when trying to implement the determinant of a matrix, and [this](https://www.geeksforgeeks.org/adjoint-inverse-matrix/) article by GeeksForGeeks was very helpful when trying to take the adjoint and inverse of a matrix. diff --git a/main.cpp b/main.cpp new file mode 100644 index 0000000..b9a0f74 --- /dev/null +++ b/main.cpp @@ -0,0 +1,722 @@ +// +// main.cpp +// TEST_APP +// +// Created by Marc on 1/20/21. +// + +// THINGS CURRENTLY TO DO: +// POLYMORPHIC IMPLEMENTATION OF REGRESSION CLASSES +// EXTEND SGD/MBGD SUPPORT FOR DYN. SIZED ANN +// ADD LEAKYRELU, ELU, SELU TO ANN +// FIX VECTOR/MATRIX/TENSOR RESIZE ROUTINE + +// HYPOTHESIS TESTING CLASS +// GAUSS MARKOV CHECKER CLASS + +#include +#include +#include +#include +#include "MLPP/UniLinReg/UniLinReg.hpp" +#include "MLPP/LinReg/LinReg.hpp" +#include "MLPP/LogReg/LogReg.hpp" +#include "MLPP/CLogLogReg/CLogLogReg.hpp" +#include "MLPP/ExpReg/ExpReg.hpp" +#include "MLPP/ProbitReg/ProbitReg.hpp" +#include "MLPP/SoftmaxReg/SoftmaxReg.hpp" +#include "MLPP/TanhReg/TanhReg.hpp" +#include "MLPP/MLP/MLP.hpp" +#include "MLPP/SoftmaxNet/SoftmaxNet.hpp" +#include "MLPP/AutoEncoder/AutoEncoder.hpp" +#include "MLPP/ANN/ANN.hpp" +#include "MLPP/MANN/MANN.hpp" +#include "MLPP/MultinomialNB/MultinomialNB.hpp" +#include "MLPP/BernoulliNB/BernoulliNB.hpp" +#include "MLPP/GaussianNB/GaussianNB.hpp" +#include "MLPP/KMeans/KMeans.hpp" +#include "MLPP/kNN/kNN.hpp" +#include "MLPP/PCA/PCA.hpp" +#include "MLPP/OutlierFinder/OutlierFinder.hpp" +#include "MLPP/Stat/Stat.hpp" +#include "MLPP/LinAlg/LinAlg.hpp" +#include "MLPP/Activation/Activation.hpp" +#include "MLPP/Cost/Cost.hpp" +#include "MLPP/Data/Data.hpp" +#include "MLPP/Convolutions/Convolutions.hpp" +#include "MLPP/SVC/SVC.hpp" +#include "MLPP/NumericalAnalysis/NumericalAnalysis.hpp" +#include "MLPP/DualSVC/DualSVC.hpp" +#include "MLPP/GAN/GAN.hpp" +#include "MLPP/WGAN/WGAN.hpp" +#include "MLPP/Transforms/Transforms.hpp" + +using namespace MLPP; + + +// double f(double x){ +// return x*x*x + 2*x - 2; +// } + +double f(double x){ + return sin(x); +} + +double f_prime(double x){ + return 2 * x; +} + +double f_prime_2var(std::vector x){ + return 2 * x[0] + x[1]; +} +/* + y = x^3 + 2x - 2 + y' = 3x^2 + 2 + y'' = 6x + y''(2) = 12 +*/ + +// double f_mv(std::vector x){ +// return x[0] * x[0] + x[0] * x[1] * x[1] + x[1] + 5; +// } + +/* + Where x, y = x[0], x[1], this function is defined as: + f(x, y) = x^2 + xy^2 + y + 5 + ∂f/∂x = 2x + 2y + ∂^2f/∂x∂y = 2 +*/ + +double f_mv(std::vector x){ + return x[0] * x[0] * x[0] + x[0] + x[1] * x[1] * x[1] * x[0] + x[2] * x[2] * x[1]; +} + +/* + Where x, y = x[0], x[1], this function is defined as: + f(x, y) = x^3 + x + xy^3 + yz^2 + + fy = 3xy^2 + 2yz + fyy = 6xy + 2z + fyyz = 2 + + ∂^2f/∂y^2 = 6xy + 2z + ∂^3f/∂y^3 = 6x + + ∂f/∂z = 2zy + ∂^2f/∂z^2 = 2y + ∂^3f/∂z^3 = 0 + + ∂f/∂x = 3x^2 + 1 + y^3 + ∂^2f/∂x^2 = 6x + ∂^3f/∂x^3 = 6 + + ∂f/∂z = 2zy + ∂^2f/∂z^2 = 2z + + ∂f/∂y = 3xy^2 + ∂^2f/∂y∂x = 3y^2 + +*/ + + +int main() { + + // // OBJECTS + Stat stat; + LinAlg alg; + Activation avn; + Cost cost; + Data data; + Convolutions conv; + + // DATA SETS + // std::vector> inputSet = {{1,2,3,4,5,6,7,8,9,10}, {3,5,9,12,15,18,21,24,27,30}}; + // std::vector outputSet = {2,4,6,8,10,12,14,16,18,20}; + + // std::vector> 
inputSet = {{1,2,3,4,5,6,7,8}, {0,0,0,0,1,1,1,1}}; + // std::vector outputSet = {0,0,0,0,1,1,1,1}; + + // std::vector> inputSet = {{4,3,0,-3,-4}, {0,0,0,1,1}}; + // std::vector outputSet = {1,1,0,-1,-1}; + + // std::vector> inputSet = {{0,1,2,3,4}}; + // std::vector outputSet = {1,2,4,8,16}; + + //std::vector> inputSet = {{32, 0, 7}, {2, 28, 17}, {0, 9, 23}}; + + // std::vector> inputSet = {{1,1,0,0,1}, {0,0,1,1,1}, {0,1,1,0,1}}; + // std::vector outputSet = {0,1,0,1,1}; + + // std::vector> inputSet = {{0,0,1,1}, {0,1,0,1}}; + // std::vector outputSet = {0,1,1,0}; + + // // STATISTICS + // std::vector x = {1,2,3,4,5,6,7,8,9,10}; + // std::vector y = {10,9,8,7,6,5,4,3,2,1}; + // std::vector w = {0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1}; + + // std::cout << "Arithmetic Mean: " << stat.mean(x) << std::endl; + // std::cout << "Median: " << stat.median(x) << std::endl; + // alg.printVector(x); + // alg.printVector(stat.mode(x)); + // std::cout << "Range: " << stat.range(x) << std::endl; + // std::cout << "Midrange: " << stat.midrange(x) << std::endl; + // std::cout << "Absolute Average Deviation: " << stat.absAvgDeviation(x) << std::endl; + // std::cout << "Standard Deviation: " << stat.standardDeviation(x) << std::endl; + // std::cout << "Variance: " << stat.variance(x) << std::endl; + // std::cout << "Covariance: " << stat.covariance(x, y) << std::endl; + // std::cout << "Correlation: " << stat.correlation(x, y) << std::endl; + // std::cout << "R^2: " << stat.R2(x, y) << std::endl; + // // Returns 1 - (1/k^2) + // std::cout << "Chebyshev Inequality: " << stat.chebyshevIneq(2) << std::endl; + // std::cout << "Weighted Mean: " << stat.weightedMean(x, w) << std::endl; + // std::cout << "Geometric Mean: " << stat.geometricMean(x) << std::endl; + // std::cout << "Harmonic Mean: " << stat.harmonicMean(x) << std::endl; + // std::cout << "Root Mean Square (Quadratic mean): " << stat.RMS(x) << std::endl; + // std::cout << "Power Mean (p = 5): " << stat.powerMean(x, 5) << std::endl; + // std::cout << "Lehmer Mean (p = 5): " << stat.lehmerMean(x, 5) << std::endl; + // std::cout << "Weighted Lehmer Mean (p = 5): " << stat.weightedLehmerMean(x, w, 5) << std::endl; + // std::cout << "Contraharmonic Mean: " << stat.contraHarmonicMean(x) << std::endl; + // std::cout << "Hernonian Mean: " << stat.heronianMean(1, 10) << std::endl; + // std::cout << "Heinz Mean (x = 1): " << stat.heinzMean(1, 10, 1) << std::endl; + // std::cout << "Neuman-Sandor Mean: " << stat.neumanSandorMean(1, 10) << std::endl; + // std::cout << "Stolarsky Mean (p = 5): " << stat.stolarskyMean(1, 10, 5) << std::endl; + // std::cout << "Identric Mean: " << stat.identricMean(1, 10) << std::endl; + // std::cout << "Logarithmic Mean: " << stat.logMean(1, 10) << std::endl; + // std::cout << "Absolute Average Deviation: " << stat.absAvgDeviation(x) << std::endl; + + // LINEAR ALGEBRA + // std::vector> square = {{1, 1}, {-1, 1}, {1, -1}, {-1, -1}}; + + // alg.printMatrix(alg.rotate(square, M_PI/4)); + + // std::vector> A = { + // {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + // {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + // }; + // std::vector a = {4, 3, 1, 3}; + // std::vector b = {3, 5, 6, 1}; + + // alg.printMatrix(alg.matmult(alg.transpose(A), A)); + // std::cout << std::endl; + // std::cout << alg.dot(a, b) << std::endl; + // std::cout << std::endl; + // alg.printMatrix(alg.hadamard_product(A, A)); + // std::cout << std::endl; + // alg.printMatrix(alg.identity(10)); + + // UNIVARIATE LINEAR REGRESSION + // Univariate, simple linear regression, case 
where k = 1 + // auto [inputSet, outputSet] = data.loadFiresAndCrime(); + // UniLinReg model(inputSet, outputSet); + // alg.printVector(model.modelSetTest(inputSet)); + + // // MULIVARIATE LINEAR REGRESSION + // auto [inputSet, outputSet] = data.loadCaliforniaHousing(); + + // LinReg model(inputSet, outputSet); // Can use Lasso, Ridge, ElasticNet Reg + + //model.gradientDescent(0.001, 30, 0); + //model.SGD(0.00000001, 300000, 1); + //model.MBGD(0.001, 10000, 2, 1); + //model.normalEquation(); + + // LinReg adamModel(alg.transpose(inputSet), outputSet); + // alg.printVector(model.modelSetTest(inputSet)); + // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; + + + // const int TRIAL_NUM = 1000; + + // double scoreSGD = 0; + // double scoreADAM = 0; + // for(int i = 0; i < TRIAL_NUM; i++){ + // LinReg model(alg.transpose(inputSet), outputSet); + // model.MBGD(0.001, 5, 1, 0); + // scoreSGD += model.score(); + + // LinReg adamModel(alg.transpose(inputSet), outputSet); + // adamModel.Adam(0.1, 5, 1, 0.9, 0.999, 1e-8, 0); // Change batch size = sgd, bgd + // scoreADAM += adamModel.score(); + // } + + // std::cout << "ACCURACY, AVG, SGD: " << 100 * scoreSGD/TRIAL_NUM << "%" << std::endl; + + // std::cout << std::endl; + + // std::cout << "ACCURACY, AVG, ADAM: " << 100 * scoreADAM/TRIAL_NUM << "%" << std::endl; + + + // std::cout << "Total epoch num: 300" << std::endl; + // std::cout << "Method: 1st Order w/ Jacobians" << std::endl; + + // LinReg model(alg.transpose(inputSet), outputSet); // Can use Lasso, Ridge, ElasticNet Reg + + // model.gradientDescent(0.001, 300, 0); + + + // std::cout << "--------------------------------------------" << std::endl; + // std::cout << "Total epoch num: 300" << std::endl; + // std::cout << "Method: Newtonian 2nd Order w/ Hessians" << std::endl; + // LinReg model2(alg.transpose(inputSet), outputSet); + + // model2.NewtonRaphson(1.5, 300, 0); + + + // // LOGISTIC REGRESSION + // auto [inputSet, outputSet] = data.load rastCancer(); + // LogReg model(inputSet, outputSet); + // model.SGD(0.001, 100000, 0); + // alg.printVector(model.modelSetTest(inputSet)); + // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; + + // // PROBIT REGRESSION + // std::vector> inputSet; + // std::vector outputSet; + // data.setData(30, "/Users/marcmelikyan/Desktop/Data/BreastCancer.csv", inputSet, outputSet); + // ProbitReg model(inputSet, outputSet); + // model.SGD(0.001, 10000, 1); + // alg.printVector(model.modelSetTest(inputSet)); + // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; + + // // CLOGLOG REGRESSION + // std::vector> inputSet = {{1,2,3,4,5,6,7,8}, {0,0,0,0,1,1,1,1}}; + // std::vector outputSet = {0,0,0,0,1,1,1,1}; + // CLogLogReg model(alg.transpose(inputSet), outputSet); + // model.SGD(0.1, 10000, 0); + // alg.printVector(model.modelSetTest(alg.transpose(inputSet))); + // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; + + // // EXPREG REGRESSION + // std::vector> inputSet = {{0,1,2,3,4}}; + // std::vector outputSet = {1,2,4,8,16}; + // ExpReg model(alg.transpose(inputSet), outputSet); + // model.SGD(0.001, 10000, 0); + // alg.printVector(model.modelSetTest(alg.transpose(inputSet))); + // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; + + // // TANH REGRESSION + // std::vector> inputSet = {{4,3,0,-3,-4}, {0,0,0,1,1}}; + // std::vector outputSet = {1,1,0,-1,-1}; + // TanhReg model(alg.transpose(inputSet), outputSet); + // model.SGD(0.1, 10000, 0); + // 
alg.printVector(model.modelSetTest(alg.transpose(inputSet))); + // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; + + // // SOFTMAX REGRESSION + // auto [inputSet, outputSet] = data.loadIris(); + // SoftmaxReg model(inputSet, outputSet); + // model.SGD(0.1, 10000, 1); + // alg.printMatrix(model.modelSetTest(inputSet)); + // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; + + // // SUPPORT VECTOR CLASSIFICATION + // auto [inputSet, outputSet] = data.loadBreastCancerSVC(); + // SVC model(inputSet, outputSet, 1); + // model.SGD(0.00001, 100000, 1); + // alg.printVector(model.modelSetTest(inputSet)); + // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; + + // SoftmaxReg model(inputSet, outputSet); + // model.SGD(0.001, 20000, 0); + // alg.printMatrix(model.modelSetTest(inputSet)); + + // // MLP + // std::vector> inputSet = {{0,0,1,1}, {0,1,0,1}}; + // inputSet = alg.transpose(inputSet); + // std::vector outputSet = {0,1,1,0}; + + // MLP model(inputSet, outputSet, 2); + // model.gradientDescent(0.1, 10000, 0); + // alg.printVector(model.modelSetTest(inputSet)); + // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; + + // // SOFTMAX NETWORK + // auto [inputSet, outputSet] = data.loadWine(); + // SoftmaxNet model(inputSet, outputSet, 1); + // model.gradientDescent(0.01, 100000, 1); + // alg.printMatrix(model.modelSetTest(inputSet)); + // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; + + // // AUTOENCODER + // std::vector> inputSet = {{1,2,3,4,5,6,7,8,9,10}, {3,5,9,12,15,18,21,24,27,30}}; + // AutoEncoder model(alg.transpose(inputSet), 5); + // model.SGD(0.001, 300000, 0); + // alg.printMatrix(model.modelSetTest(alg.transpose(inputSet))); + // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; + + // DYNAMICALLY SIZED ANN + // Possible Weight Init Methods: Default, Uniform, HeNormal, HeUniform, XavierNormal, XavierUniform + // Possible Activations: Linear, Sigmoid, Swish, Softplus, Softsign, CLogLog, Ar{Sinh, Cosh, Tanh, Csch, Sech, Coth}, GaussianCDF, GELU, UnitStep + // Possible Loss Functions: MSE, RMSE, MBE, LogLoss, CrossEntropy, HingeLoss + // std::vector> inputSet = {{0,0,1,1}, {0,1,0,1}}; + // std::vector outputSet = {0,1,1,0}; + // ANN ann(alg.transpose(inputSet), outputSet); + // ann.addLayer(2, "Cosh"); + // ann.addOutputLayer("Sigmoid", "LogLoss"); + + + // ann.AMSGrad(0.1, 10000, 1, 0.9, 0.999, 0.000001, 1); + // ann.Adadelta(1, 1000, 2, 0.9, 0.000001, 1); + // ann.Momentum(0.1, 8000, 2, 0.9, true, 1); + + //ann.setLearningRateScheduler("Step", 0.5, 1000); + // ann.gradientDescent(0.01, 30000); + // alg.printVector(ann.modelSetTest(alg.transpose(inputSet))); + // std::cout << "ACCURACY: " << 100 * ann.score() << "%" << std::endl; + + std::vector> outputSet = {{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}, + {2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40}}; + + WGAN gan(2, alg.transpose(outputSet)); // our gan is a wasserstein gan (wgan) + gan.addLayer(5, "Sigmoid"); + gan.addLayer(2, "RELU"); + gan.addLayer(5, "Sigmoid"); + gan.addOutputLayer(); // User can specify weight init- if necessary. 
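+ // Note: the real examples (the transposed outputSet above) lie on the line x2 = 2 * x1, so a well-trained generator should produce points near that line.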
+ gan.gradientDescent(0.1, 55000, 0); + std::cout << "GENERATED INPUT: (Gaussian-sampled noise):" << std::endl; + alg.printMatrix(gan.generateExample(100)); + + + // typedef std::vector> Matrix; + // typedef std::vector Vector; + + // Matrix inputSet = {{0,0}, {0,1}, {1,0}, {1,1}}; // XOR + // Vector outputSet = {0,1,1,0}; + + // ANN ann(inputSet, outputSet); + // ann.addLayer(5, "Sigmoid"); + // ann.addLayer(8, "Sigmoid"); // Add more layers as needed. + // ann.addOutputLayer("Sigmoid", "LogLoss"); + // ann.gradientDescent(1, 20000, 1); + + // Vector predictions = ann.modelSetTest(inputSet); + // alg.printVector(predictions); // Testing out the model's preds for train set. + // std::cout << "ACCURACY: " << 100 * ann.score() << "%" << std::endl; // Accuracy. + + // // DYNAMICALLY SIZED MANN (Multidimensional Output ANN) + // std::vector> inputSet = {{1,2,3},{2,4,6},{3,6,9},{4,8,12}}; + // std::vector> outputSet = {{1,5}, {2,10}, {3,15}, {4,20}}; + + // MANN mann(inputSet, outputSet); + // mann.addOutputLayer("Linear", "MSE"); + // mann.gradientDescent(0.001, 80000, 0); + // alg.printMatrix(mann.modelSetTest(inputSet)); + // std::cout << "ACCURACY: " << 100 * mann.score() << "%" << std::endl; + + // std::vector> inputSet; + // std::vector tempOutputSet; + // data.setData(4, "/Users/marcmelikyan/Desktop/Data/Iris.csv", inputSet, tempOutputSet); + // std::vector> outputSet = data.oneHotRep(tempOutputSet, 3); + + // TRAIN TEST SPLIT CHECK + // std::vector> inputSet1 = {{1,2,3,4,5,6,7,8,9,10}, {3,5,9,12,15,18,21,24,27,30}}; + // std::vector> outputSet1 = {{2,4,6,8,10,12,14,16,18,20}}; + // auto [inputSet, outputSet, inputTestSet, outputTestSet] = data.trainTestSplit(alg.transpose(inputSet1), alg.transpose(outputSet1), 0.2); + // alg.printMatrix(inputSet); + // alg.printMatrix(outputSet); + // alg.printMatrix(inputTestSet); + // alg.printMatrix(outputTestSet); + + + // alg.printMatrix(inputSet); + // alg.printMatrix(outputSet); + + // MANN mann(inputSet, outputSet); + // mann.addLayer(100, "RELU", "XavierNormal"); + // mann.addOutputLayer("Softmax", "CrossEntropy", "XavierNormal"); + // mann.gradientDescent(0.1, 80000, 1); + // alg.printMatrix(mann.modelSetTest(inputSet)); + // std::cout << "ACCURACY: " << 100 * mann.score() << "%" << std::endl; + + // // NAIVE BAYES + // std::vector> inputSet = {{1,1,1,1,1}, {0,0,1,1,1}, {0,0,1,0,1}}; + // std::vector outputSet = {0,1,0,1,1}; + + // MultinomialNB MNB(alg.transpose(inputSet), outputSet, 2); + // alg.printVector(MNB.modelSetTest(alg.transpose(inputSet))); + + // BernoulliNB BNB(alg.transpose(inputSet), outputSet); + // alg.printVector(BNB.modelSetTest(alg.transpose(inputSet))); + + // GaussianNB GNB(alg.transpose(inputSet), outputSet, 2); + // alg.printVector(GNB.modelSetTest(alg.transpose(inputSet))); + + // // KMeans + // std::vector> inputSet = {{32, 0, 7}, {2, 28, 17}, {0, 9, 23}}; + // KMeans kmeans(inputSet, 3, "KMeans++"); + // kmeans.train(3, 1); + // std::cout << std::endl; + // alg.printMatrix(kmeans.modelSetTest(inputSet)); // Returns the assigned centroids to each of the respective training examples + // std::cout << std::endl; + // alg.printVector(kmeans.silhouette_scores()); + + // // kNN + // std::vector> inputSet = {{1,2,3,4,5,6,7,8}, {0,0,0,0,1,1,1,1}}; + // std::vector outputSet = {0,0,0,0,1,1,1,1}; + // kNN knn(alg.transpose(inputSet), outputSet, 8); + // alg.printVector(knn.modelSetTest(alg.transpose(inputSet))); + // std::cout << "ACCURACY: " << 100 * knn.score() << "%" << std::endl; + + + // // CONVOLUTION, POOLING, ETC.. 
+ // std::vector<std::vector<double>> input = {
+ // {1},
+ // };
+
+ // std::vector<std::vector<std::vector<double>>> tensorSet;
+ // tensorSet.push_back(input);
+ // tensorSet.push_back(input);
+ // tensorSet.push_back(input);
+
+ // alg.printTensor(data.rgb2xyz(tensorSet));
+
+ // std::vector<std::vector<double>> input = {
+ // {62,55,55,54,49,48,47,55},
+ // {62,57,54,52,48,47,48,53},
+ // {61,60,52,49,48,47,49,54},
+ // {63,61,60,60,63,65,68,65},
+ // {67,67,70,74,79,85,91,92},
+ // {82,95,101,106,114,115,112,117},
+ // {96,111,115,119,128,128,130,127},
+ // {109,121,127,133,139,141,140,133},
+ // };
+
+ // Transforms trans;
+
+ // alg.printMatrix(trans.discreteCosineTransform(input));
+
+ // alg.printMatrix(conv.convolve(input, conv.getPrewittVertical(), 1)); // Can use padding.
+ // alg.printMatrix(conv.pool(input, 4, 4, "Max")); // Can use Max, Min, or Average pooling.
+
+ // std::vector<std::vector<std::vector<double>>> tensorSet;
+ // tensorSet.push_back(input);
+ // tensorSet.push_back(input);
+ // alg.printVector(conv.globalPool(tensorSet, "Average")); // Can use Max, Min, or Average global pooling.
+
+ // std::vector<std::vector<double>> laplacian = {{1, 1, 1}, {1, -4, 1}, {1, 1, 1}};
+ // alg.printMatrix(conv.convolve(conv.gaussianFilter2D(5, 1), laplacian, 1));
+
+
+ // // PCA, SVD, eigenvalues & eigenvectors
+ // std::vector<std::vector<double>> inputSet = {{1,1}, {1,1}};
+ // auto [Eigenvectors, Eigenvalues] = alg.eig(inputSet);
+ // std::cout << "Eigenvectors:" << std::endl;
+ // alg.printMatrix(Eigenvectors);
+ // std::cout << std::endl;
+ // std::cout << "Eigenvalues:" << std::endl;
+ // alg.printMatrix(Eigenvalues);
+
+ // auto [U, S, Vt] = alg.SVD(inputSet);
+
+ // // PCA done using Jacobi's method to approximate the eigenvalues and eigenvectors.
+ // PCA dr(inputSet, 1); // 1-dimensional representation.
+ // std::cout << std::endl;
+ // std::cout << "Dimensionally reduced representation:" << std::endl;
+ // alg.printMatrix(dr.principalComponents());
+ // std::cout << "SCORE: " << dr.score() << std::endl;
+
+
+ // // NLP/DATA
+ // std::string verbText = "I am appearing and thinking, as well as conducting.";
+ // std::cout << "Stemming Example:" << std::endl;
+ // std::cout << data.stemming(verbText) << std::endl;
+ // std::cout << std::endl;
+
+ // std::vector<std::string> sentences = {"He is a good boy", "She is a good girl", "The boy and girl are good"};
+ // std::cout << "Bag of Words Example:" << std::endl;
+ // alg.printMatrix(data.BOW(sentences, "Default"));
+ // std::cout << std::endl;
+ // std::cout << "TFIDF Example:" << std::endl;
+ // alg.printMatrix(data.TFIDF(sentences));
+ // std::cout << std::endl;
+
+ // std::cout << "Tokenization:" << std::endl;
+ // alg.printVector(data.tokenize(verbText));
+ // std::cout << std::endl;
+
+ // std::cout << "Word2Vec:" << std::endl;
+ // std::string textArchive = {"He is a good boy. She is a good girl. The boy and girl are good."};
+ // std::vector<std::string> corpus = data.splitSentences(textArchive);
+ // auto [wordEmbeddings, wordList] = data.word2Vec(corpus, "CBOW", 2, 2, 0.1, 10000); // Can use either CBOW or Skip-n-gram.
+ // alg.printMatrix(wordEmbeddings);
+ // std::cout << std::endl;
+
+ // std::vector<std::string> textArchive = {"pizza", "pizza hamburger cookie", "hamburger", "ramen", "sushi", "ramen sushi"};
+
+ // alg.printMatrix(data.LSA(textArchive, 2));
+ // //alg.printMatrix(data.BOW(textArchive, "Default"));
+ // std::cout << std::endl;
+
+
+ // std::vector<std::vector<double>> inputSet = {{1,2},{2,3},{3,4},{4,5},{5,6}};
+ // std::cout << "Feature Scaling Example:" << std::endl;
+ // alg.printMatrix(data.featureScaling(inputSet));
+ // std::cout << std::endl;
+
+ // std::cout << "Mean Centering Example:" << std::endl;
+ // alg.printMatrix(data.meanCentering(inputSet));
+ // std::cout << std::endl;
+
+ // std::cout << "Mean Normalization Example:" << std::endl;
+ // alg.printMatrix(data.meanNormalization(inputSet));
+ // std::cout << std::endl;
+
+ // // Outlier Finder
+ // std::vector<double> inputSet = {1,2,3,4,5,6,7,8,9,23554332523523};
+ // OutlierFinder outlierFinder(2); // Any datapoint outside of 2 standard deviations from the mean is marked as an outlier.
+ // alg.printVector(outlierFinder.modelTest(inputSet));
+
+ // // Testing new functions
+ // double z_s = 0.001;
+ // std::cout << avn.logit(z_s) << std::endl;
+ // std::cout << avn.logit(z_s, 1) << std::endl;
+
+ // std::vector<double> z_v = {0.001};
+ // alg.printVector(avn.logit(z_v));
+ // alg.printVector(avn.logit(z_v, 1));
+
+ // std::vector<std::vector<double>> Z_m = {{0.001}};
+ // alg.printMatrix(avn.logit(Z_m));
+ // alg.printMatrix(avn.logit(Z_m, 1));
+
+ // std::cout << alg.trace({{1,2}, {3,4}}) << std::endl;
+ // alg.printMatrix(alg.pinverse({{1,2}, {3,4}}));
+ // alg.printMatrix(alg.diag({1,2,3,4,5}));
+ // alg.printMatrix(alg.kronecker_product({{1,2,3,4,5}}, {{6,7,8,9,10}}));
+ // alg.printMatrix(alg.matrixPower({{5,5},{5,5}}, 2));
+ // alg.printVector(alg.solve({{1,1}, {1.5, 4.0}}, {2200, 5050}));
+
+ // std::vector<std::vector<double>> matrixOfCubes = {{1,2,64,27}};
+ // std::vector<double> vectorOfCubes = {1,2,64,27};
+ // alg.printMatrix(alg.cbrt(matrixOfCubes));
+ // alg.printVector(alg.cbrt(vectorOfCubes));
+ // std::cout << alg.max({{1,2,3,4,5}, {6,5,3,4,1}, {9,9,9,9,9}}) << std::endl;
+ // std::cout << alg.min({{1,2,3,4,5}, {6,5,3,4,1}, {9,9,9,9,9}}) << std::endl;
+
+ // std::vector<double> chicken;
+ // data.getImage("../../Data/apple.jpeg", chicken);
+ // alg.printVector(chicken);
+
+ // std::vector<std::vector<double>> P = {{12, -51, 4}, {6, 167, -68}, {-4, 24, -41}};
+ // alg.printMatrix(P);
+
+ // alg.printMatrix(alg.gramSchmidtProcess(P));
+
+ // auto [Q, R] = alg.QRD(P); // It works!
+
+ // alg.printMatrix(Q);
+
+ // alg.printMatrix(R);
+
+ // // Checking the positive-definiteness checker, used for the Cholesky decomposition.
+ // std::vector<std::vector<double>> A =
+ // {
+ // {1,-1,-1,-1},
+ // {-1,2,2,2},
+ // {-1,2,3,1},
+ // {-1,2,1,4}
+ // };
+
+ // std::cout << std::boolalpha << alg.positiveDefiniteChecker(A) << std::endl;
+ // auto [L, Lt] = alg.chol(A); // works.
+ // alg.printMatrix(L);
+ // alg.printMatrix(Lt);
+
+ // Checks for the numerical analysis class.
+ NumericalAnalysis numAn;
+
+ // std::cout << numAn.quadraticApproximation(f, 0, 1) << std::endl;
+
+ // std::cout << numAn.cubicApproximation(f, 0, 1.001) << std::endl;
+
+ // std::cout << f(1.001) << std::endl;
+
+ // std::cout << numAn.quadraticApproximation(f_mv, {0, 0, 0}, {1, 1, 1}) << std::endl;
+
+ // std::cout << numAn.numDiff(&f, 1) << std::endl;
+ // std::cout << numAn.newtonRaphsonMethod(&f, 1, 1000) << std::endl;
+ // std::cout << numAn.invQuadraticInterpolation(&f, {100, 2, 1.5}, 10) << std::endl;
+
+ // std::cout << numAn.numDiff(&f_mv, {1, 1}, 1) << std::endl; // Derivative w.r.t. x.
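+ // For orientation: the numDiff-style routines above and below approximate derivatives with
+ // finite differences. A standalone sketch of the idea (central difference; not necessarily the
+ // exact scheme NumericalAnalysis uses internally):
+ // auto centralDiff = [](double (*fn)(double), double x, double h) {
+ //     return (fn(x + h) - fn(x - h)) / (2 * h); // O(h^2) error for smooth fn.
+ // };
+ // std::cout << centralDiff([](double x) { return x * x; }, 3.0, 1e-5) << std::endl; // ~6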
+
+ // alg.printVector(numAn.jacobian(&f_mv, {1, 1}));
+
+ // std::cout << numAn.numDiff_2(&f, 2) << std::endl;
+
+ // std::cout << numAn.numDiff_3(&f, 2) << std::endl;
+
+ // std::cout << numAn.numDiff_2(&f_mv, {2, 2, 500}, 2, 2) << std::endl;
+ // std::cout << numAn.numDiff_3(&f_mv, {2, 1000, 130}, 0, 0, 0) << std::endl;
+
+ // alg.printTensor(numAn.thirdOrderTensor(&f_mv, {1, 1, 1}));
+ // std::cout << "Our Hessian." << std::endl;
+ // alg.printMatrix(numAn.hessian(&f_mv, {2, 2, 500}));
+
+ // std::cout << numAn.laplacian(f_mv, {1,1,1}) << std::endl;
+
+ // std::vector<std::vector<std::vector<double>>> tensor;
+ // tensor.push_back({{1,2}, {1,2}, {1,2}});
+ // tensor.push_back({{1,2}, {1,2}, {1,2}});
+
+ // alg.printTensor(tensor);
+
+ // alg.printMatrix(alg.tensor_vec_mult(tensor, {1,2}));
+
+ // std::cout << numAn.cubicApproximation(f_mv, {0, 0, 0}, {1, 1, 1}) << std::endl;
+
+ // std::cout << numAn.eulerianMethod(f_prime, {1, 1}, 1.5, 0.000001) << std::endl;
+
+ // std::cout << numAn.eulerianMethod(f_prime_2var, {2, 3}, 2.5, 0.00000001) << std::endl;
+
+ // alg.printMatrix(conv.dx(A));
+ // alg.printMatrix(conv.dy(A));
+
+ // alg.printMatrix(conv.gradOrientation(A));
+
+ // std::vector<std::vector<double>> A =
+ // {
+ // {1,0,0,0},
+ // {0,0,0,0},
+ // {0,0,0,0},
+ // {0,0,0,1}
+ // };
+
+ // auto h = conv.harrisCornerDetection(A);
+
+ // for(int i = 0; i < h.size(); i++){
+ // for(int j = 0; j < h[i].size(); j++){
+ // std::cout << h[i][j] << " ";
+ // }
+ // std::cout << std::endl;
+ // } // Harris detector works. Life is good!
+
+ // std::vector<double> a = {3,4,4};
+ // std::vector<double> b = {4,4,4};
+ // alg.printVector(alg.cross(a,b));
+
+ // SUPPORT VECTOR CLASSIFICATION (kernel method)
+ // std::vector<std::vector<double>> inputSet;
+ // std::vector<double> outputSet;
+ // data.setData(30, "/Users/marcmelikyan/Desktop/Data/BreastCancerSVM.csv", inputSet, outputSet);
+
+ // std::vector<std::vector<double>> inputSet;
+ // std::vector<double> outputSet;
+ // data.setData(4, "/Users/marcmelikyan/Desktop/Data/IrisSVM.csv", inputSet, outputSet);
+
+ // DualSVC kernelSVM(inputSet, outputSet, 1000);
+ // kernelSVM.gradientDescent(0.0001, 20, 1);
+
+ // std::vector<std::vector<double>> linearlyIndependentMat =
+
+ // {
+ // {1,2,3,4},
+ // {234538495,4444,6111,55}
+ // };
+
+ // std::cout << "True or false: linearly independent?: " << std::boolalpha << alg.linearIndependenceChecker(linearlyIndependentMat) << std::endl;
+
+
+ return 0;
+}
+