Added https://github.com/novak-99/MLPP as a base, without the included datasets.

This commit is contained in:
Relintai 2023-01-23 21:13:26 +01:00
commit 51765e87ad
82 changed files with 13735 additions and 0 deletions
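Below is a minimal usage sketch of the ANN interface added in this commit. It is not part of the commit itself; the include path, dataset values, layer sizes, and the "Sigmoid"/"LogLoss" string keys are illustrative assumptions and should be checked against Activation.cpp and Cost.cpp.

#include "MLPP/ANN/ANN.hpp"  // assumed include path
#include <iostream>
#include <vector>

int main() {
    // Toy dataset (assumed values, for illustration only).
    std::vector<std::vector<double>> X = {{0, 0}, {0, 1}, {1, 0}, {1, 1}};
    std::vector<double> y = {0, 1, 1, 0};

    MLPP::ANN model(X, y);
    model.addLayer(4, "Sigmoid");               // one hidden layer with 4 units
    model.addOutputLayer("Sigmoid", "LogLoss"); // scalar output with log loss
    model.gradientDescent(0.1, 1000, false);    // learning rate, max epochs, UI off
    std::cout << "Accuracy: " << model.score() << std::endl;
    return 0;
}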

2
.gitignore vendored Normal file

@@ -0,0 +1,2 @@
a.out
.DS_Store

21
LICENSE Normal file

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2022 Marc Melikyan
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

742
MLPP/ANN/ANN.cpp Normal file

@@ -0,0 +1,742 @@
//
// ANN.cpp
//
// Created by Marc Melikyan on 11/4/20.
//
#include "ANN.hpp"
#include "Activation/Activation.hpp"
#include "LinAlg/LinAlg.hpp"
#include "Regularization/Reg.hpp"
#include "Utilities/Utilities.hpp"
#include "Cost/Cost.hpp"
#include <iostream>
#include <cmath>
#include <random>
namespace MLPP {
ANN::ANN(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet)
: inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), lrScheduler("None"), decayConstant(0), dropRate(0)
{
}
ANN::~ANN(){
delete outputLayer;
}
std::vector<double> ANN::modelSetTest(std::vector<std::vector<double>> X){
if(!network.empty()){
network[0].input = X;
network[0].forwardPass();
for(int i = 1; i < network.size(); i++){
network[i].input = network[i - 1].a;
network[i].forwardPass();
}
outputLayer->input = network[network.size() - 1].a;
}
else{
outputLayer->input = X;
}
outputLayer->forwardPass();
return outputLayer->a;
}
double ANN::modelTest(std::vector<double> x){
if(!network.empty()){
network[0].Test(x);
for(int i = 1; i < network.size(); i++){
network[i].Test(network[i - 1].a_test);
}
outputLayer->Test(network[network.size() - 1].a_test);
}
else{
outputLayer->Test(x);
}
return outputLayer->a_test;
}
void ANN::gradientDescent(double learning_rate, int max_epoch, bool UI){
class Cost cost;
LinAlg alg;
double cost_prev = 0;
int epoch = 1;
forwardPass();
double initial_learning_rate = learning_rate;
while(true){
learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
cost_prev = Cost(y_hat, outputSet);
auto [cumulativeHiddenLayerWGrad, outputWGrad] = computeGradients(y_hat, outputSet);
cumulativeHiddenLayerWGrad = alg.scalarMultiply(learning_rate/n, cumulativeHiddenLayerWGrad);
outputWGrad = alg.scalarMultiply(learning_rate/n, outputWGrad);
updateParameters(cumulativeHiddenLayerWGrad, outputWGrad, learning_rate); // subject to change. may want bias to have this matrix too.
forwardPass();
if(UI) { ANN::UI(epoch, cost_prev, y_hat, outputSet); }
epoch++;
if(epoch > max_epoch) { break; }
}
}
void ANN::SGD(double learning_rate, int max_epoch, bool UI){
class Cost cost;
LinAlg alg;
double cost_prev = 0;
int epoch = 1;
double initial_learning_rate = learning_rate;
while(true){
learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
std::random_device rd;
std::default_random_engine generator(rd());
std::uniform_int_distribution<int> distribution(0, int(n - 1));
int outputIndex = distribution(generator);
std::vector<double> y_hat = modelSetTest({inputSet[outputIndex]});
cost_prev = Cost({y_hat}, {outputSet[outputIndex]});
auto [cumulativeHiddenLayerWGrad, outputWGrad] = computeGradients(y_hat, {outputSet[outputIndex]});
cumulativeHiddenLayerWGrad = alg.scalarMultiply(learning_rate/n, cumulativeHiddenLayerWGrad);
outputWGrad = alg.scalarMultiply(learning_rate/n, outputWGrad);
updateParameters(cumulativeHiddenLayerWGrad, outputWGrad, learning_rate); // subject to change. may want bias to have this matrix too.
y_hat = modelSetTest({inputSet[outputIndex]});
if(UI) { ANN::UI(epoch, cost_prev, y_hat, {outputSet[outputIndex]}); }
epoch++;
if(epoch > max_epoch) { break; }
}
forwardPass();
}
void ANN::MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI){
class Cost cost;
LinAlg alg;
double cost_prev = 0;
int epoch = 1;
double initial_learning_rate = learning_rate;
// Creating the mini-batches
int n_mini_batch = n/mini_batch_size;
// always evaluate the result
// always do forward pass only ONCE at end.
auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
while(true){
learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
for(int i = 0; i < n_mini_batch; i++){
std::vector<double> y_hat = modelSetTest(inputMiniBatches[i]);
cost_prev = Cost(y_hat, outputMiniBatches[i]);
auto [cumulativeHiddenLayerWGrad, outputWGrad] = computeGradients(y_hat, outputMiniBatches[i]);
cumulativeHiddenLayerWGrad = alg.scalarMultiply(learning_rate/n, cumulativeHiddenLayerWGrad);
outputWGrad = alg.scalarMultiply(learning_rate/n, outputWGrad);
updateParameters(cumulativeHiddenLayerWGrad, outputWGrad, learning_rate); // subject to change. may want bias to have this matrix too.
y_hat = modelSetTest(inputMiniBatches[i]);
if(UI) { ANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]); }
}
epoch++;
if(epoch > max_epoch) { break; }
}
forwardPass();
}
void ANN::Momentum(double learning_rate, int max_epoch, int mini_batch_size, double gamma, bool NAG, bool UI){
class Cost cost;
LinAlg alg;
double cost_prev = 0;
int epoch = 1;
double initial_learning_rate = learning_rate;
// Creating the mini-batches
int n_mini_batch = n/mini_batch_size;
// always evaluate the result
// always do forward pass only ONCE at end.
auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
// Initializing necessary components for Momentum.
std::vector<std::vector<std::vector<double>>> v_hidden;
std::vector<double> v_output;
while(true){
learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
for(int i = 0; i < n_mini_batch; i++){
std::vector<double> y_hat = modelSetTest(inputMiniBatches[i]);
cost_prev = Cost(y_hat, outputMiniBatches[i]);
auto [cumulativeHiddenLayerWGrad, outputWGrad] = computeGradients(y_hat, outputMiniBatches[i]);
if(!network.empty() && v_hidden.empty()){ // Initing our tensor
v_hidden = alg.resize(v_hidden, cumulativeHiddenLayerWGrad);
}
if(v_output.empty()){
v_output.resize(outputWGrad.size());
}
if(NAG){ // "a posteriori" calculation
updateParameters(v_hidden, v_output, 0); // DON'T update bias.
}
v_hidden = alg.addition(alg.scalarMultiply(gamma, v_hidden), alg.scalarMultiply(learning_rate/n, cumulativeHiddenLayerWGrad));
v_output = alg.addition(alg.scalarMultiply(gamma, v_output), alg.scalarMultiply(learning_rate/n, outputWGrad));
updateParameters(v_hidden, v_output, learning_rate); // subject to change. may want bias to have this matrix too.
y_hat = modelSetTest(inputMiniBatches[i]);
if(UI) { ANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]); }
}
epoch++;
if(epoch > max_epoch) { break; }
}
forwardPass();
}
void ANN::Adagrad(double learning_rate, int max_epoch, int mini_batch_size, double e, bool UI){
class Cost cost;
LinAlg alg;
double cost_prev = 0;
int epoch = 1;
double initial_learning_rate = learning_rate;
// Creating the mini-batches
int n_mini_batch = n/mini_batch_size;
// always evaluate the result
// always do forward pass only ONCE at end.
auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
// Initializing necessary components for Adagrad.
std::vector<std::vector<std::vector<double>>> v_hidden;
std::vector<double> v_output;
while(true){
learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
for(int i = 0; i < n_mini_batch; i++){
std::vector<double> y_hat = modelSetTest(inputMiniBatches[i]);
cost_prev = Cost(y_hat, outputMiniBatches[i]);
auto [cumulativeHiddenLayerWGrad, outputWGrad] = computeGradients(y_hat, outputMiniBatches[i]);
if(!network.empty() && v_hidden.empty()){ // Initing our tensor
v_hidden = alg.resize(v_hidden, cumulativeHiddenLayerWGrad);
}
if(v_output.empty()){
v_output.resize(outputWGrad.size());
}
v_hidden = alg.addition(v_hidden, alg.exponentiate(cumulativeHiddenLayerWGrad, 2));
v_output = alg.addition(v_output, alg.exponentiate(outputWGrad, 2));
std::vector<std::vector<std::vector<double>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(cumulativeHiddenLayerWGrad, alg.scalarAdd(e, alg.sqrt(v_hidden))));
std::vector<double> outputLayerUpdation = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(outputWGrad, alg.scalarAdd(e, alg.sqrt(v_output))));
updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
y_hat = modelSetTest(inputMiniBatches[i]);
if(UI) { ANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]); }
}
epoch++;
if(epoch > max_epoch) { break; }
}
forwardPass();
}
void ANN::Adadelta(double learning_rate, int max_epoch, int mini_batch_size, double b1, double e, bool UI){
class Cost cost;
LinAlg alg;
double cost_prev = 0;
int epoch = 1;
double initial_learning_rate = learning_rate;
// Creating the mini-batches
int n_mini_batch = n/mini_batch_size;
// always evaluate the result
// always do forward pass only ONCE at end.
auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
// Initializing necessary components for Adadelta.
std::vector<std::vector<std::vector<double>>> v_hidden;
std::vector<double> v_output;
while(true){
learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
for(int i = 0; i < n_mini_batch; i++){
std::vector<double> y_hat = modelSetTest(inputMiniBatches[i]);
cost_prev = Cost(y_hat, outputMiniBatches[i]);
auto [cumulativeHiddenLayerWGrad, outputWGrad] = computeGradients(y_hat, outputMiniBatches[i]);
if(!network.empty() && v_hidden.empty()){ // Initing our tensor
v_hidden = alg.resize(v_hidden, cumulativeHiddenLayerWGrad);
}
if(v_output.empty()){
v_output.resize(outputWGrad.size());
}
v_hidden = alg.addition(alg.scalarMultiply(1 - b1, v_hidden), alg.scalarMultiply(b1, alg.exponentiate(cumulativeHiddenLayerWGrad, 2)));
v_output = alg.addition(alg.scalarMultiply(1 - b1, v_output), alg.scalarMultiply(b1, alg.exponentiate(outputWGrad, 2))); // keep the output accumulator consistent with v_hidden
std::vector<std::vector<std::vector<double>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(cumulativeHiddenLayerWGrad, alg.scalarAdd(e, alg.sqrt(v_hidden))));
std::vector<double> outputLayerUpdation = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(outputWGrad, alg.scalarAdd(e, alg.sqrt(v_output))));
updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
y_hat = modelSetTest(inputMiniBatches[i]);
if(UI) { ANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]); }
}
epoch++;
if(epoch > max_epoch) { break; }
}
forwardPass();
}
void ANN::Adam(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI){
class Cost cost;
LinAlg alg;
double cost_prev = 0;
int epoch = 1;
double initial_learning_rate = learning_rate;
// Creating the mini-batches
int n_mini_batch = n/mini_batch_size;
// always evaluate the result
// always do forward pass only ONCE at end.
auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
// Initializing necessary components for Adam.
std::vector<std::vector<std::vector<double>>> m_hidden;
std::vector<std::vector<std::vector<double>>> v_hidden;
std::vector<double> m_output;
std::vector<double> v_output;
while(true){
learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
for(int i = 0; i < n_mini_batch; i++){
std::vector<double> y_hat = modelSetTest(inputMiniBatches[i]);
cost_prev = Cost(y_hat, outputMiniBatches[i]);
auto [cumulativeHiddenLayerWGrad, outputWGrad] = computeGradients(y_hat, outputMiniBatches[i]);
if(!network.empty() && m_hidden.empty() && v_hidden.empty()){ // Initing our tensor
m_hidden = alg.resize(m_hidden, cumulativeHiddenLayerWGrad);
v_hidden = alg.resize(v_hidden, cumulativeHiddenLayerWGrad);
}
if(m_output.empty() && v_output.empty()){
m_output.resize(outputWGrad.size());
v_output.resize(outputWGrad.size());
}
m_hidden = alg.addition(alg.scalarMultiply(b1, m_hidden), alg.scalarMultiply(1 - b1, cumulativeHiddenLayerWGrad));
v_hidden = alg.addition(alg.scalarMultiply(b2, v_hidden), alg.scalarMultiply(1 - b2, alg.exponentiate(cumulativeHiddenLayerWGrad, 2)));
m_output = alg.addition(alg.scalarMultiply(b1, m_output), alg.scalarMultiply(1 - b1, outputWGrad));
v_output = alg.addition(alg.scalarMultiply(b2, v_output), alg.scalarMultiply(1 - b2, alg.exponentiate(outputWGrad, 2)));
std::vector<std::vector<std::vector<double>>> m_hidden_hat = alg.scalarMultiply(1/(1 - std::pow(b1, epoch)), m_hidden);
std::vector<std::vector<std::vector<double>>> v_hidden_hat = alg.scalarMultiply(1/(1 - std::pow(b2, epoch)), v_hidden);
std::vector<double> m_output_hat = alg.scalarMultiply(1/(1 - std::pow(b1, epoch)), m_output);
std::vector<double> v_output_hat = alg.scalarMultiply(1/(1 - std::pow(b2, epoch)), v_output);
std::vector<std::vector<std::vector<double>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(m_hidden_hat, alg.scalarAdd(e, alg.sqrt(v_hidden_hat))));
std::vector<double> outputLayerUpdation = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(m_output_hat, alg.scalarAdd(e, alg.sqrt(v_output_hat))));
updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
y_hat = modelSetTest(inputMiniBatches[i]);
if(UI) { ANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]); }
}
epoch++;
if(epoch > max_epoch) { break; }
}
forwardPass();
}
void ANN::Adamax(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI){
class Cost cost;
LinAlg alg;
double cost_prev = 0;
int epoch = 1;
double initial_learning_rate = learning_rate;
// Creating the mini-batches
int n_mini_batch = n/mini_batch_size;
// always evaluate the result
// always do forward pass only ONCE at end.
auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
// Initializing necessary components for Adamax.
std::vector<std::vector<std::vector<double>>> m_hidden;
std::vector<std::vector<std::vector<double>>> u_hidden;
std::vector<double> m_output;
std::vector<double> u_output;
while(true){
learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
for(int i = 0; i < n_mini_batch; i++){
std::vector<double> y_hat = modelSetTest(inputMiniBatches[i]);
cost_prev = Cost(y_hat, outputMiniBatches[i]);
auto [cumulativeHiddenLayerWGrad, outputWGrad] = computeGradients(y_hat, outputMiniBatches[i]);
if(!network.empty() && m_hidden.empty() && u_hidden.empty()){ // Initing our tensor
m_hidden = alg.resize(m_hidden, cumulativeHiddenLayerWGrad);
u_hidden = alg.resize(u_hidden, cumulativeHiddenLayerWGrad);
}
if(m_output.empty() && u_output.empty()){
m_output.resize(outputWGrad.size());
u_output.resize(outputWGrad.size());
}
m_hidden = alg.addition(alg.scalarMultiply(b1, m_hidden), alg.scalarMultiply(1 - b1, cumulativeHiddenLayerWGrad));
u_hidden = alg.max(alg.scalarMultiply(b2, u_hidden), alg.abs(cumulativeHiddenLayerWGrad));
m_output = alg.addition(alg.scalarMultiply(b1, m_output), alg.scalarMultiply(1 - b1, outputWGrad));
u_output = alg.max(alg.scalarMultiply(b2, u_output), alg.abs(outputWGrad));
std::vector<std::vector<std::vector<double>>> m_hidden_hat = alg.scalarMultiply(1/(1 - std::pow(b1, epoch)), m_hidden);
std::vector<double> m_output_hat = alg.scalarMultiply(1/(1 - std::pow(b1, epoch)), m_output);
std::vector<std::vector<std::vector<double>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(m_hidden_hat, alg.scalarAdd(e, u_hidden)));
std::vector<double> outputLayerUpdation = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(m_output_hat, alg.scalarAdd(e, u_output)));
updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
y_hat = modelSetTest(inputMiniBatches[i]);
if(UI) { ANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]); }
}
epoch++;
if(epoch > max_epoch) { break; }
}
forwardPass();
}
void ANN::Nadam(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI){
class Cost cost;
LinAlg alg;
double cost_prev = 0;
int epoch = 1;
double initial_learning_rate = learning_rate;
// Creating the mini-batches
int n_mini_batch = n/mini_batch_size;
// always evaluate the result
// always do forward pass only ONCE at end.
auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
// Initializing necessary components for Nadam.
std::vector<std::vector<std::vector<double>>> m_hidden;
std::vector<std::vector<std::vector<double>>> v_hidden;
std::vector<std::vector<std::vector<double>>> m_hidden_final;
std::vector<double> m_output;
std::vector<double> v_output;
while(true){
learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
for(int i = 0; i < n_mini_batch; i++){
std::vector<double> y_hat = modelSetTest(inputMiniBatches[i]);
cost_prev = Cost(y_hat, outputMiniBatches[i]);
auto [cumulativeHiddenLayerWGrad, outputWGrad] = computeGradients(y_hat, outputMiniBatches[i]);
if(!network.empty() && m_hidden.empty() && v_hidden.empty()){ // Initing our tensor
m_hidden = alg.resize(m_hidden, cumulativeHiddenLayerWGrad);
v_hidden = alg.resize(v_hidden, cumulativeHiddenLayerWGrad);
}
if(m_output.empty() && v_output.empty()){
m_output.resize(outputWGrad.size());
v_output.resize(outputWGrad.size());
}
m_hidden = alg.addition(alg.scalarMultiply(b1, m_hidden), alg.scalarMultiply(1 - b1, cumulativeHiddenLayerWGrad));
v_hidden = alg.addition(alg.scalarMultiply(b2, v_hidden), alg.scalarMultiply(1 - b2, alg.exponentiate(cumulativeHiddenLayerWGrad, 2)));
m_output = alg.addition(alg.scalarMultiply(b1, m_output), alg.scalarMultiply(1 - b1, outputWGrad));
v_output = alg.addition(alg.scalarMultiply(b2, v_output), alg.scalarMultiply(1 - b2, alg.exponentiate(outputWGrad, 2)));
std::vector<std::vector<std::vector<double>>> m_hidden_hat = alg.scalarMultiply(1/(1 - std::pow(b1, epoch)), m_hidden);
std::vector<std::vector<std::vector<double>>> v_hidden_hat = alg.scalarMultiply(1/(1 - std::pow(b2, epoch)), v_hidden);
std::vector<std::vector<std::vector<double>>> m_hidden_final = alg.addition(alg.scalarMultiply(b1, m_hidden_hat), alg.scalarMultiply((1 - b1)/(1 - std::pow(b1, epoch)), cumulativeHiddenLayerWGrad));
std::vector<double> m_output_hat = alg.scalarMultiply(1/(1 - std::pow(b1, epoch)), m_output);
std::vector<double> v_output_hat = alg.scalarMultiply(1/(1 - std::pow(b2, epoch)), v_output);
std::vector<double> m_output_final = alg.addition(alg.scalarMultiply(b1, m_output_hat), alg.scalarMultiply((1 - b1)/(1 - std::pow(b1, epoch)), outputWGrad));
std::vector<std::vector<std::vector<double>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(m_hidden_final, alg.scalarAdd(e, alg.sqrt(v_hidden_hat))));
std::vector<double> outputLayerUpdation = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(m_output_final, alg.scalarAdd(e, alg.sqrt(v_output_hat))));
updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
y_hat = modelSetTest(inputMiniBatches[i]);
if(UI) { ANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]); }
}
epoch++;
if(epoch > max_epoch) { break; }
}
forwardPass();
}
void ANN::AMSGrad(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI){
class Cost cost;
LinAlg alg;
double cost_prev = 0;
int epoch = 1;
double initial_learning_rate = learning_rate;
// Creating the mini-batches
int n_mini_batch = n/mini_batch_size;
// always evaluate the result
// always do forward pass only ONCE at end.
auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
// Initializing necessary components for AMSGrad.
std::vector<std::vector<std::vector<double>>> m_hidden;
std::vector<std::vector<std::vector<double>>> v_hidden;
std::vector<std::vector<std::vector<double>>> v_hidden_hat;
std::vector<double> m_output;
std::vector<double> v_output;
std::vector<double> v_output_hat;
while(true){
learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
for(int i = 0; i < n_mini_batch; i++){
std::vector<double> y_hat = modelSetTest(inputMiniBatches[i]);
cost_prev = Cost(y_hat, outputMiniBatches[i]);
auto [cumulativeHiddenLayerWGrad, outputWGrad] = computeGradients(y_hat, outputMiniBatches[i]);
if(!network.empty() && m_hidden.empty() && v_hidden.empty()){ // Initing our tensor
m_hidden = alg.resize(m_hidden, cumulativeHiddenLayerWGrad);
v_hidden = alg.resize(v_hidden, cumulativeHiddenLayerWGrad);
v_hidden_hat = alg.resize(v_hidden_hat, cumulativeHiddenLayerWGrad);
}
if(m_output.empty() && v_output.empty()){
m_output.resize(outputWGrad.size());
v_output.resize(outputWGrad.size());
v_output_hat.resize(outputWGrad.size());
}
m_hidden = alg.addition(alg.scalarMultiply(b1, m_hidden), alg.scalarMultiply(1 - b1, cumulativeHiddenLayerWGrad));
v_hidden = alg.addition(alg.scalarMultiply(b2, v_hidden), alg.scalarMultiply(1 - b2, alg.exponentiate(cumulativeHiddenLayerWGrad, 2)));
m_output = alg.addition(alg.scalarMultiply(b1, m_output), alg.scalarMultiply(1 - b1, outputWGrad));
v_output = alg.addition(alg.scalarMultiply(b2, v_output), alg.scalarMultiply(1 - b2, alg.exponentiate(outputWGrad, 2)));
v_hidden_hat = alg.max(v_hidden_hat, v_hidden);
v_output_hat = alg.max(v_output_hat, v_output);
std::vector<std::vector<std::vector<double>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(m_hidden, alg.scalarAdd(e, alg.sqrt(v_hidden_hat))));
std::vector<double> outputLayerUpdation = alg.scalarMultiply(learning_rate/n, alg.elementWiseDivision(m_output, alg.scalarAdd(e, alg.sqrt(v_output_hat))));
updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
y_hat = modelSetTest(inputMiniBatches[i]);
if(UI) { ANN::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]); }
}
epoch++;
if(epoch > max_epoch) { break; }
}
forwardPass();
}
double ANN::score(){
Utilities util;
forwardPass();
return util.performance(y_hat, outputSet);
}
void ANN::save(std::string fileName){
Utilities util;
if(!network.empty()){
util.saveParameters(fileName, network[0].weights, network[0].bias, 0, 1);
for(int i = 1; i < network.size(); i++){
util.saveParameters(fileName, network[i].weights, network[i].bias, 1, i + 1);
}
util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, 1, network.size() + 1);
}
else{
util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, 0, network.size() + 1);
}
}
void ANN::setLearningRateScheduler(std::string type, double decayConstant){
lrScheduler = type;
ANN::decayConstant = decayConstant;
}
void ANN::setLearningRateScheduler(std::string type, double decayConstant, double dropRate){
lrScheduler = type;
ANN::decayConstant = decayConstant;
ANN::dropRate = dropRate;
}
// https://en.wikipedia.org/wiki/Learning_rate
// Learning Rate Decay (C2W2L09) - Andrew Ng - Deep Learning Specialization
double ANN::applyLearningRateScheduler(double learningRate, double decayConstant, double epoch, double dropRate){
if(lrScheduler == "Time"){
return learningRate / (1 + decayConstant * epoch);
}
else if(lrScheduler == "Epoch"){
return learningRate * (decayConstant / std::sqrt(epoch));
}
else if(lrScheduler == "Step"){
return learningRate * std::pow(decayConstant, int((1 + epoch)/dropRate)); // Utilizing an explicit int conversion implicitly takes the floor.
}
else if(lrScheduler == "Exponential"){
return learningRate * std::exp(-decayConstant * epoch);
}
return learningRate;
}
void ANN::addLayer(int n_hidden, std::string activation, std::string weightInit, std::string reg, double lambda, double alpha){
if(network.empty()){
network.push_back(HiddenLayer(n_hidden, activation, inputSet, weightInit, reg, lambda, alpha));
network[0].forwardPass();
}
else{
network.push_back(HiddenLayer(n_hidden, activation, network[network.size() - 1].a, weightInit, reg, lambda, alpha));
network[network.size() - 1].forwardPass();
}
}
void ANN::addOutputLayer(std::string activation, std::string loss, std::string weightInit, std::string reg, double lambda, double alpha){
LinAlg alg;
if(!network.empty()){
outputLayer = new OutputLayer(network[network.size() - 1].n_hidden, activation, loss, network[network.size() - 1].a, weightInit, reg, lambda, alpha);
}
else{
outputLayer = new OutputLayer(k, activation, loss, inputSet, weightInit, reg, lambda, alpha);
}
}
double ANN::Cost(std::vector<double> y_hat, std::vector<double> y){
Reg regularization;
class Cost cost;
double totalRegTerm = 0;
auto cost_function = outputLayer->cost_map[outputLayer->cost];
if(!network.empty()){
for(int i = 0; i < network.size() - 1; i++){
totalRegTerm += regularization.regTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg);
}
}
return (cost.*cost_function)(y_hat, y) + totalRegTerm + regularization.regTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg);
}
void ANN::forwardPass(){
if(!network.empty()){
network[0].input = inputSet;
network[0].forwardPass();
for(int i = 1; i < network.size(); i++){
network[i].input = network[i - 1].a;
network[i].forwardPass();
}
outputLayer->input = network[network.size() - 1].a;
}
else{
outputLayer->input = inputSet;
}
outputLayer->forwardPass();
y_hat = outputLayer->a;
}
void ANN::updateParameters(std::vector<std::vector<std::vector<double>>> hiddenLayerUpdations, std::vector<double> outputLayerUpdation, double learning_rate){
LinAlg alg;
outputLayer->weights = alg.subtraction(outputLayer->weights, outputLayerUpdation);
outputLayer->bias -= learning_rate * alg.sum_elements(outputLayer->delta) / n;
if(!network.empty()){
network[network.size() - 1].weights = alg.subtraction(network[network.size() - 1].weights, hiddenLayerUpdations[0]);
network[network.size() - 1].bias = alg.subtractMatrixRows(network[network.size() - 1].bias, alg.scalarMultiply(learning_rate/n, network[network.size() - 1].delta));
for(int i = network.size() - 2; i >= 0; i--){
network[i].weights = alg.subtraction(network[i].weights, hiddenLayerUpdations[(network.size() - 2) - i + 1]);
network[i].bias = alg.subtractMatrixRows(network[i].bias, alg.scalarMultiply(learning_rate/n, network[i].delta));
}
}
}
std::tuple<std::vector<std::vector<std::vector<double>>>, std::vector<double>> ANN::computeGradients(std::vector<double> y_hat, std::vector<double> outputSet){
// std::cout << "BEGIN" << std::endl;
class Cost cost;
Activation avn;
LinAlg alg;
Reg regularization;
std::vector<std::vector<std::vector<double>>> cumulativeHiddenLayerWGrad; // Tensor containing ALL hidden grads.
auto costDeriv = outputLayer->costDeriv_map[outputLayer->cost];
auto outputAvn = outputLayer->activation_map[outputLayer->activation];
outputLayer->delta = alg.hadamard_product((cost.*costDeriv)(y_hat, outputSet), (avn.*outputAvn)(outputLayer->z, 1));
std::vector<double> outputWGrad = alg.mat_vec_mult(alg.transpose(outputLayer->input), outputLayer->delta);
outputWGrad = alg.addition(outputWGrad, regularization.regDerivTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg));
if(!network.empty()){
auto hiddenLayerAvn = network[network.size() - 1].activation_map[network[network.size() - 1].activation];
network[network.size() - 1].delta = alg.hadamard_product(alg.outerProduct(outputLayer->delta, outputLayer->weights), (avn.*hiddenLayerAvn)(network[network.size() - 1].z, 1));
std::vector<std::vector<double>> hiddenLayerWGrad = alg.matmult(alg.transpose(network[network.size() - 1].input), network[network.size() - 1].delta);
cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[network.size() - 1].weights, network[network.size() - 1].lambda, network[network.size() - 1].alpha, network[network.size() - 1].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
for(int i = network.size() - 2; i >= 0; i--){
auto hiddenLayerAvn = network[i].activation_map[network[i].activation];
network[i].delta = alg.hadamard_product(alg.matmult(network[i + 1].delta, alg.transpose(network[i + 1].weights)), (avn.*hiddenLayerAvn)(network[i].z, 1));
std::vector<std::vector<double>> hiddenLayerWGrad = alg.matmult(alg.transpose(network[i].input), network[i].delta);
cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
}
}
return {cumulativeHiddenLayerWGrad, outputWGrad};
}
void ANN::UI(int epoch, double cost_prev, std::vector<double> y_hat, std::vector<double> outputSet){
Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
std::cout << "Layer " << network.size() + 1 << ": " << std::endl;
Utilities::UI(outputLayer->weights, outputLayer->bias);
if(!network.empty()){
for(int i = network.size() - 1; i >= 0; i--){
std::cout << "Layer " << i + 1 << ": " << std::endl;
Utilities::UI(network[i].weights, network[i].bias);
}
}
}
}
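The learning-rate schedulers in ANN::applyLearningRateScheduler above reduce to simple closed forms. Here is a standalone sketch of the same formulas, useful for seeing what a given decayConstant and dropRate do to the step size; the numeric values are illustrative assumptions:

#include <cmath>
#include <cstdio>
#include <string>

// Mirrors the "Time", "Epoch", "Step", and "Exponential" branches of ANN::applyLearningRateScheduler.
double scheduledRate(const std::string &type, double lr, double decayConstant, int epoch, double dropRate) {
    if (type == "Time")        return lr / (1 + decayConstant * epoch);
    if (type == "Epoch")       return lr * (decayConstant / std::sqrt(epoch));
    if (type == "Step")        return lr * std::pow(decayConstant, int((1 + epoch) / dropRate)); // int cast floors
    if (type == "Exponential") return lr * std::exp(-decayConstant * epoch);
    return lr; // "None"
}

int main() {
    // With lr = 0.1, decayConstant = 0.5, dropRate = 10, the "Step" schedule halves the rate every 10 epochs.
    for (int epoch = 1; epoch <= 31; epoch += 10) {
        std::printf("Step schedule, epoch %2d: %f\n", epoch, scheduledRate("Step", 0.1, 0.5, epoch, 10.0));
    }
    return 0;
}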

72
MLPP/ANN/ANN.hpp Normal file

@@ -0,0 +1,72 @@
//
// ANN.hpp
//
// Created by Marc Melikyan on 11/4/20.
//
#ifndef ANN_hpp
#define ANN_hpp
#include "HiddenLayer/HiddenLayer.hpp"
#include "OutputLayer/OutputLayer.hpp"
#include <vector>
#include <tuple>
#include <string>
namespace MLPP{
class ANN{
public:
ANN(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet);
~ANN();
std::vector<double> modelSetTest(std::vector<std::vector<double>> X);
double modelTest(std::vector<double> x);
void gradientDescent(double learning_rate, int max_epoch, bool UI = 1);
void SGD(double learning_rate, int max_epoch, bool UI = 1);
void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1);
void Momentum(double learning_rate, int max_epoch, int mini_batch_size, double gamma, bool NAG, bool UI = 1);
void Adagrad(double learning_rate, int max_epoch, int mini_batch_size, double e, bool UI = 1);
void Adadelta(double learning_rate, int max_epoch, int mini_batch_size, double b1, double e, bool UI = 1);
void Adam(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI = 1);
void Adamax(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI = 1);
void Nadam(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI = 1);
void AMSGrad(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI = 1);
double score();
void save(std::string fileName);
void setLearningRateScheduler(std::string type, double decayConstant);
void setLearningRateScheduler(std::string type, double decayConstant, double dropRate);
void addLayer(int n_hidden, std::string activation, std::string weightInit = "Default", std::string reg = "None", double lambda = 0.5, double alpha = 0.5);
void addOutputLayer(std::string activation, std::string loss, std::string weightInit = "Default", std::string reg = "None", double lambda = 0.5, double alpha = 0.5);
private:
double applyLearningRateScheduler(double learningRate, double decayConstant, double epoch, double dropRate);
double Cost(std::vector<double> y_hat, std::vector<double> y);
void forwardPass();
void updateParameters(std::vector<std::vector<std::vector<double>>> hiddenLayerUpdations, std::vector<double> outputLayerUpdation, double learning_rate);
std::tuple<std::vector<std::vector<std::vector<double>>>, std::vector<double>> computeGradients(std::vector<double> y_hat, std::vector<double> outputSet);
void UI(int epoch, double cost_prev, std::vector<double> y_hat, std::vector<double> outputSet);
std::vector<std::vector<double>> inputSet;
std::vector<double> outputSet;
std::vector<double> y_hat;
std::vector<HiddenLayer> network;
OutputLayer *outputLayer;
int n;
int k;
std::string lrScheduler;
double decayConstant;
double dropRate;
};
}
#endif /* ANN_hpp */
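Given a model built as in the sketch after the commit summary, the mini-batch optimizers declared above are called as (learning_rate, max_epoch, mini_batch_size, hyperparameters..., UI). A hedged call example; the hyperparameter values are commonly used Adam settings, not library defaults:

model.setLearningRateScheduler("Exponential", 0.01);  // optional decay of the base rate
model.Adam(0.001, 200, 16, 0.9, 0.999, 1e-8, false);  // b1, b2, epsilon, UI off
model.save("ann_weights.txt");                        // persists layer weights and biases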

884
MLPP/Activation/Activation.cpp Normal file

@@ -0,0 +1,884 @@
//
// Activation.cpp
//
// Created by Marc Melikyan on 1/16/21.
//
#include <iostream>
#include "LinAlg/LinAlg.hpp"
#include "Activation.hpp"
#include <cmath>
#include <algorithm>
namespace MLPP{
double Activation::linear(double z, bool deriv){
if(deriv){ return 1; }
return z;
}
std::vector<double> Activation::linear(std::vector<double> z, bool deriv){
if(deriv) {
LinAlg alg;
return alg.onevec(z.size());
}
return z;
}
std::vector<std::vector<double>> Activation::linear(std::vector<std::vector<double>> z, bool deriv){
if(deriv){
LinAlg alg;
return alg.onemat(z.size(), z[0].size());
}
return z;
}
double Activation::sigmoid(double z, bool deriv){
if(deriv) { return sigmoid(z) * (1 - sigmoid(z)); }
return 1 / (1 + exp(-z));
}
std::vector<double> Activation::sigmoid(std::vector<double> z, bool deriv){
LinAlg alg;
if(deriv) { return alg.subtraction(sigmoid(z), alg.hadamard_product(sigmoid(z), sigmoid(z))); }
return alg.elementWiseDivision(alg.onevec(z.size()), alg.addition(alg.onevec(z.size()), alg.exp(alg.scalarMultiply(-1, z))));
}
std::vector<std::vector<double>> Activation::sigmoid(std::vector<std::vector<double>> z, bool deriv){
LinAlg alg;
if(deriv) { return alg.subtraction(sigmoid(z), alg.hadamard_product(sigmoid(z), sigmoid(z))); }
return alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), alg.addition(alg.onemat(z.size(), z[0].size()), alg.exp(alg.scalarMultiply(-1, z))));
}
std::vector<double> Activation::softmax(std::vector<double> z, bool deriv){
LinAlg alg;
std::vector<double> a;
a.resize(z.size());
std::vector<double> expZ = alg.exp(z);
double sum = 0;
for(int i = 0; i < z.size(); i++){
sum += expZ[i];
}
for(int i = 0; i < z.size(); i++){
a[i] = expZ[i] / sum;
}
return a;
}
std::vector<std::vector<double>> Activation::softmax(std::vector<std::vector<double>> z, bool deriv){
LinAlg alg;
std::vector<std::vector<double>> a;
a.resize(z.size());
for(int i = 0; i < z.size(); i++){
a[i] = softmax(z[i]);
}
return a;
}
std::vector<double> Activation::adjSoftmax(std::vector<double> z){
LinAlg alg;
std::vector<double> a;
double C = -*std::max_element(z.begin(), z.end());
z = alg.scalarAdd(C, z);
return softmax(z);
}
std::vector<std::vector<double>> Activation::adjSoftmax(std::vector<std::vector<double>> z){
LinAlg alg;
std::vector<std::vector<double>> a;
a.resize(z.size());
for(int i = 0; i < z.size(); i++){
a[i] = adjSoftmax(z[i]);
}
return a;
}
std::vector<std::vector<double>> Activation::softmaxDeriv(std::vector<double> z){
LinAlg alg;
std::vector<std::vector<double>> deriv;
std::vector<double> a = softmax(z);
deriv.resize(a.size());
for(int i = 0; i < deriv.size(); i++){
deriv[i].resize(a.size());
}
for(int i = 0; i < a.size(); i++){
for(int j = 0; j < z.size(); j++){
if(i == j){
deriv[i][j] = a[i] * (1 - a[i]);
}
else{
deriv[i][j] = -a[i] * a[j];
}
}
}
return deriv;
}
std::vector<std::vector<std::vector<double>>> Activation::softmaxDeriv(std::vector<std::vector<double>> z){
LinAlg alg;
std::vector<std::vector<std::vector<double>>> deriv;
std::vector<std::vector<double>> a = softmax(z);
deriv.resize(a.size());
for(int i = 0; i < deriv.size(); i++){
deriv[i].resize(a.size());
}
for(int i = 0; i < a.size(); i++){
for(int j = 0; j < z.size(); j++){
if(i == j){
deriv[i][j] = alg.subtraction(a[i], alg.hadamard_product(a[i], a[i]));
}
else{
deriv[i][j] = alg.scalarMultiply(-1, alg.hadamard_product(a[i], a[j]));
}
}
}
return deriv;
}
double Activation::softplus(double z, bool deriv){
if(deriv){ return sigmoid(z); }
return std::log(1 + exp(z));
}
std::vector<double> Activation::softplus(std::vector<double> z, bool deriv){
if(deriv) { return sigmoid(z); }
LinAlg alg;
return alg.log(alg.addition(alg.onevec(z.size()), alg.exp(z)));
}
std::vector<std::vector<double>> Activation::softplus(std::vector<std::vector<double>> z, bool deriv){
if(deriv) { return sigmoid(z); }
LinAlg alg;
return alg.log(alg.addition(alg.onemat(z.size(), z[0].size()), alg.exp(z)));
}
double Activation::softsign(double z, bool deriv){
if(deriv){ return 1/((1 + abs(z)) * (1 + abs(z))); }
return z/(1 + abs(z));
}
std::vector<double> Activation::softsign(std::vector<double> z, bool deriv){
LinAlg alg;
if(deriv) { return alg.elementWiseDivision(alg.onevec(z.size()), alg.exponentiate(alg.addition(alg.onevec(z.size()), alg.abs(z)), 2)); }
return alg.elementWiseDivision(z, alg.addition(alg.onevec(z.size()), alg.abs(z)));
}
std::vector<std::vector<double>> Activation::softsign(std::vector<std::vector<double>> z, bool deriv){
LinAlg alg;
if(deriv) { return alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), alg.exponentiate(alg.addition(alg.onemat(z.size(), z[0].size()), alg.abs(z)), 2)); }
return alg.elementWiseDivision(z, alg.addition(alg.onemat(z.size(), z[0].size()), alg.abs(z)));
}
double Activation::gaussianCDF(double z, bool deriv){
if(deriv) {
return (1 / sqrt(2 * M_PI)) * exp(-z * z / 2);
}
return 0.5 * (1 + erf(z / sqrt(2)));
}
std::vector<double> Activation::gaussianCDF(std::vector<double> z, bool deriv){
LinAlg alg;
if(deriv) {
return alg.scalarMultiply(1 / sqrt(2 * M_PI), alg.exp(alg.scalarMultiply(-0.5, alg.hadamard_product(z, z)))); // -0.5, not -1/2 (integer division)
}
return alg.scalarMultiply(0.5, alg.addition(alg.onevec(z.size()), alg.erf(alg.scalarMultiply(1/sqrt(2), z))));
}
std::vector<std::vector<double>> Activation::gaussianCDF(std::vector<std::vector<double>> z, bool deriv){
LinAlg alg;
if(deriv) {
return alg.scalarMultiply(1 / sqrt(2 * M_PI), alg.exp(alg.scalarMultiply(-0.5, alg.hadamard_product(z, z)))); // -0.5, not -1/2 (integer division)
}
return alg.scalarMultiply(0.5, alg.addition(alg.onemat(z.size(), z[0].size()), alg.erf(alg.scalarMultiply(1/sqrt(2), z))));
}
double Activation::cloglog(double z, bool deriv){
if(deriv) { return exp(z-exp(z)); }
return 1 - exp(-exp(z));
}
std::vector<double> Activation::cloglog(std::vector<double> z, bool deriv){
LinAlg alg;
if(deriv) {
return alg.hadamard_product(alg.exp(z), alg.exp(alg.scalarMultiply(-1, alg.exp(z)))); // exp(z - exp(z))
}
return alg.scalarMultiply(-1, alg.scalarAdd(-1, alg.exp(alg.scalarMultiply(-1, alg.exp(z)))));
}
std::vector<std::vector<double>> Activation::cloglog(std::vector<std::vector<double>> z, bool deriv){
LinAlg alg;
if(deriv) {
return alg.hadamard_product(alg.exp(z), alg.exp(alg.scalarMultiply(-1, alg.exp(z)))); // exp(z - exp(z))
}
return alg.scalarMultiply(-1, alg.scalarAdd(-1, alg.exp(alg.scalarMultiply(-1, alg.exp(z)))));
}
double Activation::logit(double z, bool deriv){
if(deriv) { return 1/z - 1/(z-1); }
return std::log(z / (1 - z));
}
std::vector<double> Activation::logit(std::vector<double> z, bool deriv){
LinAlg alg;
if(deriv) {
return alg.subtraction(alg.elementWiseDivision(alg.onevec(z.size()), z), alg.elementWiseDivision(alg.onevec(z.size()), alg.subtraction(z, alg.onevec(z.size()))));
}
return alg.log(alg.elementWiseDivision(z, alg.subtraction(alg.onevec(z.size()), z)));
}
std::vector<std::vector<double>> Activation::logit(std::vector<std::vector<double>> z, bool deriv){
LinAlg alg;
if(deriv) {
return alg.subtraction(alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), z), alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), alg.subtraction(z, alg.onemat(z.size(), z[0].size()))));
}
return alg.log(alg.elementWiseDivision(z, alg.subtraction(alg.onemat(z.size(), z[0].size()), z)));
}
double Activation::unitStep(double z, bool deriv){
if(deriv) {
return 0;
}
return z < 0 ? 0 : 1;
}
std::vector<double> Activation::unitStep(std::vector<double> z, bool deriv){
if(deriv){
std::vector<double> deriv;
deriv.resize(z.size());
for(int i = 0; i < z.size(); i++){
deriv[i] = unitStep(z[i], 1);
}
return deriv;
}
std::vector<double> a;
a.resize(z.size());
for(int i = 0; i < a.size(); i++){
a[i] = unitStep(z[i]);
}
return a;
}
std::vector<std::vector<double>> Activation::unitStep(std::vector<std::vector<double>> z, bool deriv){
if(deriv){
std::vector<std::vector<double>> deriv;
deriv.resize(z.size());
for(int i = 0; i < z.size(); i++){
deriv[i] = unitStep(z[i], 1);
}
return deriv;
}
std::vector<std::vector<double>> a;
a.resize(z.size());
for(int i = 0; i < a.size(); i++){
a[i] = unitStep(z[i]);
}
return a;
}
double Activation::swish(double z, bool deriv){
if(deriv){
return swish(z) + sigmoid(z) * (1 - swish(z));
}
return z * sigmoid(z);
}
std::vector<double> Activation::swish(std::vector<double> z, bool deriv){
LinAlg alg;
if(deriv){
return alg.addition(swish(z), alg.subtraction(sigmoid(z), alg.hadamard_product(sigmoid(z), swish(z))));
}
return alg.hadamard_product(z, sigmoid(z));
}
std::vector<std::vector<double>> Activation::swish(std::vector<std::vector<double>> z, bool deriv){
LinAlg alg;
if(deriv){
return alg.addition(swish(z), alg.subtraction(sigmoid(z), alg.hadamard_product(sigmoid(z), swish(z))));
}
return alg.hadamard_product(z, sigmoid(z));
}
double Activation::mish(double z, bool deriv){
if(deriv){
return sech(softplus(z)) * sech(softplus(z)) * z * sigmoid(z) + mish(z)/z;
}
return z * tanh(softplus(z));
}
std::vector<double> Activation::mish(std::vector<double> z, bool deriv){
LinAlg alg;
if(deriv){
return alg.addition(alg.hadamard_product(alg.hadamard_product(alg.hadamard_product(sech(softplus(z)), sech(softplus(z))), z), sigmoid(z)), alg.elementWiseDivision(mish(z), z));
}
return alg.hadamard_product(z, tanh(softplus(z)));
}
std::vector<std::vector<double>> Activation::mish(std::vector<std::vector<double>> z, bool deriv){
LinAlg alg;
if(deriv){
return alg.addition(alg.hadamard_product(alg.hadamard_product(alg.hadamard_product(sech(softplus(z)), sech(softplus(z))), z), sigmoid(z)), alg.elementWiseDivision(mish(z), z));
}
return alg.hadamard_product(z, tanh(softplus(z)));
}
double Activation::sinc(double z, bool deriv){
if(deriv){
return (z * std::cos(z) - std::sin(z)) / (z * z);
}
return std::sin(z)/z;
}
std::vector<double> Activation::sinc(std::vector<double> z, bool deriv){
LinAlg alg;
if(deriv){
return alg.elementWiseDivision(alg.subtraction(alg.hadamard_product(z, alg.cos(z)), alg.sin(z)), alg.hadamard_product(z, z));
}
return alg.elementWiseDivision(alg.sin(z), z);
}
std::vector<std::vector<double>> Activation::sinc(std::vector<std::vector<double>> z, bool deriv){
LinAlg alg;
if(deriv){
return alg.elementWiseDivision(alg.subtraction(alg.hadamard_product(z, alg.cos(z)), alg.sin(z)), alg.hadamard_product(z, z));
}
return alg.elementWiseDivision(alg.sin(z), z);
}
double Activation::RELU(double z, bool deriv){
if (deriv){
if(z <= 0){
return 0;
}
else {
return 1;
}
}
return fmax(0, z);
}
std::vector<double> Activation::RELU(std::vector<double> z, bool deriv){
if(deriv){
std::vector<double> deriv;
deriv.resize(z.size());
for(int i = 0; i < z.size(); i++){
deriv[i] = RELU(z[i], 1);
}
return deriv;
}
std::vector<double> a;
a.resize(z.size());
for(int i = 0; i < a.size(); i++){
a[i] = RELU(z[i]);
}
return a;
}
std::vector<std::vector<double>> Activation::RELU(std::vector<std::vector<double>> z, bool deriv){
if(deriv){
std::vector<std::vector<double>> deriv;
deriv.resize(z.size());
for(int i = 0; i < z.size(); i++){
deriv[i] = RELU(z[i], 1);
}
return deriv;
}
std::vector<std::vector<double>> a;
a.resize(z.size());
for(int i = 0; i < a.size(); i++){
a[i] = RELU(z[i]);
}
return a;
}
double Activation::leakyReLU(double z, double c, bool deriv){
if (deriv){
if(z <= 0){
return c;
}
else {
return 1;
}
}
return fmax(c * z, z);
}
std::vector<double> Activation::leakyReLU(std::vector<double> z, double c, bool deriv){
if(deriv){
std::vector<double> deriv;
deriv.resize(z.size());
for(int i = 0; i < z.size(); i++){
deriv[i] = leakyReLU(z[i], c, 1);
}
return deriv;
}
std::vector<double> a;
a.resize(z.size());
for(int i = 0; i < a.size(); i++){
a[i] = leakyReLU(z[i], c);
}
return a;
}
std::vector<std::vector<double>> Activation::leakyReLU(std::vector<std::vector<double>> z, double c, bool deriv){
if(deriv){
std::vector<std::vector<double>> deriv;
deriv.resize(z.size());
for(int i = 0; i < z.size(); i++){
deriv[i] = leakyReLU(z[i], c, 1);
}
return deriv;
}
std::vector<std::vector<double>> a;
a.resize(z.size());
for(int i = 0; i < a.size(); i++){
a[i] = leakyReLU(z[i], c);
}
return a;
}
double Activation::ELU(double z, double c, bool deriv){
if (deriv){
if(z <= 0){
return c * exp(z);
}
else {
return 1;
}
}
if(z >= 0){
return z;
}
else{
return c * (exp(z) - 1);
}
}
std::vector<double> Activation::ELU(std::vector<double> z, double c, bool deriv){
if(deriv){
std::vector<double> deriv;
deriv.resize(z.size());
for(int i = 0; i < z.size(); i++){
deriv[i] = ELU(z[i], c, 1);
}
return deriv;
}
std::vector<double> a;
a.resize(z.size());
for(int i = 0; i < a.size(); i++){
a[i] = ELU(z[i], c);
}
return a;
}
std::vector<std::vector<double>> Activation::ELU(std::vector<std::vector<double>> z, double c, bool deriv){
if(deriv){
std::vector<std::vector<double>> deriv;
deriv.resize(z.size());
for(int i = 0; i < z.size(); i++){
deriv[i] = ELU(z[i], c, 1);
}
return deriv;
}
std::vector<std::vector<double>> a;
a.resize(z.size());
for(int i = 0; i < a.size(); i++){
a[i] = ELU(z[i], c);
}
return a;
}
double Activation::SELU(double z, double lambda, double c, bool deriv){
if (deriv){
return ELU(z, c, 1);
}
return lambda * ELU(z, c);
}
std::vector<double> Activation::SELU(std::vector<double> z, double lambda, double c, bool deriv){
if(deriv){
std::vector<double> deriv;
deriv.resize(z.size());
for(int i = 0; i < z.size(); i++){
deriv[i] = SELU(z[i], lambda, c, 1);
}
return deriv;
}
std::vector<double> a;
a.resize(z.size());
for(int i = 0; i < a.size(); i++){
a[i] = SELU(z[i], lambda, c);
}
return a;
}
std::vector<std::vector<double>> Activation::SELU(std::vector<std::vector<double>> z, double lambda, double c, bool deriv){
if(deriv){
std::vector<std::vector<double>> deriv;
deriv.resize(z.size());
for(int i = 0; i < z.size(); i++){
deriv[i] = SELU(z[i], lambda, c, 1);
}
return deriv;
}
std::vector<std::vector<double>> a;
a.resize(z.size());
for(int i = 0; i < a.size(); i++){
a[i] = SELU(z[i], lambda, c);
}
return a;
}
double Activation::GELU(double z, bool deriv){
if (deriv){
return 0.5 * tanh(0.0356774 * std::pow(z, 3) + 0.797885 * z) + (0.0535161 * std::pow(z, 3) + 0.398942 * z) * std::pow(sech(0.0356774 * std::pow(z, 3) + 0.797885 * z), 2) + 0.5;
}
return 0.5 * z * (1 + tanh(sqrt(2/M_PI) * (z + 0.044715 * std::pow(z, 3))));
}
std::vector<double> Activation::GELU(std::vector<double> z, bool deriv){
if(deriv){
std::vector<double> deriv;
deriv.resize(z.size());
for(int i = 0; i < z.size(); i++){
deriv[i] = GELU(z[i], 1);
}
return deriv;
}
std::vector<double> a;
a.resize(z.size());
for(int i = 0; i < a.size(); i++){
a[i] = GELU(z[i]);
}
return a;
}
std::vector<std::vector<double>> Activation::GELU(std::vector<std::vector<double>> z, bool deriv){
if(deriv){
std::vector<std::vector<double>> deriv;
deriv.resize(z.size());
for(int i = 0; i < z.size(); i++){
deriv[i] = GELU(z[i], 1);
}
return deriv;
}
std::vector<std::vector<double>> a;
a.resize(z.size());
for(int i = 0; i < a.size(); i++){
a[i] = GELU(z[i]);
}
return a;
}
double Activation::sign(double z, bool deriv){
if(deriv){
return 0;
}
if(z < 0){
return -1;
}
else if(z == 0){
return 0;
}
else{
return 1;
}
}
std::vector<double> Activation::sign(std::vector<double> z, bool deriv){
if(deriv){
std::vector<double> deriv;
deriv.resize(z.size());
for(int i = 0; i < z.size(); i++){
deriv[i] = sign(z[i], 1);
}
return deriv;
}
std::vector<double> a;
a.resize(z.size());
for(int i = 0; i < a.size(); i++){
a[i] = sign(z[i]);
}
return a;
}
std::vector<std::vector<double>> Activation::sign(std::vector<std::vector<double>> z, bool deriv){
if(deriv){
std::vector<std::vector<double>> deriv;
deriv.resize(z.size());
for(int i = 0; i < z.size(); i++){
deriv[i] = sign(z[i], 1);
}
return deriv;
}
std::vector<std::vector<double>> a;
a.resize(z.size());
for(int i = 0; i < a.size(); i++){
a[i] = sign(z[i]);
}
return a;
}
double Activation::sinh(double z, bool deriv){
if(deriv){ return cosh(z); }
return 0.5 * (exp(z) - exp(-z));
}
std::vector<double> Activation::sinh(std::vector<double> z, bool deriv){
if(deriv){ return cosh(z); }
LinAlg alg;
return alg.scalarMultiply(0.5, alg.subtraction(alg.exp(z), alg.exp(alg.scalarMultiply(-1, z))));
}
std::vector<std::vector<double>> Activation::sinh(std::vector<std::vector<double>> z, bool deriv){
if(deriv){ return cosh(z); }
LinAlg alg;
return alg.scalarMultiply(0.5, alg.subtraction(alg.exp(z), alg.exp(alg.scalarMultiply(-1, z))));
}
double Activation::cosh(double z, bool deriv){
if(deriv){ return sinh(z); }
return 0.5 * (exp(z) + exp(-z));
}
std::vector<double> Activation::cosh(std::vector<double> z, bool deriv){
if(deriv){ return sinh(z); }
LinAlg alg;
return alg.scalarMultiply(0.5, alg.addition(alg.exp(z), alg.exp(alg.scalarMultiply(-1, z))));
}
std::vector<std::vector<double>> Activation::cosh(std::vector<std::vector<double>> z, bool deriv){
if(deriv){ return sinh(z); }
LinAlg alg;
return alg.scalarMultiply(0.5, alg.addition(alg.exp(z), alg.exp(alg.scalarMultiply(-1, z))));
}
double Activation::tanh(double z, bool deriv){
if(deriv){ return 1 - tanh(z) * tanh(z); }
return (exp(z) - exp(-z)) / (exp(z) + exp(-z));
}
std::vector<double> Activation::tanh(std::vector<double> z, bool deriv){
LinAlg alg;
if(deriv){
return alg.scalarMultiply(-1, alg.scalarAdd(-1, alg.hadamard_product(tanh(z), tanh(z))));
}
return alg.elementWiseDivision(alg.subtraction(alg.exp(z), alg.exp(alg.scalarMultiply(-1, z))), alg.addition(alg.exp(z), alg.exp(alg.scalarMultiply(-1, z))));
}
std::vector<std::vector<double>> Activation::tanh(std::vector<std::vector<double>> z, bool deriv){
LinAlg alg;
if(deriv){
return alg.scalarMultiply(-1, alg.scalarAdd(-1, alg.hadamard_product(tanh(z), tanh(z))));
}
return alg.elementWiseDivision(alg.subtraction(alg.exp(z), alg.exp(alg.scalarMultiply(-1, z))), alg.addition(alg.exp(z), alg.exp(alg.scalarMultiply(-1, z))));
}
double Activation::csch(double z, bool deriv){
if(deriv){ return -csch(z) * coth(z); }
return 1 / sinh(z);
}
std::vector<double> Activation::csch(std::vector<double> z, bool deriv){
LinAlg alg;
if(deriv){ return alg.hadamard_product(alg.scalarMultiply(-1, csch(z)), coth(z)); }
return alg.elementWiseDivision(alg.onevec(z.size()), sinh(z));
}
std::vector<std::vector<double>> Activation::csch(std::vector<std::vector<double>> z, bool deriv){
LinAlg alg;
if(deriv){ return alg.hadamard_product(alg.scalarMultiply(-1, csch(z)), coth(z)); }
return alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), sinh(z));
}
double Activation::sech(double z, bool deriv){
if(deriv){ return -sech(z) * tanh(z); }
return 1 / cosh(z);
}
std::vector<double> Activation::sech(std::vector<double> z, bool deriv){
LinAlg alg;
if(deriv){ return alg.hadamard_product(alg.scalarMultiply(-1, sech(z)), tanh(z)); }
return alg.elementWiseDivision(alg.onevec(z.size()), cosh(z));
// return activation(z, deriv, static_cast<void (*)(double, bool)>(&sech));
}
std::vector<std::vector<double>> Activation::sech(std::vector<std::vector<double>> z, bool deriv){
LinAlg alg;
if(deriv){ return alg.hadamard_product(alg.scalarMultiply(-1, sech(z)), tanh(z)); }
return alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), cosh(z));
// return activation(z, deriv, static_cast<void (*)(double, bool)>(&sech));
}
double Activation::coth(double z, bool deriv){
if(deriv){ return -csch(z) * csch(z); }
return 1 / tanh(z);
}
std::vector<double> Activation::coth(std::vector<double> z, bool deriv){
LinAlg alg;
if(deriv){ return alg.hadamard_product(alg.scalarMultiply(-1, csch(z)), csch(z)); }
return alg.elementWiseDivision(alg.onevec(z.size()), tanh(z));
}
std::vector<std::vector<double>> Activation::coth(std::vector<std::vector<double>> z, bool deriv){
LinAlg alg;
if(deriv){ return alg.hadamard_product(alg.scalarMultiply(-1, csch(z)), csch(z)); }
return alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), tanh(z));
}
double Activation::arsinh(double z, bool deriv){
if(deriv){ return 1 / sqrt(z * z + 1); }
return std::log(z + sqrt(z * z + 1));
}
std::vector<double> Activation::arsinh(std::vector<double> z, bool deriv){
LinAlg alg;
if(deriv){ return alg.elementWiseDivision(alg.onevec(z.size()), alg.sqrt(alg.addition(alg.hadamard_product(z, z), alg.onevec(z.size())))); }
return alg.log(alg.addition(z, alg.sqrt(alg.addition(alg.hadamard_product(z, z), alg.onevec(z.size())))));
}
std::vector<std::vector<double>> Activation::arsinh(std::vector<std::vector<double>> z, bool deriv){
LinAlg alg;
if(deriv){ return alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), alg.sqrt(alg.addition(alg.hadamard_product(z, z), alg.onemat(z.size(), z[0].size())))); }
return alg.log(alg.addition(z, alg.sqrt(alg.addition(alg.hadamard_product(z, z), alg.onemat(z.size(), z[0].size())))));
}
double Activation::arcosh(double z, bool deriv){
if(deriv){
return 1/sqrt(z * z - 1);
}
return std::log(z + sqrt(z * z - 1));
}
std::vector<double> Activation::arcosh(std::vector<double> z, bool deriv){
LinAlg alg;
if(deriv){ return alg.elementWiseDivision(alg.onevec(z.size()), alg.sqrt(alg.subtraction(alg.hadamard_product(z, z), alg.onevec(z.size())))); }
return alg.log(alg.addition(z, alg.sqrt(alg.subtraction(alg.hadamard_product(z, z), alg.onevec(z.size())))));
}
std::vector<std::vector<double>> Activation::arcosh(std::vector<std::vector<double>> z, bool deriv){
LinAlg alg;
if(deriv){ return alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), alg.sqrt(alg.subtraction(alg.hadamard_product(z, z), alg.onemat(z.size(), z[0].size())))); }
return alg.log(alg.addition(z, alg.sqrt(alg.subtraction(alg.hadamard_product(z, z), alg.onemat(z.size(), z[0].size())))));
}
double Activation::artanh(double z, bool deriv){
if(deriv){
return 1/(1 - z * z);
}
return 0.5 * std::log((1 + z)/(1 - z));
}
std::vector<double> Activation::artanh(std::vector<double> z, bool deriv){
LinAlg alg;
if(deriv){ return alg.elementWiseDivision(alg.onevec(z.size()), alg.subtraction(alg.onevec(z.size()), alg.hadamard_product(z, z))); }
return alg.scalarMultiply(0.5, alg.log(alg.elementWiseDivision(alg.addition(alg.onevec(z.size()), z), alg.subtraction(alg.onevec(z.size()), z))));
}
std::vector<std::vector<double>> Activation::artanh(std::vector<std::vector<double>> z, bool deriv){
LinAlg alg;
if(deriv){ return alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), alg.subtraction(alg.onemat(z.size(), z[0].size()), alg.hadamard_product(z, z))); }
return alg.scalarMultiply(0.5, alg.log(alg.elementWiseDivision(alg.addition(alg.onemat(z.size(), z[0].size()), z), alg.subtraction(alg.onemat(z.size(), z[0].size()), z))));
}
double Activation::arcsch(double z, bool deriv){
if(deriv){
return -1/((z * z) * sqrt(1 + (1/(z * z))));
}
return std::log(sqrt(1 + (1 / (z * z))) + (1/z));
}
std::vector<double> Activation::arcsch(std::vector<double> z, bool deriv){
LinAlg alg;
if(deriv){ return alg.elementWiseDivision(alg.full(z.size(), -1), alg.hadamard_product(alg.hadamard_product(z, z), alg.sqrt(alg.addition(alg.onevec(z.size()), alg.elementWiseDivision(alg.onevec(z.size()), alg.hadamard_product(z, z)))))); }
return alg.log(alg.addition(alg.sqrt(alg.addition(alg.onevec(z.size()), alg.elementWiseDivision(alg.onevec(z.size()), alg.hadamard_product(z, z)))), alg.elementWiseDivision(alg.onevec(z.size()), z)));
}
std::vector<std::vector<double>> Activation::arcsch(std::vector<std::vector<double>> z, bool deriv){
LinAlg alg;
if(deriv){ return alg.elementWiseDivision(alg.full(z.size(), z[0].size(), -1), alg.hadamard_product(alg.hadamard_product(z, z), alg.sqrt(alg.addition(alg.onemat(z.size(), z[0].size()), alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), alg.hadamard_product(z, z)))))); }
return alg.log(alg.addition(alg.sqrt(alg.addition(alg.onemat(z.size(), z[0].size()), alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), alg.hadamard_product(z, z)))), alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), z)));
}
double Activation::arsech(double z, bool deriv){
if(deriv){
return -1/(z * sqrt(1 - z * z));
}
            return std::log((1/z) + sqrt((1/z) + 1) * sqrt((1/z) - 1)); // arsech(z) = ln(1/z + sqrt(1/z^2 - 1))
}
std::vector<double> Activation::arsech(std::vector<double> z, bool deriv){
LinAlg alg;
if(deriv){ return alg.elementWiseDivision(alg.full(z.size(), -1), alg.hadamard_product(z, alg.sqrt(alg.subtraction(alg.onevec(z.size()), alg.hadamard_product(z, z))))); }
            return alg.log(alg.addition(alg.elementWiseDivision(alg.onevec(z.size()), z), alg.sqrt(alg.hadamard_product(alg.addition(alg.elementWiseDivision(alg.onevec(z.size()), z), alg.onevec(z.size())), alg.subtraction(alg.elementWiseDivision(alg.onevec(z.size()), z), alg.onevec(z.size())))))); // arsech(z) = ln(1/z + sqrt(1/z^2 - 1)), applied element-wise
}
std::vector<std::vector<double>> Activation::arsech(std::vector<std::vector<double>> z, bool deriv){
LinAlg alg;
if(deriv){ return alg.elementWiseDivision(alg.full(z.size(), z[0].size(), -1), alg.hadamard_product(z, alg.sqrt(alg.subtraction(alg.onemat(z.size(), z[0].size()), alg.hadamard_product(z, z))))); }
            return alg.log(alg.addition(alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), z), alg.sqrt(alg.hadamard_product(alg.addition(alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), z), alg.onemat(z.size(), z[0].size())), alg.subtraction(alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), z), alg.onemat(z.size(), z[0].size())))))); // arsech(z) = ln(1/z + sqrt(1/z^2 - 1)), applied element-wise
}
double Activation::arcoth(double z, bool deriv){
if(deriv){
return 1/(1 - z * z);
}
return 0.5 * std::log((1 + z)/(z - 1));
}
std::vector<double> Activation::arcoth(std::vector<double> z, bool deriv){
LinAlg alg;
if(deriv){ return alg.elementWiseDivision(alg.onevec(z.size()), alg.subtraction(alg.onevec(z.size()), alg.hadamard_product(z, z))); }
return alg.scalarMultiply(0.5, alg.log(alg.elementWiseDivision(alg.addition(alg.onevec(z.size()), z), alg.subtraction(z, alg.onevec(z.size())))));
}
std::vector<std::vector<double>> Activation::arcoth(std::vector<std::vector<double>> z, bool deriv){
LinAlg alg;
if(deriv){ return alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), alg.subtraction(alg.onemat(z.size(), z[0].size()), alg.hadamard_product(z, z))); }
return alg.scalarMultiply(0.5, alg.log(alg.elementWiseDivision(alg.addition(alg.onemat(z.size(), z[0].size()), z), alg.subtraction(z, alg.onemat(z.size(), z[0].size())))));
}
// TO DO: Implement this template activation
std::vector<double> Activation::activation(std::vector<double> z, bool deriv, double(*function)(double, bool)){
if(deriv){
                std::vector<double> deriv_a; // renamed so the result vector does not shadow the deriv flag
                deriv_a.resize(z.size());
                for(int i = 0; i < z.size(); i++){
                    deriv_a[i] = function(z[i], true);
                }
                return deriv_a;
}
std::vector<double> a;
a.resize(z.size());
for(int i = 0; i < z.size(); i++){
a[i] = function(z[i], deriv);
}
return a;
}
}

View File

@ -0,0 +1,146 @@
//
// Activation.hpp
//
// Created by Marc Melikyan on 1/16/21.
//
#ifndef Activation_hpp
#define Activation_hpp
#include <vector>
namespace MLPP{
class Activation{
public:
double linear(double z, bool deriv = 0);
std::vector<double> linear(std::vector<double> z, bool deriv = 0);
std::vector<std::vector<double>> linear(std::vector<std::vector<double>> z, bool deriv = 0);
double sigmoid(double z, bool deriv = 0);
std::vector<double> sigmoid(std::vector<double> z, bool deriv = 0);
std::vector<std::vector<double>> sigmoid(std::vector<std::vector<double>> z, bool deriv = 0);
std::vector<double> softmax(std::vector<double> z, bool deriv = 0);
std::vector<std::vector<double>> softmax(std::vector<std::vector<double>> z, bool deriv = 0);
std::vector<double> adjSoftmax(std::vector<double> z);
std::vector<std::vector<double>> adjSoftmax(std::vector<std::vector<double>> z);
std::vector<std::vector<double>> softmaxDeriv(std::vector<double> z);
std::vector<std::vector<std::vector<double>>> softmaxDeriv(std::vector<std::vector<double>> z);
double softplus(double z, bool deriv = 0);
std::vector<double> softplus(std::vector<double> z, bool deriv = 0);
std::vector<std::vector<double>> softplus(std::vector<std::vector<double>> z, bool deriv = 0);
double softsign(double z, bool deriv = 0);
std::vector<double> softsign(std::vector<double> z, bool deriv = 0);
std::vector<std::vector<double>> softsign(std::vector<std::vector<double>> z, bool deriv = 0);
double gaussianCDF(double z, bool deriv = 0);
std::vector<double> gaussianCDF(std::vector<double> z, bool deriv = 0);
std::vector<std::vector<double>> gaussianCDF(std::vector<std::vector<double>> z, bool deriv = 0);
double cloglog(double z, bool deriv = 0);
std::vector<double> cloglog(std::vector<double> z, bool deriv = 0);
std::vector<std::vector<double>> cloglog(std::vector<std::vector<double>> z, bool deriv = 0);
double logit(double z, bool deriv = 0);
std::vector<double> logit(std::vector<double> z, bool deriv = 0);
std::vector<std::vector<double>> logit(std::vector<std::vector<double>> z, bool deriv = 0);
double unitStep(double z, bool deriv = 0);
std::vector<double> unitStep(std::vector<double> z, bool deriv = 0);
std::vector<std::vector<double>> unitStep(std::vector<std::vector<double>> z, bool deriv = 0);
double swish(double z, bool deriv = 0);
std::vector<double> swish(std::vector<double> z, bool deriv = 0);
std::vector<std::vector<double>> swish(std::vector<std::vector<double>> z, bool deriv = 0);
double mish(double z, bool deriv = 0);
std::vector<double> mish(std::vector<double> z, bool deriv = 0);
std::vector<std::vector<double>> mish(std::vector<std::vector<double>> z, bool deriv = 0);
double sinc(double z, bool deriv = 0);
std::vector<double> sinc(std::vector<double> z, bool deriv = 0);
std::vector<std::vector<double>> sinc(std::vector<std::vector<double>> z, bool deriv = 0);
double RELU(double z, bool deriv = 0);
std::vector<double> RELU(std::vector<double> z, bool deriv = 0);
std::vector<std::vector<double>> RELU(std::vector<std::vector<double>> z, bool deriv = 0);
double leakyReLU(double z, double c, bool deriv = 0);
std::vector<double> leakyReLU(std::vector<double> z, double c, bool deriv = 0);
std::vector<std::vector<double>> leakyReLU(std::vector<std::vector<double>> z, double c, bool deriv = 0);
double ELU(double z, double c, bool deriv = 0);
std::vector<double> ELU(std::vector<double> z, double c, bool deriv = 0);
std::vector<std::vector<double>> ELU(std::vector<std::vector<double>> z, double c, bool deriv = 0);
double SELU(double z, double lambda, double c, bool deriv = 0);
std::vector<double> SELU(std::vector<double> z, double lambda, double c, bool deriv = 0);
            std::vector<std::vector<double>> SELU(std::vector<std::vector<double>> z, double lambda, double c, bool deriv = 0);
double GELU(double z, bool deriv = 0);
std::vector<double> GELU(std::vector<double> z, bool deriv = 0);
std::vector<std::vector<double>> GELU(std::vector<std::vector<double>> z, bool deriv = 0);
double sign(double z, bool deriv = 0);
std::vector<double> sign(std::vector<double> z, bool deriv = 0);
std::vector<std::vector<double>> sign(std::vector<std::vector<double>> z, bool deriv = 0);
double sinh(double z, bool deriv = 0);
std::vector<double> sinh(std::vector<double> z, bool deriv = 0);
std::vector<std::vector<double>> sinh(std::vector<std::vector<double>> z, bool deriv = 0);
double cosh(double z, bool deriv = 0);
std::vector<double> cosh(std::vector<double> z, bool deriv = 0);
std::vector<std::vector<double>> cosh(std::vector<std::vector<double>> z, bool deriv = 0);
double tanh(double z, bool deriv = 0);
std::vector<double> tanh(std::vector<double> z, bool deriv = 0);
std::vector<std::vector<double>> tanh(std::vector<std::vector<double>> z, bool deriv = 0);
double csch(double z, bool deriv = 0);
std::vector<double> csch(std::vector<double> z, bool deriv = 0);
std::vector<std::vector<double>> csch( std::vector<std::vector<double>> z, bool deriv = 0);
double sech(double z, bool deriv = 0);
std::vector<double> sech(std::vector<double> z, bool deriv = 0);
std::vector<std::vector<double>> sech(std::vector<std::vector<double>> z, bool deriv = 0);
double coth(double z, bool deriv = 0);
std::vector<double> coth(std::vector<double> z, bool deriv = 0);
std::vector<std::vector<double>> coth(std::vector<std::vector<double>> z, bool deriv = 0);
double arsinh(double z, bool deriv = 0);
std::vector<double> arsinh(std::vector<double> z, bool deriv = 0);
std::vector<std::vector<double>> arsinh(std::vector<std::vector<double>> z, bool deriv = 0);
double arcosh(double z, bool deriv = 0);
std::vector<double> arcosh(std::vector<double> z, bool deriv = 0);
std::vector<std::vector<double>> arcosh(std::vector<std::vector<double>> z, bool deriv = 0);
double artanh(double z, bool deriv = 0);
std::vector<double> artanh(std::vector<double> z, bool deriv = 0);
std::vector<std::vector<double>> artanh(std::vector<std::vector<double>> z, bool deriv = 0);
double arcsch(double z, bool deriv = 0);
std::vector<double> arcsch(std::vector<double> z, bool deriv = 0);
std::vector<std::vector<double>> arcsch(std::vector<std::vector<double>> z, bool deriv = 0);
double arsech(double z, bool deriv = 0);
std::vector<double> arsech(std::vector<double> z, bool deriv = 0);
std::vector<std::vector<double>> arsech(std::vector<std::vector<double>> z, bool deriv = 0);
double arcoth(double z, bool deriv = 0);
std::vector<double> arcoth(std::vector<double> z, bool deriv = 0);
std::vector<std::vector<double>> arcoth(std::vector<std::vector<double>> z, bool deriv = 0);
std::vector<double> activation(std::vector<double> z, bool deriv, double(*function)(double, bool));
private:
};
}
#endif /* Activation_hpp */
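A minimal usage sketch for the Activation class above, assuming the MLPP headers are on the include path; the tanh lambda is only an illustration of the function-pointer overload:

#include <iostream>
#include <vector>
#include <cmath>
#include "Activation/Activation.hpp"

int main(){
    MLPP::Activation avn;
    std::vector<double> z = {-1.0, 0.0, 2.0};

    std::vector<double> a = avn.sigmoid(z);      // sigmoid(z)
    std::vector<double> da = avn.sigmoid(z, 1);  // sigmoid'(z), via the deriv flag

    // Mapping a scalar activation over a vector through the function-pointer overload;
    // a capture-less lambda converts to double(*)(double, bool).
    std::vector<double> t = avn.activation(z, false, [](double x, bool deriv) -> double {
        return deriv ? 1 - std::tanh(x) * std::tanh(x) : std::tanh(x);
    });

    for(int i = 0; i < z.size(); i++){
        std::cout << a[i] << " " << da[i] << " " << t[i] << std::endl;
    }
}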

View File

@ -0,0 +1,253 @@
//
// AutoEncoder.cpp
//
// Created by Marc Melikyan on 11/4/20.
//
#include "AutoEncoder.hpp"
#include "Activation/Activation.hpp"
#include "LinAlg/LinAlg.hpp"
#include "Utilities/Utilities.hpp"
#include "Cost/Cost.hpp"
#include <iostream>
#include <random>
namespace MLPP {
AutoEncoder::AutoEncoder(std::vector<std::vector<double>> inputSet, int n_hidden)
: inputSet(inputSet), n_hidden(n_hidden), n(inputSet.size()), k(inputSet[0].size())
{
Activation avn;
y_hat.resize(inputSet.size());
weights1 = Utilities::weightInitialization(k, n_hidden);
weights2 = Utilities::weightInitialization(n_hidden, k);
bias1 = Utilities::biasInitialization(n_hidden);
bias2 = Utilities::biasInitialization(k);
}
std::vector<std::vector<double>> AutoEncoder::modelSetTest(std::vector<std::vector<double>> X){
return Evaluate(X);
}
std::vector<double> AutoEncoder::modelTest(std::vector<double> x){
return Evaluate(x);
}
void AutoEncoder::gradientDescent(double learning_rate, int max_epoch, bool UI){
Activation avn;
LinAlg alg;
double cost_prev = 0;
int epoch = 1;
forwardPass();
while(true){
cost_prev = Cost(y_hat, inputSet);
// Calculating the errors
std::vector<std::vector<double>> error = alg.subtraction(y_hat, inputSet);
// Calculating the weight/bias gradients for layer 2
std::vector<std::vector<double>> D2_1 = alg.matmult(alg.transpose(a2), error);
// weights and bias updation for layer 2
weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate/n, D2_1));
// Calculating the bias gradients for layer 2
bias2 = alg.subtractMatrixRows(bias2, alg.scalarMultiply(learning_rate, error));
            // Calculating the weight/bias gradients for layer 1
std::vector<std::vector<double>> D1_1 = alg.matmult(error, alg.transpose(weights2));
std::vector<std::vector<double>> D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, 1));
std::vector<std::vector<double>> D1_3 = alg.matmult(alg.transpose(inputSet), D1_2);
            // Weight and bias update for layer 1
weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate/n, D1_3));
bias1 = alg.subtractMatrixRows(bias1, alg.scalarMultiply(learning_rate/n, D1_2));
forwardPass();
// UI PORTION
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, inputSet));
std::cout << "Layer 1:" << std::endl;
Utilities::UI(weights1, bias1);
std::cout << "Layer 2:" << std::endl;
Utilities::UI(weights2, bias2);
}
epoch++;
if(epoch > max_epoch) { break; }
}
}
void AutoEncoder::SGD(double learning_rate, int max_epoch, bool UI){
Activation avn;
LinAlg alg;
double cost_prev = 0;
int epoch = 1;
while(true){
std::random_device rd;
std::default_random_engine generator(rd());
std::uniform_int_distribution<int> distribution(0, int(n - 1));
int outputIndex = distribution(generator);
std::vector<double> y_hat = Evaluate(inputSet[outputIndex]);
auto [z2, a2] = propagate(inputSet[outputIndex]);
cost_prev = Cost({y_hat}, {inputSet[outputIndex]});
std::vector<double> error = alg.subtraction(y_hat, inputSet[outputIndex]);
// Weight updation for layer 2
std::vector<std::vector<double>> D2_1 = alg.outerProduct(error, a2);
weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate, alg.transpose(D2_1)));
// Bias updation for layer 2
bias2 = alg.subtraction(bias2, alg.scalarMultiply(learning_rate, error));
// Weight updation for layer 1
std::vector<double> D1_1 = alg.mat_vec_mult(weights2, error);
std::vector<double> D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, 1));
std::vector<std::vector<double>> D1_3 = alg.outerProduct(inputSet[outputIndex], D1_2);
weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate, D1_3));
// Bias updation for layer 1
bias1 = alg.subtraction(bias1, alg.scalarMultiply(learning_rate, D1_2));
y_hat = Evaluate(inputSet[outputIndex]);
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost({y_hat}, {inputSet[outputIndex]}));
std::cout << "Layer 1:" << std::endl;
Utilities::UI(weights1, bias1);
std::cout << "Layer 2:" << std::endl;
Utilities::UI(weights2, bias2);
}
epoch++;
if(epoch > max_epoch) { break; }
}
forwardPass();
}
void AutoEncoder::MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI){
Activation avn;
LinAlg alg;
double cost_prev = 0;
int epoch = 1;
// Creating the mini-batches
int n_mini_batch = n/mini_batch_size;
std::vector<std::vector<std::vector<double>>> inputMiniBatches = Utilities::createMiniBatches(inputSet, n_mini_batch);
while(true){
for(int i = 0; i < n_mini_batch; i++){
std::vector<std::vector<double>> y_hat = Evaluate(inputMiniBatches[i]);
auto [z2, a2] = propagate(inputMiniBatches[i]);
cost_prev = Cost(y_hat, inputMiniBatches[i]);
// Calculating the errors
std::vector<std::vector<double>> error = alg.subtraction(y_hat, inputMiniBatches[i]);
// Calculating the weight/bias gradients for layer 2
std::vector<std::vector<double>> D2_1 = alg.matmult(alg.transpose(a2), error);
// weights and bias updation for layer 2
weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate/inputMiniBatches[i].size(), D2_1));
// Bias Updation for layer 2
bias2 = alg.subtractMatrixRows(bias2, alg.scalarMultiply(learning_rate, error));
                // Calculating the weight/bias gradients for layer 1
std::vector<std::vector<double>> D1_1 = alg.matmult(error, alg.transpose(weights2));
std::vector<std::vector<double>> D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, 1));
std::vector<std::vector<double>> D1_3 = alg.matmult(alg.transpose(inputMiniBatches[i]), D1_2);
                // Weight and bias update for layer 1
weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate/inputMiniBatches[i].size(), D1_3));
bias1 = alg.subtractMatrixRows(bias1, alg.scalarMultiply(learning_rate/inputMiniBatches[i].size(), D1_2));
y_hat = Evaluate(inputMiniBatches[i]);
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, inputMiniBatches[i]));
std::cout << "Layer 1:" << std::endl;
Utilities::UI(weights1, bias1);
std::cout << "Layer 2:" << std::endl;
Utilities::UI(weights2, bias2);
}
}
epoch++;
if(epoch > max_epoch) { break; }
}
forwardPass();
}
double AutoEncoder::score(){
Utilities util;
return util.performance(y_hat, inputSet);
}
void AutoEncoder::save(std::string fileName){
Utilities util;
util.saveParameters(fileName, weights1, bias1, 0, 1);
util.saveParameters(fileName, weights2, bias2, 1, 2);
}
double AutoEncoder::Cost(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y){
class Cost cost;
            return cost.MSE(y_hat, y); // compare against the targets passed in, not always the full inputSet
}
std::vector<std::vector<double>> AutoEncoder::Evaluate(std::vector<std::vector<double>> X){
LinAlg alg;
Activation avn;
std::vector<std::vector<double>> z2 = alg.mat_vec_add(alg.matmult(X, weights1), bias1);
std::vector<std::vector<double>> a2 = avn.sigmoid(z2);
return alg.mat_vec_add(alg.matmult(a2, weights2), bias2);
}
std::tuple<std::vector<std::vector<double>>, std::vector<std::vector<double>>> AutoEncoder::propagate(std::vector<std::vector<double>> X){
LinAlg alg;
Activation avn;
std::vector<std::vector<double>> z2 = alg.mat_vec_add(alg.matmult(X, weights1), bias1);
std::vector<std::vector<double>> a2 = avn.sigmoid(z2);
return {z2, a2};
}
std::vector<double> AutoEncoder::Evaluate(std::vector<double> x){
LinAlg alg;
Activation avn;
std::vector<double> z2 = alg.addition(alg.mat_vec_mult(alg.transpose(weights1), x), bias1);
std::vector<double> a2 = avn.sigmoid(z2);
return alg.addition(alg.mat_vec_mult(alg.transpose(weights2), a2), bias2);
}
std::tuple<std::vector<double>, std::vector<double>> AutoEncoder::propagate(std::vector<double> x){
LinAlg alg;
Activation avn;
std::vector<double> z2 = alg.addition(alg.mat_vec_mult(alg.transpose(weights1), x), bias1);
std::vector<double> a2 = avn.sigmoid(z2);
return {z2, a2};
}
void AutoEncoder::forwardPass(){
LinAlg alg;
Activation avn;
z2 = alg.mat_vec_add(alg.matmult(inputSet, weights1), bias1);
a2 = avn.sigmoid(z2);
y_hat = alg.mat_vec_add(alg.matmult(a2, weights2), bias2);
}
}

View File

@ -0,0 +1,54 @@
//
// AutoEncoder.hpp
//
// Created by Marc Melikyan on 11/4/20.
//
#ifndef AutoEncoder_hpp
#define AutoEncoder_hpp
#include <vector>
#include <tuple>
#include <string>
namespace MLPP {
class AutoEncoder{
public:
AutoEncoder(std::vector<std::vector<double>> inputSet, int n_hidden);
std::vector<std::vector<double>> modelSetTest(std::vector<std::vector<double>> X);
std::vector<double> modelTest(std::vector<double> x);
void gradientDescent(double learning_rate, int max_epoch, bool UI = 1);
void SGD(double learning_rate, int max_epoch, bool UI = 1);
void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1);
double score();
void save(std::string fileName);
private:
double Cost(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y);
std::vector<std::vector<double>> Evaluate(std::vector<std::vector<double>> X);
std::tuple<std::vector<std::vector<double>>, std::vector<std::vector<double>>> propagate(std::vector<std::vector<double>> X);
std::vector<double> Evaluate(std::vector<double> x);
std::tuple<std::vector<double>, std::vector<double>> propagate(std::vector<double> x);
void forwardPass();
std::vector<std::vector<double>> inputSet;
std::vector<std::vector<double>> y_hat;
std::vector<std::vector<double>> weights1;
std::vector<std::vector<double>> weights2;
std::vector<double> bias1;
std::vector<double> bias2;
std::vector<std::vector<double>> z2;
std::vector<std::vector<double>> a2;
int n;
int k;
int n_hidden;
};
}
#endif /* AutoEncoder_hpp */
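A minimal usage sketch for the AutoEncoder class above, assuming a small hand-made dataset; the hidden width and hyperparameters are illustrative only:

#include <iostream>
#include <vector>
#include "AutoEncoder/AutoEncoder.hpp"

int main(){
    std::vector<std::vector<double>> X = {{1, 2, 3}, {2, 4, 6}, {3, 6, 9}, {4, 8, 12}};

    MLPP::AutoEncoder ae(X, 2);              // 3 input features squeezed through 2 hidden units
    ae.gradientDescent(0.001, 10000, false); // learning rate, max epochs, UI off

    std::vector<std::vector<double>> Xhat = ae.modelSetTest(X); // reconstructions
    std::cout << "Reconstruction score: " << ae.score() << std::endl;
    std::cout << "First reconstructed row has " << Xhat[0].size() << " features" << std::endl;
}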

View File

@ -0,0 +1,182 @@
//
// BernoulliNB.cpp
//
// Created by Marc Melikyan on 1/17/21.
//
#include "BernoulliNB.hpp"
#include "Utilities/Utilities.hpp"
#include "LinAlg/LinAlg.hpp"
#include "Data/Data.hpp"
#include <iostream>
#include <random>
namespace MLPP{
BernoulliNB::BernoulliNB(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet)
: inputSet(inputSet), outputSet(outputSet), class_num(2)
{
y_hat.resize(outputSet.size());
Evaluate();
}
std::vector<double> BernoulliNB::modelSetTest(std::vector<std::vector<double>> X){
std::vector<double> y_hat;
for(int i = 0; i < X.size(); i++){
y_hat.push_back(modelTest(X[i]));
}
return y_hat;
}
double BernoulliNB::modelTest(std::vector<double> x){
double score_0 = 1;
double score_1 = 1;
std::vector<int> foundIndices;
for(int j = 0; j < x.size(); j++){
for(int k = 0; k < vocab.size(); k++){
if(x[j] == vocab[k]){
score_0 *= theta[0][vocab[k]];
score_1 *= theta[1][vocab[k]];
foundIndices.push_back(k);
}
}
}
for(int i = 0; i < vocab.size(); i++){
bool found = false;
for(int j = 0; j < foundIndices.size(); j++){
if(vocab[i] == vocab[foundIndices[j]]){
found = true;
}
}
if(!found){
score_0 *= 1 - theta[0][vocab[i]];
score_1 *= 1 - theta[1][vocab[i]];
}
}
score_0 *= prior_0;
score_1 *= prior_1;
            // Assigning the training example to a class
if(score_0 > score_1){
return 0;
}
else{
return 1;
}
}
double BernoulliNB::score(){
Utilities util;
return util.performance(y_hat, outputSet);
}
void BernoulliNB::computeVocab(){
LinAlg alg;
Data data;
vocab = data.vecToSet<double>(alg.flatten(inputSet));
}
void BernoulliNB::computeTheta(){
// Resizing theta for the sake of ease & proper access of the elements.
theta.resize(class_num);
            // Setting all values in the hash map to 0 by default.
for(int i = class_num - 1; i >= 0; i--){
for(int j = 0; j < vocab.size(); j++){
theta[i][vocab[j]] = 0;
}
}
for(int i = 0; i < inputSet.size(); i++){
for(int j = 0; j < inputSet[0].size(); j++){
theta[outputSet[i]][inputSet[i][j]]++;
}
}
            // Normalizing the counts into per-class conditional probabilities.
            for(int i = 0; i < theta.size(); i++){
                for(auto& kv : theta[i]){
                    if(i == 0){
                        kv.second /= prior_0 * y_hat.size();
                    }
                    else{
                        kv.second /= prior_1 * y_hat.size();
                    }
                }
            }
}
        void BernoulliNB::Evaluate(){
            // The priors, vocabulary, and theta depend only on the dataset, so compute them once up front.
            double sum = 0;
            for(int i = 0; i < outputSet.size(); i++){
                if(outputSet[i] == 1){ sum += outputSet[i]; }
            }

            // Easy computation of priors, i.e. Pr(C_k)
            prior_1 = sum / y_hat.size();
            prior_0 = 1 - prior_1;

            // Evaluating the vocab set...
            computeVocab();

            // Evaluating Theta...
            computeTheta();

            for(int i = 0; i < outputSet.size(); i++){
                // Pr(B | A) * Pr(A), accumulated in log space
                double score_0 = 0;
                double score_1 = 0;

                std::vector<int> foundIndices;

                for(int j = 0; j < inputSet[i].size(); j++){
for(int k = 0; k < vocab.size(); k++){
if(inputSet[i][j] == vocab[k]){
score_0 += std::log(theta[0][vocab[k]]);
score_1 += std::log(theta[1][vocab[k]]);
foundIndices.push_back(k);
}
}
}
for(int i = 0; i < vocab.size(); i++){
bool found = false;
for(int j = 0; j < foundIndices.size(); j++){
if(vocab[i] == vocab[foundIndices[j]]){
found = true;
}
}
if(!found){
score_0 += std::log(1 - theta[0][vocab[i]]);
score_1 += std::log(1 - theta[1][vocab[i]]);
}
}
score_0 += std::log(prior_0);
score_1 += std::log(prior_1);
score_0 = exp(score_0);
score_1 = exp(score_1);
std::cout << score_0 << std::endl;
std::cout << score_1 << std::endl;
                // Assigning the training example to a class
if(score_0 > score_1){
y_hat[i] = 0;
}
else{
y_hat[i] = 1;
}
}
}
}

View File

@ -0,0 +1,47 @@
//
// BernoulliNB.hpp
//
// Created by Marc Melikyan on 1/17/21.
//
#ifndef BernoulliNB_hpp
#define BernoulliNB_hpp
#include <vector>
#include <map>
namespace MLPP{
class BernoulliNB{
public:
BernoulliNB(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet);
std::vector<double> modelSetTest(std::vector<std::vector<double>> X);
double modelTest(std::vector<double> x);
double score();
private:
void computeVocab();
void computeTheta();
void Evaluate();
// Model Params
double prior_1 = 0;
double prior_0 = 0;
            std::vector<std::map<double, double>> theta; // per-class conditional probabilities keyed by vocab value
std::vector<double> vocab;
int class_num;
// Datasets
std::vector<std::vector<double>> inputSet;
std::vector<double> outputSet;
std::vector<double> y_hat;
};
}

#endif /* BernoulliNB_hpp */
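A minimal usage sketch for BernoulliNB, assuming a tiny binary-feature dataset; the predictions are produced eagerly by the constructor via Evaluate():

#include <iostream>
#include <vector>
#include "BernoulliNB/BernoulliNB.hpp"

int main(){
    std::vector<std::vector<double>> X = {{1, 0, 1}, {0, 1, 0}, {1, 1, 1}, {0, 0, 0}};
    std::vector<double> y = {1, 0, 1, 0};

    MLPP::BernoulliNB nb(X, y);
    std::cout << "Accuracy: " << nb.score() << std::endl;
    std::cout << "Prediction for {1, 0, 1}: " << nb.modelTest({1, 0, 1}) << std::endl;
}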

View File

@ -0,0 +1,219 @@
//
// CLogLogReg.cpp
//
// Created by Marc Melikyan on 10/2/20.
//
#include "CLogLogReg.hpp"
#include "Activation/Activation.hpp"
#include "LinAlg/LinAlg.hpp"
#include "Regularization/Reg.hpp"
#include "Utilities/Utilities.hpp"
#include "Cost/Cost.hpp"
#include <iostream>
#include <random>
namespace MLPP{
CLogLogReg::CLogLogReg(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, std::string reg, double lambda, double alpha)
: inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha)
{
y_hat.resize(n);
weights = Utilities::weightInitialization(k);
bias = Utilities::biasInitialization();
}
std::vector<double> CLogLogReg::modelSetTest(std::vector<std::vector<double>> X){
return Evaluate(X);
}
double CLogLogReg::modelTest(std::vector<double> x){
return Evaluate(x);
}
void CLogLogReg::gradientDescent(double learning_rate, int max_epoch, bool UI){
Activation avn;
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
forwardPass();
while(true){
cost_prev = Cost(y_hat, outputSet);
std::vector<double> error = alg.subtraction(y_hat, outputSet);
// Calculating the weight gradients
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputSet), alg.hadamard_product(error, avn.cloglog(z, 1)))));
weights = regularization.regWeights(weights, lambda, alpha, reg);
// Calculating the bias gradients
bias -= learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.cloglog(z, 1))) / n;
forwardPass();
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
Utilities::UI(weights, bias);
}
epoch++;
if(epoch > max_epoch) { break; }
}
}
void CLogLogReg::MLE(double learning_rate, int max_epoch, bool UI){
Activation avn;
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
forwardPass();
while(true){
cost_prev = Cost(y_hat, outputSet);
std::vector<double> error = alg.subtraction(y_hat, outputSet);
weights = alg.addition(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputSet), alg.hadamard_product(error, avn.cloglog(z, 1)))));
weights = regularization.regWeights(weights, lambda, alpha, reg);
// Calculating the bias gradients
bias += learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.cloglog(z, 1))) / n;
forwardPass();
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
Utilities::UI(weights, bias);
}
epoch++;
if(epoch > max_epoch) { break; }
}
}
void CLogLogReg::SGD(double learning_rate, int max_epoch, bool UI){
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
forwardPass();
while(true){
std::random_device rd;
std::default_random_engine generator(rd());
std::uniform_int_distribution<int> distribution(0, int(n - 1));
int outputIndex = distribution(generator);
double y_hat = Evaluate(inputSet[outputIndex]);
double z = propagate(inputSet[outputIndex]);
cost_prev = Cost({y_hat}, {outputSet[outputIndex]});
double error = y_hat - outputSet[outputIndex];
// Weight Updation
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate * error * exp(z-exp(z)), inputSet[outputIndex]));
weights = regularization.regWeights(weights, lambda, alpha, reg);
// Bias updation
bias -= learning_rate * error * exp(z-exp(z));
                y_hat = Evaluate(inputSet[outputIndex]); // scalar overload; y_hat here is a double
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost({y_hat}, {outputSet[outputIndex]}));
Utilities::UI(weights, bias);
}
epoch++;
if(epoch > max_epoch) { break; }
}
forwardPass();
}
void CLogLogReg::MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI){
Activation avn;
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
// Creating the mini-batches
int n_mini_batch = n/mini_batch_size;
auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
while(true){
for(int i = 0; i < n_mini_batch; i++){
std::vector<double> y_hat = Evaluate(inputMiniBatches[i]);
std::vector<double> z = propagate(inputMiniBatches[i]);
cost_prev = Cost(y_hat, outputMiniBatches[i]);
std::vector<double> error = alg.subtraction(y_hat, outputMiniBatches[i]);
// Calculating the weight gradients
                    weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), alg.hadamard_product(error, avn.cloglog(z, 1)))));
weights = regularization.regWeights(weights, lambda, alpha, reg);
// Calculating the bias gradients
                    bias -= learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.cloglog(z, 1))) / outputMiniBatches[i].size();
forwardPass();
y_hat = Evaluate(inputMiniBatches[i]);
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
Utilities::UI(weights, bias);
}
}
epoch++;
if(epoch > max_epoch) { break; }
}
forwardPass();
}
double CLogLogReg::score(){
Utilities util;
return util.performance(y_hat, outputSet);
}
double CLogLogReg::Cost(std::vector <double> y_hat, std::vector<double> y){
Reg regularization;
class Cost cost;
return cost.MSE(y_hat, y) + regularization.regTerm(weights, lambda, alpha, reg);
}
std::vector<double> CLogLogReg::Evaluate(std::vector<std::vector<double>> X){
LinAlg alg;
Activation avn;
return avn.cloglog(alg.scalarAdd(bias, alg.mat_vec_mult(X, weights)));
}
std::vector<double>CLogLogReg::propagate(std::vector<std::vector<double>> X){
LinAlg alg;
return alg.scalarAdd(bias, alg.mat_vec_mult(X, weights));
}
double CLogLogReg::Evaluate(std::vector<double> x){
LinAlg alg;
Activation avn;
return avn.cloglog(alg.dot(weights, x) + bias);
}
double CLogLogReg::propagate(std::vector<double> x){
LinAlg alg;
return alg.dot(weights, x) + bias;
}
// cloglog ( wTx + b )
void CLogLogReg::forwardPass(){
LinAlg alg;
Activation avn;
z = propagate(inputSet);
y_hat = avn.cloglog(z);
}
}

View File

@ -0,0 +1,58 @@
//
// CLogLogReg.hpp
//
// Created by Marc Melikyan on 10/2/20.
//
#ifndef CLogLogReg_hpp
#define CLogLogReg_hpp
#include <vector>
#include <string>
namespace MLPP {
class CLogLogReg{
public:
CLogLogReg(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, std::string reg = "None", double lambda = 0.5, double alpha = 0.5);
std::vector<double> modelSetTest(std::vector<std::vector<double>> X);
double modelTest(std::vector<double> x);
void gradientDescent(double learning_rate, int max_epoch, bool UI = 1);
void MLE(double learning_rate, int max_epoch, bool UI = 1);
void SGD(double learning_rate, int max_epoch, bool UI = 1);
void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1);
double score();
private:
void weightInitialization(int k);
void biasInitialization();
double Cost(std::vector <double> y_hat, std::vector<double> y);
std::vector<double> Evaluate(std::vector<std::vector<double>> X);
std::vector<double> propagate(std::vector<std::vector<double>> X);
double Evaluate(std::vector<double> x);
double propagate(std::vector<double> x);
void forwardPass();
std::vector<std::vector<double>> inputSet;
std::vector<double> outputSet;
std::vector<double> y_hat;
std::vector<double> z;
std::vector<double> weights;
double bias;
int n;
int k;
// Regularization Params
std::string reg;
double lambda;
double alpha; /* This is the controlling param for Elastic Net*/
};
}
#endif /* CLogLogReg_hpp */
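A minimal usage sketch for CLogLogReg, assuming a toy binary classification set; the regularization string, lambda, and alpha keep their declared defaults:

#include <iostream>
#include <vector>
#include "CLogLogReg/CLogLogReg.hpp"

int main(){
    std::vector<std::vector<double>> X = {{0, 0}, {0, 1}, {1, 0}, {1, 1}};
    std::vector<double> y = {0, 0, 0, 1}; // logical AND

    MLPP::CLogLogReg model(X, y);
    model.gradientDescent(0.1, 10000, false);

    std::cout << "Accuracy: " << model.score() << std::endl;
    std::cout << "P(y = 1 | {1, 1}) = " << model.modelTest({1, 1}) << std::endl;
}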

View File

@ -0,0 +1,402 @@
//
// Convolutions.cpp
//
// Created by Marc Melikyan on 4/6/21.
//
#include <iostream>
#include "Convolutions/Convolutions.hpp"
#include "LinAlg/LinAlg.hpp"
#include "Stat/Stat.hpp"
#include <cmath>
namespace MLPP{
Convolutions::Convolutions()
: prewittHorizontal({{1,1,1}, {0,0,0}, {-1,-1,-1}}), prewittVertical({{1,0,-1}, {1,0,-1}, {1,0,-1}}),
sobelHorizontal({{1,2,1}, {0,0,0}, {-1,-2,-1}}), sobelVertical({{-1,0,1}, {-2,0,2}, {-1,0,1}}),
scharrHorizontal({{3,10,3}, {0,0,0}, {-3,-10,-3}}), scharrVertical({{3,0,-3}, {10,0,-10}, {3,0,-3}}),
robertsHorizontal({{0,1}, {-1,0}}), robertsVertical({{1,0}, {0,-1}})
{
}
std::vector<std::vector<double>> Convolutions::convolve(std::vector<std::vector<double>> input, std::vector<std::vector<double>> filter, int S, int P){
LinAlg alg;
std::vector<std::vector<double>> featureMap;
int N = input.size();
int F = filter.size();
int mapSize = (N - F + 2*P) / S + 1; // This is computed as ⌊mapSize⌋ by def- thanks C++!
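            // e.g. a 5x5 input with a 3x3 filter, S = 1, P = 0 gives mapSize = (5 - 3 + 0)/1 + 1 = 3.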
if(P != 0){
std::vector<std::vector<double>> paddedInput;
paddedInput.resize(N + 2*P);
for(int i = 0; i < paddedInput.size(); i++){
paddedInput[i].resize(N + 2*P);
}
for(int i = 0; i < paddedInput.size(); i++){
for(int j = 0; j < paddedInput[i].size(); j++){
if(i - P < 0 || j - P < 0 || i - P > input.size() - 1 || j - P > input[0].size() - 1){
paddedInput[i][j] = 0;
}
else{
paddedInput[i][j] = input[i - P][j - P];
}
}
}
input.resize(paddedInput.size());
for(int i = 0; i < paddedInput.size(); i++){
input[i].resize(paddedInput[i].size());
}
input = paddedInput;
}
featureMap.resize(mapSize);
for(int i = 0; i < mapSize; i++){
featureMap[i].resize(mapSize);
}
for(int i = 0; i < mapSize; i++){
for(int j = 0; j < mapSize; j++){
std::vector<double> convolvingInput;
for(int k = 0; k < F; k++){
for(int p = 0; p < F; p++){
if(i == 0 && j == 0){
convolvingInput.push_back(input[i + k][j + p]);
}
else if(i == 0){
convolvingInput.push_back(input[i + k][j + (S - 1) + p]);
}
else if(j == 0){
convolvingInput.push_back(input[i + (S - 1) + k][j + p]);
}
else{
convolvingInput.push_back(input[i + (S - 1) + k][j + (S - 1) + p]);
}
}
}
featureMap[i][j] = alg.dot(convolvingInput, alg.flatten(filter));
}
}
return featureMap;
}
std::vector<std::vector<std::vector<double>>> Convolutions::convolve(std::vector<std::vector<std::vector<double>>> input, std::vector<std::vector<std::vector<double>>> filter, int S, int P){
LinAlg alg;
std::vector<std::vector<std::vector<double>>> featureMap;
int N = input[0].size();
int F = filter[0].size();
int C = filter.size() / input.size();
int mapSize = (N - F + 2*P) / S + 1; // This is computed as ⌊mapSize⌋ by def.
if(P != 0){
for(int c = 0; c < input.size(); c++){
std::vector<std::vector<double>> paddedInput;
paddedInput.resize(N + 2*P);
for(int i = 0; i < paddedInput.size(); i++){
paddedInput[i].resize(N + 2*P);
}
for(int i = 0; i < paddedInput.size(); i++){
for(int j = 0; j < paddedInput[i].size(); j++){
if(i - P < 0 || j - P < 0 || i - P > input[c].size() - 1 || j - P > input[c][0].size() - 1){
paddedInput[i][j] = 0;
}
else{
paddedInput[i][j] = input[c][i - P][j - P];
}
}
}
input[c].resize(paddedInput.size());
for(int i = 0; i < paddedInput.size(); i++){
input[c][i].resize(paddedInput[i].size());
}
input[c] = paddedInput;
}
}
featureMap.resize(C);
for(int i = 0; i < featureMap.size(); i++){
featureMap[i].resize(mapSize);
for(int j = 0; j < featureMap[i].size(); j++){
featureMap[i][j].resize(mapSize);
}
}
for(int c = 0; c < C; c++){
for(int i = 0; i < mapSize; i++){
for(int j = 0; j < mapSize; j++){
std::vector<double> convolvingInput;
for(int t = 0; t < input.size(); t++){
for(int k = 0; k < F; k++){
for(int p = 0; p < F; p++){
if(i == 0 && j == 0){
convolvingInput.push_back(input[t][i + k][j + p]);
}
else if(i == 0){
convolvingInput.push_back(input[t][i + k][j + (S - 1) + p]);
}
else if(j == 0){
convolvingInput.push_back(input[t][i + (S - 1) + k][j + p]);
}
else{
convolvingInput.push_back(input[t][i + (S - 1) + k][j + (S - 1) + p]);
}
}
}
}
featureMap[c][i][j] = alg.dot(convolvingInput, alg.flatten(filter));
}
}
}
return featureMap;
}
std::vector<std::vector<double>> Convolutions::pool(std::vector<std::vector<double>> input, int F, int S, std::string type){
LinAlg alg;
std::vector<std::vector<double>> pooledMap;
int N = input.size();
int mapSize = floor((N - F) / S + 1);
pooledMap.resize(mapSize);
for(int i = 0; i < mapSize; i++){
pooledMap[i].resize(mapSize);
}
for(int i = 0; i < mapSize; i++){
for(int j = 0; j < mapSize; j++){
std::vector<double> poolingInput;
for(int k = 0; k < F; k++){
for(int p = 0; p < F; p++){
if(i == 0 && j == 0){
poolingInput.push_back(input[i + k][j + p]);
}
else if(i == 0){
poolingInput.push_back(input[i + k][j + (S - 1) + p]);
}
else if(j == 0){
poolingInput.push_back(input[i + (S - 1) + k][j + p]);
}
else{
poolingInput.push_back(input[i + (S - 1) + k][j + (S - 1) + p]);
}
}
}
if(type == "Average"){
Stat stat;
pooledMap[i][j] = stat.mean(poolingInput);
}
else if(type == "Min"){
pooledMap[i][j] = alg.min(poolingInput);
}
else{
pooledMap[i][j] = alg.max(poolingInput);
}
}
}
return pooledMap;
}
std::vector<std::vector<std::vector<double>>> Convolutions::pool(std::vector<std::vector<std::vector<double>>> input, int F, int S, std::string type){
std::vector<std::vector<std::vector<double>>> pooledMap;
for(int i = 0; i < input.size(); i++){
pooledMap.push_back(pool(input[i], F, S, type));
}
return pooledMap;
}
double Convolutions::globalPool(std::vector<std::vector<double>> input, std::string type){
LinAlg alg;
if(type == "Average"){
Stat stat;
return stat.mean(alg.flatten(input));
}
else if(type == "Min"){
return alg.min(alg.flatten(input));
}
else{
return alg.max(alg.flatten(input));
}
}
std::vector<double> Convolutions::globalPool(std::vector<std::vector<std::vector<double>>> input, std::string type){
std::vector<double> pooledMap;
for(int i = 0; i < input.size(); i++){
pooledMap.push_back(globalPool(input[i], type));
}
return pooledMap;
}
double Convolutions::gaussian2D(double x, double y, double std){
double std_sq = std * std;
            return 1/(2 * M_PI * std_sq) * std::exp(-(x * x + y * y)/(2 * std_sq)); // G(x, y) = exp(-(x^2 + y^2)/(2*sigma^2)) / (2*pi*sigma^2)
}
std::vector<std::vector<double>> Convolutions::gaussianFilter2D(int size, double std){
std::vector<std::vector<double>> filter;
filter.resize(size);
for(int i = 0; i < filter.size(); i++){
filter[i].resize(size);
}
for(int i = 0; i < size; i++){
for(int j = 0; j < size; j++){
filter[i][j] = gaussian2D(i - (size-1)/2, (size-1)/2 - j, std);
}
}
return filter;
}
/*
Indeed a filter could have been used for this purpose, but I decided that it would've just
been easier to carry out the calculation explicitly, mainly because it is more informative,
and also because my convolution algorithm is only built for filters with equally sized
heights and widths.
*/
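    // A rough filter-based equivalent, as a sketch only (assuming the correlation convention of convolve above,
    // stride 1 and padding 1): dx ~ convolve(input, {{0,0,0},{-1,0,1},{0,0,0}}, 1, 1) and
    // dy ~ convolve(input, {{0,1,0},{0,0,0},{0,-1,0}}, 1, 1).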
std::vector<std::vector<double>> Convolutions::dx(std::vector<std::vector<double>> input){
std::vector<std::vector<double>> deriv; // We assume a gray scale image.
deriv.resize(input.size());
for(int i = 0; i < deriv.size(); i++){
deriv[i].resize(input[i].size());
}
for(int i = 0; i < input.size(); i++){
for(int j = 0; j < input[i].size(); j++){
if(j != 0 && j != input.size() - 1){
deriv[i][j] = input[i][j + 1] - input[i][j - 1];
}
else if(j == 0){
deriv[i][j] = input[i][j + 1] - 0; // Implicit zero-padding
}
else{
deriv[i][j] = 0 - input[i][j - 1]; // Implicit zero-padding
}
}
}
return deriv;
}
std::vector<std::vector<double>> Convolutions::dy(std::vector<std::vector<double>> input){
std::vector<std::vector<double>> deriv;
deriv.resize(input.size());
for(int i = 0; i < deriv.size(); i++){
deriv[i].resize(input[i].size());
}
for(int i = 0; i < input.size(); i++){
for(int j = 0; j < input[i].size(); j++){
if(i != 0 && i != input.size() - 1){
deriv[i][j] = input[i - 1][j] - input[i + 1][j];
}
else if(i == 0){
deriv[i][j] = 0 - input[i + 1][j]; // Implicit zero-padding
}
else{
deriv[i][j] = input[i - 1][j] - 0; // Implicit zero-padding
}
}
}
return deriv;
}
std::vector<std::vector<double>> Convolutions::gradMagnitude(std::vector<std::vector<double>> input){
LinAlg alg;
std::vector<std::vector<double>> xDeriv_2 = alg.hadamard_product(dx(input), dx(input));
std::vector<std::vector<double>> yDeriv_2 = alg.hadamard_product(dy(input), dy(input));
return alg.sqrt(alg.addition(xDeriv_2, yDeriv_2));
}
std::vector<std::vector<double>> Convolutions::gradOrientation(std::vector<std::vector<double>> input){
std::vector<std::vector<double>> deriv;
deriv.resize(input.size());
for(int i = 0; i < deriv.size(); i++){
deriv[i].resize(input[i].size());
}
std::vector<std::vector<double>> xDeriv = dx(input);
std::vector<std::vector<double>> yDeriv = dy(input);
for(int i = 0; i < deriv.size(); i++){
for(int j = 0; j < deriv[i].size(); j++){
deriv[i][j] = std::atan2(yDeriv[i][j], xDeriv[i][j]);
}
}
return deriv;
}
std::vector<std::vector<std::vector<double>>> Convolutions::computeM(std::vector<std::vector<double>> input){
double const SIGMA = 1;
double const GAUSSIAN_SIZE = 3;
double const GAUSSIAN_PADDING = ( (input.size() - 1) + GAUSSIAN_SIZE - input.size() ) / 2; // Convs must be same.
std::cout << GAUSSIAN_PADDING << std::endl;
LinAlg alg;
std::vector<std::vector<double>> xDeriv = dx(input);
std::vector<std::vector<double>> yDeriv = dy(input);
std::vector<std::vector<double>> gaussianFilter = gaussianFilter2D(GAUSSIAN_SIZE, SIGMA); // Sigma of 1, size of 3.
std::vector<std::vector<double>> xxDeriv = convolve(alg.hadamard_product(xDeriv, xDeriv), gaussianFilter, 1, GAUSSIAN_PADDING);
std::vector<std::vector<double>> yyDeriv = convolve(alg.hadamard_product(yDeriv, yDeriv), gaussianFilter, 1, GAUSSIAN_PADDING);
std::vector<std::vector<double>> xyDeriv = convolve(alg.hadamard_product(xDeriv, yDeriv), gaussianFilter, 1, GAUSSIAN_PADDING);
std::vector<std::vector<std::vector<double>>> M = {xxDeriv, yyDeriv, xyDeriv};
return M;
}
std::vector<std::vector<std::string>> Convolutions::harrisCornerDetection(std::vector<std::vector<double>> input){
double const k = 0.05; // Empirically determined wherein k -> [0.04, 0.06], though conventionally 0.05 is typically used as well.
LinAlg alg;
std::vector<std::vector<std::vector<double>>> M = computeM(input);
std::vector<std::vector<double>> det = alg.subtraction(alg.hadamard_product(M[0], M[1]), alg.hadamard_product(M[2], M[2]));
std::vector<std::vector<double>> trace = alg.addition(M[0], M[1]);
// The reason this is not a scalar is because xxDeriv, xyDeriv, yxDeriv, and yyDeriv are not scalars.
std::vector<std::vector<double>> r = alg.subtraction(det, alg.scalarMultiply(k, alg.hadamard_product(trace, trace)));
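            // Harris response per pixel: R = det(M) - k * trace(M)^2; R > 0 flags a corner, R < 0 an edge,
            // and R == 0 a flat region, which is how the "C"/"E"/"N" labels are assigned below.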
std::vector<std::vector<std::string>> imageTypes;
imageTypes.resize(r.size());
alg.printMatrix(r);
for(int i = 0; i < r.size(); i++){
imageTypes[i].resize(r[i].size());
for(int j = 0; j < r[i].size(); j++){
if(r[i][j] > 0){
imageTypes[i][j] = "C";
}
else if (r[i][j] < 0){
imageTypes[i][j] = "E";
}
else{
imageTypes[i][j] = "N";
}
}
}
return imageTypes;
}
std::vector<std::vector<double>> Convolutions::getPrewittHorizontal(){
return prewittHorizontal;
}
std::vector<std::vector<double>> Convolutions::getPrewittVertical(){
return prewittVertical;
}
std::vector<std::vector<double>> Convolutions::getSobelHorizontal(){
return sobelHorizontal;
}
std::vector<std::vector<double>> Convolutions::getSobelVertical(){
return sobelVertical;
}
std::vector<std::vector<double>> Convolutions::getScharrHorizontal(){
return scharrHorizontal;
}
std::vector<std::vector<double>> Convolutions::getScharrVertical(){
return scharrVertical;
}
std::vector<std::vector<double>> Convolutions::getRobertsHorizontal(){
return robertsHorizontal;
}
std::vector<std::vector<double>> Convolutions::getRobertsVertical(){
return robertsVertical;
}
}

View File

@ -0,0 +1,51 @@
#ifndef Convolutions_hpp
#define Convolutions_hpp
#include <vector>
#include <string>
namespace MLPP{
class Convolutions{
public:
Convolutions();
std::vector<std::vector<double>> convolve(std::vector<std::vector<double>> input, std::vector<std::vector<double>> filter, int S, int P = 0);
std::vector<std::vector<std::vector<double>>> convolve(std::vector<std::vector<std::vector<double>>> input, std::vector<std::vector<std::vector<double>>> filter, int S, int P = 0);
std::vector<std::vector<double>> pool(std::vector<std::vector<double>> input, int F, int S, std::string type);
std::vector<std::vector<std::vector<double>>> pool(std::vector<std::vector<std::vector<double>>> input, int F, int S, std::string type);
double globalPool(std::vector<std::vector<double>> input, std::string type);
std::vector<double> globalPool(std::vector<std::vector<std::vector<double>>> input, std::string type);
double gaussian2D(double x, double y, double std);
std::vector<std::vector<double>> gaussianFilter2D(int size, double std);
std::vector<std::vector<double>> dx(std::vector<std::vector<double>> input);
std::vector<std::vector<double>> dy(std::vector<std::vector<double>> input);
std::vector<std::vector<double>> gradMagnitude(std::vector<std::vector<double>> input);
std::vector<std::vector<double>> gradOrientation(std::vector<std::vector<double>> input);
std::vector<std::vector<std::vector<double>>> computeM(std::vector<std::vector<double>> input);
std::vector<std::vector<std::string>> harrisCornerDetection(std::vector<std::vector<double>> input);
std::vector<std::vector<double>> getPrewittHorizontal();
std::vector<std::vector<double>> getPrewittVertical();
std::vector<std::vector<double>> getSobelHorizontal();
std::vector<std::vector<double>> getSobelVertical();
std::vector<std::vector<double>> getScharrHorizontal();
std::vector<std::vector<double>> getScharrVertical();
std::vector<std::vector<double>> getRobertsHorizontal();
std::vector<std::vector<double>> getRobertsVertical();
private:
std::vector<std::vector<double>> prewittHorizontal;
std::vector<std::vector<double>> prewittVertical;
std::vector<std::vector<double>> sobelHorizontal;
std::vector<std::vector<double>> sobelVertical;
std::vector<std::vector<double>> scharrHorizontal;
std::vector<std::vector<double>> scharrVertical;
std::vector<std::vector<double>> robertsHorizontal;
std::vector<std::vector<double>> robertsVertical;
};
}
#endif // Convolutions_hpp
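A minimal usage sketch for the Convolutions class, assuming a small grayscale image stored as a matrix; stride, padding, and pooling sizes are illustrative:

#include <iostream>
#include <vector>
#include "Convolutions/Convolutions.hpp"

int main(){
    MLPP::Convolutions conv;
    std::vector<std::vector<double>> img = {
        {10, 10, 10, 10, 10},
        {10, 50, 50, 50, 10},
        {10, 50, 90, 50, 10},
        {10, 50, 50, 50, 10},
        {10, 10, 10, 10, 10}
    };

    // 3x3 Sobel response with stride 1 and no padding -> 3x3 feature map.
    std::vector<std::vector<double>> edges = conv.convolve(img, conv.getSobelHorizontal(), 1);
    // 2x2 max pooling with stride 2.
    std::vector<std::vector<double>> pooled = conv.pool(img, 2, 2, "Max");

    std::cout << "Feature map: " << edges.size() << "x" << edges[0].size()
              << ", pooled map: " << pooled.size() << "x" << pooled[0].size() << std::endl;
}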

422
MLPP/Cost/Cost.cpp Normal file
View File

@ -0,0 +1,422 @@
//
//  Cost.cpp
//
// Created by Marc Melikyan on 1/16/21.
//
#include <iostream>
#include <cmath>
#include "Cost.hpp"
#include "LinAlg/LinAlg.hpp"
#include "Regularization/Reg.hpp"
namespace MLPP{
double Cost::MSE(std::vector <double> y_hat, std::vector<double> y){
double sum = 0;
for(int i = 0; i < y_hat.size(); i++){
sum += (y_hat[i] - y[i]) * (y_hat[i] - y[i]);
}
            return sum / (2 * y_hat.size()); // (1/(2n)) * sum of squared errors
}
double Cost::MSE(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y){
double sum = 0;
for(int i = 0; i < y_hat.size(); i++){
for(int j = 0; j < y_hat[i].size(); j++){
sum += (y_hat[i][j] - y[i][j]) * (y_hat[i][j] - y[i][j]);
}
}
            return sum / (2 * y_hat.size());
}
std::vector<double> Cost::MSEDeriv(std::vector <double> y_hat, std::vector<double> y){
LinAlg alg;
return alg.subtraction(y_hat, y);
}
std::vector<std::vector<double>> Cost::MSEDeriv(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y){
LinAlg alg;
return alg.subtraction(y_hat, y);
}
double Cost::RMSE(std::vector <double> y_hat, std::vector<double> y){
double sum = 0;
for(int i = 0; i < y_hat.size(); i++){
sum += (y_hat[i] - y[i]) * (y_hat[i] - y[i]);
}
return sqrt(sum / y_hat.size());
}
double Cost::RMSE(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y){
double sum = 0;
for(int i = 0; i < y_hat.size(); i++){
for(int j = 0; j < y_hat[i].size(); j++){
sum += (y_hat[i][j] - y[i][j]) * (y_hat[i][j] - y[i][j]);
}
}
return sqrt(sum / y_hat.size());
}
std::vector<double> Cost::RMSEDeriv(std::vector <double> y_hat, std::vector<double> y){
LinAlg alg;
return alg.scalarMultiply(1/(2*sqrt(MSE(y_hat, y))), MSEDeriv(y_hat, y));
}
std::vector<std::vector<double>> Cost::RMSEDeriv(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y){
LinAlg alg;
            return alg.scalarMultiply(1/(2*sqrt(MSE(y_hat, y))), MSEDeriv(y_hat, y)); // matches the vector overload above
}
double Cost::MAE(std::vector <double> y_hat, std::vector<double> y){
double sum = 0;
for(int i = 0; i < y_hat.size(); i++){
sum += abs((y_hat[i] - y[i]));
}
return sum / y_hat.size();
}
double Cost::MAE(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y){
double sum = 0;
for(int i = 0; i < y_hat.size(); i++){
for(int j = 0; j < y_hat[i].size(); j++){
sum += abs((y_hat[i][j] - y[i][j]));
}
}
return sum / y_hat.size();
}
std::vector<double> Cost::MAEDeriv(std::vector <double> y_hat, std::vector <double> y){
std::vector<double> deriv;
deriv.resize(y_hat.size());
for(int i = 0; i < deriv.size(); i++){
                if(y_hat[i] - y[i] < 0){ // d/dy_hat |y_hat - y| = sign(y_hat - y)
                    deriv[i] = -1;
                }
                else if(y_hat[i] - y[i] == 0){
deriv[i] = 0;
}
else{
deriv[i] = 1;
}
}
return deriv;
}
std::vector<std::vector<double>> Cost::MAEDeriv(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y){
std::vector<std::vector<double>> deriv;
deriv.resize(y_hat.size());
for(int i = 0; i < deriv.size(); i++){
                deriv[i].resize(y_hat[i].size());
}
for(int i = 0; i < deriv.size(); i++){
for(int j = 0; j < deriv[i].size(); j++){
                    if(y_hat[i][j] - y[i][j] < 0){ // sign of the residual, as in the vector overload
                        deriv[i][j] = -1;
                    }
                    else if(y_hat[i][j] - y[i][j] == 0){
deriv[i][j] = 0;
}
else{
deriv[i][j] = 1;
}
}
}
return deriv;
}
double Cost::MBE(std::vector <double> y_hat, std::vector<double> y){
double sum = 0;
for(int i = 0; i < y_hat.size(); i++){
sum += (y_hat[i] - y[i]);
}
return sum / y_hat.size();
}
double Cost::MBE(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y){
double sum = 0;
for(int i = 0; i < y_hat.size(); i++){
for(int j = 0; j < y_hat[i].size(); j++){
sum += (y_hat[i][j] - y[i][j]);
}
}
return sum / y_hat.size();
}
std::vector<double> Cost::MBEDeriv(std::vector <double> y_hat, std::vector<double> y){
LinAlg alg;
return alg.onevec(y_hat.size());
}
std::vector<std::vector<double>> Cost::MBEDeriv(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y){
LinAlg alg;
return alg.onemat(y_hat.size(), y_hat[0].size());
}
double Cost::LogLoss(std::vector <double> y_hat, std::vector<double> y){
double sum = 0;
double eps = 1e-8;
for(int i = 0; i < y_hat.size(); i++){
sum += -(y[i] * std::log(y_hat[i] + eps) + (1 - y[i]) * std::log(1 - y_hat[i] + eps));
}
return sum / y_hat.size();
}
double Cost::LogLoss(std::vector <std::vector<double>> y_hat, std::vector <std::vector<double>> y){
double sum = 0;
double eps = 1e-8;
for(int i = 0; i < y_hat.size(); i++){
for(int j = 0; j < y_hat[i].size(); j++){
sum += -(y[i][j] * std::log(y_hat[i][j] + eps) + (1 - y[i][j]) * std::log(1 - y_hat[i][j] + eps));
}
}
return sum / y_hat.size();
}
std::vector<double> Cost::LogLossDeriv(std::vector <double> y_hat, std::vector<double> y){
LinAlg alg;
return alg.addition(alg.scalarMultiply(-1, alg.elementWiseDivision(y, y_hat)), alg.elementWiseDivision(alg.scalarMultiply(-1, alg.scalarAdd(-1, y)), alg.scalarMultiply(-1, alg.scalarAdd(-1, y_hat))));
}
std::vector<std::vector<double>> Cost::LogLossDeriv(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y){
LinAlg alg;
return alg.addition(alg.scalarMultiply(-1, alg.elementWiseDivision(y, y_hat)), alg.elementWiseDivision(alg.scalarMultiply(-1, alg.scalarAdd(-1, y)), alg.scalarMultiply(-1, alg.scalarAdd(-1, y_hat))));
}
double Cost::CrossEntropy(std::vector<double> y_hat, std::vector<double> y){
double sum = 0;
for(int i = 0; i < y_hat.size(); i++){
sum += y[i] * std::log(y_hat[i]);
}
return -1 * sum;
}
double Cost::CrossEntropy(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y){
double sum = 0;
for(int i = 0; i < y_hat.size(); i++){
for(int j = 0; j < y_hat[i].size(); j++){
sum += y[i][j] * std::log(y_hat[i][j]);
}
}
return -1 * sum;
}
std::vector<double> Cost::CrossEntropyDeriv(std::vector<double> y_hat, std::vector<double> y){
LinAlg alg;
return alg.scalarMultiply(-1, alg.elementWiseDivision(y, y_hat));
}
std::vector<std::vector<double>> Cost::CrossEntropyDeriv(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y){
LinAlg alg;
return alg.scalarMultiply(-1, alg.elementWiseDivision(y, y_hat));
}
double Cost::HuberLoss(std::vector <double> y_hat, std::vector<double> y, double delta){
LinAlg alg;
double sum = 0;
for(int i = 0; i < y_hat.size(); i++){
if(abs(y[i] - y_hat[i]) <= delta){
sum += (y[i] - y_hat[i]) * (y[i] - y_hat[i]);
}
else{
sum += 2 * delta * abs(y[i] - y_hat[i]) - delta * delta;
}
}
return sum;
}
double Cost::HuberLoss(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y, double delta){
LinAlg alg;
double sum = 0;
for(int i = 0; i < y_hat.size(); i++){
for(int j = 0; j < y_hat[i].size(); j++){
if(abs(y[i][j] - y_hat[i][j]) <= delta){
sum += (y[i][j] - y_hat[i][j]) * (y[i][j] - y_hat[i][j]);
}
else{
sum += 2 * delta * abs(y[i][j] - y_hat[i][j]) - delta * delta;
}
}
}
return sum;
}
std::vector<double> Cost::HuberLossDeriv(std::vector <double> y_hat, std::vector<double> y, double delta){
LinAlg alg;
double sum = 0;
std::vector<double> deriv;
deriv.resize(y_hat.size());
for(int i = 0; i < y_hat.size(); i++){
                if(abs(y[i] - y_hat[i]) <= delta){
                    deriv[i] = -(y[i] - y_hat[i]); // assign by index; deriv was already resized above
                }
                else{
                    double residual = y_hat[i] - y[i];
                    if(residual > 0 || residual < 0){
                        deriv[i] = 2 * delta * (residual/abs(residual)); // delta * sign(residual), with the 2x scaling of HuberLoss above
                    }
                    else{
                        deriv[i] = 0;
                    }
}
}
return deriv;
}
std::vector<std::vector<double>> Cost::HuberLossDeriv(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y, double delta){
LinAlg alg;
double sum = 0;
std::vector<std::vector<double>> deriv;
deriv.resize(y_hat.size());
for(int i = 0; i < deriv.size(); i++){
deriv[i].resize(y_hat[i].size());
}
for(int i = 0; i < y_hat.size(); i++){
for(int j = 0; j < y_hat[i].size(); j++){
                    if(abs(y[i][j] - y_hat[i][j]) <= delta){
                        deriv[i][j] = -(y[i][j] - y_hat[i][j]); // assign by index; deriv[i] was already resized above
                    }
                    else{
                        double residual = y_hat[i][j] - y[i][j];
                        if(residual > 0 || residual < 0){
                            deriv[i][j] = 2 * delta * (residual/abs(residual));
                        }
                        else{
                            deriv[i][j] = 0;
                        }
}
}
}
return deriv;
}
double Cost::HingeLoss(std::vector <double> y_hat, std::vector<double> y){
double sum = 0;
for(int i = 0; i < y_hat.size(); i++){
sum += fmax(0, 1 - y[i] * y_hat[i]);
}
return sum / y_hat.size();
}
double Cost::HingeLoss(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y){
double sum = 0;
for(int i = 0; i < y_hat.size(); i++){
for(int j = 0; j < y_hat[i].size(); j++){
sum += fmax(0, 1 - y[i][j] * y_hat[i][j]);
}
}
return sum / y_hat.size();
}
std::vector<double> Cost::HingeLossDeriv(std::vector <double> y_hat, std::vector<double> y){
std::vector<double> deriv;
deriv.resize(y_hat.size());
for(int i = 0; i < y_hat.size(); i++){
if(1 - y[i] * y_hat[i] > 0){
deriv[i] = -y[i];
}
else{
deriv[i] = 0;
}
}
return deriv;
}
std::vector<std::vector<double>> Cost::HingeLossDeriv(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y){
            std::vector<std::vector<double>> deriv;
            deriv.resize(y_hat.size()); // allocate rows/cols before indexing deriv[i][j] below
            for(int i = 0; i < y_hat.size(); i++){
                deriv[i].resize(y_hat[i].size());
for(int j = 0; j < y_hat[i].size(); j++){
if(1 - y[i][j] * y_hat[i][j] > 0){
deriv[i][j] = -y[i][j];
}
else{
deriv[i][j] = 0;
}
}
}
return deriv;
}
double Cost::WassersteinLoss(std::vector <double> y_hat, std::vector<double> y){
double sum = 0;
for(int i = 0; i < y_hat.size(); i++){
sum += y_hat[i] * y[i];
}
return -sum / y_hat.size();
}
double Cost::WassersteinLoss(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y){
double sum = 0;
for(int i = 0; i < y_hat.size(); i++){
for(int j = 0; j < y_hat[i].size(); j++){
sum += y_hat[i][j] * y[i][j];
}
}
return -sum / y_hat.size();
}
std::vector<double> Cost::WassersteinLossDeriv(std::vector<double> y_hat, std::vector<double> y){
LinAlg alg;
return alg.scalarMultiply(-1, y); // Simple.
}
std::vector<std::vector<double>> Cost::WassersteinLossDeriv(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y){
LinAlg alg;
return alg.scalarMultiply(-1, y); // Simple.
}
double Cost::HingeLoss(std::vector <double> y_hat, std::vector<double> y, std::vector<double> weights, double C){
LinAlg alg;
Reg regularization;
return C * HingeLoss(y_hat, y) + regularization.regTerm(weights, 1, 0, "Ridge");
}
double Cost::HingeLoss(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y, std::vector<std::vector<double>> weights, double C){
LinAlg alg;
Reg regularization;
return C * HingeLoss(y_hat, y) + regularization.regTerm(weights, 1, 0, "Ridge");
}
std::vector<double> Cost::HingeLossDeriv(std::vector <double> y_hat, std::vector<double> y, double C){
LinAlg alg;
Reg regularization;
return alg.scalarMultiply(C, HingeLossDeriv(y_hat, y));
}
std::vector<std::vector<double>> Cost::HingeLossDeriv(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y, double C){
LinAlg alg;
Reg regularization;
return alg.scalarMultiply(C, HingeLossDeriv(y_hat, y));
}
double Cost::dualFormSVM(std::vector<double> alpha, std::vector<std::vector<double>> X, std::vector<double> y){
LinAlg alg;
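            // SVM dual objective (minimized here): (1/2) * alpha^T Q alpha - 1^T alpha,
            // where Q = Y K Y with Y = diag(y) and K = X X^T (the linear-kernel Gram matrix).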
            std::vector<std::vector<double>> Y = alg.diag(y); // Y is a diagonal matrix. Y[i][j] = y[i] if i == j, else Y[i][j] = 0. Y^T = Y.
std::vector<std::vector<double>> K = alg.matmult(X, alg.transpose(X)); // TO DO: DON'T forget to add non-linear kernelizations.
std::vector<std::vector<double>> Q = alg.matmult(alg.matmult(alg.transpose(Y), K), Y);
double alphaQ = alg.matmult(alg.matmult({alpha}, Q), alg.transpose({alpha}))[0][0];
std::vector<double> one = alg.onevec(alpha.size());
return -alg.dot(one, alpha) + 0.5 * alphaQ;
}
std::vector<double> Cost::dualFormSVMDeriv(std::vector<double> alpha, std::vector<std::vector<double>> X, std::vector<double> y){
LinAlg alg;
std::vector<std::vector<double>> Y = alg.zeromat(y.size(), y.size());
for(int i = 0; i < y.size(); i++){
                Y[i][i] = y[i]; // Y is a diagonal matrix. Y[i][j] = y[i] if i == j, else Y[i][j] = 0. Y^T = Y.
}
std::vector<std::vector<double>> K = alg.matmult(X, alg.transpose(X)); // TO DO: DON'T forget to add non-linear kernelizations.
std::vector<std::vector<double>> Q = alg.matmult(alg.matmult(alg.transpose(Y), K), Y);
std::vector<double> alphaQDeriv = alg.mat_vec_mult(Q, alpha);
std::vector<double> one = alg.onevec(alpha.size());
return alg.subtraction(alphaQDeriv, one);
}
}
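A minimal usage sketch for the Cost class, assuming two small prediction/target vectors; handy for spot-checking the scalar losses above:

#include <iostream>
#include <vector>
#include "Cost/Cost.hpp"

int main(){
    MLPP::Cost cost;
    std::vector<double> y_hat = {0.9, 0.2, 0.7};
    std::vector<double> y     = {1.0, 0.0, 1.0};

    std::cout << "MSE: " << cost.MSE(y_hat, y) << std::endl;         // half the mean squared error
    std::cout << "MAE: " << cost.MAE(y_hat, y) << std::endl;
    std::cout << "LogLoss: " << cost.LogLoss(y_hat, y) << std::endl;
}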

86
MLPP/Cost/Cost.hpp Normal file
View File

@ -0,0 +1,86 @@
//
// Cost.hpp
//
// Created by Marc Melikyan on 1/16/21.
//
#ifndef Cost_hpp
#define Cost_hpp
#include <vector>
namespace MLPP{
class Cost{
public:
// Regression Costs
double MSE(std::vector <double> y_hat, std::vector<double> y);
double MSE(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y);
std::vector<double> MSEDeriv(std::vector <double> y_hat, std::vector<double> y);
std::vector<std::vector<double>> MSEDeriv(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y);
double RMSE(std::vector <double> y_hat, std::vector<double> y);
double RMSE(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y);
std::vector<double> RMSEDeriv(std::vector <double> y_hat, std::vector<double> y);
std::vector<std::vector<double>> RMSEDeriv(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y);
double MAE(std::vector <double> y_hat, std::vector<double> y);
double MAE(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y);
std::vector<double> MAEDeriv(std::vector <double> y_hat, std::vector <double> y);
std::vector<std::vector<double>> MAEDeriv(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y);
double MBE(std::vector <double> y_hat, std::vector <double> y);
double MBE(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y);
std::vector<double> MBEDeriv(std::vector <double> y_hat, std::vector <double> y);
std::vector<std::vector<double>> MBEDeriv(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y);
// Classification Costs
double LogLoss(std::vector <double> y_hat, std::vector<double> y);
double LogLoss(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y);
std::vector<double> LogLossDeriv(std::vector <double> y_hat, std::vector<double> y);
std::vector<std::vector<double>> LogLossDeriv(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y);
double CrossEntropy(std::vector<double> y_hat, std::vector<double> y);
double CrossEntropy(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y);
std::vector<double> CrossEntropyDeriv(std::vector<double> y_hat, std::vector<double> y);
std::vector<std::vector<double>> CrossEntropyDeriv(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y);
double HuberLoss(std::vector <double> y_hat, std::vector<double> y, double delta);
double HuberLoss(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y, double delta);
std::vector<double> HuberLossDeriv(std::vector <double> y_hat, std::vector<double> y, double delta);
std::vector<std::vector<double>> HuberLossDeriv(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y, double delta);
double HingeLoss(std::vector <double> y_hat, std::vector<double> y);
double HingeLoss(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y);
std::vector<double> HingeLossDeriv(std::vector <double> y_hat, std::vector<double> y);
std::vector<std::vector<double>> HingeLossDeriv(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y);
double HingeLoss(std::vector <double> y_hat, std::vector<double> y, std::vector<double> weights, double C);
double HingeLoss(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y, std::vector<std::vector<double>> weights, double C);
std::vector<double> HingeLossDeriv(std::vector <double> y_hat, std::vector<double> y, double C);
std::vector<std::vector<double>> HingeLossDeriv(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y, double C);
double WassersteinLoss(std::vector<double> y_hat, std::vector<double> y);
double WassersteinLoss(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y);
std::vector<double> WassersteinLossDeriv(std::vector<double> y_hat, std::vector<double> y);
std::vector<std::vector<double>> WassersteinLossDeriv(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y);
double dualFormSVM(std::vector<double> alpha, std::vector<std::vector<double>> X, std::vector<double> y); // TO DO: DON'T forget to add non-linear kernelizations.
std::vector<double> dualFormSVMDeriv(std::vector<double> alpha, std::vector<std::vector<double>> X, std::vector<double> y);
private:
};
}
#endif /* Cost_hpp */
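A minimal, illustrative usage sketch of the Cost class. The vectors below are made-up values and the include path assumes compilation from the repository root; neither is taken from the sources above.

#include "MLPP/Cost/Cost.hpp"
#include <iostream>
#include <vector>

int main(){
    MLPP::Cost cost;
    std::vector<double> y_hat = {0.9, 0.2, 0.4};
    std::vector<double> y     = {1.0, 0.0, 0.5};
    std::cout << "MSE: " << cost.MSE(y_hat, y) << std::endl;   // mean squared error
    std::cout << "MAE: " << cost.MAE(y_hat, y) << std::endl;   // mean absolute error
    std::vector<double> grad = cost.MSEDeriv(y_hat, y);        // elementwise derivative w.r.t. y_hat
    std::cout << "dMSE/dy_hat[0]: " << grad[0] << std::endl;
    return 0;
}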

773
MLPP/Data/Data.cpp Normal file
View File

@ -0,0 +1,773 @@
//
// Data.cpp
// MLP
//
// Created by Marc Melikyan on 11/4/20.
//
#include "Data.hpp"
#include "LinAlg/LinAlg.hpp"
#include "Stat/Stat.hpp"
#include "SoftmaxNet/SoftmaxNet.hpp"
#include <iostream>
#include <random>
#include <cmath>
#include <fstream>
#include <sstream>
#include <algorithm>
namespace MLPP{
// Loading Datasets
std::tuple<std::vector<std::vector<double>>, std::vector<double>> Data::loadBreastCancer(){
const int BREAST_CANCER_SIZE = 30; // k = 30
std::vector<std::vector<double>> inputSet;
std::vector<double> outputSet;
setData(BREAST_CANCER_SIZE, "MLPP/Data/Datasets/BreastCancer.csv", inputSet, outputSet);
return {inputSet, outputSet};
}
std::tuple<std::vector<std::vector<double>>, std::vector<double>> Data::loadBreastCancerSVC(){
const int BREAST_CANCER_SIZE = 30; // k = 30
std::vector<std::vector<double>> inputSet;
std::vector<double> outputSet;
setData(BREAST_CANCER_SIZE, "MLPP/Data/Datasets/BreastCancerSVM.csv", inputSet, outputSet);
return {inputSet, outputSet};
}
std::tuple<std::vector<std::vector<double>>, std::vector<std::vector<double>>> Data::loadIris(){
const int IRIS_SIZE = 4;
const int ONE_HOT_NUM = 3;
std::vector<std::vector<double>> inputSet;
std::vector<double> tempOutputSet;
            setData(IRIS_SIZE, "MLPP/Data/Datasets/Iris.csv", inputSet, tempOutputSet); // repository-relative path, consistent with the other loaders
std::vector<std::vector<double>> outputSet = oneHotRep(tempOutputSet, ONE_HOT_NUM);
return {inputSet, outputSet};
}
std::tuple<std::vector<std::vector<double>>, std::vector<std::vector<double>>> Data::loadWine(){
const int WINE_SIZE = 4;
const int ONE_HOT_NUM = 3;
std::vector<std::vector<double>> inputSet;
std::vector<double> tempOutputSet;
setData(WINE_SIZE, "MLPP/Data/Datasets/Iris.csv", inputSet, tempOutputSet);
std::vector<std::vector<double>> outputSet = oneHotRep(tempOutputSet, ONE_HOT_NUM);
return {inputSet, outputSet};
}
std::tuple<std::vector<std::vector<double>>, std::vector<std::vector<double>>> Data::loadMnistTrain(){
const int MNIST_SIZE = 784;
const int ONE_HOT_NUM = 10;
std::vector<std::vector<double>> inputSet;
std::vector<double> tempOutputSet;
setData(MNIST_SIZE, "MLPP/Data/Datasets/MnistTrain.csv", inputSet, tempOutputSet);
std::vector<std::vector<double>> outputSet = oneHotRep(tempOutputSet, ONE_HOT_NUM);
return {inputSet, outputSet};
}
std::tuple<std::vector<std::vector<double>>, std::vector<std::vector<double>>> Data::loadMnistTest(){
const int MNIST_SIZE = 784;
const int ONE_HOT_NUM = 10;
std::vector<std::vector<double>> inputSet;
std::vector<double> tempOutputSet;
setData(MNIST_SIZE, "MLPP/Data/Datasets/MnistTest.csv", inputSet, tempOutputSet);
std::vector<std::vector<double>> outputSet = oneHotRep(tempOutputSet, ONE_HOT_NUM);
return {inputSet, outputSet};
}
std::tuple<std::vector<std::vector<double>>, std::vector<double>> Data::loadCaliforniaHousing(){
            const int CALIFORNIA_HOUSING_SIZE = 13; // k = 13
std::vector<std::vector<double>> inputSet;
std::vector<double> outputSet;
setData(CALIFORNIA_HOUSING_SIZE, "MLPP/Data/Datasets/CaliforniaHousing.csv", inputSet, outputSet);
return {inputSet, outputSet};
}
std::tuple<std::vector<double>, std::vector<double>> Data::loadFiresAndCrime(){
std::vector<double> inputSet; // k is implicitly 1.
std::vector<double> outputSet;
setData("MLPP/Data/Datasets/FiresAndCrime.csv", inputSet, outputSet);
return {inputSet, outputSet};
}
std::tuple<std::vector<std::vector<double>>, std::vector<std::vector<double>>, std::vector<std::vector<double>>, std::vector<std::vector<double>>> Data::trainTestSplit(std::vector<std::vector<double>> inputSet, std::vector<std::vector<double>> outputSet, double testSize){
        std::random_device rd;
        unsigned int seed = rd();
        std::default_random_engine inputGenerator(seed);
        std::default_random_engine outputGenerator(seed); // identical seeds and lengths => identical permutations,
        std::shuffle(inputSet.begin(), inputSet.end(), inputGenerator); // so the two shuffles keep
        std::shuffle(outputSet.begin(), outputSet.end(), outputGenerator); // input/output pairs aligned.
std::vector<std::vector<double>> inputTestSet;
std::vector<std::vector<double>> outputTestSet;
int testInputNumber = testSize * inputSet.size(); // implicit usage of floor
int testOutputNumber = testSize * outputSet.size(); // implicit usage of floor
for(int i = 0; i < testInputNumber; i++){
inputTestSet.push_back(inputSet[i]);
inputSet.erase(inputSet.begin());
}
for(int i = 0; i < testOutputNumber; i++){
outputTestSet.push_back(outputSet[i]);
outputSet.erase(outputSet.begin());
}
return {inputSet, outputSet, inputTestSet, outputTestSet};
}
// MULTIVARIATE SUPERVISED
void Data::setData(int k, std::string fileName, std::vector<std::vector<double>>& inputSet, std::vector<double>& outputSet){
LinAlg alg;
std::string inputTemp;
std::string outputTemp;
inputSet.resize(k);
std::ifstream dataFile(fileName);
if(!dataFile.is_open()){
std::cout << fileName << " failed to open." << std::endl;
}
std::string line;
while(std::getline(dataFile, line)){
std::stringstream ss(line);
for(int i = 0; i < k; i++){
std::getline(ss, inputTemp, ',');
inputSet[i].push_back(std::stod(inputTemp));
}
std::getline(ss, outputTemp, ',');
outputSet.push_back(std::stod(outputTemp));
}
inputSet = alg.transpose(inputSet);
dataFile.close();
}
void Data::printData(std::vector <std::string> inputName, std::string outputName, std::vector<std::vector<double>> inputSet, std::vector<double> outputSet){
LinAlg alg;
inputSet = alg.transpose(inputSet);
for(int i = 0; i < inputSet.size(); i++){
std::cout << inputName[i] << std::endl;
for(int j = 0; j < inputSet[i].size(); j++){
std::cout << inputSet[i][j] << std::endl;
}
}
std::cout << outputName << std::endl;
for(int i = 0; i < outputSet.size(); i++){
std::cout << outputSet[i] << std::endl;
}
}
// UNSUPERVISED
void Data::setData(int k, std::string fileName, std::vector<std::vector<double>>& inputSet){
LinAlg alg;
std::string inputTemp;
inputSet.resize(k);
std::ifstream dataFile(fileName);
if(!dataFile.is_open()){
std::cout << fileName << " failed to open." << std::endl;
}
std::string line;
while(std::getline(dataFile, line)){
std::stringstream ss(line);
for(int i = 0; i < k; i++){
std::getline(ss, inputTemp, ',');
inputSet[i].push_back(std::stod(inputTemp));
}
}
inputSet = alg.transpose(inputSet);
dataFile.close();
}
void Data::printData(std::vector <std::string> inputName, std::vector<std::vector<double>> inputSet){
LinAlg alg;
inputSet = alg.transpose(inputSet);
for(int i = 0; i < inputSet.size(); i++){
std::cout << inputName[i] << std::endl;
for(int j = 0; j < inputSet[i].size(); j++){
std::cout << inputSet[i][j] << std::endl;
}
}
}
// SIMPLE
void Data::setData(std::string fileName, std::vector <double>& inputSet, std::vector <double>& outputSet){
std::string inputTemp, outputTemp;
std::ifstream dataFile(fileName);
if(!dataFile.is_open()){
std::cout << "The file failed to open." << std::endl;
}
std::string line;
while(std::getline(dataFile, line)){
std::stringstream ss(line);
std::getline(ss, inputTemp, ',');
std::getline(ss, outputTemp, ',');
inputSet.push_back(std::stod(inputTemp));
outputSet.push_back(std::stod(outputTemp));
}
dataFile.close();
}
void Data::printData(std::string& inputName, std::string& outputName, std::vector <double>& inputSet, std::vector <double>& outputSet){
std::cout << inputName << std::endl;
for(int i = 0; i < inputSet.size(); i++){
std::cout << inputSet[i] << std::endl;
}
std::cout << outputName << std::endl;
        for(int i = 0; i < outputSet.size(); i++){
std::cout << outputSet[i] << std::endl;
}
}
// Images
std::vector<std::vector<double>> Data::rgb2gray(std::vector<std::vector<std::vector<double>>> input){
std::vector<std::vector<double>> grayScale;
grayScale.resize(input[0].size());
for(int i = 0; i < grayScale.size(); i++){
grayScale[i].resize(input[0][i].size());
}
for(int i = 0; i < grayScale.size(); i++){
for(int j = 0; j < grayScale[i].size(); j++){
grayScale[i][j] = 0.299 * input[0][i][j] + 0.587 * input[1][i][j] + 0.114 * input[2][i][j];
}
}
return grayScale;
}
std::vector<std::vector<std::vector<double>>> Data::rgb2ycbcr(std::vector<std::vector<std::vector<double>>> input){
LinAlg alg;
std::vector<std::vector<std::vector<double>>> YCbCr;
YCbCr = alg.resize(YCbCr, input);
for(int i = 0; i < YCbCr[0].size(); i++){
for(int j = 0; j < YCbCr[0][i].size(); j++){
YCbCr[0][i][j] = 0.299 * input[0][i][j] + 0.587 * input[1][i][j] + 0.114 * input[2][i][j];
YCbCr[1][i][j] = -0.169 * input[0][i][j] - 0.331 * input[1][i][j] + 0.500 * input[2][i][j];
YCbCr[2][i][j] = 0.500 * input[0][i][j] - 0.419 * input[1][i][j] - 0.081 * input[2][i][j];
}
}
return YCbCr;
}
// Conversion formulas available here:
// https://www.rapidtables.com/convert/color/rgb-to-hsv.html
std::vector<std::vector<std::vector<double>>> Data::rgb2hsv(std::vector<std::vector<std::vector<double>>> input){
LinAlg alg;
std::vector<std::vector<std::vector<double>>> HSV;
HSV = alg.resize(HSV, input);
for(int i = 0; i < HSV[0].size(); i++){
for(int j = 0; j < HSV[0][i].size(); j++){
double rPrime = input[0][i][j] / 255;
double gPrime = input[1][i][j] / 255;
double bPrime = input[2][i][j] / 255;
double cMax = alg.max({rPrime, gPrime, bPrime});
double cMin = alg.min({rPrime, gPrime, bPrime});
double delta = cMax - cMin;
// H calculation.
if(delta == 0){
HSV[0][i][j] = 0;
}
else{
if(cMax == rPrime){
HSV[0][i][j] = 60 * fmod(((gPrime - bPrime) / delta), 6);
}
else if(cMax == gPrime){
HSV[0][i][j] = 60 * ( (bPrime - rPrime) / delta + 2);
}
else{ // cMax == bPrime
HSV[0][i][j] = 60 * ( (rPrime - gPrime) / delta + 6);
}
}
// S calculation.
if(cMax == 0){
HSV[1][i][j] = 0;
}
else{ HSV[1][i][j] = delta/cMax; }
// V calculation.
HSV[2][i][j] = cMax;
}
}
return HSV;
}
// http://machinethatsees.blogspot.com/2013/07/how-to-convert-rgb-to-xyz-or-vice-versa.html
std::vector<std::vector<std::vector<double>>> Data::rgb2xyz(std::vector<std::vector<std::vector<double>>> input){
LinAlg alg;
std::vector<std::vector<std::vector<double>>> XYZ;
XYZ = alg.resize(XYZ, input);
std::vector<std::vector<double>> RGB2XYZ = {{0.4124564, 0.3575761, 0.1804375}, {0.2126726, 0.7151522, 0.0721750}, {0.0193339, 0.1191920, 0.9503041}};
return alg.vector_wise_tensor_product(input, RGB2XYZ);
}
std::vector<std::vector<std::vector<double>>> Data::xyz2rgb(std::vector<std::vector<std::vector<double>>> input){
LinAlg alg;
std::vector<std::vector<std::vector<double>>> XYZ;
XYZ = alg.resize(XYZ, input);
std::vector<std::vector<double>> RGB2XYZ = alg.inverse({{0.4124564, 0.3575761, 0.1804375}, {0.2126726, 0.7151522, 0.0721750}, {0.0193339, 0.1191920, 0.9503041}});
return alg.vector_wise_tensor_product(input, RGB2XYZ);
}
// TEXT-BASED & NLP
std::string Data::toLower(std::string text){
for(int i = 0; i < text.size(); i++){
text[i] = tolower(text[i]);
}
return text;
}
std::vector<char> Data::split(std::string text){
std::vector<char> split_data;
for(int i = 0; i < text.size(); i++){
split_data.push_back(text[i]);
}
return split_data;
}
std::vector<std::string> Data::splitSentences(std::string data){
std::vector<std::string> sentences;
std::string currentStr = "";
for(int i = 0; i < data.length(); i++){
currentStr.push_back(data[i]);
if(data[i] == '.' && data[i + 1] != '.'){
sentences.push_back(currentStr);
currentStr = "";
i++;
}
}
return sentences;
}
    std::vector<std::string> Data::removeSpaces(std::vector<std::string> data){
        for(int i = 0; i < data.size(); i++){
            // Erase-remove idiom; the previous manual loop kept advancing an iterator invalidated by erase().
            data[i].erase(std::remove(data[i].begin(), data[i].end(), ' '), data[i].end());
        }
        return data;
    }
    std::vector<std::string> Data::removeNullByte(std::vector<std::string> data){
        for(int i = 0; i < data.size(); i++){
            if(data[i] == "\0"){
                data.erase(data.begin() + i);
                i--; // stay on this index so the element that shifted down isn't skipped
            }
        }
        return data;
    }
std::vector<std::string> Data::segment(std::string text){
std::vector<std::string> segmented_data;
int prev_delim = 0;
for(int i = 0; i < text.length(); i++){
if(text[i] == ' '){
segmented_data.push_back(text.substr(prev_delim, i - prev_delim));
prev_delim = i + 1;
}
else if(text[i] == ',' || text[i] == '!' || text[i] == '.' || text[i] == '-'){
segmented_data.push_back(text.substr(prev_delim, i - prev_delim));
std::string punc;
punc.push_back(text[i]);
segmented_data.push_back(punc);
prev_delim = i + 2;
i++;
}
else if(i == text.length() - 1){
segmented_data.push_back(text.substr(prev_delim, text.length() - prev_delim)); // hehe oops- forgot this
}
}
return segmented_data;
}
std::vector<double> Data::tokenize(std::string text){
int max_num = 0;
bool new_num = true;
std::vector<std::string> segmented_data = segment(text);
std::vector<double> tokenized_data;
tokenized_data.resize(segmented_data.size());
for(int i = 0; i < segmented_data.size(); i++){
for(int j = i - 1; j >= 0; j--){
if(segmented_data[i] == segmented_data[j]){
tokenized_data[i] = tokenized_data[j];
new_num = false;
}
}
if(!new_num){
new_num = true;
}
else{
max_num++;
tokenized_data[i] = max_num;
}
}
return tokenized_data;
}
std::vector<std::string> Data::removeStopWords(std::string text){
std::vector<std::string> stopWords = {"i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", "yours", "yourself", "yourselves", "he", "him", "his", "himself", "she", "her", "hers", "herself", "it", "its", "itself", "they", "them", "their", "theirs", "themselves", "what", "which", "who", "whom", "this", "that", "these", "those", "am", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had", "having", "do", "does", "did", "doing", "a", "an", "the", "and", "but", "if", "or", "because", "as", "until", "while", "of", "at", "by", "for", "with", "about", "against", "between", "into", "through", "during", "before", "after", "above", "below", "to", "from", "up", "down", "in", "out", "on", "off", "over", "under", "again", "further", "then", "once", "here", "there", "when", "where", "why", "how", "all", "any", "both", "each", "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so", "than", "too", "very", "s", "t", "can", "will", "just", "don", "should", "now"};
std::vector<std::string> segmented_data = removeSpaces(segment(toLower(text)));
        for(int i = 0; i < stopWords.size(); i++){
            for(int j = 0; j < segmented_data.size(); j++){
                if(segmented_data[j] == stopWords[i]){
                    segmented_data.erase(segmented_data.begin() + j);
                    j--; // re-check the element that shifted into this position
                }
            }
        }
return segmented_data;
}
std::vector<std::string> Data::removeStopWords(std::vector<std::string> segmented_data){
std::vector<std::string> stopWords = {"i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", "yours", "yourself", "yourselves", "he", "him", "his", "himself", "she", "her", "hers", "herself", "it", "its", "itself", "they", "them", "their", "theirs", "themselves", "what", "which", "who", "whom", "this", "that", "these", "those", "am", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had", "having", "do", "does", "did", "doing", "a", "an", "the", "and", "but", "if", "or", "because", "as", "until", "while", "of", "at", "by", "for", "with", "about", "against", "between", "into", "through", "during", "before", "after", "above", "below", "to", "from", "up", "down", "in", "out", "on", "off", "over", "under", "again", "further", "then", "once", "here", "there", "when", "where", "why", "how", "all", "any", "both", "each", "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so", "than", "too", "very", "s", "t", "can", "will", "just", "don", "should", "now"};
        for(int i = 0; i < segmented_data.size(); i++){
            for(int j = 0; j < stopWords.size(); j++){
                if(segmented_data[i] == stopWords[j]){
                    segmented_data.erase(segmented_data.begin() + i);
                    i--; // the element shifted into position i gets checked on the next outer pass
                    break;
                }
            }
        }
return segmented_data;
}
std::string Data::stemming(std::string text){
// Our list of suffixes which we use to compare against
std::vector<std::string> suffixes = {"eer", "er", "ion", "ity", "ment", "ness", "or", "sion", "ship", "th", "able", "ible", "al", "ant", "ary", "ful", "ic", "ious", "ous", "ive", "less", "y", "ed", "en", "ing", "ize", "ise", "ly", "ward", "wise"};
        int padding_size = 4;
        char padding = ' '; // our padding
        text.append(padding_size, padding); // append the padding; writing text[text.length() + i] wrote past the end of the string
for(int i = 0; i < text.size(); i++){
for(int j = 0; j < suffixes.size(); j++){
if(text.substr(i, suffixes[j].length()) == suffixes[j] && (text[i + suffixes[j].length()] == ' ' || text[i + suffixes[j].length()] == ',' || text[i + suffixes[j].length()] == '-' || text[i + suffixes[j].length()] == '.' || text[i + suffixes[j].length()] == '!')){
text.erase(i, suffixes[j].length());
}
}
}
return text;
}
std::vector<std::vector<double>> Data::BOW(std::vector<std::string> sentences, std::string type){
/*
STEPS OF BOW:
1) To lowercase (done by removeStopWords function by def)
2) Removing stop words
3) Obtain a list of the used words
4) Create a one hot encoded vector of the words and sentences
5) Sentence.size() x list.size() matrix
*/
std::vector<std::string> wordList = removeNullByte(removeStopWords(createWordList(sentences)));
std::vector<std::vector<std::string>> segmented_sentences;
segmented_sentences.resize(sentences.size());
for(int i = 0; i < sentences.size(); i++){
segmented_sentences[i] = removeStopWords(sentences[i]);
}
std::vector<std::vector<double>> bow;
bow.resize(sentences.size());
for(int i = 0; i < bow.size(); i++){
bow[i].resize(wordList.size());
}
for(int i = 0; i < segmented_sentences.size(); i++){
for(int j = 0; j < segmented_sentences[i].size(); j++){
for(int k = 0; k < wordList.size(); k++){
if(segmented_sentences[i][j] == wordList[k]){
if(type == "Binary"){
bow[i][k] = 1;
}
else{
bow[i][k]++;
}
}
}
}
}
return bow;
}
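    // Usage sketch for the STEPS OF BOW pipeline described above (illustrative values only;
    // "Binary" marks word presence with 0/1, any other type string counts occurrences):
    //     MLPP::Data data;
    //     std::vector<std::string> sentences = {"The dog barked.", "The cat sat."};
    //     std::vector<std::vector<double>> counts = data.BOW(sentences, "Default");
    //     std::vector<std::vector<double>> binary = data.BOW(sentences, "Binary");
    //     // Rows index sentences, columns index the words returned by createWordList(sentences).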
std::vector<std::vector<double>> Data::TFIDF(std::vector<std::string> sentences){
LinAlg alg;
std::vector<std::string> wordList = removeNullByte(removeStopWords(createWordList(sentences)));
std::vector<std::vector<std::string>> segmented_sentences;
segmented_sentences.resize(sentences.size());
for(int i = 0; i < sentences.size(); i++){
segmented_sentences[i] = removeStopWords(sentences[i]);
}
std::vector<std::vector<double>> TF;
std::vector<int> frequency;
frequency.resize(wordList.size());
TF.resize(segmented_sentences.size());
for(int i = 0; i < TF.size(); i++){
TF[i].resize(wordList.size());
}
for(int i = 0; i < segmented_sentences.size(); i++){
std::vector<bool> present(wordList.size(), 0);
for(int j = 0; j < segmented_sentences[i].size(); j++){
for(int k = 0; k < wordList.size(); k++){
if(segmented_sentences[i][j] == wordList[k]){
TF[i][k]++;
if(!present[k]){
frequency[k]++;
present[k] = true;
}
}
}
}
TF[i] = alg.scalarMultiply(double(1) / double(segmented_sentences[i].size()), TF[i]);
}
std::vector<double> IDF;
IDF.resize(frequency.size());
for(int i = 0; i < IDF.size(); i++){
IDF[i] = std::log((double)segmented_sentences.size() / (double)frequency[i]);
}
std::vector<std::vector<double>> TFIDF;
TFIDF.resize(segmented_sentences.size());
for(int i = 0; i < TFIDF.size(); i++){
TFIDF[i].resize(wordList.size());
}
for(int i = 0; i < TFIDF.size(); i++){
for(int j = 0; j < TFIDF[i].size(); j++){
TFIDF[i][j] = TF[i][j] * IDF[j];
}
}
return TFIDF;
}
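    // For reference, the loops above implement the standard TF-IDF weighting:
    //     TF[i][k]    = (occurrences of word k in sentence i) / (number of words in sentence i)
    //     IDF[k]      = log(number of sentences / number of sentences containing word k)
    //     TFIDF[i][k] = TF[i][k] * IDF[k]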
std::tuple<std::vector<std::vector<double>>, std::vector<std::string>> Data::word2Vec(std::vector<std::string> sentences, std::string type, int windowSize, int dimension, double learning_rate, int max_epoch){
std::vector<std::string> wordList = removeNullByte(removeStopWords(createWordList(sentences)));
std::vector<std::vector<std::string>> segmented_sentences;
segmented_sentences.resize(sentences.size());
for(int i = 0; i < sentences.size(); i++){
segmented_sentences[i] = removeStopWords(sentences[i]);
}
std::vector<std::string> inputStrings;
std::vector<std::string> outputStrings;
for(int i = 0; i < segmented_sentences.size(); i++){
for(int j = 0; j < segmented_sentences[i].size(); j++){
for(int k = windowSize; k > 0; k--){
if(j - k >= 0){
inputStrings.push_back(segmented_sentences[i][j]);
outputStrings.push_back(segmented_sentences[i][j - k]);
}
if(j + k <= segmented_sentences[i].size() - 1){
inputStrings.push_back(segmented_sentences[i][j]);
outputStrings.push_back(segmented_sentences[i][j + k]);
}
}
}
}
int inputSize = inputStrings.size();
inputStrings.insert(inputStrings.end(), outputStrings.begin(), outputStrings.end());
std::vector<std::vector<double>> BOW = Data::BOW(inputStrings, "Binary");
std::vector<std::vector<double>> inputSet;
std::vector<std::vector<double>> outputSet;
for(int i = 0; i < inputSize; i++){
inputSet.push_back(BOW[i]);
}
for(int i = inputSize; i < BOW.size(); i++){
outputSet.push_back(BOW[i]);
}
LinAlg alg;
SoftmaxNet* model;
if(type == "Skipgram"){
model = new SoftmaxNet(outputSet, inputSet, dimension);
}
        else { // else = CBOW. We treat CBOW as the default.
model = new SoftmaxNet(inputSet, outputSet, dimension);
}
model->gradientDescent(learning_rate, max_epoch, 1);
std::vector<std::vector<double>> wordEmbeddings = model->getEmbeddings();
delete model;
return {wordEmbeddings, wordList};
}
std::vector<std::vector<double>> Data::LSA(std::vector<std::string> sentences, int dim){
LinAlg alg;
std::vector<std::vector<double>> docWordData = BOW(sentences, "Binary");
auto [U, S, Vt] = alg.SVD(docWordData);
std::vector<std::vector<double>> S_trunc = alg.zeromat(dim, dim);
std::vector<std::vector<double>> Vt_trunc;
for(int i = 0; i < dim; i++){
S_trunc[i][i] = S[i][i];
Vt_trunc.push_back(Vt[i]);
}
std::vector<std::vector<double>> embeddings = alg.matmult(S_trunc, Vt_trunc);
return embeddings;
}
std::vector<std::string> Data::createWordList(std::vector<std::string> sentences){
std::string combinedText = "";
for(int i = 0; i < sentences.size(); i++){
if(i != 0){ combinedText += " "; }
combinedText += sentences[i];
}
return removeSpaces(vecToSet(removeStopWords(combinedText)));
}
// EXTRA
void Data::setInputNames(std::string fileName, std::vector<std::string>& inputNames){
std::string inputNameTemp;
std::ifstream dataFile(fileName);
if(!dataFile.is_open()){
std::cout << fileName << " failed to open." << std::endl;
}
while (std::getline(dataFile, inputNameTemp))
{
inputNames.push_back(inputNameTemp);
}
dataFile.close();
}
std::vector<std::vector<double>> Data::featureScaling(std::vector<std::vector<double>> X){
LinAlg alg;
X = alg.transpose(X);
std::vector<double> max_elements, min_elements;
max_elements.resize(X.size());
min_elements.resize(X.size());
for(int i = 0; i < X.size(); i++){
max_elements[i] = alg.max(X[i]);
min_elements[i] = alg.min(X[i]);
}
for(int i = 0; i < X.size(); i++){
for(int j = 0; j < X[i].size(); j++){
X[i][j] = (X[i][j] - min_elements[i]) / (max_elements[i] - min_elements[i]);
}
}
return alg.transpose(X);
}
std::vector<std::vector<double>> Data::meanNormalization(std::vector<std::vector<double>> X){
LinAlg alg;
Stat stat;
// (X_j - mu_j) / std_j, for every j
X = meanCentering(X);
for(int i = 0; i < X.size(); i++){
X[i] = alg.scalarMultiply(1/stat.standardDeviation(X[i]), X[i]);
}
return X;
}
std::vector<std::vector<double>> Data::meanCentering(std::vector<std::vector<double>> X){
LinAlg alg;
Stat stat;
for(int i = 0; i < X.size(); i++){
double mean_i = stat.mean(X[i]);
for(int j = 0; j < X[i].size(); j++){
X[i][j] -= mean_i;
}
}
return X;
}
std::vector<std::vector<double>> Data::oneHotRep(std::vector<double> tempOutputSet, int n_class){
std::vector<std::vector<double>> outputSet;
outputSet.resize(tempOutputSet.size());
for(int i = 0; i < tempOutputSet.size(); i++){
for(int j = 0; j <= n_class - 1; j++){
if(tempOutputSet[i] == j){
outputSet[i].push_back(1);
}
else{
outputSet[i].push_back(0);
}
}
}
return outputSet;
}
std::vector<double> Data::reverseOneHot(std::vector<std::vector<double>> tempOutputSet){
std::vector<double> outputSet;
int n_class = tempOutputSet[0].size();
for(int i = 0; i < tempOutputSet.size(); i++){
            int current_class = 0; // 0-based class index, so that reverseOneHot inverts oneHotRep above
for(int j = 0; j < tempOutputSet[i].size(); j++){
if(tempOutputSet[i][j] == 1){
break;
}
else{
current_class++;
}
}
outputSet.push_back(current_class);
}
return outputSet;
}
}

99
MLPP/Data/Data.hpp Normal file
View File

@ -0,0 +1,99 @@
//
// Data.hpp
// MLP
//
// Created by Marc Melikyan on 11/4/20.
//
#ifndef Data_hpp
#define Data_hpp
#include <vector>
#include <tuple>
#include <string>
namespace MLPP{
class Data{
public:
// Load Datasets
std::tuple<std::vector<std::vector<double>>, std::vector<double>> loadBreastCancer();
std::tuple<std::vector<std::vector<double>>, std::vector<double>> loadBreastCancerSVC();
std::tuple<std::vector<std::vector<double>>, std::vector<std::vector<double>>> loadIris();
std::tuple<std::vector<std::vector<double>>, std::vector<std::vector<double>>> loadWine();
std::tuple<std::vector<std::vector<double>>, std::vector<std::vector<double>>> loadMnistTrain();
std::tuple<std::vector<std::vector<double>>, std::vector<std::vector<double>>> loadMnistTest();
std::tuple<std::vector<std::vector<double>>, std::vector<double>> loadCaliforniaHousing();
std::tuple<std::vector<double>, std::vector<double>> loadFiresAndCrime();
std::tuple<std::vector<std::vector<double>>, std::vector<std::vector<double>>, std::vector<std::vector<double>>, std::vector<std::vector<double>>> trainTestSplit(std::vector<std::vector<double>> inputSet, std::vector<std::vector<double>> outputSet, double testSize);
// Supervised
void setData(int k, std::string fileName, std::vector<std::vector<double>>& inputSet, std::vector<double>& outputSet);
void printData(std::vector <std::string> inputName, std::string outputName, std::vector<std::vector<double>> inputSet, std::vector<double> outputSet);
// Unsupervised
void setData(int k, std::string fileName, std::vector<std::vector<double>>& inputSet);
void printData(std::vector <std::string> inputName, std::vector<std::vector<double>> inputSet);
// Simple
void setData(std::string fileName, std::vector <double>& inputSet, std::vector <double>& outputSet);
void printData(std::string& inputName, std::string& outputName, std::vector <double>& inputSet, std::vector <double>& outputSet);
// Images
std::vector<std::vector<double>> rgb2gray(std::vector<std::vector<std::vector<double>>> input);
std::vector<std::vector<std::vector<double>>> rgb2ycbcr(std::vector<std::vector<std::vector<double>>> input);
std::vector<std::vector<std::vector<double>>> rgb2hsv(std::vector<std::vector<std::vector<double>>> input);
std::vector<std::vector<std::vector<double>>> rgb2xyz(std::vector<std::vector<std::vector<double>>> input);
std::vector<std::vector<std::vector<double>>> xyz2rgb(std::vector<std::vector<std::vector<double>>> input);
// Text-Based & NLP
std::string toLower(std::string text);
std::vector<char> split(std::string text);
std::vector<std::string> splitSentences(std::string data);
std::vector<std::string> removeSpaces(std::vector<std::string> data);
std::vector<std::string> removeNullByte(std::vector<std::string> data);
std::vector<std::string> segment(std::string text);
std::vector<double> tokenize(std::string text);
std::vector<std::string> removeStopWords(std::string text);
std::vector<std::string> removeStopWords(std::vector<std::string> segmented_data);
std::string stemming(std::string text);
std::vector<std::vector<double>> BOW(std::vector<std::string> sentences, std::string = "Default");
std::vector<std::vector<double>> TFIDF(std::vector<std::string> sentences);
std::tuple<std::vector<std::vector<double>>, std::vector<std::string>> word2Vec(std::vector<std::string> sentences, std::string type, int windowSize, int dimension, double learning_rate, int max_epoch);
std::vector<std::vector<double>> LSA(std::vector<std::string> sentences, int dim);
std::vector<std::string> createWordList(std::vector<std::string> sentences);
// Extra
void setInputNames(std::string fileName, std::vector<std::string>& inputNames);
std::vector<std::vector<double>> featureScaling(std::vector<std::vector<double>> X);
std::vector<std::vector<double>> meanNormalization(std::vector<std::vector<double>> X);
std::vector<std::vector<double>> meanCentering(std::vector<std::vector<double>> X);
std::vector<std::vector<double>> oneHotRep (std::vector<double> tempOutputSet, int n_class);
std::vector<double> reverseOneHot(std::vector<std::vector<double>> tempOutputSet);
template <class T>
std::vector<T> vecToSet(std::vector<T> inputSet){
std::vector<T> setInputSet;
for(int i = 0; i < inputSet.size(); i++){
bool new_element = true;
for(int j = 0; j < setInputSet.size(); j++){
if(setInputSet[j] == inputSet[i]){
new_element = false;
}
}
if(new_element){
setInputSet.push_back(inputSet[i]);
}
}
return setInputSet;
}
private:
};
}
#endif /* Data_hpp */

241
MLPP/DualSVC/DualSVC.cpp Normal file
View File

@ -0,0 +1,241 @@
//
// DualSVC.cpp
//
// Created by Marc Melikyan on 10/2/20.
//
#include "DualSVC.hpp"
#include "Activation/Activation.hpp"
#include "LinAlg/LinAlg.hpp"
#include "Regularization/Reg.hpp"
#include "Utilities/Utilities.hpp"
#include "Cost/Cost.hpp"
#include <iostream>
#include <random>
namespace MLPP{
DualSVC::DualSVC(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, double C, std::string kernel)
: inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), C(C), kernel(kernel)
{
y_hat.resize(n);
bias = Utilities::biasInitialization();
alpha = Utilities::weightInitialization(n); // One alpha for all training examples, as per the lagrangian multipliers.
K = kernelFunction(inputSet, inputSet, kernel); // For now this is unused. When non-linear kernels are added, the K will be manipulated.
}
std::vector<double> DualSVC::modelSetTest(std::vector<std::vector<double>> X){
return Evaluate(X);
}
double DualSVC::modelTest(std::vector<double> x){
return Evaluate(x);
}
void DualSVC::gradientDescent(double learning_rate, int max_epoch, bool UI){
class Cost cost;
Activation avn;
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
forwardPass();
while(true){
cost_prev = Cost(alpha, inputSet, outputSet);
alpha = alg.subtraction(alpha, alg.scalarMultiply(learning_rate, cost.dualFormSVMDeriv(alpha, inputSet, outputSet)));
alphaProjection();
// Calculating the bias
double biasGradient = 0;
for(int i = 0; i < alpha.size(); i++){
double sum = 0;
                    if(alpha[i] < C && alpha[i] > 0){
                        for(int j = 0; j < alpha.size(); j++){
                            if(alpha[j] > 0){
                                sum += alpha[j] * outputSet[j] * alg.dot(inputSet[j], inputSet[i]); // TO DO: DON'T forget to add non-linear kernelizations.
                            }
                        }
                        // Estimate the bias from the first free support vector (0 < alpha_i < C),
                        // instead of unconditionally breaking on the first training example.
                        biasGradient = (1 - outputSet[i] * sum) / outputSet[i];
                        break;
                    }
                }
bias -= biasGradient * learning_rate;
forwardPass();
// UI PORTION
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost(alpha, inputSet, outputSet));
Utilities::UI(alpha, bias);
std::cout << score() << std::endl; // TO DO: DELETE THIS.
}
epoch++;
if(epoch > max_epoch) { break; }
}
}
// void DualSVC::SGD(double learning_rate, int max_epoch, bool UI){
// class Cost cost;
// Activation avn;
// LinAlg alg;
// Reg regularization;
// double cost_prev = 0;
// int epoch = 1;
// while(true){
// std::random_device rd;
// std::default_random_engine generator(rd());
// std::uniform_int_distribution<int> distribution(0, int(n - 1));
// int outputIndex = distribution(generator);
// cost_prev = Cost(alpha, inputSet[outputIndex], outputSet[outputIndex]);
// // Bias updation
// bias -= learning_rate * costDeriv;
// y_hat = Evaluate({inputSet[outputIndex]});
// if(UI) {
// Utilities::CostInfo(epoch, cost_prev, Cost(alpha));
// Utilities::UI(weights, bias);
// }
// epoch++;
// if(epoch > max_epoch) { break; }
// }
// forwardPass();
// }
// void DualSVC::MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI){
// class Cost cost;
// Activation avn;
// LinAlg alg;
// Reg regularization;
// double cost_prev = 0;
// int epoch = 1;
// // Creating the mini-batches
// int n_mini_batch = n/mini_batch_size;
// auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
// while(true){
// for(int i = 0; i < n_mini_batch; i++){
// std::vector<double> y_hat = Evaluate(inputMiniBatches[i]);
// std::vector<double> z = propagate(inputMiniBatches[i]);
// cost_prev = Cost(z, outputMiniBatches[i], weights, C);
// // Calculating the weight gradients
// weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), cost.HingeLossDeriv(z, outputMiniBatches[i], C))));
// weights = regularization.regWeights(weights, learning_rate/n, 0, "Ridge");
// // Calculating the bias gradients
// bias -= learning_rate * alg.sum_elements(cost.HingeLossDeriv(y_hat, outputMiniBatches[i], C)) / n;
// forwardPass();
// y_hat = Evaluate(inputMiniBatches[i]);
// if(UI) {
// Utilities::CostInfo(epoch, cost_prev, Cost(z, outputMiniBatches[i], weights, C));
// Utilities::UI(weights, bias);
// }
// }
// epoch++;
// if(epoch > max_epoch) { break; }
// }
// forwardPass();
// }
double DualSVC::score(){
Utilities util;
return util.performance(y_hat, outputSet);
}
void DualSVC::save(std::string fileName){
Utilities util;
util.saveParameters(fileName, alpha, bias);
}
double DualSVC::Cost(std::vector<double> alpha, std::vector<std::vector<double>> X, std::vector<double> y){
class Cost cost;
return cost.dualFormSVM(alpha, X, y);
}
std::vector<double> DualSVC::Evaluate(std::vector<std::vector<double>> X){
Activation avn;
return avn.sign(propagate(X));
}
std::vector<double> DualSVC::propagate(std::vector<std::vector<double>> X){
LinAlg alg;
std::vector<double> z;
for(int i = 0; i < X.size(); i++){
double sum = 0;
for(int j = 0; j < alpha.size(); j++){
if(alpha[j] != 0){
sum += alpha[j] * outputSet[j] * alg.dot(inputSet[j], X[i]); // TO DO: DON'T forget to add non-linear kernelizations.
}
}
sum += bias;
z.push_back(sum);
}
return z;
}
double DualSVC::Evaluate(std::vector<double> x){
Activation avn;
return avn.sign(propagate(x));
}
double DualSVC::propagate(std::vector<double> x){
LinAlg alg;
double z = 0;
for(int j = 0; j < alpha.size(); j++){
if(alpha[j] != 0){
z += alpha[j] * outputSet[j] * alg.dot(inputSet[j], x); // TO DO: DON'T forget to add non-linear kernelizations.
}
}
z += bias;
return z;
}
void DualSVC::forwardPass(){
LinAlg alg;
Activation avn;
z = propagate(inputSet);
y_hat = avn.sign(z);
}
void DualSVC::alphaProjection(){
for(int i = 0; i < alpha.size(); i++){
if(alpha[i] > C){
alpha[i] = C;
}
else if(alpha[i] < 0){
alpha[i] = 0;
}
}
}
    double DualSVC::kernelFunction(std::vector<double> u, std::vector<double> v, std::string kernel){
        LinAlg alg;
        if(kernel == "Linear"){
            return alg.dot(u, v);
        }
        return 0; // fallback for unsupported kernels; removes the -Wreturn-type warning until other kernels are added
    }
    std::vector<std::vector<double>> DualSVC::kernelFunction(std::vector<std::vector<double>> A, std::vector<std::vector<double>> B, std::string kernel){
        LinAlg alg;
        if(kernel == "Linear"){
            return alg.matmult(A, alg.transpose(B)); // use the passed matrices rather than the hard-coded inputSet
        }
        return std::vector<std::vector<double>>(); // fallback for unsupported kernels; removes the -Wreturn-type warning
    }
}

71
MLPP/DualSVC/DualSVC.hpp Normal file
View File

@ -0,0 +1,71 @@
//
// DualSVC.hpp
//
// Created by Marc Melikyan on 10/2/20.
//
// http://disp.ee.ntu.edu.tw/~pujols/Support%20Vector%20Machine.pdf
// http://ciml.info/dl/v0_99/ciml-v0_99-ch11.pdf
// These were excellent for the practical intuition behind the dual formulation.
#ifndef DualSVC_hpp
#define DualSVC_hpp
#include <vector>
#include <string>
namespace MLPP {
class DualSVC{
public:
DualSVC(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, double C, std::string kernel = "Linear");
DualSVC(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, double C, std::string kernel, double p, double c);
std::vector<double> modelSetTest(std::vector<std::vector<double>> X);
double modelTest(std::vector<double> x);
void gradientDescent(double learning_rate, int max_epoch, bool UI = 1);
void SGD(double learning_rate, int max_epoch, bool UI = 1);
void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1);
double score();
void save(std::string fileName);
private:
void init();
double Cost(std::vector<double> alpha, std::vector<std::vector<double>> X, std::vector<double> y);
std::vector<double> Evaluate(std::vector<std::vector<double>> X);
std::vector<double> propagate(std::vector<std::vector<double>> X);
double Evaluate(std::vector<double> x);
double propagate(std::vector<double> x);
void forwardPass();
void alphaProjection();
double kernelFunction(std::vector<double> v, std::vector<double> u, std::string kernel);
std::vector<std::vector<double>> kernelFunction(std::vector<std::vector<double>> U, std::vector<std::vector<double>> V, std::string kernel);
std::vector<std::vector<double>> inputSet;
std::vector<double> outputSet;
std::vector<double> z;
std::vector<double> y_hat;
double bias;
std::vector<double> alpha;
std::vector<std::vector<double>> K;
double C;
int n;
int k;
std::string kernel;
double p; // Poly
double c; // Poly
// UI Portion
void UI(int epoch, double cost_prev);
};
}
#endif /* DualSVC_hpp */
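A minimal training sketch for DualSVC. The toy data, the -1/+1 label convention the dual formulation expects, and the include path are assumptions made for the example; only the linear kernel path is implemented above.

#include "MLPP/DualSVC/DualSVC.hpp"
#include <iostream>
#include <vector>

int main(){
    std::vector<std::vector<double>> X = {{1, 2}, {2, 3}, {-1, -2}, {-2, -1}};
    std::vector<double> y = {1, 1, -1, -1};               // -1/+1 class labels
    MLPP::DualSVC svc(X, y, /*C=*/1.0);                   // kernel defaults to "Linear"
    svc.gradientDescent(/*learning_rate=*/0.01, /*max_epoch=*/1000, /*UI=*/false);
    std::cout << "prediction: " << svc.modelTest({1.5, 2.5}) << std::endl; // sign of the decision value
    std::cout << "training accuracy: " << svc.score() << std::endl;
    return 0;
}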

240
MLPP/ExpReg/ExpReg.cpp Normal file
View File

@ -0,0 +1,240 @@
//
// ExpReg.cpp
//
// Created by Marc Melikyan on 10/2/20.
//
#include "ExpReg.hpp"
#include "LinAlg/LinAlg.hpp"
#include "Stat/Stat.hpp"
#include "Regularization/Reg.hpp"
#include "Utilities/Utilities.hpp"
#include "Cost/Cost.hpp"
#include <iostream>
#include <random>
namespace MLPP{
ExpReg::ExpReg(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, std::string reg, double lambda, double alpha)
: inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha)
{
y_hat.resize(n);
weights = Utilities::weightInitialization(k);
initial = Utilities::weightInitialization(k);
bias = Utilities::biasInitialization();
}
std::vector<double> ExpReg::modelSetTest(std::vector<std::vector<double>> X){
return Evaluate(X);
}
double ExpReg::modelTest(std::vector<double> x){
return Evaluate(x);
}
void ExpReg::gradientDescent(double learning_rate, int max_epoch, bool UI){
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
forwardPass();
while(true){
cost_prev = Cost(y_hat, outputSet);
std::vector<double> error = alg.subtraction(y_hat, outputSet);
for(int i = 0; i < k; i++){
// Calculating the weight gradient
double sum = 0;
for(int j = 0; j < n; j++){
sum += error[j] * inputSet[j][i] * std::pow(weights[i], inputSet[j][i] - 1);
}
double w_gradient = sum / n;
// Calculating the initial gradient
double sum2 = 0;
for(int j = 0; j < n; j++){
sum2 += error[j] * std::pow(weights[i], inputSet[j][i]);
}
double i_gradient = sum2 / n;
// Weight/initial updation
weights[i] -= learning_rate * w_gradient;
initial[i] -= learning_rate * i_gradient;
}
weights = regularization.regWeights(weights, lambda, alpha, reg);
// Calculating the bias gradient
double sum = 0;
for(int j = 0; j < n; j++){
sum += (y_hat[j] - outputSet[j]);
}
double b_gradient = sum / n;
// bias updation
bias -= learning_rate * b_gradient;
forwardPass();
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
Utilities::UI(weights, bias);
}
epoch++;
if(epoch > max_epoch) { break; }
}
}
void ExpReg::SGD(double learning_rate, int max_epoch, bool UI){
Reg regularization;
double cost_prev = 0;
int epoch = 1;
while(true){
std::random_device rd;
std::default_random_engine generator(rd());
std::uniform_int_distribution<int> distribution(0, int(n - 1));
int outputIndex = distribution(generator);
double y_hat = Evaluate(inputSet[outputIndex]);
cost_prev = Cost({y_hat}, {outputSet[outputIndex]});
for(int i = 0; i < k; i++){
// Calculating the weight gradients
double w_gradient = (y_hat - outputSet[outputIndex]) * inputSet[outputIndex][i] * std::pow(weights[i], inputSet[outputIndex][i] - 1);
double i_gradient = (y_hat - outputSet[outputIndex]) * std::pow(weights[i], inputSet[outputIndex][i]);
// Weight/initial updation
weights[i] -= learning_rate * w_gradient;
initial[i] -= learning_rate * i_gradient;
}
weights = regularization.regWeights(weights, lambda, alpha, reg);
// Calculating the bias gradients
double b_gradient = (y_hat - outputSet[outputIndex]);
// Bias updation
bias -= learning_rate * b_gradient;
y_hat = Evaluate({inputSet[outputIndex]});
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost({y_hat}, {outputSet[outputIndex]}));
Utilities::UI(weights, bias);
}
epoch++;
if(epoch > max_epoch) { break; }
}
forwardPass();
}
void ExpReg::MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI){
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
// Creating the mini-batches
int n_mini_batch = n/mini_batch_size;
auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
while(true){
for(int i = 0; i < n_mini_batch; i++){
std::vector<double> y_hat = Evaluate(inputMiniBatches[i]);
cost_prev = Cost(y_hat, outputMiniBatches[i]);
std::vector<double> error = alg.subtraction(y_hat, outputMiniBatches[i]);
for(int j = 0; j < k; j++){
// Calculating the weight gradient
double sum = 0;
for(int k = 0; k < outputMiniBatches[i].size(); k++){
sum += error[k] * inputMiniBatches[i][k][j] * std::pow(weights[j], inputMiniBatches[i][k][j] - 1);
}
double w_gradient = sum / outputMiniBatches[i].size();
// Calculating the initial gradient
double sum2 = 0;
for(int k = 0; k < outputMiniBatches[i].size(); k++){
sum2 += error[k] * std::pow(weights[j], inputMiniBatches[i][k][j]);
}
double i_gradient = sum2 / outputMiniBatches[i].size();
// Weight/initial updation
weights[j] -= learning_rate * w_gradient;
initial[j] -= learning_rate * i_gradient;
}
weights = regularization.regWeights(weights, lambda, alpha, reg);
// Calculating the bias gradient
double sum = 0;
for(int j = 0; j < outputMiniBatches[i].size(); j++){
sum += (y_hat[j] - outputMiniBatches[i][j]);
}
double b_gradient = sum / outputMiniBatches[i].size();
y_hat = Evaluate(inputMiniBatches[i]);
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
Utilities::UI(weights, bias);
}
}
epoch++;
if(epoch > max_epoch) { break; }
}
forwardPass();
}
double ExpReg::score(){
Utilities util;
return util.performance(y_hat, outputSet);
}
void ExpReg::save(std::string fileName){
Utilities util;
util.saveParameters(fileName, weights, initial, bias);
}
double ExpReg::Cost(std::vector <double> y_hat, std::vector<double> y){
Reg regularization;
class Cost cost;
return cost.MSE(y_hat, y) + regularization.regTerm(weights, lambda, alpha, reg);
}
std::vector<double> ExpReg::Evaluate(std::vector<std::vector<double>> X){
std::vector<double> y_hat;
y_hat.resize(X.size());
for(int i = 0; i < X.size(); i++){
y_hat[i] = 0;
for(int j = 0; j < X[i].size(); j++){
y_hat[i] += initial[j] * std::pow(weights[j], X[i][j]);
}
y_hat[i] += bias;
}
return y_hat;
}
double ExpReg::Evaluate(std::vector<double> x){
double y_hat = 0;
for(int i = 0; i < x.size(); i++){
y_hat += initial[i] * std::pow(weights[i], x[i]);
}
return y_hat + bias;
}
// a * w^x + b
void ExpReg::forwardPass(){
y_hat = Evaluate(inputSet);
}
}

51
MLPP/ExpReg/ExpReg.hpp Normal file
View File

@ -0,0 +1,51 @@
//
// ExpReg.hpp
//
// Created by Marc Melikyan on 10/2/20.
//
#ifndef ExpReg_hpp
#define ExpReg_hpp
#include <vector>
#include <string>
namespace MLPP{
class ExpReg{
public:
ExpReg(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, std::string reg = "None", double lambda = 0.5, double alpha = 0.5);
std::vector<double> modelSetTest(std::vector<std::vector<double>> X);
double modelTest(std::vector<double> x);
void gradientDescent(double learning_rate, int max_epoch, bool UI = 1);
void SGD(double learning_rate, int max_epoch, bool UI = 1);
void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1);
double score();
void save(std::string fileName);
private:
double Cost(std::vector <double> y_hat, std::vector<double> y);
std::vector<double> Evaluate(std::vector<std::vector<double>> X);
double Evaluate(std::vector<double> x);
void forwardPass();
std::vector<std::vector<double>> inputSet;
std::vector<double> outputSet;
std::vector<double> y_hat;
std::vector<double> weights;
std::vector<double> initial;
double bias;
int n;
int k;
// Regularization Params
std::string reg;
double lambda;
double alpha; /* This is the controlling param for Elastic Net*/
};
}
#endif /* ExpReg_hpp */
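ExpReg fits the model noted in ExpReg.cpp's forwardPass comment, y ≈ Σ_j initial_j · weights_j^{x_j} + bias. A minimal sketch with made-up data; the values, learning rate, and include path are assumptions for illustration only.

#include "MLPP/ExpReg/ExpReg.hpp"
#include <iostream>
#include <vector>

int main(){
    std::vector<std::vector<double>> X = {{1}, {2}, {3}, {4}};
    std::vector<double> y = {2, 4, 8, 16};                 // roughly 2^x
    MLPP::ExpReg model(X, y);                              // reg defaults to "None"
    model.gradientDescent(/*learning_rate=*/0.001, /*max_epoch=*/10000, /*UI=*/false);
    std::cout << "prediction at x = 5: " << model.modelTest({5}) << std::endl;
    std::cout << "training performance: " << model.score() << std::endl;
    return 0;
}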

290
MLPP/GAN/GAN.cpp Normal file
View File

@ -0,0 +1,290 @@
//
// GAN.cpp
//
// Created by Marc Melikyan on 11/4/20.
//
#include "GAN.hpp"
#include "Activation/Activation.hpp"
#include "LinAlg/LinAlg.hpp"
#include "Regularization/Reg.hpp"
#include "Utilities/Utilities.hpp"
#include "Cost/Cost.hpp"
#include <iostream>
#include <cmath>
namespace MLPP {
GAN::GAN(double k, std::vector<std::vector<double>> outputSet)
: outputSet(outputSet), n(outputSet.size()), k(k)
{
}
GAN::~GAN(){
delete outputLayer;
}
std::vector<std::vector<double>> GAN::generateExample(int n){
LinAlg alg;
return modelSetTestGenerator(alg.gaussianNoise(n, k));
}
void GAN::gradientDescent(double learning_rate, int max_epoch, bool UI){
class Cost cost;
LinAlg alg;
double cost_prev = 0;
int epoch = 1;
forwardPass();
while(true){
cost_prev = Cost(y_hat, alg.onevec(n));
// Training of the discriminator.
std::vector<std::vector<double>> generatorInputSet = alg.gaussianNoise(n, k);
std::vector<std::vector<double>> discriminatorInputSet = modelSetTestGenerator(generatorInputSet);
discriminatorInputSet.insert(discriminatorInputSet.end(), outputSet.begin(), outputSet.end()); // Fake + real inputs.
std::vector<double> y_hat = modelSetTestDiscriminator(discriminatorInputSet);
std::vector<double> outputSet = alg.zerovec(n);
std::vector<double> outputSetReal = alg.onevec(n);
outputSet.insert(outputSet.end(), outputSetReal.begin(), outputSetReal.end()); // Fake + real output scores.
auto [cumulativeDiscriminatorHiddenLayerWGrad, outputDiscriminatorWGrad] = computeDiscriminatorGradients(y_hat, outputSet);
cumulativeDiscriminatorHiddenLayerWGrad = alg.scalarMultiply(learning_rate/n, cumulativeDiscriminatorHiddenLayerWGrad);
outputDiscriminatorWGrad = alg.scalarMultiply(learning_rate/n, outputDiscriminatorWGrad);
updateDiscriminatorParameters(cumulativeDiscriminatorHiddenLayerWGrad, outputDiscriminatorWGrad, learning_rate);
// Training of the generator.
generatorInputSet = alg.gaussianNoise(n, k);
discriminatorInputSet = modelSetTestGenerator(generatorInputSet);
y_hat = modelSetTestDiscriminator(discriminatorInputSet);
outputSet = alg.onevec(n);
std::vector<std::vector<std::vector<double>>> cumulativeGeneratorHiddenLayerWGrad = computeGeneratorGradients(y_hat, outputSet);
cumulativeGeneratorHiddenLayerWGrad = alg.scalarMultiply(learning_rate/n, cumulativeGeneratorHiddenLayerWGrad);
updateGeneratorParameters(cumulativeGeneratorHiddenLayerWGrad, learning_rate);
forwardPass();
if(UI) { GAN::UI(epoch, cost_prev, GAN::y_hat, alg.onevec(n)); }
epoch++;
if(epoch > max_epoch) { break; }
}
}
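    // Architecture note for the training loop above (derived from the methods below): hidden layers added
    // via addLayer are split down the middle. Indices [0, network.size()/2] form the generator, which is
    // fed Gaussian noise of dimension k; the remaining hidden layers plus the sigmoid/log-loss output layer
    // created by addOutputLayer form the discriminator. Each epoch first updates the discriminator on a
    // fake-plus-real batch (targets 0 for fakes, 1 for reals), then updates the generator against all-ones
    // targets; generateExample(n) afterwards runs only the generator half on fresh noise.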
double GAN::score(){
LinAlg alg;
Utilities util;
forwardPass();
return util.performance(y_hat, alg.onevec(n));
}
void GAN::save(std::string fileName){
Utilities util;
if(!network.empty()){
util.saveParameters(fileName, network[0].weights, network[0].bias, 0, 1);
for(int i = 1; i < network.size(); i++){
util.saveParameters(fileName, network[i].weights, network[i].bias, 1, i + 1);
}
util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, 1, network.size() + 1);
}
else{
util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, 0, network.size() + 1);
}
}
void GAN::addLayer(int n_hidden, std::string activation, std::string weightInit, std::string reg, double lambda, double alpha){
LinAlg alg;
if(network.empty()){
network.push_back(HiddenLayer(n_hidden, activation, alg.gaussianNoise(n, k), weightInit, reg, lambda, alpha));
network[0].forwardPass();
}
else{
network.push_back(HiddenLayer(n_hidden, activation, network[network.size() - 1].a, weightInit, reg, lambda, alpha));
network[network.size() - 1].forwardPass();
}
}
void GAN::addOutputLayer(std::string weightInit, std::string reg, double lambda, double alpha){
LinAlg alg;
if(!network.empty()){
outputLayer = new OutputLayer(network[network.size() - 1].n_hidden, "Sigmoid", "LogLoss", network[network.size() - 1].a, weightInit, reg, lambda, alpha);
}
else{
outputLayer = new OutputLayer(k, "Sigmoid", "LogLoss", alg.gaussianNoise(n, k), weightInit, reg, lambda, alpha);
}
}
std::vector<std::vector<double>> GAN::modelSetTestGenerator(std::vector<std::vector<double>> X){
if(!network.empty()){
network[0].input = X;
network[0].forwardPass();
for(int i = 1; i <= network.size()/2; i++){
network[i].input = network[i - 1].a;
network[i].forwardPass();
}
}
return network[network.size()/2].a;
}
std::vector<double> GAN::modelSetTestDiscriminator(std::vector<std::vector<double>> X){
if(!network.empty()){
for(int i = network.size()/2 + 1; i < network.size(); i++){
if(i == network.size()/2 + 1){
network[i].input = X;
}
else { network[i].input = network[i - 1].a; }
network[i].forwardPass();
}
outputLayer->input = network[network.size() - 1].a;
}
outputLayer->forwardPass();
return outputLayer->a;
}
double GAN::Cost(std::vector<double> y_hat, std::vector<double> y){
Reg regularization;
class Cost cost;
double totalRegTerm = 0;
auto cost_function = outputLayer->cost_map[outputLayer->cost];
if(!network.empty()){
for(int i = 0; i < network.size() - 1; i++){
totalRegTerm += regularization.regTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg);
}
}
return (cost.*cost_function)(y_hat, y) + totalRegTerm + regularization.regTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg);
}
void GAN::forwardPass(){
LinAlg alg;
if(!network.empty()){
network[0].input = alg.gaussianNoise(n, k);
network[0].forwardPass();
for(int i = 1; i < network.size(); i++){
network[i].input = network[i - 1].a;
network[i].forwardPass();
}
outputLayer->input = network[network.size() - 1].a;
}
else{ // Should never happen, though.
outputLayer->input = alg.gaussianNoise(n, k);
}
outputLayer->forwardPass();
y_hat = outputLayer->a;
}
void GAN::updateDiscriminatorParameters(std::vector<std::vector<std::vector<double>>> hiddenLayerUpdations, std::vector<double> outputLayerUpdation, double learning_rate){
LinAlg alg;
outputLayer->weights = alg.subtraction(outputLayer->weights, outputLayerUpdation);
outputLayer->bias -= learning_rate * alg.sum_elements(outputLayer->delta) / n;
if(!network.empty()){
network[network.size() - 1].weights = alg.subtraction(network[network.size() - 1].weights, hiddenLayerUpdations[0]);
network[network.size() - 1].bias = alg.subtractMatrixRows(network[network.size() - 1].bias, alg.scalarMultiply(learning_rate/n, network[network.size() - 1].delta));
for(int i = network.size() - 2; i > network.size()/2; i--){
network[i].weights = alg.subtraction(network[i].weights, hiddenLayerUpdations[(network.size() - 2) - i + 1]);
network[i].bias = alg.subtractMatrixRows(network[i].bias, alg.scalarMultiply(learning_rate/n, network[i].delta));
}
}
}
void GAN::updateGeneratorParameters(std::vector<std::vector<std::vector<double>>> hiddenLayerUpdations, double learning_rate){
LinAlg alg;
if(!network.empty()){
for(int i = network.size()/2; i >= 0; i--){
//std::cout << network[i].weights.size() << "x" << network[i].weights[0].size() << std::endl;
//std::cout << hiddenLayerUpdations[(network.size() - 2) - i + 1].size() << "x" << hiddenLayerUpdations[(network.size() - 2) - i + 1][0].size() << std::endl;
network[i].weights = alg.subtraction(network[i].weights, hiddenLayerUpdations[(network.size() - 2) - i + 1]);
network[i].bias = alg.subtractMatrixRows(network[i].bias, alg.scalarMultiply(learning_rate/n, network[i].delta));
}
}
}
std::tuple<std::vector<std::vector<std::vector<double>>>, std::vector<double>> GAN::computeDiscriminatorGradients(std::vector<double> y_hat, std::vector<double> outputSet){
class Cost cost;
Activation avn;
LinAlg alg;
Reg regularization;
std::vector<std::vector<std::vector<double>>> cumulativeHiddenLayerWGrad; // Tensor containing ALL hidden grads.
auto costDeriv = outputLayer->costDeriv_map[outputLayer->cost];
auto outputAvn = outputLayer->activation_map[outputLayer->activation];
outputLayer->delta = alg.hadamard_product((cost.*costDeriv)(y_hat, outputSet), (avn.*outputAvn)(outputLayer->z, 1));
std::vector<double> outputWGrad = alg.mat_vec_mult(alg.transpose(outputLayer->input), outputLayer->delta);
outputWGrad = alg.addition(outputWGrad, regularization.regDerivTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg));
if(!network.empty()){
auto hiddenLayerAvn = network[network.size() - 1].activation_map[network[network.size() - 1].activation];
network[network.size() - 1].delta = alg.hadamard_product(alg.outerProduct(outputLayer->delta, outputLayer->weights), (avn.*hiddenLayerAvn)(network[network.size() - 1].z, 1));
std::vector<std::vector<double>> hiddenLayerWGrad = alg.matmult(alg.transpose(network[network.size() - 1].input), network[network.size() - 1].delta);
cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[network.size() - 1].weights, network[network.size() - 1].lambda, network[network.size() - 1].alpha, network[network.size() - 1].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
//std::cout << "HIDDENLAYER FIRST:" << hiddenLayerWGrad.size() << "x" << hiddenLayerWGrad[0].size() << std::endl;
//std::cout << "WEIGHTS SECOND:" << network[network.size() - 1].weights.size() << "x" << network[network.size() - 1].weights[0].size() << std::endl;
for(int i = network.size() - 2; i > network.size()/2; i--){
auto hiddenLayerAvn = network[i].activation_map[network[i].activation];
network[i].delta = alg.hadamard_product(alg.matmult(network[i + 1].delta, alg.transpose(network[i + 1].weights)), (avn.*hiddenLayerAvn)(network[i].z, 1));
std::vector<std::vector<double>> hiddenLayerWGrad = alg.matmult(alg.transpose(network[i].input), network[i].delta);
cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
}
}
return {cumulativeHiddenLayerWGrad, outputWGrad};
}
std::vector<std::vector<std::vector<double>>> GAN::computeGeneratorGradients(std::vector<double> y_hat, std::vector<double> outputSet){
class Cost cost;
Activation avn;
LinAlg alg;
Reg regularization;
std::vector<std::vector<std::vector<double>>> cumulativeHiddenLayerWGrad; // Tensor containing ALL hidden grads.
auto costDeriv = outputLayer->costDeriv_map[outputLayer->cost];
auto outputAvn = outputLayer->activation_map[outputLayer->activation];
outputLayer->delta = alg.hadamard_product((cost.*costDeriv)(y_hat, outputSet), (avn.*outputAvn)(outputLayer->z, 1));
std::vector<double> outputWGrad = alg.mat_vec_mult(alg.transpose(outputLayer->input), outputLayer->delta);
outputWGrad = alg.addition(outputWGrad, regularization.regDerivTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg));
if(!network.empty()){
auto hiddenLayerAvn = network[network.size() - 1].activation_map[network[network.size() - 1].activation];
network[network.size() - 1].delta = alg.hadamard_product(alg.outerProduct(outputLayer->delta, outputLayer->weights), (avn.*hiddenLayerAvn)(network[network.size() - 1].z, 1));
std::vector<std::vector<double>> hiddenLayerWGrad = alg.matmult(alg.transpose(network[network.size() - 1].input), network[network.size() - 1].delta);
cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[network.size() - 1].weights, network[network.size() - 1].lambda, network[network.size() - 1].alpha, network[network.size() - 1].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
for(int i = network.size() - 2; i >= 0; i--){
auto hiddenLayerAvn = network[i].activation_map[network[i].activation];
network[i].delta = alg.hadamard_product(alg.matmult(network[i + 1].delta, alg.transpose(network[i + 1].weights)), (avn.*hiddenLayerAvn)(network[i].z, 1));
std::vector<std::vector<double>> hiddenLayerWGrad = alg.matmult(alg.transpose(network[i].input), network[i].delta);
cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
}
}
return cumulativeHiddenLayerWGrad;
}
void GAN::UI(int epoch, double cost_prev, std::vector<double> y_hat, std::vector<double> outputSet){
Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
std::cout << "Layer " << network.size() + 1 << ": " << std::endl;
Utilities::UI(outputLayer->weights, outputLayer->bias);
if(!network.empty()){
for(int i = network.size() - 1; i >= 0; i--){
std::cout << "Layer " << i + 1 << ": " << std::endl;
Utilities::UI(network[i].weights, network[i].bias);
}
}
}
}

56
MLPP/GAN/GAN.hpp Normal file
View File

@ -0,0 +1,56 @@
//
// GAN.hpp
//
// Created by Marc Melikyan on 11/4/20.
//
#ifndef GAN_hpp
#define GAN_hpp
#include "HiddenLayer/HiddenLayer.hpp"
#include "OutputLayer/OutputLayer.hpp"
#include <vector>
#include <tuple>
#include <string>
namespace MLPP{
class GAN{
public:
GAN(double k, std::vector<std::vector<double>> outputSet);
~GAN();
std::vector<std::vector<double>> generateExample(int n);
void gradientDescent(double learning_rate, int max_epoch, bool UI = 1);
double score();
void save(std::string fileName);
void addLayer(int n_hidden, std::string activation, std::string weightInit = "Default", std::string reg = "None", double lambda = 0.5, double alpha = 0.5);
void addOutputLayer(std::string weightInit = "Default", std::string reg = "None", double lambda = 0.5, double alpha = 0.5);
private:
std::vector<std::vector<double>> modelSetTestGenerator(std::vector<std::vector<double>> X); // Evaluator for the generator of the gan.
std::vector<double> modelSetTestDiscriminator(std::vector<std::vector<double>> X); // Evaluator for the discriminator of the gan.
double Cost(std::vector<double> y_hat, std::vector<double> y);
void forwardPass();
void updateDiscriminatorParameters(std::vector<std::vector<std::vector<double>>> hiddenLayerUpdations, std::vector<double> outputLayerUpdation, double learning_rate);
void updateGeneratorParameters(std::vector<std::vector<std::vector<double>>> hiddenLayerUpdations, double learning_rate);
std::tuple<std::vector<std::vector<std::vector<double>>>, std::vector<double>> computeDiscriminatorGradients(std::vector<double> y_hat, std::vector<double> outputSet);
std::vector<std::vector<std::vector<double>>> computeGeneratorGradients(std::vector<double> y_hat, std::vector<double> outputSet);
void UI(int epoch, double cost_prev, std::vector<double> y_hat, std::vector<double> outputSet);
std::vector<std::vector<double>> outputSet;
std::vector<double> y_hat;
std::vector<HiddenLayer> network;
OutputLayer *outputLayer;
int n;
int k;
};
}
#endif /* GAN_hpp */
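
A minimal usage sketch of this interface (not part of the commit): the data, layer sizes, and hyperparameters are illustrative, the include paths assume the repository root is on the include path, and the generator/discriminator split follows the network.size()/2 convention used in GAN.cpp above.

#include "MLPP/GAN/GAN.hpp"
#include "MLPP/LinAlg/LinAlg.hpp"
#include <vector>

int main(){
    MLPP::LinAlg alg;
    // Each row is one real training example (toy values, 2 features).
    std::vector<std::vector<double>> outputSet = {{1, 2}, {2, 4}, {3, 6}, {4, 8}};
    MLPP::GAN gan(2, outputSet);   // k = 2: dimension of the Gaussian noise fed to the generator
    gan.addLayer(5, "Sigmoid");    // generator hidden layer
    gan.addLayer(2, "RELU");       // generator output layer; width must match the data dimension
    gan.addLayer(5, "Sigmoid");    // discriminator hidden layer
    gan.addOutputLayer();          // discriminator output head
    gan.gradientDescent(0.1, 1000, false);
    alg.printMatrix(gan.generateExample(4)); // 4 generated samples
    return 0;
}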

View File

@ -0,0 +1,59 @@
//
// GaussMarkovChecker.cpp
//
// Created by Marc Melikyan on 11/13/20.
//
#include "GaussMarkovChecker.hpp"
#include "Stat/Stat.hpp"
#include <iostream>
namespace MLPP{
void GaussMarkovChecker::checkGMConditions(std::vector<double> eps){
bool condition1 = arithmeticMean(eps);
bool condition2 = homoscedasticity(eps);
bool condition3 = exogeneity(eps);
if(condition1 && condition2 && condition3){
std::cout << "Gauss-Markov conditions were not violated. You may use OLS to obtain a BLUE estimator" << std::endl;
}
else{
std::cout << "A test of the expected value of 0 of the error terms returned " << std::boolalpha << condition1 << ", a test of homoscedasticity has returned " << std::boolalpha << condition2 << ", and a test of exogenity has returned " << std::boolalpha << "." << std::endl;
}
}
bool GaussMarkovChecker::arithmeticMean(std::vector<double> eps){
Stat stat;
if(stat.mean(eps) == 0) {
return 1;
}
else { return 0; }
}
bool GaussMarkovChecker::homoscedasticity(std::vector<double> eps){
Stat stat;
double currentVar = (eps[0] - stat.mean(eps)) * (eps[0] - stat.mean(eps)) / eps.size();
for(int i = 0; i < eps.size(); i++){
if(currentVar != (eps[i] - stat.mean(eps)) * (eps[i] - stat.mean(eps)) / eps.size()){
return 0;
}
}
return 1;
}
bool GaussMarkovChecker::exogeneity(std::vector<double> eps){
Stat stat;
for(int i = 0; i < eps.size(); i++){
for(int j = 0; j < eps.size(); j++){
if(i != j){
if((eps[i] - stat.mean(eps)) * (eps[j] - stat.mean(eps)) / eps.size() != 0){
return 0;
}
}
}
}
return 1;
}
}

View File

@ -0,0 +1,27 @@
//
// GaussMarkovChecker.hpp
//
// Created by Marc Melikyan on 11/13/20.
//
#ifndef GaussMarkovChecker_hpp
#define GaussMarkovChecker_hpp
#include <string>
#include <vector>
namespace MLPP{
class GaussMarkovChecker{
public:
void checkGMConditions(std::vector<double> eps);
// Independent, 3 Gauss-Markov Conditions
bool arithmeticMean(std::vector<double> eps); // 1) Arithmetic Mean of 0.
bool homoscedasticity(std::vector<double> eps); // 2) Homoscedasticity
bool exogeneity(std::vector<double> eps); // 3) Cov of any 2 non-equal eps values = 0.
private:
};
}
#endif /* GaussMarkovChecker_hpp */
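
A short usage sketch (toy residuals; include path assumed). Note that all three checks rely on exact floating-point comparisons, so only idealized residual vectors will satisfy them.

#include "MLPP/GaussMarkovChecker/GaussMarkovChecker.hpp"
#include <vector>

int main(){
    MLPP::GaussMarkovChecker checker;
    std::vector<double> residuals = {0.5, -0.5, 0.25, -0.25}; // toy OLS residuals with mean exactly 0
    checker.checkGMConditions(residuals); // prints whether OLS would yield a BLUE estimator
    return 0;
}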

View File

@ -0,0 +1,92 @@
//
// GaussianNB.cpp
//
// Created by Marc Melikyan on 1/17/21.
//
#include "GaussianNB.hpp"
#include "Stat/Stat.hpp"
#include "LinAlg/LinAlg.hpp"
#include "Utilities/Utilities.hpp"
#include <iostream>
#include <algorithm>
#include <random>
namespace MLPP{
GaussianNB::GaussianNB(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, int class_num)
: inputSet(inputSet), outputSet(outputSet), class_num(class_num)
{
y_hat.resize(outputSet.size());
Evaluate();
LinAlg alg;
}
std::vector<double> GaussianNB::modelSetTest(std::vector<std::vector<double>> X){
std::vector<double> y_hat;
for(int i = 0; i < X.size(); i++){
y_hat.push_back(modelTest(X[i]));
}
return y_hat;
}
double GaussianNB::modelTest(std::vector<double> x){
Stat stat;
LinAlg alg;
double score[class_num];
double y_hat_i = 1;
for(int i = class_num - 1; i >= 0; i--){
for(int j = 0; j < x.size(); j++){
y_hat_i += std::log(priors[i] * (1 / sqrt(2 * M_PI * sigma[i] * sigma[i])) * exp(-(x[j] - mu[i]) * (x[j] - mu[i]) / (2 * sigma[i] * sigma[i])));
}
score[i] = exp(y_hat_i);
}
return std::distance(score, std::max_element(score, score + sizeof(score) / sizeof(double)));
}
double GaussianNB::score(){
Utilities util;
return util.performance(y_hat, outputSet);
}
void GaussianNB::Evaluate(){
Stat stat;
LinAlg alg;
// Computing mu_k_y and sigma_k_y
mu.resize(class_num);
sigma.resize(class_num);
for(int i = class_num - 1; i >= 0; i--){
std::vector<double> set;
for(int j = 0; j < inputSet.size(); j++){
for(int k = 0; k < inputSet[j].size(); k++){
if(outputSet[j] == i){
set.push_back(inputSet[j][k]);
}
}
}
mu[i] = stat.mean(set);
sigma[i] = stat.standardDeviation(set);
}
// Priors
priors.resize(class_num);
for(int i = 0; i < outputSet.size(); i++){
priors[int(outputSet[i])]++;
}
priors = alg.scalarMultiply( double(1)/double(outputSet.size()), priors);
for(int i = 0; i < outputSet.size(); i++){
double score[class_num];
double y_hat_i = 1;
for(int j = class_num - 1; j >= 0; j--){
for(int k = 0; k < inputSet[i].size(); k++){
y_hat_i += std::log(priors[j] * (1 / sqrt(2 * M_PI * sigma[j] * sigma[j])) * exp(-(inputSet[i][k] - mu[j]) * (inputSet[i][k] - mu[j]) / (2 * sigma[j] * sigma[j])));
}
score[j] = exp(y_hat_i);
std::cout << score[j] << std::endl;
}
y_hat[i] = std::distance(score, std::max_element(score, score + sizeof(score) / sizeof(double)));
std::cout << std::distance(score, std::max_element(score, score + sizeof(score) / sizeof(double))) << std::endl;
}
}
}

View File

@ -0,0 +1,42 @@
//
// GaussianNB.hpp
//
// Created by Marc Melikyan on 1/17/21.
//
#ifndef GaussianNB_hpp
#define GaussianNB_hpp
#include <vector>
namespace MLPP{
class GaussianNB{
public:
GaussianNB(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, int class_num);
std::vector<double> modelSetTest(std::vector<std::vector<double>> X);
double modelTest(std::vector<double> x);
double score();
private:
void Evaluate();
int class_num;
std::vector<double> priors;
std::vector<double> mu;
std::vector<double> sigma;
std::vector<std::vector<double>> inputSet;
std::vector<double> outputSet;
std::vector<double> y_hat;
};
}
#endif /* GaussianNB_hpp */

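A minimal usage sketch (toy two-class data, 0-based labels; include path assumed):

#include "MLPP/GaussianNB/GaussianNB.hpp"
#include <iostream>
#include <vector>

int main(){
    std::vector<std::vector<double>> inputSet = {{1.0, 1.2}, {0.9, 1.1}, {5.0, 5.2}, {4.8, 5.1}};
    std::vector<double> outputSet = {0, 0, 1, 1};   // class indices in [0, class_num)
    MLPP::GaussianNB gnb(inputSet, outputSet, 2);   // class_num = 2; fitting happens in the constructor
    std::cout << "Predicted class: " << gnb.modelTest({5.1, 5.0}) << std::endl;
    std::cout << "Training score: " << gnb.score() << std::endl;
    return 0;
}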
View File

@ -0,0 +1,114 @@
//
// HiddenLayer.cpp
//
// Created by Marc Melikyan on 11/4/20.
//
#include "HiddenLayer.hpp"
#include "Activation/Activation.hpp"
#include "LinAlg/LinAlg.hpp"
#include "Utilities/Utilities.hpp"
#include <iostream>
#include <random>
namespace MLPP {
HiddenLayer::HiddenLayer(int n_hidden, std::string activation, std::vector<std::vector<double>> input, std::string weightInit, std::string reg, double lambda, double alpha)
: n_hidden(n_hidden), activation(activation), input(input), weightInit(weightInit), reg(reg), lambda(lambda), alpha(alpha)
{
weights = Utilities::weightInitialization(input[0].size(), n_hidden, weightInit);
bias = Utilities::biasInitialization(n_hidden);
activation_map["Linear"] = &Activation::linear;
activationTest_map["Linear"] = &Activation::linear;
activation_map["Sigmoid"] = &Activation::sigmoid;
activationTest_map["Sigmoid"] = &Activation::sigmoid;
activation_map["Swish"] = &Activation::swish;
activationTest_map["Swish"] = &Activation::swish;
activation_map["Mish"] = &Activation::mish;
activationTest_map["Mish"] = &Activation::mish;
activation_map["SinC"] = &Activation::sinc;
activationTest_map["SinC"] = &Activation::sinc;
activation_map["Softplus"] = &Activation::softplus;
activationTest_map["Softplus"] = &Activation::softplus;
activation_map["Softsign"] = &Activation::softsign;
activationTest_map["Softsign"] = &Activation::softsign;
activation_map["CLogLog"] = &Activation::cloglog;
activationTest_map["CLogLog"] = &Activation::cloglog;
activation_map["Logit"] = &Activation::logit;
activationTest_map["Logit"] = &Activation::logit;
activation_map["GaussianCDF"] = &Activation::gaussianCDF;
activationTest_map["GaussianCDF"] = &Activation::gaussianCDF;
activation_map["RELU"] = &Activation::RELU;
activationTest_map["RELU"] = &Activation::RELU;
activation_map["GELU"] = &Activation::GELU;
activationTest_map["GELU"] = &Activation::GELU;
activation_map["Sign"] = &Activation::sign;
activationTest_map["Sign"] = &Activation::sign;
activation_map["UnitStep"] = &Activation::unitStep;
activationTest_map["UnitStep"] = &Activation::unitStep;
activation_map["Sinh"] = &Activation::sinh;
activationTest_map["Sinh"] = &Activation::sinh;
activation_map["Cosh"] = &Activation::cosh;
activationTest_map["Cosh"] = &Activation::cosh;
activation_map["Tanh"] = &Activation::tanh;
activationTest_map["Tanh"] = &Activation::tanh;
activation_map["Csch"] = &Activation::csch;
activationTest_map["Csch"] = &Activation::csch;
activation_map["Sech"] = &Activation::sech;
activationTest_map["Sech"] = &Activation::sech;
activation_map["Coth"] = &Activation::coth;
activationTest_map["Coth"] = &Activation::coth;
activation_map["Arsinh"] = &Activation::arsinh;
activationTest_map["Arsinh"] = &Activation::arsinh;
activation_map["Arcosh"] = &Activation::arcosh;
activationTest_map["Arcosh"] = &Activation::arcosh;
activation_map["Artanh"] = &Activation::artanh;
activationTest_map["Artanh"] = &Activation::artanh;
activation_map["Arcsch"] = &Activation::arcsch;
activationTest_map["Arcsch"] = &Activation::arcsch;
activation_map["Arsech"] = &Activation::arsech;
activationTest_map["Arsech"] = &Activation::arsech;
activation_map["Arcoth"] = &Activation::arcoth;
activationTest_map["Arcoth"] = &Activation::arcoth;
}
void HiddenLayer::forwardPass(){
LinAlg alg;
Activation avn;
z = alg.mat_vec_add(alg.matmult(input, weights), bias);
a = (avn.*activation_map[activation])(z, 0);
}
void HiddenLayer::Test(std::vector<double> x){
LinAlg alg;
Activation avn;
z_test = alg.addition(alg.mat_vec_mult(alg.transpose(weights), x), bias);
a_test = (avn.*activationTest_map[activation])(z_test, 0);
}
}

View File

@ -0,0 +1,52 @@
//
// HiddenLayer.hpp
//
// Created by Marc Melikyan on 11/4/20.
//
#ifndef HiddenLayer_hpp
#define HiddenLayer_hpp
#include "Activation/Activation.hpp"
#include <vector>
#include <map>
#include <string>
namespace MLPP {
class HiddenLayer{
public:
HiddenLayer(int n_hidden, std::string activation, std::vector<std::vector<double>> input, std::string weightInit, std::string reg, double lambda, double alpha);
int n_hidden;
std::string activation;
std::vector<std::vector<double>> input;
std::vector<std::vector<double>> weights;
std::vector<double> bias;
std::vector<std::vector<double>> z;
std::vector<std::vector<double>> a;
std::map<std::string, std::vector<std::vector<double>> (Activation::*)(std::vector<std::vector<double>>, bool)> activation_map;
std::map<std::string, std::vector<double> (Activation::*)(std::vector<double>, bool)> activationTest_map;
std::vector<double> z_test;
std::vector<double> a_test;
std::vector<std::vector<double>> delta;
// Regularization Params
std::string reg;
double lambda; /* Regularization Parameter */
double alpha; /* This is the controlling param for Elastic Net*/
std::string weightInit;
void forwardPass();
void Test(std::vector<double> x);
};
}
#endif /* HiddenLayer_hpp */
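
A small sketch of driving a HiddenLayer directly, outside of the ANN/MANN/GAN wrappers (toy inputs; include paths assumed):

#include "MLPP/HiddenLayer/HiddenLayer.hpp"
#include "MLPP/LinAlg/LinAlg.hpp"
#include <vector>

int main(){
    MLPP::LinAlg alg;
    std::vector<std::vector<double>> X = {{0.0, 1.0}, {1.0, 0.0}, {1.0, 1.0}}; // 3 samples, 2 features
    // 4 hidden units, sigmoid activation, default weight init, no regularization.
    MLPP::HiddenLayer layer(4, "Sigmoid", X, "Default", "None", 0.5, 0.5);
    layer.forwardPass();            // z = XW + b, a = sigmoid(z)
    alg.printMatrix(layer.a);       // 3x4 activation matrix
    layer.Test({0.5, 0.5});         // single-sample path
    alg.printVector(layer.a_test);  // 4-element activation vector
    return 0;
}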

View File

@ -0,0 +1,19 @@
//
// HypothesisTesting.cpp
//
// Created by Marc Melikyan on 3/10/21.
//
#include "HypothesisTesting.hpp"
namespace MLPP{
std::tuple<bool, double> HypothesisTesting::chiSquareTest(std::vector<double> observed, std::vector<double> expected){
double df = observed.size() - 1; // These are our degrees of freedom
double sum = 0;
for(int i = 0; i < observed.size(); i++){
sum += (observed[i] - expected[i]) * (observed[i] - expected[i]) / expected[i];
}
// The boolean is a placeholder decision flag; completing the test would compare
// the statistic against the chi-square critical value for df degrees of freedom.
return {false, sum};
}
}

View File

@ -0,0 +1,24 @@
//
// HypothesisTesting.hpp
//
// Created by Marc Melikyan on 3/10/21.
//
#ifndef HypothesisTesting_hpp
#define HypothesisTesting_hpp
#include <vector>
#include <tuple>
namespace MLPP{
class HypothesisTesting{
public:
std::tuple<bool, double> chiSquareTest(std::vector<double> observed, std::vector<double> expected);
private:
};
}
#endif /* HypothesisTesting_hpp */
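
A usage sketch with toy counts (include path assumed). Since the implementation above only computes the statistic, only the second element of the returned tuple carries information.

#include "MLPP/HypothesisTesting/HypothesisTesting.hpp"
#include <iostream>
#include <vector>

int main(){
    MLPP::HypothesisTesting ht;
    std::vector<double> observed = {18, 22, 20, 40};
    std::vector<double> expected = {25, 25, 25, 25};
    auto [decision, chi_sq] = ht.chiSquareTest(observed, expected);
    std::cout << "chi-square statistic: " << chi_sq << std::endl; // compare against a critical value by hand
    return 0;
}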

235
MLPP/KMeans/KMeans.cpp Normal file
View File

@ -0,0 +1,235 @@
//
// KMeans.cpp
//
// Created by Marc Melikyan on 10/2/20.
//
#include "KMeans.hpp"
#include "Utilities/Utilities.hpp"
#include "LinAlg/LinAlg.hpp"
#include <iostream>
#include <random>
#include <climits>
namespace MLPP{
KMeans::KMeans(std::vector<std::vector<double>> inputSet, int k, std::string init_type)
: inputSet(inputSet), k(k), init_type(init_type)
{
if(init_type == "KMeans++"){
kmeansppInitialization(k);
}
else{
centroidInitialization(k);
}
}
std::vector<std::vector<double>> KMeans::modelSetTest(std::vector<std::vector<double>> X){
LinAlg alg;
std::vector<std::vector<double>> closestCentroids;
for(int i = 0; i < X.size(); i++){
std::vector<double> closestCentroid = mu[0];
for(int j = 0; j < mu.size(); j++){
bool isCentroidCloser = alg.euclideanDistance(X[i], mu[j]) < alg.euclideanDistance(X[i], closestCentroid);
if(isCentroidCloser){
closestCentroid = mu[j];
}
}
closestCentroids.push_back(closestCentroid);
}
return closestCentroids;
}
std::vector<double> KMeans::modelTest(std::vector<double> x){
LinAlg alg;
std::vector<double> closestCentroid = mu[0];
for(int j = 0; j < mu.size(); j++){
if(alg.euclideanDistance(x, mu[j]) < alg.euclideanDistance(x, closestCentroid)){
closestCentroid = mu[j];
}
}
return closestCentroid;
}
void KMeans::train(int epoch_num, bool UI){
double cost_prev = 0;
int epoch = 1;
Evaluate();
while(true){
// STEPS OF THE ALGORITHM
// 1. DETERMINE r_nk
// 2. DETERMINE J
// 3. DETERMINE mu_k
// STOP IF CONVERGED, ELSE REPEAT
cost_prev = Cost();
computeMu();
Evaluate();
// UI PORTION
if(UI) { Utilities::CostInfo(epoch, cost_prev, Cost()); }
epoch++;
if(epoch > epoch_num) { break; }
}
}
double KMeans::score(){
return Cost();
}
std::vector<double> KMeans::silhouette_scores(){
LinAlg alg;
std::vector<std::vector<double>> closestCentroids = modelSetTest(inputSet);
std::vector<double> silhouette_scores;
for(int i = 0; i < inputSet.size(); i++){
// COMPUTING a[i]
double a = 0;
double own_clusterSize = 0; // number of points assigned to the same cluster as point i
for(int j = 0; j < inputSet.size(); j++){
if(r[i] == r[j]){
own_clusterSize++;
if(i != j){
a += alg.euclideanDistance(inputSet[i], inputSet[j]);
}
}
}
// NORMALIZE a[i] by the number of other points in point i's cluster
a /= own_clusterSize - 1;
// COMPUTING b[i]
double b = INT_MAX;
for(int j = 0; j < mu.size(); j++){
if(closestCentroids[i] != mu[j]){
double sum = 0;
for(int k = 0; k < inputSet.size(); k++){
if(closestCentroids[k] == mu[j]){ // only points belonging to this other cluster
sum += alg.euclideanDistance(inputSet[i], inputSet[k]);
}
}
// NORMALIZE b[i]
double k_clusterSize = 0;
for(int k = 0; k < closestCentroids.size(); k++){
if(closestCentroids[k] == mu[j]){
k_clusterSize++;
}
}
if(sum / k_clusterSize < b) { b = sum / k_clusterSize; }
}
}
silhouette_scores.push_back((b - a)/fmax(a, b));
// Or the expanded version:
// if(a < b) {
// silhouette_scores.push_back(1 - a/b);
// }
// else if(a == b){
// silhouette_scores.push_back(0);
// }
// else{
// silhouette_scores.push_back(b/a - 1);
// }
}
return silhouette_scores;
}
// This simply computes r_nk
void KMeans::Evaluate(){
LinAlg alg;
r.resize(inputSet.size());
for(int i = 0; i < r.size(); i++){
r[i].resize(k);
}
for(int i = 0; i < r.size(); i++){
std::vector<double> closestCentroid = mu[0];
for(int j = 0; j < r[0].size(); j++){
bool isCentroidCloser = alg.euclideanDistance(inputSet[i], mu[j]) < alg.euclideanDistance(inputSet[i], closestCentroid);
if(isCentroidCloser){
closestCentroid = mu[j];
}
}
for(int j = 0; j < r[0].size(); j++){
if(mu[j] == closestCentroid) {
r[i][j] = 1;
}
else { r[i][j] = 0; }
}
}
}
// This simply computes or re-computes mu_k
void KMeans::computeMu(){
LinAlg alg;
for(int i = 0; i < mu.size(); i++){
std::vector<double> num;
num.resize(r.size());
for(int i = 0; i < num.size(); i++){
num[i] = 0;
}
double den = 0;
for(int j = 0; j < r.size(); j++){
num = alg.addition(num, alg.scalarMultiply(r[j][i], inputSet[j]));
}
for(int j = 0; j < r.size(); j++){
den += r[j][i];
}
mu[i] = alg.scalarMultiply(double(1)/double(den), num);
}
}
void KMeans::centroidInitialization(int k){
mu.resize(k);
for(int i = 0; i < k; i++){
std::random_device rd;
std::default_random_engine generator(rd());
std::uniform_int_distribution<int> distribution(0, int(inputSet.size() - 1));
mu[i].resize(inputSet.size());
mu[i] = inputSet[distribution(generator)];
}
}
void KMeans::kmeansppInitialization(int k){
LinAlg alg;
std::random_device rd;
std::default_random_engine generator(rd());
std::uniform_int_distribution<int> distribution(0, int(inputSet.size() - 1));
mu.push_back(inputSet[distribution(generator)]);
for(int i = 0; i < k - 1; i++){
std::vector<double> farthestCentroid;
double max_dist = 0; // must persist across candidate points, not reset per point
for(int j = 0; j < inputSet.size(); j++){
/* SUM ALL THE SQUARED DISTANCES, CHOOSE THE ONE THAT'S FARTHEST
AS TO SPREAD OUT THE CLUSTER CENTROIDS. */
double sum = 0;
for(int k = 0; k < mu.size(); k++){
sum += alg.euclideanDistance(inputSet[j], mu[k]);
}
if(sum * sum > max_dist){
farthestCentroid = inputSet[j];
max_dist = sum * sum;
}
}
mu.push_back(farthestCentroid);
}
}
double KMeans::Cost(){
LinAlg alg;
double sum = 0;
for(int i = 0; i < r.size(); i++){
for(int j = 0; j < r[0].size(); j++){
sum += r[i][j] * alg.norm_sq(alg.subtraction(inputSet[i], mu[j]));
}
}
return sum;
}
}

45
MLPP/KMeans/KMeans.hpp Normal file
View File

@ -0,0 +1,45 @@
//
// KMeans.hpp
//
// Created by Marc Melikyan on 10/2/20.
//
#ifndef KMeans_hpp
#define KMeans_hpp
#include <vector>
#include <string>
namespace MLPP{
class KMeans{
public:
KMeans(std::vector<std::vector<double>> inputSet, int k, std::string init_type = "Default");
std::vector<std::vector<double>> modelSetTest(std::vector<std::vector<double>> X);
std::vector<double> modelTest(std::vector<double> x);
void train(int epoch_num, bool UI = 1);
double score();
std::vector<double> silhouette_scores();
private:
void Evaluate();
void computeMu();
void centroidInitialization(int k);
void kmeansppInitialization(int k);
double Cost();
std::vector<std::vector<double>> inputSet;
std::vector<std::vector<double>> mu;
std::vector<std::vector<double>> r;
double euclideanDistance(std::vector<double> A, std::vector<double> B);
double accuracy_threshold;
int k;
std::string init_type;
};
}
#endif /* KMeans_hpp */
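
A minimal usage sketch on toy 2-D points (include paths assumed):

#include "MLPP/KMeans/KMeans.hpp"
#include "MLPP/LinAlg/LinAlg.hpp"
#include <iostream>
#include <vector>

int main(){
    MLPP::LinAlg alg;
    std::vector<std::vector<double>> inputSet = {{1.0, 1.0}, {1.2, 0.8}, {0.9, 1.1},
                                                 {8.0, 8.0}, {8.2, 7.9}, {7.8, 8.1}};
    MLPP::KMeans kmeans(inputSet, 2, "KMeans++");   // k = 2 clusters, KMeans++ seeding
    kmeans.train(10, false);                        // 10 epochs, no per-epoch printout
    alg.printMatrix(kmeans.modelSetTest(inputSet)); // closest centroid for each point
    std::cout << "Distortion (cost): " << kmeans.score() << std::endl;
    alg.printVector(kmeans.silhouette_scores());
    return 0;
}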

1231
MLPP/LinAlg/LinAlg.cpp Normal file

File diff suppressed because it is too large Load Diff

236
MLPP/LinAlg/LinAlg.hpp Normal file
View File

@ -0,0 +1,236 @@
//
// LinAlg.hpp
//
// Created by Marc Melikyan on 1/8/21.
//
#ifndef LinAlg_hpp
#define LinAlg_hpp
#include <vector>
#include <tuple>
namespace MLPP{
class LinAlg{
public:
// MATRIX FUNCTIONS
std::vector<std::vector<double>> gramMatrix(std::vector<std::vector<double>> A);
bool linearIndependenceChecker(std::vector<std::vector<double>> A);
std::vector<std::vector<double>> gaussianNoise(int n, int m);
std::vector<std::vector<double>> addition(std::vector<std::vector<double>> A, std::vector<std::vector<double>> B);
std::vector<std::vector<double>> subtraction(std::vector<std::vector<double>> A, std::vector<std::vector<double>> B);
std::vector<std::vector<double>> matmult(std::vector<std::vector<double>> A, std::vector<std::vector<double>> B);
std::vector<std::vector<double>> hadamard_product(std::vector<std::vector<double>> A, std::vector<std::vector<double>> B);
std::vector<std::vector<double>> kronecker_product(std::vector<std::vector<double>> A, std::vector<std::vector<double>> B);
std::vector<std::vector<double>> elementWiseDivision(std::vector<std::vector<double>> A, std::vector<std::vector<double>> B);
std::vector<std::vector<double>> transpose(std::vector<std::vector<double>> A);
std::vector<std::vector<double>> scalarMultiply(double scalar, std::vector<std::vector<double>> A);
std::vector<std::vector<double>> scalarAdd(double scalar, std::vector<std::vector<double>> A);
std::vector<std::vector<double>> log(std::vector<std::vector<double>> A);
std::vector<std::vector<double>> log10(std::vector<std::vector<double>> A);
std::vector<std::vector<double>> exp(std::vector<std::vector<double>> A);
std::vector<std::vector<double>> erf(std::vector<std::vector<double>> A);
std::vector<std::vector<double>> exponentiate(std::vector<std::vector<double>> A, double p);
std::vector<std::vector<double>> sqrt(std::vector<std::vector<double>> A);
std::vector<std::vector<double>> cbrt(std::vector<std::vector<double>> A);
std::vector<std::vector<double>> matrixPower(std::vector<std::vector<double>> A, int n);
std::vector<std::vector<double>> abs(std::vector<std::vector<double>> A);
double det(std::vector<std::vector<double>> A, int d);
double trace(std::vector<std::vector<double>> A);
std::vector<std::vector<double>> cofactor(std::vector<std::vector<double>> A, int n, int i, int j);
std::vector<std::vector<double>> adjoint(std::vector<std::vector<double>> A);
std::vector<std::vector<double>> inverse(std::vector<std::vector<double>> A);
std::vector<std::vector<double>> pinverse(std::vector<std::vector<double>> A);
std::vector<std::vector<double>> zeromat(int n, int m);
std::vector<std::vector<double>> onemat(int n, int m);
std::vector<std::vector<double>> full(int n, int m, int k);
std::vector<std::vector<double>> sin(std::vector<std::vector<double>> A);
std::vector<std::vector<double>> cos(std::vector<std::vector<double>> A);
std::vector<std::vector<double>> rotate(std::vector<std::vector<double>> A, double theta, int axis = -1);
std::vector<std::vector<double>> max(std::vector<std::vector<double>> A, std::vector<std::vector<double>> B);
double max(std::vector<std::vector<double>> A);
double min(std::vector<std::vector<double>> A);
std::vector<std::vector<double>> round(std::vector<std::vector<double>> A);
double norm_2(std::vector<std::vector<double>> A);
std::vector<std::vector<double>> identity(double d);
std::vector<std::vector<double>> cov(std::vector<std::vector<double>> A);
std::tuple<std::vector<std::vector<double>>, std::vector<std::vector<double>>> eig(std::vector<std::vector<double>> A);
std::tuple<std::vector<std::vector<double>>, std::vector<std::vector<double>>, std::vector<std::vector<double>>> SVD(std::vector<std::vector<double>> A);
std::vector<double> vectorProjection(std::vector<double> a, std::vector<double> b);
std::vector<std::vector<double>> gramSchmidtProcess(std::vector<std::vector<double>> A);
std::tuple<std::vector<std::vector<double>>, std::vector<std::vector<double>>> QRD(std::vector<std::vector<double>> A);
std::tuple<std::vector<std::vector<double>>, std::vector<std::vector<double>>> chol(std::vector<std::vector<double>> A);
double sum_elements(std::vector<std::vector<double>> A);
std::vector<double> flatten(std::vector<std::vector<double>> A);
std::vector<double> solve(std::vector<std::vector<double>> A, std::vector<double> b);
bool positiveDefiniteChecker(std::vector<std::vector<double>> A);
bool negativeDefiniteChecker(std::vector<std::vector<double>> A);
bool zeroEigenvalue(std::vector<std::vector<double>> A);
void printMatrix(std::vector<std::vector<double>> A);
// VECTOR FUNCTIONS
std::vector<std::vector<double>> outerProduct(std::vector<double> a, std::vector<double> b); // This multiplies a, bT
std::vector<double> hadamard_product(std::vector<double> a, std::vector<double> b);
std::vector<double> elementWiseDivision(std::vector<double> a, std::vector<double> b);
std::vector<double> scalarMultiply(double scalar, std::vector<double> a);
std::vector<double> scalarAdd(double scalar, std::vector<double> a);
std::vector<double> addition(std::vector<double> a, std::vector<double> b);
std::vector<double> subtraction(std::vector<double> a, std::vector<double> b);
std::vector<double> subtractMatrixRows(std::vector<double> a, std::vector<std::vector<double>> B);
std::vector<double> log(std::vector<double> a);
std::vector<double> log10(std::vector<double> a);
std::vector<double> exp(std::vector<double> a);
std::vector<double> erf(std::vector<double> a);
std::vector<double> exponentiate(std::vector<double> a, double p);
std::vector<double> sqrt(std::vector<double> a);
std::vector<double> cbrt(std::vector<double> a);
double dot(std::vector<double> a, std::vector<double> b);
std::vector<double> cross(std::vector<double> a, std::vector<double> b);
std::vector<double> abs(std::vector<double> a);
std::vector<double> zerovec(int n);
std::vector<double> onevec(int n);
std::vector<std::vector<double>> diag(std::vector<double> a);
std::vector<double> full(int n, int k);
std::vector<double> sin(std::vector<double> a);
std::vector<double> cos(std::vector<double> a);
std::vector<double> max(std::vector<double> a, std::vector<double> b);
double max(std::vector<double> a);
double min(std::vector<double> a);
std::vector<double> round(std::vector<double> a);
double euclideanDistance(std::vector<double> a, std::vector<double> b);
double norm_2(std::vector<double> a);
double norm_sq(std::vector<double> a);
double sum_elements(std::vector<double> a);
double cosineSimilarity(std::vector<double> a, std::vector<double> b);
void printVector(std::vector<double> a);
// MATRIX-VECTOR FUNCTIONS
std::vector<std::vector<double>> mat_vec_add(std::vector<std::vector<double>> A, std::vector<double> b);
std::vector<double> mat_vec_mult(std::vector<std::vector<double>> A, std::vector<double> b);
// TENSOR FUNCTIONS
std::vector<std::vector<std::vector<double>>> addition(std::vector<std::vector<std::vector<double>>> A, std::vector<std::vector<std::vector<double>>> B);
std::vector<std::vector<std::vector<double>>> elementWiseDivision(std::vector<std::vector<std::vector<double>>> A, std::vector<std::vector<std::vector<double>>> B);
std::vector<std::vector<std::vector<double>>> sqrt(std::vector<std::vector<std::vector<double>>> A);
std::vector<std::vector<std::vector<double>>> exponentiate(std::vector<std::vector<std::vector<double>>> A, double p);
std::vector<std::vector<double>> tensor_vec_mult(std::vector<std::vector<std::vector<double>>> A, std::vector<double> b);
std::vector<double> flatten(std::vector<std::vector<std::vector<double>>> A);
void printTensor(std::vector<std::vector<std::vector<double>>> A);
std::vector<std::vector<std::vector<double>>> scalarMultiply(double scalar, std::vector<std::vector<std::vector<double>>> A);
std::vector<std::vector<std::vector<double>>> scalarAdd(double scalar, std::vector<std::vector<std::vector<double>>> A);
std::vector<std::vector<std::vector<double>>> resize(std::vector<std::vector<std::vector<double>>> A, std::vector<std::vector<std::vector<double>>> B);
std::vector<std::vector<std::vector<double>>> hadamard_product(std::vector<std::vector<std::vector<double>>> A, std::vector<std::vector<std::vector<double>>> B);
std::vector<std::vector<std::vector<double>>> max(std::vector<std::vector<std::vector<double>>> A, std::vector<std::vector<std::vector<double>>> B);
std::vector<std::vector<std::vector<double>>> abs(std::vector<std::vector<std::vector<double>>> A);
double norm_2(std::vector<std::vector<std::vector<double>>> A);
std::vector<std::vector<std::vector<double>>> vector_wise_tensor_product(std::vector<std::vector<std::vector<double>>> A, std::vector<std::vector<double>> B);
private:
};
}
#endif /* LinAlg_hpp */
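
A few representative calls into LinAlg (values are illustrative; include path assumed):

#include "MLPP/LinAlg/LinAlg.hpp"
#include <iostream>
#include <vector>

int main(){
    MLPP::LinAlg alg;
    std::vector<std::vector<double>> A = {{2, 1}, {1, 3}};
    std::vector<std::vector<double>> B = {{0, 1}, {1, 0}};
    alg.printMatrix(alg.matmult(A, B));           // matrix product
    alg.printMatrix(alg.transpose(A));
    alg.printMatrix(alg.inverse(A));
    std::cout << alg.det(A, 2) << std::endl;      // determinant of the 2x2 matrix: 5
    std::vector<double> a = {1, 2, 3};
    std::vector<double> b = {4, 5, 6};
    std::cout << alg.dot(a, b) << std::endl;      // 32
    alg.printVector(alg.cross(a, b));             // {-3, 6, -3}
    return 0;
}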

233
MLPP/LinReg/LinReg.cpp Normal file
View File

@ -0,0 +1,233 @@
//
// LinReg.cpp
//
// Created by Marc Melikyan on 10/2/20.
//
#include "LinReg.hpp"
#include "LinAlg/LinAlg.hpp"
#include "Stat/Stat.hpp"
#include "Regularization/Reg.hpp"
#include "Utilities/Utilities.hpp"
#include "Cost/Cost.hpp"
#include <iostream>
#include <cmath>
#include <random>
namespace MLPP{
LinReg::LinReg(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, std::string reg, double lambda, double alpha)
: inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha)
{
y_hat.resize(n);
weights = Utilities::weightInitialization(k);
bias = Utilities::biasInitialization();
}
std::vector<double> LinReg::modelSetTest(std::vector<std::vector<double>> X){
return Evaluate(X);
}
double LinReg::modelTest(std::vector<double> x){
return Evaluate(x);
}
void LinReg::NewtonRaphson(double learning_rate, int max_epoch, bool UI){
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
forwardPass();
while(true){
cost_prev = Cost(y_hat, outputSet);
std::vector<double> error = alg.subtraction(y_hat, outputSet);
// Calculating the weight gradients (2nd derivative)
std::vector<double> first_derivative = alg.mat_vec_mult(alg.transpose(inputSet), error);
std::vector<std::vector<double>> second_derivative = alg.matmult(alg.transpose(inputSet), inputSet);
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(alg.inverse(second_derivative)), first_derivative)));
weights = regularization.regWeights(weights, lambda, alpha, reg);
// Calculating the bias gradients (2nd derivative)
bias -= learning_rate * alg.sum_elements(error) / n; // We keep this the same. The 2nd derivative is just [1].
forwardPass();
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
Utilities::UI(weights, bias);
}
epoch++;
if(epoch > max_epoch) { break; }
}
}
void LinReg::gradientDescent(double learning_rate, int max_epoch, bool UI){
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
forwardPass();
while(true){
cost_prev = Cost(y_hat, outputSet);
std::vector<double> error = alg.subtraction(y_hat, outputSet);
// Calculating the weight gradients
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputSet), error)));
weights = regularization.regWeights(weights, lambda, alpha, reg);
// Calculating the bias gradients
bias -= learning_rate * alg.sum_elements(error) / n;
forwardPass();
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
Utilities::UI(weights, bias);
}
epoch++;
if(epoch > max_epoch) { break; }
}
}
void LinReg::SGD(double learning_rate, int max_epoch, bool UI){
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
while(true){
std::random_device rd;
std::default_random_engine generator(rd());
std::uniform_int_distribution<int> distribution(0, int(n - 1));
int outputIndex = distribution(generator);
double y_hat = Evaluate(inputSet[outputIndex]);
cost_prev = Cost({y_hat}, {outputSet[outputIndex]});
double error = y_hat - outputSet[outputIndex];
// Weight updation
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate * error, inputSet[outputIndex]));
weights = regularization.regWeights(weights, lambda, alpha, reg);
// Bias updation
bias -= learning_rate * error;
y_hat = Evaluate({inputSet[outputIndex]});
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost({y_hat}, {outputSet[outputIndex]}));
Utilities::UI(weights, bias);
}
epoch++;
if(epoch > max_epoch) { break; }
}
forwardPass();
}
void LinReg::MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI){
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
// Creating the mini-batches
int n_mini_batch = n/mini_batch_size;
auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
while(true){
for(int i = 0; i < n_mini_batch; i++){
std::vector<double> y_hat = Evaluate(inputMiniBatches[i]);
cost_prev = Cost(y_hat, outputMiniBatches[i]);
std::vector<double> error = alg.subtraction(y_hat, outputMiniBatches[i]);
// Calculating the weight gradients
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error)));
weights = regularization.regWeights(weights, lambda, alpha, reg);
// Calculating the bias gradients
bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size();
y_hat = Evaluate(inputMiniBatches[i]);
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
Utilities::UI(weights, bias);
}
}
epoch++;
if(epoch > max_epoch) { break; }
}
forwardPass();
}
void LinReg::normalEquation(){
LinAlg alg;
Stat stat;
std::vector<double> x_means;
std::vector<std::vector<double>> inputSetT = alg.transpose(inputSet);
x_means.resize(inputSetT.size());
for(int i = 0; i < inputSetT.size(); i++){
x_means[i] = (stat.mean(inputSetT[i]));
}
try{
std::vector<double> temp;
temp.resize(k);
temp = alg.mat_vec_mult(alg.inverse(alg.matmult(alg.transpose(inputSet), inputSet)), alg.mat_vec_mult(alg.transpose(inputSet), outputSet));
if(std::isnan(temp[0])){
throw 99;
}
else{
if(reg == "Ridge") {
weights = alg.mat_vec_mult(alg.inverse(alg.addition(alg.matmult(alg.transpose(inputSet), inputSet), alg.scalarMultiply(lambda, alg.identity(k)))), alg.mat_vec_mult(alg.transpose(inputSet), outputSet));
}
else{ weights = alg.mat_vec_mult(alg.inverse(alg.matmult(alg.transpose(inputSet), inputSet)), alg.mat_vec_mult(alg.transpose(inputSet), outputSet)); }
bias = stat.mean(outputSet) - alg.dot(weights, x_means);
forwardPass();
}
}
catch(int err_num){
std::cout << "ERR " << err_num << ": Resulting matrix was noninvertible/degenerate, and so the normal equation could not be performed. Try utilizing gradient descent." << std::endl;
}
}
double LinReg::score(){
Utilities util;
return util.performance(y_hat, outputSet);
}
void LinReg::save(std::string fileName){
Utilities util;
util.saveParameters(fileName, weights, bias);
}
double LinReg::Cost(std::vector <double> y_hat, std::vector<double> y){
Reg regularization;
class Cost cost;
return cost.MSE(y_hat, y) + regularization.regTerm(weights, lambda, alpha, reg);
}
std::vector<double> LinReg::Evaluate(std::vector<std::vector<double>> X){
LinAlg alg;
return alg.scalarAdd(bias, alg.mat_vec_mult(X, weights));
}
double LinReg::Evaluate(std::vector<double> x){
LinAlg alg;
return alg.dot(weights, x) + bias;
}
// wTx + b
void LinReg::forwardPass(){
y_hat = Evaluate(inputSet);
}
}

53
MLPP/LinReg/LinReg.hpp Normal file
View File

@ -0,0 +1,53 @@
//
// LinReg.hpp
//
// Created by Marc Melikyan on 10/2/20.
//
#ifndef LinReg_hpp
#define LinReg_hpp
#include <vector>
#include <string>
namespace MLPP{
class LinReg{
public:
LinReg(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, std::string reg = "None", double lambda = 0.5, double alpha = 0.5);
std::vector<double> modelSetTest(std::vector<std::vector<double>> X);
double modelTest(std::vector<double> x);
void NewtonRaphson(double learning_rate, int max_epoch, bool UI);
void gradientDescent(double learning_rate, int max_epoch, bool UI = 1);
void SGD(double learning_rate, int max_epoch, bool UI = 1);
void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1);
void normalEquation();
double score();
void save(std::string fileName);
private:
double Cost(std::vector <double> y_hat, std::vector<double> y);
std::vector<double> Evaluate(std::vector<std::vector<double>> X);
double Evaluate(std::vector<double> x);
void forwardPass();
std::vector<std::vector<double>> inputSet;
std::vector<double> outputSet;
std::vector<double> y_hat;
std::vector<double> weights;
double bias;
int n;
int k;
// Regularization Params
std::string reg;
double lambda; /* Regularization Parameter */
double alpha; /* This is the controlling param for Elastic Net*/
};
}
#endif /* LinReg_hpp */
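
A minimal usage sketch showing both the closed-form and the iterative fit on toy data (include path assumed):

#include "MLPP/LinReg/LinReg.hpp"
#include <iostream>
#include <vector>

int main(){
    std::vector<std::vector<double>> inputSet = {{1}, {2}, {3}, {4}, {5}};
    std::vector<double> outputSet = {2, 4, 6, 8, 10};   // y = 2x
    MLPP::LinReg model(inputSet, outputSet);
    model.normalEquation();                             // closed-form OLS fit
    std::cout << model.modelTest({6}) << std::endl;     // expected to be close to 12

    MLPP::LinReg ridgeModel(inputSet, outputSet, "Ridge", 0.1);
    ridgeModel.gradientDescent(0.01, 1000, false);      // learning rate, max epochs, no UI
    std::cout << "Training score: " << ridgeModel.score() << std::endl;
    return 0;
}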

200
MLPP/LogReg/LogReg.cpp Normal file
View File

@ -0,0 +1,200 @@
//
// LogReg.cpp
//
// Created by Marc Melikyan on 10/2/20.
//
#include "LogReg.hpp"
#include "Activation/Activation.hpp"
#include "LinAlg/LinAlg.hpp"
#include "Regularization/Reg.hpp"
#include "Utilities/Utilities.hpp"
#include "Cost/Cost.hpp"
#include <iostream>
#include <random>
namespace MLPP{
LogReg::LogReg(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, std::string reg, double lambda, double alpha)
: inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha)
{
y_hat.resize(n);
weights = Utilities::weightInitialization(k);
bias = Utilities::biasInitialization();
}
std::vector<double> LogReg::modelSetTest(std::vector<std::vector<double>> X){
return Evaluate(X);
}
double LogReg::modelTest(std::vector<double> x){
return Evaluate(x);
}
void LogReg::gradientDescent(double learning_rate, int max_epoch, bool UI){
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
forwardPass();
while(true){
cost_prev = Cost(y_hat, outputSet);
std::vector<double> error = alg.subtraction(y_hat, outputSet);
// Calculating the weight gradients
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputSet), error)));
weights = regularization.regWeights(weights, lambda, alpha, reg);
// Calculating the bias gradients
bias -= learning_rate * alg.sum_elements(error) / n;
forwardPass();
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
Utilities::UI(weights, bias);
}
epoch++;
if(epoch > max_epoch) { break; }
}
}
void LogReg::MLE(double learning_rate, int max_epoch, bool UI){
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
forwardPass();
while(true){
cost_prev = Cost(y_hat, outputSet);
std::vector<double> error = alg.subtraction(outputSet, y_hat);
// Calculating the weight gradients
weights = alg.addition(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputSet), error)));
weights = regularization.regWeights(weights, lambda, alpha, reg);
// Calculating the bias gradients
bias += learning_rate * alg.sum_elements(error) / n;
forwardPass();
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
Utilities::UI(weights, bias);
}
epoch++;
if(epoch > max_epoch) { break; }
}
}
void LogReg::SGD(double learning_rate, int max_epoch, bool UI){
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
while(true){
std::random_device rd;
std::default_random_engine generator(rd());
std::uniform_int_distribution<int> distribution(0, int(n - 1));
int outputIndex = distribution(generator);
double y_hat = Evaluate(inputSet[outputIndex]);
cost_prev = Cost({y_hat}, {outputSet[outputIndex]});
double error = y_hat - outputSet[outputIndex];
// Weight updation
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate * error, inputSet[outputIndex]));
weights = regularization.regWeights(weights, lambda, alpha, reg);
// Bias updation
bias -= learning_rate * error;
y_hat = Evaluate({inputSet[outputIndex]});
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost({y_hat}, {outputSet[outputIndex]}));
Utilities::UI(weights, bias);
}
epoch++;
if(epoch > max_epoch) { break; }
}
forwardPass();
}
void LogReg::MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI){
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
// Creating the mini-batches
int n_mini_batch = n/mini_batch_size;
auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
while(true){
for(int i = 0; i < n_mini_batch; i++){
std::vector<double> y_hat = Evaluate(inputMiniBatches[i]);
cost_prev = Cost(y_hat, outputMiniBatches[i]);
std::vector<double> error = alg.subtraction(y_hat, outputMiniBatches[i]);
// Calculating the weight gradients
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error)));
weights = regularization.regWeights(weights, lambda, alpha, reg);
// Calculating the bias gradients
bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size();
y_hat = Evaluate(inputMiniBatches[i]);
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
Utilities::UI(weights, bias);
}
}
epoch++;
if(epoch > max_epoch) { break; }
}
forwardPass();
}
double LogReg::score(){
Utilities util;
return util.performance(y_hat, outputSet);
}
void LogReg::save(std::string fileName){
Utilities util;
util.saveParameters(fileName, weights, bias);
}
double LogReg::Cost(std::vector <double> y_hat, std::vector<double> y){
Reg regularization;
class Cost cost;
return cost.LogLoss(y_hat, y) + regularization.regTerm(weights, lambda, alpha, reg);
}
std::vector<double> LogReg::Evaluate(std::vector<std::vector<double>> X){
LinAlg alg;
Activation avn;
return avn.sigmoid(alg.scalarAdd(bias, alg.mat_vec_mult(X, weights)));
}
double LogReg::Evaluate(std::vector<double> x){
LinAlg alg;
Activation avn;
return avn.sigmoid(alg.dot(weights, x) + bias);
}
// sigmoid ( wTx + b )
void LogReg::forwardPass(){
y_hat = Evaluate(inputSet);
}
}

53
MLPP/LogReg/LogReg.hpp Normal file
View File

@ -0,0 +1,53 @@
//
// LogReg.hpp
//
// Created by Marc Melikyan on 10/2/20.
//
#ifndef LogReg_hpp
#define LogReg_hpp
#include <vector>
#include <string>
namespace MLPP {
class LogReg{
public:
LogReg(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, std::string reg = "None", double lambda = 0.5, double alpha = 0.5);
std::vector<double> modelSetTest(std::vector<std::vector<double>> X);
double modelTest(std::vector<double> x);
void gradientDescent(double learning_rate, int max_epoch, bool UI = 1);
void MLE(double learning_rate, int max_epoch, bool UI = 1);
void SGD(double learning_rate, int max_epoch, bool UI = 1);
void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1);
double score();
void save(std::string fileName);
private:
double Cost(std::vector <double> y_hat, std::vector<double> y);
std::vector<double> Evaluate(std::vector<std::vector<double>> X);
double Evaluate(std::vector<double> x);
void forwardPass();
std::vector<std::vector<double>> inputSet;
std::vector<double> outputSet;
std::vector<double> y_hat;
std::vector<double> weights;
double bias;
int n;
int k;
double learning_rate;
// Regularization Params
std::string reg;
double lambda; /* Regularization Parameter */
double alpha; /* This is the controlling param for Elastic Net*/
};
}
#endif /* LogReg_hpp */
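
A minimal usage sketch on toy, linearly separable data (include path assumed):

#include "MLPP/LogReg/LogReg.hpp"
#include <iostream>
#include <vector>

int main(){
    std::vector<std::vector<double>> inputSet = {{0.0}, {0.5}, {1.0}, {4.0}, {4.5}, {5.0}};
    std::vector<double> outputSet = {0, 0, 0, 1, 1, 1};
    MLPP::LogReg model(inputSet, outputSet);
    model.gradientDescent(0.1, 5000, false);            // sigmoid(wTx + b) fit by batch gradient descent
    std::cout << model.modelTest({4.2}) << std::endl;    // probability, expected close to 1
    std::cout << "Training score: " << model.score() << std::endl;
    return 0;
}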

197
MLPP/MANN/MANN.cpp Normal file
View File

@ -0,0 +1,197 @@
//
// MANN.cpp
//
// Created by Marc Melikyan on 11/4/20.
//
#include "MANN.hpp"
#include "Activation/Activation.hpp"
#include "LinAlg/LinAlg.hpp"
#include "Regularization/Reg.hpp"
#include "Utilities/Utilities.hpp"
#include "Cost/Cost.hpp"
#include <iostream>
namespace MLPP {
MANN::MANN(std::vector<std::vector<double>> inputSet, std::vector<std::vector<double>> outputSet)
: inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), n_output(outputSet[0].size())
{
}
MANN::~MANN(){
delete outputLayer;
}
std::vector<std::vector<double>> MANN::modelSetTest(std::vector<std::vector<double>> X){
if(!network.empty()){
network[0].input = X;
network[0].forwardPass();
for(int i = 1; i < network.size(); i++){
network[i].input = network[i - 1].a;
network[i].forwardPass();
}
outputLayer->input = network[network.size() - 1].a;
}
else {
outputLayer->input = X;
}
outputLayer->forwardPass();
return outputLayer->a;
}
std::vector<double> MANN::modelTest(std::vector<double> x){
if(!network.empty()){
network[0].Test(x);
for(int i = 1; i < network.size(); i++){
network[i].Test(network[i - 1].a_test);
}
outputLayer->Test(network[network.size() - 1].a_test);
}
else{
outputLayer->Test(x);
}
return outputLayer->a_test;
}
void MANN::gradientDescent(double learning_rate, int max_epoch, bool UI){
class Cost cost;
Activation avn;
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
forwardPass();
while(true){
cost_prev = Cost(y_hat, outputSet);
if(outputLayer->activation == "Softmax"){
outputLayer->delta = alg.subtraction(y_hat, outputSet);
}
else{
auto costDeriv = outputLayer->costDeriv_map[outputLayer->cost];
auto outputAvn = outputLayer->activation_map[outputLayer->activation];
outputLayer->delta = alg.hadamard_product((cost.*costDeriv)(y_hat, outputSet), (avn.*outputAvn)(outputLayer->z, 1));
}
std::vector<std::vector<double>> outputWGrad = alg.matmult(alg.transpose(outputLayer->input), outputLayer->delta);
outputLayer->weights = alg.subtraction(outputLayer->weights, alg.scalarMultiply(learning_rate/n, outputWGrad));
outputLayer->weights = regularization.regWeights(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg);
outputLayer->bias = alg.subtractMatrixRows(outputLayer->bias, alg.scalarMultiply(learning_rate/n, outputLayer->delta));
if(!network.empty()){
auto hiddenLayerAvn = network[network.size() - 1].activation_map[network[network.size() - 1].activation];
network[network.size() - 1].delta = alg.hadamard_product(alg.matmult(outputLayer->delta, alg.transpose(outputLayer->weights)), (avn.*hiddenLayerAvn)(network[network.size() - 1].z, 1));
std::vector<std::vector<double>> hiddenLayerWGrad = alg.matmult(alg.transpose(network[network.size() - 1].input), network[network.size() - 1].delta);
network[network.size() - 1].weights = alg.subtraction(network[network.size() - 1].weights, alg.scalarMultiply(learning_rate/n, hiddenLayerWGrad));
network[network.size() - 1].weights = regularization.regWeights(network[network.size() - 1].weights, network[network.size() - 1].lambda, network[network.size() - 1].alpha, network[network.size() - 1].reg);
network[network.size() - 1].bias = alg.subtractMatrixRows(network[network.size() - 1].bias, alg.scalarMultiply(learning_rate/n, network[network.size() - 1].delta));
for(int i = network.size() - 2; i >= 0; i--){
auto hiddenLayerAvn = network[i].activation_map[network[i].activation];
network[i].delta = alg.hadamard_product(alg.matmult(network[i + 1].delta, alg.transpose(network[i + 1].weights)), (avn.*hiddenLayerAvn)(network[i].z, 1));
std::vector<std::vector<double>> hiddenLayerWGrad = alg.matmult(alg.transpose(network[i].input), network[i].delta);
network[i].weights = alg.subtraction(network[i].weights, alg.scalarMultiply(learning_rate/n, hiddenLayerWGrad));
network[i].weights = regularization.regWeights(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg);
network[i].bias = alg.subtractMatrixRows(network[i].bias, alg.scalarMultiply(learning_rate/n, network[i].delta));
}
}
forwardPass();
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
std::cout << "Layer " << network.size() + 1 << ": " << std::endl;
Utilities::UI(outputLayer->weights, outputLayer->bias);
if(!network.empty()){
std::cout << "Layer " << network.size() << ": " << std::endl;
for(int i = network.size() - 1; i >= 0; i--){
std::cout << "Layer " << i + 1 << ": " << std::endl;
Utilities::UI(network[i].weights, network[i].bias);
}
}
}
epoch++;
if(epoch > max_epoch) { break; }
}
}
double MANN::score(){
Utilities util;
forwardPass();
return util.performance(y_hat, outputSet);
}
void MANN::save(std::string fileName){
Utilities util;
if(!network.empty()){
util.saveParameters(fileName, network[0].weights, network[0].bias, 0, 1);
for(int i = 1; i < network.size(); i++){
util.saveParameters(fileName, network[i].weights, network[i].bias, 1, i + 1);
}
util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, 1, network.size() + 1);
}
else{
util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, 0, network.size() + 1);
}
}
void MANN::addLayer(int n_hidden, std::string activation, std::string weightInit, std::string reg, double lambda, double alpha){
if(network.empty()){
network.push_back(HiddenLayer(n_hidden, activation, inputSet, weightInit, reg, lambda, alpha));
network[0].forwardPass();
}
else{
network.push_back(HiddenLayer(n_hidden, activation, network[network.size() - 1].a, weightInit, reg, lambda, alpha));
network[network.size() - 1].forwardPass();
}
}
void MANN::addOutputLayer(std::string activation, std::string loss, std::string weightInit, std::string reg, double lambda, double alpha){
if(!network.empty()){
outputLayer = new MultiOutputLayer(n_output, network[0].n_hidden, activation, loss, network[network.size() - 1].a, weightInit, reg, lambda, alpha);
}
else{
outputLayer = new MultiOutputLayer(n_output, k, activation, loss, inputSet, weightInit, reg, lambda, alpha);
}
}
double MANN::Cost(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y){
Reg regularization;
class Cost cost;
double totalRegTerm = 0;
auto cost_function = outputLayer->cost_map[outputLayer->cost];
if(!network.empty()){
for(int i = 0; i < network.size() - 1; i++){
totalRegTerm += regularization.regTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg);
}
}
return (cost.*cost_function)(y_hat, y) + totalRegTerm + regularization.regTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg);
}
void MANN::forwardPass(){
if(!network.empty()){
network[0].input = inputSet;
network[0].forwardPass();
for(int i = 1; i < network.size(); i++){
network[i].input = network[i - 1].a;
network[i].forwardPass();
}
outputLayer->input = network[network.size() - 1].a;
}
else{
outputLayer->input = inputSet;
}
outputLayer->forwardPass();
y_hat = outputLayer->a;
}
}

48
MLPP/MANN/MANN.hpp Normal file
View File

@ -0,0 +1,48 @@
//
// MANN.hpp
//
// Created by Marc Melikyan on 11/4/20.
//
#ifndef MANN_hpp
#define MANN_hpp
#include "HiddenLayer/HiddenLayer.hpp"
#include "MultiOutputLayer/MultiOutputLayer.hpp"
#include <vector>
#include <string>
namespace MLPP{
class MANN{
public:
MANN(std::vector<std::vector<double>> inputSet, std::vector<std::vector<double>> outputSet);
~MANN();
std::vector<std::vector<double>> modelSetTest(std::vector<std::vector<double>> X);
std::vector<double> modelTest(std::vector<double> x);
void gradientDescent(double learning_rate, int max_epoch, bool UI = 1);
double score();
void save(std::string fileName);
void addLayer(int n_hidden, std::string activation, std::string weightInit = "Default", std::string reg = "None", double lambda = 0.5, double alpha = 0.5);
void addOutputLayer(std::string activation, std::string loss, std::string weightInit = "Default", std::string reg = "None", double lambda = 0.5, double alpha = 0.5);
private:
double Cost(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y);
void forwardPass();
std::vector<std::vector<double>> inputSet;
std::vector<std::vector<double>> outputSet;
std::vector<std::vector<double>> y_hat;
std::vector<HiddenLayer> network;
MultiOutputLayer *outputLayer;
int n;
int k;
int n_output;
};
}
#endif /* MANN_hpp */
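
A minimal usage sketch for the multi-output network on a toy 3-class problem (include path assumed; the "Softmax" activation name matches the check in MANN::gradientDescent above, while the "CrossEntropy" cost name is assumed to be a key registered by MultiOutputLayer):

#include "MLPP/MANN/MANN.hpp"
#include <iostream>
#include <vector>

int main(){
    std::vector<std::vector<double>> inputSet  = {{1, 0}, {0, 1}, {1, 1}, {0, 0}};
    std::vector<std::vector<double>> outputSet = {{1, 0, 0}, {0, 1, 0}, {0, 0, 1}, {1, 0, 0}}; // one-hot rows
    MLPP::MANN mann(inputSet, outputSet);
    mann.addLayer(8, "Sigmoid");
    mann.addOutputLayer("Softmax", "CrossEntropy");
    mann.gradientDescent(0.1, 1000, false);
    std::cout << "Training score: " << mann.score() << std::endl;
    return 0;
}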

270
MLPP/MLP/MLP.cpp Normal file
View File

@ -0,0 +1,270 @@
//
// MLP.cpp
//
// Created by Marc Melikyan on 11/4/20.
//
#include "MLP.hpp"
#include "Activation/Activation.hpp"
#include "LinAlg/LinAlg.hpp"
#include "Regularization/Reg.hpp"
#include "Utilities/Utilities.hpp"
#include "Cost/Cost.hpp"
#include <iostream>
#include <random>
namespace MLPP {
MLP::MLP(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, int n_hidden, std::string reg, double lambda, double alpha)
: inputSet(inputSet), outputSet(outputSet), n_hidden(n_hidden), n(inputSet.size()), k(inputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha)
{
Activation avn;
y_hat.resize(n);
weights1 = Utilities::weightInitialization(k, n_hidden);
weights2 = Utilities::weightInitialization(n_hidden);
bias1 = Utilities::biasInitialization(n_hidden);
bias2 = Utilities::biasInitialization();
}
std::vector<double> MLP::modelSetTest(std::vector<std::vector<double>> X){
return Evaluate(X);
}
double MLP::modelTest(std::vector<double> x){
return Evaluate(x);
}
void MLP::gradientDescent(double learning_rate, int max_epoch, bool UI){
Activation avn;
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
forwardPass();
while(true){
cost_prev = Cost(y_hat, outputSet);
// Calculating the errors
std::vector<double> error = alg.subtraction(y_hat, outputSet);
// Calculating the weight/bias gradients for layer 2
std::vector<double> D2_1 = alg.mat_vec_mult(alg.transpose(a2), error);
// weights and bias updation for layer 2
weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate/n, D2_1));
weights2 = regularization.regWeights(weights2, lambda, alpha, reg);
bias2 -= learning_rate * alg.sum_elements(error) / n;
// Calculating the weight/bias for layer 1
std::vector<std::vector<double>> D1_1 = alg.outerProduct(error, weights2);
std::vector<std::vector<double>> D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, 1));
std::vector<std::vector<double>> D1_3 = alg.matmult(alg.transpose(inputSet), D1_2);
// weight and bias update for layer 1
weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate/n, D1_3));
weights1 = regularization.regWeights(weights1, lambda, alpha, reg);
bias1 = alg.subtractMatrixRows(bias1, alg.scalarMultiply(learning_rate/n, D1_2));
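// In symbols: with a sigmoid output and log loss, error = y_hat - y, so
// dC/dw2 = a2^T error / n, dC/db2 = sum(error) / n, and dC/dW1 = X^T [(error ⊗ w2) ∘ sigmoid'(z2)] / n;
// D2_1, D1_2, and D1_3 above are these gradients before the learning_rate/n scaling.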
forwardPass();
// UI PORTION
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
std::cout << "Layer 1:" << std::endl;
Utilities::UI(weights1, bias1);
std::cout << "Layer 2:" << std::endl;
Utilities::UI(weights2, bias2);
}
epoch++;
if(epoch > max_epoch) { break; }
}
}
void MLP::SGD(double learning_rate, int max_epoch, bool UI){
Activation avn;
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
while(true){
std::random_device rd;
std::default_random_engine generator(rd());
std::uniform_int_distribution<int> distribution(0, int(n - 1));
int outputIndex = distribution(generator);
double y_hat = Evaluate(inputSet[outputIndex]);
auto [z2, a2] = propagate(inputSet[outputIndex]);
cost_prev = Cost({y_hat}, {outputSet[outputIndex]});
double error = y_hat - outputSet[outputIndex];
// Weight updation for layer 2
std::vector<double> D2_1 = alg.scalarMultiply(error, a2);
weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate, D2_1));
weights2 = regularization.regWeights(weights2, lambda, alpha, reg);
// Bias updation for layer 2
bias2 -= learning_rate * error;
// Weight updation for layer 1
std::vector<double> D1_1 = alg.scalarMultiply(error, weights2);
std::vector<double> D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, 1));
std::vector<std::vector<double>> D1_3 = alg.outerProduct(inputSet[outputIndex], D1_2);
weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate, D1_3));
weights1 = regularization.regWeights(weights1, lambda, alpha, reg);
// Bias updation for layer 1
bias1 = alg.subtraction(bias1, alg.scalarMultiply(learning_rate, D1_2));
y_hat = Evaluate(inputSet[outputIndex]);
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost({y_hat}, {outputSet[outputIndex]}));
std::cout << "Layer 1:" << std::endl;
Utilities::UI(weights1, bias1);
std::cout << "Layer 2:" << std::endl;
Utilities::UI(weights2, bias2);
}
epoch++;
if(epoch > max_epoch) { break; }
}
forwardPass();
}
void MLP::MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI){
Activation avn;
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
// Creating the mini-batches
int n_mini_batch = n/mini_batch_size;
auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
while(true){
for(int i = 0; i < n_mini_batch; i++){
std::vector<double> y_hat = Evaluate(inputMiniBatches[i]);
auto [z2, a2] = propagate(inputMiniBatches[i]);
cost_prev = Cost(y_hat, outputMiniBatches[i]);
// Calculating the errors
std::vector<double> error = alg.subtraction(y_hat, outputMiniBatches[i]);
// Calculating the weight/bias gradients for layer 2
std::vector<double> D2_1 = alg.mat_vec_mult(alg.transpose(a2), error);
// weights and bias update for layer 2
weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate/outputMiniBatches[i].size(), D2_1));
weights2 = regularization.regWeights(weights2, lambda, alpha, reg);
// Bias update for layer 2
bias2 -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size();
//Calculating the weight/bias for layer 1
std::vector<std::vector<double>> D1_1 = alg.outerProduct(error, weights2);
std::vector<std::vector<double>> D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, 1));
std::vector<std::vector<double>> D1_3 = alg.matmult(alg.transpose(inputMiniBatches[i]), D1_2);
// weight and bias update for layer 1
weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate/outputMiniBatches[i].size(), D1_3));
weights1 = regularization.regWeights(weights1, lambda, alpha, reg);
bias1 = alg.subtractMatrixRows(bias1, alg.scalarMultiply(learning_rate/outputMiniBatches[i].size(), D1_2));
y_hat = Evaluate(inputMiniBatches[i]);
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
std::cout << "Layer 1:" << std::endl;
Utilities::UI(weights1, bias1);
std::cout << "Layer 2:" << std::endl;
Utilities::UI(weights2, bias2);
}
}
epoch++;
if(epoch > max_epoch) { break; }
}
forwardPass();
}
double MLP::score(){
Utilities util;
return util.performance(y_hat, outputSet);
}
void MLP::save(std::string fileName){
Utilities util;
util.saveParameters(fileName, weights1, bias1, 0, 1);
util.saveParameters(fileName, weights2, bias2, 1, 2);
}
double MLP::Cost(std::vector <double> y_hat, std::vector<double> y){
Reg regularization;
class Cost cost;
return cost.LogLoss(y_hat, y) + regularization.regTerm(weights2, lambda, alpha, reg) + regularization.regTerm(weights1, lambda, alpha, reg);
}
std::vector<double> MLP::Evaluate(std::vector<std::vector<double>> X){
LinAlg alg;
Activation avn;
std::vector<std::vector<double>> z2 = alg.mat_vec_add(alg.matmult(X, weights1), bias1);
std::vector<std::vector<double>> a2 = avn.sigmoid(z2);
return avn.sigmoid(alg.scalarAdd(bias2, alg.mat_vec_mult(a2, weights2)));
}
std::tuple<std::vector<std::vector<double>>, std::vector<std::vector<double>>> MLP::propagate(std::vector<std::vector<double>> X){
LinAlg alg;
Activation avn;
std::vector<std::vector<double>> z2 = alg.mat_vec_add(alg.matmult(X, weights1), bias1);
std::vector<std::vector<double>> a2 = avn.sigmoid(z2);
return {z2, a2};
}
double MLP::Evaluate(std::vector<double> x){
LinAlg alg;
Activation avn;
std::vector<double> z2 = alg.addition(alg.mat_vec_mult(alg.transpose(weights1), x), bias1);
std::vector<double> a2 = avn.sigmoid(z2);
return avn.sigmoid(alg.dot(weights2, a2) + bias2);
}
std::tuple<std::vector<double>, std::vector<double>> MLP::propagate(std::vector<double> x){
LinAlg alg;
Activation avn;
std::vector<double> z2 = alg.addition(alg.mat_vec_mult(alg.transpose(weights1), x), bias1);
std::vector<double> a2 = avn.sigmoid(z2);
return {z2, a2};
}
void MLP::forwardPass(){
LinAlg alg;
Activation avn;
z2 = alg.mat_vec_add(alg.matmult(inputSet, weights1), bias1);
a2 = avn.sigmoid(z2);
y_hat = avn.sigmoid(alg.scalarAdd(bias2, alg.mat_vec_mult(a2, weights2)));
}
}

61
MLPP/MLP/MLP.hpp Normal file
View File

@ -0,0 +1,61 @@
//
// MLP.hpp
//
// Created by Marc Melikyan on 11/4/20.
//
#ifndef MLP_hpp
#define MLP_hpp
#include <vector>
#include <map>
#include <string>
namespace MLPP {
class MLP{
public:
MLP(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, int n_hidden, std::string reg = "None", double lambda = 0.5, double alpha = 0.5);
std::vector<double> modelSetTest(std::vector<std::vector<double>> X);
double modelTest(std::vector<double> x);
void gradientDescent(double learning_rate, int max_epoch, bool UI = 1);
void SGD(double learning_rate, int max_epoch, bool UI = 1);
void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1);
double score();
void save(std::string fileName);
private:
double Cost(std::vector <double> y_hat, std::vector<double> y);
std::vector<double> Evaluate(std::vector<std::vector<double>> X);
std::tuple<std::vector<std::vector<double>>, std::vector<std::vector<double>>> propagate(std::vector<std::vector<double>> X);
double Evaluate(std::vector<double> x);
std::tuple<std::vector<double>, std::vector<double>> propagate(std::vector<double> x);
void forwardPass();
std::vector<std::vector<double>> inputSet;
std::vector<double> outputSet;
std::vector<double> y_hat;
std::vector<std::vector<double>> weights1;
std::vector<double> weights2;
std::vector<double> bias1;
double bias2;
std::vector<std::vector<double>> z2;
std::vector<std::vector<double>> a2;
int n;
int k;
int n_hidden;
// Regularization Params
std::string reg;
double lambda; /* Regularization Parameter */
double alpha; /* This is the controlling param for Elastic Net*/
};
}
#endif /* MLP_hpp */
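// Minimal usage sketch (illustrative only; the dataset below is made up for demonstration):
//
//     std::vector<std::vector<double>> X = {{0,0}, {0,1}, {1,0}, {1,1}};
//     std::vector<double> y = {0, 1, 1, 1};
//     MLPP::MLP model(X, y, /*n_hidden=*/4);
//     model.gradientDescent(/*learning_rate=*/0.1, /*max_epoch=*/10000, /*UI=*/false);
//     double acc = model.score(); // training-set accuracy via Utilities::performance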

View File

@ -0,0 +1,133 @@
//
// MultiOutputLayer.cpp
//
// Created by Marc Melikyan on 11/4/20.
//
#include "MultiOutputLayer.hpp"
#include "LinAlg/LinAlg.hpp"
#include "Utilities/Utilities.hpp"
#include <iostream>
#include <random>
namespace MLPP {
MultiOutputLayer::MultiOutputLayer(int n_output, int n_hidden, std::string activation, std::string cost, std::vector<std::vector<double>> input, std::string weightInit, std::string reg, double lambda, double alpha)
: n_output(n_output), n_hidden(n_hidden), activation(activation), cost(cost), input(input), weightInit(weightInit), reg(reg), lambda(lambda), alpha(alpha)
{
weights = Utilities::weightInitialization(n_hidden, n_output, weightInit);
bias = Utilities::biasInitialization(n_output);
activation_map["Linear"] = &Activation::linear;
activationTest_map["Linear"] = &Activation::linear;
activation_map["Sigmoid"] = &Activation::sigmoid;
activationTest_map["Sigmoid"] = &Activation::sigmoid;
activation_map["Softmax"] = &Activation::softmax;
activationTest_map["Softmax"] = &Activation::softmax;
activation_map["Swish"] = &Activation::swish;
activationTest_map["Swish"] = &Activation::swish;
activation_map["Mish"] = &Activation::mish;
activationTest_map["Mish"] = &Activation::mish;
activation_map["SinC"] = &Activation::sinc;
activationTest_map["SinC"] = &Activation::sinc;
activation_map["Softplus"] = &Activation::softplus;
activationTest_map["Softplus"] = &Activation::softplus;
activation_map["Softsign"] = &Activation::softsign;
activationTest_map["Softsign"] = &Activation::softsign;
activation_map["CLogLog"] = &Activation::cloglog;
activationTest_map["CLogLog"] = &Activation::cloglog;
activation_map["Logit"] = &Activation::logit;
activationTest_map["Logit"] = &Activation::logit;
activation_map["GaussianCDF"] = &Activation::gaussianCDF;
activationTest_map["GaussianCDF"] = &Activation::gaussianCDF;
activation_map["RELU"] = &Activation::RELU;
activationTest_map["RELU"] = &Activation::RELU;
activation_map["GELU"] = &Activation::GELU;
activationTest_map["GELU"] = &Activation::GELU;
activation_map["Sign"] = &Activation::sign;
activationTest_map["Sign"] = &Activation::sign;
activation_map["UnitStep"] = &Activation::unitStep;
activationTest_map["UnitStep"] = &Activation::unitStep;
activation_map["Sinh"] = &Activation::sinh;
activationTest_map["Sinh"] = &Activation::sinh;
activation_map["Cosh"] = &Activation::cosh;
activationTest_map["Cosh"] = &Activation::cosh;
activation_map["Tanh"] = &Activation::tanh;
activationTest_map["Tanh"] = &Activation::tanh;
activation_map["Csch"] = &Activation::csch;
activationTest_map["Csch"] = &Activation::csch;
activation_map["Sech"] = &Activation::sech;
activationTest_map["Sech"] = &Activation::sech;
activation_map["Coth"] = &Activation::coth;
activationTest_map["Coth"] = &Activation::coth;
activation_map["Arsinh"] = &Activation::arsinh;
activationTest_map["Arsinh"] = &Activation::arsinh;
activation_map["Arcosh"] = &Activation::arcosh;
activationTest_map["Arcosh"] = &Activation::arcosh;
activation_map["Artanh"] = &Activation::artanh;
activationTest_map["Artanh"] = &Activation::artanh;
activation_map["Arcsch"] = &Activation::arcsch;
activationTest_map["Arcsch"] = &Activation::arcsch;
activation_map["Arsech"] = &Activation::arsech;
activationTest_map["Arsech"] = &Activation::arsech;
activation_map["Arcoth"] = &Activation::arcoth;
activationTest_map["Arcoth"] = &Activation::arcoth;
costDeriv_map["MSE"] = &Cost::MSEDeriv;
cost_map["MSE"] = &Cost::MSE;
costDeriv_map["RMSE"] = &Cost::RMSEDeriv;
cost_map["RMSE"] = &Cost::RMSE;
costDeriv_map["MAE"] = &Cost::MAEDeriv;
cost_map["MAE"] = &Cost::MAE;
costDeriv_map["MBE"] = &Cost::MBEDeriv;
cost_map["MBE"] = &Cost::MBE;
costDeriv_map["LogLoss"] = &Cost::LogLossDeriv;
cost_map["LogLoss"] = &Cost::LogLoss;
costDeriv_map["CrossEntropy"] = &Cost::CrossEntropyDeriv;
cost_map["CrossEntropy"] = &Cost::CrossEntropy;
costDeriv_map["HingeLoss"] = &Cost::HingeLossDeriv;
cost_map["HingeLoss"] = &Cost::HingeLoss;
costDeriv_map["WassersteinLoss"] = &Cost::HingeLossDeriv;
cost_map["WassersteinLoss"] = &Cost::HingeLoss;
}
void MultiOutputLayer::forwardPass(){
LinAlg alg;
Activation avn;
z = alg.mat_vec_add(alg.matmult(input, weights), bias);
a = (avn.*activation_map[activation])(z, 0);
}
void MultiOutputLayer::Test(std::vector<double> x){
LinAlg alg;
Activation avn;
z_test = alg.addition(alg.mat_vec_mult(alg.transpose(weights), x), bias);
a_test = (avn.*activationTest_map[activation])(z_test, 0);
}
}

View File

@ -0,0 +1,58 @@
//
// MultiOutputLayer.hpp
//
// Created by Marc Melikyan on 11/4/20.
//
#ifndef MultiOutputLayer_hpp
#define MultiOutputLayer_hpp
#include "Activation/Activation.hpp"
#include "Cost/Cost.hpp"
#include <vector>
#include <map>
#include <string>
namespace MLPP {
class MultiOutputLayer{
public:
MultiOutputLayer(int n_output, int n_hidden, std::string activation, std::string cost, std::vector<std::vector<double>> input, std::string weightInit, std::string reg, double lambda, double alpha);
int n_output;
int n_hidden;
std::string activation;
std::string cost;
std::vector<std::vector<double>> input;
std::vector<std::vector<double>> weights;
std::vector<double> bias;
std::vector<std::vector<double>> z;
std::vector<std::vector<double>> a;
std::map<std::string, std::vector<std::vector<double>> (Activation::*)(std::vector<std::vector<double>>, bool)> activation_map;
std::map<std::string, std::vector<double> (Activation::*)(std::vector<double>, bool)> activationTest_map;
std::map<std::string, double (Cost::*)(std::vector<std::vector<double>>, std::vector<std::vector<double>>)> cost_map;
std::map<std::string, std::vector<std::vector<double>> (Cost::*)(std::vector<std::vector<double>>, std::vector<std::vector<double>>)> costDeriv_map;
std::vector<double> z_test;
std::vector<double> a_test;
std::vector<std::vector<double>> delta;
// Regularization Params
std::string reg;
double lambda; /* Regularization Parameter */
double alpha; /* This is the controlling param for Elastic Net*/
std::string weightInit;
void forwardPass();
void Test(std::vector<double> x);
};
}
#endif /* MultiOutputLayer_hpp */

View File

@ -0,0 +1,121 @@
//
// MultinomialNB.cpp
//
// Created by Marc Melikyan on 1/17/21.
//
#include "MultinomialNB.hpp"
#include "Utilities/Utilities.hpp"
#include "LinAlg/LinAlg.hpp"
#include <iostream>
#include <cmath>
#include <algorithm>
#include <random>
namespace MLPP{
MultinomialNB::MultinomialNB(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, int class_num)
: inputSet(inputSet), outputSet(outputSet), class_num(class_num)
{
y_hat.resize(outputSet.size());
Evaluate();
}
std::vector<double> MultinomialNB::modelSetTest(std::vector<std::vector<double>> X){
std::vector<double> y_hat;
for(int i = 0; i < X.size(); i++){
y_hat.push_back(modelTest(X[i]));
}
return y_hat;
}
double MultinomialNB::modelTest(std::vector<double> x){
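// Multinomial naive Bayes decision rule: y = argmax_c [ log Pr(C = c) + Σ_j log Pr(x_j | C = c) ],
// with the per-class likelihoods Pr(x_j | c) taken from theta and the class priors from priors.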
std::vector<double> score(class_num, 0); // per-class log-scores, zero-initialized
computeTheta();
for(int j = 0; j < x.size(); j++){
for(int k = 0; k < vocab.size(); k++){
if(x[j] == vocab[k]){
for(int p = class_num - 1; p >= 0; p--){
score[p] += std::log(theta[p][vocab[k]]);
}
}
}
}
for(int i = 0; i < priors.size(); i++){
score[i] += std::log(priors[i]);
}
return std::distance(score.begin(), std::max_element(score.begin(), score.end()));
}
double MultinomialNB::score(){
Utilities util;
return util.performance(y_hat, outputSet);
}
void MultinomialNB::computeTheta(){
// Resizing theta for the sake of ease & proper access of the elements.
theta.resize(class_num);
// Setting all values in the hashmap to 0 by default.
for(int i = class_num - 1; i >= 0; i--){
for(int j = 0; j < vocab.size(); j++){
theta[i][vocab[j]] = 0;
}
}
for(int i = 0; i < inputSet.size(); i++){
for(int j = 0; j < inputSet[0].size(); j++){
theta[outputSet[i]][inputSet[i][j]]++;
}
}
for(int i = 0; i < theta.size(); i++){
for(int j = 0; j < vocab.size(); j++){
theta[i][vocab[j]] /= priors[i] * y_hat.size();
}
}
}
void MultinomialNB::Evaluate(){
LinAlg alg;
// Easy computation of priors, i.e. Pr(C_k). Done once, before scoring each example.
priors.resize(class_num);
for(int i = 0; i < outputSet.size(); i++){
priors[int(outputSet[i])]++;
}
priors = alg.scalarMultiply( double(1)/double(outputSet.size()), priors);
// Evaluating Theta...
computeTheta();
for(int i = 0; i < outputSet.size(); i++){
// Pr(B | A) * Pr(A)
std::vector<double> score(class_num, 0);
for(int j = 0; j < inputSet[i].size(); j++){
for(int k = 0; k < vocab.size(); k++){
if(inputSet[i][j] == vocab[k]){
for(int p = class_num - 1; p >= 0; p--){
score[p] += std::log(theta[p][vocab[k]]);
}
}
}
}
for(int i = 0; i < priors.size(); i++){
score[i] += std::log(priors[i]);
score[i] = exp(score[i]);
}
for(int c = 0; c < class_num; c++){
std::cout << score[c] << std::endl;
}
// Assigning the training example's y_hat to a class
y_hat[i] = std::distance(score.begin(), std::max_element(score.begin(), score.end()));
}
}
}

View File

@ -0,0 +1,45 @@
//
// MultinomialNB.hpp
//
// Created by Marc Melikyan on 1/17/21.
//
#ifndef MultinomialNB_hpp
#define MultinomialNB_hpp
#include <vector>
#include <map>
namespace MLPP{
class MultinomialNB{
public:
MultinomialNB(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, int class_num);
std::vector<double> modelSetTest(std::vector<std::vector<double>> X);
double modelTest(std::vector<double> x);
double score();
private:
void computeTheta();
void Evaluate();
// Model Params
std::vector<double> priors;
std::vector<std::map<double, double>> theta; // stores per-class frequencies, so the mapped type must be floating-point
std::vector<double> vocab;
int class_num;
// Datasets
std::vector<std::vector<double>> inputSet;
std::vector<double> outputSet;
std::vector<double> y_hat;
};
}
#endif /* MultinomialNB_hpp */

View File

@ -0,0 +1,305 @@
//
// NumericalAnalysis.cpp
//
// Created by Marc Melikyan on 11/13/20.
//
#include "NumericalAnalysis.hpp"
#include "LinAlg/LinAlg.hpp"
#include <iostream>
#include <string>
#include <cmath>
#include <climits>
namespace MLPP{
double NumericalAnalysis::numDiff(double(*function)(double), double x){
double eps = 1e-10;
return (function(x + eps) - function(x)) / eps; // This is just the formal def. of the derivative.
}
double NumericalAnalysis::numDiff_2(double(*function)(double), double x){
double eps = 1e-5;
return (function(x + 2 * eps) - 2 * function(x + eps) + function(x)) / (eps * eps);
}
double NumericalAnalysis::numDiff_3(double(*function)(double), double x){
double eps = 1e-5;
double t1 = function(x + 3 * eps) - 2 * function(x + 2 * eps) + function(x + eps);
double t2 = function(x + 2 * eps) - 2 * function(x + eps) + function(x);
return (t1 - t2)/(eps * eps * eps);
}
double NumericalAnalysis::constantApproximation(double(*function)(double), double c){
return function(c);
}
double NumericalAnalysis::linearApproximation(double(*function)(double), double c, double x){
return constantApproximation(function, c) + numDiff(function, c) * (x - c);
}
double NumericalAnalysis::quadraticApproximation(double(*function)(double), double c, double x){
return linearApproximation(function, c, x) + 0.5 * numDiff_2(function, c) * (x - c) * (x - c);
}
double NumericalAnalysis::cubicApproximation(double(*function)(double), double c, double x){
return quadraticApproximation(function, c, x) + (1.0/6.0) * numDiff_3(function, c) * (x - c) * (x - c) * (x - c);
}
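// The three approximations above are the truncated Taylor expansions about c:
// f(x) ≈ f(c) + f'(c)(x - c) + (1/2) f''(c)(x - c)^2 + (1/6) f'''(c)(x - c)^3,
// with the derivatives estimated numerically via numDiff, numDiff_2, and numDiff_3.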
double NumericalAnalysis::numDiff(double(*function)(std::vector<double>), std::vector<double> x, int axis){
// For multivariable function analysis.
// This will be used for calculating Jacobian vectors.
// Differentiate with respect to the indicated axis (0, 1, 2, ...).
double eps = 1e-10;
std::vector<double> x_eps = x;
x_eps[axis] += eps;
return (function(x_eps) - function(x)) / eps;
}
double NumericalAnalysis::numDiff_2(double(*function)(std::vector<double>), std::vector<double> x, int axis1, int axis2){
//For Hessians.
double eps = 1e-5;
std::vector<double> x_pp = x;
x_pp[axis1] += eps;
x_pp[axis2] += eps;
std::vector<double> x_np = x;
x_np[axis2] += eps;
std::vector<double> x_pn = x;
x_pn[axis1] += eps;
return (function(x_pp) - function(x_np) - function(x_pn) + function(x))/(eps * eps);
}
double NumericalAnalysis::numDiff_3(double(*function)(std::vector<double>), std::vector<double> x, int axis1, int axis2, int axis3){
// For third order derivative tensors.
// NOTE: Approximations do not appear to be accurate for sinusoidal functions...
// Should revisit this later.
double eps = 1e-5;
std::vector<double> x_ppp = x;
x_ppp[axis1] += eps;
x_ppp[axis2] += eps;
x_ppp[axis3] += eps;
std::vector<double> x_npp = x;
x_npp[axis2] += eps;
x_npp[axis3] += eps;
std::vector<double> x_pnp = x;
x_pnp[axis1] += eps;
x_pnp[axis3] += eps;
std::vector<double> x_nnp = x;
x_nnp[axis3] += eps;
std::vector<double> x_ppn = x;
x_ppn[axis1] += eps;
x_ppn[axis2] += eps;
std::vector<double> x_npn = x;
x_npn[axis2] += eps;
std::vector<double> x_pnn = x;
x_pnn[axis1] += eps;
double thirdAxis = function(x_ppp) - function(x_npp) - function(x_pnp) + function(x_nnp);
double noThirdAxis = function(x_ppn) - function(x_npn) - function(x_pnn) + function(x);
return (thirdAxis - noThirdAxis)/(eps * eps * eps);
}
double NumericalAnalysis::newtonRaphsonMethod(double(*function)(double), double x_0, double epoch_num){
double x = x_0;
for(int i = 0; i < epoch_num; i++){
x -= function(x)/numDiff(function, x);
}
return x;
}
double NumericalAnalysis::halleyMethod(double (*function)(double), double x_0, double epoch_num){
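// Halley's method: x_{n+1} = x_n - 2 f(x_n) f'(x_n) / (2 f'(x_n)^2 - f(x_n) f''(x_n)), with the derivatives estimated numerically.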
double x = x_0;
for(int i = 0; i < epoch_num; i++){
x -= ((2 * function(x) * numDiff(function, x))/(2 * numDiff(function, x) * numDiff(function, x) - function(x) * numDiff_2(function, x)));
}
return x;
}
double NumericalAnalysis::invQuadraticInterpolation(double (*function)(double), std::vector<double> x_0, double epoch_num){
double x = 0;
std::vector<double> currentThree = x_0;
for(int i = 0; i < epoch_num; i++){
double t1 = ((function(currentThree[1]) * function(currentThree[2]))/( (function(currentThree[0]) - function(currentThree[1])) * (function(currentThree[0]) - function(currentThree[2])) ) ) * currentThree[0];
double t2 = ((function(currentThree[0]) * function(currentThree[2]))/( (function(currentThree[1]) - function(currentThree[0])) * (function(currentThree[1]) - function(currentThree[2])) ) ) * currentThree[1];
double t3 = ((function(currentThree[0]) * function(currentThree[1]))/( (function(currentThree[2]) - function(currentThree[0])) * (function(currentThree[2]) - function(currentThree[1])) ) ) * currentThree[2];
x = t1 + t2 + t3;
currentThree.erase(currentThree.begin());
currentThree.push_back(x);
}
return x;
}
double NumericalAnalysis::eulerianMethod(double(*derivative)(double), std::vector<double> q_0, double p, double h){
double max_epoch = (p - q_0[0])/h;
double x = q_0[0];
double y = q_0[1];
for(int i = 0; i < max_epoch; i++){
y = y + h * derivative(x);
x += h;
}
return y;
}
double NumericalAnalysis::eulerianMethod(double(*derivative)(std::vector<double>), std::vector<double> q_0, double p, double h){
double max_epoch = (p - q_0[0])/h;
double x = q_0[0];
double y = q_0[1];
for(int i = 0; i < max_epoch; i++){
y = y + h * derivative({x, y});
x += h;
}
return y;
}
double NumericalAnalysis::growthMethod(double C, double k, double t){
/*
dP/dt = kP
dP/P = kdt
integral(1/P)dP = integral(k) dt
ln|P| = kt + C_initial
|P| = e^(kt + C_initial)
|P| = e^(C_initial) * e^(kt)
P = +/- e^(C_initial) * e^(kt)
P = C * e^(kt)
*/
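// Worked example: with C = 10, k = 0.5, t = 2, P(t) = 10 * e^(0.5 * 2) = 10e ≈ 27.18.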
// auto growthFunction = [&C, &k](double t) { return C * exp(k * t); };
return C * std::exp(k * t);
}
std::vector<double> NumericalAnalysis::jacobian(double(*function)(std::vector<double>), std::vector<double> x){
std::vector<double> jacobian;
jacobian.resize(x.size());
for(int i = 0; i < jacobian.size(); i++){
jacobian[i] = numDiff(function, x, i); // Derivative w.r.t axis i evaluated at x. For all x_i.
}
return jacobian;
}
std::vector<std::vector<double>> NumericalAnalysis::hessian(double(*function)(std::vector<double>), std::vector<double> x){
std::vector<std::vector<double>> hessian;
hessian.resize(x.size());
for(int i = 0; i < hessian.size(); i++){
hessian[i].resize(x.size());
}
for(int i = 0; i < hessian.size(); i++){
for(int j = 0; j < hessian[i].size(); j++){
hessian[i][j] = numDiff_2(function, x, i, j);
}
}
return hessian;
}
std::vector<std::vector<std::vector<double>>> NumericalAnalysis::thirdOrderTensor(double(*function)(std::vector<double>), std::vector<double> x){
std::vector<std::vector<std::vector<double>>> tensor;
tensor.resize(x.size());
for(int i = 0; i < tensor.size(); i++){
tensor[i].resize(x.size());
for(int j = 0; j < tensor[i].size(); j++){
tensor[i][j].resize(x.size());
}
}
for(int i = 0; i < tensor.size(); i++){ // O(n^3) time complexity :(
for(int j = 0; j < tensor[i].size(); j++){
for(int k = 0; k < tensor[i][j].size(); k++)
tensor[i][j][k] = numDiff_3(function, x, i, j, k);
}
}
return tensor;
}
double NumericalAnalysis::constantApproximation(double(*function)(std::vector<double>), std::vector<double> c){
return function(c);
}
double NumericalAnalysis::linearApproximation(double(*function)(std::vector<double>), std::vector<double> c, std::vector<double> x){
LinAlg alg;
return constantApproximation(function, c) + alg.matmult(alg.transpose({jacobian(function, c)}), {alg.subtraction(x, c)})[0][0];
}
double NumericalAnalysis::quadraticApproximation(double(*function)(std::vector<double>), std::vector<double> c, std::vector<double> x){
LinAlg alg;
return linearApproximation(function, c, x) + 0.5 * alg.matmult({(alg.subtraction(x, c))}, alg.matmult(hessian(function, c), alg.transpose({alg.subtraction(x, c)})))[0][0];
}
double NumericalAnalysis::cubicApproximation(double(*function)(std::vector<double>), std::vector<double> c, std::vector<double> x){
/*
Not completely sure, as the literature seldom discusses the third-order Taylor approximation,
in particular for multivariate cases, but ostensibly the matrix/tensor/vector multiplies
should look something like this:
(N x N x N) (N x 1) [tensor vector mult] => (N x N x 1) => (N x N)
Perform remaining multiplies as done for the 2nd order approximation.
Result is a scalar.
*/
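// Under that assumption, the quantity being evaluated is the third-order term of the Taylor expansion:
// f(x) ≈ f(c) + ∇f(c)·(x - c) + (1/2)(x - c)^T H(c)(x - c) + (1/6) Σ_{i,j,k} T_{ijk}(x - c)_i(x - c)_j(x - c)_k.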
LinAlg alg;
std::vector<std::vector<double>> resultMat = alg.tensor_vec_mult(thirdOrderTensor(function, c), alg.subtraction(x, c));
double resultScalar = alg.matmult({(alg.subtraction(x, c))}, alg.matmult(resultMat, alg.transpose({alg.subtraction(x, c)})))[0][0];
return quadraticApproximation(function, c, x) + (1.0/6.0) * resultScalar;
}
double NumericalAnalysis::laplacian(double(*function)(std::vector<double>), std::vector<double> x){
LinAlg alg;
std::vector<std::vector<double>> hessian_matrix = hessian(function, x);
double laplacian = 0;
for(int i = 0; i < hessian_matrix.size(); i++){
laplacian += hessian_matrix[i][i]; // homogenous 2nd derivs w.r.t i, then i
}
return laplacian;
}
std::string NumericalAnalysis::secondPartialDerivativeTest(double(*function)(std::vector<double>), std::vector<double> x){
LinAlg alg;
std::vector<std::vector<double>> hessianMatrix = hessian(function, x);
/*
We treat the bivariate case separately because the second partial derivative test is less conclusive for functions
of more than two variables, and the calculations specific to the bivariate case are less computationally intensive.
*/
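// Bivariate case: with D = f_xx * f_yy - f_xy^2 (the determinant of the Hessian),
// D > 0 and f_xx > 0 => local min, D > 0 and f_xx < 0 => local max, D < 0 => saddle point, D = 0 => inconclusive.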
if(x.size() == 2){
double det = alg.det(hessianMatrix, hessianMatrix.size());
double secondDerivative = numDiff_2(function, x, 0, 0);
if(secondDerivative > 0 && det > 0){
return "min";
}
else if(secondDerivative < 0 && det > 0){
return "max";
}
else if(det < 0){
return "saddle";
}
else{
return "test was inconclusive";
}
}
else {
if(alg.positiveDefiniteChecker(hessianMatrix)){
return "min";
}
else if(alg.negativeDefiniteChecker(hessianMatrix)){
return "max";
}
else if(!alg.zeroEigenvalue(hessianMatrix)){
return "saddle";
}
else{
return "test was inconclusive";
}
}
}
}

View File

@ -0,0 +1,57 @@
//
// NumericalAnalysis.hpp
//
//
#ifndef NumericalAnalysis_hpp
#define NumericalAnalysis_hpp
#include <vector>
#include <string>
namespace MLPP{
class NumericalAnalysis{
public:
/* A numerical method for derivatives is used. This may be subject to change,
as an analytical method for calculating derivatives will most likely be used in
the future.
*/
double numDiff(double(*function)(double), double x);
double numDiff_2(double(*function)(double), double x);
double numDiff_3(double(*function)(double), double x);
double constantApproximation(double(*function)(double), double c);
double linearApproximation(double(*function)(double), double c, double x);
double quadraticApproximation(double(*function)(double), double c, double x);
double cubicApproximation(double(*function)(double), double c, double x);
double numDiff(double(*function)(std::vector<double>), std::vector<double> x, int axis);
double numDiff_2(double(*function)(std::vector<double>), std::vector<double> x, int axis1, int axis2);
double numDiff_3(double(*function)(std::vector<double>), std::vector<double> x, int axis1, int axis2, int axis3);
double newtonRaphsonMethod(double(*function)(double), double x_0, double epoch_num);
double halleyMethod(double(*function)(double), double x_0, double epoch_num);
double invQuadraticInterpolation(double (*function)(double), std::vector<double> x_0, double epoch_num);
double eulerianMethod(double(*derivative)(double), std::vector<double> q_0, double p, double h); // Euler's method for solving differential equations.
double eulerianMethod(double(*derivative)(std::vector<double>), std::vector<double> q_0, double p, double h); // Euler's method for solving differential equations.
double growthMethod(double C, double k, double t); // General growth-based differential equations can be solved by separation of variables.
std::vector<double> jacobian(double(*function)(std::vector<double>), std::vector<double> x); // Indeed, for functions with scalar outputs the Jacobians will be vectors.
std::vector<std::vector<double>> hessian(double(*function)(std::vector<double>), std::vector<double> x);
std::vector<std::vector<std::vector<double>>> thirdOrderTensor(double(*function)(std::vector<double>), std::vector<double> x);
double constantApproximation(double(*function)(std::vector<double>), std::vector<double> c);
double linearApproximation(double(*function)(std::vector<double>), std::vector<double> c, std::vector<double> x);
double quadraticApproximation(double(*function)(std::vector<double>), std::vector<double> c, std::vector<double> x);
double cubicApproximation(double(*function)(std::vector<double>), std::vector<double> c, std::vector<double> x);
double laplacian(double(*function)(std::vector<double>), std::vector<double> x); // laplacian
std::string secondPartialDerivativeTest(double(*function)(std::vector<double>), std::vector<double> x);
};
}
#endif /* NumericalAnalysis_hpp */
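// Illustrative usage sketch (assumes a user-supplied free function; values are approximate):
//
//     double f(std::vector<double> x){ return x[0]*x[0] + x[0]*x[1]; }
//     ...
//     MLPP::NumericalAnalysis num;
//     std::vector<double> J = num.jacobian(f, {1, 1});             // ≈ {3, 1}
//     std::vector<std::vector<double>> H = num.hessian(f, {1, 1}); // ≈ {{2, 1}, {1, 0}}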

View File

@ -0,0 +1,43 @@
//
// OutlierFinder.cpp
//
// Created by Marc Melikyan on 11/13/20.
//
#include "OutlierFinder.hpp"
#include "Stat/Stat.hpp"
#include <iostream>
#include <cmath>
namespace MLPP{
OutlierFinder::OutlierFinder(int threshold)
: threshold(threshold){
}
std::vector<std::vector<double>> OutlierFinder::modelSetTest(std::vector<std::vector<double>> inputSet){
Stat stat;
std::vector<std::vector<double>> outliers;
outliers.resize(inputSet.size());
for(int i = 0; i < inputSet.size(); i++){
for(int j = 0; j < inputSet[i].size(); j++){
double z = (inputSet[i][j] - stat.mean(inputSet[i])) / stat.standardDeviation(inputSet[i]);
if(std::abs(z) > threshold){
outliers[i].push_back(inputSet[i][j]);
}
}
}
return outliers;
}
std::vector<double> OutlierFinder::modelTest(std::vector<double> inputSet){
Stat stat;
std::vector<double> outliers;
for(int i = 0; i < inputSet.size(); i++){
double z = (inputSet[i] - stat.mean(inputSet)) / stat.standardDeviation(inputSet);
if(std::abs(z) > threshold){
outliers.push_back(inputSet[i]);
}
}
return outliers;
}
}

View File

@ -0,0 +1,27 @@
//
// OutlierFinder.hpp
//
// Created by Marc Melikyan on 11/13/20.
//
#ifndef OutlierFinder_hpp
#define OutlierFinder_hpp
#include <vector>
namespace MLPP{
class OutlierFinder{
public:
// Constructor
OutlierFinder(int threshold);
std::vector<std::vector<double>> modelSetTest(std::vector<std::vector<double>> inputSet);
std::vector<double> modelTest(std::vector<double> inputSet);
// Variables required
int threshold;
};
}
#endif /* OutlierFinder_hpp */

View File

@ -0,0 +1,130 @@
//
// OutputLayer.cpp
//
// Created by Marc Melikyan on 11/4/20.
//
#include "OutputLayer.hpp"
#include "LinAlg/LinAlg.hpp"
#include "Utilities/Utilities.hpp"
#include <iostream>
#include <random>
namespace MLPP {
OutputLayer::OutputLayer(int n_hidden, std::string activation, std::string cost, std::vector<std::vector<double>> input, std::string weightInit, std::string reg, double lambda, double alpha)
: n_hidden(n_hidden), activation(activation), cost(cost), input(input), weightInit(weightInit), reg(reg), lambda(lambda), alpha(alpha)
{
weights = Utilities::weightInitialization(n_hidden, weightInit);
bias = Utilities::biasInitialization();
activation_map["Linear"] = &Activation::linear;
activationTest_map["Linear"] = &Activation::linear;
activation_map["Sigmoid"] = &Activation::sigmoid;
activationTest_map["Sigmoid"] = &Activation::sigmoid;
activation_map["Swish"] = &Activation::swish;
activationTest_map["Swish"] = &Activation::swish;
activation_map["Mish"] = &Activation::mish;
activationTest_map["Mish"] = &Activation::mish;
activation_map["SinC"] = &Activation::sinc;
activationTest_map["SinC"] = &Activation::sinc;
activation_map["Softplus"] = &Activation::softplus;
activationTest_map["Softplus"] = &Activation::softplus;
activation_map["Softsign"] = &Activation::softsign;
activationTest_map["Softsign"] = &Activation::softsign;
activation_map["CLogLog"] = &Activation::cloglog;
activationTest_map["CLogLog"] = &Activation::cloglog;
activation_map["Logit"] = &Activation::logit;
activationTest_map["Logit"] = &Activation::logit;
activation_map["GaussianCDF"] = &Activation::gaussianCDF;
activationTest_map["GaussianCDF"] = &Activation::gaussianCDF;
activation_map["RELU"] = &Activation::RELU;
activationTest_map["RELU"] = &Activation::RELU;
activation_map["GELU"] = &Activation::GELU;
activationTest_map["GELU"] = &Activation::GELU;
activation_map["Sign"] = &Activation::sign;
activationTest_map["Sign"] = &Activation::sign;
activation_map["UnitStep"] = &Activation::unitStep;
activationTest_map["UnitStep"] = &Activation::unitStep;
activation_map["Sinh"] = &Activation::sinh;
activationTest_map["Sinh"] = &Activation::sinh;
activation_map["Cosh"] = &Activation::cosh;
activationTest_map["Cosh"] = &Activation::cosh;
activation_map["Tanh"] = &Activation::tanh;
activationTest_map["Tanh"] = &Activation::tanh;
activation_map["Csch"] = &Activation::csch;
activationTest_map["Csch"] = &Activation::csch;
activation_map["Sech"] = &Activation::sech;
activationTest_map["Sech"] = &Activation::sech;
activation_map["Coth"] = &Activation::coth;
activationTest_map["Coth"] = &Activation::coth;
activation_map["Arsinh"] = &Activation::arsinh;
activationTest_map["Arsinh"] = &Activation::arsinh;
activation_map["Arcosh"] = &Activation::arcosh;
activationTest_map["Arcosh"] = &Activation::arcosh;
activation_map["Artanh"] = &Activation::artanh;
activationTest_map["Artanh"] = &Activation::artanh;
activation_map["Arcsch"] = &Activation::arcsch;
activationTest_map["Arcsch"] = &Activation::arcsch;
activation_map["Arsech"] = &Activation::arsech;
activationTest_map["Arsech"] = &Activation::arsech;
activation_map["Arcoth"] = &Activation::arcoth;
activationTest_map["Arcoth"] = &Activation::arcoth;
costDeriv_map["MSE"] = &Cost::MSEDeriv;
cost_map["MSE"] = &Cost::MSE;
costDeriv_map["RMSE"] = &Cost::RMSEDeriv;
cost_map["RMSE"] = &Cost::RMSE;
costDeriv_map["MAE"] = &Cost::MAEDeriv;
cost_map["MAE"] = &Cost::MAE;
costDeriv_map["MBE"] = &Cost::MBEDeriv;
cost_map["MBE"] = &Cost::MBE;
costDeriv_map["LogLoss"] = &Cost::LogLossDeriv;
cost_map["LogLoss"] = &Cost::LogLoss;
costDeriv_map["CrossEntropy"] = &Cost::CrossEntropyDeriv;
cost_map["CrossEntropy"] = &Cost::CrossEntropy;
costDeriv_map["HingeLoss"] = &Cost::HingeLossDeriv;
cost_map["HingeLoss"] = &Cost::HingeLoss;
costDeriv_map["WassersteinLoss"] = &Cost::HingeLossDeriv;
cost_map["WassersteinLoss"] = &Cost::HingeLoss;
}
void OutputLayer::forwardPass(){
LinAlg alg;
Activation avn;
z = alg.scalarAdd(bias, alg.mat_vec_mult(input, weights));
a = (avn.*activation_map[activation])(z, 0);
}
void OutputLayer::Test(std::vector<double> x){
LinAlg alg;
Activation avn;
z_test = alg.dot(weights, x) + bias;
a_test = (avn.*activationTest_map[activation])(z_test, 0);
}
}

View File

@ -0,0 +1,56 @@
//
// OutputLayer.hpp
//
// Created by Marc Melikyan on 11/4/20.
//
#ifndef OutputLayer_hpp
#define OutputLayer_hpp
#include "Activation/Activation.hpp"
#include "Cost/Cost.hpp"
#include <vector>
#include <map>
#include <string>
namespace MLPP {
class OutputLayer{
public:
OutputLayer(int n_hidden, std::string activation, std::string cost, std::vector<std::vector<double>> input, std::string weightInit, std::string reg, double lambda, double alpha);
int n_hidden;
std::string activation;
std::string cost;
std::vector<std::vector<double>> input;
std::vector<double> weights;
double bias;
std::vector<double> z;
std::vector<double> a;
std::map<std::string, std::vector<double> (Activation::*)(std::vector<double>, bool)> activation_map;
std::map<std::string, double (Activation::*)(double, bool)> activationTest_map;
std::map<std::string, double (Cost::*)(std::vector<double>, std::vector<double>)> cost_map;
std::map<std::string, std::vector<double> (Cost::*)(std::vector<double>, std::vector<double>)> costDeriv_map;
double z_test;
double a_test;
std::vector<double> delta;
// Regularization Params
std::string reg;
double lambda; /* Regularization Parameter */
double alpha; /* This is the controlling param for Elastic Net*/
std::string weightInit;
void forwardPass();
void Test(std::vector<double> x);
};
}
#endif /* OutputLayer_hpp */

56
MLPP/PCA/PCA.cpp Normal file
View File

@ -0,0 +1,56 @@
//
// PCA.cpp
//
// Created by Marc Melikyan on 10/2/20.
//
#include "PCA.hpp"
#include "LinAlg/LinAlg.hpp"
#include "Data/Data.hpp"
#include <iostream>
#include <random>
namespace MLPP{
PCA::PCA(std::vector<std::vector<double>> inputSet, int k)
: inputSet(inputSet), k(k)
{
}
std::vector<std::vector<double>> PCA::principalComponents(){
LinAlg alg;
Data data;
auto [U, S, Vt] = alg.SVD(alg.cov(inputSet));
X_normalized = data.meanCentering(inputSet);
U_reduce.resize(U.size());
for(int i = 0; i < k; i++){
for(int j = 0; j < U.size(); j++){
U_reduce[j].push_back(U[j][i]);
}
}
Z = alg.matmult(alg.transpose(U_reduce), X_normalized);
return Z;
}
// Simply tells us the percentage of variance maintained.
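// Concretely: score = 1 - [ (1/m) Σ_i ||x_normalized^(i) - x_approx^(i)||² ] / [ (1/m) Σ_i ||x_normalized^(i)||² ],
// where X_approx = U_reduce * Z reconstructs each mean-centered example from its k retained components.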
double PCA::score(){
LinAlg alg;
std::vector<std::vector<double>> X_approx = alg.matmult(U_reduce, Z);
double num = 0, den = 0;
for(int i = 0; i < X_normalized.size(); i++){
num += alg.norm_sq(alg.subtraction(X_normalized[i], X_approx[i]));
}
num /= X_normalized.size();
for(int i = 0; i < X_normalized.size(); i++){
den += alg.norm_sq(X_normalized[i]);
}
den /= X_normalized.size();
if(den == 0){
den += 1e-10; // For numerical sanity, so as not to receive a domain error
}
return 1 - num/den;
}
}

28
MLPP/PCA/PCA.hpp Normal file
View File

@ -0,0 +1,28 @@
//
// PCA.hpp
//
// Created by Marc Melikyan on 10/2/20.
//
#ifndef PCA_hpp
#define PCA_hpp
#include <vector>
namespace MLPP{
class PCA{
public:
PCA(std::vector<std::vector<double>> inputSet, int k);
std::vector<std::vector<double>> principalComponents();
double score();
private:
std::vector<std::vector<double>> inputSet;
std::vector<std::vector<double>> X_normalized;
std::vector<std::vector<double>> U_reduce;
std::vector<std::vector<double>> Z;
int k;
};
}
#endif /* PCA_hpp */

View File

@ -0,0 +1,239 @@
//
// ProbitReg.cpp
//
// Created by Marc Melikyan on 10/2/20.
//
#include "ProbitReg.hpp"
#include "Activation/Activation.hpp"
#include "LinAlg/LinAlg.hpp"
#include "Regularization/Reg.hpp"
#include "Utilities/Utilities.hpp"
#include "Cost/Cost.hpp"
#include <iostream>
#include <random>
namespace MLPP{
ProbitReg::ProbitReg(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, std::string reg, double lambda, double alpha)
: inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha)
{
y_hat.resize(n);
weights = Utilities::weightInitialization(k);
bias = Utilities::biasInitialization();
}
std::vector<double> ProbitReg::modelSetTest(std::vector<std::vector<double>> X){
return Evaluate(X);
}
double ProbitReg::modelTest(std::vector<double> x){
return Evaluate(x);
}
void ProbitReg::gradientDescent(double learning_rate, int max_epoch, bool UI){
Activation avn;
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
forwardPass();
while(true){
cost_prev = Cost(y_hat, outputSet);
std::vector<double> error = alg.subtraction(y_hat, outputSet);
// Calculating the weight gradients
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputSet), alg.hadamard_product(error, avn.gaussianCDF(z, 1)))));
weights = regularization.regWeights(weights, lambda, alpha, reg);
// Calculating the bias gradients
bias -= learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.gaussianCDF(z, 1))) / n;
forwardPass();
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
Utilities::UI(weights, bias);
}
epoch++;
if(epoch > max_epoch) { break; }
}
}
void ProbitReg::MLE(double learning_rate, int max_epoch, bool UI){
Activation avn;
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
forwardPass();
while(true){
cost_prev = Cost(y_hat, outputSet);
std::vector<double> error = alg.subtraction(outputSet, y_hat);
// Calculating the weight gradients
weights = alg.addition(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputSet), alg.hadamard_product(error, avn.gaussianCDF(z, 1)))));
weights = regularization.regWeights(weights, lambda, alpha, reg);
// Calculating the bias gradients
bias += learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.gaussianCDF(z, 1))) / n;
forwardPass();
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
Utilities::UI(weights, bias);
}
epoch++;
if(epoch > max_epoch) { break; }
}
}
void ProbitReg::SGD(double learning_rate, int max_epoch, bool UI){
// NOTE: ∂y_hat/∂z is sparse
Activation avn;
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
while(true){
std::random_device rd;
std::default_random_engine generator(rd());
std::uniform_int_distribution<int> distribution(0, int(n - 1));
int outputIndex = distribution(generator);
double y_hat = Evaluate(inputSet[outputIndex]);
double z = propagate(inputSet[outputIndex]);
cost_prev = Cost({y_hat}, {outputSet[outputIndex]});
double error = y_hat - outputSet[outputIndex];
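// d/dz gaussianCDF(z) is the standard normal pdf, (1/sqrt(2π)) * exp(-z²/2),
// which is the factor multiplying the error in the weight and bias updates below.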
// Weight Updation
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate * error * ((1 / sqrt(2 * M_PI)) * exp(-z * z / 2)), inputSet[outputIndex]));
weights = regularization.regWeights(weights, lambda, alpha, reg);
// Bias updation
bias -= learning_rate * error * ((1 / sqrt(2 * M_PI)) * exp(-z * z / 2));
y_hat = Evaluate({inputSet[outputIndex]});
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost({y_hat}, {outputSet[outputIndex]}));
Utilities::UI(weights, bias);
}
epoch++;
if(epoch > max_epoch) { break; }
}
forwardPass();
}
void ProbitReg::MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI){
Activation avn;
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
// Creating the mini-batches
int n_mini_batch = n/mini_batch_size;
auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
if(double(n)/double(n_mini_batch) - int(n/n_mini_batch) != 0){
for(int i = 0; i < n - n/n_mini_batch * n_mini_batch; i++){
inputMiniBatches[n_mini_batch - 1].push_back(inputSet[n/n_mini_batch * n_mini_batch + i]);
outputMiniBatches[n_mini_batch - 1].push_back(outputSet[n/n_mini_batch * n_mini_batch + i]);
}
}
while(true){
for(int i = 0; i < n_mini_batch; i++){
std::vector<double> y_hat = Evaluate(inputMiniBatches[i]);
std::vector<double> z = propagate(inputMiniBatches[i]);
cost_prev = Cost(y_hat, outputMiniBatches[i]);
std::vector<double> error = alg.subtraction(y_hat, outputMiniBatches[i]);
// Calculating the weight gradients
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), alg.hadamard_product(error, avn.gaussianCDF(z, 1)))));
weights = regularization.regWeights(weights, lambda, alpha, reg);
// Calculating the bias gradients
bias -= learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.gaussianCDF(z, 1))) / outputMiniBatches[i].size();
y_hat = Evaluate(inputMiniBatches[i]);
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
Utilities::UI(weights, bias);
}
}
epoch++;
if(epoch > max_epoch) { break; }
}
forwardPass();
}
double ProbitReg::score(){
Utilities util;
return util.performance(y_hat, outputSet);
}
void ProbitReg::save(std::string fileName){
Utilities util;
util.saveParameters(fileName, weights, bias);
}
double ProbitReg::Cost(std::vector <double> y_hat, std::vector<double> y){
Reg regularization;
class Cost cost;
return cost.MSE(y_hat, y) + regularization.regTerm(weights, lambda, alpha, reg);
}
std::vector<double> ProbitReg::Evaluate(std::vector<std::vector<double>> X){
LinAlg alg;
Activation avn;
return avn.gaussianCDF(alg.scalarAdd(bias, alg.mat_vec_mult(X, weights)));
}
std::vector<double>ProbitReg::propagate(std::vector<std::vector<double>> X){
LinAlg alg;
return alg.scalarAdd(bias, alg.mat_vec_mult(X, weights));
}
double ProbitReg::Evaluate(std::vector<double> x){
LinAlg alg;
Activation avn;
return avn.gaussianCDF(alg.dot(weights, x) + bias);
}
double ProbitReg::propagate(std::vector<double> x){
LinAlg alg;
return alg.dot(weights, x) + bias;
}
// gaussianCDF ( wTx + b )
void ProbitReg::forwardPass(){
LinAlg alg;
Activation avn;
z = propagate(inputSet);
y_hat = avn.gaussianCDF(z);
}
}

View File

@ -0,0 +1,57 @@
//
// ProbitReg.hpp
//
// Created by Marc Melikyan on 10/2/20.
//
#ifndef ProbitReg_hpp
#define ProbitReg_hpp
#include <vector>
#include <string>
namespace MLPP {
class ProbitReg{
public:
ProbitReg(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, std::string reg = "None", double lambda = 0.5, double alpha = 0.5);
std::vector<double> modelSetTest(std::vector<std::vector<double>> X);
double modelTest(std::vector<double> x);
void gradientDescent(double learning_rate, int max_epoch = 0, bool UI = 1);
void MLE(double learning_rate, int max_epoch = 0, bool UI = 1);
void SGD(double learning_rate, int max_epoch = 0, bool UI = 1);
void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1);
double score();
void save(std::string fileName);
private:
double Cost(std::vector <double> y_hat, std::vector<double> y);
std::vector<double> Evaluate(std::vector<std::vector<double>> X);
std::vector<double> propagate(std::vector<std::vector<double>> X);
double Evaluate(std::vector<double> x);
double propagate(std::vector<double> x);
void forwardPass();
std::vector<std::vector<double>> inputSet;
std::vector<double> outputSet;
std::vector<double> z;
std::vector<double> y_hat;
std::vector<double> weights;
double bias;
int n;
int k;
// Regularization Params
std::string reg;
double lambda;
double alpha; /* This is the controlling param for Elastic Net*/
};
}
#endif /* ProbitReg_hpp */

177
MLPP/Regularization/Reg.cpp Normal file
View File

@ -0,0 +1,177 @@
//
// Reg.cpp
//
// Created by Marc Melikyan on 1/16/21.
//
#include <iostream>
#include <random>
#include "Reg.hpp"
#include "LinAlg/LinAlg.hpp"
#include "Activation/Activation.hpp"
namespace MLPP{
double Reg::regTerm(std::vector<double> weights, double lambda, double alpha, std::string reg){
if(reg == "Ridge"){
double reg = 0;
for(int i = 0; i < weights.size(); i++){
reg += weights[i] * weights[i];
}
return reg * lambda / 2;
}
else if(reg == "Lasso"){
double reg = 0;
for(int i = 0; i < weights.size(); i++){
reg += abs(weights[i]);
}
return reg * lambda;
}
else if(reg == "ElasticNet"){
double reg = 0;
for(int i = 0; i < weights.size(); i++){
reg += alpha * abs(weights[i]); // Lasso Reg
reg += ((1 - alpha) / 2) * weights[i] * weights[i]; // Ridge Reg
}
return reg * lambda;
}
return 0;
}
double Reg::regTerm(std::vector<std::vector<double>> weights, double lambda, double alpha, std::string reg){
if(reg == "Ridge"){
double reg = 0;
for(int i = 0; i < weights.size(); i++){
for(int j = 0; j < weights[i].size(); j++){
reg += weights[i][j] * weights[i][j];
}
}
return reg * lambda / 2;
}
else if(reg == "Lasso"){
double reg = 0;
for(int i = 0; i < weights.size(); i++){
for(int j = 0; j < weights[i].size(); j++){
reg += abs(weights[i][j]);
}
}
return reg * lambda;
}
else if(reg == "ElasticNet"){
double reg = 0;
for(int i = 0; i < weights.size(); i++){
for(int j = 0; j < weights[i].size(); j++){
reg += alpha * abs(weights[i][j]); // Lasso Reg
reg += ((1 - alpha) / 2) * weights[i][j] * weights[i][j]; // Ridge Reg
}
}
return reg * lambda;
}
return 0;
}
std::vector<double> Reg::regWeights(std::vector<double> weights, double lambda, double alpha, std::string reg){
LinAlg alg;
if(reg == "WeightClipping"){ return regDerivTerm(weights, lambda, alpha, reg); }
return alg.subtraction(weights, regDerivTerm(weights, lambda, alpha, reg));
// for(int i = 0; i < weights.size(); i++){
// weights[i] -= regDerivTerm(weights, lambda, alpha, reg, i);
// }
// return weights;
}
std::vector<std::vector<double>> Reg::regWeights(std::vector<std::vector<double>> weights, double lambda, double alpha, std::string reg){
LinAlg alg;
if(reg == "WeightClipping"){ return regDerivTerm(weights, lambda, alpha, reg); }
return alg.subtraction(weights, regDerivTerm(weights, lambda, alpha, reg));
// for(int i = 0; i < weights.size(); i++){
// for(int j = 0; j < weights[i].size(); j++){
// weights[i][j] -= regDerivTerm(weights, lambda, alpha, reg, i, j);
// }
// }
// return weights;
}
std::vector<double> Reg::regDerivTerm(std::vector<double> weights, double lambda, double alpha, std::string reg){
std::vector<double> regDeriv;
regDeriv.resize(weights.size());
for(int i = 0; i < regDeriv.size(); i++){
regDeriv[i] = regDerivTerm(weights, lambda, alpha, reg, i);
}
return regDeriv;
}
std::vector<std::vector<double>> Reg::regDerivTerm(std::vector<std::vector<double>> weights, double lambda, double alpha, std::string reg){
std::vector<std::vector<double>> regDeriv;
regDeriv.resize(weights.size());
for(int i = 0; i < regDeriv.size(); i++){
regDeriv[i].resize(weights[0].size());
}
for(int i = 0; i < regDeriv.size(); i++){
for(int j = 0; j < regDeriv[i].size(); j++){
regDeriv[i][j] = regDerivTerm(weights, lambda, alpha, reg, i, j);
}
}
return regDeriv;
}
double Reg::regDerivTerm(std::vector<double> weights, double lambda, double alpha, std::string reg, int j){
Activation act;
if(reg == "Ridge"){
return lambda * weights[j];
}
else if(reg == "Lasso"){
return lambda * act.sign(weights[j]);
}
else if(reg == "ElasticNet"){
return alpha * lambda * act.sign(weights[j]) + (1 - alpha) * lambda * weights[j];
}
else if(reg == "WeightClipping"){ // Preparation for Wasserstein GANs.
// We assume lambda is the lower clipping threshold, while alpha is the higher clipping threshold.
// alpha > lambda.
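// Illustrative example: with lambda = -0.01 and alpha = 0.01, every weight gets clamped to [-0.01, 0.01],
// the clipping range used in the original WGAN setup.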
if(weights[j] > alpha){
return alpha;
}
else if(weights[j] < lambda){
return lambda;
}
else{
return weights[j];
}
}
else {
return 0;
}
}
double Reg::regDerivTerm(std::vector<std::vector<double>> weights, double lambda, double alpha, std::string reg, int i, int j){
Activation act;
if(reg == "Ridge"){
return lambda * weights[i][j];
}
else if(reg == "Lasso"){
return lambda * act.sign(weights[i][j]);
}
else if(reg == "ElasticNet"){
return alpha * lambda * act.sign(weights[i][j]) + (1 - alpha) * lambda * weights[i][j];
}
else if(reg == "WeightClipping"){ // Preparation for Wasserstein GANs.
// We assume lambda is the lower clipping threshold, while alpha is the higher clipping threshold.
// alpha > lambda.
if(weights[i][j] > alpha){
return alpha;
}
else if(weights[i][j] < lambda){
return lambda;
}
else{
return weights[i][j];
}
}
else {
return 0;
}
}
}

View File

@ -0,0 +1,31 @@
//
// Reg.hpp
//
// Created by Marc Melikyan on 1/16/21.
//
#ifndef Reg_hpp
#define Reg_hpp
#include <vector>
namespace MLPP{
class Reg{
public:
double regTerm(std::vector<double> weights, double lambda, double alpha, std::string reg);
double regTerm(std::vector<std::vector<double>> weights, double lambda, double alpha, std::string reg);
std::vector<double> regWeights(std::vector<double> weights, double lambda, double alpha, std::string reg);
std::vector<std::vector<double>> regWeights(std::vector<std::vector<double>> weights, double lambda, double alpha, std::string reg);
std::vector<double> regDerivTerm(std::vector<double> weights, double lambda, double alpha, std::string reg);
std::vector<std::vector<double>> regDerivTerm(std::vector<std::vector<double>>, double lambda, double alpha, std::string reg);
private:
double regDerivTerm(std::vector<double> weights, double lambda, double alpha, std::string reg, int j);
double regDerivTerm(std::vector<std::vector<double>> weights, double lambda, double alpha, std::string reg, int i, int j);
};
}
#endif /* Reg_hpp */

195
MLPP/SVC/SVC.cpp Normal file
View File

@ -0,0 +1,195 @@
//
// SVC.cpp
//
// Created by Marc Melikyan on 10/2/20.
//
#include "SVC.hpp"
#include "Activation/Activation.hpp"
#include "LinAlg/LinAlg.hpp"
#include "Regularization/Reg.hpp"
#include "Utilities/Utilities.hpp"
#include "Cost/Cost.hpp"
#include <iostream>
#include <random>
namespace MLPP{
SVC::SVC(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, double C)
: inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), C(C)
{
y_hat.resize(n);
weights = Utilities::weightInitialization(k);
bias = Utilities::biasInitialization();
}
std::vector<double> SVC::modelSetTest(std::vector<std::vector<double>> X){
return Evaluate(X);
}
double SVC::modelTest(std::vector<double> x){
return Evaluate(x);
}
void SVC::gradientDescent(double learning_rate, int max_epoch, bool UI){
class Cost cost;
Activation avn;
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
forwardPass();
while(true){
cost_prev = Cost(y_hat, outputSet, weights, C);
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputSet), cost.HingeLossDeriv(z, outputSet, C))));
weights = regularization.regWeights(weights, learning_rate/n, 0, "Ridge");
// Calculating the bias gradients
bias += learning_rate * alg.sum_elements(cost.HingeLossDeriv(y_hat, outputSet, C)) / n;
forwardPass();
// UI PORTION
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet, weights, C));
Utilities::UI(weights, bias);
}
epoch++;
if(epoch > max_epoch) { break; }
}
}
void SVC::SGD(double learning_rate, int max_epoch, bool UI){
class Cost cost;
Activation avn;
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
while(true){
std::random_device rd;
std::default_random_engine generator(rd());
std::uniform_int_distribution<int> distribution(0, int(n - 1));
int outputIndex = distribution(generator);
double y_hat = Evaluate(inputSet[outputIndex]);
double z = propagate(inputSet[outputIndex]);
cost_prev = Cost({z}, {outputSet[outputIndex]}, weights, C);
double costDeriv = cost.HingeLossDeriv(std::vector<double>({z}), std::vector<double>({outputSet[outputIndex]}), C)[0]; // Explicit conversion to avoid ambiguity with the overloaded function. Error occurred on Ubuntu.
// Weight Updation
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate * costDeriv, inputSet[outputIndex]));
weights = regularization.regWeights(weights, learning_rate, 0, "Ridge");
// Bias updation
bias -= learning_rate * costDeriv;
y_hat = Evaluate({inputSet[outputIndex]});
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost({z}, {outputSet[outputIndex]}, weights, C));
Utilities::UI(weights, bias);
}
epoch++;
if(epoch > max_epoch) { break; }
}
forwardPass();
}
void SVC::MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI){
class Cost cost;
Activation avn;
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
// Creating the mini-batches
int n_mini_batch = n/mini_batch_size;
auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
while(true){
for(int i = 0; i < n_mini_batch; i++){
std::vector<double> y_hat = Evaluate(inputMiniBatches[i]);
std::vector<double> z = propagate(inputMiniBatches[i]);
cost_prev = Cost(z, outputMiniBatches[i], weights, C);
// Calculating the weight gradients
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), cost.HingeLossDeriv(z, outputMiniBatches[i], C))));
weights = regularization.regWeights(weights, learning_rate/n, 0, "Ridge");
// Calculating the bias gradients
bias -= learning_rate * alg.sum_elements(cost.HingeLossDeriv(z, outputMiniBatches[i], C)) / n;
forwardPass();
y_hat = Evaluate(inputMiniBatches[i]);
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost(z, outputMiniBatches[i], weights, C));
Utilities::UI(weights, bias);
}
}
epoch++;
if(epoch > max_epoch) { break; }
}
forwardPass();
}
double SVC::score(){
Utilities util;
return util.performance(y_hat, outputSet);
}
void SVC::save(std::string fileName){
Utilities util;
util.saveParameters(fileName, weights, bias);
}
double SVC::Cost(std::vector <double> z, std::vector<double> y, std::vector<double> weights, double C){
class Cost cost;
return cost.HingeLoss(z, y, weights, C);
}
std::vector<double> SVC::Evaluate(std::vector<std::vector<double>> X){
LinAlg alg;
Activation avn;
return avn.sign(alg.scalarAdd(bias, alg.mat_vec_mult(X, weights)));
}
std::vector<double>SVC::propagate(std::vector<std::vector<double>> X){
LinAlg alg;
Activation avn;
return alg.scalarAdd(bias, alg.mat_vec_mult(X, weights));
}
double SVC::Evaluate(std::vector<double> x){
LinAlg alg;
Activation avn;
return avn.sign(alg.dot(weights, x) + bias);
}
double SVC::propagate(std::vector<double> x){
LinAlg alg;
Activation avn;
return alg.dot(weights, x) + bias;
}
// sign ( wTx + b )
void SVC::forwardPass(){
LinAlg alg;
Activation avn;
z = propagate(inputSet);
y_hat = avn.sign(z);
}
}
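For reference, a standalone sketch of the soft-margin hinge subgradient that the cost.HingeLossDeriv(z, outputSet, C) calls above are assumed to supply; the actual implementation lives in the Cost module and may differ in scaling.

#include <vector>

// Subgradient of C * max(0, 1 - y*z) with respect to z, element-wise (assumed form).
std::vector<double> hingeLossDerivSketch(const std::vector<double>& z, const std::vector<double>& y, double C){
    std::vector<double> deriv(z.size(), 0);
    for(std::size_t i = 0; i < z.size(); i++){
        if(y[i] * z[i] < 1){ // inside the margin: the hinge is active
            deriv[i] = -C * y[i];
        } // otherwise the hinge is flat and the subgradient is 0
    }
    return deriv;
}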

56
MLPP/SVC/SVC.hpp Normal file
View File

@ -0,0 +1,56 @@
//
// SVC.hpp
//
// Created by Marc Melikyan on 10/2/20.
//
// https://towardsdatascience.com/svm-implementation-from-scratch-python-2db2fc52e5c2
// Illustrates a practical definition of the Hinge Loss function and its gradient when optimizing with SGD.
#ifndef SVC_hpp
#define SVC_hpp
#include <vector>
#include <string>
namespace MLPP {
class SVC{
public:
SVC(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, double C);
std::vector<double> modelSetTest(std::vector<std::vector<double>> X);
double modelTest(std::vector<double> x);
void gradientDescent(double learning_rate, int max_epoch, bool UI = 1);
void SGD(double learning_rate, int max_epoch, bool UI = 1);
void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1);
double score();
void save(std::string fileName);
private:
double Cost(std::vector <double> y_hat, std::vector<double> y, std::vector<double> weights, double C);
std::vector<double> Evaluate(std::vector<std::vector<double>> X);
std::vector<double> propagate(std::vector<std::vector<double>> X);
double Evaluate(std::vector<double> x);
double propagate(std::vector<double> x);
void forwardPass();
std::vector<std::vector<double>> inputSet;
std::vector<double> outputSet;
std::vector<double> z;
std::vector<double> y_hat;
std::vector<double> weights;
double bias;
double C;
int n;
int k;
// UI Portion
void UI(int epoch, double cost_prev);
};
}
#endif /* SVC_hpp */
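A minimal usage sketch of the SVC class declared above. The include path, the toy dataset, and the hyperparameters are illustrative assumptions; labels are taken in {-1, +1} because the model evaluates sign(wTx + b).

#include "SVC/SVC.hpp"
#include <iostream>
#include <vector>

int main(){
    std::vector<std::vector<double>> X = {{1, 1}, {2, 3}, {-1, -2}, {-3, -1}};
    std::vector<double> y = {1, 1, -1, -1}; // labels in {-1, +1}
    double C = 1.0; // illustrative regularization strength
    MLPP::SVC model(X, y, C);
    model.gradientDescent(0.001, 1000, false); // illustrative learning rate / epoch count
    std::cout << "Accuracy: " << model.score() << std::endl;
    std::cout << "Prediction: " << model.modelTest({2, 2}) << std::endl;
    return 0;
}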

290
MLPP/SoftmaxNet/SoftmaxNet.cpp Normal file
View File

@ -0,0 +1,290 @@
//
// SoftmaxNet.cpp
//
// Created by Marc Melikyan on 10/2/20.
//
#include "SoftmaxNet.hpp"
#include "LinAlg/LinAlg.hpp"
#include "Data/Data.hpp"
#include "Regularization/Reg.hpp"
#include "Activation/Activation.hpp"
#include "Utilities/Utilities.hpp"
#include "Cost/Cost.hpp"
#include <iostream>
#include <random>
namespace MLPP{
SoftmaxNet::SoftmaxNet(std::vector<std::vector<double>> inputSet, std::vector<std::vector<double>> outputSet, int n_hidden, std::string reg, double lambda, double alpha)
: inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), n_hidden(n_hidden), n_class(outputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha)
{
y_hat.resize(n);
weights1 = Utilities::weightInitialization(k, n_hidden);
weights2 = Utilities::weightInitialization(n_hidden, n_class);
bias1 = Utilities::biasInitialization(n_hidden);
bias2 = Utilities::biasInitialization(n_class);
}
std::vector<double> SoftmaxNet::modelTest(std::vector<double> x){
return Evaluate(x);
}
std::vector<std::vector<double>> SoftmaxNet::modelSetTest(std::vector<std::vector<double>> X){
return Evaluate(X);
}
void SoftmaxNet::gradientDescent(double learning_rate, int max_epoch, bool UI){
Activation avn;
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
forwardPass();
while(true){
cost_prev = Cost(y_hat, outputSet);
// Calculating the errors
std::vector<std::vector<double>> error = alg.subtraction(y_hat, outputSet);
// Calculating the weight/bias gradients for layer 2
std::vector<std::vector<double>> D2_1 = alg.matmult(alg.transpose(a2), error);
// weights and bias updation for layer 2
weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate, D2_1));
weights2 = regularization.regWeights(weights2, lambda, alpha, reg);
bias2 = alg.subtractMatrixRows(bias2, alg.scalarMultiply(learning_rate, error));
//Calculating the weight/bias for layer 1
std::vector<std::vector<double>> D1_1 = alg.matmult(error, alg.transpose(weights2));
std::vector<std::vector<double>> D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, 1));
std::vector<std::vector<double>> D1_3 = alg.matmult(alg.transpose(inputSet), D1_2);
// weight and bias updation for layer 1
weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate, D1_3));
weights1 = regularization.regWeights(weights1, lambda, alpha, reg);
bias1 = alg.subtractMatrixRows(bias1, alg.scalarMultiply(learning_rate, D1_2));
forwardPass();
// UI PORTION
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
std::cout << "Layer 1:" << std::endl;
Utilities::UI(weights1, bias1);
std::cout << "Layer 2:" << std::endl;
Utilities::UI(weights2, bias2);
}
epoch++;
if(epoch > max_epoch) { break; }
}
}
void SoftmaxNet::SGD(double learning_rate, int max_epoch, bool UI){
Activation avn;
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
while(true){
std::random_device rd;
std::default_random_engine generator(rd());
std::uniform_int_distribution<int> distribution(0, int(n - 1));
int outputIndex = distribution(generator);
std::vector<double> y_hat = Evaluate(inputSet[outputIndex]);
auto [z2, a2] = propagate(inputSet[outputIndex]);
cost_prev = Cost({y_hat}, {outputSet[outputIndex]});
std::vector<double> error = alg.subtraction(y_hat, outputSet[outputIndex]);
// Weight updation for layer 2
std::vector<std::vector<double>> D2_1 = alg.outerProduct(error, a2);
weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate, alg.transpose(D2_1)));
weights2 = regularization.regWeights(weights2, lambda, alpha, reg);
// Bias updation for layer 2
bias2 = alg.subtraction(bias2, alg.scalarMultiply(learning_rate, error));
// Weight updation for layer 1
std::vector<double> D1_1 = alg.mat_vec_mult(weights2, error);
std::vector<double> D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, 1));
std::vector<std::vector<double>> D1_3 = alg.outerProduct(inputSet[outputIndex], D1_2);
weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate, D1_3));
weights1 = regularization.regWeights(weights1, lambda, alpha, reg);
// Bias updation for layer 1
bias1 = alg.subtraction(bias1, alg.scalarMultiply(learning_rate, D1_2));
y_hat = Evaluate(inputSet[outputIndex]);
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost({y_hat}, {outputSet[outputIndex]}));
std::cout << "Layer 1:" << std::endl;
Utilities::UI(weights1, bias1);
std::cout << "Layer 2:" << std::endl;
Utilities::UI(weights2, bias2);
}
epoch++;
if(epoch > max_epoch) { break; }
}
forwardPass();
}
void SoftmaxNet::MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI){
Activation avn;
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
// Creating the mini-batches
int n_mini_batch = n/mini_batch_size;
auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
while(true){
for(int i = 0; i < n_mini_batch; i++){
std::vector<std::vector<double>> y_hat = Evaluate(inputMiniBatches[i]);
auto [z2, a2] = propagate(inputMiniBatches[i]);
cost_prev = Cost(y_hat, outputMiniBatches[i]);
// Calculating the errors
std::vector<std::vector<double>> error = alg.subtraction(y_hat, outputMiniBatches[i]);
// Calculating the weight/bias gradients for layer 2
std::vector<std::vector<double>> D2_1 = alg.matmult(alg.transpose(a2), error);
// weights and bias updation for layer 2
weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate, D2_1));
weights2 = regularization.regWeights(weights2, lambda, alpha, reg);
// Bias Updation for layer 2
bias2 = alg.subtractMatrixRows(bias2, alg.scalarMultiply(learning_rate, error));
//Calculating the weight/bias for layer 1
std::vector<std::vector<double>> D1_1 = alg.matmult(error, alg.transpose(weights2));
std::vector<std::vector<double>> D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, 1));
std::vector<std::vector<double>> D1_3 = alg.matmult(alg.transpose(inputMiniBatches[i]), D1_2);
// weight and bias updation for layer 1
weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate, D1_3));
weights1 = regularization.regWeights(weights1, lambda, alpha, reg);
bias1 = alg.subtractMatrixRows(bias1, alg.scalarMultiply(learning_rate, D1_2));
y_hat = Evaluate(inputMiniBatches[i]);
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
std::cout << "Layer 1:" << std::endl;
Utilities::UI(weights1, bias1);
std::cout << "Layer 2:" << std::endl;
Utilities::UI(weights2, bias2);
}
}
epoch++;
if(epoch > max_epoch) { break; }
}
forwardPass();
}
double SoftmaxNet::score(){
Utilities util;
return util.performance(y_hat, outputSet);
}
void SoftmaxNet::save(std::string fileName){
Utilities util;
util.saveParameters(fileName, weights1, bias1, 0, 1);
util.saveParameters(fileName, weights2, bias2, 1, 2);
}
std::vector<std::vector<double>> SoftmaxNet::getEmbeddings(){
return weights1;
}
double SoftmaxNet::Cost(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y){
Reg regularization;
Data data;
class Cost cost;
return cost.CrossEntropy(y_hat, y) + regularization.regTerm(weights1, lambda, alpha, reg) + regularization.regTerm(weights2, lambda, alpha, reg);
}
std::vector<std::vector<double>> SoftmaxNet::Evaluate(std::vector<std::vector<double>> X){
LinAlg alg;
Activation avn;
std::vector<std::vector<double>> z2 = alg.mat_vec_add(alg.matmult(X, weights1), bias1);
std::vector<std::vector<double>> a2 = avn.sigmoid(z2);
return avn.adjSoftmax(alg.mat_vec_add(alg.matmult(a2, weights2), bias2));
}
std::tuple<std::vector<std::vector<double>>, std::vector<std::vector<double>>> SoftmaxNet::propagate(std::vector<std::vector<double>> X){
LinAlg alg;
Activation avn;
std::vector<std::vector<double>> z2 = alg.mat_vec_add(alg.matmult(X, weights1), bias1);
std::vector<std::vector<double>> a2 = avn.sigmoid(z2);
return {z2, a2};
}
std::vector<double> SoftmaxNet::Evaluate(std::vector<double> x){
LinAlg alg;
Activation avn;
std::vector<double> z2 = alg.addition(alg.mat_vec_mult(alg.transpose(weights1), x), bias1);
std::vector<double> a2 = avn.sigmoid(z2);
return avn.adjSoftmax(alg.addition(alg.mat_vec_mult(alg.transpose(weights2), a2), bias2));
}
std::tuple<std::vector<double>, std::vector<double>> SoftmaxNet::propagate(std::vector<double> x){
LinAlg alg;
Activation avn;
std::vector<double> z2 = alg.addition(alg.mat_vec_mult(alg.transpose(weights1), x), bias1);
std::vector<double> a2 = avn.sigmoid(z2);
return {z2, a2};
}
void SoftmaxNet::forwardPass(){
LinAlg alg;
Activation avn;
z2 = alg.mat_vec_add(alg.matmult(inputSet, weights1), bias1);
a2 = avn.sigmoid(z2);
y_hat = avn.adjSoftmax(alg.mat_vec_add(alg.matmult(a2, weights2), bias2));
}
}

66
MLPP/SoftmaxNet/SoftmaxNet.hpp Normal file
View File

@ -0,0 +1,66 @@
//
// SoftmaxNet.hpp
//
// Created by Marc Melikyan on 10/2/20.
//
#ifndef SoftmaxNet_hpp
#define SoftmaxNet_hpp
#include <vector>
#include <string>
namespace MLPP {
class SoftmaxNet{
public:
SoftmaxNet(std::vector<std::vector<double>> inputSet, std::vector<std::vector<double>> outputSet, int n_hidden, std::string reg = "None", double lambda = 0.5, double alpha = 0.5);
std::vector<double> modelTest(std::vector<double> x);
std::vector<std::vector<double>> modelSetTest(std::vector<std::vector<double>> X);
void gradientDescent(double learning_rate, int max_epoch, bool UI = 1);
void SGD(double learning_rate, int max_epoch, bool UI = 1);
void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1);
double score();
void save(std::string fileName);
std::vector<std::vector<double>> getEmbeddings(); // This class is used (mostly) for word2Vec. This function returns our embeddings.
private:
double Cost(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y);
std::vector<std::vector<double>> Evaluate(std::vector<std::vector<double>> X);
std::tuple<std::vector<std::vector<double>>, std::vector<std::vector<double>>> propagate(std::vector<std::vector<double>> X);
std::vector<double> Evaluate(std::vector<double> x);
std::tuple<std::vector<double>, std::vector<double>> propagate(std::vector<double> x);
void forwardPass();
std::vector<std::vector<double>> inputSet;
std::vector<std::vector<double>> outputSet;
std::vector<std::vector<double>> y_hat;
std::vector<std::vector<double>> weights1;
std::vector<std::vector<double>> weights2;
std::vector<double> bias1;
std::vector<double> bias2;
std::vector<std::vector<double>> z2;
std::vector<std::vector<double>> a2;
int n;
int k;
int n_class;
int n_hidden;
// Regularization Params
std::string reg;
double lambda;
double alpha; /* This is the controlling param for Elastic Net*/
};
}
#endif /* SoftmaxNet_hpp */
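A minimal usage sketch of SoftmaxNet. The include path, the toy one-hot data, and the hyperparameters are illustrative assumptions; n_class is inferred from outputSet[0].size().

#include "SoftmaxNet/SoftmaxNet.hpp"
#include <iostream>
#include <vector>

int main(){
    std::vector<std::vector<double>> X = {{0, 0}, {0, 1}, {1, 0}, {1, 1}};
    std::vector<std::vector<double>> Y = {{1, 0}, {0, 1}, {0, 1}, {1, 0}}; // one-hot targets
    MLPP::SoftmaxNet model(X, Y, 3); // 3 hidden units, reg defaults to "None"
    model.gradientDescent(0.1, 1000, false); // illustrative hyperparameters
    std::cout << "Accuracy: " << model.score() << std::endl;
    // weights1 doubles as an embedding matrix when the net is used for word2Vec.
    std::vector<std::vector<double>> embeddings = model.getEmbeddings();
    std::cout << "Embedding rows: " << embeddings.size() << std::endl;
    return 0;
}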

192
MLPP/SoftmaxReg/SoftmaxReg.cpp Normal file
View File

@ -0,0 +1,192 @@
//
// SoftmaxReg.cpp
//
// Created by Marc Melikyan on 10/2/20.
//
#include "SoftmaxReg.hpp"
#include "LinAlg/LinAlg.hpp"
#include "Regularization/Reg.hpp"
#include "Activation/Activation.hpp"
#include "Utilities/Utilities.hpp"
#include "Cost/Cost.hpp"
#include <iostream>
#include <random>
namespace MLPP{
SoftmaxReg::SoftmaxReg(std::vector<std::vector<double>> inputSet, std::vector<std::vector<double>> outputSet, std::string reg, double lambda, double alpha)
: inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), n_class(outputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha)
{
y_hat.resize(n);
weights = Utilities::weightInitialization(k, n_class);
bias = Utilities::biasInitialization(n_class);
}
std::vector<double> SoftmaxReg::modelTest(std::vector<double> x){
return Evaluate(x);
}
std::vector<std::vector<double>> SoftmaxReg::modelSetTest(std::vector<std::vector<double>> X){
return Evaluate(X);
}
void SoftmaxReg::gradientDescent(double learning_rate, int max_epoch, bool UI){
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
forwardPass();
while(true){
cost_prev = Cost(y_hat, outputSet);
std::vector<std::vector<double>> error = alg.subtraction(y_hat, outputSet);
//Calculating the weight gradients
std::vector<std::vector<double>> w_gradient = alg.matmult(alg.transpose(inputSet), error);
//Weight updation
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, w_gradient));
weights = regularization.regWeights(weights, lambda, alpha, reg);
// Calculating the bias gradients
//double b_gradient = alg.sum_elements(error);
// Bias Updation
bias = alg.subtractMatrixRows(bias, alg.scalarMultiply(learning_rate, error));
forwardPass();
// UI PORTION
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
Utilities::UI(weights, bias);
}
epoch++;
if(epoch > max_epoch) { break; }
}
}
void SoftmaxReg::SGD(double learning_rate, int max_epoch, bool UI){
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
while(true){
std::random_device rd;
std::default_random_engine generator(rd());
std::uniform_int_distribution<int> distribution(0, int(n - 1));
int outputIndex = distribution(generator);
std::vector<double> y_hat = Evaluate(inputSet[outputIndex]);
cost_prev = Cost({y_hat}, {outputSet[outputIndex]});
// Calculating the weight gradients
std::vector<std::vector<double>> w_gradient = alg.outerProduct(inputSet[outputIndex], alg.subtraction(y_hat, outputSet[outputIndex]));
// Weight Updation
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, w_gradient));
weights = regularization.regWeights(weights, lambda, alpha, reg);
// Calculating the bias gradients
std::vector<double> b_gradient = alg.subtraction(y_hat, outputSet[outputIndex]);
// Bias updation
bias = alg.subtraction(bias, alg.scalarMultiply(learning_rate, b_gradient));
y_hat = Evaluate({inputSet[outputIndex]});
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost({y_hat}, {outputSet[outputIndex]}));
Utilities::UI(weights, bias);
}
epoch++;
if(epoch > max_epoch) { break; }
}
forwardPass();
}
void SoftmaxReg::MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI){
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
// Creating the mini-batches
int n_mini_batch = n/mini_batch_size;
auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
while(true){
for(int i = 0; i < n_mini_batch; i++){
std::vector<std::vector<double>> y_hat = Evaluate(inputMiniBatches[i]);
cost_prev = Cost(y_hat, outputMiniBatches[i]);
std::vector<std::vector<double>> error = alg.subtraction(y_hat, outputMiniBatches[i]);
// Calculating the weight gradients
std::vector<std::vector<double>> w_gradient = alg.matmult(alg.transpose(inputMiniBatches[i]), error);
//Weight updation
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, w_gradient));
weights = regularization.regWeights(weights, lambda, alpha, reg);
// Calculating the bias gradients
bias = alg.subtractMatrixRows(bias, alg.scalarMultiply(learning_rate, error));
y_hat = Evaluate(inputMiniBatches[i]);
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
Utilities::UI(weights, bias);
}
}
epoch++;
if(epoch > max_epoch) { break; }
}
forwardPass();
}
double SoftmaxReg::score(){
Utilities util;
return util.performance(y_hat, outputSet);
}
void SoftmaxReg::save(std::string fileName){
Utilities util;
util.saveParameters(fileName, weights, bias);
}
double SoftmaxReg::Cost(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y){
Reg regularization;
class Cost cost;
return cost.CrossEntropy(y_hat, y) + regularization.regTerm(weights, lambda, alpha, reg);
}
std::vector<double> SoftmaxReg::Evaluate(std::vector<double> x){
LinAlg alg;
Activation avn;
return avn.softmax(alg.addition(bias, alg.mat_vec_mult(alg.transpose(weights), x)));
}
std::vector<std::vector<double>> SoftmaxReg::Evaluate(std::vector<std::vector<double>> X){
LinAlg alg;
Activation avn;
return avn.softmax(alg.mat_vec_add(alg.matmult(X, weights), bias));
}
// softmax ( wTx + b )
void SoftmaxReg::forwardPass(){
LinAlg alg;
Activation avn;
y_hat = avn.softmax(alg.mat_vec_add(alg.matmult(inputSet, weights), bias));
}
}

54
MLPP/SoftmaxReg/SoftmaxReg.hpp Normal file
View File

@ -0,0 +1,54 @@
//
// SoftmaxReg.hpp
//
// Created by Marc Melikyan on 10/2/20.
//
#ifndef SoftmaxReg_hpp
#define SoftmaxReg_hpp
#include <vector>
#include <string>
namespace MLPP {
class SoftmaxReg{
public:
SoftmaxReg(std::vector<std::vector<double>> inputSet, std::vector<std::vector<double>> outputSet, std::string reg = "None", double lambda = 0.5, double alpha = 0.5);
std::vector<double> modelTest(std::vector<double> x);
std::vector<std::vector<double>> modelSetTest(std::vector<std::vector<double>> X);
void gradientDescent(double learning_rate, int max_epoch, bool UI = 1);
void SGD(double learning_rate, int max_epoch, bool UI = 1);
void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1);
double score();
void save(std::string fileName);
private:
double Cost(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y);
std::vector<std::vector<double>> Evaluate(std::vector<std::vector<double>> X);
std::vector<double> Evaluate(std::vector<double> x);
void forwardPass();
std::vector<std::vector<double>> inputSet;
std::vector<std::vector<double>> outputSet;
std::vector<std::vector<double>> y_hat;
std::vector<std::vector<double>> weights;
std::vector<double> bias;
int n;
int k;
int n_class;
// Regularization Params
std::string reg;
double lambda;
double alpha; /* This is the controlling param for Elastic Net*/
};
}
#endif /* SoftmaxReg_hpp */
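A minimal usage sketch of SoftmaxReg. The include path, toy data, and hyperparameters are illustrative assumptions; each output row is a one-hot vector over n_class classes.

#include "SoftmaxReg/SoftmaxReg.hpp"
#include <iostream>
#include <vector>

int main(){
    std::vector<std::vector<double>> X = {{1, 2}, {2, 1}, {-1, -1}, {-2, -3}};
    std::vector<std::vector<double>> Y = {{1, 0}, {1, 0}, {0, 1}, {0, 1}}; // one-hot targets
    MLPP::SoftmaxReg model(X, Y); // reg defaults to "None"
    model.gradientDescent(0.01, 1000, false); // illustrative hyperparameters
    std::cout << "Accuracy: " << model.score() << std::endl;
    std::vector<double> probs = model.modelTest({1, 1}); // class probabilities via softmax(wTx + b)
    for(double p : probs){ std::cout << p << std::endl; }
    return 0;
}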

219
MLPP/Stat/Stat.cpp Normal file
View File

@ -0,0 +1,219 @@
//
// Stat.cpp
//
// Created by Marc Melikyan on 9/29/20.
//
#include "Stat.hpp"
#include "Activation/Activation.hpp"
#include "LinAlg/LinAlg.hpp"
#include "Data/Data.hpp"
#include <algorithm>
#include <map>
#include <cmath>
#include <iostream>
namespace MLPP{
double Stat::b0Estimation(const std::vector<double>& x, const std::vector<double>& y){
return mean(y) - b1Estimation(x, y) * mean(x);
}
double Stat::b1Estimation(const std::vector<double>& x, const std::vector<double>& y){
return covariance(x, y) / variance(x);
}
double Stat::mean(const std::vector<double>& x){
double sum = 0;
for(int i = 0; i < x.size(); i++){
sum += x[i];
}
return sum / x.size();
}
double Stat::median(std::vector<double> x){
std::sort(x.begin(), x.end());
int center = x.size() / 2;
if(x.size() % 2 == 0){
return mean({x[center - 1], x[center]});
}
else{
return x[center];
}
}
std::vector<double> Stat::mode(const std::vector<double>& x){
Data data;
std::vector<double> x_set = data.vecToSet(x);
std::map<double, int> element_num;
for(int i = 0; i < x_set.size(); i++){
element_num[x_set[i]] = 0;
}
for(int i = 0; i < x.size(); i++){
element_num[x[i]]++;
}
std::vector<double> modes;
double max_num = element_num[x_set[0]];
for(int i = 0; i < x_set.size(); i++){
if(element_num[x_set[i]] > max_num){
max_num = element_num[x_set[i]];
modes.clear();
modes.push_back(x_set[i]);
}
else if(element_num[x_set[i]] == max_num){
modes.push_back(x_set[i]);
}
}
return modes;
}
double Stat::range(const std::vector<double>& x){
LinAlg alg;
return alg.max(x) - alg.min(x);
}
double Stat::midrange(const std::vector<double>& x){
LinAlg alg;
return (alg.max(x) + alg.min(x))/2;
}
double Stat::absAvgDeviation(const std::vector<double>& x){
double sum = 0;
for(int i = 0; i < x.size(); i++){
sum += std::abs(x[i] - mean(x));
}
return sum / x.size();
}
double Stat::standardDeviation(const std::vector<double>& x){
return std::sqrt(variance(x));
}
double Stat::variance(const std::vector<double>& x){
double sum = 0;
for(int i = 0; i < x.size(); i++){
sum += (x[i] - mean(x)) * (x[i] - mean(x));
}
return sum / (x.size() - 1);
}
double Stat::covariance(const std::vector<double>& x, const std::vector<double>& y){
double sum = 0;
for(int i = 0; i < x.size(); i++){
sum += (x[i] - mean(x)) * (y[i] - mean(y));
}
return sum / (x.size() - 1);
}
double Stat::correlation(const std::vector<double>& x, const std::vector<double>& y){
return covariance(x, y) / (standardDeviation(x) * standardDeviation(y));
}
double Stat::R2(const std::vector<double>& x, const std::vector<double>& y){
return correlation(x, y) * correlation(x, y);
}
double Stat::chebyshevIneq(const double k){
// X may or may not belong to a Gaussian Distribution
return 1 - 1 / (k * k);
}
double Stat::weightedMean(const std::vector<double>& x, const std::vector<double>& weights){
double sum = 0;
double weights_sum = 0;
for(int i = 0; i < x.size(); i++){
sum += x[i] * weights[i];
weights_sum += weights[i];
}
return sum / weights_sum;
}
double Stat::geometricMean(const std::vector<double>& x){
double product = 1;
for(int i = 0; i < x.size(); i++){
product *= x[i];
}
return std::pow(product, 1.0/x.size());
}
double Stat::harmonicMean(const std::vector<double>& x){
double sum = 0;
for(int i = 0; i < x.size(); i++){
sum += 1/x[i];
}
return x.size()/sum;
}
double Stat::RMS(const std::vector<double>& x){
double sum = 0;
for(int i = 0; i < x.size(); i++){
sum += x[i] * x[i];
}
return sqrt(sum / x.size());
}
double Stat::powerMean(const std::vector<double>& x, const double p){
double sum = 0;
for(int i = 0; i < x.size(); i++){
sum += std::pow(x[i], p);
}
return std::pow(sum / x.size(), 1/p);
}
double Stat::lehmerMean(const std::vector<double>& x, const double p){
double num = 0;
double den = 0;
for(int i = 0; i < x.size(); i++){
num += std::pow(x[i], p);
den += std::pow(x[i], p - 1);
}
return num/den;
}
double Stat::weightedLehmerMean(const std::vector<double>& x, const std::vector<double>& weights, const double p){
double num = 0;
double den = 0;
for(int i = 0; i < x.size(); i++){
num += weights[i] * std::pow(x[i], p);
den += weights[i] * std::pow(x[i], p - 1);
}
return num/den;
}
double Stat::heronianMean(const double A, const double B){
return (A + sqrt(A * B) + B) / 3;
}
double Stat::contraHarmonicMean(const std::vector<double>& x){
return lehmerMean(x, 2);
}
double Stat::heinzMean(const double A, const double B, const double x){
return (std::pow(A, x) * std::pow(B, 1 - x) + std::pow(A, 1 - x) * std::pow(B, x)) / 2;
}
double Stat::neumanSandorMean(const double a, const double b){
Activation avn;
return (a - b) / (2 * avn.arsinh((a - b)/(a + b)));
}
double Stat::stolarskyMean(const double x, const double y, const double p){
if(x == y){
return x;
}
return std::pow((std::pow(x, p) - std::pow(y, p)) / (p * (x - y)), 1/(p - 1));
}
double Stat::identricMean(const double x, const double y){
if(x == y){
return x;
}
return (1/M_E) * std::pow(std::pow(x, x) / std::pow(y, y), 1/(x-y));
}
double Stat::logMean(const double x, const double y){
if(x == y){
return x;
}
return (y - x) / (std::log(y) - std::log(x));
}
}

54
MLPP/Stat/Stat.hpp Normal file
View File

@ -0,0 +1,54 @@
//
// Stat.hpp
//
// Created by Marc Melikyan on 9/29/20.
//
#ifndef Stat_hpp
#define Stat_hpp
#include <vector>
namespace MLPP{
class Stat{
public:
// These functions are for univariate lin reg module- not for users.
double b0Estimation(const std::vector<double>& x, const std::vector<double>& y);
double b1Estimation(const std::vector<double>& x, const std::vector<double>& y);
// Statistical Functions
double mean(const std::vector <double>& x);
double median(std::vector<double> x);
std::vector<double> mode(const std::vector<double>& x);
double range(const std::vector<double>& x);
double midrange(const std::vector<double>& x);
double absAvgDeviation(const std::vector<double>& x);
double standardDeviation(const std::vector<double>& x);
double variance(const std::vector <double>& x);
double covariance(const std::vector<double>& x, const std::vector<double>& y);
double correlation(const std::vector <double>& x, const std::vector<double>& y);
double R2(const std::vector<double>& x, const std::vector<double>& y);
double chebyshevIneq(const double k);
// Extras
double weightedMean(const std::vector<double>& x, const std::vector<double>& weights);
double geometricMean(const std::vector<double>& x);
double harmonicMean(const std::vector<double>& x);
double RMS(const std::vector<double>& x);
double powerMean(const std::vector<double>& x, const double p);
double lehmerMean(const std::vector<double>& x, const double p);
double weightedLehmerMean(const std::vector<double>& x, const std::vector<double>& weights, const double p);
double contraHarmonicMean(const std::vector<double>& x);
double heronianMean(const double A, const double B);
double heinzMean(const double A, const double B, const double x);
double neumanSandorMean(const double a, const double b);
double stolarskyMean(const double x, const double y, const double p);
double identricMean(const double x, const double y);
double logMean(const double x, const double y);
};
}
#endif /* Stat_hpp */
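A small worked example for the Stat interface above, using toy data (assumed). The expected values in the comments follow directly from the definitions in Stat.cpp; the sample variance uses n - 1 in the denominator.

#include "Stat/Stat.hpp"
#include <iostream>
#include <vector>

int main(){
    MLPP::Stat stat;
    std::vector<double> x = {1, 2, 3, 4, 5};
    std::vector<double> y = {2, 4, 6, 8, 10};
    std::cout << stat.mean(x) << std::endl;              // 3
    std::cout << stat.median(x) << std::endl;            // 3
    std::cout << stat.variance(x) << std::endl;          // 2.5
    std::cout << stat.standardDeviation(x) << std::endl; // ~1.5811
    std::cout << stat.correlation(x, y) << std::endl;    // 1, since y = 2x exactly
    return 0;
}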

193
MLPP/TanhReg/TanhReg.cpp Normal file
View File

@ -0,0 +1,193 @@
//
// TanhReg.cpp
//
// Created by Marc Melikyan on 10/2/20.
//
#include "TanhReg.hpp"
#include "Activation/Activation.hpp"
#include "LinAlg/LinAlg.hpp"
#include "Regularization/Reg.hpp"
#include "Utilities/Utilities.hpp"
#include "Cost/Cost.hpp"
#include <iostream>
#include <random>
namespace MLPP{
TanhReg::TanhReg(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, std::string reg, double lambda, double alpha)
: inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha)
{
y_hat.resize(n);
weights = Utilities::weightInitialization(k);
bias = Utilities::biasInitialization();
}
std::vector<double> TanhReg::modelSetTest(std::vector<std::vector<double>> X){
return Evaluate(X);
}
double TanhReg::modelTest(std::vector<double> x){
return Evaluate(x);
}
void TanhReg::gradientDescent(double learning_rate, int max_epoch, bool UI){
Activation avn;
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
forwardPass();
while(true){
cost_prev = Cost(y_hat, outputSet);
std::vector<double> error = alg.subtraction(y_hat, outputSet);
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputSet), alg.hadamard_product(error, avn.tanh(z, 1)))));
weights = regularization.regWeights(weights, lambda, alpha, reg);
// Calculating the bias gradients
bias -= learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.tanh(z, 1))) / n;
forwardPass();
// UI PORTION
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
Utilities::UI(weights, bias);
}
epoch++;
if(epoch > max_epoch) { break; }
}
}
void TanhReg::SGD(double learning_rate, int max_epoch, bool UI){
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
while(true){
std::random_device rd;
std::default_random_engine generator(rd());
std::uniform_int_distribution<int> distribution(0, int(n - 1));
int outputIndex = distribution(generator);
double y_hat = Evaluate(inputSet[outputIndex]);
cost_prev = Cost({y_hat}, {outputSet[outputIndex]});
double error = y_hat - outputSet[outputIndex];
// Weight Updation
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate * error * (1 - y_hat * y_hat), inputSet[outputIndex]));
weights = regularization.regWeights(weights, lambda, alpha, reg);
// Bias updation
bias -= learning_rate * error * (1 - y_hat * y_hat);
y_hat = Evaluate({inputSet[outputIndex]});
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost({y_hat}, {outputSet[outputIndex]}));
Utilities::UI(weights, bias);
}
epoch++;
if(epoch > max_epoch) { break; }
}
forwardPass();
}
void TanhReg::MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI){
Activation avn;
LinAlg alg;
Reg regularization;
double cost_prev = 0;
int epoch = 1;
// Creating the mini-batches
int n_mini_batch = n/mini_batch_size;
auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
while(true){
for(int i = 0; i < n_mini_batch; i++){
std::vector<double> y_hat = Evaluate(inputMiniBatches[i]);
std::vector<double> z = propagate(inputMiniBatches[i]);
cost_prev = Cost(y_hat, outputMiniBatches[i]);
std::vector<double> error = alg.subtraction(y_hat, outputMiniBatches[i]);
// Calculating the weight gradients
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), alg.hadamard_product(error, avn.tanh(z, 1)))));
weights = regularization.regWeights(weights, lambda, alpha, reg);
// Calculating the bias gradients
bias -= learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.tanh(z, 1))) / n;
forwardPass();
y_hat = Evaluate(inputMiniBatches[i]);
if(UI) {
Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
Utilities::UI(weights, bias);
}
}
epoch++;
if(epoch > max_epoch) { break; }
}
forwardPass();
}
double TanhReg::score(){
Utilities util;
return util.performance(y_hat, outputSet);
}
void TanhReg::save(std::string fileName){
Utilities util;
util.saveParameters(fileName, weights, bias);
}
double TanhReg::Cost(std::vector <double> y_hat, std::vector<double> y){
Reg regularization;
class Cost cost;
return cost.MSE(y_hat, y) + regularization.regTerm(weights, lambda, alpha, reg);
}
std::vector<double> TanhReg::Evaluate(std::vector<std::vector<double>> X){
LinAlg alg;
Activation avn;
return avn.tanh(alg.scalarAdd(bias, alg.mat_vec_mult(X, weights)));
}
std::vector<double>TanhReg::propagate(std::vector<std::vector<double>> X){
LinAlg alg;
return alg.scalarAdd(bias, alg.mat_vec_mult(X, weights));
}
double TanhReg::Evaluate(std::vector<double> x){
LinAlg alg;
Activation avn;
return avn.tanh(alg.dot(weights, x) + bias);
}
double TanhReg::propagate(std::vector<double> x){
LinAlg alg;
return alg.dot(weights, x) + bias;
}
// Tanh ( wTx + b )
void TanhReg::forwardPass(){
LinAlg alg;
Activation avn;
z = propagate(inputSet);
y_hat = avn.tanh(z);
}
}

59
MLPP/TanhReg/TanhReg.hpp Normal file
View File

@ -0,0 +1,59 @@
//
// TanhReg.hpp
//
// Created by Marc Melikyan on 10/2/20.
//
#ifndef TanhReg_hpp
#define TanhReg_hpp
#include <vector>
#include <string>
namespace MLPP {
class TanhReg{
public:
TanhReg(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, std::string reg = "None", double lambda = 0.5, double alpha = 0.5);
std::vector<double> modelSetTest(std::vector<std::vector<double>> X);
double modelTest(std::vector<double> x);
void gradientDescent(double learning_rate, int max_epoch, bool UI = 1);
void SGD(double learning_rate, int max_epoch, bool UI = 1);
void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1);
double score();
void save(std::string fileName);
private:
double Cost(std::vector <double> y_hat, std::vector<double> y);
std::vector<double> Evaluate(std::vector<std::vector<double>> X);
std::vector<double> propagate(std::vector<std::vector<double>> X);
double Evaluate(std::vector<double> x);
double propagate(std::vector<double> x);
void forwardPass();
std::vector<std::vector<double>> inputSet;
std::vector<double> outputSet;
std::vector<double> z;
std::vector<double> y_hat;
std::vector<double> weights;
double bias;
int n;
int k;
// UI Portion
void UI(int epoch, double cost_prev);
// Regularization Params
std::string reg;
double lambda;
double alpha; /* This is the controlling param for Elastic Net*/
};
}
#endif /* TanhReg_hpp */
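A minimal usage sketch of TanhReg. The include path, toy data, and hyperparameters are illustrative assumptions; targets should lie in [-1, 1] since the model outputs tanh(wTx + b).

#include "TanhReg/TanhReg.hpp"
#include <iostream>
#include <vector>

int main(){
    std::vector<std::vector<double>> X = {{-2}, {-1}, {1}, {2}};
    std::vector<double> y = {-1, -1, 1, 1}; // targets in [-1, 1]
    MLPP::TanhReg model(X, y); // reg defaults to "None"
    model.gradientDescent(0.1, 5000, false); // illustrative hyperparameters
    std::cout << "Score: " << model.score() << std::endl;
    std::cout << "Prediction at x = 1.5: " << model.modelTest({1.5}) << std::endl;
    return 0;
}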

59
MLPP/Transforms/Transforms.cpp Normal file
View File

@ -0,0 +1,59 @@
//
// Transforms.cpp
//
// Created by Marc Melikyan on 11/13/20.
//
#include "Transforms.hpp"
#include "LinAlg/LinAlg.hpp"
#include <iostream>
#include <string>
#include <cmath>
namespace MLPP{
// DCT ii.
// https://www.mathworks.com/help/images/discrete-cosine-transform.html
std::vector<std::vector<double>> Transforms::discreteCosineTransform(std::vector<std::vector<double>> A){
LinAlg alg;
A = alg.scalarAdd(-128, A); // Center around 0.
std::vector<std::vector<double>> B;
B.resize(A.size());
for(int i = 0; i < B.size(); i++){
B[i].resize(A[i].size());
}
int M = A.size();
for(int i = 0; i < B.size(); i++){
for(int j = 0; j < B[i].size(); j++){
double sum = 0;
double alphaI;
if(i == 0){
alphaI = 1/std::sqrt(M);
}
else{
alphaI = std::sqrt(double(2)/double(M));
}
double alphaJ;
if(j == 0){
alphaJ = 1/std::sqrt(M);
}
else{
alphaJ = std::sqrt(double(2)/double(M));
}
for(int k = 0; k < B.size(); k++){
for(int f = 0; f < B[k].size(); f++){
sum += A[k][f] * std::cos( (M_PI * i * (2 * k + 1)) / (2 * M)) * std::cos( (M_PI * j * (2 * f + 1)) / (2 * M));
}
}
B[i][j] = sum;
B[i][j] *= alphaI * alphaJ;
}
}
return B;
}
}

20
MLPP/Transforms/Transforms.hpp Normal file
View File

@ -0,0 +1,20 @@
//
// Transforms.hpp
//
//
#ifndef Transforms_hpp
#define Transforms_hpp
#include <vector>
#include <string>
namespace MLPP{
class Transforms{
public:
std::vector<std::vector<double>> discreteCosineTransform(std::vector<std::vector<double>> A);
};
}
#endif /* Transforms_hpp */
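A minimal usage sketch of the DCT-II implementation above, on a toy 4x4 block of pixel intensities (assumed values). The transform centers the block by subtracting 128 before computing the coefficients.

#include "Transforms/Transforms.hpp"
#include <iostream>
#include <vector>

int main(){
    MLPP::Transforms transforms;
    std::vector<std::vector<double>> block = {
        {52, 55, 61, 66},
        {63, 59, 55, 90},
        {62, 59, 68, 113},
        {63, 58, 71, 122}
    }; // intensities in [0, 255]
    std::vector<std::vector<double>> coeffs = transforms.discreteCosineTransform(block);
    // coeffs[0][0] is the DC term; higher indices encode higher spatial frequencies.
    std::cout << "DC coefficient: " << coeffs[0][0] << std::endl;
    return 0;
}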

37
MLPP/UniLinReg/UniLinReg.cpp Normal file
View File

@ -0,0 +1,37 @@
//
// UniLinReg.cpp
//
// Created by Marc Melikyan on 9/29/20.
//
#include "UniLinReg.hpp"
#include "LinAlg/LinAlg.hpp"
#include "Stat/Stat.hpp"
#include <iostream>
// General Multivariate Linear Regression Model
// ŷ = b0 + b1x1 + b2x2 + ... + bkxk
// Univariate Linear Regression Model
// ŷ = b0 + b1x1
namespace MLPP{
UniLinReg::UniLinReg(std::vector<double> x, std::vector<double> y)
: inputSet(x), outputSet(y)
{
Stat estimator;
b1 = estimator.b1Estimation(inputSet, outputSet);
b0 = estimator.b0Estimation(inputSet, outputSet);
}
std::vector<double> UniLinReg::modelSetTest(std::vector<double> x){
LinAlg alg;
return alg.scalarAdd(b0, alg.scalarMultiply(b1, x));
}
double UniLinReg::modelTest(double input){
return b0 + b1 * input;
}
}

30
MLPP/UniLinReg/UniLinReg.hpp Normal file
View File

@ -0,0 +1,30 @@
//
// UniLinReg.hpp
//
// Created by Marc Melikyan on 9/29/20.
//
#ifndef UniLinReg_hpp
#define UniLinReg_hpp
#include <vector>
namespace MLPP{
class UniLinReg{
public:
UniLinReg(std::vector <double> x, std::vector<double> y);
std::vector<double> modelSetTest(std::vector<double> x);
double modelTest(double x);
private:
std::vector <double> inputSet;
std::vector <double> outputSet;
double b0;
double b1;
};
}
#endif /* UniLinReg_hpp */
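A small worked example for UniLinReg, using toy data (assumed). With y = 2x + 1 exactly, the estimators used by the constructor give b1 = cov(x, y)/var(x) = 2 and b0 = mean(y) - b1*mean(x) = 1.

#include "UniLinReg/UniLinReg.hpp"
#include <iostream>
#include <vector>

int main(){
    std::vector<double> x = {1, 2, 3, 4};
    std::vector<double> y = {3, 5, 7, 9}; // y = 2x + 1
    MLPP::UniLinReg model(x, y);
    std::cout << "Prediction at x = 5: " << model.modelTest(5) << std::endl; // 11
    return 0;
}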

397
MLPP/Utilities/Utilities.cpp Normal file
View File

@ -0,0 +1,397 @@
//
// Utilities.cpp
//
// Created by Marc Melikyan on 1/16/21.
//
#include <iostream>
#include <string>
#include <random>
#include <fstream>
#include "Utilities.hpp"
namespace MLPP{
std::vector<double> Utilities::weightInitialization(int n, std::string type){
std::random_device rd;
std::default_random_engine generator(rd());
std::vector<double> weights;
for(int i = 0; i < n; i++){
// Floating-point divisions keep the fan-in based variances from truncating to zero.
if(type == "XavierNormal"){
std::normal_distribution<double> distribution(0, sqrt(2.0 / (n + 1)));
weights.push_back(distribution(generator));
}
else if(type == "XavierUniform"){
std::uniform_real_distribution<double> distribution(-sqrt(6.0 / (n + 1)), sqrt(6.0 / (n + 1)));
weights.push_back(distribution(generator));
}
else if(type == "HeNormal"){
std::normal_distribution<double> distribution(0, sqrt(2.0 / n));
weights.push_back(distribution(generator));
}
else if(type == "HeUniform"){
std::uniform_real_distribution<double> distribution(-sqrt(6.0 / n), sqrt(6.0 / n));
weights.push_back(distribution(generator));
}
else if(type == "LeCunNormal"){
std::normal_distribution<double> distribution(0, sqrt(1.0 / n));
weights.push_back(distribution(generator));
}
else if(type == "LeCunUniform"){
std::uniform_real_distribution<double> distribution(-sqrt(3.0/n), sqrt(3.0/n));
weights.push_back(distribution(generator));
}
else if(type == "Uniform"){
std::uniform_real_distribution<double> distribution(-1/sqrt(n), 1/sqrt(n));
weights.push_back(distribution(generator));
}
else{
std::uniform_real_distribution<double> distribution(0, 1);
weights.push_back(distribution(generator));
}
}
return weights;
}
double Utilities::biasInitialization(){
std::random_device rd;
std::default_random_engine generator(rd());
std::uniform_real_distribution<double> distribution(0,1);
return distribution(generator);
}
std::vector<std::vector<double>> Utilities::weightInitialization(int n, int m, std::string type){
std::random_device rd;
std::default_random_engine generator(rd());
std::vector<std::vector<double>> weights;
weights.resize(n);
for(int i = 0; i < n; i++){
for(int j = 0; j < m; j++){
// Floating-point divisions keep the fan-in / fan-out based variances from truncating to zero.
if(type == "XavierNormal"){
std::normal_distribution<double> distribution(0, sqrt(2.0 / (n + m)));
weights[i].push_back(distribution(generator));
}
else if(type == "XavierUniform"){
std::uniform_real_distribution<double> distribution(-sqrt(6.0 / (n + m)), sqrt(6.0 / (n + m)));
weights[i].push_back(distribution(generator));
}
else if(type == "HeNormal"){
std::normal_distribution<double> distribution(0, sqrt(2.0 / n));
weights[i].push_back(distribution(generator));
}
else if(type == "HeUniform"){
std::uniform_real_distribution<double> distribution(-sqrt(6.0 / n), sqrt(6.0 / n));
weights[i].push_back(distribution(generator));
}
else if(type == "LeCunNormal"){
std::normal_distribution<double> distribution(0, sqrt(1.0 / n));
weights[i].push_back(distribution(generator));
}
else if(type == "LeCunUniform"){
std::uniform_real_distribution<double> distribution(-sqrt(3.0/n), sqrt(3.0/n));
weights[i].push_back(distribution(generator));
}
else if(type == "Uniform"){
std::uniform_real_distribution<double> distribution(-1/sqrt(n), 1/sqrt(n));
weights[i].push_back(distribution(generator));
}
else{
std::uniform_real_distribution<double> distribution(0, 1);
weights[i].push_back(distribution(generator));
}
}
}
return weights;
}
std::vector<double> Utilities::biasInitialization(int n){
std::vector<double> bias;
std::random_device rd;
std::default_random_engine generator(rd());
std::uniform_real_distribution<double> distribution(0,1);
for(int i = 0; i < n; i++){
bias.push_back(distribution(generator));
}
return bias;
}
double Utilities::performance(std::vector<double> y_hat, std::vector<double> outputSet){
double correct = 0;
for(int i = 0; i < y_hat.size(); i++){
if(std::round(y_hat[i]) == outputSet[i]){
correct++;
}
}
return correct/y_hat.size();
}
double Utilities::performance(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y){
double correct = 0;
for(int i = 0; i < y_hat.size(); i++){
int sub_correct = 0;
for(int j = 0; j < y_hat[i].size(); j++){
if(std::round(y_hat[i][j]) == y[i][j]){
sub_correct++;
}
}
// A row only counts as correct if every rounded output matches its target.
if(sub_correct == y_hat[i].size()){
correct++;
}
}
return correct/y_hat.size();
}
void Utilities::saveParameters(std::string fileName, std::vector<double> weights, double bias, bool app, int layer){
std::string layer_info = "";
std::ofstream saveFile;
if(layer > -1){
layer_info = " for layer " + std::to_string(layer);
}
if(app){
saveFile.open(fileName.c_str(), std::ios_base::app);
}
else { saveFile.open(fileName.c_str()); }
if(!saveFile.is_open()){
std::cout << fileName << " failed to open." << std::endl;
}
saveFile << "Weight(s)" << layer_info << std::endl;
for(int i = 0; i < weights.size(); i++){
saveFile << weights[i] << std::endl;
}
saveFile << "Bias" << layer_info << std::endl;
saveFile << bias << std::endl;
saveFile.close();
}
void Utilities::saveParameters(std::string fileName, std::vector<double> weights, std::vector<double> initial, double bias, bool app, int layer){
std::string layer_info = "";
std::ofstream saveFile;
if(layer > -1){
layer_info = " for layer " + std::to_string(layer);
}
if(app){
saveFile.open(fileName.c_str(), std::ios_base::app);
}
else { saveFile.open(fileName.c_str()); }
if(!saveFile.is_open()){
std::cout << fileName << " failed to open." << std::endl;
}
saveFile << "Weight(s)" << layer_info << std::endl;
for(int i = 0; i < weights.size(); i++){
saveFile << weights[i] << std::endl;
}
saveFile << "Initial(s)" << layer_info << std::endl;
for(int i = 0; i < initial.size(); i++){
saveFile << initial[i] << std::endl;
}
saveFile << "Bias" << layer_info << std::endl;
saveFile << bias << std::endl;
saveFile.close();
}
void Utilities::saveParameters(std::string fileName, std::vector<std::vector<double>> weights, std::vector<double> bias, bool app, int layer){
std::string layer_info = "";
std::ofstream saveFile;
if(layer > -1){
layer_info = " for layer " + std::to_string(layer);
}
if(app){
saveFile.open(fileName.c_str(), std::ios_base::app);
}
else { saveFile.open(fileName.c_str()); }
if(!saveFile.is_open()){
std::cout << fileName << " failed to open." << std::endl;
}
saveFile << "Weight(s)" << layer_info << std::endl;
for(int i = 0; i < weights.size(); i++){
for(int j = 0; j < weights[i].size(); j++){
saveFile << weights[i][j] << std::endl;
}
}
saveFile << "Bias(es)" << layer_info << std::endl;
for(int i = 0; i < bias.size(); i++){
saveFile << bias[i] << std::endl;
}
saveFile.close();
}
void Utilities::UI(std::vector<double> weights, double bias){
std::cout << "Values of the weight(s):" << std::endl;
for(int i = 0; i < weights.size(); i++){
std::cout << weights[i] << std::endl;
}
std:: cout << "Value of the bias:" << std::endl;
std::cout << bias << std::endl;
}
void Utilities::UI(std::vector<std::vector<double>> weights, std::vector<double> bias){
std::cout << "Values of the weight(s):" << std::endl;
for(int i = 0; i < weights.size(); i++){
for(int j = 0; j < weights[i].size(); j++){
std::cout << weights[i][j] << std::endl;
}
}
std::cout << "Value of the biases:" << std::endl;
for(int i = 0; i < bias.size(); i++){
std::cout << bias[i] << std::endl;
}
}
void Utilities::UI(std::vector<double> weights, std::vector<double> initial, double bias){
std::cout << "Values of the weight(s):" << std::endl;
for(int i = 0; i < weights.size(); i++){
std::cout << weights[i] << std::endl;
}
std::cout << "Values of the initial(s):" << std::endl;
for(int i = 0; i < initial.size(); i++){
std::cout << initial[i] << std::endl;
}
std:: cout << "Value of the bias:" << std::endl;
std::cout << bias << std::endl;
}
void Utilities::CostInfo(int epoch, double cost_prev, double Cost){
std::cout << "-----------------------------------" << std::endl;
std::cout << "This is epoch: " << epoch << std::endl;
std::cout << "The cost function has been minimized by " << cost_prev - Cost << std::endl;
std::cout << "Current Cost:" << std::endl;
std::cout << Cost << std::endl;
}
std::vector<std::vector<std::vector<double>>> Utilities::createMiniBatches(std::vector<std::vector<double>> inputSet, int n_mini_batch){
int n = inputSet.size();
std::vector<std::vector<std::vector<double>>> inputMiniBatches;
// Creating the mini-batches
for(int i = 0; i < n_mini_batch; i++){
std::vector<std::vector<double>> currentInputSet;
for(int j = 0; j < n/n_mini_batch; j++){
currentInputSet.push_back(inputSet[n/n_mini_batch * i + j]);
}
inputMiniBatches.push_back(currentInputSet);
}
if(double(n)/double(n_mini_batch) - int(n/n_mini_batch) != 0){
for(int i = 0; i < n - n/n_mini_batch * n_mini_batch; i++){
inputMiniBatches[n_mini_batch - 1].push_back(inputSet[n/n_mini_batch * n_mini_batch + i]);
}
}
return inputMiniBatches;
}
std::tuple<std::vector<std::vector<std::vector<double>>>, std::vector<std::vector<double>>> Utilities::createMiniBatches(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, int n_mini_batch){
int n = inputSet.size();
std::vector<std::vector<std::vector<double>>> inputMiniBatches;
std::vector<std::vector<double>> outputMiniBatches;
for(int i = 0; i < n_mini_batch; i++){
std::vector<std::vector<double>> currentInputSet;
std::vector<double> currentOutputSet;
for(int j = 0; j < n/n_mini_batch; j++){
currentInputSet.push_back(inputSet[n/n_mini_batch * i + j]);
currentOutputSet.push_back(outputSet[n/n_mini_batch * i + j]);
}
inputMiniBatches.push_back(currentInputSet);
outputMiniBatches.push_back(currentOutputSet);
}
if(double(n)/double(n_mini_batch) - int(n/n_mini_batch) != 0){
for(int i = 0; i < n - n/n_mini_batch * n_mini_batch; i++){
inputMiniBatches[n_mini_batch - 1].push_back(inputSet[n/n_mini_batch * n_mini_batch + i]);
outputMiniBatches[n_mini_batch - 1].push_back(outputSet[n/n_mini_batch * n_mini_batch + i]);
}
}
return {inputMiniBatches, outputMiniBatches};
}
std::tuple<std::vector<std::vector<std::vector<double>>>, std::vector<std::vector<std::vector<double>>>> Utilities::createMiniBatches(std::vector<std::vector<double>> inputSet, std::vector<std::vector<double>> outputSet, int n_mini_batch){
int n = inputSet.size();
std::vector<std::vector<std::vector<double>>> inputMiniBatches;
std::vector<std::vector<std::vector<double>>> outputMiniBatches;
for(int i = 0; i < n_mini_batch; i++){
std::vector<std::vector<double>> currentInputSet;
std::vector<std::vector<double>> currentOutputSet;
for(int j = 0; j < n/n_mini_batch; j++){
currentInputSet.push_back(inputSet[n/n_mini_batch * i + j]);
currentOutputSet.push_back(outputSet[n/n_mini_batch * i + j]);
}
inputMiniBatches.push_back(currentInputSet);
outputMiniBatches.push_back(currentOutputSet);
}
if(double(n)/double(n_mini_batch) - int(n/n_mini_batch) != 0){
for(int i = 0; i < n - n/n_mini_batch * n_mini_batch; i++){
inputMiniBatches[n_mini_batch - 1].push_back(inputSet[n/n_mini_batch * n_mini_batch + i]);
outputMiniBatches[n_mini_batch - 1].push_back(outputSet[n/n_mini_batch * n_mini_batch + i]);
}
}
return {inputMiniBatches, outputMiniBatches};
}
std::tuple<double, double, double, double> Utilities::TF_PN(std::vector<double> y_hat, std::vector<double> y){
double TP = 0, FP = 0, TN = 0, FN = 0;
for(int i = 0; i < y_hat.size(); i++){
if(y_hat[i] == y[i]){
if(y_hat[i] == 1){
TP++;
}
else{
TN++;
}
}
else{
if(y_hat[i] == 1){
FP++;
}
else{
FN++;
}
}
}
return {TP, FP, TN, FN};
}
double Utilities::recall(std::vector<double> y_hat, std::vector<double> y){
auto [TP, FP, TN, FN] = TF_PN(y_hat, y);
return TP / (TP + FN);
}
double Utilities::precision(std::vector<double> y_hat, std::vector<double> y){
auto [TP, FP, TN, FN] = TF_PN(y_hat, y);
return TP / (TP + FP);
}
double Utilities::accuracy(std::vector<double> y_hat, std::vector<double> y){
auto [TP, FP, TN, FN] = TF_PN(y_hat, y);
return (TP + TN) / (TP + FP + FN + TN);
}
double Utilities::f1_score(std::vector<double> y_hat, std::vector<double> y){
return 2 * precision(y_hat, y) * recall(y_hat, y) / (precision(y_hat, y) + recall(y_hat, y));
}
}

54
MLPP/Utilities/Utilities.hpp Normal file
View File

@ -0,0 +1,54 @@
//
// Utilities.hpp
//
// Created by Marc Melikyan on 1/16/21.
//
#ifndef Utilities_hpp
#define Utilities_hpp
#include <vector>
#include <tuple>
#include <string>
namespace MLPP{
class Utilities{
public:
// Weight Init
static std::vector<double> weightInitialization(int n, std::string type = "Default");
static double biasInitialization();
static std::vector<std::vector<double>> weightInitialization(int n, int m, std::string type = "Default");
static std::vector<double> biasInitialization(int n);
// Cost/Performance related Functions
double performance(std::vector<double> y_hat, std::vector<double> y);
double performance(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y);
// Parameter Saving Functions
void saveParameters(std::string fileName, std::vector<double> weights, double bias, bool app = 0, int layer = -1);
void saveParameters(std::string fileName, std::vector<double> weights, std::vector<double> initial, double bias, bool app = 0, int layer = -1);
void saveParameters(std::string fileName, std::vector<std::vector<double>> weights, std::vector<double> bias, bool app = 0, int layer = -1);
// Gradient Descent related
static void UI(std::vector<double> weights, double bias);
static void UI(std::vector<double> weights, std::vector<double> initial, double bias);
static void UI(std::vector<std::vector<double>> weights, std::vector<double> bias);
static void CostInfo(int epoch, double cost_prev, double Cost);
static std::vector<std::vector<std::vector<double>>> createMiniBatches(std::vector<std::vector<double>> inputSet, int n_mini_batch);
static std::tuple<std::vector<std::vector<std::vector<double>>>, std::vector<std::vector<double>>> createMiniBatches(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, int n_mini_batch);
static std::tuple<std::vector<std::vector<std::vector<double>>>, std::vector<std::vector<std::vector<double>>>> createMiniBatches(std::vector<std::vector<double>> inputSet, std::vector<std::vector<double>> outputSet, int n_mini_batch);
// F1 score, Precision/Recall, TP, FP, TN, FN, etc.
std::tuple<double, double, double, double> TF_PN(std::vector<double> y_hat, std::vector<double> y); //TF_PN = "True", "False", "Positive", "Negative"
double recall(std::vector<double> y_hat, std::vector<double> y);
double precision(std::vector<double> y_hat, std::vector<double> y);
double accuracy(std::vector<double> y_hat, std::vector<double> y);
double f1_score(std::vector<double> y_hat, std::vector<double> y);
private:
};
}
#endif /* Utilities_hpp */
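A minimal usage sketch of the evaluation helpers and the mini-batch splitter declared above, on assumed toy predictions. With 2 true positives, 1 false positive, and 1 false negative below, precision, recall, and F1 all equal 2/3.

#include "Utilities/Utilities.hpp"
#include <iostream>
#include <vector>

int main(){
    MLPP::Utilities util;
    std::vector<double> y_hat = {1, 0, 1, 1, 0, 0};
    std::vector<double> y     = {1, 0, 0, 1, 1, 0};
    std::cout << "Precision: " << util.precision(y_hat, y) << std::endl; // 2/3
    std::cout << "Recall: " << util.recall(y_hat, y) << std::endl;       // 2/3
    std::cout << "F1 score: " << util.f1_score(y_hat, y) << std::endl;   // 2/3
    // Static helper used by the MBGD methods; the last batch absorbs any remainder.
    std::vector<std::vector<double>> X = {{1}, {2}, {3}, {4}, {5}};
    auto batches = MLPP::Utilities::createMiniBatches(X, 2);
    std::cout << "Batches: " << batches.size() << std::endl; // 2
    return 0;
}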

300
MLPP/WGAN/WGAN.cpp Normal file
View File

@ -0,0 +1,300 @@
//
// WGAN.cpp
//
// Created by Marc Melikyan on 11/4/20.
//
#include "WGAN.hpp"
#include "Activation/Activation.hpp"
#include "LinAlg/LinAlg.hpp"
#include "Regularization/Reg.hpp"
#include "Utilities/Utilities.hpp"
#include "Cost/Cost.hpp"
#include <iostream>
#include <cmath>
namespace MLPP {
WGAN::WGAN(double k, std::vector<std::vector<double>> outputSet)
: outputSet(outputSet), n(outputSet.size()), k(k)
{
}
WGAN::~WGAN(){
delete outputLayer;
}
std::vector<std::vector<double>> WGAN::generateExample(int n){
LinAlg alg;
return modelSetTestGenerator(alg.gaussianNoise(n, k));
}
void WGAN::gradientDescent(double learning_rate, int max_epoch, bool UI){
class Cost cost;
LinAlg alg;
double cost_prev = 0;
int epoch = 1;
forwardPass();
const int CRITIC_ITERATIONS = 5; // Wasserstein GAN specific parameter: number of critic updates per generator update.
while(true){
cost_prev = Cost(y_hat, alg.onevec(n));
std::vector<std::vector<double>> generatorInputSet;
std::vector<std::vector<double>> discriminatorInputSet;
std::vector<double> y_hat;
std::vector<double> outputSet;
// Training of the discriminator.
for(int i = 0; i < CRITIC_ITERATIONS; i++){
generatorInputSet = alg.gaussianNoise(n, k);
discriminatorInputSet = modelSetTestGenerator(generatorInputSet);
discriminatorInputSet.insert(discriminatorInputSet.end(), WGAN::outputSet.begin(), WGAN::outputSet.end()); // Fake + real inputs.
y_hat = modelSetTestDiscriminator(discriminatorInputSet);
outputSet = alg.scalarMultiply(-1, alg.onevec(n)); // WGAN uses -1/+1 targets (fake = -1, real = +1) instead of the 0/1 targets of a standard GAN.
std::vector<double> outputSetReal = alg.onevec(n);
outputSet.insert(outputSet.end(), outputSetReal.begin(), outputSetReal.end()); // Fake + real output scores.
auto [cumulativeDiscriminatorHiddenLayerWGrad, outputDiscriminatorWGrad] = computeDiscriminatorGradients(y_hat, outputSet);
cumulativeDiscriminatorHiddenLayerWGrad = alg.scalarMultiply(learning_rate/n, cumulativeDiscriminatorHiddenLayerWGrad);
outputDiscriminatorWGrad = alg.scalarMultiply(learning_rate/n, outputDiscriminatorWGrad);
updateDiscriminatorParameters(cumulativeDiscriminatorHiddenLayerWGrad, outputDiscriminatorWGrad, learning_rate);
}
// Training of the generator.
generatorInputSet = alg.gaussianNoise(n, k);
discriminatorInputSet = modelSetTestGenerator(generatorInputSet);
y_hat = modelSetTestDiscriminator(discriminatorInputSet);
outputSet = alg.onevec(n);
std::vector<std::vector<std::vector<double>>> cumulativeGeneratorHiddenLayerWGrad = computeGeneratorGradients(y_hat, outputSet);
cumulativeGeneratorHiddenLayerWGrad = alg.scalarMultiply(learning_rate/n, cumulativeGeneratorHiddenLayerWGrad);
updateGeneratorParameters(cumulativeGeneratorHiddenLayerWGrad, learning_rate);
forwardPass();
if(UI) { WGAN::UI(epoch, cost_prev, WGAN::y_hat, alg.onevec(n)); }
epoch++;
if(epoch > max_epoch) { break; }
}
}
double WGAN::score(){
LinAlg alg;
Utilities util;
forwardPass();
return util.performance(y_hat, alg.onevec(n));
}
void WGAN::save(std::string fileName){
Utilities util;
if(!network.empty()){
util.saveParameters(fileName, network[0].weights, network[0].bias, 0, 1);
for(int i = 1; i < network.size(); i++){
util.saveParameters(fileName, network[i].weights, network[i].bias, 1, i + 1);
}
util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, 1, network.size() + 1);
}
else{
util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, 0, network.size() + 1);
}
}
void WGAN::addLayer(int n_hidden, std::string activation, std::string weightInit, std::string reg, double lambda, double alpha){
LinAlg alg;
if(network.empty()){
network.push_back(HiddenLayer(n_hidden, activation, alg.gaussianNoise(n, k), weightInit, reg, lambda, alpha));
network[0].forwardPass();
}
else{
network.push_back(HiddenLayer(n_hidden, activation, network[network.size() - 1].a, weightInit, reg, lambda, alpha));
network[network.size() - 1].forwardPass();
}
}
void WGAN::addOutputLayer(std::string weightInit, std::string reg, double lambda, double alpha){
LinAlg alg;
if(!network.empty()){
outputLayer = new OutputLayer(network[network.size() - 1].n_hidden, "Linear", "WassersteinLoss", network[network.size() - 1].a, weightInit, "WeightClipping", -0.01, 0.01);
}
else{ // Should never happen.
outputLayer = new OutputLayer(k, "Linear", "WassersteinLoss", alg.gaussianNoise(n, k), weightInit, "WeightClipping", -0.01, 0.01);
}
}
std::vector<std::vector<double>> WGAN::modelSetTestGenerator(std::vector<std::vector<double>> X){
if(!network.empty()){
network[0].input = X;
network[0].forwardPass();
for(int i = 1; i <= network.size()/2; i++){
network[i].input = network[i - 1].a;
network[i].forwardPass();
}
}
return network[network.size()/2].a;
}
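// The remaining hidden layers plus the output layer act as the critic: they map samples (real or generated) to a scalar score.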
std::vector<double> WGAN::modelSetTestDiscriminator(std::vector<std::vector<double>> X){
if(!network.empty()){
for(int i = network.size()/2 + 1; i < network.size(); i++){
if(i == network.size()/2 + 1){
network[i].input = X;
}
else { network[i].input = network[i - 1].a; }
network[i].forwardPass();
}
outputLayer->input = network[network.size() - 1].a;
}
outputLayer->forwardPass();
return outputLayer->a;
}
double WGAN::Cost(std::vector<double> y_hat, std::vector<double> y){
Reg regularization;
class Cost cost;
double totalRegTerm = 0;
auto cost_function = outputLayer->cost_map[outputLayer->cost];
if(!network.empty()){
for(int i = 0; i < network.size() - 1; i++){
totalRegTerm += regularization.regTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg);
}
}
return (cost.*cost_function)(y_hat, y) + totalRegTerm + regularization.regTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg);
}
void WGAN::forwardPass(){
LinAlg alg;
if(!network.empty()){
network[0].input = alg.gaussianNoise(n, k);
network[0].forwardPass();
for(int i = 1; i < network.size(); i++){
network[i].input = network[i - 1].a;
network[i].forwardPass();
}
outputLayer->input = network[network.size() - 1].a;
}
else{ // Should never happen, though.
outputLayer->input = alg.gaussianNoise(n, k);
}
outputLayer->forwardPass();
y_hat = outputLayer->a;
}
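// Applies the precomputed gradient steps to the critic only: the output layer and the hidden layers above the generator/critic split.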
void WGAN::updateDiscriminatorParameters(std::vector<std::vector<std::vector<double>>> hiddenLayerUpdations, std::vector<double> outputLayerUpdation, double learning_rate){
LinAlg alg;
outputLayer->weights = alg.subtraction(outputLayer->weights, outputLayerUpdation);
outputLayer->bias -= learning_rate * alg.sum_elements(outputLayer->delta) / n;
if(!network.empty()){
network[network.size() - 1].weights = alg.subtraction(network[network.size() - 1].weights, hiddenLayerUpdations[0]);
network[network.size() - 1].bias = alg.subtractMatrixRows(network[network.size() - 1].bias, alg.scalarMultiply(learning_rate/n, network[network.size() - 1].delta));
for(int i = network.size() - 2; i > network.size()/2; i--){
network[i].weights = alg.subtraction(network[i].weights, hiddenLayerUpdations[(network.size() - 2) - i + 1]);
network[i].bias = alg.subtractMatrixRows(network[i].bias, alg.scalarMultiply(learning_rate/n, network[i].delta));
}
}
}
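// Applies the precomputed gradient steps to the generator half of the network (layers network.size()/2 down to 0).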
void WGAN::updateGeneratorParameters(std::vector<std::vector<std::vector<double>>> hiddenLayerUpdations, double learning_rate){
LinAlg alg;
if(!network.empty()){
for(int i = network.size()/2; i >= 0; i--){
//std::cout << network[i].weights.size() << "x" << network[i].weights[0].size() << std::endl;
//std::cout << hiddenLayerUpdations[(network.size() - 2) - i + 1].size() << "x" << hiddenLayerUpdations[(network.size() - 2) - i + 1][0].size() << std::endl;
network[i].weights = alg.subtraction(network[i].weights, hiddenLayerUpdations[(network.size() - 2) - i + 1]);
network[i].bias = alg.subtractMatrixRows(network[i].bias, alg.scalarMultiply(learning_rate/n, network[i].delta));
}
}
}
std::tuple<std::vector<std::vector<std::vector<double>>>, std::vector<double>> WGAN::computeDiscriminatorGradients(std::vector<double> y_hat, std::vector<double> outputSet){
class Cost cost;
Activation avn;
LinAlg alg;
Reg regularization;
std::vector<std::vector<std::vector<double>>> cumulativeHiddenLayerWGrad; // Tensor containing ALL hidden grads.
auto costDeriv = outputLayer->costDeriv_map[outputLayer->cost];
auto outputAvn = outputLayer->activation_map[outputLayer->activation];
outputLayer->delta = alg.hadamard_product((cost.*costDeriv)(y_hat, outputSet), (avn.*outputAvn)(outputLayer->z, 1));
std::vector<double> outputWGrad = alg.mat_vec_mult(alg.transpose(outputLayer->input), outputLayer->delta);
outputWGrad = alg.addition(outputWGrad, regularization.regDerivTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg));
if(!network.empty()){
auto hiddenLayerAvn = network[network.size() - 1].activation_map[network[network.size() - 1].activation];
network[network.size() - 1].delta = alg.hadamard_product(alg.outerProduct(outputLayer->delta, outputLayer->weights), (avn.*hiddenLayerAvn)(network[network.size() - 1].z, 1));
std::vector<std::vector<double>> hiddenLayerWGrad = alg.matmult(alg.transpose(network[network.size() - 1].input), network[network.size() - 1].delta);
cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[network.size() - 1].weights, network[network.size() - 1].lambda, network[network.size() - 1].alpha, network[network.size() - 1].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
//std::cout << "HIDDENLAYER FIRST:" << hiddenLayerWGrad.size() << "x" << hiddenLayerWGrad[0].size() << std::endl;
//std::cout << "WEIGHTS SECOND:" << network[network.size() - 1].weights.size() << "x" << network[network.size() - 1].weights[0].size() << std::endl;
for(int i = network.size() - 2; i > network.size()/2; i--){
auto hiddenLayerAvn = network[i].activation_map[network[i].activation];
network[i].delta = alg.hadamard_product(alg.matmult(network[i + 1].delta, alg.transpose(network[i + 1].weights)), (avn.*hiddenLayerAvn)(network[i].z, 1));
std::vector<std::vector<double>> hiddenLayerWGrad = alg.matmult(alg.transpose(network[i].input), network[i].delta);
cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
}
}
return {cumulativeHiddenLayerWGrad, outputWGrad};
}
std::vector<std::vector<std::vector<double>>> WGAN::computeGeneratorGradients(std::vector<double> y_hat, std::vector<double> outputSet){
class Cost cost;
Activation avn;
LinAlg alg;
Reg regularization;
std::vector<std::vector<std::vector<double>>> cumulativeHiddenLayerWGrad; // Tensor containing ALL hidden grads.
auto costDeriv = outputLayer->costDeriv_map[outputLayer->cost];
auto outputAvn = outputLayer->activation_map[outputLayer->activation];
outputLayer->delta = alg.hadamard_product((cost.*costDeriv)(y_hat, outputSet), (avn.*outputAvn)(outputLayer->z, 1));
std::vector<double> outputWGrad = alg.mat_vec_mult(alg.transpose(outputLayer->input), outputLayer->delta);
outputWGrad = alg.addition(outputWGrad, regularization.regDerivTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg));
if(!network.empty()){
auto hiddenLayerAvn = network[network.size() - 1].activation_map[network[network.size() - 1].activation];
network[network.size() - 1].delta = alg.hadamard_product(alg.outerProduct(outputLayer->delta, outputLayer->weights), (avn.*hiddenLayerAvn)(network[network.size() - 1].z, 1));
std::vector<std::vector<double>> hiddenLayerWGrad = alg.matmult(alg.transpose(network[network.size() - 1].input), network[network.size() - 1].delta);
cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[network.size() - 1].weights, network[network.size() - 1].lambda, network[network.size() - 1].alpha, network[network.size() - 1].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
for(int i = network.size() - 2; i >= 0; i--){
auto hiddenLayerAvn = network[i].activation_map[network[i].activation];
network[i].delta = alg.hadamard_product(alg.matmult(network[i + 1].delta, alg.transpose(network[i + 1].weights)), (avn.*hiddenLayerAvn)(network[i].z, 1));
std::vector<std::vector<double>> hiddenLayerWGrad = alg.matmult(alg.transpose(network[i].input), network[i].delta);
cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
}
}
return cumulativeHiddenLayerWGrad;
}
void WGAN::UI(int epoch, double cost_prev, std::vector<double> y_hat, std::vector<double> outputSet){
Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
std::cout << "Layer " << network.size() + 1 << ": " << std::endl;
Utilities::UI(outputLayer->weights, outputLayer->bias);
if(!network.empty()){
for(int i = network.size() - 1; i >= 0; i--){
std::cout << "Layer " << i + 1 << ": " << std::endl;
Utilities::UI(network[i].weights, network[i].bias);
}
}
}
}

56
MLPP/WGAN/WGAN.hpp Normal file
View File

@ -0,0 +1,56 @@
//
// WGAN.hpp
//
// Created by Marc Melikyan on 11/4/20.
//
#ifndef WGAN_hpp
#define WGAN_hpp
#include "HiddenLayer/HiddenLayer.hpp"
#include "OutputLayer/OutputLayer.hpp"
#include <vector>
#include <tuple>
#include <string>
namespace MLPP{
class WGAN{
public:
WGAN(double k, std::vector<std::vector<double>> outputSet);
~WGAN();
std::vector<std::vector<double>> generateExample(int n);
void gradientDescent(double learning_rate, int max_epoch, bool UI = 1);
double score();
void save(std::string fileName);
void addLayer(int n_hidden, std::string activation, std::string weightInit = "Default", std::string reg = "None", double lambda = 0.5, double alpha = 0.5);
void addOutputLayer(std::string weightInit = "Default", std::string reg = "None", double lambda = 0.5, double alpha = 0.5);
private:
std::vector<std::vector<double>> modelSetTestGenerator(std::vector<std::vector<double>> X); // Evaluator for the generator of the WGAN.
std::vector<double> modelSetTestDiscriminator(std::vector<std::vector<double>> X); // Evaluator for the discriminator of the WGAN.
double Cost(std::vector<double> y_hat, std::vector<double> y);
void forwardPass();
void updateDiscriminatorParameters(std::vector<std::vector<std::vector<double>>> hiddenLayerUpdations, std::vector<double> outputLayerUpdation, double learning_rate);
void updateGeneratorParameters(std::vector<std::vector<std::vector<double>>> hiddenLayerUpdations, double learning_rate);
std::tuple<std::vector<std::vector<std::vector<double>>>, std::vector<double>> computeDiscriminatorGradients(std::vector<double> y_hat, std::vector<double> outputSet);
std::vector<std::vector<std::vector<double>>> computeGeneratorGradients(std::vector<double> y_hat, std::vector<double> outputSet);
void UI(int epoch, double cost_prev, std::vector<double> y_hat, std::vector<double> outputSet);
std::vector<std::vector<double>> outputSet;
std::vector<double> y_hat;
std::vector<HiddenLayer> network;
OutputLayer *outputLayer;
int n;
int k;
};
}
#endif /* WGAN_hpp */

87
MLPP/kNN/kNN.cpp Normal file
View File

@ -0,0 +1,87 @@
//
// kNN.cpp
//
// Created by Marc Melikyan on 10/2/20.
//
#include "kNN.hpp"
#include "LinAlg/LinAlg.hpp"
#include "Utilities/Utilities.hpp"
#include <iostream>
#include <map>
#include <algorithm>
namespace MLPP{
kNN::kNN(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, int k)
: inputSet(inputSet), outputSet(outputSet), k(k)
{
}
std::vector<double> kNN::modelSetTest(std::vector<std::vector<double>> X){
std::vector<double> y_hat;
for(int i = 0; i < X.size(); i++){
y_hat.push_back(modelTest(X[i]));
}
return y_hat;
}
int kNN::modelTest(std::vector<double> x){
return determineClass(nearestNeighbors(x));
}
double kNN::score(){
Utilities util;
return util.performance(modelSetTest(inputSet), outputSet);
}
int kNN::determineClass(std::vector<double> knn){
// Majority vote among the labels of the k nearest neighbors.
std::map<int, int> class_nums;
for(int i = 0; i < knn.size(); i++){
class_nums[knn[i]]++;
}
int max = 0;
int final_class = knn[0];
for(auto [c, v] : class_nums){
if(v > max){
max = v;
final_class = c;
}
}
return final_class;
}
std::vector<double> kNN::nearestNeighbors(std::vector<double> x){
LinAlg alg;
// Labels of the k nearest neighbors.
std::vector<double> knn;
// Marks training points that have already been selected as neighbors.
std::vector<bool> used(inputSet.size(), false);
// Perform this loop until all k nearest neighbors are found, appended, and returned.
for(int i = 0; i < k; i++){
int neighbor = -1;
for(int j = 0; j < inputSet.size(); j++){
if(used[j]){ continue; }
if(neighbor == -1 || alg.euclideanDistance(x, inputSet[j]) < alg.euclideanDistance(x, inputSet[neighbor])){
neighbor = j;
}
}
used[neighbor] = true;
knn.push_back(outputSet[neighbor]); // Store the neighbor's label for voting.
}
return knn;
}
}

35
MLPP/kNN/kNN.hpp Normal file
View File

@ -0,0 +1,35 @@
//
// kNN.hpp
//
// Created by Marc Melikyan on 10/2/20.
//
#ifndef kNN_hpp
#define kNN_hpp
#include <vector>
namespace MLPP{
class kNN{
public:
kNN(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, int k);
std::vector<double> modelSetTest(std::vector<std::vector<double>> X);
int modelTest(std::vector<double> x);
double score();
private:
// Private Model Functions
std::vector<double> nearestNeighbors(std::vector<double> x);
int determineClass(std::vector<double> knn);
// Model Inputs and Parameters
std::vector<std::vector<double>> inputSet;
std::vector<double> outputSet;
int k;
};
}
#endif /* kNN_hpp */

244
README.md Normal file
View File

@ -0,0 +1,244 @@
# ML++
Machine learning is a vast and exciting discipline, garnering attention from specialists of many fields. Unfortunately, for C++ programmers and enthusiasts, there appears to be a lack of support in the field of machine learning. To fill that void and give C++ a true foothold in the ML sphere, this library was written. The intent with this library is for it to act as a crossroads between low-level developers and machine learning engineers.
<p align="center">
<img src="https://user-images.githubusercontent.com/78002988/119920911-f3338d00-bf21-11eb-89b3-c84bf7c9f4ac.gif"
width = 600 height = 400>
</p>
## Installation
Begin by downloading the header files for the ML++ library. You can do this by cloning the repository and extracting the MLPP directory within it:
```
git clone https://github.com/novak-99/MLPP
```
Next, execute the "buildSO.sh" shell script:
```
sudo ./buildSO.sh
```
After doing so, maintain the ML++ source files in a local directory and include them in this fashion:
```cpp
#include "MLPP/Stat/Stat.hpp" // Including the ML++ statistics module.
int main(){
...
}
```
Finally, once you have finished creating your project, compile it using g++:
```
g++ main.cpp /usr/local/lib/MLPP.so --std=c++17
```
## Usage
Please note that ML++ uses the ```std::vector<double>``` data type for emulating vectors, and the ```std::vector<std::vector<double>>``` data type for emulating matrices.
Begin by including the respective header file of your choice.
```cpp
#include "MLPP/LinReg/LinReg.hpp"
```
Next, instantiate an object of the class. Don't forget to pass the input set and output set as parameters.
```cpp
LinReg model(inputSet, outputSet);
```
Afterwards, call the optimizer that you would like to use. For iterative optimizers such as gradient descent, include the learning rate, epoch number, and whether or not to utilize the UI panel.
```cpp
model.gradientDescent(0.001, 1000, 0);
```
Great, you are now ready to test! To test a singular testing instance, utilize the following function:
```cpp
model.modelTest(testSetInstance);
```
This will return the model's singular prediction for that example.
To test an entire test set, use the following function:
```cpp
model.modelSetTest(testSet);
```
The result will be the model's predictions for the entire dataset.
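Putting these steps together, a minimal end-to-end program might look like the sketch below. The tiny dataset, the choice of `LinReg`, and the hyperparameters are purely illustrative; substitute your own data and tune the learning rate and epoch count as needed.
```cpp
// Minimal sketch of the workflow above: include a model header, construct the
// model from an input and output set, train it, then evaluate it.
// The dataset values here are made up purely for illustration.
#include <iostream>
#include <vector>

#include "MLPP/LinReg/LinReg.hpp"

using namespace MLPP;

int main(){
    // Each inner vector is one training example; each element is a feature.
    std::vector<std::vector<double>> inputSet = {{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}};
    std::vector<double> outputSet = {3, 5, 7, 9, 11};

    LinReg model(inputSet, outputSet);
    model.gradientDescent(0.001, 1000, 0); // learning rate, max epochs, UI panel off

    std::vector<double> predictions = model.modelSetTest(inputSet); // predictions for the whole set
    for(double y_hat : predictions){
        std::cout << y_hat << std::endl;
    }
    std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl;
    return 0;
}
```
Compile it the same way as above, e.g. ```g++ main.cpp /usr/local/lib/MLPP.so --std=c++17```.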
## Contents of the Library
1. ***Regression***
1. Linear Regression
2. Logistic Regression
3. Softmax Regression
4. Exponential Regression
5. Probit Regression
6. CLogLog Regression
7. Tanh Regression
2. ***Deep, Dynamically Sized Neural Networks***
1. Possible Activation Functions
- Linear
- Sigmoid
- Softmax
- Swish
- Mish
- SinC
- Softplus
- Softsign
- CLogLog
- Logit
- Gaussian CDF
- RELU
- GELU
- Sign
- Unit Step
- Sinh
- Cosh
- Tanh
- Csch
- Sech
- Coth
- Arsinh
- Arcosh
- Artanh
- Arcsch
- Arsech
- Arcoth
2. Possible Optimization Algorithms
- Batch Gradient Descent
- Mini-Batch Gradient Descent
- Stochastic Gradient Descent
- Gradient Descent with Momentum
- Nesterov Accelerated Gradient
- Adagrad Optimizer
- Adadelta Optimizer
- Adam Optimizer
- Adamax Optimizer
- Nadam Optimizer
- AMSGrad Optimizer
- 2nd Order Newton-Raphson Optimizer*
- Normal Equation*
<p></p>
*Only available for linear regression
3. Possible Loss Functions
- MSE
- RMSE
- MAE
- MBE
- Log Loss
- Cross Entropy
- Hinge Loss
- Wasserstein Loss
4. Possible Regularization Methods
- Lasso
- Ridge
- ElasticNet
- Weight Clipping
5. Possible Weight Initialization Methods
- Uniform
- Xavier Normal
- Xavier Uniform
- He Normal
- He Uniform
- LeCun Normal
- LeCun Uniform
6. Possible Learning Rate Schedulers
- Time Based
- Epoch Based
- Step Based
- Exponential
3. ***Prebuilt Neural Networks***
1. Multilayer Perceptron
2. Autoencoder
3. Softmax Network
4. ***Generative Modeling***
1. Tabular Generative Adversarial Networks
2. Tabular Wasserstein Generative Adversarial Networks
5. ***Natural Language Processing***
1. Word2Vec (Continuous Bag of Words, Skip-Gram)
2. Stemming
3. Bag of Words
4. TFIDF
5. Tokenization
6. Auxiliary Text Processing Functions
6. ***Computer Vision***
1. The Convolution Operation
2. Max, Min, Average Pooling
3. Global Max, Min, Average Pooling
4. Prebuilt Feature Detectors
- Horizontal/Vertical Prewitt Filter
- Horizontal/Vertical Sobel Filter
- Horizontal/Vertical Scharr Filter
- Horizontal/Vertical Roberts Filter
- Gaussian Filter
- Harris Corner Detector
7. ***Principal Component Analysis***
8. ***Naive Bayes Classifiers***
1. Multinomial Naive Bayes
2. Bernoulli Naive Bayes
3. Gaussian Naive Bayes
9. ***Support Vector Classification***
1. Primal Formulation (Hinge Loss Objective)
2. Dual Formulation (Via Lagrangian Multipliers)
10. ***K-Means***
11. ***k-Nearest Neighbors***
12. ***Outlier Finder (Using z-scores)***
13. ***Matrix Decompositions***
1. SVD Decomposition
2. Cholesky Decomposition
- Positive Definiteness Checker
3. QR Decomposition
14. ***Numerical Analysis***
1. Numerical Differentiation
- Univariate Functions
- Multivariate Functions
2. Jacobian Vector Calculator
3. Hessian Matrix Calculator
4. Function approximator
- Constant Approximation
- Linear Approximation
- Quadratic Approximation
- Cubic Approximation
5. Differential Equation Solvers
- Euler's Method
- Growth Method
15. ***Mathematical Transforms***
1. Discrete Cosine Transform
16. ***Linear Algebra Module***
17. ***Statistics Module***
18. ***Data Processing Module***
1. Setting and Printing Datasets
2. Available Datasets
1. Wisconsin Breast Cancer Dataset
- Binary
- SVM
2. MNIST Dataset
- Train
- Test
3. Iris Flower Dataset
4. Wine Dataset
5. California Housing Dataset
6. Fires and Crime Dataset (Chicago)
3. Feature Scaling
4. Mean Normalization
5. One Hot Representation
6. Reverse One Hot Representation
7. Supported Color Space Conversions
- RGB to Grayscale
- RGB to HSV
- RGB to YCbCr
- RGB to XYZ
- XYZ to RGB
19. ***Utilities***
1. TP, FP, TN, FN function
2. Precision
3. Recall
4. Accuracy
5. F1 score
## What's in the Works?
ML++, like most frameworks, is dynamic and constantly changing. This is especially important in the world of ML, where new algorithms and techniques are being developed day by day. Here are a couple of things currently being developed for ML++:
<p>
- Convolutional Neural Networks
</p>
<p>
- Kernels for SVMs
</p>
<p>
- Support Vector Regression
</p>
## Citations
Various different materials helped me along the way of creating ML++, and I would like to give credit to several of them here. [This](https://www.tutorialspoint.com/cplusplus-program-to-compute-determinant-of-a-matrix) article by TutorialsPoint was a big help when trying to implement the determinant of a matrix, and [this](https://www.geeksforgeeks.org/adjoint-inverse-matrix/) article by GeeksForGeeks was very helpful when trying to take the adjoint and inverse of a matrix.

722
main.cpp Normal file
View File

@ -0,0 +1,722 @@
//
// main.cpp
// TEST_APP
//
// Created by Marc on 1/20/21.
//
// THINGS CURRENTLY TO DO:
// POLYMORPHIC IMPLEMENTATION OF REGRESSION CLASSES
// EXTEND SGD/MBGD SUPPORT FOR DYN. SIZED ANN
// ADD LEAKYRELU, ELU, SELU TO ANN
// FIX VECTOR/MATRIX/TENSOR RESIZE ROUTINE
// HYPOTHESIS TESTING CLASS
// GAUSS MARKOV CHECKER CLASS
#include <iostream>
#include <ctime>
#include <cmath>
#include <vector>
#include "MLPP/UniLinReg/UniLinReg.hpp"
#include "MLPP/LinReg/LinReg.hpp"
#include "MLPP/LogReg/LogReg.hpp"
#include "MLPP/CLogLogReg/CLogLogReg.hpp"
#include "MLPP/ExpReg/ExpReg.hpp"
#include "MLPP/ProbitReg/ProbitReg.hpp"
#include "MLPP/SoftmaxReg/SoftmaxReg.hpp"
#include "MLPP/TanhReg/TanhReg.hpp"
#include "MLPP/MLP/MLP.hpp"
#include "MLPP/SoftmaxNet/SoftmaxNet.hpp"
#include "MLPP/AutoEncoder/AutoEncoder.hpp"
#include "MLPP/ANN/ANN.hpp"
#include "MLPP/MANN/MANN.hpp"
#include "MLPP/MultinomialNB/MultinomialNB.hpp"
#include "MLPP/BernoulliNB/BernoulliNB.hpp"
#include "MLPP/GaussianNB/GaussianNB.hpp"
#include "MLPP/KMeans/KMeans.hpp"
#include "MLPP/kNN/kNN.hpp"
#include "MLPP/PCA/PCA.hpp"
#include "MLPP/OutlierFinder/OutlierFinder.hpp"
#include "MLPP/Stat/Stat.hpp"
#include "MLPP/LinAlg/LinAlg.hpp"
#include "MLPP/Activation/Activation.hpp"
#include "MLPP/Cost/Cost.hpp"
#include "MLPP/Data/Data.hpp"
#include "MLPP/Convolutions/Convolutions.hpp"
#include "MLPP/SVC/SVC.hpp"
#include "MLPP/NumericalAnalysis/NumericalAnalysis.hpp"
#include "MLPP/DualSVC/DualSVC.hpp"
#include "MLPP/GAN/GAN.hpp"
#include "MLPP/WGAN/WGAN.hpp"
#include "MLPP/Transforms/Transforms.hpp"
using namespace MLPP;
// double f(double x){
// return x*x*x + 2*x - 2;
// }
double f(double x){
return sin(x);
}
double f_prime(double x){
return 2 * x;
}
double f_prime_2var(std::vector<double> x){
return 2 * x[0] + x[1];
}
/*
y = x^3 + 2x - 2
y' = 3x^2 + 2
y'' = 6x
y''(2) = 12
*/
// double f_mv(std::vector<double> x){
// return x[0] * x[0] + x[0] * x[1] * x[1] + x[1] + 5;
// }
/*
Where x, y = x[0], x[1], this function is defined as:
f(x, y) = x^2 + xy^2 + y + 5
∂f/∂x = 2x + y^2
∂^2f/∂x∂y = 2y
*/
double f_mv(std::vector<double> x){
return x[0] * x[0] * x[0] + x[0] + x[1] * x[1] * x[1] * x[0] + x[2] * x[2] * x[1];
}
/*
Where x, y, z = x[0], x[1], x[2], this function is defined as:
f(x, y, z) = x^3 + x + xy^3 + yz^2

∂f/∂x = 3x^2 + 1 + y^3
∂^2f/∂x^2 = 6x
∂^3f/∂x^3 = 6

∂f/∂y = 3xy^2 + z^2
∂^2f/∂y^2 = 6xy
∂^3f/∂y^3 = 6x
∂^2f/∂y∂x = 3y^2

∂f/∂z = 2yz
∂^2f/∂z^2 = 2y
∂^3f/∂z^3 = 0
*/
int main() {
// // OBJECTS
Stat stat;
LinAlg alg;
Activation avn;
Cost cost;
Data data;
Convolutions conv;
// DATA SETS
// std::vector<std::vector<double>> inputSet = {{1,2,3,4,5,6,7,8,9,10}, {3,5,9,12,15,18,21,24,27,30}};
// std::vector<double> outputSet = {2,4,6,8,10,12,14,16,18,20};
// std::vector<std::vector<double>> inputSet = {{1,2,3,4,5,6,7,8}, {0,0,0,0,1,1,1,1}};
// std::vector<double> outputSet = {0,0,0,0,1,1,1,1};
// std::vector<std::vector<double>> inputSet = {{4,3,0,-3,-4}, {0,0,0,1,1}};
// std::vector<double> outputSet = {1,1,0,-1,-1};
// std::vector<std::vector<double>> inputSet = {{0,1,2,3,4}};
// std::vector<double> outputSet = {1,2,4,8,16};
//std::vector<std::vector<double>> inputSet = {{32, 0, 7}, {2, 28, 17}, {0, 9, 23}};
// std::vector<std::vector<double>> inputSet = {{1,1,0,0,1}, {0,0,1,1,1}, {0,1,1,0,1}};
// std::vector<double> outputSet = {0,1,0,1,1};
// std::vector<std::vector<double>> inputSet = {{0,0,1,1}, {0,1,0,1}};
// std::vector<double> outputSet = {0,1,1,0};
// // STATISTICS
// std::vector<double> x = {1,2,3,4,5,6,7,8,9,10};
// std::vector<double> y = {10,9,8,7,6,5,4,3,2,1};
// std::vector<double> w = {0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1};
// std::cout << "Arithmetic Mean: " << stat.mean(x) << std::endl;
// std::cout << "Median: " << stat.median(x) << std::endl;
// alg.printVector(x);
// alg.printVector(stat.mode(x));
// std::cout << "Range: " << stat.range(x) << std::endl;
// std::cout << "Midrange: " << stat.midrange(x) << std::endl;
// std::cout << "Absolute Average Deviation: " << stat.absAvgDeviation(x) << std::endl;
// std::cout << "Standard Deviation: " << stat.standardDeviation(x) << std::endl;
// std::cout << "Variance: " << stat.variance(x) << std::endl;
// std::cout << "Covariance: " << stat.covariance(x, y) << std::endl;
// std::cout << "Correlation: " << stat.correlation(x, y) << std::endl;
// std::cout << "R^2: " << stat.R2(x, y) << std::endl;
// // Returns 1 - (1/k^2)
// std::cout << "Chebyshev Inequality: " << stat.chebyshevIneq(2) << std::endl;
// std::cout << "Weighted Mean: " << stat.weightedMean(x, w) << std::endl;
// std::cout << "Geometric Mean: " << stat.geometricMean(x) << std::endl;
// std::cout << "Harmonic Mean: " << stat.harmonicMean(x) << std::endl;
// std::cout << "Root Mean Square (Quadratic mean): " << stat.RMS(x) << std::endl;
// std::cout << "Power Mean (p = 5): " << stat.powerMean(x, 5) << std::endl;
// std::cout << "Lehmer Mean (p = 5): " << stat.lehmerMean(x, 5) << std::endl;
// std::cout << "Weighted Lehmer Mean (p = 5): " << stat.weightedLehmerMean(x, w, 5) << std::endl;
// std::cout << "Contraharmonic Mean: " << stat.contraHarmonicMean(x) << std::endl;
// std::cout << "Hernonian Mean: " << stat.heronianMean(1, 10) << std::endl;
// std::cout << "Heinz Mean (x = 1): " << stat.heinzMean(1, 10, 1) << std::endl;
// std::cout << "Neuman-Sandor Mean: " << stat.neumanSandorMean(1, 10) << std::endl;
// std::cout << "Stolarsky Mean (p = 5): " << stat.stolarskyMean(1, 10, 5) << std::endl;
// std::cout << "Identric Mean: " << stat.identricMean(1, 10) << std::endl;
// std::cout << "Logarithmic Mean: " << stat.logMean(1, 10) << std::endl;
// std::cout << "Absolute Average Deviation: " << stat.absAvgDeviation(x) << std::endl;
// LINEAR ALGEBRA
// std::vector<std::vector<double>> square = {{1, 1}, {-1, 1}, {1, -1}, {-1, -1}};
// alg.printMatrix(alg.rotate(square, M_PI/4));
// std::vector<std::vector<double>> A = {
// {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
// {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
// };
// std::vector<double> a = {4, 3, 1, 3};
// std::vector<double> b = {3, 5, 6, 1};
// alg.printMatrix(alg.matmult(alg.transpose(A), A));
// std::cout << std::endl;
// std::cout << alg.dot(a, b) << std::endl;
// std::cout << std::endl;
// alg.printMatrix(alg.hadamard_product(A, A));
// std::cout << std::endl;
// alg.printMatrix(alg.identity(10));
// UNIVARIATE LINEAR REGRESSION
// Univariate, simple linear regression, case where k = 1
// auto [inputSet, outputSet] = data.loadFiresAndCrime();
// UniLinReg model(inputSet, outputSet);
// alg.printVector(model.modelSetTest(inputSet));
// // MULTIVARIATE LINEAR REGRESSION
// auto [inputSet, outputSet] = data.loadCaliforniaHousing();
// LinReg model(inputSet, outputSet); // Can use Lasso, Ridge, ElasticNet Reg
//model.gradientDescent(0.001, 30, 0);
//model.SGD(0.00000001, 300000, 1);
//model.MBGD(0.001, 10000, 2, 1);
//model.normalEquation();
// LinReg adamModel(alg.transpose(inputSet), outputSet);
// alg.printVector(model.modelSetTest(inputSet));
// std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl;
// const int TRIAL_NUM = 1000;
// double scoreSGD = 0;
// double scoreADAM = 0;
// for(int i = 0; i < TRIAL_NUM; i++){
// LinReg model(alg.transpose(inputSet), outputSet);
// model.MBGD(0.001, 5, 1, 0);
// scoreSGD += model.score();
// LinReg adamModel(alg.transpose(inputSet), outputSet);
// adamModel.Adam(0.1, 5, 1, 0.9, 0.999, 1e-8, 0); // Change batch size = sgd, bgd
// scoreADAM += adamModel.score();
// }
// std::cout << "ACCURACY, AVG, SGD: " << 100 * scoreSGD/TRIAL_NUM << "%" << std::endl;
// std::cout << std::endl;
// std::cout << "ACCURACY, AVG, ADAM: " << 100 * scoreADAM/TRIAL_NUM << "%" << std::endl;
// std::cout << "Total epoch num: 300" << std::endl;
// std::cout << "Method: 1st Order w/ Jacobians" << std::endl;
// LinReg model(alg.transpose(inputSet), outputSet); // Can use Lasso, Ridge, ElasticNet Reg
// model.gradientDescent(0.001, 300, 0);
// std::cout << "--------------------------------------------" << std::endl;
// std::cout << "Total epoch num: 300" << std::endl;
// std::cout << "Method: Newtonian 2nd Order w/ Hessians" << std::endl;
// LinReg model2(alg.transpose(inputSet), outputSet);
// model2.NewtonRaphson(1.5, 300, 0);
// // LOGISTIC REGRESSION
// auto [inputSet, outputSet] = data.loadBreastCancer();
// LogReg model(inputSet, outputSet);
// model.SGD(0.001, 100000, 0);
// alg.printVector(model.modelSetTest(inputSet));
// std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl;
// // PROBIT REGRESSION
// std::vector<std::vector<double>> inputSet;
// std::vector<double> outputSet;
// data.setData(30, "/Users/marcmelikyan/Desktop/Data/BreastCancer.csv", inputSet, outputSet);
// ProbitReg model(inputSet, outputSet);
// model.SGD(0.001, 10000, 1);
// alg.printVector(model.modelSetTest(inputSet));
// std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl;
// // CLOGLOG REGRESSION
// std::vector<std::vector<double>> inputSet = {{1,2,3,4,5,6,7,8}, {0,0,0,0,1,1,1,1}};
// std::vector<double> outputSet = {0,0,0,0,1,1,1,1};
// CLogLogReg model(alg.transpose(inputSet), outputSet);
// model.SGD(0.1, 10000, 0);
// alg.printVector(model.modelSetTest(alg.transpose(inputSet)));
// std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl;
// // EXPREG REGRESSION
// std::vector<std::vector<double>> inputSet = {{0,1,2,3,4}};
// std::vector<double> outputSet = {1,2,4,8,16};
// ExpReg model(alg.transpose(inputSet), outputSet);
// model.SGD(0.001, 10000, 0);
// alg.printVector(model.modelSetTest(alg.transpose(inputSet)));
// std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl;
// // TANH REGRESSION
// std::vector<std::vector<double>> inputSet = {{4,3,0,-3,-4}, {0,0,0,1,1}};
// std::vector<double> outputSet = {1,1,0,-1,-1};
// TanhReg model(alg.transpose(inputSet), outputSet);
// model.SGD(0.1, 10000, 0);
// alg.printVector(model.modelSetTest(alg.transpose(inputSet)));
// std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl;
// // SOFTMAX REGRESSION
// auto [inputSet, outputSet] = data.loadIris();
// SoftmaxReg model(inputSet, outputSet);
// model.SGD(0.1, 10000, 1);
// alg.printMatrix(model.modelSetTest(inputSet));
// std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl;
// // SUPPORT VECTOR CLASSIFICATION
// auto [inputSet, outputSet] = data.loadBreastCancerSVC();
// SVC model(inputSet, outputSet, 1);
// model.SGD(0.00001, 100000, 1);
// alg.printVector(model.modelSetTest(inputSet));
// std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl;
// SoftmaxReg model(inputSet, outputSet);
// model.SGD(0.001, 20000, 0);
// alg.printMatrix(model.modelSetTest(inputSet));
// // MLP
// std::vector<std::vector<double>> inputSet = {{0,0,1,1}, {0,1,0,1}};
// inputSet = alg.transpose(inputSet);
// std::vector<double> outputSet = {0,1,1,0};
// MLP model(inputSet, outputSet, 2);
// model.gradientDescent(0.1, 10000, 0);
// alg.printVector(model.modelSetTest(inputSet));
// std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl;
// // SOFTMAX NETWORK
// auto [inputSet, outputSet] = data.loadWine();
// SoftmaxNet model(inputSet, outputSet, 1);
// model.gradientDescent(0.01, 100000, 1);
// alg.printMatrix(model.modelSetTest(inputSet));
// std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl;
// // AUTOENCODER
// std::vector<std::vector<double>> inputSet = {{1,2,3,4,5,6,7,8,9,10}, {3,5,9,12,15,18,21,24,27,30}};
// AutoEncoder model(alg.transpose(inputSet), 5);
// model.SGD(0.001, 300000, 0);
// alg.printMatrix(model.modelSetTest(alg.transpose(inputSet)));
// std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl;
// DYNAMICALLY SIZED ANN
// Possible Weight Init Methods: Default, Uniform, HeNormal, HeUniform, XavierNormal, XavierUniform
// Possible Activations: Linear, Sigmoid, Swish, Softplus, Softsign, CLogLog, Ar{Sinh, Cosh, Tanh, Csch, Sech, Coth}, GaussianCDF, GELU, UnitStep
// Possible Loss Functions: MSE, RMSE, MBE, LogLoss, CrossEntropy, HingeLoss
// std::vector<std::vector<double>> inputSet = {{0,0,1,1}, {0,1,0,1}};
// std::vector<double> outputSet = {0,1,1,0};
// ANN ann(alg.transpose(inputSet), outputSet);
// ann.addLayer(2, "Cosh");
// ann.addOutputLayer("Sigmoid", "LogLoss");
// ann.AMSGrad(0.1, 10000, 1, 0.9, 0.999, 0.000001, 1);
// ann.Adadelta(1, 1000, 2, 0.9, 0.000001, 1);
// ann.Momentum(0.1, 8000, 2, 0.9, true, 1);
//ann.setLearningRateScheduler("Step", 0.5, 1000);
// ann.gradientDescent(0.01, 30000);
// alg.printVector(ann.modelSetTest(alg.transpose(inputSet)));
// std::cout << "ACCURACY: " << 100 * ann.score() << "%" << std::endl;
std::vector<std::vector<double>> outputSet = {{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20},
{2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40}};
WGAN gan(2, alg.transpose(outputSet)); // our gan is a wasserstein gan (wgan)
gan.addLayer(5, "Sigmoid");
gan.addLayer(2, "RELU");
gan.addLayer(5, "Sigmoid");
gan.addOutputLayer(); // User can specify weight init- if necessary.
gan.gradientDescent(0.1, 55000, 0);
std::cout << "GENERATED INPUT: (Gaussian-sampled noise):" << std::endl;
alg.printMatrix(gan.generateExample(100));
// typedef std::vector<std::vector<double>> Matrix;
// typedef std::vector<double> Vector;
// Matrix inputSet = {{0,0}, {0,1}, {1,0}, {1,1}}; // XOR
// Vector outputSet = {0,1,1,0};
// ANN ann(inputSet, outputSet);
// ann.addLayer(5, "Sigmoid");
// ann.addLayer(8, "Sigmoid"); // Add more layers as needed.
// ann.addOutputLayer("Sigmoid", "LogLoss");
// ann.gradientDescent(1, 20000, 1);
// Vector predictions = ann.modelSetTest(inputSet);
// alg.printVector(predictions); // Testing out the model's preds for train set.
// std::cout << "ACCURACY: " << 100 * ann.score() << "%" << std::endl; // Accuracy.
// // DYNAMICALLY SIZED MANN (Multidimensional Output ANN)
// std::vector<std::vector<double>> inputSet = {{1,2,3},{2,4,6},{3,6,9},{4,8,12}};
// std::vector<std::vector<double>> outputSet = {{1,5}, {2,10}, {3,15}, {4,20}};
// MANN mann(inputSet, outputSet);
// mann.addOutputLayer("Linear", "MSE");
// mann.gradientDescent(0.001, 80000, 0);
// alg.printMatrix(mann.modelSetTest(inputSet));
// std::cout << "ACCURACY: " << 100 * mann.score() << "%" << std::endl;
// std::vector<std::vector<double>> inputSet;
// std::vector<double> tempOutputSet;
// data.setData(4, "/Users/marcmelikyan/Desktop/Data/Iris.csv", inputSet, tempOutputSet);
// std::vector<std::vector<double>> outputSet = data.oneHotRep(tempOutputSet, 3);
// TRAIN TEST SPLIT CHECK
// std::vector<std::vector<double>> inputSet1 = {{1,2,3,4,5,6,7,8,9,10}, {3,5,9,12,15,18,21,24,27,30}};
// std::vector<std::vector<double>> outputSet1 = {{2,4,6,8,10,12,14,16,18,20}};
// auto [inputSet, outputSet, inputTestSet, outputTestSet] = data.trainTestSplit(alg.transpose(inputSet1), alg.transpose(outputSet1), 0.2);
// alg.printMatrix(inputSet);
// alg.printMatrix(outputSet);
// alg.printMatrix(inputTestSet);
// alg.printMatrix(outputTestSet);
// alg.printMatrix(inputSet);
// alg.printMatrix(outputSet);
// MANN mann(inputSet, outputSet);
// mann.addLayer(100, "RELU", "XavierNormal");
// mann.addOutputLayer("Softmax", "CrossEntropy", "XavierNormal");
// mann.gradientDescent(0.1, 80000, 1);
// alg.printMatrix(mann.modelSetTest(inputSet));
// std::cout << "ACCURACY: " << 100 * mann.score() << "%" << std::endl;
// // NAIVE BAYES
// std::vector<std::vector<double>> inputSet = {{1,1,1,1,1}, {0,0,1,1,1}, {0,0,1,0,1}};
// std::vector<double> outputSet = {0,1,0,1,1};
// MultinomialNB MNB(alg.transpose(inputSet), outputSet, 2);
// alg.printVector(MNB.modelSetTest(alg.transpose(inputSet)));
// BernoulliNB BNB(alg.transpose(inputSet), outputSet);
// alg.printVector(BNB.modelSetTest(alg.transpose(inputSet)));
// GaussianNB GNB(alg.transpose(inputSet), outputSet, 2);
// alg.printVector(GNB.modelSetTest(alg.transpose(inputSet)));
// // KMeans
// std::vector<std::vector<double>> inputSet = {{32, 0, 7}, {2, 28, 17}, {0, 9, 23}};
// KMeans kmeans(inputSet, 3, "KMeans++");
// kmeans.train(3, 1);
// std::cout << std::endl;
// alg.printMatrix(kmeans.modelSetTest(inputSet)); // Returns the assigned centroids to each of the respective training examples
// std::cout << std::endl;
// alg.printVector(kmeans.silhouette_scores());
// // kNN
// std::vector<std::vector<double>> inputSet = {{1,2,3,4,5,6,7,8}, {0,0,0,0,1,1,1,1}};
// std::vector<double> outputSet = {0,0,0,0,1,1,1,1};
// kNN knn(alg.transpose(inputSet), outputSet, 8);
// alg.printVector(knn.modelSetTest(alg.transpose(inputSet)));
// std::cout << "ACCURACY: " << 100 * knn.score() << "%" << std::endl;
// // CONVOLUTION, POOLING, ETC..
// std::vector<std::vector<double>> input = {
// {1},
// };
// std::vector<std::vector<std::vector<double>>> tensorSet;
// tensorSet.push_back(input);
// tensorSet.push_back(input);
// tensorSet.push_back(input);
// alg.printTensor(data.rgb2xyz(tensorSet));
// std::vector<std::vector<double>> input = {
// {62,55,55,54,49,48,47,55},
// {62,57,54,52,48,47,48,53},
// {61,60,52,49,48,47,49,54},
// {63,61,60,60,63,65,68,65},
// {67,67,70,74,79,85,91,92},
// {82,95,101,106,114,115,112,117},
// {96,111,115,119,128,128,130,127},
// {109,121,127,133,139,141,140,133},
// };
// Transforms trans;
// alg.printMatrix(trans.discreteCosineTransform(input));
// alg.printMatrix(conv.convolve(input, conv.getPrewittVertical(), 1)); // Can use padding
// alg.printMatrix(conv.pool(input, 4, 4, "Max")); // Can use Max, Min, or Average pooling.
// std::vector<std::vector<std::vector<double>>> tensorSet;
// tensorSet.push_back(input);
// tensorSet.push_back(input);
// alg.printVector(conv.globalPool(tensorSet, "Average")); // Can use Max, Min, or Average global pooling.
// std::vector<std::vector<double>> laplacian = {{1, 1, 1}, {1, -4, 1}, {1, 1, 1}};
// alg.printMatrix(conv.convolve(conv.gaussianFilter2D(5, 1), laplacian, 1));
// // PCA, SVD, eigenvalues & eigenvectors
// std::vector<std::vector<double>> inputSet = {{1,1}, {1,1}};
// auto [Eigenvectors, Eigenvalues] = alg.eig(inputSet);
// std::cout << "Eigenvectors:" << std::endl;
// alg.printMatrix(Eigenvectors);
// std::cout << std::endl;
// std::cout << "Eigenvalues:" << std::endl;
// alg.printMatrix(Eigenvalues);
// auto [U, S, Vt] = alg.SVD(inputSet);
// // PCA done using Jacobi's method to approximate eigenvalues and eigenvectors.
// PCA dr(inputSet, 1); // 1 dimensional representation.
// std::cout << std::endl;
// std::cout << "Dimensionally reduced representation:" << std::endl;
// alg.printMatrix(dr.principalComponents());
// std::cout << "SCORE: " << dr.score() << std::endl;
// // NLP/DATA
// std::string verbText = "I am appearing and thinking, as well as conducting.";
// std::cout << "Stemming Example:" << std::endl;
// std::cout << data.stemming(verbText) << std::endl;
// std::cout << std::endl;
// std::vector<std::string> sentences = {"He is a good boy", "She is a good girl", "The boy and girl are good"};
// std::cout << "Bag of Words Example:" << std::endl;
// alg.printMatrix(data.BOW(sentences, "Default"));
// std::cout << std::endl;
// std::cout << "TFIDF Example:" << std::endl;
// alg.printMatrix(data.TFIDF(sentences));
// std::cout << std::endl;
// std::cout << "Tokenization:" << std::endl;
// alg.printVector(data.tokenize(verbText));
// std::cout << std::endl;
// std::cout << "Word2Vec:" << std::endl;
// std::string textArchive = {"He is a good boy. She is a good girl. The boy and girl are good."};
// std::vector<std::string> corpus = data.splitSentences(textArchive);
// auto [wordEmbeddings, wordList] = data.word2Vec(corpus, "CBOW", 2, 2, 0.1, 10000); // Can use either CBOW or Skip-n-gram.
// alg.printMatrix(wordEmbeddings);
// std::cout << std::endl;
// std::vector<std::string> textArchive = {"pizza", "pizza hamburger cookie", "hamburger", "ramen", "sushi", "ramen sushi"};
// alg.printMatrix(data.LSA(textArchive, 2));
// //alg.printMatrix(data.BOW(textArchive, "Default"));
// std::cout << std::endl;
// std::vector<std::vector<double>> inputSet = {{1,2},{2,3},{3,4},{4,5},{5,6}};
// std::cout << "Feature Scaling Example:" << std::endl;
// alg.printMatrix(data.featureScaling(inputSet));
// std::cout << std::endl;
// std::cout << "Mean Centering Example:" << std::endl;
// alg.printMatrix(data.meanCentering(inputSet));
// std::cout << std::endl;
// std::cout << "Mean Normalization Example:" << std::endl;
// alg.printMatrix(data.meanNormalization(inputSet));
// std::cout << std::endl;
// // Outlier Finder
// std::vector<double> inputSet = {1,2,3,4,5,6,7,8,9,23554332523523};
// OutlierFinder outlierFinder(2); // Any datapoint outside of 2 stds from the mean is marked as an outlier.
// alg.printVector(outlierFinder.modelTest(inputSet));
// // Testing new Functions
// double z_s = 0.001;
// std::cout << avn.logit(z_s) << std::endl;
// std::cout << avn.logit(z_s, 1) << std::endl;
// std::vector<double> z_v = {0.001};
// alg.printVector(avn.logit(z_v));
// alg.printVector(avn.logit(z_v, 1));
// std::vector<std::vector<double>> Z_m = {{0.001}};
// alg.printMatrix(avn.logit(Z_m));
// alg.printMatrix(avn.logit(Z_m, 1));
// std::cout << alg.trace({{1,2}, {3,4}}) << std::endl;
// alg.printMatrix(alg.pinverse({{1,2}, {3,4}}));
// alg.printMatrix(alg.diag({1,2,3,4,5}));
// alg.printMatrix(alg.kronecker_product({{1,2,3,4,5}}, {{6,7,8,9,10}}));
// alg.printMatrix(alg.matrixPower({{5,5},{5,5}}, 2));
// alg.printVector(alg.solve({{1,1}, {1.5, 4.0}}, {2200, 5050}));
// std::vector<std::vector<double>> matrixOfCubes = {{1,2,64,27}};
// std::vector<double> vectorOfCubes = {1,2,64,27};
// alg.printMatrix(alg.cbrt(matrixOfCubes));
// alg.printVector(alg.cbrt(vectorOfCubes));
// std::cout << alg.max({{1,2,3,4,5}, {6,5,3,4,1}, {9,9,9,9,9}}) << std::endl;
// std::cout << alg.min({{1,2,3,4,5}, {6,5,3,4,1}, {9,9,9,9,9}}) << std::endl;
// std::vector<double> chicken;
// data.getImage("../../Data/apple.jpeg", chicken);
// alg.printVector(chicken);
// std::vector<std::vector<double>> P = {{12, -51, 4}, {6, 167, -68}, {-4, 24, -41}};
// alg.printMatrix(P);
// alg.printMatrix(alg.gramSchmidtProcess(P));
// auto [Q, R] = alg.QRD(P); // It works!
// alg.printMatrix(Q);
// alg.printMatrix(R);
// // Checking positive-definiteness checker. For Cholesky Decomp.
// std::vector<std::vector<double>> A =
// {
// {1,-1,-1,-1},
// {-1,2,2,2},
// {-1,2,3,1},
// {-1,2,1,4}
// };
// std::cout << std::boolalpha << alg.positiveDefiniteChecker(A) << std::endl;
// auto [L, Lt] = alg.chol(A); // works.
// alg.printMatrix(L);
// alg.printMatrix(Lt);
// Checks for numerical analysis class.
NumericalAnalysis numAn;
//std::cout << numAn.quadraticApproximation(f, 0, 1) << std::endl;
// std::cout << numAn.cubicApproximation(f, 0, 1.001) << std::endl;
// std::cout << f(1.001) << std::endl;
// std::cout << numAn.quadraticApproximation(f_mv, {0, 0, 0}, {1, 1, 1}) << std::endl;
// std::cout << numAn.numDiff(&f, 1) << std::endl;
// std::cout << numAn.newtonRaphsonMethod(&f, 1, 1000) << std::endl;
//std::cout << numAn.invQuadraticInterpolation(&f, {100, 2,1.5}, 10) << std::endl;
// std::cout << numAn.numDiff(&f_mv, {1, 1}, 1) << std::endl; // Derivative w.r.t. x.
// alg.printVector(numAn.jacobian(&f_mv, {1, 1}));
//std::cout << numAn.numDiff_2(&f, 2) << std::endl;
//std::cout << numAn.numDiff_3(&f, 2) << std::endl;
// std::cout << numAn.numDiff_2(&f_mv, {2, 2, 500}, 2, 2) << std::endl;
//std::cout << numAn.numDiff_3(&f_mv, {2, 1000, 130}, 0, 0, 0) << std::endl;
// alg.printTensor(numAn.thirdOrderTensor(&f_mv, {1, 1, 1}));
// std::cout << "Our Hessian." << std::endl;
// alg.printMatrix(numAn.hessian(&f_mv, {2, 2, 500}));
// std::cout << numAn.laplacian(f_mv, {1,1,1}) << std::endl;
// std::vector<std::vector<std::vector<double>>> tensor;
// tensor.push_back({{1,2}, {1,2}, {1,2}});
// tensor.push_back({{1,2}, {1,2}, {1,2}});
// alg.printTensor(tensor);
// alg.printMatrix(alg.tensor_vec_mult(tensor, {1,2}));
// std::cout << numAn.cubicApproximation(f_mv, {0, 0, 0}, {1, 1, 1}) << std::endl;
// std::cout << numAn.eulerianMethod(f_prime, {1, 1}, 1.5, 0.000001) << std::endl;
// std::cout << numAn.eulerianMethod(f_prime_2var, {2, 3}, 2.5, 0.00000001) << std::endl;
// alg.printMatrix(conv.dx(A));
// alg.printMatrix(conv.dy(A));
// alg.printMatrix(conv.gradOrientation(A));
// std::vector<std::vector<double>> A =
// {
// {1,0,0,0},
// {0,0,0,0},
// {0,0,0,0},
// {0,0,0,1}
// };
// std::vector<std::vector<std::string>> h = conv.harrisCornerDetection(A);
// for(int i = 0; i < h.size(); i++){
// for(int j = 0; j < h[i].size(); j++){
// std::cout << h[i][j] << " ";
// }
// std::cout << std::endl;
// } // Harris detector works. Life is good!
// std::vector<double> a = {3,4,4};
// std::vector<double> b = {4,4,4};
// alg.printVector(alg.cross(a,b));
//SUPPORT VECTOR CLASSIFICATION (kernel method)
// std::vector<std::vector<double>> inputSet;
// std::vector<double> outputSet;
// data.setData(30, "/Users/marcmelikyan/Desktop/Data/BreastCancerSVM.csv", inputSet, outputSet);
// std::vector<std::vector<double>> inputSet;
// std::vector<double> outputSet;
// data.setData(4, "/Users/marcmelikyan/Desktop/Data/IrisSVM.csv", inputSet, outputSet);
// DualSVC kernelSVM(inputSet, outputSet, 1000);
// kernelSVM.gradientDescent(0.0001, 20, 1);
// std::vector<std::vector<double>> linearlyIndependentMat =
// {
// {1,2,3,4},
// {234538495,4444,6111,55}
// };
// std::cout << "True of false: linearly independent?: " << std::boolalpha << alg.linearIndependenceChecker(linearlyIndependentMat) << std::endl;
return 0;
}