commit 13e5c9c761cb77570ad088ac0324dd9ecbb03546 Author: novak-99 <78002988+novak-99@users.noreply.github.com> Date: Sun May 23 20:22:24 2021 -0700 Add files via upload diff --git a/MLPP/ANN/ANN.cpp b/MLPP/ANN/ANN.cpp new file mode 100644 index 0000000..7410957 --- /dev/null +++ b/MLPP/ANN/ANN.cpp @@ -0,0 +1,162 @@ +// +// ANN.cpp +// +// Created by Marc Melikyan on 11/4/20. +// + +#include "ANN.hpp" +#include "Activation/Activation.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Regularization/Reg.hpp" +#include "Utilities/Utilities.hpp" +#include "Cost/Cost.hpp" + +#include + +namespace MLPP { + ANN::ANN(std::vector> inputSet, std::vector outputSet) + : inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()) + { + + } + + ANN::~ANN(){ + delete outputLayer; + } + + std::vector ANN::modelSetTest(std::vector> X){ + network[0].input = X; + network[0].forwardPass(); + + for(int i = 1; i < network.size(); i++){ + network[i].input = network[i - 1].a; + network[i].forwardPass(); + } + outputLayer->input = network[network.size() - 1].a; + outputLayer->forwardPass(); + return outputLayer->a; + } + + double ANN::modelTest(std::vector x){ + + network[0].Test(x); + for(int i = 1; i < network.size(); i++){ + network[i].Test(network[i - 1].a_test); + } + outputLayer->Test(network[network.size() - 1].a_test); + return outputLayer->a_test; + } + + void ANN::gradientDescent(double learning_rate, int max_epoch, bool UI){ + class Cost cost; + LinAlg alg; + Activation avn; + Reg regularization; + + double cost_prev = 0; + int epoch = 1; + forwardPass(); + + while(true){ + cost_prev = Cost(y_hat, outputSet); + + auto costDeriv = outputLayer->costDeriv_map[outputLayer->cost]; + auto outputAvn = outputLayer->activation_map[outputLayer->activation]; + outputLayer->delta = alg.hadamard_product((cost.*costDeriv)(y_hat, outputSet), (avn.*outputAvn)(outputLayer->z, 1)); + std::vector outputWGrad = alg.mat_vec_mult(alg.transpose(outputLayer->input), outputLayer->delta); + + outputLayer->weights = alg.subtraction(outputLayer->weights, alg.scalarMultiply(learning_rate/n, outputWGrad)); + outputLayer->weights = regularization.regWeights(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg); + outputLayer->bias -= learning_rate * alg.sum_elements(outputLayer->delta) / n; + + auto hiddenLayerAvn = network[network.size() - 1].activation_map[network[network.size() - 1].activation]; + network[network.size() - 1].delta = alg.hadamard_product(alg.vecmult(outputLayer->delta, outputLayer->weights), (avn.*hiddenLayerAvn)(network[network.size() - 1].z, 1)); + std::vector> hiddenLayerWGrad = alg.matmult(alg.transpose(network[network.size() - 1].input), network[network.size() - 1].delta); + + network[network.size() - 1].weights = alg.subtraction(network[network.size() - 1].weights, alg.scalarMultiply(learning_rate/n, hiddenLayerWGrad)); + network[network.size() - 1].weights = regularization.regWeights(network[network.size() - 1].weights, network[network.size() - 1].lambda, network[network.size() - 1].alpha, network[network.size() - 1].reg); + network[network.size() - 1].bias = alg.subtractMatrixRows(network[network.size() - 1].bias, alg.scalarMultiply(learning_rate/n, network[network.size() - 1].delta)); + + for(int i = network.size() - 2; i >= 0; i--){ + auto hiddenLayerAvn = network[i].activation_map[network[i].activation]; + network[i].delta = alg.hadamard_product(alg.matmult(network[i + 1].delta, network[i + 1].weights), (avn.*hiddenLayerAvn)(network[i].z, 1)); + std::vector> 
hiddenLayerWGrad = alg.matmult(alg.transpose(network[i].input), network[i].delta); + network[i].weights = alg.subtraction(network[i].weights, alg.scalarMultiply(learning_rate/n, hiddenLayerWGrad)); + network[i].weights = regularization.regWeights(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg); + network[i].bias = alg.subtractMatrixRows(network[i].bias, alg.scalarMultiply(learning_rate/n, network[i].delta)); + } + + forwardPass(); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); + std::cout << "Layer " << network.size() + 1 << ": " << std::endl; + Utilities::UI(outputLayer->weights, outputLayer->bias); + std::cout << "Layer " << network.size() << ": " << std::endl; + Utilities::UI(network[network.size() - 1].weights, network[network.size() - 1].bias); + for(int i = network.size() - 2; i >= 0; i--){ + std::cout << "Layer " << i + 1 << ": " << std::endl; + Utilities::UI(network[i].weights, network[i].bias); + } + } + + epoch++; + if(epoch > max_epoch) { break; } + } + } + + double ANN::score(){ + Utilities util; + forwardPass(); + return util.performance(y_hat, outputSet); + } + + void ANN::save(std::string fileName){ + Utilities util; + util.saveParameters(fileName, network[0].weights, network[0].bias, 0, 1); + for(int i = 1; i < network.size(); i++){ + util.saveParameters(fileName, network[i].weights, network[i].bias, 1, i + 1); + } + util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, 1, network.size() + 1); + } + + void ANN::addLayer(int n_hidden, std::string activation, std::string weightInit, std::string reg, double lambda, double alpha){ + if(network.empty()){ + network.push_back(HiddenLayer(n_hidden, activation, inputSet, weightInit, reg, lambda, alpha)); + network[0].forwardPass(); + } + else{ + network.push_back(HiddenLayer(n_hidden, activation, network[network.size() - 1].a, weightInit, reg, lambda, alpha)); + network[network.size() - 1].forwardPass(); + } + } + + void ANN::addOutputLayer(std::string activation, std::string loss, std::string weightInit, std::string reg, double lambda, double alpha){ + outputLayer = new OutputLayer(network[0].n_hidden, outputSet.size(), activation, loss, network[network.size() - 1].a, weightInit, reg, lambda, alpha); + } + + double ANN::Cost(std::vector y_hat, std::vector y){ + Reg regularization; + class Cost cost; + double totalRegTerm = 0; + + auto cost_function = outputLayer->cost_map[outputLayer->cost]; + for(int i = 0; i < network.size() - 1; i++){ + totalRegTerm += regularization.regTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg); + } + return (cost.*cost_function)(y_hat, y) + totalRegTerm + regularization.regTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg); + } + + void ANN::forwardPass(){ + network[0].input = inputSet; + network[0].forwardPass(); + + for(int i = 1; i < network.size(); i++){ + network[i].input = network[i - 1].a; + network[i].forwardPass(); + } + outputLayer->input = network[network.size() - 1].a; + outputLayer->forwardPass(); + y_hat = outputLayer->a; + } +} \ No newline at end of file diff --git a/MLPP/ANN/ANN.hpp b/MLPP/ANN/ANN.hpp new file mode 100644 index 0000000..4964acf --- /dev/null +++ b/MLPP/ANN/ANN.hpp @@ -0,0 +1,47 @@ +// +// ANN.hpp +// +// Created by Marc Melikyan on 11/4/20. 
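// Usage sketch (not part of this commit): how the ANN class implemented above is
// intended to be driven end to end. The template arguments (stripped elsewhere in
// this diff) are assumed from the constructor to be std::vector<std::vector<double>>
// for the input set and std::vector<double> for the output set. The XOR data and the
// activation/cost names "Sigmoid" and "LogLoss" are illustrative assumptions; the
// strings must match whatever keys HiddenLayer/OutputLayer register in their
// activation_map and cost_map, which are not shown in this commit.
#include "MLPP/ANN/ANN.hpp"
#include <iostream>
#include <vector>

int main(){
    std::vector<std::vector<double>> X = {{0,0}, {0,1}, {1,0}, {1,1}};
    std::vector<double> y = {0, 1, 1, 0};

    MLPP::ANN ann(X, y);
    ann.addLayer(4, "Sigmoid");               // hidden layer with 4 units (assumed activation key)
    ann.addOutputLayer("Sigmoid", "LogLoss"); // output activation + cost (assumed keys)
    ann.gradientDescent(0.1, 10000, 0);       // learning rate, max epochs, UI off
    std::cout << "Accuracy: " << ann.score() << std::endl;
    return 0;
}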
+// + +#ifndef ANN_hpp +#define ANN_hpp + +#include "HiddenLayer/HiddenLayer.hpp" +#include "OutputLayer/OutputLayer.hpp" + +#include +#include + +namespace MLPP{ + +class ANN{ + public: + ANN(std::vector> inputSet, std::vector outputSet); + ~ANN(); + std::vector modelSetTest(std::vector> X); + double modelTest(std::vector x); + void gradientDescent(double learning_rate, int max_epoch, bool UI = 1); + double score(); + void save(std::string fileName); + + void addLayer(int n_hidden, std::string activation, std::string weightInit = "Default", std::string reg = "None", double lambda = 0.5, double alpha = 0.5); + void addOutputLayer(std::string activation, std::string loss, std::string weightInit = "Default", std::string reg = "None", double lambda = 0.5, double alpha = 0.5); + + private: + double Cost(std::vector y_hat, std::vector y); + void forwardPass(); + + std::vector> inputSet; + std::vector outputSet; + std::vector y_hat; + + std::vector network; + OutputLayer *outputLayer; + + int n; + int k; + }; +} + +#endif /* ANN_hpp */ \ No newline at end of file diff --git a/MLPP/Activation/Activation.cpp b/MLPP/Activation/Activation.cpp new file mode 100644 index 0000000..6b31fa0 --- /dev/null +++ b/MLPP/Activation/Activation.cpp @@ -0,0 +1,1007 @@ +// +// Activation.cpp +// +// Created by Marc Melikyan on 1/16/21. +// + +#include +#include "LinAlg/LinAlg.hpp" +#include "Activation.hpp" + +namespace MLPP{ + + double Activation::linear(double z, bool deriv){ + if(deriv){ return 1; } + return z; + } + + std::vector Activation::linear(std::vector z, bool deriv){ + if(deriv) { + LinAlg alg; + return alg.onevec(z.size()); + } + return z; + + } + + std::vector> Activation::linear(std::vector> z, bool deriv){ + if(deriv){ + LinAlg alg; + return alg.onemat(z.size(), z[0].size()); + } + return z; + } + + double Activation::sigmoid(double z, bool deriv){ + if(deriv) { return sigmoid(z) * (1 - sigmoid(z)); } + return 1 / (1 + exp(-z)); + } + + std::vector Activation::sigmoid(std::vector z, bool deriv){ + if(deriv) { + LinAlg alg; + return alg.subtraction(sigmoid(z), alg.hadamard_product(sigmoid(z), sigmoid(z))); + } + + std::vector a; + a.resize(z.size()); + + for(int i = 0; i < z.size(); i++){ + a[i] = sigmoid(z[i]); + } + return a; + } + + std::vector> Activation::sigmoid(std::vector> z, bool deriv){ + if(deriv) { + LinAlg alg; + return alg.subtraction(sigmoid(z), alg.hadamard_product(sigmoid(z), sigmoid(z))); + } + + std::vector> a; + a.resize(z.size()); + + for(int i = 0; i < z.size(); i++){ + a[i] = sigmoid(z[i]); + } + return a; + } + + std::vector Activation::softmax(std::vector z){ + std::vector a; + a.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + double sum = 0; + for(int j = 0; j < z.size(); j++){ + sum += exp(z[j]); + } + a[i] = exp(z[i]) / sum; + } + + return a; + } + + std::vector> Activation::softmax(std::vector> z){ + LinAlg alg; + std::vector> a; + a.resize(z.size()); + + for(int i = 0; i < z.size(); i++){ + a[i] = softmax(z[i]); + } + return a; + } + + std::vector Activation::adjSoftmax(std::vector z){ + LinAlg alg; + std::vector a; + double C = -*max_element(z.begin(), z.end()); + z = alg.scalarAdd(C, z); + + a.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + double sum = 0; + for(int j = 0; j < z.size(); j++){ + sum += exp(z[j]); + } + a[i] = exp(z[i]) / sum; + } + + return a; + } + + std::vector> Activation::adjSoftmax(std::vector> z){ + LinAlg alg; + std::vector> a; + a.resize(z.size()); + + for(int i = 0; i < z.size(); i++){ + a[i] = adjSoftmax(z[i]); + } + return 
a; + } + + std::vector> Activation::softmaxDeriv(std::vector z){ + LinAlg alg; + std::vector> deriv; + std::vector a = softmax(z); + deriv.resize(a.size()); + for(int i = 0; i < deriv.size(); i++){ + deriv[i].resize(a.size()); + } + for(int i = 0; i < a.size(); i++){ + for(int j = 0; j < z.size(); j++){ + if(i == j){ + deriv[i][j] = a[i] * (1 - a[i]); + } + else{ + deriv[i][j] = -a[i] * a[j]; + } + } + } + return deriv; + } + + std::vector>> Activation::softmaxDeriv(std::vector> z){ + LinAlg alg; + std::vector>> deriv; + std::vector> a = softmax(z); + + deriv.resize(a.size()); + for(int i = 0; i < deriv.size(); i++){ + deriv[i].resize(a.size()); + } + for(int i = 0; i < a.size(); i++){ + for(int j = 0; j < z.size(); j++){ + if(i == j){ + deriv[i][j] = alg.subtraction(a[i], alg.hadamard_product(a[i], a[i])); + } + else{ + deriv[i][j] = alg.scalarMultiply(-1, alg.hadamard_product(a[i], a[j])); + } + } + } + return deriv; + } + + double Activation::softplus(double z, bool deriv){ + if(deriv){ return sigmoid(z); } + return log(1 + exp(z)); + } + + std::vector Activation::softplus(std::vector z, bool deriv){ + if(deriv) { return sigmoid(z); } + LinAlg alg; + return alg.log(alg.addition(alg.onevec(z.size()), alg.exp(z))); + } + + std::vector> Activation::softplus(std::vector> z, bool deriv){ + if(deriv) { return sigmoid(z); } + LinAlg alg; + return alg.log(alg.addition(alg.onemat(z.size(), z[0].size()), alg.exp(z))); + } + + double Activation::gaussianCDF(double z, bool deriv){ + if(deriv) { + return (1 / sqrt(2 * M_PI)) * exp(-z * z / 2); + } + return 0.5 * (1 + erf(z / sqrt(2))); + } + + std::vector Activation::gaussianCDF(std::vector z, bool deriv){ + LinAlg alg; + if(deriv) { + return alg.scalarMultiply(1 / sqrt(2 * M_PI), alg.exp(alg.scalarMultiply(-1/2, alg.hadamard_product(z, z)))); + } + return alg.scalarMultiply(0.5, alg.addition(alg.onevec(z.size()), alg.erf(alg.scalarMultiply(1/sqrt(2), z)))); + } + + std::vector> Activation::gaussianCDF(std::vector> z, bool deriv){ + LinAlg alg; + if(deriv) { + return alg.scalarMultiply(1 / sqrt(2 * M_PI), alg.exp(alg.scalarMultiply(-1/2, alg.hadamard_product(z, z)))); + } + return alg.scalarMultiply(0.5, alg.addition(alg.onemat(z.size(), z[0].size()), alg.erf(alg.scalarMultiply(1/sqrt(2), z)))); + } + + double Activation::cloglog(double z, bool deriv){ + if(deriv) { return exp(z-exp(z)); } + return 1 - exp(-exp(z)); + } + + std::vector Activation::cloglog(std::vector z, bool deriv){ + LinAlg alg; + if(deriv) { + return alg.exp(alg.scalarMultiply(-1, alg.exp(z))); + } + return alg.scalarMultiply(-1, alg.scalarAdd(-1, alg.exp(alg.scalarMultiply(-1, alg.exp(z))))); + } + + std::vector> Activation::cloglog(std::vector> z, bool deriv){ + LinAlg alg; + if(deriv) { + return alg.exp(alg.scalarMultiply(-1, alg.exp(z))); + } + return alg.scalarMultiply(-1, alg.scalarAdd(-1, alg.exp(alg.scalarMultiply(-1, alg.exp(z))))); + } + + double Activation::unitStep(double z, bool deriv){ + if(deriv) { + return 0; + } + return z < 0 ? 
0 : 1; + } + + std::vector Activation::unitStep(std::vector z, bool deriv){ + if(deriv){ + std::vector deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = unitStep(z[i], 1); + } + return deriv; + } + std::vector a; + a.resize(z.size()); + + for(int i = 0; i < a.size(); i++){ + a[i] = unitStep(z[i]); + } + return a; + } + + std::vector> Activation::unitStep(std::vector> z, bool deriv){ + if(deriv){ + std::vector> deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = unitStep(z[i], 1); + } + return deriv; + } + std::vector> a; + a.resize(z.size()); + + for(int i = 0; i < a.size(); i++){ + a[i] = unitStep(z[i]); + } + return a; + } + + double Activation::swish(double z, bool deriv){ + if(deriv){ + return swish(z) + sigmoid(z) * (1 - swish(z)); + } + return z * sigmoid(z); + } + + std::vector Activation::swish(std::vector z, bool deriv){ + if(deriv){ + std::vector deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = swish(z[i], 1); + } + return deriv; + } + std::vector a; + a.resize(z.size()); + + for(int i = 0; i < a.size(); i++){ + a[i] = swish(z[i]); + } + return a; + } + + std::vector> Activation::swish(std::vector> z, bool deriv){ + if(deriv){ + std::vector> deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = swish(z[i], 1); + } + return deriv; + } + std::vector> a; + a.resize(z.size()); + + for(int i = 0; i < a.size(); i++){ + a[i] = swish(z[i]); + } + return a; + } + + double Activation::RELU(double z, bool deriv){ + if (deriv){ + if(z <= 0){ + return 0; + } + else { + return 1; + } + } + return fmax(0, z); + } + + std::vector Activation::RELU(std::vector z, bool deriv){ + if(deriv){ + std::vector deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = RELU(z[i], 1); + } + return deriv; + } + std::vector a; + a.resize(z.size()); + + for(int i = 0; i < a.size(); i++){ + a[i] = RELU(z[i]); + } + return a; + } + + std::vector> Activation::RELU(std::vector> z, bool deriv){ + if(deriv){ + std::vector> deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = RELU(z[i], 1); + } + return deriv; + } + std::vector> a; + a.resize(z.size()); + + for(int i = 0; i < a.size(); i++){ + a[i] = RELU(z[i]); + } + return a; + } + + double Activation::leakyReLU(double z, double c, bool deriv){ + if (deriv){ + if(z <= 0){ + return c; + } + else { + return 1; + } + } + return fmax(c * z, z); + } + + std::vector Activation::leakyReLU(std::vector z, double c, bool deriv){ + if(deriv){ + std::vector deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = leakyReLU(z[i], c, 1); + } + return deriv; + } + std::vector a; + a.resize(z.size()); + + for(int i = 0; i < a.size(); i++){ + a[i] = leakyReLU(z[i], c); + } + return a; + } + + double Activation::ELU(double z, double c, bool deriv){ + if (deriv){ + if(z <= 0){ + return c * exp(z); + } + else { + return 1; + } + } + if(z >= 0){ + return z; + } + else{ + return c * (exp(z) - 1); + } + } + + std::vector Activation::ELU(std::vector z, double c, bool deriv){ + if(deriv){ + std::vector deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = ELU(z[i], c, 1); + } + return deriv; + } + std::vector a; + a.resize(z.size()); + + for(int i = 0; i < a.size(); i++){ + a[i] = ELU(z[i], c); + } + return a; + } + + double Activation::SELU(double z, double lambda, double c, bool deriv){ + if (deriv){ + return ELU(z, c, 1); + } + return lambda * ELU(z, c); 
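    // Note on the GELU implementation further below (added commentary, not in the
    // original commit): it uses the common tanh approximation
    //   GELU(z) ~= 0.5 * z * (1 + tanh( sqrt(2/pi) * (z + 0.044715 * z^3) )).
    // Writing u(z) = sqrt(2/pi) * (z + 0.044715 * z^3) = 0.797885*z + 0.0356774*z^3,
    // the derivative is
    //   d/dz GELU(z) = 0.5 * (1 + tanh(u)) + 0.5 * z * sech(u)^2 * u'(z)
    //                = 0.5 * tanh(u) + (0.398942*z + 0.0535161*z^3) * sech(u)^2 + 0.5,
    // which is the expression returned when deriv is true; the constants 0.797885,
    // 0.0356774, 0.398942 and 0.0535161 all derive from sqrt(2/pi) ~= 0.7978845608.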
+ } + + std::vector Activation::SELU(std::vector z, double lambda, double c, bool deriv){ + if(deriv){ + std::vector deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = SELU(z[i], lambda, c, 1); + } + return deriv; + } + std::vector a; + a.resize(z.size()); + + for(int i = 0; i < a.size(); i++){ + a[i] = SELU(z[i], lambda, c); + } + return a; + } + + std::vector> Activation::SELU(std::vector> z, double lambda, double c, bool deriv){ + if(deriv){ + std::vector> deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = SELU(z[i], lambda, c, 1); + } + return deriv; + } + std::vector> a; + a.resize(z.size()); + + for(int i = 0; i < a.size(); i++){ + a[i] = SELU(z[i], lambda, c); + } + return a; + } + + double Activation::GELU(double z, bool deriv){ + if (deriv){ + return 0.5 * tanh(0.0356774 * pow(z, 3) + 0.797885 * z) + (0.0535161 * pow(z, 3) + 0.398942 * z) * pow(sech(0.0356774 * pow(z, 3) + 0.797885 * z), 2) + 0.5; + } + return 0.5 * z * (1 + tanh(sqrt(2/M_PI) * (z + 0.044715 * pow(z, 3)))); + } + + std::vector Activation::GELU(std::vector z, bool deriv){ + if(deriv){ + std::vector deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = GELU(z[i], 1); + } + return deriv; + } + std::vector a; + a.resize(z.size()); + + for(int i = 0; i < a.size(); i++){ + a[i] = GELU(z[i]); + } + return a; + } + + std::vector> Activation::GELU(std::vector> z, bool deriv){ + if(deriv){ + std::vector> deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = GELU(z[i], 1); + } + return deriv; + } + std::vector> a; + a.resize(z.size()); + + for(int i = 0; i < a.size(); i++){ + a[i] = GELU(z[i]); + } + return a; + } + + double Activation::sinh(double z, bool deriv){ + if(deriv){ return cosh(z); } + return 0.5 * (exp(z) - exp(-z)); + } + + std::vector Activation::sinh(std::vector z, bool deriv){ + if(deriv){ + std::vector deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = sinh(z[i], 1); + } + return deriv; + } + std::vector a; + a.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + a[i] = sinh(z[i]); + } + return a; + } + + std::vector> Activation::sinh(std::vector> z, bool deriv){ + if(deriv){ + std::vector> deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = sinh(z[i], 1); + } + return deriv; + } + std::vector> a; + a.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + a[i] = sinh(z[i]); + } + return a; + } + + double Activation::cosh(double z, bool deriv){ + if(deriv){ return cosh(z); } + return 0.5 * (exp(z) + exp(-z)); + } + + std::vector Activation::cosh(std::vector z, bool deriv){ + if(deriv){ + std::vector deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = sinh(z[i], 1); + } + return deriv; + } + std::vector a; + a.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + a[i] = cosh(z[i]); + } + return a; + } + + std::vector> Activation::cosh(std::vector> z, bool deriv){ + if(deriv){ + std::vector> deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = cosh(z[i], 1); + } + return deriv; + } + std::vector> a; + a.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + a[i] = cosh(z[i]); + } + return a; + } + + double Activation::tanh(double z, bool deriv){ + if(deriv){ return 1 - tanh(z) * tanh(z); } + return (exp(z) - exp(-z)) / (exp(z) + exp(-z)); + } + + std::vector Activation::tanh(std::vector z, bool deriv){ + LinAlg alg; + if(deriv){ + return 
alg.scalarMultiply(-1, alg.scalarAdd(-1, alg.hadamard_product(tanh(z), tanh(z)))); + } + return alg.elementWiseDivision(alg.subtraction(alg.exp(z), alg.exp(alg.scalarMultiply(-1, z))), alg.addition(alg.exp(z), alg.exp(alg.scalarMultiply(-1, z)))); + } + + std::vector> Activation::tanh(std::vector> z, bool deriv){ + LinAlg alg; + if(deriv){ + return alg.scalarMultiply(-1, alg.scalarAdd(-1, alg.hadamard_product(tanh(z), tanh(z)))); + } + + return alg.elementWiseDivision(alg.subtraction(alg.exp(z), alg.exp(alg.scalarMultiply(-1, z))), alg.addition(alg.exp(z), alg.exp(alg.scalarMultiply(-1, z)))); + } + + double Activation::csch(double z, bool deriv){ + if(deriv){ return -csch(z) * coth(z); } + return 1 / sinh(z); + } + + std::vector Activation::csch(std::vector z, bool deriv){ + if(deriv){ + std::vector deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = csch(z[i], 1); + } + return deriv; + } + std::vector a; + a.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + a[i] = csch(z[i]); + } + return a; + } + + std::vector> Activation::csch(std::vector> z, bool deriv){ + if(deriv){ + std::vector> deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = csch(z[i], 1); + } + return deriv; + } + std::vector> a; + a.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + a[i] = csch(z[i]); + } + return a; + } + + double Activation::sech(double z, bool deriv){ + if(deriv){ return -sech(z) * tanh(z); } + return 2 / (exp(z) + exp(-z)); + } + + std::vector Activation::sech(std::vector z, bool deriv){ + if(deriv){ + std::vector deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = sech(z[i], 1); + } + return deriv; + } + std::vector a; + a.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + a[i] = sech(z[i]); + } + return a; + + // return activation(z, deriv, static_cast(&sech)); + } + + std::vector> Activation::sech(std::vector> z, bool deriv){ + if(deriv){ + std::vector> deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = sech(z[i], 1); + } + return deriv; + } + std::vector> a; + a.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + a[i] = sech(z[i]); + } + return a; + + // return activation(z, deriv, static_cast(&sech)); + } + + + double Activation::coth(double z, bool deriv){ + if(deriv){ return -csch(z) * csch(z); } + return 1 / tanh(z); + } + + std::vector Activation::coth(std::vector z, bool deriv){ + if(deriv){ + std::vector deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = coth(z[i], 1); + } + return deriv; + } + std::vector a; + a.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + a[i] = coth(z[i]); + } + return a; + } + + std::vector> Activation::coth(std::vector> z, bool deriv){ + if(deriv){ + std::vector> deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = coth(z[i], 1); + } + return deriv; + } + std::vector> a; + a.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + a[i] = coth(z[i]); + } + return a; + } + + double Activation::arsinh(double z, bool deriv){ + if(deriv){ return 1 / sqrt(z * z + 1); } + return log(z + sqrt(z * z + 1)); + } + + std::vector Activation::arsinh(std::vector z, bool deriv){ + if(deriv){ + std::vector deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = arsinh(z[i], 1); + } + return deriv; + } + std::vector a; + a.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + a[i] = arsinh(z[i]); + } + return a; + } + + std::vector> 
Activation::arsinh(std::vector> z, bool deriv){ + if(deriv){ + std::vector> deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = arsinh(z[i], 1); + } + return deriv; + } + std::vector> a; + a.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + a[i] = arsinh(z[i]); + } + return a; + } + + double Activation::arcosh(double z, bool deriv){ + if(deriv){ + return 1/sqrt(z * z - 1); + } + return log(z + sqrt(z * z - 1)); + } + + std::vector Activation::arcosh(std::vector z, bool deriv){ + LinAlg alg; + if(deriv){ + std::vector deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = arcosh(z[i], 1); + } + return deriv; + } + std::vector a; + a.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + a[i] = arcosh(z[i]); + } + return a; + } + + std::vector> Activation::arcosh(std::vector> z, bool deriv){ + LinAlg alg; + if(deriv){ + std::vector> deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = arcosh(z[i], 1); + } + return deriv; + } + std::vector> a; + a.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + a[i] = arcosh(z[i]); + } + return a; + } + + double Activation::artanh(double z, bool deriv){ + if(deriv){ + return 1/(1 - z * z); + } + return 0.5 * log((1 + z)/(1 - z)); + } + + std::vector Activation::artanh(std::vector z, bool deriv){ + LinAlg alg; + if(deriv){ + std::vector deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = artanh(z[i], 1); + } + return deriv; + } + std::vector a; + a.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + a[i] = artanh(z[i]); + } + return a; + } + + std::vector> Activation::artanh(std::vector> z, bool deriv){ + LinAlg alg; + if(deriv){ + std::vector> deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = artanh(z[i], 1); + } + return deriv; + } + std::vector> a; + a.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + a[i] = artanh(z[i]); + } + return a; + } + + double Activation::arcsch(double z, bool deriv){ + if(deriv){ + return -1/((z * z) * sqrt(1 + (1/(z * z)))); + } + return log(sqrt(1 + (1 / (z * z))) + (1/z)); + } + + std::vector Activation::arcsch(std::vector z, bool deriv){ + LinAlg alg; + if(deriv){ + std::vector deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = arcsch(z[i], 1); + } + return deriv; + } + std::vector a; + a.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + a[i] = arcsch(z[i]); + } + return a; + } + + std::vector> Activation::arcsch(std::vector> z, bool deriv){ + LinAlg alg; + if(deriv){ + std::vector> deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = arcsch(z[i], 1); + } + return deriv; + } + std::vector> a; + a.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + a[i] = arcsch(z[i]); + } + return a; + } + + + double Activation::arsech(double z, bool deriv){ + if(deriv){ + return -1/(z * sqrt(1 - z * z)); + } + return log((1/z) + ((1/z) + 1) * ((1/z) - 1)); + } + + std::vector Activation::arsech(std::vector z, bool deriv){ + LinAlg alg; + if(deriv){ + std::vector deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = arsech(z[i], 1); + } + return deriv; + } + std::vector a; + a.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + a[i] = arsech(z[i]); + } + return a; + } + + std::vector> Activation::arsech(std::vector> z, bool deriv){ + LinAlg alg; + if(deriv){ + std::vector> deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = 
arsech(z[i], 1); + } + return deriv; + } + std::vector> a; + a.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + a[i] = arsech(z[i]); + } + return a; + } + + double Activation::arcoth(double z, bool deriv){ + if(deriv){ + return 1/(1 - z * z); + } + return 0.5 * log((1 + z)/(z - 1)); + } + + std::vector> Activation::arcoth(std::vector> z, bool deriv){ + LinAlg alg; + if(deriv){ + std::vector> deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = arcoth(z[i], 1); + } + return deriv; + } + std::vector> a; + a.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + a[i] = arcoth(z[i]); + } + return a; + } + + std::vector Activation::arcoth(std::vector z, bool deriv){ + LinAlg alg; + if(deriv){ + std::vector deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = arcoth(z[i], 1); + } + return deriv; + } + std::vector a; + a.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + a[i] = arcoth(z[i]); + } + return a; + } + + // TO DO: Implement this template activation + std::vector Activation::activation(std::vector z, bool deriv, double(*function)(double, bool)){ + if(deriv){ + std::vector deriv; + deriv.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + deriv[i] = function(z[i], 1); + } + return deriv; + } + std::vector a; + a.resize(z.size()); + for(int i = 0; i < z.size(); i++){ + a[i] = function(z[i], deriv); + } + return a; + } +} \ No newline at end of file diff --git a/MLPP/Activation/Activation.hpp b/MLPP/Activation/Activation.hpp new file mode 100644 index 0000000..440350f --- /dev/null +++ b/MLPP/Activation/Activation.hpp @@ -0,0 +1,124 @@ +// +// Activation.hpp +// +// Created by Marc Melikyan on 1/16/21. +// + +#ifndef Activation_hpp +#define Activation_hpp + +#include + +namespace MLPP{ + class Activation{ + public: + double linear(double z, bool deriv = 0); + std::vector linear(std::vector z, bool deriv = 0); + std::vector> linear(std::vector> z, bool deriv = 0); + + double sigmoid(double z, bool deriv = 0); + std::vector sigmoid(std::vector z, bool deriv = 0); + std::vector> sigmoid(std::vector> z, bool deriv = 0); + + std::vector softmax(std::vector z); + std::vector> softmax(std::vector> z); + + std::vector adjSoftmax(std::vector z); + std::vector> adjSoftmax(std::vector> z); + + std::vector> softmaxDeriv(std::vector z); + std::vector>> softmaxDeriv(std::vector> z); + + double softplus(double z, bool deriv = 0); + std::vector softplus(std::vector z, bool deriv = 0); + std::vector> softplus(std::vector> z, bool deriv = 0); + + double gaussianCDF(double z, bool deriv = 0); + std::vector gaussianCDF(std::vector z, bool deriv = 0); + std::vector> gaussianCDF(std::vector> z, bool deriv = 0); + + double cloglog(double z, bool deriv = 0); + std::vector cloglog(std::vector z, bool deriv = 0); + std::vector> cloglog(std::vector> z, bool deriv = 0); + + double unitStep(double z, bool deriv = 0); + std::vector unitStep(std::vector z, bool deriv = 0); + std::vector> unitStep(std::vector> z, bool deriv = 0); + + double swish(double z, bool deriv = 0); + std::vector swish(std::vector z, bool deriv = 0); + std::vector> swish(std::vector> z, bool deriv = 0); + + double RELU(double z, bool deriv = 0); + std::vector RELU(std::vector z, bool deriv = 0); + std::vector> RELU(std::vector> z, bool deriv = 0); + + double leakyReLU(double z, double c, bool deriv = 0); + std::vector leakyReLU(std::vector z, double c, bool deriv = 0); + + double ELU(double z, double c, bool deriv = 0); + std::vector ELU(std::vector z, double c, bool 
deriv = 0); + + double SELU(double z, double lambda, double c, bool deriv = 0); + std::vector SELU(std::vector z, double lambda, double c, bool deriv = 0); + std::vector> SELU(std::vector>, double lambda, double c, bool deriv = 0); + + double GELU(double z, bool deriv = 0); + std::vector GELU(std::vector z, bool deriv = 0); + std::vector> GELU(std::vector> z, bool deriv = 0); + + double sinh(double z, bool deriv = 0); + std::vector sinh(std::vector z, bool deriv = 0); + std::vector> sinh(std::vector> z, bool deriv = 0); + + double cosh(double z, bool deriv = 0); + std::vector cosh(std::vector z, bool deriv = 0); + std::vector> cosh(std::vector> z, bool deriv = 0); + + double tanh(double z, bool deriv = 0); + std::vector tanh(std::vector z, bool deriv = 0); + std::vector> tanh(std::vector> z, bool deriv = 0); + + double csch(double z, bool deriv = 0); + std::vector csch(std::vector z, bool deriv = 0); + std::vector> csch( std::vector> z, bool deriv = 0); + + double sech(double z, bool deriv = 0); + std::vector sech(std::vector z, bool deriv = 0); + std::vector> sech(std::vector> z, bool deriv = 0); + + double coth(double z, bool deriv = 0); + std::vector coth(std::vector z, bool deriv = 0); + std::vector> coth(std::vector> z, bool deriv = 0); + + double arsinh(double z, bool deriv = 0); + std::vector arsinh(std::vector z, bool deriv = 0); + std::vector> arsinh(std::vector> z, bool deriv = 0); + + double arcosh(double z, bool deriv = 0); + std::vector arcosh(std::vector z, bool deriv = 0); + std::vector> arcosh(std::vector> z, bool deriv = 0); + + double artanh(double z, bool deriv = 0); + std::vector artanh(std::vector z, bool deriv = 0); + std::vector> artanh(std::vector> z, bool deriv = 0); + + double arcsch(double z, bool deriv = 0); + std::vector arcsch(std::vector z, bool deriv = 0); + std::vector> arcsch(std::vector> z, bool deriv = 0); + + double arsech(double z, bool deriv = 0); + std::vector arsech(std::vector z, bool deriv = 0); + std::vector> arsech(std::vector> z, bool deriv = 0); + + double arcoth(double z, bool deriv = 0); + std::vector arcoth(std::vector z, bool deriv = 0); + std::vector> arcoth(std::vector> z, bool deriv = 0); + + std::vector activation(std::vector z, bool deriv, double(*function)(double, bool)); + + private: + }; +} + +#endif /* Activation_hpp */ diff --git a/MLPP/AutoEncoder/AutoEncoder.cpp b/MLPP/AutoEncoder/AutoEncoder.cpp new file mode 100644 index 0000000..423c0eb --- /dev/null +++ b/MLPP/AutoEncoder/AutoEncoder.cpp @@ -0,0 +1,270 @@ +// +// AutoEncoder.cpp +// +// Created by Marc Melikyan on 11/4/20. 
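// Usage sketch (not part of this commit): driving the AutoEncoder implemented below.
// The constructor takes the data matrix and the width of the hidden (latent) layer;
// the template arguments, stripped elsewhere in this diff, are assumed to be
// std::vector<std::vector<double>>. The data values are illustrative only.
#include "MLPP/AutoEncoder/AutoEncoder.hpp"
#include <iostream>
#include <vector>

int main(){
    std::vector<std::vector<double>> X = {{1, 0, 0, 1}, {0, 1, 1, 0}, {1, 1, 0, 0}};
    MLPP::AutoEncoder ae(X, 2);                                  // 2-unit bottleneck
    ae.gradientDescent(0.1, 1000, 0);                            // learning rate, max epochs, UI off
    std::vector<std::vector<double>> Xhat = ae.modelSetTest(X);  // reconstructions
    std::cout << ae.score() << std::endl;
    return 0;
}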
+// + +#include "AutoEncoder.hpp" +#include "Activation/Activation.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Utilities/Utilities.hpp" +#include "Cost/Cost.hpp" + +#include +#include + +namespace MLPP { + AutoEncoder::AutoEncoder(std::vector> inputSet, int n_hidden) + : inputSet(inputSet), n_hidden(n_hidden), n(inputSet.size()), k(inputSet[0].size()) + { + Activation avn; + y_hat.resize(inputSet.size()); + + weights1 = Utilities::weightInitialization(k, n_hidden); + weights2 = Utilities::weightInitialization(n_hidden, k); + bias1 = Utilities::biasInitialization(n_hidden); + bias2 = Utilities::biasInitialization(k); + } + + std::vector> AutoEncoder::modelSetTest(std::vector> X){ + return Evaluate(X); + } + + std::vector AutoEncoder::modelTest(std::vector x){ + return Evaluate(x); + } + + void AutoEncoder::gradientDescent(double learning_rate, int max_epoch, bool UI){ + LinAlg alg; + Activation avn; + + double cost_prev = 0; + int epoch = 1; + forwardPass(); + + while(true){ + cost_prev = Cost(y_hat, inputSet); + + // Calculating the errors + std::vector> error = alg.subtraction(y_hat, inputSet); + + // Calculating the weight/bias gradients for layer 2 + std::vector> D2_1 = alg.matmult(alg.transpose(a2), error); + + // weights and bias updation for layer 2 + weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate/n, D2_1)); + + // Calculating the bias gradients for layer 2 + bias2 = alg.subtractMatrixRows(bias2, alg.scalarMultiply(learning_rate, error)); + + //Calculating the weight/bias for layer 1 + + std::vector> D1_1 = alg.matmult(error, alg.transpose(weights2)); + + std::vector> D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, 1)); + + std::vector> D1_3 = alg.matmult(alg.transpose(inputSet), D1_2); + + + // weight an bias updation for layer 1 + weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate/n, D1_3)); + + bias1 = alg.subtractMatrixRows(bias1, alg.scalarMultiply(learning_rate/n, D1_2)); + + forwardPass(); + + // UI PORTION + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, inputSet)); + std::cout << "Layer 1:" << std::endl; + Utilities::UI(weights1, bias1); + std::cout << "Layer 2:" << std::endl; + Utilities::UI(weights2, bias2); + } + epoch++; + + if(epoch > max_epoch) { break; } + } + + } + + void AutoEncoder::SGD(double learning_rate, int max_epoch, bool UI){ + LinAlg alg; + Activation avn; + Utilities util; + double cost_prev = 0; + int epoch = 1; + + while(true){ + std::random_device rd; + std::default_random_engine generator(rd()); + std::uniform_int_distribution distribution(0, int(n - 1)); + int outputIndex = distribution(generator); + + std::vector y_hat = Evaluate(inputSet[outputIndex]); + auto [z2, a2] = propagate(inputSet[outputIndex]); + cost_prev = Cost({y_hat}, {inputSet[outputIndex]}); + std::vector error = alg.subtraction(y_hat, inputSet[outputIndex]); + + // Weight updation for layer 2 + std::vector> D2_1 = alg.vecmult(error, a2); + weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate, alg.transpose(D2_1))); + + // Bias updation for layer 2 + bias2 = alg.subtraction(bias2, alg.scalarMultiply(learning_rate, error)); + + // Weight updation for layer 1 + std::vector D1_1 = alg.mat_vec_mult(weights2, error); + std::vector D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, 1)); + std::vector> D1_3 = alg.vecmult(inputSet[outputIndex], D1_2); + + weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate, D1_3)); + // Bias updation for layer 1 + + bias1 = alg.subtraction(bias1, 
alg.scalarMultiply(learning_rate, D1_2)); + + y_hat = Evaluate(inputSet[outputIndex]); + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost({y_hat}, {inputSet[outputIndex]})); + std::cout << "Layer 1:" << std::endl; + Utilities::UI(weights1, bias1); + std::cout << "Layer 2:" << std::endl; + Utilities::UI(weights2, bias2); + } + epoch++; + + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + void AutoEncoder::MBGD(double learning_rate, int max_epoch, int miniBatch_size, bool UI){ + Activation avn; + LinAlg alg; + double cost_prev = 0; + int epoch = 1; + + int n_miniBatch = n/miniBatch_size; + + std::vector>> inputMiniBatches; + + // Creating the mini-batches + for(int i = 0; i < n_miniBatch; i++){ + std::vector> currentInputSet; + for(int j = 0; j < n/n_miniBatch; j++){ + currentInputSet.push_back(inputSet[n/n_miniBatch * i + j]); + } + inputMiniBatches.push_back(currentInputSet); + } + + if(double(n)/double(n_miniBatch) - int(n/n_miniBatch) != 0){ + for(int i = 0; i < n - n/n_miniBatch * n_miniBatch; i++){ + inputMiniBatches[n_miniBatch - 1].push_back(inputSet[n/n_miniBatch * n_miniBatch + i]); + } + } + + while(true){ + for(int i = 0; i < n_miniBatch; i++){ + std::vector> y_hat = Evaluate(inputMiniBatches[i]); + auto [z2, a2] = propagate(inputMiniBatches[i]); + cost_prev = Cost(y_hat, inputMiniBatches[i]); + + // Calculating the errors + std::vector> error = alg.subtraction(y_hat, inputMiniBatches[i]); + + // Calculating the weight/bias gradients for layer 2 + + std::vector> D2_1 = alg.matmult(alg.transpose(a2), error); + + // weights and bias updation for layer 2 + weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate/inputMiniBatches[i].size(), D2_1)); + + // Bias Updation for layer 2 + bias2 = alg.subtractMatrixRows(bias2, alg.scalarMultiply(learning_rate, error)); + + //Calculating the weight/bias for layer 1 + + std::vector> D1_1 = alg.matmult(error, alg.transpose(weights2)); + + std::vector> D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, 1)); + + std::vector> D1_3 = alg.matmult(alg.transpose(inputMiniBatches[i]), D1_2); + + + // weight an bias updation for layer 1 + weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate/inputMiniBatches[i].size(), D1_3)); + + bias1 = alg.subtractMatrixRows(bias1, alg.scalarMultiply(learning_rate/inputMiniBatches[i].size(), D1_2)); + + y_hat = Evaluate(inputMiniBatches[i]); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, inputMiniBatches[i])); + std::cout << "Layer 1:" << std::endl; + Utilities::UI(weights1, bias1); + std::cout << "Layer 2:" << std::endl; + Utilities::UI(weights2, bias2); + } + } + epoch++; + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + double AutoEncoder::score(){ + Utilities util; + return util.performance(y_hat, inputSet); + } + + void AutoEncoder::save(std::string fileName){ + Utilities util; + util.saveParameters(fileName, weights1, bias1, 0, 1); + util.saveParameters(fileName, weights2, bias2, 1, 2); + } + + double AutoEncoder::Cost(std::vector> y_hat, std::vector> y){ + class Cost cost; + return cost.MSE(y_hat, inputSet); + } + + std::vector> AutoEncoder::Evaluate(std::vector> X){ + LinAlg alg; + Activation avn; + std::vector> z2 = alg.mat_vec_add(alg.matmult(X, weights1), bias1); + std::vector> a2 = avn.sigmoid(z2); + return alg.mat_vec_add(alg.matmult(a2, weights2), bias2); + } + + std::tuple>, std::vector>> AutoEncoder::propagate(std::vector> X){ + LinAlg alg; + Activation avn; + std::vector> z2 = alg.mat_vec_add(alg.matmult(X, weights1), 
bias1); + std::vector> a2 = avn.sigmoid(z2); + return {z2, a2}; + } + + std::vector AutoEncoder::Evaluate(std::vector x){ + LinAlg alg; + Activation avn; + std::vector z2 = alg.addition(alg.mat_vec_mult(alg.transpose(weights1), x), bias1); + std::vector a2 = avn.sigmoid(z2); + return alg.addition(alg.mat_vec_mult(alg.transpose(weights2), a2), bias2); + } + + std::tuple, std::vector> AutoEncoder::propagate(std::vector x){ + LinAlg alg; + Activation avn; + std::vector z2 = alg.addition(alg.mat_vec_mult(alg.transpose(weights1), x), bias1); + std::vector a2 = avn.sigmoid(z2); + return {z2, a2}; + } + + void AutoEncoder::forwardPass(){ + LinAlg alg; + Activation avn; + z2 = alg.mat_vec_add(alg.matmult(inputSet, weights1), bias1); + a2 = avn.sigmoid(z2); + y_hat = alg.mat_vec_add(alg.matmult(a2, weights2), bias2); + } +} \ No newline at end of file diff --git a/MLPP/AutoEncoder/AutoEncoder.hpp b/MLPP/AutoEncoder/AutoEncoder.hpp new file mode 100644 index 0000000..d65ab35 --- /dev/null +++ b/MLPP/AutoEncoder/AutoEncoder.hpp @@ -0,0 +1,54 @@ +// +// AutoEncoder.hpp +// +// Created by Marc Melikyan on 11/4/20. +// + +#ifndef AutoEncoder_hpp +#define AutoEncoder_hpp + +#include +#include +#include + +namespace MLPP { + +class AutoEncoder{ + public: + AutoEncoder(std::vector> inputSet, int n_hidden); + std::vector> modelSetTest(std::vector> X); + std::vector modelTest(std::vector x); + void gradientDescent(double learning_rate, int max_epoch, bool UI = 1); + void SGD(double learning_rate, int max_epoch, bool UI = 1); + void MBGD(double learning_rate, int max_epoch, int miniBatch_size, bool UI = 1); + double score(); + void save(std::string fileName); + + private: + double Cost(std::vector> y_hat, std::vector> y); + + std::vector> Evaluate(std::vector> X); + std::tuple>, std::vector>> propagate(std::vector> X); + std::vector Evaluate(std::vector x); + std::tuple, std::vector> propagate(std::vector x); + void forwardPass(); + + std::vector> inputSet; + std::vector> y_hat; // This is your latent representation + + std::vector> weights1; + std::vector> weights2; + + std::vector bias1; + std::vector bias2; + + std::vector> z2; + std::vector> a2; + + int n; + int k; + int n_hidden; + }; +} + +#endif /* AutoEncoder_hpp */ diff --git a/MLPP/BernoulliNB/BernoulliNB.cpp b/MLPP/BernoulliNB/BernoulliNB.cpp new file mode 100644 index 0000000..edeadf5 --- /dev/null +++ b/MLPP/BernoulliNB/BernoulliNB.cpp @@ -0,0 +1,182 @@ +// +// BernoulliNB.cpp +// +// Created by Marc Melikyan on 1/17/21. 
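// Added commentary (not in the original commit): the classifier below follows the
// Bernoulli naive Bayes decision rule. For class k in {0, 1} with prior Pr(C_k) and
// per-word probabilities theta_k[w], an example x is scored as
//   score_k = log Pr(C_k)
//           + sum over vocabulary words present in x  of log(theta_k[w])
//           + sum over vocabulary words absent from x of log(1 - theta_k[w]),
// and the class with the larger score is predicted. Evaluate() below works in log
// space along these lines; modelTest() multiplies the raw probabilities instead,
// which is equivalent under the monotone log transform.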
+// + +#include "BernoulliNB.hpp" +#include "Utilities/Utilities.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Data/Data.hpp" + +#include +#include + +namespace MLPP{ + BernoulliNB::BernoulliNB(std::vector> inputSet, std::vector outputSet) + : inputSet(inputSet), outputSet(outputSet), class_num(2) + { + y_hat.resize(outputSet.size()); + Evaluate(); + } + + std::vector BernoulliNB::modelSetTest(std::vector> X){ + std::vector y_hat; + for(int i = 0; i < X.size(); i++){ + y_hat.push_back(modelTest(X[i])); + } + return y_hat; + } + + double BernoulliNB::modelTest(std::vector x){ + double score_0 = 1; + double score_1 = 1; + + std::vector foundIndices; + + for(int j = 0; j < x.size(); j++){ + for(int k = 0; k < vocab.size(); k++){ + if(x[j] == vocab[k]){ + score_0 *= theta[0][vocab[k]]; + score_1 *= theta[1][vocab[k]]; + + foundIndices.push_back(k); + } + } + } + + for(int i = 0; i < vocab.size(); i++){ + bool found = false; + for(int j = 0; j < foundIndices.size(); j++){ + if(vocab[i] == vocab[foundIndices[j]]){ + found = true; + } + } + if(!found){ + score_0 *= 1 - theta[0][vocab[i]]; + score_1 *= 1 - theta[1][vocab[i]]; + } + } + + score_0 *= prior_0; + score_1 *= prior_1; + + // Assigning the traning example to a class + + if(score_0 > score_1){ + return 0; + } + else{ + return 1; + } + } + + double BernoulliNB::score(){ + Utilities util; + return util.performance(y_hat, outputSet); + } + + void BernoulliNB::computeVocab(){ + LinAlg alg; + Data data; + vocab = data.vecToSet(alg.flatten(inputSet)); + } + + void BernoulliNB::computeTheta(){ + + // Resizing theta for the sake of ease & proper access of the elements. + theta.resize(class_num); + + // Setting all values in the hasmap by default to 0. + for(int i = class_num - 1; i >= 0; i--){ + for(int j = 0; j < vocab.size(); j++){ + theta[i][vocab[j]] = 0; + } + } + + for(int i = 0; i < inputSet.size(); i++){ + for(int j = 0; j < inputSet[0].size(); j++){ + theta[outputSet[i]][inputSet[i][j]]++; + } + } + + for(int i = 0; i < theta.size(); i++){ + for(int j = 0; j < theta[i].size(); j++){ + if(i == 0){ + theta[i][j] /= prior_0 * y_hat.size(); + } + else{ + theta[i][j] /= prior_1 * y_hat.size(); + } + } + } + } + + void BernoulliNB::Evaluate(){ + for(int i = 0; i < outputSet.size(); i++){ + // Pr(B | A) * Pr(A) + double score_0 = 1; + double score_1 = 1; + + + double sum = 0; + for(int i = 0; i < outputSet.size(); i++){ + if(outputSet[i] == 1){ sum += outputSet[i]; } + } + + // Easy computation of priors, i.e. Pr(C_k) + prior_1 = sum / y_hat.size(); + prior_0 = 1 - prior_1; + + // Evaluating Theta... + computeTheta(); + + // Evaluating the vocab set... 
+ computeVocab(); + + std::vector foundIndices; + + for(int j = 0; j < inputSet.size(); j++){ + for(int k = 0; k < vocab.size(); k++){ + if(inputSet[i][j] == vocab[k]){ + score_0 += log(theta[0][vocab[k]]); + score_1 += log(theta[1][vocab[k]]); + + foundIndices.push_back(k); + } + } + } + + for(int i = 0; i < vocab.size(); i++){ + bool found = false; + for(int j = 0; j < foundIndices.size(); j++){ + if(vocab[i] == vocab[foundIndices[j]]){ + found = true; + } + } + if(!found){ + score_0 += log(1 - theta[0][vocab[i]]); + score_1 += log(1 - theta[1][vocab[i]]); + } + } + + score_0 += log(prior_0); + score_1 += log(prior_1); + + score_0 = exp(score_0); + score_1 = exp(score_1); + + std::cout << score_0 << std::endl; + std::cout << score_1 << std::endl; + + // Assigning the traning example to a class + + if(score_0 > score_1){ + y_hat[i] = 0; + } + else{ + y_hat[i] = 1; + } + } + } +} \ No newline at end of file diff --git a/MLPP/BernoulliNB/BernoulliNB.hpp b/MLPP/BernoulliNB/BernoulliNB.hpp new file mode 100644 index 0000000..dd10ec2 --- /dev/null +++ b/MLPP/BernoulliNB/BernoulliNB.hpp @@ -0,0 +1,47 @@ +// +// BernoulliNB.hpp +// +// Created by Marc Melikyan on 1/17/21. +// + +#ifndef BernoulliNB_hpp +#define BernoulliNB_hpp + +#include +#include + +namespace MLPP{ + class BernoulliNB{ + + public: + BernoulliNB(std::vector> inputSet, std::vector outputSet); + std::vector modelSetTest(std::vector> X); + double modelTest(std::vector x); + double score(); + + private: + + void computeVocab(); + void computeTheta(); + void Evaluate(); + + // Model Params + double prior_1 = 0; + double prior_0 = 0; + + std::vector> theta; + std::vector vocab; + int class_num; + + // Datasets + std::vector> inputSet; + std::vector outputSet; + std::vector y_hat; + + + + + }; + + #endif /* BernoulliNB_hpp */ +} \ No newline at end of file diff --git a/MLPP/CLogLogReg/CLogLogReg.cpp b/MLPP/CLogLogReg/CLogLogReg.cpp new file mode 100644 index 0000000..e0641d6 --- /dev/null +++ b/MLPP/CLogLogReg/CLogLogReg.cpp @@ -0,0 +1,247 @@ +// +// CLogLogReg.cpp +// +// Created by Marc Melikyan on 10/2/20. 
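// Added commentary (not in the original commit): the model below uses the
// complementary log-log link, y_hat = cloglog(z) = 1 - exp(-exp(z)) with z = w.x + b.
// Its derivative is
//   d/dz cloglog(z) = exp(z) * exp(-exp(z)) = exp(z - exp(z)),
// which is why gradientDescent() multiplies the error by avn.cloglog(z, 1) and why
// SGD() uses the factor exp(z - exp(z)) directly in its per-example weight and bias
// gradients.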
+// + +#include "CLogLogReg.hpp" +#include "Activation/Activation.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Regularization/Reg.hpp" +#include "Utilities/Utilities.hpp" +#include "Cost/Cost.hpp" + +#include +#include + +namespace MLPP{ + CLogLogReg::CLogLogReg(std::vector> inputSet, std::vector outputSet, std::string reg, double lambda, double alpha) + : inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha) + { + y_hat.resize(n); + weights = Utilities::weightInitialization(k); + bias = Utilities::biasInitialization(); + } + + std::vector CLogLogReg::modelSetTest(std::vector> X){ + return Evaluate(X); + } + + double CLogLogReg::modelTest(std::vector x){ + return Evaluate(x); + } + + void CLogLogReg::gradientDescent(double learning_rate, int max_epoch, bool UI){ + Reg regularization; + LinAlg alg; + Activation avn; + double cost_prev = 0; + int epoch = 1; + forwardPass(); + + while(true){ + cost_prev = Cost(y_hat, outputSet); + + std::vector error = alg.subtraction(y_hat, outputSet); + + + // Calculating the weight gradients + weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputSet), alg.hadamard_product(error, avn.cloglog(z, 1))))); + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Calculating the bias gradients + bias -= learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.cloglog(z, 1))) / n; + + forwardPass(); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); + Utilities::UI(weights, bias); + } + epoch++; + + if(epoch > max_epoch) { break; } + } + } + + void CLogLogReg::MLE(double learning_rate, int max_epoch, bool UI){ + Reg regularization; + Activation avn; + LinAlg alg; + double cost_prev = 0; + int epoch = 1; + forwardPass(); + + while(true){ + cost_prev = Cost(y_hat, outputSet); + + std::vector error = alg.subtraction(y_hat, outputSet); + + weights = alg.addition(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputSet), alg.hadamard_product(error, avn.cloglog(z, 1))))); + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Calculating the bias gradients + bias += learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.cloglog(z, 1))) / n; + forwardPass(); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); + Utilities::UI(weights, bias); + } + epoch++; + + if(epoch > max_epoch) { break; } + + } + } + + void CLogLogReg::SGD(double learning_rate, int max_epoch, bool UI){ + Reg regularization; + double cost_prev = 0; + int epoch = 1; + forwardPass(); + + while(true){ + std::random_device rd; + std::default_random_engine generator(rd()); + std::uniform_int_distribution distribution(0, int(n - 1)); + int outputIndex = distribution(generator); + + double y_hat = Evaluate(inputSet[outputIndex]); + double z = propagate(inputSet[outputIndex]); + + cost_prev = Cost({y_hat}, {outputSet[outputIndex]}); + + for(int i = 0; i < k; i++){ + + // Calculating the weight gradients + double w_gradient = (y_hat - outputSet[outputIndex]) * exp(z-exp(z)) * inputSet[outputIndex][i]; + + + // Weight updation + weights[i] -= learning_rate * w_gradient; + } + weights = regularization.regWeights(weights, lambda, alpha, reg); + + + // Calculating the bias gradients + double b_gradient = (y_hat - outputSet[outputIndex]) * exp(z-exp(z)); + + // Bias updation + bias -= learning_rate * b_gradient; + y_hat = Evaluate({inputSet[outputIndex]}); + 
+ if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost({y_hat}, {outputSet[outputIndex]})); + Utilities::UI(weights, bias); + } + epoch++; + + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + void CLogLogReg::MBGD(double learning_rate, int max_epoch, int miniBatch_size, bool UI){ + Reg regularization; + Activation avn; + LinAlg alg; + double cost_prev = 0; + int epoch = 1; + + int n_miniBatch = n/miniBatch_size; + + std::vector>> inputMiniBatches; + std::vector> outputMiniBatches; + // Creating the mini-batches + for(int i = 0; i < n_miniBatch; i++){ + std::vector> currentInputSet; + std::vector currentOutputSet; + std::vector currentPreActivationSet; + for(int j = 0; j < n/n_miniBatch; j++){ + currentInputSet.push_back(inputSet[n/n_miniBatch * i + j]); + currentOutputSet.push_back(outputSet[n/n_miniBatch * i + j]); + } + inputMiniBatches.push_back(currentInputSet); + outputMiniBatches.push_back(currentOutputSet); + } + + if(double(n)/double(n_miniBatch) - int(n/n_miniBatch) != 0){ + for(int i = 0; i < n - n/n_miniBatch * n_miniBatch; i++){ + inputMiniBatches[n_miniBatch - 1].push_back(inputSet[n/n_miniBatch * n_miniBatch + i]); + outputMiniBatches[n_miniBatch - 1].push_back(outputSet[n/n_miniBatch * n_miniBatch + i]); + } + } + + while(true){ + for(int i = 0; i < n_miniBatch; i++){ + std::vector y_hat = Evaluate(inputMiniBatches[i]); + std::vector z = propagate(inputMiniBatches[i]); + cost_prev = Cost(y_hat, outputMiniBatches[i]); + + std::vector error = alg.subtraction(y_hat, outputMiniBatches[i]); + + // Calculating the weight gradients + weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), alg.hadamard_product(error, avn.cloglog(z, 1))))); + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Calculating the bias gradients + bias -= learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.cloglog(z, 1))) / n; + + forwardPass(); + + y_hat = Evaluate(inputMiniBatches[i]); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i])); + Utilities::UI(weights, bias); + } + } + epoch++; + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + double CLogLogReg::score(){ + Utilities util; + return util.performance(y_hat, outputSet); + } + + double CLogLogReg::Cost(std::vector y_hat, std::vector y){ + Reg regularization; + class Cost cost; + return cost.MSE(y_hat, y) + regularization.regTerm(weights, lambda, alpha, reg); + } + + std::vector CLogLogReg::Evaluate(std::vector> X){ + LinAlg alg; + Activation avn; + return avn.cloglog(alg.scalarAdd(bias, alg.mat_vec_mult(X, weights))); + } + + std::vectorCLogLogReg::propagate(std::vector> X){ + LinAlg alg; + return alg.scalarAdd(bias, alg.mat_vec_mult(X, weights)); + } + + double CLogLogReg::Evaluate(std::vector x){ + LinAlg alg; + Activation avn; + return avn.cloglog(alg.dot(weights, x) + bias); + } + + double CLogLogReg::propagate(std::vector x){ + LinAlg alg; + return alg.dot(weights, x) + bias; + } + + // cloglog ( wTx + b ) + void CLogLogReg::forwardPass(){ + LinAlg alg; + Activation avn; + + z = propagate(inputSet); + y_hat = avn.cloglog(z); + } +} \ No newline at end of file diff --git a/MLPP/CLogLogReg/CLogLogReg.hpp b/MLPP/CLogLogReg/CLogLogReg.hpp new file mode 100644 index 0000000..335186f --- /dev/null +++ b/MLPP/CLogLogReg/CLogLogReg.hpp @@ -0,0 +1,58 @@ +// +// CLogLogReg.hpp +// +// Created by Marc Melikyan on 10/2/20. 
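// Usage sketch (not part of this commit): fitting the CLogLogReg model declared below.
// The template arguments, stripped elsewhere in this diff, are assumed to be
// std::vector<std::vector<double>> for the features and std::vector<double> for the
// binary labels; the data values and the "Ridge" regularization name are illustrative
// assumptions (the constructor defaults to reg = "None").
#include "MLPP/CLogLogReg/CLogLogReg.hpp"
#include <iostream>
#include <vector>

int main(){
    std::vector<std::vector<double>> X = {{0.2, 1.0}, {0.4, 0.5}, {0.9, 0.1}, {1.3, 0.2}};
    std::vector<double> y = {0, 0, 1, 1};

    MLPP::CLogLogReg model(X, y, "Ridge", 0.01);   // regularization type (assumed key), lambda
    model.gradientDescent(0.05, 5000, 0);          // learning rate, max epochs, UI off
    std::cout << "Accuracy: " << model.score() << std::endl;
    return 0;
}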
+// + +#ifndef CLogLogReg_hpp +#define CLogLogReg_hpp + + +#include +#include + +namespace MLPP { + + class CLogLogReg{ + + public: + CLogLogReg(std::vector> inputSet, std::vector outputSet, std::string reg = "None", double lambda = 0.5, double alpha = 0.5); + std::vector modelSetTest(std::vector> X); + double modelTest(std::vector x); + void gradientDescent(double learning_rate, int max_epoch, bool UI = 1); + void MLE(double learning_rate, int max_epoch, bool UI = 1); + void SGD(double learning_rate, int max_epoch, bool UI = 1); + void MBGD(double learning_rate, int max_epoch, int miniBatch_size, bool UI = 1); + double score(); + private: + + void weightInitialization(int k); + void biasInitialization(); + double Cost(std::vector y_hat, std::vector y); + + std::vector Evaluate(std::vector> X); + std::vector propagate(std::vector> X); + double Evaluate(std::vector x); + double propagate(std::vector x); + void forwardPass(); + + std::vector> inputSet; + std::vector outputSet; + std::vector y_hat; + std::vector z; + std::vector weights; + double bias; + + int n; + int k; + + // Regularization Params + std::string reg; + double lambda; + double alpha; /* This is the controlling param for Elastic Net*/ + + + }; +} + +#endif /* CLogLogReg_hpp */ diff --git a/MLPP/Convolutions/Convolutions.cpp b/MLPP/Convolutions/Convolutions.cpp new file mode 100644 index 0000000..9e3d6d9 --- /dev/null +++ b/MLPP/Convolutions/Convolutions.cpp @@ -0,0 +1,259 @@ +// +// Convolutions.cpp +// +// Created by Marc Melikyan on 4/6/21. +// + +#include +#include "Convolutions/Convolutions.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Stat/Stat.hpp" + +namespace MLPP{ + + Convolutions::Convolutions() + : prewittHorizontal({{1,1,1}, {0,0,0}, {-1,-1,-1}}), prewittVertical({{1,0,-1}, {1,0,-1}, {1,0,-1}}), + sobelHorizontal({{1,2,1}, {0,0,0}, {-1,-2,-1}}), sobelVertical({{-1,0,1}, {-2,0,2}, {-1,0,1}}), + scharrHorizontal({{3,10,3}, {0,0,0}, {-3,-10,-3}}), scharrVertical({{3,0,-3}, {10,0,-10}, {3,0,-3}}), + robertsHorizontal({{0,1}, {-1,0}}), robertsVertical({{1,0}, {0,-1}}) + { + + } + + std::vector> Convolutions::convolve(std::vector> input, std::vector> filter, int S, int P){ + LinAlg alg; + std::vector> featureMap; + int N = input.size(); + int F = filter.size(); + int mapSize = (N - F + 2*P) / S + 1; // This is computed as ⌊mapSize⌋ by def- thanks C++! 
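        // Added commentary (not in the original commit): the feature-map size above follows
        // the usual convolution arithmetic, mapSize = floor((N - F + 2P) / S) + 1, where N is
        // the input width, F the filter width, P the padding and S the stride (integer
        // division supplies the floor, as the comment above notes). For example, a 7x7 input
        // convolved with a 3x3 filter at S = 1, P = 0 gives (7 - 3 + 0)/1 + 1 = 5, i.e. a
        // 5x5 map; with P = 1 the output stays 7x7 ("same" padding).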
+ + if(P != 0){ + std::vector> paddedInput; + paddedInput.resize(N + 2*P); + for(int i = 0; i < paddedInput.size(); i++){ + paddedInput[i].resize(N + 2*P); + } + for(int i = 0; i < paddedInput.size(); i++){ + for(int j = 0; j < paddedInput[i].size(); j++){ + if(i - P < 0 || j - P < 0 || i - P > input.size() - 1 || j - P > input[0].size() - 1){ + paddedInput[i][j] = 0; + } + else{ + paddedInput[i][j] = input[i - P][j - P]; + } + } + } + input.resize(paddedInput.size()); + for(int i = 0; i < paddedInput.size(); i++){ + input[i].resize(paddedInput[i].size()); + } + input = paddedInput; + } + + featureMap.resize(mapSize); + for(int i = 0; i < mapSize; i++){ + featureMap[i].resize(mapSize); + } + + for(int i = 0; i < mapSize; i++){ + for(int j = 0; j < mapSize; j++){ + std::vector convolvingInput; + for(int k = 0; k < F; k++){ + for(int p = 0; p < F; p++){ + if(i == 0 && j == 0){ + convolvingInput.push_back(input[i + k][j + p]); + } + else if(i == 0){ + convolvingInput.push_back(input[i + k][j + (S - 1) + p]); + } + else if(j == 0){ + convolvingInput.push_back(input[i + (S - 1) + k][j + p]); + } + else{ + convolvingInput.push_back(input[i + (S - 1) + k][j + (S - 1) + p]); + } + } + } + featureMap[i][j] = alg.dot(convolvingInput, alg.flatten(filter)); + } + } + return featureMap; + } + + std::vector>> Convolutions::convolve(std::vector>> input, std::vector>> filter, int S, int P){ + LinAlg alg; + std::vector>> featureMap; + int N = input[0].size(); + int F = filter[0].size(); + int C = filter.size() / input.size(); + int mapSize = (N - F + 2*P) / S + 1; // This is computed as ⌊mapSize⌋ by def- thanks C++! + + if(P != 0){ + for(int c = 0; c < input.size(); c++){ + std::vector> paddedInput; + paddedInput.resize(N + 2*P); + for(int i = 0; i < paddedInput.size(); i++){ + paddedInput[i].resize(N + 2*P); + } + for(int i = 0; i < paddedInput.size(); i++){ + for(int j = 0; j < paddedInput[i].size(); j++){ + if(i - P < 0 || j - P < 0 || i - P > input[c].size() - 1 || j - P > input[c][0].size() - 1){ + paddedInput[i][j] = 0; + } + else{ + paddedInput[i][j] = input[c][i - P][j - P]; + } + } + } + input[c].resize(paddedInput.size()); + for(int i = 0; i < paddedInput.size(); i++){ + input[c][i].resize(paddedInput[i].size()); + } + input[c] = paddedInput; + } + } + + featureMap.resize(C); + for(int i = 0; i < featureMap.size(); i++){ + featureMap[i].resize(mapSize); + for(int j = 0; j < featureMap[i].size(); j++){ + featureMap[i][j].resize(mapSize); + } + } + + for(int c = 0; c < C; c++){ + for(int i = 0; i < mapSize; i++){ + for(int j = 0; j < mapSize; j++){ + std::vector convolvingInput; + for(int t = 0; t < input.size(); t++){ + for(int k = 0; k < F; k++){ + for(int p = 0; p < F; p++){ + if(i == 0 && j == 0){ + convolvingInput.push_back(input[t][i + k][j + p]); + } + else if(i == 0){ + convolvingInput.push_back(input[t][i + k][j + (S - 1) + p]); + } + else if(j == 0){ + convolvingInput.push_back(input[t][i + (S - 1) + k][j + p]); + } + else{ + convolvingInput.push_back(input[t][i + (S - 1) + k][j + (S - 1) + p]); + } + } + } + } + featureMap[c][i][j] = alg.dot(convolvingInput, alg.flatten(filter)); + } + } + } + return featureMap; + } + + std::vector> Convolutions::pool(std::vector> input, int F, int S, std::string type){ + LinAlg alg; + std::vector> pooledMap; + int N = input.size(); + int mapSize = floor((N - F) / S + 1); + + pooledMap.resize(mapSize); + for(int i = 0; i < mapSize; i++){ + pooledMap[i].resize(mapSize); + } + + for(int i = 0; i < mapSize; i++){ + for(int j = 0; j < mapSize; j++){ + 
std::vector poolingInput; + for(int k = 0; k < F; k++){ + for(int p = 0; p < F; p++){ + if(i == 0 && j == 0){ + poolingInput.push_back(input[i + k][j + p]); + } + else if(i == 0){ + poolingInput.push_back(input[i + k][j + (S - 1) + p]); + } + else if(j == 0){ + poolingInput.push_back(input[i + (S - 1) + k][j + p]); + } + else{ + poolingInput.push_back(input[i + (S - 1) + k][j + (S - 1) + p]); + } + } + } + if(type == "Average"){ + Stat stat; + pooledMap[i][j] = stat.mean(poolingInput); + } + else if(type == "Min"){ + pooledMap[i][j] = alg.min(poolingInput); + } + else{ + pooledMap[i][j] = alg.max(poolingInput); + } + } + } + return pooledMap; + } + + std::vector>> Convolutions::pool(std::vector>> input, int F, int S, std::string type){ + std::vector>> pooledMap; + for(int i = 0; i < input.size(); i++){ + pooledMap.push_back(pool(input[i], F, S, type)); + } + return pooledMap; + } + + double Convolutions::globalPool(std::vector> input, std::string type){ + LinAlg alg; + if(type == "Average"){ + Stat stat; + return stat.mean(alg.flatten(input)); + } + else if(type == "Min"){ + return alg.min(alg.flatten(input)); + } + else{ + return alg.max(alg.flatten(input)); + } + } + + std::vector Convolutions::globalPool(std::vector>> input, std::string type){ + std::vector pooledMap; + for(int i = 0; i < input.size(); i++){ + pooledMap.push_back(globalPool(input[i], type)); + } + return pooledMap; + } + + std::vector> Convolutions::getPrewittHorizontal(){ + return prewittHorizontal; + } + + std::vector> Convolutions::getPrewittVertical(){ + return prewittVertical; + } + + std::vector> Convolutions::getSobelHorizontal(){ + return sobelHorizontal; + } + + std::vector> Convolutions::getSobelVertical(){ + return sobelVertical; + } + + std::vector> Convolutions::getScharrHorizontal(){ + return scharrHorizontal; + } + + std::vector> Convolutions::getScharrVertical(){ + return scharrVertical; + } + + std::vector> Convolutions::getRobertsHorizontal(){ + return robertsHorizontal; + } + + std::vector> Convolutions::getRobertsVertical(){ + return robertsVertical; + } +} \ No newline at end of file diff --git a/MLPP/Convolutions/Convolutions.hpp b/MLPP/Convolutions/Convolutions.hpp new file mode 100644 index 0000000..913c965 --- /dev/null +++ b/MLPP/Convolutions/Convolutions.hpp @@ -0,0 +1,39 @@ +#ifndef Convolutions_hpp +#define Convolutions_hpp + +#include + +namespace MLPP{ + class Convolutions{ + public: + Convolutions(); + std::vector> convolve(std::vector> input, std::vector> filter, int S, int P = 0); + std::vector>> convolve(std::vector>> input, std::vector>> filter, int S, int P = 0); + std::vector> pool(std::vector> input, int F, int S, std::string type); + std::vector>> pool(std::vector>> input, int F, int S, std::string type); + double globalPool(std::vector> input, std::string type); + std::vector globalPool(std::vector>> input, std::string type); + + std::vector> getPrewittHorizontal(); + std::vector> getPrewittVertical(); + std::vector> getSobelHorizontal(); + std::vector> getSobelVertical(); + std::vector> getScharrHorizontal(); + std::vector> getScharrVertical(); + std::vector> getRobertsHorizontal(); + std::vector> getRobertsVertical(); + + private: + std::vector> prewittHorizontal; + std::vector> prewittVertical; + std::vector> sobelHorizontal; + std::vector> sobelVertical; + std::vector> scharrHorizontal; + std::vector> scharrVertical; + std::vector> robertsHorizontal; + std::vector> robertsVertical; + + }; +} + +#endif // Convolutions_hpp \ No newline at end of file diff --git 
a/MLPP/Cost/Cost.cpp b/MLPP/Cost/Cost.cpp new file mode 100644 index 0000000..7ca3634 --- /dev/null +++ b/MLPP/Cost/Cost.cpp @@ -0,0 +1,344 @@ +// +// Reg.cpp +// +// Created by Marc Melikyan on 1/16/21. +// + +#include +#include "Cost.hpp" +#include "LinAlg/LinAlg.hpp" + +namespace MLPP{ + double Cost::MSE(std::vector y_hat, std::vector y){ + double sum = 0; + for(int i = 0; i < y_hat.size(); i++){ + sum += (y_hat[i] - y[i]) * (y_hat[i] - y[i]); + } + return sum / 2 * y_hat.size(); + } + + double Cost::MSE(std::vector> y_hat, std::vector> y){ + double sum = 0; + for(int i = 0; i < y_hat.size(); i++){ + for(int j = 0; j < y_hat[i].size(); j++){ + sum += (y_hat[i][j] - y[i][j]) * (y_hat[i][j] - y[i][j]); + } + } + return sum / 2 * y_hat.size(); + } + + std::vector Cost::MSEDeriv(std::vector y_hat, std::vector y){ + LinAlg alg; + return alg.subtraction(y_hat, y); + } + + std::vector> Cost::MSEDeriv(std::vector> y_hat, std::vector> y){ + LinAlg alg; + return alg.subtraction(y_hat, y); + } + + double Cost::RMSE(std::vector y_hat, std::vector y){ + double sum = 0; + for(int i = 0; i < y_hat.size(); i++){ + sum += (y_hat[i] - y[i]) * (y_hat[i] - y[i]); + } + return sqrt(sum / y_hat.size()); + } + + double Cost::RMSE(std::vector> y_hat, std::vector> y){ + double sum = 0; + for(int i = 0; i < y_hat.size(); i++){ + for(int j = 0; j < y_hat[i].size(); j++){ + sum += (y_hat[i][j] - y[i][j]) * (y_hat[i][j] - y[i][j]); + } + } + return sqrt(sum / y_hat.size()); + } + + std::vector Cost::RMSEDeriv(std::vector y_hat, std::vector y){ + LinAlg alg; + return alg.scalarMultiply(1/(2*sqrt(MSE(y_hat, y))), MSEDeriv(y_hat, y)); + } + + std::vector> Cost::RMSEDeriv(std::vector> y_hat, std::vector> y){ + LinAlg alg; + return alg.scalarMultiply(1/(2/sqrt(MSE(y_hat, y))), MSEDeriv(y_hat, y)); + } + + double Cost::MAE(std::vector y_hat, std::vector y){ + double sum = 0; + for(int i = 0; i < y_hat.size(); i++){ + sum += abs((y_hat[i] - y[i])); + } + return sum / y_hat.size(); + } + + double Cost::MAE(std::vector> y_hat, std::vector> y){ + double sum = 0; + for(int i = 0; i < y_hat.size(); i++){ + for(int j = 0; j < y_hat[i].size(); j++){ + sum += abs((y_hat[i][j] - y[i][j])); + } + } + return sum / y_hat.size(); + } + + std::vector Cost::MAEDeriv(std::vector y_hat, std::vector y){ + std::vector deriv; + deriv.resize(y_hat.size()); + for(int i = 0; i < deriv.size(); i++){ + if(y_hat[i] < 0){ + deriv[i] = -1; + } + else if(y_hat[i] == 0){ + deriv[i] = 0; + } + else{ + deriv[i] = 1; + + } + } + return deriv; + } + + std::vector> Cost::MAEDeriv(std::vector> y_hat, std::vector> y){ + std::vector> deriv; + deriv.resize(y_hat.size()); + for(int i = 0; i < deriv.size(); i++){ + deriv.resize(y_hat[i].size()); + } + for(int i = 0; i < deriv.size(); i++){ + for(int j = 0; j < deriv[i].size(); j++){ + if(y_hat[i][j] < 0){ + deriv[i][j] = -1; + } + else if(y_hat[i][j] == 0){ + deriv[i][j] = 0; + } + else{ + deriv[i][j] = 1; + + } + } + } + return deriv; + } + + double Cost::MBE(std::vector y_hat, std::vector y){ + double sum = 0; + for(int i = 0; i < y_hat.size(); i++){ + sum += (y_hat[i] - y[i]); + } + return sum / y_hat.size(); + } + + double Cost::MBE(std::vector> y_hat, std::vector> y){ + double sum = 0; + for(int i = 0; i < y_hat.size(); i++){ + for(int j = 0; j < y_hat[i].size(); j++){ + sum += (y_hat[i][j] - y[i][j]); + } + } + return sum / y_hat.size(); + } + + std::vector Cost::MBEDeriv(std::vector y_hat, std::vector y){ + LinAlg alg; + return alg.onevec(y_hat.size()); + } + + std::vector> Cost::MBEDeriv(std::vector> 
y_hat, std::vector> y){ + LinAlg alg; + return alg.onemat(y_hat.size(), y_hat[0].size()); + } + + double Cost::LogLoss(std::vector y_hat, std::vector y){ + double sum = 0; + double eps = 1e-8; + for(int i = 0; i < y_hat.size(); i++){ + sum += -(y[i] * log(y_hat[i] + eps) + (1 - y[i]) * log(1 - y_hat[i] + eps)); + } + + return sum / y_hat.size(); + } + + double Cost::LogLoss(std::vector > y_hat, std::vector > y){ + double sum = 0; + double eps = 1e-8; + for(int i = 0; i < y_hat.size(); i++){ + for(int j = 0; j < y_hat[i].size(); j++){ + sum += -(y[i][j] * log(y_hat[i][j] + eps) + (1 - y[i][j]) * log(1 - y_hat[i][j] + eps)); + } + } + + return sum / y_hat.size(); + } + + std::vector Cost::LogLossDeriv(std::vector y_hat, std::vector y){ + LinAlg alg; + return alg.addition(alg.scalarMultiply(-1, alg.elementWiseDivision(y, y_hat)), alg.elementWiseDivision(alg.scalarMultiply(-1, alg.scalarAdd(-1, y)), alg.scalarMultiply(-1, alg.scalarAdd(-1, y_hat)))); + } + + std::vector> Cost::LogLossDeriv(std::vector> y_hat, std::vector> y){ + LinAlg alg; + return alg.addition(alg.scalarMultiply(-1, alg.elementWiseDivision(y, y_hat)), alg.elementWiseDivision(alg.scalarMultiply(-1, alg.scalarAdd(-1, y)), alg.scalarMultiply(-1, alg.scalarAdd(-1, y_hat)))); + } + + double Cost::CrossEntropy(std::vector y_hat, std::vector y){ + double sum = 0; + for(int i = 0; i < y_hat.size(); i++){ + sum += y[i] * log(y_hat[i]); + } + + return -1 * sum; + } + + double Cost::CrossEntropy(std::vector> y_hat, std::vector> y){ + double sum = 0; + for(int i = 0; i < y_hat.size(); i++){ + for(int j = 0; j < y_hat[i].size(); j++){ + sum += y[i][j] * log(y_hat[i][j]); + } + } + + return -1 * sum; + } + + std::vector Cost::CrossEntropyDeriv(std::vector y_hat, std::vector y){ + LinAlg alg; + return alg.scalarMultiply(-1, alg.elementWiseDivision(y, y_hat)); + } + + std::vector> Cost::CrossEntropyDeriv(std::vector> y_hat, std::vector> y){ + LinAlg alg; + return alg.scalarMultiply(-1, alg.elementWiseDivision(y, y_hat)); + } + + double Cost::HuberLoss(std::vector y_hat, std::vector y, double delta){ + LinAlg alg; + double sum = 0; + for(int i = 0; i < y_hat.size(); i++){ + if(abs(y[i] - y_hat[i]) <= delta){ + sum += (y[i] - y_hat[i]) * (y[i] - y_hat[i]); + } + else{ + sum += 2 * delta * abs(y[i] - y_hat[i]) - delta * delta; + } + } + return sum; + } + + double Cost::HuberLoss(std::vector> y_hat, std::vector> y, double delta){ + LinAlg alg; + double sum = 0; + for(int i = 0; i < y_hat.size(); i++){ + for(int j = 0; j < y_hat[i].size(); j++){ + if(abs(y[i][j] - y_hat[i][j]) <= delta){ + sum += (y[i][j] - y_hat[i][j]) * (y[i][j] - y_hat[i][j]); + } + else{ + sum += 2 * delta * abs(y[i][j] - y_hat[i][j]) - delta * delta; + } + } + } + return sum; + } + + std::vector Cost::HuberLossDeriv(std::vector y_hat, std::vector y, double delta){ + LinAlg alg; + double sum = 0; + std::vector deriv; + deriv.resize(y_hat.size()); + + for(int i = 0; i < y_hat.size(); i++){ + if(abs(y[i] - y_hat[i]) <= delta){ + deriv.push_back(-(y[i] - y_hat[i])); + } + else{ + if(y_hat[i] > 0 || y_hat[i] < 0){ + deriv.push_back(2 * delta * (y_hat[i]/abs(y_hat[i]))); + } + else{ + deriv.push_back(0); + } + } + } + return deriv; + } + + std::vector> Cost::HuberLossDeriv(std::vector> y_hat, std::vector> y, double delta){ + LinAlg alg; + double sum = 0; + std::vector> deriv; + deriv.resize(y_hat.size()); + for(int i = 0; i < deriv.size(); i++){ + deriv[i].resize(y_hat[i].size()); + } + + for(int i = 0; i < y_hat.size(); i++){ + for(int j = 0; j < y_hat[i].size(); j++){ + 
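// Two details worth noting in this derivative block (and in the vector overload above):
// deriv[i] has already been resized to y_hat[i].size(), so the push_back calls below append
// after those preallocated zeros rather than filling them (assigning deriv[i][j] = ... would
// match the resize); and for |y - y_hat| > delta the slope of the Huber loss is
// 2 * delta * sign(y_hat - y), whereas the expression below takes the sign of y_hat alone.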
if(abs(y[i][j] - y_hat[i][j]) <= delta){ + deriv[i].push_back(-(y[i][j] - y_hat[i][j])); + } + else{ + if(y_hat[i][j] > 0 || y_hat[i][j] < 0){ + deriv[i].push_back(2 * delta * (y_hat[i][j]/abs(y_hat[i][j]))); + } + else{ + deriv[i].push_back(0); + } + } + } + } + return deriv; + } + + double Cost::HingeLoss(std::vector y_hat, std::vector y){ + double sum = 0; + for(int i = 0; i < y_hat.size(); i++){ + sum += fmax(0, 1 - y[i] * y_hat[i]); + } + + return sum / y_hat.size(); + } + + double Cost::HingeLoss(std::vector> y_hat, std::vector> y){ + double sum = 0; + for(int i = 0; i < y_hat.size(); i++){ + for(int j = 0; j < y_hat[i].size(); j++){ + sum += fmax(0, 1 - y[i][j] * y_hat[i][j]); + } + } + + return sum / y_hat.size(); + } + + std::vector Cost::HingeLossDeriv(std::vector y_hat, std::vector y){ + std::vector deriv; + deriv.resize(y_hat.size()); + for(int i = 0; i < y_hat.size(); i++){ + if(1 - y[i] * y_hat[i] > 0){ + deriv[i] = -y[i]; + } + else{ + deriv[i] = 0; + } + } + return deriv; + } + + std::vector> Cost::HingeLossDeriv(std::vector> y_hat, std::vector> y){ + std::vector> deriv; + for(int i = 0; i < y_hat.size(); i++){ + for(int j = 0; j < y_hat[i].size(); j++){ + if(1 - y[i][j] * y_hat[i][j] > 0){ + deriv[i][j] = -y[i][j]; + } + else{ + deriv[i][j] = 0; + } + } + } + return deriv; + } +} \ No newline at end of file diff --git a/MLPP/Cost/Cost.hpp b/MLPP/Cost/Cost.hpp new file mode 100644 index 0000000..f59465c --- /dev/null +++ b/MLPP/Cost/Cost.hpp @@ -0,0 +1,70 @@ +// +// Cost.hpp +// +// Created by Marc Melikyan on 1/16/21. +// + +#ifndef Cost_hpp +#define Cost_hpp + +#include + +namespace MLPP{ + class Cost{ + public: + // Regression Costs + double MSE(std::vector y_hat, std::vector y); + double MSE(std::vector> y_hat, std::vector> y); + + std::vector MSEDeriv(std::vector y_hat, std::vector y); + std::vector> MSEDeriv(std::vector> y_hat, std::vector> y); + + double RMSE(std::vector y_hat, std::vector y); + double RMSE(std::vector> y_hat, std::vector> y); + + std::vector RMSEDeriv(std::vector y_hat, std::vector y); + std::vector> RMSEDeriv(std::vector> y_hat, std::vector> y); + + double MAE(std::vector y_hat, std::vector y); + double MAE(std::vector> y_hat, std::vector> y); + + std::vector MAEDeriv(std::vector y_hat, std::vector y); + std::vector> MAEDeriv(std::vector> y_hat, std::vector> y); + + double MBE(std::vector y_hat, std::vector y); + double MBE(std::vector> y_hat, std::vector> y); + + std::vector MBEDeriv(std::vector y_hat, std::vector y); + std::vector> MBEDeriv(std::vector> y_hat, std::vector> y); + + // Classification Costs + double LogLoss(std::vector y_hat, std::vector y); + double LogLoss(std::vector> y_hat, std::vector> y); + + std::vector LogLossDeriv(std::vector y_hat, std::vector y); + std::vector> LogLossDeriv(std::vector> y_hat, std::vector> y); + + double CrossEntropy(std::vector y_hat, std::vector y); + double CrossEntropy(std::vector> y_hat, std::vector> y); + + std::vector CrossEntropyDeriv(std::vector y_hat, std::vector y); + std::vector> CrossEntropyDeriv(std::vector> y_hat, std::vector> y); + + double HuberLoss(std::vector y_hat, std::vector y, double delta); + double HuberLoss(std::vector> y_hat, std::vector> y, double delta); + + std::vector HuberLossDeriv(std::vector y_hat, std::vector y, double delta); + std::vector> HuberLossDeriv(std::vector> y_hat, std::vector> y, double delta); + + double HingeLoss(std::vector y_hat, std::vector y); + double HingeLoss(std::vector> y_hat, std::vector> y); + + std::vector HingeLossDeriv(std::vector y_hat, 
std::vector y); + std::vector> HingeLossDeriv(std::vector> y_hat, std::vector> y); + + + private: + }; +} + +#endif /* Cost_hpp */ diff --git a/MLPP/Data/Data.cpp b/MLPP/Data/Data.cpp new file mode 100644 index 0000000..eadc66d --- /dev/null +++ b/MLPP/Data/Data.cpp @@ -0,0 +1,567 @@ +// +// Data.cpp +// MLP +// +// Created by Marc Melikyan on 11/4/20. +// + +#include "Data.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Stat/Stat.hpp" +#include "SoftmaxNet/SoftmaxNet.hpp" +#include +#include +#include + + +namespace MLPP{ + // MULTIVARIATE SUPERVISED + + void Data::setData(int k, std::string fileName, std::vector>& inputSet, std::vector& outputSet){ + LinAlg alg; + std::string inputTemp; + std::string outputTemp; + + inputSet.resize(k); + + std::ifstream dataFile(fileName); + if(!dataFile.is_open()){ + std::cout << fileName << " failed to open." << std::endl; + } + + std::string line; + while(std::getline(dataFile, line)){ + std::stringstream ss(line); + + for(int i = 0; i < k; i++){ + std::getline(ss, inputTemp, ','); + inputSet[i].push_back(std::stod(inputTemp)); + + } + + std::getline(ss, outputTemp, ','); + outputSet.push_back(std::stod(outputTemp)); + } + inputSet = alg.transpose(inputSet); + dataFile.close(); + } + + void Data::printData(std::vector inputName, std::string outputName, std::vector> inputSet, std::vector outputSet){ + LinAlg alg; + inputSet = alg.transpose(inputSet); + for(int i = 0; i < inputSet.size(); i++){ + std::cout << inputName[i] << std::endl; + for(int j = 0; j < inputSet[i].size(); j++){ + std::cout << inputSet[i][j] << std::endl; + } + } + + std::cout << outputName << std::endl; + for(int i = 0; i < outputSet.size(); i++){ + std::cout << outputSet[i] << std::endl; + } + } + + // UNSUPERVISED + + void Data::setData(int k, std::string fileName, std::vector>& inputSet){ + LinAlg alg; + std::string inputTemp; + + inputSet.resize(k); + + std::ifstream dataFile(fileName); + if(!dataFile.is_open()){ + std::cout << fileName << " failed to open." << std::endl; + } + + std::string line; + while(std::getline(dataFile, line)){ + std::stringstream ss(line); + + for(int i = 0; i < k; i++){ + std::getline(ss, inputTemp, ','); + inputSet[i].push_back(std::stod(inputTemp)); + + } + } + inputSet = alg.transpose(inputSet); + dataFile.close(); + } + + void Data::printData(std::vector inputName, std::vector> inputSet){ + LinAlg alg; + inputSet = alg.transpose(inputSet); + for(int i = 0; i < inputSet.size(); i++){ + std::cout << inputName[i] << std::endl; + for(int j = 0; j < inputSet[i].size(); j++){ + std::cout << inputSet[i][j] << std::endl; + } + } + } + + // SIMPLE + + void Data::setData(std::string fileName, std::vector & inputSet, std::vector & outputSet){ + std::string inputTemp, outputTemp; + + std::ifstream dataFile(fileName); + if(!dataFile.is_open()){ + std::cout << "The file failed to open." 
<< std::endl; + } + + std::string line; + + + while(std::getline(dataFile, line)){ + std::stringstream ss(line); + + std::getline(ss, inputTemp, ','); + std::getline(ss, outputTemp, ','); + + inputSet.push_back(std::stod(inputTemp)); + outputSet.push_back(std::stod(outputTemp)); + } + + dataFile.close(); + } + + void Data::printData(std::string& inputName, std::string& outputName, std::vector & inputSet, std::vector & outputSet){ + std::cout << inputName << std::endl; + for(int i = 0; i < inputSet.size(); i++){ + std::cout << inputSet[i] << std::endl; + } + + std::cout << outputName << std::endl; + for(int i = 0; i < inputSet.size(); i++){ + std::cout << outputSet[i] << std::endl; + } + } + + // Images + + void Data::getImage(std::string fileName, std::vector& image){ + std::ifstream img(fileName, std::ios::binary); + if(!img.is_open()){ + std::cout << "The file failed to open." << std::endl; + } + std::vector v{std::istreambuf_iterator{img}, {}}; + image = v; + } + + // TEXT-BASED & NLP + std::string Data::toLower(std::string text){ + for(int i = 0; i < text.size(); i++){ + text[i] = tolower(text[i]); + } + return text; + } + + std::vector Data::split(std::string text){ + std::vector split_data; + for(int i = 0; i < text.size(); i++){ + split_data.push_back(text[i]); + } + return split_data; + } + + std::vector Data::splitSentences(std::string data){ + std::vector sentences; + std::string currentStr = ""; + + for(int i = 0; i < data.length(); i++){ + currentStr.push_back(data[i]); + if(data[i] == '.' && data[i + 1] != '.'){ + sentences.push_back(currentStr); + currentStr = ""; + i++; + } + } + return sentences; + } + + std::vector Data::removeSpaces(std::vector data){ + for(int i = 0; i < data.size(); i++){ + auto it = data[i].begin(); + for(int j = 0; j < data[i].length(); j++){ + if(data[i][j] == ' '){ + data[i].erase(it); + } + it++; + } + } + return data; + } + + std::vector Data::removeNullByte(std::vector data){ + for(int i = 0; i < data.size(); i++){ + if(data[i] == "\0"){ + data.erase(data.begin() + i); + } + } + return data; + } + + std::vector Data::segment(std::string text){ + std::vector segmented_data; + int prev_delim = 0; + for(int i = 0; i < text.length(); i++){ + if(text[i] == ' '){ + segmented_data.push_back(text.substr(prev_delim, i - prev_delim)); + prev_delim = i + 1; + } + else if(text[i] == ',' || text[i] == '!' || text[i] == '.' 
|| text[i] == '-'){ + segmented_data.push_back(text.substr(prev_delim, i - prev_delim)); + std::string punc; + punc.push_back(text[i]); + segmented_data.push_back(punc); + prev_delim = i + 2; + i++; + } + else if(i == text.length() - 1){ + segmented_data.push_back(text.substr(prev_delim, text.length() - prev_delim)); // hehe oops- forgot this + } + } + + return segmented_data; + } + + std::vector Data::tokenize(std::string text){ + int max_num = 0; + bool new_num = true; + std::vector segmented_data = segment(text); + std::vector tokenized_data; + tokenized_data.resize(segmented_data.size()); + for(int i = 0; i < segmented_data.size(); i++){ + for(int j = i - 1; j >= 0; j--){ + if(segmented_data[i] == segmented_data[j]){ + tokenized_data[i] = tokenized_data[j]; + new_num = false; + } + } + if(!new_num){ + new_num = true; + } + else{ + max_num++; + tokenized_data[i] = max_num; + } + } + return tokenized_data; + } + + std::vector Data::removeStopWords(std::string text){ + std::vector stopWords = {"i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", "yours", "yourself", "yourselves", "he", "him", "his", "himself", "she", "her", "hers", "herself", "it", "its", "itself", "they", "them", "their", "theirs", "themselves", "what", "which", "who", "whom", "this", "that", "these", "those", "am", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had", "having", "do", "does", "did", "doing", "a", "an", "the", "and", "but", "if", "or", "because", "as", "until", "while", "of", "at", "by", "for", "with", "about", "against", "between", "into", "through", "during", "before", "after", "above", "below", "to", "from", "up", "down", "in", "out", "on", "off", "over", "under", "again", "further", "then", "once", "here", "there", "when", "where", "why", "how", "all", "any", "both", "each", "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so", "than", "too", "very", "s", "t", "can", "will", "just", "don", "should", "now"}; + std::vector segmented_data = removeSpaces(segment(toLower(text))); + + for(int i = 0; i < stopWords.size(); i++){ + for(int j = 0; j < segmented_data.size(); j++){ + if(segmented_data[j] == stopWords[i]){ + segmented_data.erase(segmented_data.begin() + j); + } + } + } + return segmented_data; + } + + std::vector Data::removeStopWords(std::vector segmented_data){ + std::vector stopWords = {"i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", "yours", "yourself", "yourselves", "he", "him", "his", "himself", "she", "her", "hers", "herself", "it", "its", "itself", "they", "them", "their", "theirs", "themselves", "what", "which", "who", "whom", "this", "that", "these", "those", "am", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had", "having", "do", "does", "did", "doing", "a", "an", "the", "and", "but", "if", "or", "because", "as", "until", "while", "of", "at", "by", "for", "with", "about", "against", "between", "into", "through", "during", "before", "after", "above", "below", "to", "from", "up", "down", "in", "out", "on", "off", "over", "under", "again", "further", "then", "once", "here", "there", "when", "where", "why", "how", "all", "any", "both", "each", "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so", "than", "too", "very", "s", "t", "can", "will", "just", "don", "should", "now"}; + for(int i = 0; i < segmented_data.size(); i++){ + for(int j = 0; j < stopWords.size(); j++){ + if(segmented_data[i] == stopWords[j]){ + 
segmented_data.erase(segmented_data.begin() + i); + } + } + } + return segmented_data; + } + + std::string Data::stemming(std::string text){ + + // Our list of suffixes which we use to compare against + std::vector suffixes = {"eer", "er", "ion", "ity", "ment", "ness", "or", "sion", "ship", "th", "able", "ible", "al", "ant", "ary", "ful", "ic", "ious", "ous", "ive", "less", "y", "ed", "en", "ing", "ize", "ise", "ly", "ward", "wise"}; + int padding_size = 4; + char padding = ' '; // our padding + + for(int i = 0; i < padding_size; i++){ + text[text.length() + i] = padding; // ' ' will be our padding value + } + + + for(int i = 0; i < text.size(); i++){ + for(int j = 0; j < suffixes.size(); j++){ + if(text.substr(i, suffixes[j].length()) == suffixes[j] && (text[i + suffixes[j].length()] == ' ' || text[i + suffixes[j].length()] == ',' || text[i + suffixes[j].length()] == '-' || text[i + suffixes[j].length()] == '.' || text[i + suffixes[j].length()] == '!')){ + text.erase(i, suffixes[j].length()); + } + } + } + + return text; + } + + std::vector> Data::BOW(std::vector sentences, std::string type){ + /* + STEPS OF BOW: + 1) To lowercase (done by removeStopWords function by def) + 2) Removing stop words + 3) Obtain a list of the used words + 4) Create a one hot encoded vector of the words and sentences + 5) Sentence.size() x list.size() matrix + */ + + std::vector wordList = removeNullByte(removeStopWords(createWordList(sentences))); + + std::vector> segmented_sentences; + segmented_sentences.resize(sentences.size()); + + for(int i = 0; i < sentences.size(); i++){ + segmented_sentences[i] = removeStopWords(sentences[i]); + } + + std::vector> bow; + + bow.resize(sentences.size()); + for(int i = 0; i < bow.size(); i++){ + bow[i].resize(wordList.size()); + } + + + for(int i = 0; i < segmented_sentences.size(); i++){ + for(int j = 0; j < segmented_sentences[i].size(); j++){ + for(int k = 0; k < wordList.size(); k++){ + if(segmented_sentences[i][j] == wordList[k]){ + if(type == "Binary"){ + bow[i][k] = 1; + } + else{ + bow[i][k]++; + } + } + } + } + } + return bow; + } + + std::vector> Data::TFIDF(std::vector sentences){ + LinAlg alg; + std::vector wordList = removeNullByte(removeStopWords(createWordList(sentences))); + + std::vector> segmented_sentences; + segmented_sentences.resize(sentences.size()); + + for(int i = 0; i < sentences.size(); i++){ + segmented_sentences[i] = removeStopWords(sentences[i]); + } + + std::vector> TF; + std::vector frequency; + frequency.resize(wordList.size()); + TF.resize(segmented_sentences.size()); + for(int i = 0; i < TF.size(); i++){ + TF[i].resize(wordList.size()); + } + for(int i = 0; i < segmented_sentences.size(); i++){ + std::vector present(wordList.size(), 0); + for(int j = 0; j < segmented_sentences[i].size(); j++){ + for(int k = 0; k < wordList.size(); k++){ + if(segmented_sentences[i][j] == wordList[k]){ + TF[i][k]++; + if(!present[k]){ + frequency[k]++; + present[k] = true; + } + } + } + } + TF[i] = alg.scalarMultiply(double(1) / double(segmented_sentences[i].size()), TF[i]); + } + + std::vector IDF; + IDF.resize(frequency.size()); + + for(int i = 0; i < IDF.size(); i++){ + IDF[i] = log((double)segmented_sentences.size() / (double)frequency[i]); + } + + std::vector> TFIDF; + TFIDF.resize(segmented_sentences.size()); + for(int i = 0; i < TFIDF.size(); i++){ + TFIDF[i].resize(wordList.size()); + } + + for(int i = 0; i < TFIDF.size(); i++){ + for(int j = 0; j < TFIDF[i].size(); j++){ + TFIDF[i][j] = TF[i][j] * IDF[j]; + } + } + + return TFIDF; + } + + 
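To make the TFIDF computation above concrete: term frequency is the raw count divided by the sentence length, and the inverse document frequency is the unsmoothed log(numSentences / docFreq). A tiny worked example under exactly those definitions (values chosen for illustration, not from the original patch):

// Two sentences after stop-word removal, word list {"cats", "dogs"}:
//   s1 = {"cats", "cats", "dogs"}  ->  TF(s1) = {2/3, 1/3}
//   s2 = {"dogs"}                  ->  TF(s2) = {0, 1}
// docFreq = {1, 2}  ->  IDF = {log(2/1), log(2/2)} = {0.693..., 0}
// TFIDF(s1) = {2/3 * 0.693..., 1/3 * 0} = {0.462..., 0},  TFIDF(s2) = {0, 0}
// A word that appears in every sentence therefore contributes nothing, which is the expected
// behaviour of unsmoothed IDF.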
std::tuple>, std::vector> Data::word2Vec(std::vector sentences, std::string type, int windowSize, int dimension, double learning_rate, int max_epoch){ + std::vector wordList = removeNullByte(removeStopWords(createWordList(sentences))); + + std::vector> segmented_sentences; + segmented_sentences.resize(sentences.size()); + + for(int i = 0; i < sentences.size(); i++){ + segmented_sentences[i] = removeStopWords(sentences[i]); + } + + std::vector inputStrings; + std::vector outputStrings; + + for(int i = 0; i < segmented_sentences.size(); i++){ + for(int j = 0; j < segmented_sentences[i].size(); j++){ + for(int k = windowSize; k > 0; k--){ + if(j - k >= 0){ + inputStrings.push_back(segmented_sentences[i][j]); + + outputStrings.push_back(segmented_sentences[i][j - k]); + } + if(j + k <= segmented_sentences[i].size() - 1){ + inputStrings.push_back(segmented_sentences[i][j]); + outputStrings.push_back(segmented_sentences[i][j + k]); + } + } + } + } + + int inputSize = inputStrings.size(); + + inputStrings.insert(inputStrings.end(), outputStrings.begin(), outputStrings.end()); + + std::vector> BOW = Data::BOW(inputStrings, "Binary"); + + std::vector> inputSet; + std::vector> outputSet; + + for(int i = 0; i < inputSize; i++){ + inputSet.push_back(BOW[i]); + } + + for(int i = inputSize; i < BOW.size(); i++){ + outputSet.push_back(BOW[i]); + } + LinAlg alg; + SoftmaxNet* model; + if(type == "Skipgram"){ + model = new SoftmaxNet(outputSet, inputSet, dimension); + } + else { // else = CBOW. We maintain it is a default, however. + model = new SoftmaxNet(inputSet, outputSet, dimension); + } + model->gradientDescent(learning_rate, max_epoch, 1); + + std::vector> wordEmbeddings = model->getEmbeddings(); + delete model; + return {wordEmbeddings, wordList}; + } + + std::vector Data::createWordList(std::vector sentences){ + std::string combinedText = ""; + for(int i = 0; i < sentences.size(); i++){ + if(i != 0){ combinedText += " "; } + combinedText += sentences[i]; + } + + return removeSpaces(vecToSet(removeStopWords(combinedText))); + } + + // EXTRA + void Data::setInputNames(std::string fileName, std::vector& inputNames){ + std::string inputNameTemp; + std::ifstream dataFile(fileName); + if(!dataFile.is_open()){ + std::cout << fileName << " failed to open." 
<< std::endl; + } + + while (std::getline(dataFile, inputNameTemp)) + { + inputNames.push_back(inputNameTemp); + } + + dataFile.close(); + } + + std::vector> Data::featureScaling(std::vector> X){ + LinAlg alg; + X = alg.transpose(X); + std::vector max_elements, min_elements; + max_elements.resize(X.size()); + min_elements.resize(X.size()); + + for(int i = 0; i < X.size(); i++){ + max_elements[i] = alg.max(X[i]); + min_elements[i] = alg.min(X[i]); + } + + for(int i = 0; i < X.size(); i++){ + for(int j = 0; j < X[i].size(); j++){ + X[i][j] = (X[i][j] - min_elements[i]) / (max_elements[i] - min_elements[i]); + } + } + return alg.transpose(X); + } + + + std::vector> Data::meanNormalization(std::vector> X){ + LinAlg alg; + Stat stat; + // (X_j - mu_j) / std_j, for every j + + X = meanCentering(X); + for(int i = 0; i < X.size(); i++){ + X[i] = alg.scalarMultiply(1/stat.standardDeviation(X[i]), X[i]); + } + return X; + } + + std::vector> Data::meanCentering(std::vector> X){ + LinAlg alg; + Stat stat; + for(int i = 0; i < X.size(); i++){ + double mean_i = stat.mean(X[i]); + for(int j = 0; j < X[i].size(); j++){ + X[i][j] -= mean_i; + } + } + return X; + } + + std::vector> Data::oneHotRep(std::vector tempOutputSet, int n_class){ + std::vector> outputSet; + outputSet.resize(tempOutputSet.size()); + for(int i = 0; i < tempOutputSet.size(); i++){ + for(int j = 0; j <= n_class - 1; j++){ + if(tempOutputSet[i] == j){ + outputSet[i].push_back(1); + } + else{ + outputSet[i].push_back(0); + } + } + } + return outputSet; + } + + std::vector Data::reverseOneHot(std::vector> tempOutputSet){ + std::vector outputSet; + int n_class = tempOutputSet[0].size(); + for(int i = 0; i < tempOutputSet.size(); i++){ + int current_class = 1; + for(int j = 0; j < tempOutputSet[i].size(); j++){ + if(tempOutputSet[i][j] == 1){ + break; + } + else{ + current_class++; + } + } + outputSet.push_back(current_class); + } + + return outputSet; + } +} \ No newline at end of file diff --git a/MLPP/Data/Data.hpp b/MLPP/Data/Data.hpp new file mode 100644 index 0000000..0173dc3 --- /dev/null +++ b/MLPP/Data/Data.hpp @@ -0,0 +1,82 @@ +// +// Data.hpp +// MLP +// +// Created by Marc Melikyan on 11/4/20. 
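A small illustration of the one-hot helpers above; note that oneHotRep places class j at index j (0-based), while reverseOneHot counts classes starting from 1, so a round trip shifts every label up by one:

// oneHotRep({0, 2, 1}, 3)                     ->  {{1,0,0}, {0,0,1}, {0,1,0}}
// reverseOneHot({{1,0,0}, {0,0,1}, {0,1,0}})  ->  {1, 3, 2}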
+// + +#ifndef Data_hpp +#define Data_hpp + +#include +#include +#include + + +namespace MLPP{ +class Data{ + public: + // Supervised + void setData(int k, std::string fileName, std::vector>& inputSet, std::vector& outputSet); + void printData(std::vector inputName, std::string outputName, std::vector> inputSet, std::vector outputSet); + + // Unsupervised + void setData(int k, std::string fileName, std::vector>& inputSet); + void printData(std::vector inputName, std::vector> inputSet); + + // Simple + void setData(std::string fileName, std::vector & inputSet, std::vector & outputSet); + void printData(std::string& inputName, std::string& outputName, std::vector & inputSet, std::vector & outputSet); + + // Images + void getImage(std::string fileName, std::vector& image); + + // Text-Based & NLP + std::string toLower(std::string text); + std::vector split(std::string text); + std::vector splitSentences(std::string data); + std::vector removeSpaces(std::vector data); + std::vector removeNullByte(std::vector data); + std::vector segment(std::string text); + std::vector tokenize(std::string text); + std::vector removeStopWords(std::string text); + std::vector removeStopWords(std::vector segmented_data); + + std::string stemming(std::string text); + + std::vector> BOW(std::vector sentences, std::string = "Default"); + std::vector> TFIDF(std::vector sentences); + std::tuple>, std::vector> word2Vec(std::vector sentences, std::string type, int windowSize, int dimension, double learning_rate, int max_epoch); + + std::vector createWordList(std::vector sentences); + + // Extra + void setInputNames(std::string fileName, std::vector& inputNames); + std::vector> featureScaling(std::vector> X); + std::vector> meanNormalization(std::vector> X); + std::vector> meanCentering(std::vector> X); + std::vector> oneHotRep (std::vector tempOutputSet, int n_class); + std::vector reverseOneHot(std::vector> tempOutputSet); + + template + std::vector vecToSet(std::vector inputSet){ + std::vector setInputSet; + for(int i = 0; i < inputSet.size(); i++){ + bool new_element = true; + for(int j = 0; j < setInputSet.size(); j++){ + if(setInputSet[j] == inputSet[i]){ + new_element = false; + } + } + if(new_element){ + setInputSet.push_back(inputSet[i]); + } + } + return setInputSet; + } + + private: + }; +} + +#endif /* Data_hpp */ diff --git a/MLPP/ExpReg/ExpReg.cpp b/MLPP/ExpReg/ExpReg.cpp new file mode 100644 index 0000000..daed9f6 --- /dev/null +++ b/MLPP/ExpReg/ExpReg.cpp @@ -0,0 +1,261 @@ +// +// ExpReg.cpp +// +// Created by Marc Melikyan on 10/2/20. 
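For reference, the vecToSet template declared above deduplicates a vector with a quadratic scan while keeping first-occurrence order (unlike converting through std::set, which would sort); for example:

// Data data;
// std::vector<std::string> words  = {"the", "cat", "the", "dog"};
// std::vector<std::string> unique = data.vecToSet(words);   // {"the", "cat", "dog"}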
+// + +#include "ExpReg.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Stat/Stat.hpp" +#include "Regularization/Reg.hpp" +#include "Utilities/Utilities.hpp" +#include "Cost/Cost.hpp" + +#include +#include + +namespace MLPP{ + ExpReg::ExpReg(std::vector> inputSet, std::vector outputSet, std::string reg, double lambda, double alpha) + : inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha) + { + y_hat.resize(n); + weights = Utilities::weightInitialization(k); + initial = Utilities::weightInitialization(k); + bias = Utilities::biasInitialization(); + } + + std::vector ExpReg::modelSetTest(std::vector> X){ + return Evaluate(X); + } + + double ExpReg::modelTest(std::vector x){ + return Evaluate(x); + } + + void ExpReg::gradientDescent(double learning_rate, int max_epoch, bool UI){ + Reg regularization; + LinAlg alg; + double cost_prev = 0; + int epoch = 1; + forwardPass(); + + while(true){ + cost_prev = Cost(y_hat, outputSet); + + std::vector error = alg.subtraction(y_hat, outputSet); + + for(int i = 0; i < k; i++){ + + // Calculating the weight gradient + double sum = 0; + for(int j = 0; j < n; j++){ + sum += error[j] * inputSet[j][i] * pow(weights[i], inputSet[j][i] - 1); + } + double w_gradient = sum / n; + + // Calculating the initial gradient + double sum2 = 0; + for(int j = 0; j < n; j++){ + sum2 += error[j] * pow(weights[i], inputSet[j][i]); + } + + + double i_gradient = sum2 / n; + + // Weight/initial updation + weights[i] -= learning_rate * w_gradient; + initial[i] -= learning_rate * i_gradient; + + } + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Calculating the bias gradient + double sum = 0; + for(int j = 0; j < n; j++){ + sum += (y_hat[j] - outputSet[j]); + } + double b_gradient = sum / n; + + // bias updation + bias -= learning_rate * b_gradient; + forwardPass(); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); + Utilities::UI(weights, bias); + } + epoch++; + + if(epoch > max_epoch) { break; } + + } + } + + void ExpReg::SGD(double learning_rate, int max_epoch, bool UI){ + Reg regularization; + Utilities util; + double cost_prev = 0; + int epoch = 1; + + while(true){ + std::random_device rd; + std::default_random_engine generator(rd()); + std::uniform_int_distribution distribution(0, int(n - 1)); + int outputIndex = distribution(generator); + + double y_hat = Evaluate(inputSet[outputIndex]); + cost_prev = Cost({y_hat}, {outputSet[outputIndex]}); + + + for(int i = 0; i < k; i++){ + + // Calculating the weight gradients + + double w_gradient = (y_hat - outputSet[outputIndex]) * inputSet[outputIndex][i] * pow(weights[i], inputSet[outputIndex][i] - 1); + double i_gradient = (y_hat - outputSet[outputIndex]) * pow(weights[i], inputSet[outputIndex][i]); + + // Weight/initial updation + weights[i] -= learning_rate * w_gradient; + initial[i] -= learning_rate * i_gradient; + } + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Calculating the bias gradients + double b_gradient = (y_hat - outputSet[outputIndex]); + + // Bias updation + bias -= learning_rate * b_gradient; + y_hat = Evaluate({inputSet[outputIndex]}); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost({y_hat}, {outputSet[outputIndex]})); + Utilities::UI(weights, bias); + } + epoch++; + + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + void ExpReg::MBGD(double learning_rate, int max_epoch, int miniBatch_size, bool UI){ + Reg regularization; + LinAlg alg; + 
double cost_prev = 0; + int epoch = 1; + + int n_miniBatch = n/miniBatch_size; + + std::vector>> inputMiniBatches; + std::vector> outputMiniBatches; + + // Creating the mini-batches + for(int i = 0; i < n_miniBatch; i++){ + std::vector> currentInputSet; + std::vector currentOutputSet; + for(int j = 0; j < n/n_miniBatch; j++){ + currentInputSet.push_back(inputSet[n/n_miniBatch * i + j]); + currentOutputSet.push_back(outputSet[n/n_miniBatch * i + j]); + } + inputMiniBatches.push_back(currentInputSet); + outputMiniBatches.push_back(currentOutputSet); + } + + if(double(n)/double(n_miniBatch) - int(n/n_miniBatch) != 0){ + for(int i = 0; i < n - n/n_miniBatch * n_miniBatch; i++){ + inputMiniBatches[n_miniBatch - 1].push_back(inputSet[n/n_miniBatch * n_miniBatch + i]); + outputMiniBatches[n_miniBatch - 1].push_back(outputSet[n/n_miniBatch * n_miniBatch + i]); + } + } + + while(true){ + for(int i = 0; i < n_miniBatch; i++){ + std::vector y_hat = Evaluate(inputMiniBatches[i]); + cost_prev = Cost(y_hat, outputMiniBatches[i]); + std::vector error = alg.subtraction(y_hat, outputMiniBatches[i]); + + for(int j = 0; j < k; j++){ + // Calculating the weight gradient + double sum = 0; + for(int k = 0; k < outputMiniBatches[i].size(); k++){ + sum += error[k] * inputMiniBatches[i][k][j] * pow(weights[j], inputMiniBatches[i][k][j] - 1); + } + double w_gradient = sum / outputMiniBatches[i].size(); + + // Calculating the initial gradient + double sum2 = 0; + for(int k = 0; k < outputMiniBatches[i].size(); k++){ + sum2 += error[k] * pow(weights[j], inputMiniBatches[i][k][j]); + } + + + double i_gradient = sum2 / outputMiniBatches[i].size(); + + // Weight/initial updation + weights[j] -= learning_rate * w_gradient; + initial[j] -= learning_rate * i_gradient; + } + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Calculating the bias gradient + double sum = 0; + for(int j = 0; j < outputMiniBatches[i].size(); j++){ + sum += (y_hat[j] - outputMiniBatches[i][j]); + } + double b_gradient = sum / outputMiniBatches[i].size(); + y_hat = Evaluate(inputMiniBatches[i]); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i])); + Utilities::UI(weights, bias); + } + } + epoch++; + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + double ExpReg::score(){ + Utilities util; + return util.performance(y_hat, outputSet); + } + + void ExpReg::save(std::string fileName){ + Utilities util; + util.saveParameters(fileName, weights, initial, bias); + } + + double ExpReg::Cost(std::vector y_hat, std::vector y){ + Reg regularization; + class Cost cost; + return cost.MSE(y_hat, y) + regularization.regTerm(weights, lambda, alpha, reg); + } + + std::vector ExpReg::Evaluate(std::vector> X){ + std::vector y_hat; + y_hat.resize(X.size()); + for(int i = 0; i < X.size(); i++){ + y_hat[i] = 0; + for(int j = 0; j < X[i].size(); j++){ + y_hat[i] += initial[j] * pow(weights[j], X[i][j]); + } + y_hat[i] += bias; + } + return y_hat; + } + + double ExpReg::Evaluate(std::vector x){ + double y_hat = 0; + for(int i = 0; i < x.size(); i++){ + y_hat += initial[i] * pow(weights[i], x[i]); + } + + return y_hat + bias; + } + + // a * w^x + b + void ExpReg::forwardPass(){ + y_hat = Evaluate(inputSet); + } +} \ No newline at end of file diff --git a/MLPP/ExpReg/ExpReg.hpp b/MLPP/ExpReg/ExpReg.hpp new file mode 100644 index 0000000..c3722bd --- /dev/null +++ b/MLPP/ExpReg/ExpReg.hpp @@ -0,0 +1,51 @@ +// +// ExpReg.hpp +// +// Created by Marc Melikyan on 10/2/20. 
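A minimal usage sketch for the exponential-regression class above, which fits y_hat = sum_j initial[j] * weights[j]^x[j] + bias; the data, include path, and hyperparameters below are placeholders for illustration, not part of the original patch:

#include "ExpReg/ExpReg.hpp"
#include <iostream>
#include <vector>

int main() {
    // Toy single-feature data roughly following y = 2^x (placeholder values).
    std::vector<std::vector<double>> X = {{0}, {1}, {2}, {3}, {4}};
    std::vector<double> y = {1, 2, 4, 8, 16};

    MLPP::ExpReg model(X, y);                    // reg defaults to "None"
    model.gradientDescent(0.01, 10000, false);   // learning rate, max epochs, UI off
    std::cout << "performance: " << model.score() << std::endl;
    std::cout << "prediction at x = 5: " << model.modelTest({5}) << std::endl;
}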
+// + +#ifndef ExpReg_hpp +#define ExpReg_hpp + +#include +#include + +namespace MLPP{ + class ExpReg{ + + public: + ExpReg(std::vector> inputSet, std::vector outputSet, std::string reg = "None", double lambda = 0.5, double alpha = 0.5); + std::vector modelSetTest(std::vector> X); + double modelTest(std::vector x); + void gradientDescent(double learning_rate, int max_epoch, bool UI = 1); + void SGD(double learning_rate, int max_epoch, bool UI = 1); + void MBGD(double learning_rate, int max_epoch, int miniBatch_size, bool UI = 1); + double score(); + void save(std::string fileName); + private: + + double Cost(std::vector y_hat, std::vector y); + + std::vector Evaluate(std::vector> X); + double Evaluate(std::vector x); + void forwardPass(); + + std::vector> inputSet; + std::vector outputSet; + std::vector y_hat; + std::vector weights; + std::vector initial; + double bias; + + int n; + int k; + + // Regularization Params + std::string reg; + double lambda; + double alpha; /* This is the controlling param for Elastic Net*/ + + }; +} + +#endif /* ExpReg_hpp */ diff --git a/MLPP/GaussMarkovChecker/GaussMarkovChecker.cpp b/MLPP/GaussMarkovChecker/GaussMarkovChecker.cpp new file mode 100644 index 0000000..a037ce5 --- /dev/null +++ b/MLPP/GaussMarkovChecker/GaussMarkovChecker.cpp @@ -0,0 +1,59 @@ +// +// GaussMarkovChecker.cpp +// +// Created by Marc Melikyan on 11/13/20. +// + +#include "GaussMarkovChecker.hpp" +#include "Stat/Stat.hpp" +#include + + +namespace MLPP{ + void GaussMarkovChecker::checkGMConditions(std::vector eps){ + bool condition1 = arithmeticMean(eps); + bool condition2 = homoscedasticity(eps); + bool condition3 = exogeneity(eps); + + if(condition1 && condition2 && condition3){ + std::cout << "Gauss-Markov conditions were not violated. You may use OLS to obtain a BLUE estimator" << std::endl; + } + else{ + std::cout << "A test of the expected value of 0 of the error terms returned " << std::boolalpha << condition1 << ", a test of homoscedasticity has returned " << std::boolalpha << condition2 << ", and a test of exogenity has returned " << std::boolalpha << "." << std::endl; + } + + } + + bool GaussMarkovChecker::arithmeticMean(std::vector eps){ + Stat stat; + if(stat.mean(eps) == 0) { + return 1; + } + else { return 0; } + } + + bool GaussMarkovChecker::homoscedasticity(std::vector eps){ + Stat stat; + double currentVar = (eps[0] - stat.mean(eps)) * (eps[0] - stat.mean(eps)) / eps.size(); + for(int i = 0; i < eps.size(); i++){ + if(currentVar != (eps[i] - stat.mean(eps)) * (eps[i] - stat.mean(eps)) / eps.size()){ + return 0; + } + } + return 1; + } + + bool GaussMarkovChecker::exogeneity(std::vector eps){ + Stat stat; + for(int i = 0; i < eps.size(); i++){ + for(int j = 0; j < eps.size(); j++){ + if(i != j){ + if((eps[i] - stat.mean(eps)) * (eps[j] - stat.mean(eps)) / eps.size() != 0){ + return 0; + } + } + } + } + return 1; + } +} diff --git a/MLPP/GaussMarkovChecker/GaussMarkovChecker.hpp b/MLPP/GaussMarkovChecker/GaussMarkovChecker.hpp new file mode 100644 index 0000000..4944d4d --- /dev/null +++ b/MLPP/GaussMarkovChecker/GaussMarkovChecker.hpp @@ -0,0 +1,27 @@ +// +// GaussMarkovChecker.hpp +// +// Created by Marc Melikyan on 11/13/20. +// + +#ifndef GaussMarkovChecker_hpp +#define GaussMarkovChecker_hpp + +#include +#include + +namespace MLPP{ + class GaussMarkovChecker{ + public: + void checkGMConditions(std::vector eps); + + // Independent, 3 Gauss-Markov Conditions + bool arithmeticMean(std::vector eps); // 1) Arithmetic Mean of 0. 
+ bool homoscedasticity(std::vector eps); // 2) Homoscedasticity + bool exogeneity(std::vector eps); // 3) Cov of any 2 non-equal eps values = 0. + private: + + }; +} + +#endif /* GaussMarkovChecker_hpp */ diff --git a/MLPP/GaussianNB/GaussianNB.cpp b/MLPP/GaussianNB/GaussianNB.cpp new file mode 100644 index 0000000..cc7ef58 --- /dev/null +++ b/MLPP/GaussianNB/GaussianNB.cpp @@ -0,0 +1,91 @@ +// +// GaussianNB.cpp +// +// Created by Marc Melikyan on 1/17/21. +// + +#include "GaussianNB.hpp" +#include "Stat/Stat.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Utilities/Utilities.hpp" + +#include +#include + +namespace MLPP{ + GaussianNB::GaussianNB(std::vector> inputSet, std::vector outputSet, int class_num) + : inputSet(inputSet), outputSet(outputSet), class_num(class_num) + { + y_hat.resize(outputSet.size()); + Evaluate(); + LinAlg alg; + } + + std::vector GaussianNB::modelSetTest(std::vector> X){ + std::vector y_hat; + for(int i = 0; i < X.size(); i++){ + y_hat.push_back(modelTest(X[i])); + } + return y_hat; + } + + double GaussianNB::modelTest(std::vector x){ + Stat stat; + LinAlg alg; + + double score[class_num]; + double y_hat_i = 1; + for(int i = class_num - 1; i >= 0; i--){ + y_hat_i += log(priors[i] * (1 / sqrt(2 * M_PI * sigma[i] * sigma[i])) * exp(-(x[i] * mu[i]) * (x[i] * mu[i]) / (2 * sigma[i] * sigma[i]))); + score[i] = exp(y_hat_i); + } + return std::distance(score, std::max_element(score, score + sizeof(score) / sizeof(double))); + } + + double GaussianNB::score(){ + Utilities util; + return util.performance(y_hat, outputSet); + } + + void GaussianNB::Evaluate(){ + Stat stat; + LinAlg alg; + + // Computing mu_k_y and sigma_k_y + mu.resize(class_num); + sigma.resize(class_num); + for(int i = class_num - 1; i >= 0; i--){ + std::vector set; + for(int j = 0; j < inputSet.size(); j++){ + for(int k = 0; k < inputSet[j].size(); k++){ + if(outputSet[j] == i){ + set.push_back(inputSet[j][k]); + } + } + } + mu[i] = stat.mean(set); + sigma[i] = stat.standardDeviation(set); + } + + // Priors + priors.resize(class_num); + for(int i = 0; i < outputSet.size(); i++){ + priors[int(outputSet[i])]++; + } + priors = alg.scalarMultiply( double(1)/double(outputSet.size()), priors); + + for(int i = 0; i < outputSet.size(); i++){ + double score[class_num]; + double y_hat_i = 1; + for(int j = class_num - 1; j >= 0; j--){ + for(int k = 0; k < inputSet[i].size(); k++){ + y_hat_i += log(priors[j] * (1 / sqrt(2 * M_PI * sigma[j] * sigma[j])) * exp(-(inputSet[i][k] * mu[j]) * (inputSet[i][k] * mu[j]) / (2 * sigma[j] * sigma[j]))); + } + score[j] = exp(y_hat_i); + std::cout << score[j] << std::endl; + } + y_hat[i] = std::distance(score, std::max_element(score, score + sizeof(score) / sizeof(double))); + std::cout << std::distance(score, std::max_element(score, score + sizeof(score) / sizeof(double))) << std::endl; + } + } +} \ No newline at end of file diff --git a/MLPP/GaussianNB/GaussianNB.hpp b/MLPP/GaussianNB/GaussianNB.hpp new file mode 100644 index 0000000..636ed42 --- /dev/null +++ b/MLPP/GaussianNB/GaussianNB.hpp @@ -0,0 +1,42 @@ +// +// GaussianNB.hpp +// +// Created by Marc Melikyan on 1/17/21. 
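For reference, the class scores accumulated in modelTest/Evaluate above correspond to the naive-Bayes log-posterior log p(y = c) + sum_k log N(x_k | mu_c, sigma_c^2); the Gaussian exponent in that density is -(x_k - mu_c)^2 / (2 sigma_c^2), whereas the expressions above use a product (x * mu) where the standard form subtracts. A small sketch of the usual term (an assumption about intent, not a patch to the original):

#include <cmath>

// Log-density of a univariate Gaussian, the building block of Gaussian naive Bayes.
double gaussianLogPdf(double x, double mu, double sigma) {
    return -0.5 * std::log(2.0 * M_PI * sigma * sigma)
           - (x - mu) * (x - mu) / (2.0 * sigma * sigma);
}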
+// + +#ifndef GaussianNB_hpp +#define GaussianNB_hpp + +#include + +namespace MLPP{ + class GaussianNB{ + + public: + GaussianNB(std::vector> inputSet, std::vector outputSet, int class_num); + std::vector modelSetTest(std::vector> X); + double modelTest(std::vector x); + double score(); + + private: + + void Evaluate(); + + int class_num; + + std::vector priors; + std::vector mu; + std::vector sigma; + + std::vector> inputSet; + std::vector outputSet; + + std::vector y_hat; + + + + + }; + + #endif /* GaussianNB_hpp */ +} \ No newline at end of file diff --git a/MLPP/HiddenLayer/HiddenLayer.cpp b/MLPP/HiddenLayer/HiddenLayer.cpp new file mode 100644 index 0000000..fcc08e7 --- /dev/null +++ b/MLPP/HiddenLayer/HiddenLayer.cpp @@ -0,0 +1,99 @@ +// +// HiddenLayer.cpp +// +// Created by Marc Melikyan on 11/4/20. +// + +#include "HiddenLayer.hpp" +#include "Activation/Activation.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Utilities/Utilities.hpp" + +#include +#include + +namespace MLPP { + HiddenLayer::HiddenLayer(int n_hidden, std::string activation, std::vector> input, std::string weightInit, std::string reg, double lambda, double alpha) + : n_hidden(n_hidden), activation(activation), input(input), weightInit(weightInit), reg(reg), lambda(lambda), alpha(alpha) + { + weights = Utilities::weightInitialization(input[0].size(), n_hidden, weightInit); + bias = Utilities::biasInitialization(n_hidden); + + activation_map["Linear"] = &Activation::linear; + activationTest_map["Linear"] = &Activation::linear; + + activation_map["Sigmoid"] = &Activation::sigmoid; + activationTest_map["Sigmoid"] = &Activation::sigmoid; + + activation_map["Swish"] = &Activation::swish; + activationTest_map["Swish"] = &Activation::swish; + + activation_map["Softplus"] = &Activation::softplus; + activationTest_map["Softplus"] = &Activation::softplus; + + activation_map["CLogLog"] = &Activation::cloglog; + activationTest_map["CLogLog"] = &Activation::cloglog; + + activation_map["Sinh"] = &Activation::sinh; + activationTest_map["Sinh"] = &Activation::sinh; + + activation_map["Cosh"] = &Activation::cosh; + activationTest_map["Cosh"] = &Activation::cosh; + + activation_map["Tanh"] = &Activation::tanh; + activationTest_map["Tanh"] = &Activation::tanh; + + activation_map["Csch"] = &Activation::csch; + activationTest_map["Csch"] = &Activation::csch; + + activation_map["Sech"] = &Activation::sech; + activationTest_map["Sech"] = &Activation::sech; + + activation_map["Coth"] = &Activation::coth; + activationTest_map["Coth"] = &Activation::coth; + + activation_map["Arsinh"] = &Activation::arsinh; + activationTest_map["Arsinh"] = &Activation::arsinh; + + activation_map["Arcosh"] = &Activation::arcosh; + activationTest_map["Arcosh"] = &Activation::arcosh; + + activation_map["Artanh"] = &Activation::artanh; + activationTest_map["Artanh"] = &Activation::artanh; + + activation_map["Arcsch"] = &Activation::arcsch; + activationTest_map["Arcsch"] = &Activation::arcsch; + + activation_map["Arsech"] = &Activation::arsech; + activationTest_map["Arsech"] = &Activation::arsech; + + activation_map["Arcoth"] = &Activation::arcoth; + activationTest_map["Arcoth"] = &Activation::arcoth; + + activation_map["GaussianCDF"] = &Activation::gaussianCDF; + activationTest_map["GaussianCDF"] = &Activation::gaussianCDF; + + activation_map["RELU"] = &Activation::RELU; + activationTest_map["RELU"] = &Activation::RELU; + + activation_map["GELU"] = &Activation::GELU; + activationTest_map["GELU"] = &Activation::GELU; + + activation_map["UnitStep"] = 
&Activation::unitStep; + activationTest_map["UnitStep"] = &Activation::unitStep; + } + + void HiddenLayer::forwardPass(){ + LinAlg alg; + Activation avn; + z = alg.mat_vec_add(alg.matmult(input, weights), bias); + a = (avn.*activation_map[activation])(z, 0); + } + + void HiddenLayer::Test(std::vector x){ + LinAlg alg; + Activation avn; + z_test = alg.addition(alg.mat_vec_mult(alg.transpose(weights), x), bias); + a_test = (avn.*activationTest_map[activation])(z_test, 0); + } +} \ No newline at end of file diff --git a/MLPP/HiddenLayer/HiddenLayer.hpp b/MLPP/HiddenLayer/HiddenLayer.hpp new file mode 100644 index 0000000..b243043 --- /dev/null +++ b/MLPP/HiddenLayer/HiddenLayer.hpp @@ -0,0 +1,52 @@ +// +// HiddenLayer.hpp +// +// Created by Marc Melikyan on 11/4/20. +// + +#ifndef HiddenLayer_hpp +#define HiddenLayer_hpp + +#include "Activation/Activation.hpp" + +#include +#include +#include + +namespace MLPP { + class HiddenLayer{ + public: + HiddenLayer(int n_hidden, std::string activation, std::vector> input, std::string weightInit, std::string reg, double lambda, double alpha); + + int n_hidden; + std::string activation; + + std::vector> input; + + std::vector> weights; + std::vector bias; + + std::vector> z; + std::vector> a; + + std::map> (Activation::*)(std::vector>, bool)> activation_map; + std::map (Activation::*)(std::vector, bool)> activationTest_map; + + std::vector z_test; + std::vector a_test; + + std::vector> delta; + + // Regularization Params + std::string reg; + double lambda; /* Regularization Parameter */ + double alpha; /* This is the controlling param for Elastic Net*/ + + std::string weightInit; + + void forwardPass(); + void Test(std::vector x); + }; +} + +#endif /* HiddenLayer_hpp */ \ No newline at end of file diff --git a/MLPP/HypothesisTesting/HypothesisTesting.cpp b/MLPP/HypothesisTesting/HypothesisTesting.cpp new file mode 100644 index 0000000..d0e4477 --- /dev/null +++ b/MLPP/HypothesisTesting/HypothesisTesting.cpp @@ -0,0 +1,19 @@ +// +// HypothesisTesting.cpp +// +// Created by Marc Melikyan on 3/10/21. +// + +#include "HypothesisTesting.hpp" + +namespace MLPP{ + + std::tuple HypothesisTesting::chiSquareTest(std::vector observed, std::vector expected){ + double df = observed.size() - 1; // These are our degrees of freedom + double sum = 0; + for(int i = 0; i < observed.size(); i++){ + sum += (observed[i] - expected[i]) * (observed[i] - expected[i]) / expected[i]; + } + } + +} \ No newline at end of file diff --git a/MLPP/HypothesisTesting/HypothesisTesting.hpp b/MLPP/HypothesisTesting/HypothesisTesting.hpp new file mode 100644 index 0000000..4764f62 --- /dev/null +++ b/MLPP/HypothesisTesting/HypothesisTesting.hpp @@ -0,0 +1,24 @@ +// +// HypothesisTesting.hpp +// +// Created by Marc Melikyan on 3/10/21. +// + +#ifndef HypothesisTesting_hpp +#define HypothesisTesting_hpp + +#include +#include + +namespace MLPP{ + class HypothesisTesting{ + + public: + std::tuple chiSquareTest(std::vector observed, std::vector expected); + + private: + + }; +} + +#endif /* HypothesisTesting_hpp */ diff --git a/MLPP/KMeans/KMeans.cpp b/MLPP/KMeans/KMeans.cpp new file mode 100644 index 0000000..d014241 --- /dev/null +++ b/MLPP/KMeans/KMeans.cpp @@ -0,0 +1,236 @@ +// +// KMeans.cpp +// +// Created by Marc Melikyan on 10/2/20. 
+// + +#include "KMeans.hpp" +#include "Utilities/Utilities.hpp" +#include "LinAlg/LinAlg.hpp" + +#include +#include + +namespace MLPP{ + KMeans::KMeans(std::vector> inputSet, int k, std::string init_type) + : inputSet(inputSet), k(k), init_type(init_type) + { + if(init_type == "KMeans++"){ + kmeansppInitialization(k); + } + else{ + centroidInitialization(k); + } + } + + std::vector> KMeans::modelSetTest(std::vector> X){ + std::vector> closestCentroids; + for(int i = 0; i < inputSet.size(); i++){ + std::vector closestCentroid = mu[0]; + for(int j = 0; j < r[0].size(); j++){ + if(euclideanDistance(X[i], mu[j]) < euclideanDistance(X[i], closestCentroid)){ + closestCentroid = mu[j]; + } + } + closestCentroids.push_back(closestCentroid); + } + return closestCentroids; + } + + std::vector KMeans::modelTest(std::vector x){ + std::vector closestCentroid = mu[0]; + for(int j = 0; j < mu.size(); j++){ + if(euclideanDistance(x, mu[j]) < euclideanDistance(x, closestCentroid)){ + closestCentroid = mu[j]; + } + } + return closestCentroid; + } + + void KMeans::train(int epoch_num, bool UI){ + double cost_prev = 0; + int epoch = 1; + + Evaluate(); + + while(true){ + + // STEPS OF THE ALGORITHM + // 1. DETERMINE r_nk + // 2. DETERMINE J + // 3. DETERMINE mu_k + + // STOP IF CONVERGED, ELSE REPEAT + + cost_prev = Cost(); + + computeMu(); + Evaluate(); + + // UI PORTION + if(UI) { Utilities::CostInfo(epoch, cost_prev, Cost()); } + epoch++; + + if(epoch > epoch_num) { break; } + + } + } + + double KMeans::score(){ + return Cost(); + } + + std::vector KMeans::silhouette_scores(){ + std::vector> closestCentroids = modelSetTest(inputSet); + std::vector silhouette_scores; + for(int i = 0; i < inputSet.size(); i++){ + // COMPUTING a[i] + double a = 0; + for(int j = 0; j < inputSet.size(); j++){ + if(i != j && r[i] == r[j]){ + a += euclideanDistance(inputSet[i], inputSet[j]); + } + } + // NORMALIZE a[i] + a /= closestCentroids[i].size() - 1; + + + // COMPUTING b[i] + double b = INT_MAX; + for(int j = 0; j < mu.size(); j++){ + if(closestCentroids[i] != mu[j]){ + double sum = 0; + for(int k = 0; k < inputSet.size(); k++){ + sum += euclideanDistance(inputSet[i], inputSet[k]); + } + // NORMALIZE b[i] + double k_clusterSize = 0; + for(int k = 0; k < closestCentroids.size(); k++){ + if(closestCentroids[k] == mu[j]){ + k_clusterSize++; + } + } + if(sum / k_clusterSize < b) { b = sum / k_clusterSize; } + } + } + silhouette_scores.push_back((b - a)/fmax(a, b)); + // Or the expanded version: + // if(a < b) { + // silhouette_scores.push_back(1 - a/b); + // } + // else if(a == b){ + // silhouette_scores.push_back(0); + // } + // else{ + // silhouette_scores.push_back(b/a - 1); + // } + } + return silhouette_scores; + } + + // This simply computes r_nk + void KMeans::Evaluate(){ + r.resize(inputSet.size()); + + for(int i = 0; i < r.size(); i++){ + r[i].resize(k); + } + + for(int i = 0; i < r.size(); i++){ + std::vector closestCentroid = mu[0]; + for(int j = 0; j < r[0].size(); j++){ + if(euclideanDistance(inputSet[i], mu[j]) < euclideanDistance(inputSet[i], closestCentroid)){ + closestCentroid = mu[j]; + } + } + for(int j = 0; j < r[0].size(); j++){ + if(mu[j] == closestCentroid) { + r[i][j] = 1; + } + else { r[i][j] = 0; } + } + } + + } + + // This simply computes or re-computes mu_k + void KMeans::computeMu(){ + LinAlg alg; + for(int i = 0; i < mu.size(); i++){ + std::vector num; + num.resize(r.size()); + + for(int i = 0; i < num.size(); i++){ + num[i] = 0; + } + + int den = 0; + for(int j = 0; j < r.size(); j++){ + num = 
alg.addition(num, alg.scalarMultiply(r[j][i], inputSet[j])); + } + for(int j = 0; j < r.size(); j++){ + den += r[j][i]; + } + mu[i] = alg.scalarMultiply(1/den, num); + } + + } + + void KMeans::centroidInitialization(int k){ + mu.resize(k); + + for(int i = 0; i < k; i++){ + std::random_device rd; + std::default_random_engine generator(rd()); + std::uniform_int_distribution distribution(0, int(inputSet.size() - 1)); + + mu[i].resize(inputSet.size()); + mu[i] = inputSet[distribution(generator)]; + } + } + + void KMeans::kmeansppInitialization(int k){ + std::random_device rd; + std::default_random_engine generator(rd()); + std::uniform_int_distribution distribution(0, int(inputSet.size() - 1)); + mu.push_back(inputSet[distribution(generator)]); + + for(int i = 0; i < k - 1; i++){ + std::vector farthestCentroid; + for(int j = 0; j < inputSet.size(); j++){ + double max_dist = 0; + /* SUM ALL THE SQUARED DISTANCES, CHOOSE THE ONE THAT'S FARTHEST + AS TO SPREAD OUT THE CLUSTER CENTROIDS. */ + double sum = 0; + for(int k = 0; k < mu.size(); k++){ + sum += euclideanDistance(inputSet[j], mu[k]); + } + if(sum * sum > max_dist){ + farthestCentroid = inputSet[j]; + max_dist = sum * sum; + } + } + mu.push_back(farthestCentroid); + } + } + + double KMeans::Cost(){ + LinAlg alg; + double sum = 0; + for(int i = 0; i < r.size(); i++){ + for(int j = 0; j < r[0].size(); j++){ + sum += r[i][j] * alg.norm_sq(alg.subtraction(inputSet[i], mu[j])); + } + } + return sum; + } + + // Multidimensional Euclidean Distance + double KMeans::euclideanDistance(std::vector A, std::vector B){ + double dist = 0; + for(int i = 0; i < A.size(); i++){ + dist += (A[i] - B[i])*(A[i] - B[i]); + } + return sqrt(dist); + } +} diff --git a/MLPP/KMeans/KMeans.hpp b/MLPP/KMeans/KMeans.hpp new file mode 100644 index 0000000..281bec8 --- /dev/null +++ b/MLPP/KMeans/KMeans.hpp @@ -0,0 +1,45 @@ +// +// KMeans.hpp +// +// Created by Marc Melikyan on 10/2/20. +// + +#ifndef KMeans_hpp +#define KMeans_hpp + +#include +#include + +namespace MLPP{ + class KMeans{ + + public: + KMeans(std::vector> inputSet, int k, std::string init_type = "Default"); + std::vector> modelSetTest(std::vector> X); + std::vector modelTest(std::vector x); + void train(int epoch_num, bool UI = 1); + double score(); + std::vector silhouette_scores(); + private: + + void Evaluate(); + void computeMu(); + + void centroidInitialization(int k); + void kmeansppInitialization(int k); + double Cost(); + + std::vector> inputSet; + std::vector> mu; + std::vector> r; + + double euclideanDistance(std::vector A, std::vector B); + + double accuracy_threshold; + int k; + + std::string init_type; + }; +} + +#endif /* KMeans_hpp */ diff --git a/MLPP/LinAlg/LinAlg.cpp b/MLPP/LinAlg/LinAlg.cpp new file mode 100644 index 0000000..bb0e74b --- /dev/null +++ b/MLPP/LinAlg/LinAlg.cpp @@ -0,0 +1,744 @@ +// +// LinAlg.cpp +// +// Created by Marc Melikyan on 1/8/21. 
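// Illustrative usage sketch for the KMeans class above (not in the committed sources; the
// data, k, and include path are assumptions, and the <double> template arguments stripped
// from this upload are restored from context).
#include "MLPP/KMeans/KMeans.hpp"
#include <iostream>
#include <vector>

int main(){
    std::vector<std::vector<double>> X = {{1.0, 1.0}, {1.2, 0.8}, {8.0, 8.0}, {7.9, 8.2}};
    MLPP::KMeans kmeans(X, 2, "KMeans++");   // 2 clusters, KMeans++ seeding
    kmeans.train(10, false);                 // at most 10 epochs, no UI output
    std::cout << "Distortion J: " << kmeans.score() << std::endl;
    return 0;
}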
+// + +#include "LinAlg.hpp" +#include "Stat/Stat.hpp" +#include +#include +#include + +namespace MLPP{ + + std::vector> LinAlg::addition(std::vector> A, std::vector> B){ + std::vector> C; + C.resize(A.size()); + for(int i = 0; i < C.size(); i++){ + C[i].resize(A[0].size()); + } + + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A[0].size(); j++){ + C[i][j] = A[i][j] + B[i][j]; + } + } + return C; + } + + std::vector> LinAlg::subtraction(std::vector> A, std::vector> B){ + std::vector> C; + C.resize(A.size()); + for(int i = 0; i < C.size(); i++){ + C[i].resize(A[0].size()); + } + + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A[0].size(); j++){ + C[i][j] = A[i][j] - B[i][j]; + } + } + return C; + } + + std::vector> LinAlg::matmult(std::vector> A, std::vector> B){ + std::vector> C; + C.resize(A.size()); + for(int i = 0; i < C.size(); i++){ + C[i].resize(B[0].size()); + } + + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < B[0].size(); j++){ + for(int k = 0; k < B.size(); k++){ + C[i][j] += A[i][k] * B[k][j]; + } + } + } + return C; + } + + std::vector> LinAlg::hadamard_product(std::vector> A, std::vector> B){ + std::vector> C; + C.resize(A.size()); + for(int i = 0; i < C.size(); i++){ + C[i].resize(A[0].size()); + } + + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A[0].size(); j++){ + C[i][j] = A[i][j] * B[i][j]; + } + } + return C; + } + + std::vector> LinAlg::elementWiseDivision(std::vector> A, std::vector> B){ + std::vector> C; + C.resize(A.size()); + for(int i = 0; i < C.size(); i++){ + C[i].resize(A[0].size()); + } + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A[i].size(); j++){ + C[i][j] = A[i][j] / B[i][j]; + } + } + return C; + } + + std::vector> LinAlg::transpose(std::vector> A){ + std::vector> AT; + AT.resize(A[0].size()); + for(int i = 0; i < AT.size(); i++){ + AT[i].resize(A.size()); + } + + for(int i = 0; i < A[0].size(); i++){ + for(int j = 0; j < A.size(); j++){ + AT[i][j] = A[j][i]; + } + } + return AT; + } + + std::vector> LinAlg::scalarMultiply(double scalar, std::vector> A){ + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A[i].size(); j++){ + A[i][j] *= scalar; + } + } + return A; + } + + std::vector> LinAlg::scalarAdd(double scalar, std::vector> A){ + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A[i].size(); j++){ + A[i][j] += scalar; + } + } + return A; + } + + std::vector> LinAlg::log(std::vector> A){ + std::vector> B; + B.resize(A.size()); + for(int i = 0; i < B.size(); i++){ + B[i].resize(A[0].size()); + } + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A[i].size(); j++){ + B[i][j] = std::log(A[i][j]); + } + } + return B; + } + + std::vector> LinAlg::log10(std::vector> A){ + std::vector> B; + B.resize(A.size()); + for(int i = 0; i < B.size(); i++){ + B[i].resize(A[0].size()); + } + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A[i].size(); j++){ + B[i][j] = std::log10(A[i][j]); + } + } + return B; + } + + std::vector> LinAlg::exp(std::vector> A){ + std::vector> B; + B.resize(A.size()); + for(int i = 0; i < B.size(); i++){ + B[i].resize(A[0].size()); + } + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A[i].size(); j++){ + B[i][j] = std::exp(A[i][j]); + } + } + return B; + } + + std::vector> LinAlg::erf(std::vector> A){ + std::vector> B; + B.resize(A.size()); + for(int i = 0; i < B.size(); i++){ + B[i].resize(A[0].size()); + } + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A[i].size(); j++){ + B[i][j] = std::erf(A[i][j]); + } + } + return B; + } + + 
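// Illustrative sketch of composing the routines defined so far (not in the committed
// sources; the <double> template arguments stripped from this upload are assumed).
void linalgBasicsDemo(){
    MLPP::LinAlg alg;
    std::vector<std::vector<double>> A = {{1, 2}, {3, 4}};
    std::vector<std::vector<double>> B = {{5, 6}, {7, 8}};

    std::vector<std::vector<double>> C  = alg.matmult(A, B);          // matrix product AB
    std::vector<std::vector<double>> At = alg.transpose(A);           // A^T
    std::vector<std::vector<double>> H  = alg.hadamard_product(A, B); // element-wise product
    alg.printMatrix(alg.scalarAdd(1.0, alg.scalarMultiply(2.0, C)));  // prints 2C + 1
}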
std::vector> LinAlg::exponentiate(std::vector> A, double p){ + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A[i].size(); j++){ + A[i][j] = pow(A[i][j], p); + } + } + return A; + } + + double LinAlg::det(std::vector> A, int d){ + + double deter = 0; + std::vector> B; + B.resize(d); + for(int i = 0; i < d; i++){ + B[i].resize(d); + } + + /* This is the base case in which the input is a 2x2 square matrix. + Recursion is performed unless and until we reach this base case, + such that we recieve a scalar as the result. */ + if(d == 2){ + return A[0][0] * A[1][1] - A[0][1] * A[1][0]; + } + + else{ + for(int i = 0; i < d; i++){ + int sub_i = 0; + for(int j = 1; j < d; j++){ + int sub_j = 0; + for(int k = 0; k < d; k++){ + if(k == i){ + continue; + } + B[sub_i][sub_j] = A[j][k]; + sub_j++; + } + sub_i++; + } + deter += pow(-1, i) * A[0][i] * det(B, d-1); + } + } + return deter; + } + + std::vector> LinAlg::cofactor(std::vector> A, int n, int i, int j){ + std::vector> cof; + cof.resize(A.size()); + for(int i = 0; i < cof.size(); i++){ + cof[i].resize(A.size()); + } + int sub_i = 0, sub_j = 0; + + for (int row = 0; row < n; row++){ + for (int col = 0; col < n; col++){ + if (row != i && col != j) { + cof[sub_i][sub_j++] = A[row][col]; + + if (sub_j == n - 1){ + sub_j = 0; + sub_i++; + } + } + } + } + return cof; + } + + std::vector> LinAlg::adjoint(std::vector> A){ + + //Resizing the initial adjoint matrix + std::vector> adj; + adj.resize(A.size()); + for(int i = 0; i < adj.size(); i++){ + adj[i].resize(A.size()); + } + + // Checking for the case where the given N x N matrix is a scalar + if(A.size() == 1){ + adj[0][0] = 1; + return adj; + } + + if(A.size() == 2){ + adj[0][0] = A[1][1]; + adj[1][1] = A[0][0]; + + adj[0][1] = -A[0][1]; + adj[1][0] = -A[1][0]; + return adj; + } + + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A.size(); j++){ + std::vector> cof = cofactor(A, int(A.size()), i, j); + // 1 if even, -1 if odd + int sign = (i + j) % 2 == 0 ? 1 : -1; + adj[j][i] = sign * det(cof, int(A.size()) - 1); + } + } + return adj; + } + + // The inverse can be computed as (1 / determinant(A)) * adjoint(A) + std::vector> LinAlg::inverse(std::vector> A){ + return scalarMultiply(1/det(A, int(A.size())), adjoint(A)); + } + + // This is simply the Moore-Penrose least squares approximation of the inverse. 
+ std::vector> LinAlg::pinverse(std::vector> A){ + return matmult(inverse(matmult(transpose(A), A)), transpose(A)); + } + + std::vector> LinAlg::zeromat(int n, int m){ + std::vector> zeromat; + zeromat.resize(n); + for(int i = 0; i < zeromat.size(); i++){ + zeromat[i].resize(m); + } + return zeromat; + } + + std::vector> LinAlg::onemat(int n, int m){ + std::vector> onemat; + onemat.resize(n); + for(int i = 0; i < onemat.size(); i++){ + onemat[i].resize(m); + } + for(int i = 0; i < onemat.size(); i++){ + for(int j = 0; j < onemat[i].size(); j++){ + onemat[i][j] = 1; + } + } + return onemat; + } + + std::vector> LinAlg::round(std::vector> A){ + std::vector> B; + B.resize(A.size()); + for(int i = 0; i < B.size(); i++){ + B[i].resize(A[0].size()); + } + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A[i].size(); j++){ + B[i][j] = std::round(A[i][j]); + } + } + return B; + } + + std::vector> LinAlg::identity(double d){ + std::vector> identityMat; + identityMat.resize(d); + for(int i = 0; i < identityMat.size(); i++){ + identityMat[i].resize(d); + } + for(int i = 0; i < identityMat.size(); i++){ + for(int j = 0; j < identityMat.size(); j++){ + if(i == j){ + identityMat[i][j] = 1; + } + else { identityMat[i][j] = 0; } + } + } + return identityMat; + } + + std::vector> LinAlg::cov(std::vector> A){ + Stat stat; + std::vector> covMat; + covMat.resize(A.size()); + for(int i = 0; i < covMat.size(); i++){ + covMat[i].resize(A.size()); + } + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A.size(); j++){ + covMat[i][j] = stat.covariance(A[i], A[j]); + } + } + return covMat; + } + + std::tuple>, std::vector>> LinAlg::eig(std::vector> A){ + /* + A (the entered parameter) in most use cases will be X'X, XX', etc. and must be symmetric. + That simply means that 1) X' = X and 2) X is a square matrix. This function that computes the + eigenvalues of a matrix is utilizing Jacobi's method. + */ + + double diagonal = true; // Perform the iterative Jacobi algorithm unless and until we reach a diagonal matrix which yields us the eigenvals. 
+ + std::map val_to_vec; + std::vector> a_new; + std::vector> eigenvectors = identity(A.size()); + do{ + double a_ij = A[0][1]; + double sub_i = 0; + double sub_j = 1; + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A[i].size(); j++){ + if(i != j && abs(A[i][j]) > a_ij){ + a_ij = A[i][j]; + sub_i = i; + sub_j = j; + } + else if(i != j && abs(A[i][j]) == a_ij){ + if(i < sub_i){ + a_ij = A[i][j]; + sub_i = i; + sub_j = j; + } + } + } + } + + double a_ii = A[sub_i][sub_i]; + double a_jj = A[sub_j][sub_j]; + double a_ji = A[sub_j][sub_i]; + double theta; + + if(a_ii == a_jj) { + theta = M_PI / 4; + } + else{ + theta = 0.5 * atan(2 * a_ij / (a_ii - a_jj)); + } + + std::vector> P = identity(A.size()); + P[sub_i][sub_j] = -sin(theta); + P[sub_i][sub_i] = cos(theta); + P[sub_j][sub_j] = cos(theta); + P[sub_j][sub_i] = sin(theta); + + a_new = matmult(matmult(inverse(P), A), P); + + for(int i = 0; i < a_new.size(); i++){ + for(int j = 0; j < a_new[i].size(); j++){ + if(i != j && std::round(a_new[i][j]) == 0){ + a_new[i][j] = 0; + } + } + } + + bool non_zero = false; + for(int i = 0; i < a_new.size(); i++){ + for(int j = 0; j < a_new[i].size(); j++){ + if(i != j && std::round(a_new[i][j]) != 0){ + non_zero = true; + } + } + } + + if(non_zero) { + diagonal = false; + } + else{ + diagonal = true; + } + + if(a_new == A){ + diagonal = true; + for(int i = 0; i < a_new.size(); i++){ + for(int j = 0; j < a_new[i].size(); j++){ + if(i != j){ + a_new[i][j] = 0; + } + } + } + } + + eigenvectors = matmult(eigenvectors, P); + A = a_new; + + } while(!diagonal); + + std::vector> a_new_prior = a_new; + + // Bubble Sort + for(int i = 0; i < a_new.size() - 1; i++){ + for(int j = 0; j < a_new.size() - 1 - i; j++){ + if(a_new[j][j] < a_new[j + 1][j + 1]){ + double temp = a_new[j + 1][j + 1]; + a_new[j + 1][j + 1] = a_new[j][j]; + a_new[j][j] = temp; + } + } + } + + + for(int i = 0; i < a_new.size(); i++){ + for(int j = 0; j < a_new.size(); j++){ + if(a_new[i][i] == a_new_prior[j][j]){ + val_to_vec[i] = j; + } + } + } + + std::vector> eigen_temp = eigenvectors; + for(int i = 0; i < eigenvectors.size(); i++){ + for(int j = 0; j < eigenvectors[i].size(); j++){ + eigenvectors[i][j] = eigen_temp[i][val_to_vec[j]]; + } + } + return {eigenvectors, a_new}; + + } + + std::tuple>, std::vector>, std::vector>> LinAlg::SVD(std::vector> A){ + auto [left_eigenvecs, eigenvals] = eig(matmult(A, transpose(A))); + auto [right_eigenvecs, right_eigenvals] = eig(matmult(transpose(A), A)); + + std::vector> singularvals = exponentiate(eigenvals, 0.5); + std::vector> sigma = zeromat(A.size(), A[0].size()); + for(int i = 0; i < singularvals.size(); i++){ + for(int j = 0; j < singularvals[i].size(); j++){ + sigma[i][j] = singularvals[i][j]; + } + } + return {left_eigenvecs, sigma, right_eigenvecs}; + } + + double LinAlg::sum_elements(std::vector> A){ + double sum = 0; + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A[i].size(); j++){ + sum += A[i][j]; + } + } + return sum; + } + + std::vector LinAlg::flatten(std::vector> A){ + std::vector a; + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A[i].size(); j++){ + a.push_back(A[i][j]); + } + } + return a; + } + + void LinAlg::printMatrix(std::vector> A){ + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A[i].size(); j++){ + std::cout << A[i][j] << " "; + } + std::cout << std::endl; + } + } + + std::vector> LinAlg::vecmult(std::vector a, std::vector b){ + std::vector> C; + C.resize(a.size()); + for(int i = 0; i < C.size(); i++){ + C[i] = scalarMultiply(a[i], b); + 
} + return C; + } + + std::vector LinAlg::hadamard_product(std::vector a, std::vector b){ + std::vector c; + c.resize(a.size()); + + for(int i = 0; i < a.size(); i++){ + c[i] = a[i] * b[i]; + } + + return c; + } + + std::vector LinAlg::elementWiseDivision(std::vector a, std::vector b){ + std::vector c; + c.resize(a.size()); + + for(int i = 0; i < a.size(); i++){ + c[i] = a[i] / b[i]; + } + return c; + } + + std::vector LinAlg::scalarMultiply(double scalar, std::vector a){ + for(int i = 0; i < a.size(); i++){ + a[i] *= scalar; + } + return a; + } + + std::vector LinAlg::scalarAdd(double scalar, std::vector a){ + for(int i = 0; i < a.size(); i++){ + a[i] += scalar; + } + return a; + } + + std::vector LinAlg::addition(std::vector a, std::vector b){ + std::vector c; + c.resize(a.size()); + for(int i = 0; i < a.size(); i++){ + c[i] = a[i] + b[i]; + } + return c; + } + + std::vector LinAlg::subtraction(std::vector a, std::vector b){ + std::vector c; + c.resize(a.size()); + for(int i = 0; i < a.size(); i++){ + c[i] = a[i] - b[i]; + } + return c; + } + + std::vector LinAlg::subtractMatrixRows(std::vector a, std::vector> B){ + for(int i = 0; i < B.size(); i++){ + a = subtraction(a, B[i]); + } + return a; + } + + std::vector LinAlg::log(std::vector a){ + std::vector b; + b.resize(a.size()); + for(int i = 0; i < a.size(); i++){ + b[i] = std::log(a[i]); + } + return b; + } + + std::vector LinAlg::log10(std::vector a){ + std::vector b; + b.resize(a.size()); + for(int i = 0; i < a.size(); i++){ + b[i] = std::log10(a[i]); + } + return b; + } + + std::vector LinAlg::exp(std::vector a){ + std::vector b; + b.resize(a.size()); + for(int i = 0; i < a.size(); i++){ + b[i] = std::exp(a[i]); + } + return b; + } + + std::vector LinAlg::erf(std::vector a){ + std::vector b; + b.resize(a.size()); + for(int i = 0; i < a.size(); i++){ + b[i] = std::erf(a[i]); + } + return b; + } + + double LinAlg::dot(std::vector a, std::vector b){ + double c = 0; + for(int i = 0; i < a.size(); i++){ + c += a[i] * b[i]; + } + return c; + } + + std::vector LinAlg::onevec(int n){ + std::vector onevec; + onevec.resize(n); + for(int i = 0; i < onevec.size(); i++){ + onevec[i] = 1; + } + return onevec; + } + + double LinAlg::max(std::vector a){ + int max = a[0]; + for(int i = 0; i < a.size(); i++){ + if(a[i] > max){ + max = a[i]; + } + } + return max; + } + + double LinAlg::min(std::vector a){ + int min = a[0]; + for(int i = 0; i < a.size(); i++){ + if(a[i] < min){ + min = a[i]; + } + } + return min; + } + + std::vector LinAlg::round(std::vector a){ + std::vector b; + b.resize(a.size()); + for(int i = 0; i < a.size(); i++){ + b[i] = std::round(a[i]); + } + return b; + } + + double LinAlg::norm_sq(std::vector a){ + double n_sq = 0; + for(int i = 0; i < a.size(); i++){ + n_sq += a[i] * a[i]; + } + return n_sq; + } + + double LinAlg::sum_elements(std::vector a){ + double sum = 0; + for(int i = 0; i < a.size(); i++){ + sum += a[i]; + } + return sum; + } + + double LinAlg::cosineSimilarity(std::vector a, std::vector b){ + return dot(a, b) / (sqrt(norm_sq(a)) * sqrt(norm_sq(b))); + } + + void LinAlg::printVector(std::vector a){ + for(int i = 0; i < a.size(); i++){ + std::cout << a[i] << " "; + } + std::cout << std::endl; + } + + std::vector> LinAlg::mat_vec_add(std::vector> A, std::vector b){ + for(int i = 0; i < A.size(); i++){ + for(int j = 0; j < A[i].size(); j++){ + A[i][j] += b[j]; + } + } + return A; + } + + std::vector LinAlg::mat_vec_mult(std::vector> A, std::vector b){ + std::vector c; + c.resize(A.size()); + + for(int i = 0; i < 
A.size(); i++){ + for(int k = 0; k < b.size(); k++){ + c[i] += A[i][k] * b[k]; + } + } + return c; + } + + std::vector LinAlg::flatten(std::vector>> A){ + std::vector c; + for(int i = 0; i < A.size(); i++){ + std::vector flattenedVec = flatten(A[i]); + c.insert(c.end(), flattenedVec.begin(), flattenedVec.end()); + } + return c; + } + + void LinAlg::printTensor(std::vector>> A){ + for(int i = 0; i < A.size(); i++){ + printMatrix(A[i]); + if(i != A.size() - 1) { std::cout << std::endl; } + } + } +} \ No newline at end of file diff --git a/MLPP/LinAlg/LinAlg.hpp b/MLPP/LinAlg/LinAlg.hpp new file mode 100644 index 0000000..f9ec1c6 --- /dev/null +++ b/MLPP/LinAlg/LinAlg.hpp @@ -0,0 +1,136 @@ +// +// LinAlg.hpp +// +// Created by Marc Melikyan on 1/8/21. +// + +#ifndef LinAlg_hpp +#define LinAlg_hpp + +#include +#include + +namespace MLPP{ + class LinAlg{ + public: + + // MATRIX FUNCTIONS + + std::vector> addition(std::vector> A, std::vector> B); + + std::vector> subtraction(std::vector> A, std::vector> B); + + std::vector> matmult(std::vector> A, std::vector> B); + + std::vector> hadamard_product(std::vector> A, std::vector> B); + + std::vector> kronecker_product(std::vector> A, std::vector> B); + + std::vector> elementWiseDivision(std::vector> A, std::vector> B); + + std::vector> transpose(std::vector> A); + + std::vector> scalarMultiply(double scalar, std::vector> A); + + std::vector> scalarAdd(double scalar, std::vector> A); + + std::vector> log(std::vector> A); + + std::vector> log10(std::vector> A); + + std::vector> exp(std::vector> A); + + std::vector> erf(std::vector> A); + + std::vector> exponentiate(std::vector> A, double p); + + double det(std::vector> A, int d); + + std::vector> cofactor(std::vector> A, int n, int i, int j); + + std::vector> adjoint(std::vector> A); + + std::vector> inverse(std::vector> A); + + std::vector> pinverse(std::vector> A); + + std::vector> zeromat(int n, int m); + + std::vector> onemat(int n, int m); + + std::vector> round(std::vector> A); + + std::vector> identity(double d); + + std::vector> cov(std::vector> A); + + std::tuple>, std::vector>> eig(std::vector> A); + + std::tuple>, std::vector>, std::vector>> SVD(std::vector> A); + + double sum_elements(std::vector> A); + + std::vector flatten(std::vector> A); + + void printMatrix(std::vector> A); + + // VECTOR FUNCTIONS + + std::vector> vecmult(std::vector a, std::vector b); // This multiplies a, bT + + std::vector hadamard_product(std::vector a, std::vector b); + + std::vector elementWiseDivision(std::vector a, std::vector b); + + std::vector scalarMultiply(double scalar, std::vector a); + + std::vector scalarAdd(double scalar, std::vector a); + + std::vector addition(std::vector a, std::vector b); + + std::vector subtraction(std::vector a, std::vector b); + + std::vector subtractMatrixRows(std::vector a, std::vector> B); + + std::vector log(std::vector a); + + std::vector log10(std::vector a); + + std::vector exp(std::vector a); + + std::vector erf(std::vector a); + + double dot(std::vector a, std::vector b); + + std::vector onevec(int n); + + double max(std::vector a); + + double min(std::vector a); + + std::vector round(std::vector a); + + double norm_sq(std::vector a); + + double sum_elements(std::vector a); + + double cosineSimilarity(std::vector a, std::vector b); + + void printVector(std::vector a); + + // MATRIX-VECTOR FUNCTIONS + std::vector> mat_vec_add(std::vector> A, std::vector b); + + std::vector mat_vec_mult(std::vector> A, std::vector b); + + // TENSOR FUNCTIONS + std::vector 
flatten(std::vector>> A); + void printTensor(std::vector>> A); + + + private: + }; + +} + +#endif /* LinAlg_hpp */ \ No newline at end of file diff --git a/MLPP/LinReg/LinReg.cpp b/MLPP/LinReg/LinReg.cpp new file mode 100644 index 0000000..5bd4b03 --- /dev/null +++ b/MLPP/LinReg/LinReg.cpp @@ -0,0 +1,232 @@ +// +// LinReg.cpp +// +// Created by Marc Melikyan on 10/2/20. +// + +#include "LinReg.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Stat/Stat.hpp" +#include "Regularization/Reg.hpp" +#include "Utilities/Utilities.hpp" +#include "Cost/Cost.hpp" + +#include +#include +#include + +namespace MLPP{ + + LinReg::LinReg(std::vector> inputSet, std::vector outputSet, std::string reg, double lambda, double alpha) + : inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha) + { + y_hat.resize(n); + + weights = Utilities::weightInitialization(k); + bias = Utilities::biasInitialization(); + } + + std::vector LinReg::modelSetTest(std::vector> X){ + return Evaluate(X); + } + + double LinReg::modelTest(std::vector x){ + return Evaluate(x); + } + + void LinReg::gradientDescent(double learning_rate, int max_epoch, bool UI){ + Reg regularization; + LinAlg alg; + double cost_prev = 0; + int epoch = 1; + forwardPass(); + + while(true){ + cost_prev = Cost(y_hat, outputSet); + + std::vector error = alg.subtraction(y_hat, outputSet); + + // Calculating the weight gradients + weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputSet), error))); + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Calculating the bias gradients + bias -= learning_rate * alg.sum_elements(error) / n; + forwardPass(); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); + Utilities::UI(weights, bias); + } + epoch++; + + if(epoch > max_epoch) { break; } + } + } + + void LinReg::SGD(double learning_rate, int max_epoch, bool UI){ + Reg regularization; + Utilities util; + double cost_prev = 0; + int epoch = 1; + + while(true){ + std::random_device rd; + std::default_random_engine generator(rd()); + std::uniform_int_distribution distribution(0, int(n - 1)); + int outputIndex = distribution(generator); + + double y_hat = Evaluate(inputSet[outputIndex]); + cost_prev = Cost({y_hat}, {outputSet[outputIndex]}); + + + for(int i = 0; i < k; i++){ + + // Calculating the weight gradients + + double w_gradient = (y_hat - outputSet[outputIndex]) * inputSet[outputIndex][i]; + + + // Weight updation + weights[i] -= learning_rate * w_gradient; + } + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Calculating the bias gradients + double b_gradient = (y_hat - outputSet[outputIndex]); + + // Bias updation + bias -= learning_rate * b_gradient; + y_hat = Evaluate({inputSet[outputIndex]}); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost({y_hat}, {outputSet[outputIndex]})); + Utilities::UI(weights, bias); + } + epoch++; + + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + void LinReg::MBGD(double learning_rate, int max_epoch, int miniBatch_size, bool UI){ + Reg regularization; + LinAlg alg; + double cost_prev = 0; + int epoch = 1; + + int n_miniBatch = n/miniBatch_size; + + std::vector>> inputMiniBatches; + std::vector> outputMiniBatches; + // Creating the mini-batches + for(int i = 0; i < n_miniBatch; i++){ + std::vector> currentInputSet; + std::vector currentOutputSet; + for(int j = 0; j < n/n_miniBatch; j++){ + 
currentInputSet.push_back(inputSet[n/n_miniBatch * i + j]); + currentOutputSet.push_back(outputSet[n/n_miniBatch * i + j]); + } + inputMiniBatches.push_back(currentInputSet); + outputMiniBatches.push_back(currentOutputSet); + } + + if(double(n)/double(n_miniBatch) - int(n/n_miniBatch) != 0){ + for(int i = 0; i < n - n/n_miniBatch * n_miniBatch; i++){ + inputMiniBatches[n_miniBatch - 1].push_back(inputSet[n/n_miniBatch * n_miniBatch + i]); + outputMiniBatches[n_miniBatch - 1].push_back(outputSet[n/n_miniBatch * n_miniBatch + i]); + } + } + + while(true){ + for(int i = 0; i < n_miniBatch; i++){ + std::vector y_hat = Evaluate(inputMiniBatches[i]); + cost_prev = Cost(y_hat, outputMiniBatches[i]); + + std::vector error = alg.subtraction(y_hat, outputMiniBatches[i]); + + // Calculating the weight gradients + weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error))); + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Calculating the bias gradients + bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); + y_hat = Evaluate(inputMiniBatches[i]); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i])); + Utilities::UI(weights, bias); + } + } + epoch++; + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + void LinReg::normalEquation(){ + LinAlg alg; + Stat stat; + std::vector x_means; + + x_means.resize(alg.transpose(inputSet).size()); + for(int i = 0; i < alg.transpose(inputSet).size(); i++){ + x_means[i] = (stat.mean(alg.transpose(inputSet)[i])); + } + + try{ + std::vector temp; + temp.resize(k); + temp = alg.mat_vec_mult(alg.inverse(alg.matmult(alg.transpose(inputSet), inputSet)), alg.mat_vec_mult(alg.transpose(inputSet), outputSet)); + if(isnan(temp[0])){ + throw 99; + } + else{ + if(reg == "Ridge") { + weights = alg.mat_vec_mult(alg.inverse(alg.addition(alg.matmult(alg.transpose(inputSet), inputSet), alg.scalarMultiply(lambda, alg.identity(k)))), alg.mat_vec_mult(alg.transpose(inputSet), outputSet)); + } + else{ weights = alg.mat_vec_mult(alg.inverse(alg.matmult(alg.transpose(inputSet), inputSet)), alg.mat_vec_mult(alg.transpose(inputSet), outputSet)); } + + bias = stat.mean(outputSet) - alg.dot(weights, x_means); + + forwardPass(); + } + } + catch(int err_num){ + std::cout << "ERR " << err_num << ": Resulting matrix was noninvertible/degenerate, and so the normal equation could not be performed. Try utilizing gradient descent." << std::endl; + } + + } + + double LinReg::score(){ + Utilities util; + return util.performance(y_hat, outputSet); + } + + void LinReg::save(std::string fileName){ + Utilities util; + util.saveParameters(fileName, weights, bias); + } + + double LinReg::Cost(std::vector y_hat, std::vector y){ + Reg regularization; + class Cost cost; + return cost.MSE(y_hat, y) + regularization.regTerm(weights, lambda, alpha, reg); + } + + std::vector LinReg::Evaluate(std::vector> X){ + LinAlg alg; + return alg.scalarAdd(bias, alg.mat_vec_mult(X, weights)); + } + + double LinReg::Evaluate(std::vector x){ + LinAlg alg; + return alg.dot(weights, x) + bias; + } + + // wTx + b + void LinReg::forwardPass(){ + y_hat = Evaluate(inputSet); + } +} \ No newline at end of file diff --git a/MLPP/LinReg/LinReg.hpp b/MLPP/LinReg/LinReg.hpp new file mode 100644 index 0000000..51b80ce --- /dev/null +++ b/MLPP/LinReg/LinReg.hpp @@ -0,0 +1,52 @@ +// +// LinReg.hpp +// +// Created by Marc Melikyan on 10/2/20. 
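// For reference, normalEquation above applies the closed form w = (X^T X)^(-1) X^T y
// (with lambda * I added to X^T X under "Ridge"), and bias = mean(y) - w . x_means;
// it prints an error suggesting gradient descent when X^T X is singular.
// Illustrative usage sketch (not in the committed sources; data and include path assumed):
#include "MLPP/LinReg/LinReg.hpp"
#include <iostream>
#include <vector>

int main(){
    std::vector<std::vector<double>> X = {{1, 2}, {2, 1}, {3, 4}, {4, 3}};
    std::vector<double> y = {3, 3, 7, 7};

    MLPP::LinReg model(X, y);           // defaults: reg = "None", lambda = 0.5, alpha = 0.5
    model.normalEquation();             // closed-form fit
    // model.gradientDescent(0.01, 1000, false);   // iterative alternative
    std::cout << "Performance: " << model.score() << std::endl;
    return 0;
}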
+// + +#ifndef LinReg_hpp +#define LinReg_hpp + +#include +#include + +namespace MLPP{ + class LinReg{ + + public: + LinReg(std::vector> inputSet, std::vector outputSet, std::string reg = "None", double lambda = 0.5, double alpha = 0.5); + std::vector modelSetTest(std::vector> X); + double modelTest(std::vector x); + void gradientDescent(double learning_rate, int max_epoch, bool UI = 1); + void SGD(double learning_rate, int max_epoch, bool UI = 1); + void MBGD(double learning_rate, int max_epoch, int miniBatch_size, bool UI = 1); + void normalEquation(); + double score(); + void save(std::string fileName); + private: + + double Cost(std::vector y_hat, std::vector y); + + std::vector Evaluate(std::vector> X); + double Evaluate(std::vector x); + void forwardPass(); + + std::vector> inputSet; + std::vector outputSet; + std::vector y_hat; + std::vector weights; + double bias; + + int n; + int k; + + // Regularization Params + std::string reg; + int lambda; + int alpha; /* This is the controlling param for Elastic Net*/ + + + }; +} + +#endif /* LinReg_hpp */ diff --git a/MLPP/LogReg/LogReg.cpp b/MLPP/LogReg/LogReg.cpp new file mode 100644 index 0000000..9c7d8dd --- /dev/null +++ b/MLPP/LogReg/LogReg.cpp @@ -0,0 +1,229 @@ +// +// LogReg.cpp +// +// Created by Marc Melikyan on 10/2/20. +// + +#include "LogReg.hpp" +#include "Activation/Activation.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Regularization/Reg.hpp" +#include "Utilities/Utilities.hpp" +#include "Cost/Cost.hpp" + +#include +#include + +namespace MLPP{ + LogReg::LogReg(std::vector> inputSet, std::vector outputSet, std::string reg, double lambda, double alpha) + : inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha) + { + y_hat.resize(n); + weights = Utilities::weightInitialization(k); + bias = Utilities::biasInitialization(); + } + + std::vector LogReg::modelSetTest(std::vector> X){ + return Evaluate(X); + } + + double LogReg::modelTest(std::vector x){ + return Evaluate(x); + } + + void LogReg::gradientDescent(double learning_rate, int max_epoch, bool UI){ + Reg regularization; + LinAlg alg; + double cost_prev = 0; + int epoch = 1; + forwardPass(); + + while(true){ + cost_prev = Cost(y_hat, outputSet); + + std::vector error = alg.subtraction(y_hat, outputSet); + + // Calculating the weight gradients + weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputSet), error))); + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Calculating the bias gradients + bias -= learning_rate * alg.sum_elements(error) / n; + forwardPass(); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); + Utilities::UI(weights, bias); + } + epoch++; + + if(epoch > max_epoch) { break; } + + } + } + + void LogReg::MLE(double learning_rate, int max_epoch, bool UI){ + Reg regularization; + LinAlg alg; + double cost_prev = 0; + int epoch = 1; + forwardPass(); + + while(true){ + cost_prev = Cost(y_hat, outputSet); + + std::vector error = alg.subtraction(outputSet, y_hat); + + // Calculating the weight gradients + weights = alg.addition(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputSet), error))); + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Calculating the bias gradients + bias += learning_rate * alg.sum_elements(error) / n; + forwardPass(); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); + 
Utilities::UI(weights, bias); + } + epoch++; + if(epoch > max_epoch) { break; } + } + } + + void LogReg::SGD(double learning_rate, int max_epoch, bool UI){ + Reg regularization; + Utilities util; + double cost_prev = 0; + int epoch = 1; + + while(true){ + std::random_device rd; + std::default_random_engine generator(rd()); + std::uniform_int_distribution distribution(0, int(n - 1)); + int outputIndex = distribution(generator); + + double y_hat = Evaluate(inputSet[outputIndex]); + cost_prev = Cost({y_hat}, {outputSet[outputIndex]}); + + + for(int i = 0; i < k; i++){ + + // Calculating the weight gradients + + double w_gradient = (y_hat - outputSet[outputIndex]) * inputSet[outputIndex][i]; + + + // Weight updation + weights[i] -= learning_rate * w_gradient; + } + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Calculating the bias gradients + double b_gradient = (y_hat - outputSet[outputIndex]); + + // Bias updation + bias -= learning_rate * b_gradient; + y_hat = Evaluate({inputSet[outputIndex]}); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost({y_hat}, {outputSet[outputIndex]})); + Utilities::UI(weights, bias); + } + epoch++; + + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + void LogReg::MBGD(double learning_rate, int max_epoch, int miniBatch_size, bool UI){ + Reg regularization; + LinAlg alg; + double cost_prev = 0; + int epoch = 1; + + int n_miniBatch = n/miniBatch_size; + + std::vector>> inputMiniBatches; + std::vector> outputMiniBatches; + + // Creating the mini-batches + for(int i = 0; i < n_miniBatch; i++){ + std::vector> currentInputSet; + std::vector currentOutputSet; + for(int j = 0; j < n/n_miniBatch; j++){ + currentInputSet.push_back(inputSet[n/n_miniBatch * i + j]); + currentOutputSet.push_back(outputSet[n/n_miniBatch * i + j]); + } + inputMiniBatches.push_back(currentInputSet); + outputMiniBatches.push_back(currentOutputSet); + } + + if(double(n)/double(n_miniBatch) - int(n/n_miniBatch) != 0){ + for(int i = 0; i < n - n/n_miniBatch * n_miniBatch; i++){ + inputMiniBatches[n_miniBatch - 1].push_back(inputSet[n/n_miniBatch * n_miniBatch + i]); + outputMiniBatches[n_miniBatch - 1].push_back(outputSet[n/n_miniBatch * n_miniBatch + i]); + } + } + + while(true){ + for(int i = 0; i < n_miniBatch; i++){ + std::vector y_hat = Evaluate(inputMiniBatches[i]); + cost_prev = Cost(y_hat, outputMiniBatches[i]); + + std::vector error = alg.subtraction(y_hat, outputMiniBatches[i]); + + // Calculating the weight gradients + weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error))); + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Calculating the bias gradients + bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); + y_hat = Evaluate(inputMiniBatches[i]); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i])); + Utilities::UI(weights, bias); + } + } + epoch++; + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + double LogReg::score(){ + Utilities util; + return util.performance(y_hat, outputSet); + } + + void LogReg::save(std::string fileName){ + Utilities util; + util.saveParameters(fileName, weights, bias); + } + + double LogReg::Cost(std::vector y_hat, std::vector y){ + Reg regularization; + class Cost cost; + return cost.LogLoss(y_hat, y) + regularization.regTerm(weights, lambda, alpha, reg); + } + + + std::vector LogReg::Evaluate(std::vector> X){ + 
LinAlg alg; + Activation avn; + return avn.sigmoid(alg.scalarAdd(bias, alg.mat_vec_mult(X, weights))); + } + + double LogReg::Evaluate(std::vector x){ + LinAlg alg; + Activation avn; + return avn.sigmoid(alg.dot(weights, x) + bias); + } + + // sigmoid ( wTx + b ) + void LogReg::forwardPass(){ + y_hat = Evaluate(inputSet); + } +} \ No newline at end of file diff --git a/MLPP/LogReg/LogReg.hpp b/MLPP/LogReg/LogReg.hpp new file mode 100644 index 0000000..2b8be88 --- /dev/null +++ b/MLPP/LogReg/LogReg.hpp @@ -0,0 +1,53 @@ +// +// LogReg.hpp +// +// Created by Marc Melikyan on 10/2/20. +// + +#ifndef LogReg_hpp +#define LogReg_hpp + + +#include +#include + +namespace MLPP { + + class LogReg{ + + public: + LogReg(std::vector> inputSet, std::vector outputSet, std::string reg = "None", double lambda = 0.5, double alpha = 0.5); + std::vector modelSetTest(std::vector> X); + double modelTest(std::vector x); + void gradientDescent(double learning_rate, int max_epoch, bool UI = 1); + void MLE(double learning_rate, int max_epoch, bool UI = 1); + void SGD(double learning_rate, int max_epoch, bool UI = 1); + void MBGD(double learning_rate, int max_epoch, int miniBatch_size, bool UI = 1); + double score(); + void save(std::string fileName); + private: + + double Cost(std::vector y_hat, std::vector y); + + std::vector Evaluate(std::vector> X); + double Evaluate(std::vector x); + void forwardPass(); + + std::vector> inputSet; + std::vector outputSet; + std::vector y_hat; + std::vector weights; + double bias; + + int n; + int k; + double learning_rate; + + // Regularization Params + std::string reg; + double lambda; /* Regularization Parameter */ + double alpha; /* This is the controlling param for Elastic Net*/ + }; +} + +#endif /* LogReg_hpp */ diff --git a/MLPP/MLP/MLP.cpp b/MLPP/MLP/MLP.cpp new file mode 100644 index 0000000..028829b --- /dev/null +++ b/MLPP/MLP/MLP.cpp @@ -0,0 +1,292 @@ +// +// MLP.cpp +// +// Created by Marc Melikyan on 11/4/20. 
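// For reference, LogReg models y_hat = sigmoid(w^T x + b); with log-loss, the gradients used
// in gradientDescent above reduce to dJ/dw = (1/n) X^T (y_hat - y) and dJ/db = (1/n) sum(y_hat - y).
// Illustrative usage sketch (not in the committed sources; data, hyperparameters, include path assumed):
#include "MLPP/LogReg/LogReg.hpp"
#include <iostream>
#include <vector>

int main(){
    std::vector<std::vector<double>> X = {{0.1, 0.2}, {0.9, 0.8}, {0.2, 0.1}, {0.8, 0.9}};
    std::vector<double> y = {0, 1, 0, 1};

    MLPP::LogReg clf(X, y, "Ridge", 0.01, 0.5);   // L2-regularized logistic regression
    clf.gradientDescent(0.1, 500, false);
    std::cout << "P(y = 1): " << clf.modelTest({0.85, 0.75}) << std::endl;  // sigmoid(w^T x + b)
    return 0;
}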
+// + +#include "MLP.hpp" +#include "Activation/Activation.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Regularization/Reg.hpp" +#include "Utilities/Utilities.hpp" +#include "Cost/Cost.hpp" + +#include +#include + +namespace MLPP { + MLP::MLP(std::vector> inputSet, std::vector outputSet, int n_hidden, std::string reg, double lambda, double alpha) + : inputSet(inputSet), outputSet(outputSet), n_hidden(n_hidden), n(inputSet.size()), k(inputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha) + { + Activation avn; + y_hat.resize(n); + + weights1 = Utilities::weightInitialization(k, n_hidden); + weights2 = Utilities::weightInitialization(n_hidden); + bias1 = Utilities::biasInitialization(n_hidden); + bias2 = Utilities::biasInitialization(); + } + + std::vector MLP::modelSetTest(std::vector> X){ + return Evaluate(X); + } + + double MLP::modelTest(std::vector x){ + return Evaluate(x); + } + + void MLP::gradientDescent(double learning_rate, int max_epoch, bool UI){ + Reg regularization; + LinAlg alg; + Activation avn; + + double cost_prev = 0; + int epoch = 1; + forwardPass(); + + while(true){ + cost_prev = Cost(y_hat, outputSet); + + // Calculating the errors + std::vector error = alg.subtraction(y_hat, outputSet); + + // Calculating the weight/bias gradients for layer 2 + + std::vector D2_1 = alg.mat_vec_mult(alg.transpose(a2), error); + + // weights and bias updation for layer 2 + weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate/n, D2_1)); + weights2 = regularization.regWeights(weights2, lambda, alpha, reg); + + bias2 -= learning_rate * alg.sum_elements(error) / n; + + //Calculating the weight/bias for layer 1 + + std::vector> D1_1; + D1_1.resize(n); + + D1_1 = alg.vecmult(error, weights2); + + std::vector> D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, 1)); + + std::vector> D1_3 = alg.matmult(alg.transpose(inputSet), D1_2); + + + // weight an bias updation for layer 1 + weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate/n, D1_3)); + weights1 = regularization.regWeights(weights1, lambda, alpha, reg); + + bias1 = alg.subtractMatrixRows(bias1, alg.scalarMultiply(learning_rate/n, D1_2)); + + forwardPass(); + + // UI PORTION + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); + std::cout << "Layer 1:" << std::endl; + Utilities::UI(weights1, bias1); + std::cout << "Layer 2:" << std::endl; + Utilities::UI(weights2, bias2); + } + epoch++; + + if(epoch > max_epoch) { break; } + } + + } + + void MLP::SGD(double learning_rate, int max_epoch, bool UI){ + Reg regularization; + LinAlg alg; + Activation avn; + Utilities util; + + double cost_prev = 0; + int epoch = 1; + while(true){ + std::random_device rd; + std::default_random_engine generator(rd()); + std::uniform_int_distribution distribution(0, int(n - 1)); + int outputIndex = distribution(generator); + + double y_hat = Evaluate(inputSet[outputIndex]); + auto [z2, a2] = propagate(inputSet[outputIndex]); + cost_prev = Cost({y_hat}, {outputSet[outputIndex]}); + double error = y_hat - outputSet[outputIndex]; + + // Weight updation for layer 2 + std::vector D2_1 = alg.scalarMultiply(error, a2); + weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate, D2_1)); + weights2 = regularization.regWeights(weights2, lambda, alpha, reg); + + // Bias updation for layer 2 + bias2 -= learning_rate * error; + + // Weight updation for layer 1 + std::vector D1_1 = alg.scalarMultiply(error, weights2); + std::vector D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, 1)); + std::vector> D1_3 
= alg.vecmult(inputSet[outputIndex], D1_2); + + weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate, D1_3)); + weights1 = regularization.regWeights(weights1, lambda, alpha, reg); + // Bias updation for layer 1 + + bias1 = alg.subtraction(bias1, alg.scalarMultiply(learning_rate, D1_2)); + + y_hat = Evaluate(inputSet[outputIndex]); + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost({y_hat}, {outputSet[outputIndex]})); + std::cout << "Layer 1:" << std::endl; + Utilities::UI(weights1, bias1); + std::cout << "Layer 2:" << std::endl; + Utilities::UI(weights2, bias2); + } + epoch++; + + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + void MLP::MBGD(double learning_rate, int max_epoch, int miniBatch_size, bool UI){ + Reg regularization; + Activation avn; + LinAlg alg; + double cost_prev = 0; + int epoch = 1; + + int n_miniBatch = n/miniBatch_size; + + std::vector>> inputMiniBatches; + std::vector> outputMiniBatches; + + // Creating the mini-batches + for(int i = 0; i < n_miniBatch; i++){ + std::vector> currentInputSet; + std::vector currentOutputSet; + for(int j = 0; j < n/n_miniBatch; j++){ + currentInputSet.push_back(inputSet[n/n_miniBatch * i + j]); + currentOutputSet.push_back(outputSet[n/n_miniBatch * i + j]); + } + inputMiniBatches.push_back(currentInputSet); + outputMiniBatches.push_back(currentOutputSet); + } + + if(double(n)/double(n_miniBatch) - int(n/n_miniBatch) != 0){ + for(int i = 0; i < n - n/n_miniBatch * n_miniBatch; i++){ + inputMiniBatches[n_miniBatch - 1].push_back(inputSet[n/n_miniBatch * n_miniBatch + i]); + outputMiniBatches[n_miniBatch - 1].push_back(outputSet[n/n_miniBatch * n_miniBatch + i]); + } + } + + while(true){ + for(int i = 0; i < n_miniBatch; i++){ + std::vector y_hat = Evaluate(inputMiniBatches[i]); + auto [z2, a2] = propagate(inputMiniBatches[i]); + cost_prev = Cost(y_hat, outputMiniBatches[i]); + + // Calculating the errors + std::vector error = alg.subtraction(y_hat, outputMiniBatches[i]); + + // Calculating the weight/bias gradients for layer 2 + + std::vector D2_1 = alg.mat_vec_mult(alg.transpose(a2), error); + + // weights and bias updation for layser 2 + weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate/outputMiniBatches[i].size(), D2_1)); + weights2 = regularization.regWeights(weights2, lambda, alpha, reg); + + // Calculating the bias gradients for layer 2 + double b_gradient = alg.sum_elements(error); + + // Bias Updation for layer 2 + bias2 -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); + + //Calculating the weight/bias for layer 1 + + std::vector> D1_1 = alg.vecmult(error, weights2); + + std::vector> D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, 1)); + + std::vector> D1_3 = alg.matmult(alg.transpose(inputMiniBatches[i]), D1_2); + + + // weight an bias updation for layer 1 + weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate/outputMiniBatches[i].size(), D1_3)); + weights1 = regularization.regWeights(weights1, lambda, alpha, reg); + + bias1 = alg.subtractMatrixRows(bias1, alg.scalarMultiply(learning_rate/outputMiniBatches[i].size(), D1_2)); + + y_hat = Evaluate(inputMiniBatches[i]); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i])); + std::cout << "Layer 1:" << std::endl; + Utilities::UI(weights1, bias1); + std::cout << "Layer 2:" << std::endl; + Utilities::UI(weights2, bias2); + } + } + epoch++; + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + double MLP::score(){ + Utilities util; + return 
util.performance(y_hat, outputSet); + } + + void MLP::save(std::string fileName){ + Utilities util; + util.saveParameters(fileName, weights1, bias1, 0, 1); + util.saveParameters(fileName, weights2, bias2, 1, 2); + } + + double MLP::Cost(std::vector y_hat, std::vector y){ + Reg regularization; + class Cost cost; + return cost.LogLoss(y_hat, y) + regularization.regTerm(weights2, lambda, alpha, reg) + regularization.regTerm(weights1, lambda, alpha, reg); + } + + std::vector MLP::Evaluate(std::vector> X){ + LinAlg alg; + Activation avn; + std::vector> z2 = alg.mat_vec_add(alg.matmult(X, weights1), bias1); + std::vector> a2 = avn.sigmoid(z2); + return avn.sigmoid(alg.scalarAdd(bias2, alg.mat_vec_mult(a2, weights2))); + } + + std::tuple>, std::vector>> MLP::propagate(std::vector> X){ + LinAlg alg; + Activation avn; + std::vector> z2 = alg.mat_vec_add(alg.matmult(X, weights1), bias1); + std::vector> a2 = avn.sigmoid(z2); + return {z2, a2}; + } + + double MLP::Evaluate(std::vector x){ + LinAlg alg; + Activation avn; + std::vector z2 = alg.addition(alg.mat_vec_mult(alg.transpose(weights1), x), bias1); + std::vector a2 = avn.sigmoid(z2); + return avn.sigmoid(alg.dot(weights2, a2) + bias2); + } + + std::tuple, std::vector> MLP::propagate(std::vector x){ + LinAlg alg; + Activation avn; + std::vector z2 = alg.addition(alg.mat_vec_mult(alg.transpose(weights1), x), bias1); + std::vector a2 = avn.sigmoid(z2); + return {z2, a2}; + } + + void MLP::forwardPass(){ + LinAlg alg; + Activation avn; + z2 = alg.mat_vec_add(alg.matmult(inputSet, weights1), bias1); + a2 = avn.sigmoid(z2); + y_hat = avn.sigmoid(alg.scalarAdd(bias2, alg.mat_vec_mult(a2, weights2))); + } +} diff --git a/MLPP/MLP/MLP.hpp b/MLPP/MLP/MLP.hpp new file mode 100644 index 0000000..915ca39 --- /dev/null +++ b/MLPP/MLP/MLP.hpp @@ -0,0 +1,61 @@ +// +// MLP.hpp +// +// Created by Marc Melikyan on 11/4/20. +// + +#ifndef MLP_hpp +#define MLP_hpp + +#include +#include +#include + +namespace MLPP { + +class MLP{ + public: + MLP(std::vector> inputSet, std::vector outputSet, int n_hidden, std::string reg = "None", double lambda = 0.5, double alpha = 0.5); + std::vector modelSetTest(std::vector> X); + double modelTest(std::vector x); + void gradientDescent(double learning_rate, int max_epoch, bool UI = 1); + void SGD(double learning_rate, int max_epoch, bool UI = 1); + void MBGD(double learning_rate, int max_epoch, int miniBatch_size, bool UI = 1); + double score(); + void save(std::string fileName); + + private: + double Cost(std::vector y_hat, std::vector y); + + std::vector Evaluate(std::vector> X); + std::tuple>, std::vector>> propagate(std::vector> X); + double Evaluate(std::vector x); + std::tuple, std::vector> propagate(std::vector x); + void forwardPass(); + + std::vector> inputSet; + std::vector outputSet; + std::vector y_hat; + + std::vector> weights1; + std::vector weights2; + + std::vector bias1; + double bias2; + + std::vector> z2; + std::vector> a2; + + int n; + int k; + int n_hidden; + + + // Regularization Params + std::string reg; + double lambda; /* Regularization Parameter */ + double alpha; /* This is the controlling param for Elastic Net*/ + }; +} + +#endif /* MLP_hpp */ diff --git a/MLPP/MultinomialNB/MultinomialNB.cpp b/MLPP/MultinomialNB/MultinomialNB.cpp new file mode 100644 index 0000000..5b1af86 --- /dev/null +++ b/MLPP/MultinomialNB/MultinomialNB.cpp @@ -0,0 +1,120 @@ +// +// MultinomialNB.cpp +// +// Created by Marc Melikyan on 1/17/21. 
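// For reference, the forward pass implemented above is a2 = sigmoid(X W1 + b1) followed by
// y_hat = sigmoid(a2 . w2 + b2), i.e. one sigmoid hidden layer and a sigmoid output unit.
// Illustrative usage sketch (not in the committed sources; data and include path assumed):
#include "MLPP/MLP/MLP.hpp"
#include <iostream>
#include <vector>

int main(){
    std::vector<std::vector<double>> X = {{0, 0}, {0, 1}, {1, 0}, {1, 1}};
    std::vector<double> y = {0, 1, 1, 0};      // XOR, which requires the hidden layer

    MLPP::MLP mlp(X, y, 4);                    // 4 hidden units, defaults otherwise
    mlp.gradientDescent(0.5, 10000, false);
    std::cout << "Accuracy: " << mlp.score() << std::endl;
    return 0;
}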
+// + +#include "MultinomialNB.hpp" +#include "Utilities/Utilities.hpp" +#include "LinAlg/LinAlg.hpp" + +#include +#include + +namespace MLPP{ + MultinomialNB::MultinomialNB(std::vector> inputSet, std::vector outputSet, int class_num) + : inputSet(inputSet), outputSet(outputSet), class_num(class_num) + { + y_hat.resize(outputSet.size()); + Evaluate(); + } + + std::vector MultinomialNB::modelSetTest(std::vector> X){ + std::vector y_hat; + for(int i = 0; i < X.size(); i++){ + y_hat.push_back(modelTest(X[i])); + } + return y_hat; + } + + double MultinomialNB::modelTest(std::vector x){ + double score[class_num]; + computeTheta(); + + for(int j = 0; j < x.size(); j++){ + for(int k = 0; k < vocab.size(); k++){ + if(x[j] == vocab[k]){ + for(int p = class_num - 1; p >= 0; p--){ + score[p] += log(theta[p][vocab[k]]); + } + } + } + } + + for(int i = 0; i < priors.size(); i++){ + score[i] += log(priors[i]); + } + + return std::distance(score, std::max_element(score, score + sizeof(score) / sizeof(double))); + } + + double MultinomialNB::score(){ + Utilities util; + return util.performance(y_hat, outputSet); + } + + void MultinomialNB::computeTheta(){ + + // Resizing theta for the sake of ease & proper access of the elements. + theta.resize(class_num); + + // Setting all values in the hasmap by default to 0. + for(int i = class_num - 1; i >= 0; i--){ + for(int j = 0; j < vocab.size(); j++){ + theta[i][vocab[j]] = 0; + } + } + + for(int i = 0; i < inputSet.size(); i++){ + for(int j = 0; j < inputSet[0].size(); j++){ + theta[outputSet[i]][inputSet[i][j]]++; + } + } + + for(int i = 0; i < theta.size(); i++){ + for(int j = 0; j < theta[i].size(); j++){ + theta[i][j] /= priors[i] * y_hat.size(); + } + } + } + + void MultinomialNB::Evaluate(){ + LinAlg alg; + for(int i = 0; i < outputSet.size(); i++){ + // Pr(B | A) * Pr(A) + double score[class_num]; + + // Easy computation of priors, i.e. Pr(C_k) + priors.resize(class_num); + for(int i = 0; i < outputSet.size(); i++){ + priors[int(outputSet[i])]++; + } + priors = alg.scalarMultiply( double(1)/double(outputSet.size()), priors); + + // Evaluating Theta... + computeTheta(); + + for(int j = 0; j < inputSet.size(); j++){ + for(int k = 0; k < vocab.size(); k++){ + if(inputSet[i][j] == vocab[k]){ + for(int p = class_num - 1; p >= 0; p--){ + score[p] += log(theta[i][vocab[k]]); + } + } + } + } + + for(int i = 0; i < priors.size(); i++){ + score[i] += log(priors[i]); + score[i] = exp(score[i]); + } + + for(int i = 0; i < 2; i++){ + std::cout << score[i] << std::endl; + } + + // Assigning the traning example's y_hat to a class + y_hat[i] = std::distance(score, std::max_element(score, score + sizeof(score) / sizeof(double))); + } + } +} \ No newline at end of file diff --git a/MLPP/MultinomialNB/MultinomialNB.hpp b/MLPP/MultinomialNB/MultinomialNB.hpp new file mode 100644 index 0000000..3fadcdf --- /dev/null +++ b/MLPP/MultinomialNB/MultinomialNB.hpp @@ -0,0 +1,45 @@ +// +// MultinomialNB.hpp +// +// Created by Marc Melikyan on 1/17/21. 
+// + +#ifndef MultinomialNB_hpp +#define MultinomialNB_hpp + +#include +#include + +namespace MLPP{ + class MultinomialNB{ + + public: + MultinomialNB(std::vector> inputSet, std::vector outputSet, int class_num); + std::vector modelSetTest(std::vector> X); + double modelTest(std::vector x); + double score(); + + private: + + void computeTheta(); + void Evaluate(); + + // Model Params + std::vector priors; + + std::vector> theta; + std::vector vocab; + int class_num; + + // Datasets + std::vector> inputSet; + std::vector outputSet; + std::vector y_hat; + + + + + }; + + #endif /* MultinomialNB_hpp */ +} \ No newline at end of file diff --git a/MLPP/OutlierFinder/OutlierFinder.cpp b/MLPP/OutlierFinder/OutlierFinder.cpp new file mode 100644 index 0000000..836bac5 --- /dev/null +++ b/MLPP/OutlierFinder/OutlierFinder.cpp @@ -0,0 +1,43 @@ +// +// OutlierFinder.cpp +// +// Created by Marc Melikyan on 11/13/20. +// + +#include "OutlierFinder.hpp" +#include "Stat/Stat.hpp" +#include + +namespace MLPP{ + OutlierFinder::OutlierFinder(int threshold) + : threshold(threshold){ + + } + + std::vector> OutlierFinder::modelSetTest(std::vector> inputSet){ + Stat op; + std::vector> outliers; + outliers.resize(inputSet.size()); + for(int i = 0; i < inputSet.size(); i++){ + for(int j = 0; j < inputSet[i].size(); j++){ + double z = (inputSet[i][j] - op.mean(inputSet[i])) / op.standardDeviation(inputSet[i]); + if(abs(z) > threshold){ + outliers[i].push_back(inputSet[i][j]); + } + } + } + return outliers; + } + + std::vector OutlierFinder::modelTest(std::vector inputSet){ + Stat op; + std::vector outliers; + for(int i = 0; i < inputSet.size(); i++){ + double z = (inputSet[i] - op.mean(inputSet)) / op.standardDeviation(inputSet); + if(abs(z) > threshold){ + outliers.push_back(inputSet[i]); + } + } + return outliers; + } +} \ No newline at end of file diff --git a/MLPP/OutlierFinder/OutlierFinder.hpp b/MLPP/OutlierFinder/OutlierFinder.hpp new file mode 100644 index 0000000..eaaf648 --- /dev/null +++ b/MLPP/OutlierFinder/OutlierFinder.hpp @@ -0,0 +1,27 @@ +// +// OutlierFinder.hpp +// +// Created by Marc Melikyan on 11/13/20. +// + +#ifndef OutlierFinder_hpp +#define OutlierFinder_hpp + +#include + +namespace MLPP{ + class OutlierFinder{ + public: + // Cnstr + OutlierFinder(int threshold); + + std::vector> modelSetTest(std::vector> inputSet); + std::vector modelTest(std::vector inputSet); + + // Variables required + int threshold; + + }; +} + +#endif /* OutlierFinder_hpp */ diff --git a/MLPP/OutlierFinder/OutlierFinder.hpp.gch b/MLPP/OutlierFinder/OutlierFinder.hpp.gch new file mode 100644 index 0000000..fe442bb Binary files /dev/null and b/MLPP/OutlierFinder/OutlierFinder.hpp.gch differ diff --git a/MLPP/OutputLayer/OutputLayer.cpp b/MLPP/OutputLayer/OutputLayer.cpp new file mode 100644 index 0000000..0553c9b --- /dev/null +++ b/MLPP/OutputLayer/OutputLayer.cpp @@ -0,0 +1,113 @@ +// +// OutputLayer.cpp +// +// Created by Marc Melikyan on 11/4/20. 
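// For reference, OutlierFinder above flags a value as an outlier when its z-score
// z = (x - mean) / sigma exceeds the threshold in absolute value.
// Illustrative usage sketch (not in the committed sources; data and include path assumed):
#include "MLPP/OutlierFinder/OutlierFinder.hpp"
#include <vector>

int main(){
    MLPP::OutlierFinder finder(3);                             // conventional |z| > 3 rule
    std::vector<double> samples = {10, 11, 9, 10, 12, 95};     // 95 is far from the rest
    std::vector<double> outliers = finder.modelTest(samples);  // expected to contain 95
    return 0;
}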
+// + +#include "OutputLayer.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Utilities/Utilities.hpp" + +#include +#include + +namespace MLPP { + OutputLayer::OutputLayer(int n_hidden, int outputSize, std::string activation, std::string cost, std::vector> input, std::string weightInit, std::string reg, double lambda, double alpha) + : n_hidden(n_hidden), outputSize(outputSize), activation(activation), cost(cost), input(input), weightInit(weightInit), reg(reg), lambda(lambda), alpha(alpha) + { + weights = Utilities::weightInitialization(n_hidden, weightInit); + bias = Utilities::biasInitialization(); + + activation_map["Linear"] = &Activation::linear; + activationTest_map["Linear"] = &Activation::linear; + + activation_map["Sigmoid"] = &Activation::sigmoid; + activationTest_map["Sigmoid"] = &Activation::sigmoid; + + activation_map["Swish"] = &Activation::swish; + activationTest_map["Swish"] = &Activation::swish; + + activation_map["Softplus"] = &Activation::softplus; + activationTest_map["Softplus"] = &Activation::softplus; + + activation_map["CLogLog"] = &Activation::cloglog; + activationTest_map["CLogLog"] = &Activation::cloglog; + + activation_map["Sinh"] = &Activation::sinh; + activationTest_map["Sinh"] = &Activation::sinh; + + activation_map["Cosh"] = &Activation::cosh; + activationTest_map["Cosh"] = &Activation::cosh; + + activation_map["Tanh"] = &Activation::tanh; + activationTest_map["Tanh"] = &Activation::tanh; + + activation_map["Csch"] = &Activation::csch; + activationTest_map["Csch"] = &Activation::csch; + + activation_map["Sech"] = &Activation::sech; + activationTest_map["Sech"] = &Activation::sech; + + activation_map["Coth"] = &Activation::coth; + activationTest_map["Coth"] = &Activation::coth; + + activation_map["Arsinh"] = &Activation::arsinh; + activationTest_map["Arsinh"] = &Activation::arsinh; + + activation_map["Arcosh"] = &Activation::arcosh; + activationTest_map["Arcosh"] = &Activation::arcosh; + + activation_map["Artanh"] = &Activation::artanh; + activationTest_map["Artanh"] = &Activation::artanh; + + activation_map["Arcsch"] = &Activation::arcsch; + activationTest_map["Arcsch"] = &Activation::arcsch; + + activation_map["Arsech"] = &Activation::arsech; + activationTest_map["Arsech"] = &Activation::arsech; + + activation_map["Arcoth"] = &Activation::arcoth; + activationTest_map["Arcoth"] = &Activation::arcoth; + + activation_map["GaussianCDF"] = &Activation::gaussianCDF; + activationTest_map["GaussianCDF"] = &Activation::gaussianCDF; + + activation_map["RELU"] = &Activation::RELU; + activationTest_map["RELU"] = &Activation::RELU; + + activation_map["GELU"] = &Activation::GELU; + activationTest_map["GELU"] = &Activation::GELU; + + activation_map["UnitStep"] = &Activation::unitStep; + activationTest_map["UnitStep"] = &Activation::unitStep; + + costDeriv_map["MSE"] = &Cost::MSEDeriv; + cost_map["MSE"] = &Cost::MSE; + costDeriv_map["RMSE"] = &Cost::RMSEDeriv; + cost_map["RMSE"] = &Cost::RMSE; + costDeriv_map["MAE"] = &Cost::MAEDeriv; + cost_map["MAE"] = &Cost::MAE; + costDeriv_map["MBE"] = &Cost::MBEDeriv; + cost_map["MBE"] = &Cost::MBE; + costDeriv_map["LogLoss"] = &Cost::LogLossDeriv; + cost_map["LogLoss"] = &Cost::LogLoss; + costDeriv_map["CrossEntropy"] = &Cost::CrossEntropyDeriv; + cost_map["CrossEntropy"] = &Cost::CrossEntropy; + costDeriv_map["HingeLoss"] = &Cost::HingeLossDeriv; + cost_map["HingeLoss"] = &Cost::HingeLoss; + } + + void OutputLayer::forwardPass(){ + LinAlg alg; + Activation avn; + z = alg.scalarAdd(bias, alg.mat_vec_mult(input, weights)); + a = 
(avn.*activation_map[activation])(z, 0); + } + + void OutputLayer::Test(std::vector x){ + LinAlg alg; + Activation avn; + z_test = alg.dot(weights, x) + bias; + a_test = (avn.*activationTest_map[activation])(z_test, 0); + } +} \ No newline at end of file diff --git a/MLPP/OutputLayer/OutputLayer.hpp b/MLPP/OutputLayer/OutputLayer.hpp new file mode 100644 index 0000000..5c4de53 --- /dev/null +++ b/MLPP/OutputLayer/OutputLayer.hpp @@ -0,0 +1,57 @@ +// +// OutputLayer.hpp +// +// Created by Marc Melikyan on 11/4/20. +// + +#ifndef OutputLayer_hpp +#define OutputLayer_hpp + +#include "Activation/Activation.hpp" +#include "Cost/Cost.hpp" + +#include +#include +#include + +namespace MLPP { + class OutputLayer{ + public: + OutputLayer(int n_hidden, int outputSize, std::string activation, std::string cost, std::vector> input, std::string weightInit, std::string reg, double lambda, double alpha); + + int n_hidden; + int outputSize; + std::string activation; + std::string cost; + + std::vector> input; + + std::vector weights; + double bias; + + std::vector z; + std::vector a; + + std::map (Activation::*)(std::vector, bool)> activation_map; + std::map activationTest_map; + std::map, std::vector)> cost_map; + std::map (Cost::*)(std::vector, std::vector)> costDeriv_map; + + double z_test; + double a_test; + + std::vector delta; + + // Regularization Params + std::string reg; + double lambda; /* Regularization Parameter */ + double alpha; /* This is the controlling param for Elastic Net*/ + + std::string weightInit; + + void forwardPass(); + void Test(std::vector x); + }; +} + +#endif /* OutputLayer_hpp */ diff --git a/MLPP/PCA/PCA.cpp b/MLPP/PCA/PCA.cpp new file mode 100644 index 0000000..4e4e8a1 --- /dev/null +++ b/MLPP/PCA/PCA.cpp @@ -0,0 +1,56 @@ +// +// PCA.cpp +// +// Created by Marc Melikyan on 10/2/20. +// + +#include "PCA.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Data/Data.hpp" + +#include +#include + +namespace MLPP{ + + PCA::PCA(std::vector> inputSet, int k) + : inputSet(inputSet), k(k) + { + + } + + std::vector> PCA::principalComponents(){ + LinAlg alg; + Data data; + + auto [U, S, Vt] = alg.SVD(alg.cov(inputSet)); + X_normalized = data.meanCentering(inputSet); + U_reduce.resize(U.size()); + for(int i = 0; i < k; i++){ + for(int j = 0; j < U.size(); j++){ + U_reduce[j].push_back(U[j][i]); + } + } + Z = alg.matmult(alg.transpose(U_reduce), X_normalized); + return Z; + } + // Simply tells us the percentage of variance maintained. + double PCA::score(){ + LinAlg alg; + std::vector> X_approx = alg.matmult(U_reduce, Z); + double num, den = 0; + for(int i = 0; i < X_normalized.size(); i++){ + num += alg.norm_sq(alg.subtraction(X_normalized[i], X_approx[i])); + } + num /= X_normalized.size(); + for(int i = 0; i < X_normalized.size(); i++){ + den += alg.norm_sq(X_normalized[i]); + } + + den /= X_normalized.size(); + if(den == 0){ + den+=1e-10; // For numerical sanity as to not recieve a domain error + } + return 1 - num/den; + } +} diff --git a/MLPP/PCA/PCA.hpp b/MLPP/PCA/PCA.hpp new file mode 100644 index 0000000..59e1d75 --- /dev/null +++ b/MLPP/PCA/PCA.hpp @@ -0,0 +1,28 @@ +// +// PCA.hpp +// +// Created by Marc Melikyan on 10/2/20. 
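PCA::principalComponents() takes the SVD of the covariance matrix, keeps the first k columns of U, and projects the mean-centered data; PCA::score() then reports the fraction of variance retained, i.e. 1 minus the mean reconstruction error over the mean squared norm. A hedged usage sketch follows; the include path, data orientation, and toy numbers are assumptions.

#include <iostream>
#include <vector>
#include "PCA/PCA.hpp"

int main() {
    // Toy data; the row/column orientation is assumed to match inputSet as used above.
    std::vector<std::vector<double>> X = {
        {2.5, 0.5, 2.2, 1.9, 3.1},
        {2.4, 0.7, 2.9, 2.2, 3.0}
    };

    MLPP::PCA pca(X, 1);                    // keep k = 1 principal component
    auto Z = pca.principalComponents();     // projected (reduced) data
    std::cout << "Reduced to " << Z.size() << " x " << Z[0].size() << std::endl;
    std::cout << "Variance retained: " << pca.score() << std::endl;
    return 0;
}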
+// + +#ifndef PCA_hpp +#define PCA_hpp + +#include + +namespace MLPP{ + class PCA{ + + public: + PCA(std::vector> inputSet, int k); + std::vector> principalComponents(); + double score(); + private: + std::vector> inputSet; + std::vector> X_normalized; + std::vector> U_reduce; + std::vector> Z; + int k; + }; +} + +#endif /* PCA_hpp */ diff --git a/MLPP/ProbitReg/ProbitReg.cpp b/MLPP/ProbitReg/ProbitReg.cpp new file mode 100644 index 0000000..20dfd5b --- /dev/null +++ b/MLPP/ProbitReg/ProbitReg.cpp @@ -0,0 +1,249 @@ +// +// ProbitReg.cpp +// +// Created by Marc Melikyan on 10/2/20. +// + +#include "ProbitReg.hpp" +#include "Activation/Activation.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Regularization/Reg.hpp" +#include "Utilities/Utilities.hpp" +#include "Cost/Cost.hpp" + +#include +#include + +namespace MLPP{ + ProbitReg::ProbitReg(std::vector> inputSet, std::vector outputSet, std::string reg, double lambda, double alpha) + : inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha) + { + y_hat.resize(n); + weights = Utilities::weightInitialization(k); + bias = Utilities::biasInitialization(); + } + + std::vector ProbitReg::modelSetTest(std::vector> X){ + return Evaluate(X); + } + + double ProbitReg::modelTest(std::vector x){ + return Evaluate(x); + } + + void ProbitReg::gradientDescent(double learning_rate, int max_epoch, bool UI){ + Reg regularization; + Activation avn; + LinAlg alg; + double cost_prev = 0; + int epoch = 1; + forwardPass(); + + while(true){ + cost_prev = Cost(y_hat, outputSet); + + std::vector error = alg.subtraction(y_hat, outputSet); + + // Calculating the weight gradients + weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputSet), alg.hadamard_product(error, avn.gaussianCDF(z, 1))))); + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Calculating the bias gradients + bias -= learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.gaussianCDF(z, 1))) / n; + forwardPass(); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); + Utilities::UI(weights, bias); + } + epoch++; + + if(epoch > max_epoch) { break; } + } + } + + void ProbitReg::MLE(double learning_rate, int max_epoch, bool UI){ + Activation avn; + Reg regularization; + LinAlg alg; + double cost_prev = 0; + int epoch = 1; + forwardPass(); + + while(true){ + cost_prev = Cost(y_hat, outputSet); + + std::vector error = alg.subtraction(outputSet, y_hat); + + // Calculating the weight gradients + weights = alg.addition(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputSet), alg.hadamard_product(error, avn.gaussianCDF(z, 1))))); + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Calculating the bias gradients + bias += learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.gaussianCDF(z, 1))) / n; + forwardPass(); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); + Utilities::UI(weights, bias); + } + epoch++; + + if(epoch > max_epoch) { break; } + } + } + + void ProbitReg::SGD(double learning_rate, int max_epoch, bool UI){ + LinAlg alg; + Activation avn; + Reg regularization; + Utilities util; + double cost_prev = 0; + int epoch = 1; + + while(true){ + std::random_device rd; + std::default_random_engine generator(rd()); + std::uniform_int_distribution distribution(0, int(n - 1)); + int outputIndex = distribution(generator); + + double y_hat = 
Evaluate(inputSet[outputIndex]); + double z = propagate(inputSet[outputIndex]); + cost_prev = Cost({y_hat}, {outputSet[outputIndex]}); + + + for(int i = 0; i < k; i++){ + + // Calculating the weight gradients + + double w_gradient = (y_hat - outputSet[outputIndex]) * ((1 / sqrt(2 * M_PI)) * exp(-z * z / 2)) * inputSet[outputIndex][i]; + + std::cout << exp(-z * z / 2) << std::endl; + // Weight updation + weights[i] -= learning_rate * w_gradient; + } + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Calculating the bias gradients + double b_gradient = (y_hat - outputSet[outputIndex]); + + // Bias updation + bias -= learning_rate * b_gradient * ((1 / sqrt(2 * M_PI)) * exp(-z * z / 2)); + y_hat = Evaluate({inputSet[outputIndex]}); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost({y_hat}, {outputSet[outputIndex]})); + Utilities::UI(weights, bias); + } + epoch++; + + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + void ProbitReg::MBGD(double learning_rate, int max_epoch, int miniBatch_size, bool UI){ + Reg regularization; + Activation avn; + LinAlg alg; + double cost_prev = 0; + int epoch = 1; + + int n_miniBatch = n/miniBatch_size; + + std::vector>> inputMiniBatches; + std::vector> outputMiniBatches; + + // Creating the mini-batches + for(int i = 0; i < n_miniBatch; i++){ + std::vector> currentInputSet; + std::vector currentOutputSet; + for(int j = 0; j < n/n_miniBatch; j++){ + currentInputSet.push_back(inputSet[n/n_miniBatch * i + j]); + currentOutputSet.push_back(outputSet[n/n_miniBatch * i + j]); + } + inputMiniBatches.push_back(currentInputSet); + outputMiniBatches.push_back(currentOutputSet); + } + + if(double(n)/double(n_miniBatch) - int(n/n_miniBatch) != 0){ + for(int i = 0; i < n - n/n_miniBatch * n_miniBatch; i++){ + inputMiniBatches[n_miniBatch - 1].push_back(inputSet[n/n_miniBatch * n_miniBatch + i]); + outputMiniBatches[n_miniBatch - 1].push_back(outputSet[n/n_miniBatch * n_miniBatch + i]); + } + } + + while(true){ + for(int i = 0; i < n_miniBatch; i++){ + std::vector y_hat = Evaluate(inputMiniBatches[i]); + std::vector z = propagate(inputMiniBatches[i]); + cost_prev = Cost(y_hat, outputMiniBatches[i]); + + std::vector error = alg.subtraction(y_hat, outputMiniBatches[i]); + + // Calculating the weight gradients + weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/outputMiniBatches.size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), alg.hadamard_product(error, avn.gaussianCDF(z, 1))))); + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Calculating the bias gradients + bias -= learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.gaussianCDF(z, 1))) / outputMiniBatches.size(); + y_hat = Evaluate(inputMiniBatches[i]); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i])); + Utilities::UI(weights, bias); + } + } + epoch++; + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + double ProbitReg::score(){ + Utilities util; + return util.performance(y_hat, outputSet); + } + + void ProbitReg::save(std::string fileName){ + Utilities util; + util.saveParameters(fileName, weights, bias); + } + + double ProbitReg::Cost(std::vector y_hat, std::vector y){ + Reg regularization; + class Cost cost; + return cost.MSE(y_hat, y) + regularization.regTerm(weights, lambda, alpha, reg); + } + + std::vector ProbitReg::Evaluate(std::vector> X){ + LinAlg alg; + Activation avn; + return avn.gaussianCDF(alg.scalarAdd(bias, alg.mat_vec_mult(X, weights))); + } + 
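    // Illustrative note on the updates above (a sketch, not additional library code):
    // the probit model is y_hat = Phi(w.x + b), where Phi is the standard normal CDF.
    // Its derivative is the standard normal PDF,
    //     phi(z) = exp(-z^2 / 2) / sqrt(2 * pi),
    // which is why the weight updates multiply the error either by avn.gaussianCDF(z, 1)
    // or explicitly by (1 / sqrt(2 * M_PI)) * exp(-z * z / 2). For a single example under
    // squared error,
    //     dJ/dw_i = (y_hat - y) * phi(z) * x_i    and    dJ/db = (y_hat - y) * phi(z).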
+ std::vectorProbitReg::propagate(std::vector> X){ + LinAlg alg; + return alg.scalarAdd(bias, alg.mat_vec_mult(X, weights)); + } + + double ProbitReg::Evaluate(std::vector x){ + LinAlg alg; + Activation avn; + return avn.gaussianCDF(alg.dot(weights, x) + bias); + } + + double ProbitReg::propagate(std::vector x){ + LinAlg alg; + return alg.dot(weights, x) + bias; + } + + // gaussianCDF ( wTx + b ) + void ProbitReg::forwardPass(){ + LinAlg alg; + Activation avn; + + z = propagate(inputSet); + y_hat = avn.gaussianCDF(z); + } +} \ No newline at end of file diff --git a/MLPP/ProbitReg/ProbitReg.hpp b/MLPP/ProbitReg/ProbitReg.hpp new file mode 100644 index 0000000..8c026c7 --- /dev/null +++ b/MLPP/ProbitReg/ProbitReg.hpp @@ -0,0 +1,57 @@ +// +// ProbitReg.hpp +// +// Created by Marc Melikyan on 10/2/20. +// + +#ifndef ProbitReg_hpp +#define ProbitReg_hpp + + +#include +#include + +namespace MLPP { + + class ProbitReg{ + + public: + ProbitReg(std::vector> inputSet, std::vector outputSet, std::string reg = "None", double lambda = 0.5, double alpha = 0.5); + std::vector modelSetTest(std::vector> X); + double modelTest(std::vector x); + void gradientDescent(double learning_rate, int max_epoch = 0, bool UI = 1); + void MLE(double learning_rate, int max_epoch = 0, bool UI = 1); + void SGD(double learning_rate, int max_epoch = 0, bool UI = 1); + void MBGD(double learning_rate, int max_epoch, int miniBatch_size, bool UI = 1); + double score(); + void save(std::string fileName); + private: + + double Cost(std::vector y_hat, std::vector y); + + std::vector Evaluate(std::vector> X); + std::vector propagate(std::vector> X); + double Evaluate(std::vector x); + double propagate(std::vector x); + void forwardPass(); + + std::vector> inputSet; + std::vector outputSet; + std::vector z; + std::vector y_hat; + std::vector weights; + double bias; + + int n; + int k; + + // Regularization Params + std::string reg; + double lambda; + double alpha; /* This is the controlling param for Elastic Net*/ + + + }; +} + +#endif /* ProbitReg_hpp */ diff --git a/MLPP/Regularization/Reg.cpp b/MLPP/Regularization/Reg.cpp new file mode 100644 index 0000000..726beb0 --- /dev/null +++ b/MLPP/Regularization/Reg.cpp @@ -0,0 +1,128 @@ +// +// Reg.cpp +// +// Created by Marc Melikyan on 1/16/21. 
+// + +#include +#include +#include "Reg.hpp" + +namespace MLPP{ + + double Reg::regTerm(std::vector weights, double lambda, double alpha, std::string reg){ + if(reg == "Ridge"){ + double reg = 0; + for(int i = 0; i < weights.size(); i++){ + reg += weights[i] * weights[i]; + } + return reg * lambda / 2; + } + else if(reg == "Lasso"){ + double reg = 0; + for(int i = 0; i < weights.size(); i++){ + reg += abs(weights[i]); + } + return reg * lambda; + } + else if(reg == "ElasticNet"){ + double reg = 0; + for(int i = 0; i < weights.size(); i++){ + reg += alpha * abs(weights[i]); // Lasso Reg + reg += ((1 - alpha) / 2) * weights[i] * weights[i]; // Ridge Reg + } + return reg * lambda; + } + return 0; + } + + double Reg::regTerm(std::vector> weights, double lambda, double alpha, std::string reg){ + if(reg == "Ridge"){ + double reg = 0; + for(int i = 0; i < weights.size(); i++){ + for(int j = 0; j < weights[i].size(); j++){ + reg += weights[i][j] * weights[i][j]; + } + } + return reg * lambda / 2; + } + else if(reg == "Lasso"){ + double reg = 0; + for(int i = 0; i < weights.size(); i++){ + for(int j = 0; j < weights[i].size(); j++){ + reg += abs(weights[i][j]); + } + } + return reg * lambda; + } + else if(reg == "ElasticNet"){ + double reg = 0; + for(int i = 0; i < weights.size(); i++){ + for(int j = 0; j < weights[i].size(); j++){ + reg += alpha * abs(weights[i][j]); // Lasso Reg + reg += ((1 - alpha) / 2) * weights[i][j] * weights[i][j]; // Ridge Reg + } + } + return reg * lambda; + } + return 0; + } + + std::vector Reg::regWeights(std::vector weights, double lambda, double alpha, std::string reg){ + for(int i = 0; i < weights.size(); i++){ + weights[i] -= regDerivTerm(weights, lambda, alpha, reg, i); + } + return weights; + } + + std::vector> Reg::regWeights(std::vector> weights, double lambda, double alpha, std::string reg){ + for(int i = 0; i < weights.size(); i++){ + for(int j = 0; j < weights[i].size(); j++){ + weights[i][j] -= regDerivTerm(weights, lambda, alpha, reg, i, j); + } + } + return weights; + } + + double Reg::regDerivTerm(std::vector weights, double lambda, double alpha, std::string reg, int j){ + if(reg == "Ridge"){ + return lambda * weights[j]; + } + else if(reg == "Lasso"){ + return lambda * sign(weights[j]); + } + else if(reg == "ElasticNet"){ + return alpha * lambda * sign(weights[j]) + (1 - alpha) * lambda * weights[j]; + } + else { + return 0; + } + } + + double Reg::regDerivTerm(std::vector> weights, double lambda, double alpha, std::string reg, int i, int j){ + if(reg == "Ridge"){ + return lambda * weights[i][j]; + } + else if(reg == "Lasso"){ + return lambda * sign(weights[i][j]); + } + else if(reg == "ElasticNet"){ + return alpha * lambda * sign(weights[i][j]) + (1 - alpha) * lambda * weights[i][j]; + } + else { + return 0; + } + } + + int Reg::sign(double weight){ + if(weight < 0){ + return -1; + } + else if(weight == 0){ + return 0; + } + else{ + return 1; + } + } +} diff --git a/MLPP/Regularization/Reg.hpp b/MLPP/Regularization/Reg.hpp new file mode 100644 index 0000000..37d2b80 --- /dev/null +++ b/MLPP/Regularization/Reg.hpp @@ -0,0 +1,29 @@ +// +// Reg.hpp +// +// Created by Marc Melikyan on 1/16/21. 
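For reference, the penalties and derivatives implemented in Reg.cpp correspond to the standard forms below, with λ the regularization strength and α the elastic-net mixing parameter (a sketch in conventional notation):

\[
\begin{aligned}
\text{Ridge:} \quad & R(w) = \frac{\lambda}{2}\sum_i w_i^2, & \frac{\partial R}{\partial w_j} &= \lambda w_j,\\
\text{Lasso:} \quad & R(w) = \lambda \sum_i \lvert w_i \rvert, & \frac{\partial R}{\partial w_j} &= \lambda\,\operatorname{sign}(w_j),\\
\text{ElasticNet:} \quad & R(w) = \lambda \sum_i \Big( \alpha \lvert w_i \rvert + \tfrac{1-\alpha}{2} w_i^2 \Big), & \frac{\partial R}{\partial w_j} &= \alpha\lambda\,\operatorname{sign}(w_j) + (1-\alpha)\lambda\, w_j.
\end{aligned}
\]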
+// + +#ifndef Reg_hpp +#define Reg_hpp + +#include + +namespace MLPP{ + class Reg{ + public: + + double regTerm(std::vector weights, double lambda, double alpha, std::string reg); + double regTerm(std::vector> weights, double lambda, double alpha, std::string reg); + + std::vector regWeights(std::vector weights, double lambda, double alpha, std::string reg); + std::vector> regWeights(std::vector> weights, double lambda, double alpha, std::string reg); + + private: + double regDerivTerm(std::vector weights, double lambda, double alpha, std::string reg, int j); + double regDerivTerm(std::vector> weights, double lambda, double alpha, std::string reg, int i, int j); + int sign(double weight); + }; +} + +#endif /* Reg_hpp */ diff --git a/MLPP/SoftmaxNet/SoftmaxNet.cpp b/MLPP/SoftmaxNet/SoftmaxNet.cpp new file mode 100644 index 0000000..2b847c1 --- /dev/null +++ b/MLPP/SoftmaxNet/SoftmaxNet.cpp @@ -0,0 +1,294 @@ +// +// SoftmaxNet.cpp +// +// Created by Marc Melikyan on 10/2/20. +// + +#include "SoftmaxNet.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Data/Data.hpp" +#include "Regularization/Reg.hpp" +#include "Activation/Activation.hpp" +#include "Utilities/Utilities.hpp" +#include "Cost/Cost.hpp" + +#include +#include + +namespace MLPP{ + SoftmaxNet::SoftmaxNet(std::vector> inputSet, std::vector> outputSet, int n_hidden, std::string reg, double lambda, double alpha) + : inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), n_hidden(n_hidden), n_class(outputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha) + { + y_hat.resize(n); + + weights1 = Utilities::weightInitialization(k, n_hidden); + weights2 = Utilities::weightInitialization(n_hidden, n_class); + bias1 = Utilities::biasInitialization(n_hidden); + bias2 = Utilities::biasInitialization(n_class); + } + + std::vector SoftmaxNet::modelTest(std::vector x){ + return Evaluate(x); + } + + std::vector> SoftmaxNet::modelSetTest(std::vector> X){ + return Evaluate(X); + } + + void SoftmaxNet::gradientDescent(double learning_rate, int max_epoch, bool UI){ + Reg regularization; + LinAlg alg; + Activation avn; + + + double cost_prev = 0; + int epoch = 1; + forwardPass(); + + while(true){ + cost_prev = Cost(y_hat, outputSet); + + // Calculating the errors + std::vector> error = alg.subtraction(y_hat, outputSet); + + // Calculating the weight/bias gradients for layer 2 + + std::vector> D2_1 = alg.matmult(alg.transpose(a2), error); + + // weights and bias updation for layer 2 + weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate, D2_1)); + weights2 = regularization.regWeights(weights2, lambda, alpha, reg); + + bias2 = alg.subtractMatrixRows(bias2, alg.scalarMultiply(learning_rate, error)); + + //Calculating the weight/bias for layer 1 + + std::vector> D1_1 = alg.matmult(error, alg.transpose(weights2)); + + std::vector> D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, 1)); + + std::vector> D1_3 = alg.matmult(alg.transpose(inputSet), D1_2); + + + // weight an bias updation for layer 1 + weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate, D1_3)); + weights1 = regularization.regWeights(weights1, lambda, alpha, reg); + + bias1 = alg.subtractMatrixRows(bias1, alg.scalarMultiply(learning_rate, D1_2)); + + forwardPass(); + + // UI PORTION + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); + std::cout << "Layer 1:" << std::endl; + Utilities::UI(weights1, bias1); + std::cout << "Layer 2:" << std::endl; + Utilities::UI(weights2, bias2); + } + epoch++; + + if(epoch > 
max_epoch) { break; } + } + + } + + void SoftmaxNet::SGD(double learning_rate, int max_epoch, bool UI){ + Reg regularization; + LinAlg alg; + Activation avn; + Utilities util; + + double cost_prev = 0; + int epoch = 1; + while(true){ + std::random_device rd; + std::default_random_engine generator(rd()); + std::uniform_int_distribution distribution(0, int(n - 1)); + int outputIndex = distribution(generator); + + std::vector y_hat = Evaluate(inputSet[outputIndex]); + auto [z2, a2] = propagate(inputSet[outputIndex]); + cost_prev = Cost({y_hat}, {outputSet[outputIndex]}); + std::vector error = alg.subtraction(y_hat, outputSet[outputIndex]); + + // Weight updation for layer 2 + std::vector> D2_1 = alg.vecmult(error, a2); + weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate, alg.transpose(D2_1))); + weights2 = regularization.regWeights(weights2, lambda, alpha, reg); + + // Bias updation for layer 2 + bias2 = alg.subtraction(bias2, alg.scalarMultiply(learning_rate, error)); + + // Weight updation for layer 1 + std::vector D1_1 = alg.mat_vec_mult(weights2, error); + std::vector D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, 1)); + std::vector> D1_3 = alg.vecmult(inputSet[outputIndex], D1_2); + + weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate, D1_3)); + weights1 = regularization.regWeights(weights1, lambda, alpha, reg); + // Bias updation for layer 1 + + bias1 = alg.subtraction(bias1, alg.scalarMultiply(learning_rate, D1_2)); + + y_hat = Evaluate(inputSet[outputIndex]); + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost({y_hat}, {outputSet[outputIndex]})); + std::cout << "Layer 1:" << std::endl; + Utilities::UI(weights1, bias1); + std::cout << "Layer 2:" << std::endl; + Utilities::UI(weights2, bias2); + } + epoch++; + + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + void SoftmaxNet::MBGD(double learning_rate, int max_epoch, int miniBatch_size, bool UI){ + Reg regularization; + Activation avn; + LinAlg alg; + double cost_prev = 0; + int epoch = 1; + + int n_miniBatch = n/miniBatch_size; + + std::vector>> inputMiniBatches; + std::vector>> outputMiniBatches; + + //Creating the mini-batches + for(int i = 0; i < n_miniBatch; i++){ + std::vector> currentInputSet; + std::vector> currentOutputSet; + for(int j = 0; j < n/n_miniBatch; j++){ + currentInputSet.push_back(inputSet[n/n_miniBatch * i + j]); + currentOutputSet.push_back(outputSet[n/n_miniBatch * i + j]); + } + inputMiniBatches.push_back(currentInputSet); + outputMiniBatches.push_back(currentOutputSet); + } + + if(double(n)/double(n_miniBatch) - int(n/n_miniBatch) != 0){ + for(int i = 0; i < n - n/n_miniBatch * n_miniBatch; i++){ + inputMiniBatches[n_miniBatch - 1].push_back(inputSet[n/n_miniBatch * n_miniBatch + i]); + outputMiniBatches[n_miniBatch - 1].push_back(outputSet[n/n_miniBatch * n_miniBatch + i]); + } + } + + while(true){ + for(int i = 0; i < n_miniBatch; i++){ + std::vector> y_hat = Evaluate(inputMiniBatches[i]); + auto [z2, a2] = propagate(inputMiniBatches[i]); + cost_prev = Cost(y_hat, outputMiniBatches[i]); + + // Calculating the errors + std::vector> error = alg.subtraction(y_hat, outputMiniBatches[i]); + + // Calculating the weight/bias gradients for layer 2 + + std::vector> D2_1 = alg.matmult(alg.transpose(a2), error); + + // weights and bias updation for layser 2 + weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate, D2_1)); + weights2 = regularization.regWeights(weights2, lambda, alpha, reg); + + // Bias Updation for layer 2 + bias2 = 
alg.subtractMatrixRows(bias2, alg.scalarMultiply(learning_rate, error)); + + //Calculating the weight/bias for layer 1 + + std::vector> D1_1 = alg.matmult(error, alg.transpose(weights2)); + + std::vector> D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, 1)); + + std::vector> D1_3 = alg.matmult(alg.transpose(inputMiniBatches[i]), D1_2); + + + // weight an bias updation for layer 1 + weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate, D1_3)); + weights1 = regularization.regWeights(weights1, lambda, alpha, reg); + + bias1 = alg.subtractMatrixRows(bias1, alg.scalarMultiply(learning_rate, D1_2)); + + y_hat = Evaluate(inputMiniBatches[i]); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i])); + std::cout << "Layer 1:" << std::endl; + Utilities::UI(weights1, bias1); + std::cout << "Layer 2:" << std::endl; + Utilities::UI(weights2, bias2); + } + } + epoch++; + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + double SoftmaxNet::score(){ + Utilities util; + return util.performance(y_hat, outputSet); + } + + void SoftmaxNet::save(std::string fileName){ + Utilities util; + util.saveParameters(fileName, weights1, bias1, 0, 1); + util.saveParameters(fileName, weights2, bias2, 1, 2); + + LinAlg alg; + } + + std::vector> SoftmaxNet::getEmbeddings(){ + return weights1; + } + + double SoftmaxNet::Cost(std::vector> y_hat, std::vector> y){ + Reg regularization; + Data data; + class Cost cost; + return cost.CrossEntropy(y_hat, y) + regularization.regTerm(weights1, lambda, alpha, reg) + regularization.regTerm(weights2, lambda, alpha, reg); + } + + std::vector> SoftmaxNet::Evaluate(std::vector> X){ + LinAlg alg; + Activation avn; + std::vector> z2 = alg.mat_vec_add(alg.matmult(X, weights1), bias1); + std::vector> a2 = avn.sigmoid(z2); + return avn.adjSoftmax(alg.mat_vec_add(alg.matmult(a2, weights2), bias2)); + } + + std::tuple>, std::vector>> SoftmaxNet::propagate(std::vector> X){ + LinAlg alg; + Activation avn; + std::vector> z2 = alg.mat_vec_add(alg.matmult(X, weights1), bias1); + std::vector> a2 = avn.sigmoid(z2); + return {z2, a2}; + } + + std::vector SoftmaxNet::Evaluate(std::vector x){ + LinAlg alg; + Activation avn; + std::vector z2 = alg.addition(alg.mat_vec_mult(alg.transpose(weights1), x), bias1); + std::vector a2 = avn.sigmoid(z2); + return avn.adjSoftmax(alg.addition(alg.mat_vec_mult(alg.transpose(weights2), a2), bias2)); + } + + std::tuple, std::vector> SoftmaxNet::propagate(std::vector x){ + LinAlg alg; + Activation avn; + std::vector z2 = alg.addition(alg.mat_vec_mult(alg.transpose(weights1), x), bias1); + std::vector a2 = avn.sigmoid(z2); + return {z2, a2}; + } + + void SoftmaxNet::forwardPass(){ + LinAlg alg; + Activation avn; + z2 = alg.mat_vec_add(alg.matmult(inputSet, weights1), bias1); + a2 = avn.sigmoid(z2); + y_hat = avn.adjSoftmax(alg.mat_vec_add(alg.matmult(a2, weights2), bias2)); + } +} \ No newline at end of file diff --git a/MLPP/SoftmaxNet/SoftmaxNet.hpp b/MLPP/SoftmaxNet/SoftmaxNet.hpp new file mode 100644 index 0000000..de732b5 --- /dev/null +++ b/MLPP/SoftmaxNet/SoftmaxNet.hpp @@ -0,0 +1,66 @@ +// +// SoftmaxNet.hpp +// +// Created by Marc Melikyan on 10/2/20. 
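Written out, the updates in SoftmaxNet's gradientDescent/SGD/MBGD are plain backpropagation for a one-hidden-layer network with a sigmoid hidden activation and a softmax output trained with cross-entropy, for which the output-layer error reduces to Ŷ − Y (a sketch in matrix notation):

\[
Z_2 = X W_1 + b_1, \qquad A_2 = \sigma(Z_2), \qquad \hat{Y} = \operatorname{softmax}(A_2 W_2 + b_2)
\]
\[
\frac{\partial J}{\partial W_2} = A_2^{\top}(\hat{Y} - Y), \qquad
\frac{\partial J}{\partial W_1} = X^{\top}\big[(\hat{Y} - Y)\, W_2^{\top} \odot \sigma'(Z_2)\big]
\]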
+// + +#ifndef SoftmaxNet_hpp +#define SoftmaxNet_hpp + + +#include +#include + +namespace MLPP { + + class SoftmaxNet{ + + public: + SoftmaxNet(std::vector> inputSet, std::vector> outputSet, int n_hidden, std::string reg = "None", double lambda = 0.5, double alpha = 0.5); + std::vector modelTest(std::vector x); + std::vector> modelSetTest(std::vector> X); + void gradientDescent(double learning_rate, int max_epoch, bool UI = 1); + void SGD(double learning_rate, int max_epoch, bool UI = 1); + void MBGD(double learning_rate, int max_epoch, int miniBatch_size, bool UI = 1); + double score(); + void save(std::string fileName); + + std::vector> getEmbeddings(); // This class is used (mostly) for word2Vec. This function returns our embeddings. + private: + + double Cost(std::vector> y_hat, std::vector> y); + + std::vector> Evaluate(std::vector> X); + std::tuple>, std::vector>> propagate(std::vector> X); + std::vector Evaluate(std::vector x); + std::tuple, std::vector> propagate(std::vector x); + void forwardPass(); + + std::vector> inputSet; + std::vector> outputSet; + std::vector> y_hat; + + std::vector> weights1; + std::vector> weights2; + + std::vector bias1; + std::vector bias2; + + std::vector> z2; + std::vector> a2; + + int n; + int k; + int n_class; + int n_hidden; + + // Regularization Params + std::string reg; + double lambda; + double alpha; /* This is the controlling param for Elastic Net*/ + + + }; +} + +#endif /* SoftmaxNet_hpp */ diff --git a/MLPP/SoftmaxReg/SoftmaxReg.cpp b/MLPP/SoftmaxReg/SoftmaxReg.cpp new file mode 100644 index 0000000..fb8b34e --- /dev/null +++ b/MLPP/SoftmaxReg/SoftmaxReg.cpp @@ -0,0 +1,213 @@ +// +// SoftmaxReg.cpp +// +// Created by Marc Melikyan on 10/2/20. +// + +#include "SoftmaxReg.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Regularization/Reg.hpp" +#include "Activation/Activation.hpp" +#include "Utilities/Utilities.hpp" +#include "Cost/Cost.hpp" + +#include +#include + +namespace MLPP{ + SoftmaxReg::SoftmaxReg(std::vector> inputSet, std::vector> outputSet, std::string reg, double lambda, double alpha) + : inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), n_class(outputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha) + { + y_hat.resize(n); + weights = Utilities::weightInitialization(k, n_class); + bias = Utilities::biasInitialization(n_class); + } + + std::vector SoftmaxReg::modelTest(std::vector x){ + return Evaluate(x); + + } + + std::vector> SoftmaxReg::modelSetTest(std::vector> X){ + return Evaluate(X); + } + + void SoftmaxReg::gradientDescent(double learning_rate, int max_epoch, bool UI){ + Reg regularization; + LinAlg alg; + double cost_prev = 0; + int epoch = 1; + forwardPass(); + + while(true){ + cost_prev = Cost(y_hat, outputSet); + std::vector> error = alg.subtraction(y_hat, outputSet); + + + //Calculating the weight gradients + std::vector> w_gradient = alg.matmult(alg.transpose(inputSet), error); + + //Weight updation + weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, w_gradient)); + weights = regularization.regWeights(weights, lambda, alpha, reg); + + + // Calculating the bias gradients + //double b_gradient = alg.sum_elements(error); + + // Bias Updation + bias = alg.subtractMatrixRows(bias, alg.scalarMultiply(learning_rate, error)); + + forwardPass(); + + // UI PORTION + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); + Utilities::UI(weights, bias); + } + epoch++; + + if(epoch > max_epoch) { break; } + } + } + + void SoftmaxReg::SGD(double 
learning_rate, int max_epoch, bool UI){ + Reg regularization; + LinAlg alg; + Utilities util; + double cost_prev = 0; + int epoch = 1; + + while(true){ + std::random_device rd; + std::default_random_engine generator(rd()); + std::uniform_int_distribution distribution(0, int(n - 1)); + double outputIndex = distribution(generator); + + std::vector y_hat = Evaluate(inputSet[outputIndex]); + cost_prev = Cost({y_hat}, {outputSet[outputIndex]}); + + // Calculating the weight gradients + std::vector> w_gradient = alg.vecmult(inputSet[outputIndex], alg.subtraction(y_hat, outputSet[outputIndex])); + + // Weight Updation + weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, w_gradient)); + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Calculating the bias gradients + std::vector b_gradient = alg.subtraction(y_hat, outputSet[outputIndex]); + + // Bias updation + bias = alg.subtraction(bias, alg.scalarMultiply(learning_rate, b_gradient)); + + y_hat = Evaluate({inputSet[outputIndex]}); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost({y_hat}, {outputSet[outputIndex]})); + Utilities::UI(weights, bias); + } + epoch++; + + if(epoch > max_epoch) { break; } + } + forwardPass(); + + } + + void SoftmaxReg::MBGD(double learning_rate, int max_epoch, int miniBatch_size, bool UI){ + Reg regularization; + LinAlg alg; + double cost_prev = 0; + int epoch = 1; + + int n_miniBatch = n/miniBatch_size; + + std::vector>> inputMiniBatches; + std::vector>> outputMiniBatches; + + // Creating the mini-batches + for(int i = 0; i < n_miniBatch; i++){ + std::vector> currentInputSet; + std::vector> currentOutputSet; + for(int j = 0; j < n/n_miniBatch; j++){ + currentInputSet.push_back(inputSet[n/n_miniBatch * i + j]); + currentOutputSet.push_back(outputSet[n/n_miniBatch * i + j]); + } + inputMiniBatches.push_back(currentInputSet); + outputMiniBatches.push_back(currentOutputSet); + } + + if(double(n)/double(n_miniBatch) - int(n/n_miniBatch) != 0){ + for(int i = 0; i < n - n/n_miniBatch * n_miniBatch; i++){ + inputMiniBatches[n_miniBatch - 1].push_back(inputSet[n/n_miniBatch * n_miniBatch + i]); + outputMiniBatches[n_miniBatch - 1].push_back(outputSet[n/n_miniBatch * n_miniBatch + i]); + } + } + + while(true){ + for(int i = 0; i < n_miniBatch; i++){ + std::vector> y_hat = Evaluate(inputMiniBatches[i]); + cost_prev = Cost(y_hat, outputMiniBatches[i]); + + std::vector> error = alg.subtraction(y_hat, outputMiniBatches[i]); + + // Calculating the weight gradients + std::vector> w_gradient = alg.matmult(alg.transpose(inputMiniBatches[i]), error); + + //Weight updation + weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, w_gradient)); + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Calculating the bias gradients + bias = alg.subtractMatrixRows(bias, alg.scalarMultiply(learning_rate, error)); + y_hat = Evaluate(inputMiniBatches[i]); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i])); + Utilities::UI(weights, bias); + } + } + epoch++; + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + double SoftmaxReg::score(){ + Utilities util; + return util.performance(y_hat, outputSet); + } + + void SoftmaxReg::save(std::string fileName){ + Utilities util; + util.saveParameters(fileName, weights, bias); + } + + double SoftmaxReg::Cost(std::vector> y_hat, std::vector> y){ + Reg regularization; + class Cost cost; + return cost.CrossEntropy(y_hat, y) + regularization.regTerm(weights, lambda, alpha, reg); + } 
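    // Illustrative note (a sketch, not additional library code): with Y_hat = softmax(X W + b)
    // and a cross-entropy cost, the batch gradients used above reduce to
    //     dJ/dW = X^T (Y_hat - Y)    and    dJ/db = column sums of (Y_hat - Y),
    // which is why the error term is simply alg.subtraction(y_hat, outputSet) with no
    // separate softmax-derivative factor.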
+ + std::vector SoftmaxReg::Evaluate(std::vector x){ + LinAlg alg; + Activation avn; + return avn.softmax(alg.addition(bias, alg.mat_vec_mult(alg.transpose(weights), x))); + + } + + std::vector> SoftmaxReg::Evaluate(std::vector> X){ + LinAlg alg; + Activation avn; + + return avn.softmax(alg.mat_vec_add(alg.matmult(X, weights), bias)); + } + + // softmax ( wTx + b ) + void SoftmaxReg::forwardPass(){ + LinAlg alg; + Activation avn; + + y_hat = avn.softmax(alg.mat_vec_add(alg.matmult(inputSet, weights), bias)); + } +} \ No newline at end of file diff --git a/MLPP/SoftmaxReg/SoftmaxReg.hpp b/MLPP/SoftmaxReg/SoftmaxReg.hpp new file mode 100644 index 0000000..22c4ccd --- /dev/null +++ b/MLPP/SoftmaxReg/SoftmaxReg.hpp @@ -0,0 +1,54 @@ +// +// SoftmaxReg.hpp +// +// Created by Marc Melikyan on 10/2/20. +// + +#ifndef SoftmaxReg_hpp +#define SoftmaxReg_hpp + + +#include +#include + +namespace MLPP { + + class SoftmaxReg{ + + public: + SoftmaxReg(std::vector> inputSet, std::vector> outputSet, std::string reg = "None", double lambda = 0.5, double alpha = 0.5); + std::vector modelTest(std::vector x); + std::vector> modelSetTest(std::vector> X); + void gradientDescent(double learning_rate, int max_epoch, bool UI = 1); + void SGD(double learning_rate, int max_epoch, bool UI = 1); + void MBGD(double learning_rate, int max_epoch, int miniBatch_size, bool UI = 1); + double score(); + void save(std::string fileName); + private: + + double Cost(std::vector> y_hat, std::vector> y); + + std::vector> Evaluate(std::vector> X); + std::vector Evaluate(std::vector x); + void forwardPass(); + + std::vector> inputSet; + std::vector> outputSet; + std::vector> y_hat; + std::vector> weights; + std::vector bias; + + int n; + int k; + int n_class; + + // Regularization Params + std::string reg; + double lambda; + double alpha; /* This is the controlling param for Elastic Net*/ + + + }; +} + +#endif /* SoftmaxReg_hpp */ diff --git a/MLPP/Stat/Stat.cpp b/MLPP/Stat/Stat.cpp new file mode 100644 index 0000000..5120e59 --- /dev/null +++ b/MLPP/Stat/Stat.cpp @@ -0,0 +1,168 @@ +// +// Stat.cpp +// +// Created by Marc Melikyan on 9/29/20. 
+// + +#include "Stat.hpp" +#include "Activation/Activation.hpp" +#include + +namespace MLPP{ + double Stat::b0Estimation(std::vector x, std::vector y){ + return mean(y) - b1Estimation(x, y) * mean(x); + } + + double Stat::b1Estimation(std::vector x, std::vector y){ + return covariance(x, y) / variance(x); + } + + double Stat::mean(std::vector x){ + double sum = 0; + for(int i = 0; i < x.size(); i++){ + sum += x[i]; + } + return sum / x.size(); + } + + double Stat::variance(std::vector x){ + double sum = 0; + for(int i = 0; i < x.size(); i++){ + sum += (x[i] - mean(x)) * (x[i] - mean(x)); + } + return sum / (x.size() - 1); + } + + double Stat::covariance(std::vector x, std::vector y){ + double sum = 0; + for(int i = 0; i < x.size(); i++){ + sum += (x[i] - mean(x)) * (y[i] - mean(y)); + } + return sum / (x.size() - 1); + } + + double Stat::correlation(std::vector x, std::vector y){ + return covariance(x, y) / (standardDeviation(x) * standardDeviation(y)); + } + + double Stat::R2(std::vector x, std::vector y){ + return correlation(x, y) * correlation(x, y); + } + + double Stat::weightedMean(std::vector x, std::vector weights){ + double sum = 0; + double weights_sum = 0; + for(int i = 0; i < x.size(); i++){ + sum += x[i] * weights[i]; + weights_sum += weights[i]; + } + return sum / weights_sum; + } + + double Stat::geometricMean(std::vector x){ + double product = 1; + for(int i = 0; i < x.size(); i++){ + product *= x[i]; + } + return std::pow(product, 1.0/x.size()); + } + + double Stat::harmonicMean(std::vector x){ + double sum = 0; + for(int i = 0; i < x.size(); i++){ + sum += 1/x[i]; + } + return x.size()/sum; + } + + double Stat::RMS(std::vector x){ + double sum = 0; + for(int i = 0; i < x.size(); i++){ + sum += x[i] * x[i]; + } + return sqrt(sum / x.size()); + } + + double Stat::powerMean(std::vector x, double p){ + double sum = 0; + for(int i = 0; i < x.size(); i++){ + sum += pow(x[i], p); + } + return pow(sum / x.size(), 1/p); + } + + double Stat::lehmerMean(std::vector x, double p){ + double num = 0; + double den = 0; + for(int i = 0; i < x.size(); i++){ + num += pow(x[i], p); + den += pow(x[i], p - 1); + } + return num/den; + } + + double Stat::weightedLehmerMean(std::vector x, std::vector weights, double p){ + double num = 0; + double den = 0; + for(int i = 0; i < x.size(); i++){ + num += weights[i] * pow(x[i], p); + den += weights[i] * pow(x[i], p - 1); + } + return num/den; + } + + double Stat::heronianMean(double A, double B){ + return (A + sqrt(A * B) + B) / 3; + } + + double Stat::contraharmonicMean(std::vector x){ + return lehmerMean(x, 2); + } + + double Stat::heinzMean(double A, double B, double x){ + return (pow(A, x) * pow(B, 1 - x) + pow(A, 1 - x) * pow(B, x)) / 2; + } + + double Stat::neumanSandorMean(double a, double b){ + Activation avn; + return (a - b) / 2 * avn.arsinh((a - b)/(a + b)); + } + + double Stat::stolarskyMean(double x, double y, double p){ + if(x == y){ + return x; + } + return pow((pow(x, p) - pow(y, p)) / (p * (x - y)), 1/(p - 1)); + } + + double Stat::identricMean(double x, double y){ + if(x == y){ + return x; + } + return (1/M_E) * pow(pow(x, x) / pow(y, y), 1/(x-y)); + } + + double Stat::logMean(double x, double y){ + if(x == y){ + return x; + } + return (y - x) / (log(y) - log(x)); + } + + double Stat::standardDeviation(std::vector x){ + return std::sqrt(variance(x)); + } + + double Stat::absAvgDeviation(std::vector x){ + double sum = 0; + for(int i = 0; i < x.size(); i++){ + sum += std::abs(x[i] - mean(x)); + } + return sum / x.size(); + } + + 
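    // Illustrative note (sketch): b0Estimation and b1Estimation above are the ordinary
    // least-squares estimates for the univariate fit y_hat = b0 + b1 * x:
    //     b1 = sum_i (x_i - mean(x)) * (y_i - mean(y)) / sum_i (x_i - mean(x))^2
    //     b0 = mean(y) - b1 * mean(x)
    // The (n - 1) normalizations in covariance() and variance() cancel in the b1 ratio.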
double Stat::chebyshevIneq(double k){ + //Pr(|X - mu| >= k * sigma) <= 1/k^2, X may or may not belong to a Gaussian Distribution + return 1 - 1 / (k * k); + } +} \ No newline at end of file diff --git a/MLPP/Stat/Stat.hpp b/MLPP/Stat/Stat.hpp new file mode 100644 index 0000000..5997480 --- /dev/null +++ b/MLPP/Stat/Stat.hpp @@ -0,0 +1,53 @@ +// +// Stat.hpp +// +// Created by Marc Melikyan on 9/29/20. +// + +#ifndef Stat_hpp +#define Stat_hpp + +#include + +namespace MLPP{ + class Stat{ + + public: + double b0Estimation(std::vector x, std::vector y); + double b1Estimation(std::vector x, std::vector y); + + // Statistical Functions + double mean(std::vector x); + double variance(std::vector x); + double covariance(std::vector x, std::vector y); + double correlation(std::vector x, std::vector y); + double R2(std::vector x, std::vector y); + + // Extras + double weightedMean(std::vector x, std::vector weights); + double geometricMean(std::vector x); + double harmonicMean(std::vector x); + double RMS(std::vector x); + double powerMean(std::vector x, double p); + double lehmerMean(std::vector x, double p); + double weightedLehmerMean(std::vector x, std::vector weights, double p); + double contraharmonicMean(std::vector x); + double heronianMean(double A, double B); + double heinzMean(double A, double B, double x); + double neumanSandorMean(double a, double b); + double stolarskyMean(double x, double y, double p); + double identricMean(double x, double y); + double logMean(double x, double y); + double standardDeviation(std::vector x); + double absAvgDeviation(std::vector x); + double chebyshevIneq(double k); + + private: + + + + + }; +} + +#endif /* Stat_hpp */ diff --git a/MLPP/TanhReg/TanhReg.cpp b/MLPP/TanhReg/TanhReg.cpp new file mode 100644 index 0000000..89984a5 --- /dev/null +++ b/MLPP/TanhReg/TanhReg.cpp @@ -0,0 +1,222 @@ +// +// TanhReg.cpp +// +// Created by Marc Melikyan on 10/2/20. 
+// + +#include "TanhReg.hpp" +#include "Activation/Activation.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Regularization/Reg.hpp" +#include "Utilities/Utilities.hpp" +#include "Cost/Cost.hpp" + +#include +#include + +namespace MLPP{ + TanhReg::TanhReg(std::vector> inputSet, std::vector outputSet, std::string reg, double lambda, double alpha) + : inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha) + { + y_hat.resize(n); + weights = Utilities::weightInitialization(k); + bias = Utilities::biasInitialization(); + } + + std::vector TanhReg::modelSetTest(std::vector> X){ + return Evaluate(X); + } + + double TanhReg::modelTest(std::vector x){ + return Evaluate(x); + } + + void TanhReg::gradientDescent(double learning_rate, int max_epoch, bool UI){ + Reg regularization; + LinAlg alg; + Activation avn; + double cost_prev = 0; + int epoch = 1; + forwardPass(); + + while(true){ + cost_prev = Cost(y_hat, outputSet); + + std::vector error = alg.subtraction(y_hat, outputSet); + + weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputSet), alg.hadamard_product(error, avn.tanh(z, 1))))); + weights = regularization.regWeights(weights, lambda, alpha, reg); + + + // Calculating the bias gradients + bias -= learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.tanh(z, 1))) / n; + + forwardPass(); + + // UI PORTION + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); + Utilities::UI(weights, bias); + } + epoch++; + + if(epoch > max_epoch) { break; } + + } + } + + void TanhReg::SGD(double learning_rate, int max_epoch, bool UI){ + Reg regularization; + Utilities util; + double cost_prev = 0; + int epoch = 1; + + while(true){ + std::random_device rd; + std::default_random_engine generator(rd()); + std::uniform_int_distribution distribution(0, int(n - 1)); + int outputIndex = distribution(generator); + + double y_hat = Evaluate(inputSet[outputIndex]); + cost_prev = Cost({y_hat}, {outputSet[outputIndex]}); + + + for(int i = 0; i < k; i++){ + + // Calculating the weight gradients + + double w_gradient = (y_hat - outputSet[outputIndex]) * (1 - y_hat * y_hat) * inputSet[outputIndex][i]; + + + // Weight updation + weights[i] -= learning_rate * w_gradient; + } + weights = regularization.regWeights(weights, lambda, alpha, reg); + + // Calculating the bias gradients + double b_gradient = (y_hat - outputSet[outputIndex]) * (1 - y_hat * y_hat); + + // Bias updation + bias -= learning_rate * b_gradient; + y_hat = Evaluate({inputSet[outputIndex]}); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost({y_hat}, {outputSet[outputIndex]})); + Utilities::UI(weights, bias); + } + epoch++; + + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + void TanhReg::MBGD(double learning_rate, int max_epoch, int miniBatch_size, bool UI){ + Reg regularization; + Activation avn; + LinAlg alg; + double cost_prev = 0; + int epoch = 1; + + int n_miniBatch = n/miniBatch_size; + + std::vector>> inputMiniBatches; + std::vector> outputMiniBatches; + // Creating the mini-batches + for(int i = 0; i < n_miniBatch; i++){ + std::vector> currentInputSet; + std::vector currentOutputSet; + std::vector currentPreActivationSet; + for(int j = 0; j < n/n_miniBatch; j++){ + currentInputSet.push_back(inputSet[n/n_miniBatch * i + j]); + currentOutputSet.push_back(outputSet[n/n_miniBatch * i + j]); + } + inputMiniBatches.push_back(currentInputSet); + 
outputMiniBatches.push_back(currentOutputSet); + } + + if(double(n)/double(n_miniBatch) - int(n/n_miniBatch) != 0){ + for(int i = 0; i < n - n/n_miniBatch * n_miniBatch; i++){ + inputMiniBatches[n_miniBatch - 1].push_back(inputSet[n/n_miniBatch * n_miniBatch + i]); + outputMiniBatches[n_miniBatch - 1].push_back(outputSet[n/n_miniBatch * n_miniBatch + i]); + } + } + + while(true){ + for(int i = 0; i < n_miniBatch; i++){ + std::vector y_hat = Evaluate(inputMiniBatches[i]); + std::vector z = propagate(inputMiniBatches[i]); + cost_prev = Cost(y_hat, outputMiniBatches[i]); + + std::vector error = alg.subtraction(y_hat, outputMiniBatches[i]); + + // Calculating the weight gradients + weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), alg.hadamard_product(error, avn.tanh(z, 1))))); + weights = regularization.regWeights(weights, lambda, alpha, reg); + + + // Calculating the bias gradients + bias -= learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.tanh(z, 1))) / n; + + forwardPass(); + + y_hat = Evaluate(inputMiniBatches[i]); + + if(UI) { + Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i])); + Utilities::UI(weights, bias); + } + } + epoch++; + if(epoch > max_epoch) { break; } + } + forwardPass(); + } + + double TanhReg::score(){ + Utilities util; + return util.performance(y_hat, outputSet); + } + + void TanhReg::save(std::string fileName){ + Utilities util; + util.saveParameters(fileName, weights, bias); + } + + double TanhReg::Cost(std::vector y_hat, std::vector y){ + Reg regularization; + class Cost cost; + return cost.MSE(y_hat, y) + regularization.regTerm(weights, lambda, alpha, reg); + } + + std::vector TanhReg::Evaluate(std::vector> X){ + LinAlg alg; + Activation avn; + return avn.tanh(alg.scalarAdd(bias, alg.mat_vec_mult(X, weights))); + } + + std::vectorTanhReg::propagate(std::vector> X){ + LinAlg alg; + return alg.scalarAdd(bias, alg.mat_vec_mult(X, weights)); + } + + double TanhReg::Evaluate(std::vector x){ + LinAlg alg; + Activation avn; + return avn.tanh(alg.dot(weights, x) + bias); + } + + double TanhReg::propagate(std::vector x){ + LinAlg alg; + return alg.dot(weights, x) + bias; + } + + // Tanh ( wTx + b ) + void TanhReg::forwardPass(){ + LinAlg alg; + Activation avn; + + z = propagate(inputSet); + y_hat = avn.tanh(z); + } +} \ No newline at end of file diff --git a/MLPP/TanhReg/TanhReg.hpp b/MLPP/TanhReg/TanhReg.hpp new file mode 100644 index 0000000..e2930bb --- /dev/null +++ b/MLPP/TanhReg/TanhReg.hpp @@ -0,0 +1,59 @@ +// +// TanhReg.hpp +// +// Created by Marc Melikyan on 10/2/20. 
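TanhReg fits ŷ = tanh(wᵀx + b) under MSE; since d tanh(z)/dz = 1 − tanh²(z), the single-example gradients used in SGD take the form below (a sketch in conventional notation):

\[
\hat{y} = \tanh(w^{\top}x + b), \qquad
\frac{\partial J}{\partial w_i} = (\hat{y} - y)\,(1 - \hat{y}^2)\, x_i, \qquad
\frac{\partial J}{\partial b} = (\hat{y} - y)\,(1 - \hat{y}^2)
\]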
+// + +#ifndef TanhReg_hpp +#define TanhReg_hpp + + +#include +#include + +namespace MLPP { + + class TanhReg{ + + public: + TanhReg(std::vector> inputSet, std::vector outputSet, std::string reg = "None", double lambda = 0.5, double alpha = 0.5); + std::vector modelSetTest(std::vector> X); + double modelTest(std::vector x); + void gradientDescent(double learning_rate, int max_epoch, bool UI = 1); + void SGD(double learning_rate, int max_epoch, bool UI = 1); + void MBGD(double learning_rate, int max_epoch, int miniBatch_size, bool UI = 1); + double score(); + void save(std::string fileName); + private: + + double Cost(std::vector y_hat, std::vector y); + + std::vector Evaluate(std::vector> X); + std::vector propagate(std::vector> X); + double Evaluate(std::vector x); + double propagate(std::vector x); + void forwardPass(); + + std::vector> inputSet; + std::vector outputSet; + std::vector z; + std::vector y_hat; + std::vector weights; + double bias; + + int n; + int k; + + // UI Portion + void UI(int epoch, double cost_prev); + + // Regularization Params + std::string reg; + double lambda; + double alpha; /* This is the controlling param for Elastic Net*/ + + + }; +} + +#endif /* TanhReg_hpp */ diff --git a/MLPP/UniLinReg/UniLinReg.cpp b/MLPP/UniLinReg/UniLinReg.cpp new file mode 100644 index 0000000..85f207f --- /dev/null +++ b/MLPP/UniLinReg/UniLinReg.cpp @@ -0,0 +1,37 @@ +// +// UniLinReg.cpp +// +// Created by Marc Melikyan on 9/29/20. +// + +#include "UniLinReg.hpp" +#include "LinAlg/LinAlg.hpp" +#include "Stat/Stat.hpp" +#include + + +// General Multivariate Linear Regression Model +// ŷ = b0 + b1x1 + b2x2 + ... + bkxk + + +// Univariate Linear Regression Model +// ŷ = b0 + b1x1 + +namespace MLPP{ + UniLinReg::UniLinReg(std::vector x, std::vector y) + : inputSet(x), outputSet(y) + { + Stat estimator; + b1 = estimator.b1Estimation(inputSet, outputSet); + b0 = estimator.b0Estimation(inputSet, outputSet); + } + + std::vector UniLinReg::modelSetTest(std::vector x){ + LinAlg alg; + return alg.scalarAdd(b0, alg.scalarMultiply(b1, x)); + } + + double UniLinReg::modelTest(double input){ + return b0 + b1 * input; + } +} diff --git a/MLPP/UniLinReg/UniLinReg.hpp b/MLPP/UniLinReg/UniLinReg.hpp new file mode 100644 index 0000000..3ff7715 --- /dev/null +++ b/MLPP/UniLinReg/UniLinReg.hpp @@ -0,0 +1,30 @@ +// +// UniLinReg.hpp +// +// Created by Marc Melikyan on 9/29/20. +// + +#ifndef UniLinReg_hpp +#define UniLinReg_hpp + +#include + +namespace MLPP{ + class UniLinReg{ + + public: + UniLinReg(std::vector x, std::vector y); + std::vector modelSetTest(std::vector x); + double modelTest(double x); + + private: + std::vector inputSet; + std::vector outputSet; + + double b0; + double b1; + + }; +} + +#endif /* UniLinReg_hpp */ diff --git a/MLPP/Utilities/Utilities.cpp b/MLPP/Utilities/Utilities.cpp new file mode 100644 index 0000000..c81032e --- /dev/null +++ b/MLPP/Utilities/Utilities.cpp @@ -0,0 +1,307 @@ +// +// Reg.cpp +// +// Created by Marc Melikyan on 1/16/21. 
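UniLinReg fits the closed-form univariate model ŷ = b0 + b1x using the Stat estimators noted earlier. A hedged usage sketch follows; the include path and toy data are assumptions.

#include <iostream>
#include <vector>
#include "UniLinReg/UniLinReg.hpp"

int main() {
    // Toy data: y is roughly 2x + 1.
    std::vector<double> x = {1, 2, 3, 4, 5};
    std::vector<double> y = {3.1, 4.9, 7.2, 9.0, 11.1};

    MLPP::UniLinReg model(x, y);   // b0 and b1 are estimated in the constructor
    std::cout << "Prediction at x = 6: " << model.modelTest(6) << std::endl;
    return 0;
}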
+// + +#include +#include +#include +#include +#include "Utilities.hpp" + +namespace MLPP{ + + std::vector Utilities::weightInitialization(int n, std::string type){ + std::random_device rd; + std::default_random_engine generator(rd()); + + std::vector weights; + for(int i = 0; i < n; i++){ + if(type == "XavierNormal"){ + std::normal_distribution distribution(0, sqrt(2 / (n + 1))); + weights.push_back(distribution(generator)); + } + else if(type == "XavierUniform"){ + std::uniform_real_distribution distribution(-sqrt(6 / (n + 1)), sqrt(6 / (n + 1))); + weights.push_back(distribution(generator)); + } + else if(type == "HeNormal"){ + std::normal_distribution distribution(0, sqrt(2 / n)); + weights.push_back(distribution(generator)); + } + else if(type == "HeUniform"){ + std::uniform_real_distribution distribution(-sqrt(6 / n), sqrt(6 / n)); + weights.push_back(distribution(generator)); + } + else if(type == "Uniform"){ + std::uniform_real_distribution distribution(-1/sqrt(n), 1/sqrt(n)); + weights.push_back(distribution(generator)); + } + else{ + std::uniform_real_distribution distribution(0, 1); + weights.push_back(distribution(generator)); + } + } + return weights; + } + + double Utilities::biasInitialization(){ + std::random_device rd; + std::default_random_engine generator(rd()); + std::uniform_real_distribution distribution(0,1); + + return distribution(generator); + } + + std::vector> Utilities::weightInitialization(int n, int m, std::string type){ + std::random_device rd; + std::default_random_engine generator(rd()); + + std::vector> weights; + weights.resize(n); + + for(int i = 0; i < n; i++){ + for(int j = 0; j < m; j++){ + if(type == "XavierNormal"){ + std::normal_distribution distribution(0, sqrt(2 / (n + m))); + weights[i].push_back(distribution(generator)); + } + else if(type == "XavierUniform"){ + std::uniform_real_distribution distribution(-sqrt(6 / (n + m)), sqrt(6 / (n + m))); + weights[i].push_back(distribution(generator)); + } + else if(type == "HeNormal"){ + std::normal_distribution distribution(0, sqrt(2 / n)); + weights[i].push_back(distribution(generator)); + } + else if(type == "HeUniform"){ + std::uniform_real_distribution distribution(-sqrt(6 / n), sqrt(6 / n)); + weights[i].push_back(distribution(generator)); + } + else if(type == "Uniform"){ + std::uniform_real_distribution distribution(-1/sqrt(n), 1/sqrt(n)); + weights[i].push_back(distribution(generator)); + } + else{ + std::uniform_real_distribution distribution(0, 1); + weights[i].push_back(distribution(generator)); + } + } + } + return weights; + } + + std::vector Utilities::biasInitialization(int n){ + std::vector bias; + std::random_device rd; + std::default_random_engine generator(rd()); + std::uniform_real_distribution distribution(0,1); + + for(int i = 0; i < n; i++){ + bias.push_back(distribution(generator)); + } + return bias; + } + + double Utilities::performance(std::vector y_hat, std::vector outputSet){ + double correct = 0; + for(int i = 0; i < y_hat.size(); i++){ + if(std::round(y_hat[i]) == outputSet[i]){ + correct++; + } + } + return correct/y_hat.size(); + } + + double Utilities::performance(std::vector> y_hat, std::vector> y){ + double correct = 0; + for(int i = 0; i < y_hat.size(); i++){ + int sub_correct = 0; + for(int j = 0; j < y_hat[i].size(); j++){ + if(std::round(y_hat[i][j]) == y[i][j]){ + sub_correct++; + } + if(sub_correct == y_hat[0].size()){ + correct++; + } + } + } + return correct/y_hat.size(); + } + + void Utilities::saveParameters(std::string fileName, std::vector weights, 
double bias, bool app, int layer){ + std::string layer_info = ""; + std::ofstream saveFile; + + if(layer > -1){ + layer_info = " for layer " + std::to_string(layer); + } + + if(app){ + saveFile.open(fileName.c_str(), std::ios_base::app); + } + else { saveFile.open(fileName.c_str()); } + + if(!saveFile.is_open()){ + std::cout << fileName << " failed to open." << std::endl; + } + + saveFile << "Weight(s)" << layer_info << std::endl; + for(int i = 0; i < weights.size(); i++){ + saveFile << weights[i] << std::endl; + } + saveFile << "Bias" << layer_info << std::endl; + saveFile << bias << std::endl; + + saveFile.close(); + } + + void Utilities::saveParameters(std::string fileName, std::vector weights, std::vector initial, double bias, bool app, int layer){ + std::string layer_info = ""; + std::ofstream saveFile; + + if(layer > -1){ + layer_info = " for layer " + std::to_string(layer); + } + + if(app){ + saveFile.open(fileName.c_str(), std::ios_base::app); + } + else { saveFile.open(fileName.c_str()); } + + if(!saveFile.is_open()){ + std::cout << fileName << " failed to open." << std::endl; + } + + saveFile << "Weight(s)" << layer_info << std::endl; + for(int i = 0; i < weights.size(); i++){ + saveFile << weights[i] << std::endl; + } + + saveFile << "Initial(s)" << layer_info << std::endl; + for(int i = 0; i < initial.size(); i++){ + saveFile << initial[i] << std::endl; + } + + saveFile << "Bias" << layer_info << std::endl; + saveFile << bias << std::endl; + + saveFile.close(); + } + + void Utilities::saveParameters(std::string fileName, std::vector> weights, std::vector bias, bool app, int layer){ + std::string layer_info = ""; + std::ofstream saveFile; + + if(layer > -1){ + layer_info = " for layer " + std::to_string(layer); + } + + if(app){ + saveFile.open(fileName.c_str(), std::ios_base::app); + } + else { saveFile.open(fileName.c_str()); } + + if(!saveFile.is_open()){ + std::cout << fileName << " failed to open." 
<< std::endl;
+        }
+
+        saveFile << "Weight(s)" << layer_info << std::endl;
+        for(int i = 0; i < weights.size(); i++){
+            for(int j = 0; j < weights[i].size(); j++){
+                saveFile << weights[i][j] << std::endl;
+            }
+        }
+        saveFile << "Bias(es)" << layer_info << std::endl;
+        for(int i = 0; i < bias.size(); i++){
+            saveFile << bias[i] << std::endl;
+        }
+
+        saveFile.close();
+    }
+
+    void Utilities::UI(std::vector<double> weights, double bias){
+        std::cout << "Values of the weight(s):" << std::endl;
+        for(int i = 0; i < weights.size(); i++){
+            std::cout << weights[i] << std::endl;
+        }
+        std::cout << "Value of the bias:" << std::endl;
+        std::cout << bias << std::endl;
+    }
+
+    void Utilities::UI(std::vector<std::vector<double>> weights, std::vector<double> bias){
+        std::cout << "Values of the weight(s):" << std::endl;
+        for(int i = 0; i < weights.size(); i++){
+            for(int j = 0; j < weights[i].size(); j++){
+                std::cout << weights[i][j] << std::endl;
+            }
+        }
+        std::cout << "Values of the biases:" << std::endl;
+        for(int i = 0; i < bias.size(); i++){
+            std::cout << bias[i] << std::endl;
+        }
+    }
+
+    void Utilities::UI(std::vector<double> weights, std::vector<double> initial, double bias){
+        std::cout << "Values of the weight(s):" << std::endl;
+        for(int i = 0; i < weights.size(); i++){
+            std::cout << weights[i] << std::endl;
+        }
+        std::cout << "Values of the initial(s):" << std::endl;
+        for(int i = 0; i < initial.size(); i++){
+            std::cout << initial[i] << std::endl;
+        }
+        std::cout << "Value of the bias:" << std::endl;
+        std::cout << bias << std::endl;
+    }
+
+    void Utilities::CostInfo(int epoch, double cost_prev, double Cost){
+        std::cout << "-----------------------------------" << std::endl;
+        std::cout << "This is epoch: " << epoch << std::endl;
+        std::cout << "The cost function has been minimized by " << cost_prev - Cost << std::endl;
+        std::cout << "Current Cost:" << std::endl;
+        std::cout << Cost << std::endl;
+    }
+
+    std::tuple<double, double, double, double> Utilities::TF_PN(std::vector<double> y_hat, std::vector<double> y){
+        // All four counters must start at zero.
+        double TP = 0, FP = 0, TN = 0, FN = 0;
+        for(int i = 0; i < y_hat.size(); i++){
+            if(y_hat[i] == y[i]){
+                if(y_hat[i] == 1){
+                    TP++;
+                }
+                else{
+                    TN++;
+                }
+            }
+            else{
+                if(y_hat[i] == 1){
+                    FP++;
+                }
+                else{
+                    FN++;
+                }
+            }
+        }
+        return {TP, FP, TN, FN};
+    }
+
+    double Utilities::recall(std::vector<double> y_hat, std::vector<double> y){
+        auto [TP, FP, TN, FN] = TF_PN(y_hat, y);
+        return TP / (TP + FN);
+    }
+
+    double Utilities::precision(std::vector<double> y_hat, std::vector<double> y){
+        auto [TP, FP, TN, FN] = TF_PN(y_hat, y);
+        return TP / (TP + FP);
+    }
+
+    double Utilities::accuracy(std::vector<double> y_hat, std::vector<double> y){
+        auto [TP, FP, TN, FN] = TF_PN(y_hat, y);
+        return (TP + TN) / (TP + FP + FN + TN);
+    }
+
+    double Utilities::f1_score(std::vector<double> y_hat, std::vector<double> y){
+        return 2 * precision(y_hat, y) * recall(y_hat, y) / (precision(y_hat, y) + recall(y_hat, y));
+    }
+}
\ No newline at end of file
diff --git a/MLPP/Utilities/Utilities.hpp b/MLPP/Utilities/Utilities.hpp
new file mode 100644
index 0000000..9cbd23f
--- /dev/null
+++ b/MLPP/Utilities/Utilities.hpp
@@ -0,0 +1,51 @@
+//
+// Utilities.hpp
+//
+// Created by Marc Melikyan on 1/16/21.
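+//
+// A small worked example for the classification metrics implemented in Utilities.cpp above
+// (the vectors are made up, illustrative only). With
+//     y_hat = {1, 1, 0, 0, 1} and y = {1, 0, 0, 1, 1}
+// the counts are TP = 2, FP = 1, TN = 1, FN = 1, so
+//     precision = TP/(TP+FP) = 2/3, recall = TP/(TP+FN) = 2/3,
+//     accuracy  = (TP+TN)/5  = 3/5, F1 = 2*precision*recall/(precision+recall) = 2/3.
+//
+// Utilities util;
+// std::vector<double> y_hat = {1, 1, 0, 0, 1};
+// std::vector<double> y     = {1, 0, 0, 1, 1};
+// std::cout << util.precision(y_hat, y) << " " << util.recall(y_hat, y) << " "
+//           << util.accuracy(y_hat, y) << " " << util.f1_score(y_hat, y) << std::endl;
+//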
+// + +#ifndef Utilities_hpp +#define Utilities_hpp + +#include +#include +#include + +namespace MLPP{ + class Utilities{ + public: + // Weight Init + static std::vector weightInitialization(int n, std::string type = "Default"); + static double biasInitialization(); + + static std::vector> weightInitialization(int n, int m, std::string type = "Default"); + static std::vector biasInitialization(int n); + + // Cost/Performance related Functions + double performance(std::vector y_hat, std::vector y); + double performance(std::vector> y_hat, std::vector> y); + + // Parameter Saving Functions + void saveParameters(std::string fileName, std::vector weights, double bias, bool app = 0, int layer = -1); + void saveParameters(std::string fileName, std::vector weights, std::vector initial, double bias, bool app = 0, int layer = -1); + void saveParameters(std::string fileName, std::vector> weights, std::vector bias, bool app = 0, int layer = -1); + + // Gradient Descent related + static void UI(std::vector weights, double bias); + static void UI(std::vector weights, std::vector initial, double bias); + static void UI(std::vector>, std::vector bias); + + static void CostInfo(int epoch, double cost_prev, double Cost); + + // F1 score, Precision/Recall, TP, FP, TN, FN, etc. + std::tuple TF_PN(std::vector y_hat, std::vector y); //TF_PN = "True", "False", "Positive", "Negative" + double recall(std::vector y_hat, std::vector y); + double precision(std::vector y_hat, std::vector y); + double accuracy(std::vector y_hat, std::vector y); + double f1_score(std::vector y_hat, std::vector y); + + private: + }; +} + +#endif /* Utilities_hpp */ diff --git a/MLPP/kNN/kNN.cpp b/MLPP/kNN/kNN.cpp new file mode 100644 index 0000000..ec53f65 --- /dev/null +++ b/MLPP/kNN/kNN.cpp @@ -0,0 +1,94 @@ +// +// kNN.cpp +// +// Created by Marc Melikyan on 10/2/20. 
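+//
+// A minimal usage sketch for this class, in the same spirit as the commented examples in
+// main.cpp further below (the toy dataset here is made up; each row is one training example):
+//
+// std::vector<std::vector<double>> inputSet = {{1,1}, {1,2}, {6,6}, {7,6}};
+// std::vector<double> outputSet = {0, 0, 1, 1};
+// kNN knn(inputSet, outputSet, 3);                  // vote among the 3 nearest neighbors
+// std::cout << knn.modelTest({6,5}) << std::endl;   // expected to print 1
+// std::cout << "ACCURACY: " << 100 * knn.score() << "%" << std::endl;
+//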
+//
+
+#include "kNN.hpp"
+#include "LinAlg/LinAlg.hpp"
+#include "Utilities/Utilities.hpp"
+
+#include <iostream>
+#include <map>
+#include <cmath>
+
+namespace MLPP{
+    kNN::kNN(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, int k)
+    : inputSet(inputSet), outputSet(outputSet), k(k)
+    {
+
+    }
+
+    std::vector<double> kNN::modelSetTest(std::vector<std::vector<double>> X){
+        std::vector<double> y_hat;
+        for(int i = 0; i < X.size(); i++){
+            y_hat.push_back(modelTest(X[i]));
+        }
+        return y_hat;
+    }
+
+    int kNN::modelTest(std::vector<double> x){
+        return determineClass(nearestNeighbors(x));
+    }
+
+    double kNN::score(){
+        Utilities util;
+        return util.performance(modelSetTest(inputSet), outputSet);
+    }
+
+    int kNN::determineClass(std::vector<double> knn){
+        // knn holds the indices of the k nearest training examples; each one votes with its
+        // class label, and the label with the most votes wins.
+        std::map<int, int> class_nums;
+        for(int i = 0; i < outputSet.size(); i++){
+            class_nums[outputSet[i]] = 0;
+        }
+        for(int i = 0; i < knn.size(); i++){
+            class_nums[outputSet[(int)knn[i]]]++;
+        }
+        int max = class_nums[outputSet[0]];
+        int final_class = outputSet[0];
+
+        for(int i = 0; i < outputSet.size(); i++){
+            if(class_nums[outputSet[i]] > max){
+                max = class_nums[outputSet[i]];
+            }
+        }
+        for(auto [c, v] : class_nums){
+            if(v == max){
+                final_class = c;
+            }
+        }
+        return final_class;
+    }
+
+    std::vector<double> kNN::nearestNeighbors(std::vector<double> x){
+        // The indices of the k nearest neighbors
+        std::vector<double> knn;
+
+        std::vector<bool> taken(inputSet.size(), false);
+        // Perform this loop until all k nearest neighbors are found, appended, and returned
+        for(int i = 0; i < k; i++){
+            int neighbor = -1;
+            for(int j = 0; j < inputSet.size(); j++){
+                if(taken[j]){ continue; }
+                if(neighbor == -1 || euclideanDistance(x, inputSet[j]) < euclideanDistance(x, inputSet[neighbor])){
+                    neighbor = j;
+                }
+            }
+            knn.push_back(neighbor);
+            // Mark the chosen example so it cannot be selected twice, while keeping the
+            // returned indices valid with respect to the original inputSet/outputSet.
+            taken[neighbor] = true;
+        }
+        return knn;
+    }
+
+    // Multidimensional Euclidean Distance
+    double kNN::euclideanDistance(std::vector<double> A, std::vector<double> B){
+        double dist = 0;
+        for(int i = 0; i < A.size(); i++){
+            dist += (A[i] - B[i]) * (A[i] - B[i]);
+        }
+        return sqrt(dist);
+    }
+}
\ No newline at end of file
diff --git a/MLPP/kNN/kNN.hpp b/MLPP/kNN/kNN.hpp
new file mode 100644
index 0000000..a786c5a
--- /dev/null
+++ b/MLPP/kNN/kNN.hpp
@@ -0,0 +1,38 @@
+//
+// kNN.hpp
+//
+// Created by Marc Melikyan on 10/2/20.
+//
+
+#ifndef kNN_hpp
+#define kNN_hpp
+
+#include <vector>
+
+namespace MLPP{
+    class kNN{
+
+        public:
+            kNN(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, int k);
+            std::vector<double> modelSetTest(std::vector<std::vector<double>> X);
+            int modelTest(std::vector<double> x);
+            double score();
+
+        private:
+
+            // Private Model Functions
+            std::vector<double> nearestNeighbors(std::vector<double> x);
+            int determineClass(std::vector<double> knn);
+
+            // Helper Functions
+            double euclideanDistance(std::vector<double> A, std::vector<double> B);
+
+            // Model Inputs and Parameters
+            std::vector<std::vector<double>> inputSet;
+            std::vector<double> outputSet;
+            int k;
+
+    };
+}
+
+#endif /* kNN_hpp */
diff --git a/main.cpp b/main.cpp
new file mode 100644
index 0000000..4481611
--- /dev/null
+++ b/main.cpp
@@ -0,0 +1,353 @@
+//
+// main.cpp
+// TEST_APP
+//
+// Created by Marc on 1/20/21.
+//
+
+// THINGS CURRENTLY TO DO:
+// POLYMORPHIC IMPLEMENTATION OF REGRESSION CLASSES
+// EXTEND SGD/MBGD SUPPORT FOR DYN.
SIZED ANN +// STANDARDIZE ACTIVATIONS/OPTIMIZATIONS +// FINISH ADDING ALL ACTIVATIONS TO ANN + +// HYPOTHESIS TESTING CLASS +// GAUSS MARKOV CHECKER CLASS + +#include +#include +#include +#include "MLPP/UniLinReg/UniLinReg.hpp" +#include "MLPP/LinReg/LinReg.hpp" +#include "MLPP/LogReg/LogReg.hpp" +#include "MLPP/CLogLogReg/CLogLogReg.hpp" +#include "MLPP/ExpReg/ExpReg.hpp" +#include "MLPP/ProbitReg/ProbitReg.hpp" +#include "MLPP/SoftmaxReg/SoftmaxReg.hpp" +#include "MLPP/TanhReg/TanhReg.hpp" +#include "MLPP/MLP/MLP.hpp" +#include "MLPP/SoftmaxNet/SoftmaxNet.hpp" +#include "MLPP/AutoEncoder/AutoEncoder.hpp" +#include "MLPP/ANN/ANN.hpp" +#include "MLPP/MultinomialNB/MultinomialNB.hpp" +#include "MLPP/BernoulliNB/BernoulliNB.hpp" +#include "MLPP/GaussianNB/GaussianNB.hpp" +#include "MLPP/KMeans/KMeans.hpp" +#include "MLPP/kNN/kNN.hpp" +#include "MLPP/PCA/PCA.hpp" +#include "MLPP/OutlierFinder/OutlierFinder.hpp" +#include "MLPP/Stat/Stat.hpp" +#include "MLPP/LinAlg/LinAlg.hpp" +#include "MLPP/Activation/Activation.hpp" +#include "MLPP/Data/Data.hpp" +#include "MLPP/Convolutions/Convolutions.hpp" + + +using namespace MLPP; + +int main() { + + // OBJECTS + Stat stat; + LinAlg alg; + Activation avn; + Data data; + Convolutions conv; + + // DATA SETS + // std::vector> inputSet = {{1,2,3,4,5,6,7,8,9,10}, {3,5,9,12,15,18,21,24,27,30}}; + // std::vector outputSet = {2,4,6,8,10,12,14,16,18,20}; + + // std::vector> inputSet = {{1,2,3,4,5,6,7,8}, {0,0,0,0,1,1,1,1}}; + // std::vector outputSet = {0,0,0,0,1,1,1,1}; + + // std::vector> inputSet = {{4,3,0,-3,-4}, {0,0,0,1,1}}; + // std::vector outputSet = {1,1,0,-1,-1}; + + // std::vector> inputSet = {{0,1,2,3,4}}; + // std::vector outputSet = {1,2,4,8,16}; + + //std::vector> inputSet = {{32, 0, 7}, {2, 28, 17}, {0, 9, 23}}; + + // std::vector> inputSet = {{1,1,0,0,1}, {0,0,1,1,1}, {0,1,1,0,1}}; + // std::vector outputSet = {0,1,0,1,1}; + + // std::vector> inputSet = {{0,0,1,1}, {0,1,0,1}}; + // std::vector outputSet = {0,1,1,0}; + + // // STATISTICS + // std::vector x = {1,2,3,4,5,6,7,8,9,10}; + // std::vector y = {10,9,8,7,6,5,4,3,2,1}; + // std::vector w = {0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1}; + + // std::cout << "Arithmetic Mean: " << stat.mean(x) << std::endl; + // std::cout << "Variance: " << stat.variance(x) << std::endl; + // std::cout << "Covariance: " << stat.covariance(x, y) << std::endl; + // std::cout << "Correlation: " << stat.correlation(x, y) << std::endl; + // std::cout << "R^2: " << stat.R2(x, y) << std::endl; + // std::cout << "Weighted Mean: " << stat.weightedMean(x, w) << std::endl; + // std::cout << "Geometric Mean: " << stat.geometricMean(x) << std::endl; + // std::cout << "Harmonic Mean: " << stat.harmonicMean(x) << std::endl; + // std::cout << "Root Mean Square (Quadratic mean): " << stat.RMS(x) << std::endl; + // std::cout << "Power Mean (p = 5): " << stat.powerMean(x, 5) << std::endl; + // std::cout << "Lehmer Mean (p = 5): " << stat.lehmerMean(x, 5) << std::endl; + // std::cout << "Weighted Lehmer Mean (p = 5): " << stat.weightedLehmerMean(x, w, 5) << std::endl; + // std::cout << "Contraharmonic Mean: " << stat.contraharmonicMean(x) << std::endl; + // std::cout << "Hernonian Mean: " << stat.heronianMean(1, 10) << std::endl; + // std::cout << "Heinz Mean (x = 1): " << stat.heinzMean(1, 10, 1) << std::endl; + // std::cout << "Neuman-Sandor Mean: " << stat.neumanSandorMean(1, 10) << std::endl; + // std::cout << "Stolarsky Mean (p = 5): " << stat.stolarskyMean(1, 10, 5) << std::endl; + // std::cout << "Identric Mean: " 
<< stat.identricMean(1, 10) << std::endl; + // std::cout << "Logarithmic Mean: " << stat.logMean(1, 10) << std::endl; + // std::cout << "Standard Deviation: " << stat.standardDeviation(x) << std::endl; + // std::cout << "Absolute Average Deviation: " << stat.absAvgDeviation(x) << std::endl; + // // Returns 1 - (1/k^2) + // std::cout << "Chebyshev Inequality: " << stat.chebyshevIneq(2) << std::endl; + + // // LINEAR ALGEBRA + // std::vector> A = { + // {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + // {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + // }; + // std::vector a = {4, 3, 1, 3}; + // std::vector b = {3, 5, 6, 1}; + + // alg.printMatrix(alg.matmult(alg.transpose(A), A)); + // std::cout << std::endl; + // std::cout << alg.dot(a, b) << std::endl; + // std::cout << std::endl; + // alg.printMatrix(alg.hadamard_product(A, A)); + // std::cout << std::endl; + // alg.printMatrix(alg.identity(10)); + + // // UNIVARIATE LINEAR REGRESSION + // // Univariate, simple linear regression case where k = 1 + // std::vector inputSet; + // std::vector outputSet; + // // Analytical solution used for calculating the parameters. + // data.setData("/Users/marcmelikyan/Desktop/Data/FiresAndCrime.csv", inputSet, outputSet); + // UniLinReg model(inputSet, outputSet); + // alg.printVector(model.modelSetTest(inputSet)); + + // // MULIVARIATE LINEAR REGRESSION + // std::vector> inputSet = {{1,2,3,4,5,6,7,8,9,10}, {3,5,9,12,15,18,21,24,27,30}}; + // std::vector outputSet = {2,4,6,8,10,12,14,16,18,20}; + // LinReg model(alg.transpose(inputSet), outputSet); // Can use Lasso, Ridge, ElasticNet Reg + // model.normalEquation(); + // model.gradientDescent(0.001, 30000, 1); + // model.SGD(0.001, 30000, 1); + // model.MBGD(0.001, 10000, 2, 1); + // alg.printVector(model.modelSetTest((alg.transpose(inputSet)))); + // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; + + // // LOGISTIC REGRESSION + // std::vector> inputSet; + // std::vector outputSet; + // data.setData(30, "/Users/marcmelikyan/Desktop/Data/BreastCancer.csv", inputSet, outputSet); + // LogReg model(inputSet, outputSet); + // //model.SGD(0.1, 50000, 0); + // model.MLE(0.1, 10000, 0); + // alg.printVector(model.modelSetTest(inputSet)); + // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; + + // // PROBIT REGRESSION + // std::vector> inputSet; + // std::vector outputSet; + // data.setData(30, "/Users/marcmelikyan/Desktop/Data/BreastCancer.csv", inputSet, outputSet); + // ProbitReg model(inputSet, outputSet); + // model.gradientDescent(0.0001, 10000, 1); + // alg.printVector(model.modelSetTest(inputSet)); + // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; + + // // CLOGLOG REGRESSION + // std::vector> inputSet = {{1,2,3,4,5,6,7,8}, {0,0,0,0,1,1,1,1}}; + // std::vector outputSet = {0,0,0,0,1,1,1,1}; + // CLogLogReg model(alg.transpose(inputSet), outputSet); + // model.SGD(0.1, 10000, 0); + // alg.printVector(model.modelSetTest(alg.transpose(inputSet))); + // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; + + // // EXPREG REGRESSION + // std::vector> inputSet = {{0,1,2,3,4}}; + // std::vector outputSet = {1,2,4,8,16}; + // ExpReg model(alg.transpose(inputSet), outputSet); + // model.SGD(0.001, 10000, 0); + // alg.printVector(model.modelSetTest(alg.transpose(inputSet))); + // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; + + // // TANH REGRESSION + // std::vector> inputSet = {{4,3,0,-3,-4}, {0,0,0,1,1}}; + // std::vector outputSet = {1,1,0,-1,-1}; + // TanhReg 
model(alg.transpose(inputSet), outputSet); + // model.SGD(0.1, 10000, 0); + // alg.printVector(model.modelSetTest(alg.transpose(inputSet))); + // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; + + // // SOFTMAX REGRESSION + // std::vector> inputSet; + // std::vector tempOutputSet; + // data.setData(4, "/Users/marcmelikyan/Desktop/Data/Iris.csv", inputSet, tempOutputSet); + // std::vector> outputSet = data.oneHotRep(tempOutputSet, 3); + + // SoftmaxReg model(inputSet, outputSet); + // model.SGD(0.001, 20000, 0); + // alg.printMatrix(model.modelSetTest(inputSet)); + // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; + + // // MLP + // std::vector> inputSet = {{0,0,1,1}, {0,1,0,1}}; + // std::vector outputSet = {0,1,1,0}; + // MLP model(alg.transpose(inputSet), outputSet, 2); + // model.gradientDescent(0.1, 10000, 0); + // alg.printVector(model.modelSetTest(alg.transpose(inputSet))); + // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; + + // // SOFTMAX NETWORK + // std::vector> inputSet; + // std::vector tempOutputSet; + // data.setData(4, "/Users/marcmelikyan/Desktop/Data/Iris.csv", inputSet, tempOutputSet); + // std::vector> outputSet = data.oneHotRep(tempOutputSet, 3); + + // SoftmaxNet model(inputSet, outputSet, 2); + // model.gradientDescent(0.001, 10000, 0); + // alg.printMatrix(model.modelSetTest(inputSet)); + // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; + + // // AUTOENCODER + // std::vector> inputSet = {{1,2,3,4,5,6,7,8,9,10}, {3,5,9,12,15,18,21,24,27,30}}; + // AutoEncoder model(alg.transpose(inputSet), 5); + // model.SGD(0.001, 300000, 0); + // alg.printMatrix(model.modelSetTest(alg.transpose(inputSet))); + // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; + + // // DYNAMICALLY SIZED ANN + // // Possible Weight Init Methods: Default, Uniform, HeNormal, HeUniform, XavierNormal, XavierUniform + // // Possible Activations: Linear, Sigmoid, Swish, CLogLog, Ar{Sinh, Cosh, Tanh, Csch, Sech, Coth}, GaussianCDF, GELU, UnitStep + // // Possible Loss Functions: MSE, RMSE, MBE, LogLoss, CrossEntropy, HingeLoss + // std::vector> inputSet = {{0,0,1,1}, {0,1,0,1}}; + // std::vector outputSet = {0,1,1,0}; + // ANN ann(alg.transpose(inputSet), outputSet); + // ann.addLayer(10, "RELU", "Default", "Ridge", 0.0001); + // ann.addLayer(10, "Sigmoid", "Default"); + // ann.addOutputLayer("Sigmoid", "LogLoss", "XavierNormal"); + // ann.gradientDescent(0.1, 80000, 0); + // alg.printVector(ann.modelSetTest(alg.transpose(inputSet))); + // std::cout << "ACCURACY: " << 100 * ann.score() << "%" << std::endl; + + // // NAIVE BAYES + // std::vector> inputSet = {{1,1,1,1,1}, {0,0,1,1,1}, {0,0,1,0,1}}; + // std::vector outputSet = {0,1,0,1,1}; + + // MultinomialNB MNB(alg.transpose(inputSet), outputSet, 2); + // alg.printVector(MNB.modelSetTest(alg.transpose(inputSet))); + + // BernoulliNB BNB(alg.transpose(inputSet), outputSet); + // alg.printVector(BNB.modelSetTest(alg.transpose(inputSet))); + + // GaussianNB GNB(alg.transpose(inputSet), outputSet, 2); + // alg.printVector(GNB.modelSetTest(alg.transpose(inputSet))); + + // // KMeans + // std::vector> inputSet = {{32, 0, 7}, {2, 28, 17}, {0, 9, 23}}; + // KMeans kmeans(inputSet, 3, "KMeans++"); + // kmeans.train(3, 1); + // std::cout << std::endl; + // alg.printMatrix(kmeans.modelSetTest(inputSet)); // Returns the assigned centroids to each of the respective training examples + // std::cout << std::endl; + // 
alg.printVector(kmeans.silhouette_scores()); + + // // kNN + // std::vector> inputSet = {{1,2,3,4,5,6,7,8}, {0,0,0,0,1,1,1,1}}; + // std::vector outputSet = {0,0,0,0,1,1,1,1}; + // kNN knn(alg.transpose(inputSet), outputSet, 8); + // alg.printVector(knn.modelSetTest(alg.transpose(inputSet))); + // std::cout << "ACCURACY: " << 100 * knn.score() << "%" << std::endl; + + + // //CONVOLUTION, POOLING, ETC.. + // std::vector> input = { + // {1,1,1,1,0,0,0,0}, + // {1,1,1,1,0,0,0,0}, + // {1,1,1,1,0,0,0,0}, + // {1,1,1,1,0,0,0,0}, + // {1,1,1,1,0,0,0,0}, + // {1,1,1,1,0,0,0,0}, + // {1,1,1,1,0,0,0,0}, + // {1,1,1,1,0,0,0,0} + // }; + + // alg.printMatrix(conv.convolve(input, conv.getPrewittVertical(), 1)); // Can use padding + // alg.printMatrix(conv.pool(input, 4, 4, "Max")); // Can use Max, Min, or Average pooling. + + // std::vector>> tensorSet; + // tensorSet.push_back(input); + // tensorSet.push_back(input); + // alg.printVector(conv.globalPool(tensorSet, "Average")); // Can use Max, Min, or Average global pooling. + + // // PCA, SVD, eigenvalues & eigenvectors + // std::vector> inputSet = {{1,1}, {1,1}}; + // auto [Eigenvectors, Eigenvalues] = alg.eig(inputSet); + // std::cout << "Eigenvectors:" << std::endl; + // alg.printMatrix(Eigenvectors); + // std::cout << std::endl; + // std::cout << "Eigenvalues:" << std::endl; + // alg.printMatrix(Eigenvalues); + + // auto [U, S, Vt] = alg.SVD(inputSet); + + // // PCA done using Jacobi's method to approximate eigenvalues. + // PCA dr(inputSet, 1); // 1 dimensional representation. + // std::cout << std::endl; + // std::cout << "Dimensionally reduced representation:" << std::endl; + // alg.printMatrix(dr.principalComponents()); + // std::cout << "SCORE: " << dr.score() << std::endl; + + + // // NLP/DATA + // std::string verbText = "I am appearing and thinking, as well as conducting."; + // std::cout << "Stemming Example:" << std::endl; + // std::cout << data.stemming(verbText) << std::endl; + // std::cout << std::endl; + + // std::vector sentences = {"He is a good boy", "She is a good girl", "The boy and girl are good"}; + // std::cout << "Bag of Words Example:" << std::endl; + // alg.printMatrix(data.BOW(sentences, "Default")); + // std::cout << std::endl; + // std::cout << "TFIDF Example:" << std::endl; + // alg.printMatrix(data.TFIDF(sentences)); + // std::cout << std::endl; + + // std::cout << "Tokenization:" << std::endl; + // alg.printVector(data.tokenize(verbText)); + // std::cout << std::endl; + + // std::cout << "Word2Vec:" << std::endl; + // std::string textArchive = {"He is a good boy. She is a good girl. The boy and girl are good."}; + // std::vector corpus = data.splitSentences(textArchive); + // auto [wordEmbeddings, wordList] = data.word2Vec(corpus, "CBOW", 2, 2, 0.1, 10000); // Can use either CBOW or Skip-n-gram. 
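+    // A possible follow-up, illustrative only: cosine similarity between two embedding rows.
+    // This assumes each row of wordEmbeddings is one word vector, which is not confirmed here.
+    // auto cosine = [](const std::vector<double>& a, const std::vector<double>& b){
+    //     double dot = 0, na = 0, nb = 0;
+    //     for(int i = 0; i < a.size(); i++){ dot += a[i]*b[i]; na += a[i]*a[i]; nb += b[i]*b[i]; }
+    //     return dot / (sqrt(na) * sqrt(nb));
+    // };
+    // std::cout << "cos(w0, w1): " << cosine(wordEmbeddings[0], wordEmbeddings[1]) << std::endl;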
+ // alg.printMatrix(wordEmbeddings); + // std::cout << std::endl; + + // std::vector> inputSet = {{1,2},{2,3},{3,4},{4,5},{5,6}}; + // std::cout << "Feature Scaling Example:" << std::endl; + // alg.printMatrix(data.featureScaling(inputSet)); + // std::cout << std::endl; + + // std::cout << "Mean Centering Example:" << std::endl; + // alg.printMatrix(data.meanCentering(inputSet)); + // std::cout << std::endl; + + // std::cout << "Mean Normalization Example:" << std::endl; + // alg.printMatrix(data.meanNormalization(inputSet)); + // std::cout << std::endl; + + // // Outlier Finder + // std::vector inputSet = {1,2,3,4,5,6,7,8,9,23554332523523}; + // OutlierFinder outlierFinder(2); // Any datapoint outside of 2 stds from the mean is marked as an outlier. + // alg.printVector(outlierFinder.modelTest(inputSet)); + + // // Testing for new Functions + // alg.printMatrix(alg.pinverse({{1,2}, {3,4}})); + + return 0; +} \ No newline at end of file
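For reference, a self-contained sketch of the min-max feature scaling used conceptually in the Data::featureScaling example above. The helper scaleColumns below is hypothetical and not part of the library; the exact convention of Data::featureScaling is not shown in this commit, so the sketch simply assumes the common per-column (x - min) / (max - min) form.

#include <iostream>
#include <vector>
#include <algorithm>

// Scale each column of a row-major dataset into [0, 1] using (x - min) / (max - min).
std::vector<std::vector<double>> scaleColumns(std::vector<std::vector<double>> X){
    for(int j = 0; j < X[0].size(); j++){
        double mn = X[0][j], mx = X[0][j];
        for(int i = 0; i < X.size(); i++){
            mn = std::min(mn, X[i][j]);
            mx = std::max(mx, X[i][j]);
        }
        for(int i = 0; i < X.size(); i++){
            X[i][j] = (X[i][j] - mn) / (mx - mn);
        }
    }
    return X;
}

int main(){
    std::vector<std::vector<double>> inputSet = {{1,2},{2,3},{3,4},{4,5},{5,6}};
    for(auto& row : scaleColumns(inputSet)){
        for(double v : row){ std::cout << v << " "; }
        std::cout << std::endl; // prints 0 0 / 0.25 0.25 / 0.5 0.5 / 0.75 0.75 / 1 1
    }
    return 0;
}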