mirror of
https://github.com/Relintai/pmlpp.git
synced 2025-05-02 17:57:54 +02:00
Removed the old classes.
This commit is contained in:
parent
6f10a7f556
commit
62598551f4
56
SCsub
56
SCsub
@ -4,19 +4,11 @@ Import('env')
|
|||||||
|
|
||||||
module_env = env.Clone()
|
module_env = env.Clone()
|
||||||
|
|
||||||
module_env.pmlpp_build_old_classes = True
|
|
||||||
module_env.pmlpp_build_tests = True
|
module_env.pmlpp_build_tests = True
|
||||||
|
|
||||||
if ARGUMENTS.get('pmlpp_build_old_classes', 'yes') == 'no':
|
|
||||||
module_env.pmlpp_build_old_classes = False
|
|
||||||
|
|
||||||
if ARGUMENTS.get('pmlpp_build_tests', 'yes') == 'no':
|
if ARGUMENTS.get('pmlpp_build_tests', 'yes') == 'no':
|
||||||
module_env.pmlpp_build_tests = False
|
module_env.pmlpp_build_tests = False
|
||||||
|
|
||||||
if env.msvc:
|
|
||||||
# Old classes can't build on MSVC
|
|
||||||
module_env.pmlpp_build_old_classes = False
|
|
||||||
|
|
||||||
sources = [
|
sources = [
|
||||||
"register_types.cpp",
|
"register_types.cpp",
|
||||||
|
|
||||||
@ -73,54 +65,6 @@ if module_env.pmlpp_build_tests:
|
|||||||
"test/mlpp_matrix_tests.cpp",
|
"test/mlpp_matrix_tests.cpp",
|
||||||
]
|
]
|
||||||
|
|
||||||
if module_env.pmlpp_build_old_classes:
|
|
||||||
module_env.Prepend(CPPDEFINES=["OLD_CLASSES_ENABLED"])
|
|
||||||
|
|
||||||
sources += [
|
|
||||||
"mlpp/wgan/wgan_old.cpp",
|
|
||||||
"mlpp/output_layer/output_layer_old.cpp",
|
|
||||||
"mlpp/multi_output_layer/multi_output_layer_old.cpp",
|
|
||||||
"mlpp/hidden_layer/hidden_layer_old.cpp",
|
|
||||||
"mlpp/mlp/mlp_old.cpp",
|
|
||||||
"mlpp/pca/pca_old.cpp",
|
|
||||||
"mlpp/uni_lin_reg/uni_lin_reg_old.cpp",
|
|
||||||
"mlpp/outlier_finder/outlier_finder_old.cpp",
|
|
||||||
"mlpp/probit_reg/probit_reg_old.cpp",
|
|
||||||
"mlpp/svc/svc_old.cpp",
|
|
||||||
"mlpp/softmax_reg/softmax_reg_old.cpp",
|
|
||||||
"mlpp/auto_encoder/auto_encoder_old.cpp",
|
|
||||||
"mlpp/tanh_reg/tanh_reg_old.cpp",
|
|
||||||
"mlpp/softmax_net/softmax_net_old.cpp",
|
|
||||||
"mlpp/multinomial_nb/multinomial_nb_old.cpp",
|
|
||||||
"mlpp/mann/mann_old.cpp",
|
|
||||||
"mlpp/log_reg/log_reg_old.cpp",
|
|
||||||
"mlpp/lin_reg/lin_reg_old.cpp",
|
|
||||||
"mlpp/gaussian_nb/gaussian_nb_old.cpp",
|
|
||||||
"mlpp/gan/gan_old.cpp",
|
|
||||||
"mlpp/exp_reg/exp_reg_old.cpp",
|
|
||||||
"mlpp/dual_svc/dual_svc_old.cpp",
|
|
||||||
"mlpp/c_log_log_reg/c_log_log_reg_old.cpp",
|
|
||||||
"mlpp/bernoulli_nb/bernoulli_nb_old.cpp",
|
|
||||||
"mlpp/ann/ann_old.cpp",
|
|
||||||
"mlpp/numerical_analysis/numerical_analysis_old.cpp",
|
|
||||||
"mlpp/regularization/reg_old.cpp",
|
|
||||||
"mlpp/gauss_markov_checker/gauss_markov_checker_old.cpp",
|
|
||||||
"mlpp/utilities/utilities_old.cpp",
|
|
||||||
"mlpp/transforms/transforms_old.cpp",
|
|
||||||
"mlpp/stat/stat_old.cpp",
|
|
||||||
"mlpp/lin_alg/lin_alg_old.cpp",
|
|
||||||
"mlpp/hypothesis_testing/hypothesis_testing_old.cpp",
|
|
||||||
"mlpp/data/data_old.cpp",
|
|
||||||
"mlpp/cost/cost_old.cpp",
|
|
||||||
"mlpp/convolutions/convolutions_old.cpp",
|
|
||||||
"mlpp/activation/activation_old.cpp",
|
|
||||||
]
|
|
||||||
|
|
||||||
if module_env.pmlpp_build_tests and module_env.pmlpp_build_old_classes:
|
|
||||||
sources += [
|
|
||||||
"test/mlpp_tests_old.cpp",
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
if ARGUMENTS.get('pmlpp_shared', 'no') == 'yes':
|
if ARGUMENTS.get('pmlpp_shared', 'no') == 'yes':
|
||||||
# Shared lib compilation
|
# Shared lib compilation
|
||||||
|
720
main.cpp
720
main.cpp
@ -1,720 +0,0 @@
|
|||||||
//
|
|
||||||
// main.cpp
|
|
||||||
// TEST_APP
|
|
||||||
//
|
|
||||||
// Created by Marc on 1/20/21.
|
|
||||||
//
|
|
||||||
|
|
||||||
// THINGS CURRENTLY TO DO:
|
|
||||||
// POLYMORPHIC IMPLEMENTATION OF REGRESSION CLASSES
|
|
||||||
// EXTEND SGD/MBGD SUPPORT FOR DYN. SIZED ANN
|
|
||||||
// ADD LEAKYRELU, ELU, SELU TO ANN
|
|
||||||
// FIX VECTOR/MATRIX/TENSOR RESIZE ROUTINE
|
|
||||||
|
|
||||||
// HYPOTHESIS TESTING CLASS
|
|
||||||
// GAUSS MARKOV CHECKER CLASS
|
|
||||||
|
|
||||||
#include <iostream>
|
|
||||||
#include <ctime>
|
|
||||||
#include <cmath>
|
|
||||||
#include <vector>
|
|
||||||
#include "MLPP/UniLinReg/UniLinReg.hpp"
|
|
||||||
#include "MLPP/LinReg/LinReg.hpp"
|
|
||||||
#include "MLPP/LogReg/LogReg.hpp"
|
|
||||||
#include "MLPP/CLogLogReg/CLogLogReg.hpp"
|
|
||||||
#include "MLPP/ExpReg/ExpReg.hpp"
|
|
||||||
#include "MLPP/ProbitReg/ProbitReg.hpp"
|
|
||||||
#include "MLPP/SoftmaxReg/SoftmaxReg.hpp"
|
|
||||||
#include "MLPP/TanhReg/TanhReg.hpp"
|
|
||||||
#include "MLPP/MLP/MLP.hpp"
|
|
||||||
#include "MLPP/SoftmaxNet/SoftmaxNet.hpp"
|
|
||||||
#include "MLPP/AutoEncoder/AutoEncoder.hpp"
|
|
||||||
#include "MLPP/ANN/ANN.hpp"
|
|
||||||
#include "MLPP/MANN/MANN.hpp"
|
|
||||||
#include "MLPP/MultinomialNB/MultinomialNB.hpp"
|
|
||||||
#include "MLPP/BernoulliNB/BernoulliNB.hpp"
|
|
||||||
#include "MLPP/GaussianNB/GaussianNB.hpp"
|
|
||||||
#include "MLPP/KMeans/KMeans.hpp"
|
|
||||||
#include "MLPP/kNN/kNN.hpp"
|
|
||||||
#include "MLPP/PCA/PCA.hpp"
|
|
||||||
#include "MLPP/OutlierFinder/OutlierFinder.hpp"
|
|
||||||
#include "MLPP/Stat/Stat.hpp"
|
|
||||||
#include "MLPP/LinAlg/LinAlg.hpp"
|
|
||||||
#include "MLPP/Activation/Activation.hpp"
|
|
||||||
#include "MLPP/Cost/Cost.hpp"
|
|
||||||
#include "MLPP/Data/Data.hpp"
|
|
||||||
#include "MLPP/Convolutions/Convolutions.hpp"
|
|
||||||
#include "MLPP/SVC/SVC.hpp"
|
|
||||||
#include "MLPP/NumericalAnalysis/NumericalAnalysis.hpp"
|
|
||||||
#include "MLPP/DualSVC/DualSVC.hpp"
|
|
||||||
#include "MLPP/GAN/GAN.hpp"
|
|
||||||
#include "MLPP/WGAN/WGAN.hpp"
|
|
||||||
#include "MLPP/Transforms/Transforms.hpp"
|
|
||||||
|
|
||||||
|
|
||||||
// real_t f(real_t x){
|
|
||||||
// return x*x*x + 2*x - 2;
|
|
||||||
// }
|
|
||||||
|
|
||||||
real_t f(real_t x){
|
|
||||||
return sin(x);
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t f_prime(real_t x){
|
|
||||||
return 2 * x;
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t f_prime_2var(std::vector<real_t> x){
|
|
||||||
return 2 * x[0] + x[1];
|
|
||||||
}
|
|
||||||
/*
|
|
||||||
y = x^3 + 2x - 2
|
|
||||||
y' = 3x^2 + 2
|
|
||||||
y'' = 6x
|
|
||||||
y''(2) = 12
|
|
||||||
*/
|
|
||||||
|
|
||||||
// real_t f_mv(std::vector<real_t> x){
|
|
||||||
// return x[0] * x[0] + x[0] * x[1] * x[1] + x[1] + 5;
|
|
||||||
// }
|
|
||||||
|
|
||||||
/*
|
|
||||||
Where x, y = x[0], x[1], this function is defined as:
|
|
||||||
f(x, y) = x^2 + xy^2 + y + 5
|
|
||||||
∂f/∂x = 2x + 2y
|
|
||||||
∂^2f/∂x∂y = 2
|
|
||||||
*/
|
|
||||||
|
|
||||||
real_t f_mv(std::vector<real_t> x){
|
|
||||||
return x[0] * x[0] * x[0] + x[0] + x[1] * x[1] * x[1] * x[0] + x[2] * x[2] * x[1];
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
Where x, y = x[0], x[1], this function is defined as:
|
|
||||||
f(x, y) = x^3 + x + xy^3 + yz^2
|
|
||||||
|
|
||||||
fy = 3xy^2 + 2yz
|
|
||||||
fyy = 6xy + 2z
|
|
||||||
fyyz = 2
|
|
||||||
|
|
||||||
∂^2f/∂y^2 = 6xy + 2z
|
|
||||||
∂^3f/∂y^3 = 6x
|
|
||||||
|
|
||||||
∂f/∂z = 2zy
|
|
||||||
∂^2f/∂z^2 = 2y
|
|
||||||
∂^3f/∂z^3 = 0
|
|
||||||
|
|
||||||
∂f/∂x = 3x^2 + 1 + y^3
|
|
||||||
∂^2f/∂x^2 = 6x
|
|
||||||
∂^3f/∂x^3 = 6
|
|
||||||
|
|
||||||
∂f/∂z = 2zy
|
|
||||||
∂^2f/∂z^2 = 2z
|
|
||||||
|
|
||||||
∂f/∂y = 3xy^2
|
|
||||||
∂^2f/∂y∂x = 3y^2
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
int main() {
|
|
||||||
|
|
||||||
// // OBJECTS
|
|
||||||
MLPPStat stat;
|
|
||||||
MLPPLinAlg alg;
|
|
||||||
MLPPActivation avn;
|
|
||||||
MLPPCost cost;
|
|
||||||
MLPPData data;
|
|
||||||
MLPPConvolutions conv;
|
|
||||||
|
|
||||||
// DATA SETS
|
|
||||||
// std::vector<std::vector<real_t>> inputSet = {{1,2,3,4,5,6,7,8,9,10}, {3,5,9,12,15,18,21,24,27,30}};
|
|
||||||
// std::vector<real_t> outputSet = {2,4,6,8,10,12,14,16,18,20};
|
|
||||||
|
|
||||||
// std::vector<std::vector<real_t>> inputSet = {{1,2,3,4,5,6,7,8}, {0,0,0,0,1,1,1,1}};
|
|
||||||
// std::vector<real_t> outputSet = {0,0,0,0,1,1,1,1};
|
|
||||||
|
|
||||||
// std::vector<std::vector<real_t>> inputSet = {{4,3,0,-3,-4}, {0,0,0,1,1}};
|
|
||||||
// std::vector<real_t> outputSet = {1,1,0,-1,-1};
|
|
||||||
|
|
||||||
// std::vector<std::vector<real_t>> inputSet = {{0,1,2,3,4}};
|
|
||||||
// std::vector<real_t> outputSet = {1,2,4,8,16};
|
|
||||||
|
|
||||||
//std::vector<std::vector<real_t>> inputSet = {{32, 0, 7}, {2, 28, 17}, {0, 9, 23}};
|
|
||||||
|
|
||||||
// std::vector<std::vector<real_t>> inputSet = {{1,1,0,0,1}, {0,0,1,1,1}, {0,1,1,0,1}};
|
|
||||||
// std::vector<real_t> outputSet = {0,1,0,1,1};
|
|
||||||
|
|
||||||
// std::vector<std::vector<real_t>> inputSet = {{0,0,1,1}, {0,1,0,1}};
|
|
||||||
// std::vector<real_t> outputSet = {0,1,1,0};
|
|
||||||
|
|
||||||
// // STATISTICS
|
|
||||||
// std::vector<real_t> x = {1,2,3,4,5,6,7,8,9,10};
|
|
||||||
// std::vector<real_t> y = {10,9,8,7,6,5,4,3,2,1};
|
|
||||||
// std::vector<real_t> w = {0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1};
|
|
||||||
|
|
||||||
// std::cout << "Arithmetic Mean: " << stat.mean(x) << std::endl;
|
|
||||||
// std::cout << "Median: " << stat.median(x) << std::endl;
|
|
||||||
// alg.printVector(x);
|
|
||||||
// alg.printVector(stat.mode(x));
|
|
||||||
// std::cout << "Range: " << stat.range(x) << std::endl;
|
|
||||||
// std::cout << "Midrange: " << stat.midrange(x) << std::endl;
|
|
||||||
// std::cout << "Absolute Average Deviation: " << stat.absAvgDeviation(x) << std::endl;
|
|
||||||
// std::cout << "Standard Deviation: " << stat.standardDeviation(x) << std::endl;
|
|
||||||
// std::cout << "Variance: " << stat.variance(x) << std::endl;
|
|
||||||
// std::cout << "Covariance: " << stat.covariance(x, y) << std::endl;
|
|
||||||
// std::cout << "Correlation: " << stat.correlation(x, y) << std::endl;
|
|
||||||
// std::cout << "R^2: " << stat.R2(x, y) << std::endl;
|
|
||||||
// // Returns 1 - (1/k^2)
|
|
||||||
// std::cout << "Chebyshev Inequality: " << stat.chebyshevIneq(2) << std::endl;
|
|
||||||
// std::cout << "Weighted Mean: " << stat.weightedMean(x, w) << std::endl;
|
|
||||||
// std::cout << "Geometric Mean: " << stat.geometricMean(x) << std::endl;
|
|
||||||
// std::cout << "Harmonic Mean: " << stat.harmonicMean(x) << std::endl;
|
|
||||||
// std::cout << "Root Mean Square (Quadratic mean): " << stat.RMS(x) << std::endl;
|
|
||||||
// std::cout << "Power Mean (p = 5): " << stat.powerMean(x, 5) << std::endl;
|
|
||||||
// std::cout << "Lehmer Mean (p = 5): " << stat.lehmerMean(x, 5) << std::endl;
|
|
||||||
// std::cout << "Weighted Lehmer Mean (p = 5): " << stat.weightedLehmerMean(x, w, 5) << std::endl;
|
|
||||||
// std::cout << "Contraharmonic Mean: " << stat.contraHarmonicMean(x) << std::endl;
|
|
||||||
// std::cout << "Hernonian Mean: " << stat.heronianMean(1, 10) << std::endl;
|
|
||||||
// std::cout << "Heinz Mean (x = 1): " << stat.heinzMean(1, 10, 1) << std::endl;
|
|
||||||
// std::cout << "Neuman-Sandor Mean: " << stat.neumanSandorMean(1, 10) << std::endl;
|
|
||||||
// std::cout << "Stolarsky Mean (p = 5): " << stat.stolarskyMean(1, 10, 5) << std::endl;
|
|
||||||
// std::cout << "Identric Mean: " << stat.identricMean(1, 10) << std::endl;
|
|
||||||
// std::cout << "Logarithmic Mean: " << stat.logMean(1, 10) << std::endl;
|
|
||||||
// std::cout << "Absolute Average Deviation: " << stat.absAvgDeviation(x) << std::endl;
|
|
||||||
|
|
||||||
// LINEAR ALGEBRA
|
|
||||||
// std::vector<std::vector<real_t>> square = {{1, 1}, {-1, 1}, {1, -1}, {-1, -1}};
|
|
||||||
|
|
||||||
// alg.printMatrix(alg.rotate(square, M_PI/4));
|
|
||||||
|
|
||||||
// std::vector<std::vector<real_t>> A = {
|
|
||||||
// {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
|
|
||||||
// {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
|
|
||||||
// };
|
|
||||||
// std::vector<real_t> a = {4, 3, 1, 3};
|
|
||||||
// std::vector<real_t> b = {3, 5, 6, 1};
|
|
||||||
|
|
||||||
// alg.printMatrix(alg.matmult(alg.transpose(A), A));
|
|
||||||
// std::cout << std::endl;
|
|
||||||
// std::cout << alg.dot(a, b) << std::endl;
|
|
||||||
// std::cout << std::endl;
|
|
||||||
// alg.printMatrix(alg.hadamard_product(A, A));
|
|
||||||
// std::cout << std::endl;
|
|
||||||
// alg.printMatrix(alg.identity(10));
|
|
||||||
|
|
||||||
// UNIVARIATE LINEAR REGRESSION
|
|
||||||
// Univariate, simple linear regression, case where k = 1
|
|
||||||
// auto [inputSet, outputSet] = data.loadFiresAndCrime();
|
|
||||||
// UniLinReg model(inputSet, outputSet);
|
|
||||||
// alg.printVector(model.modelSetTest(inputSet));
|
|
||||||
|
|
||||||
// // MULIVARIATE LINEAR REGRESSION
|
|
||||||
// auto [inputSet, outputSet] = data.loadCaliforniaHousing();
|
|
||||||
|
|
||||||
// LinReg model(inputSet, outputSet); // Can use Lasso, Ridge, ElasticNet Reg
|
|
||||||
|
|
||||||
//model.gradientDescent(0.001, 30, 0);
|
|
||||||
//model.SGD(0.00000001, 300000, 1);
|
|
||||||
//model.MBGD(0.001, 10000, 2, 1);
|
|
||||||
//model.normalEquation();
|
|
||||||
|
|
||||||
// LinReg adamModel(alg.transpose(inputSet), outputSet);
|
|
||||||
// alg.printVector(model.modelSetTest(inputSet));
|
|
||||||
// std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl;
|
|
||||||
|
|
||||||
|
|
||||||
// const int TRIAL_NUM = 1000;
|
|
||||||
|
|
||||||
// real_t scoreSGD = 0;
|
|
||||||
// real_t scoreADAM = 0;
|
|
||||||
// for(int i = 0; i < TRIAL_NUM; i++){
|
|
||||||
// LinReg model(alg.transpose(inputSet), outputSet);
|
|
||||||
// model.MBGD(0.001, 5, 1, 0);
|
|
||||||
// scoreSGD += model.score();
|
|
||||||
|
|
||||||
// LinReg adamModel(alg.transpose(inputSet), outputSet);
|
|
||||||
// adamModel.Adam(0.1, 5, 1, 0.9, 0.999, 1e-8, 0); // Change batch size = sgd, bgd
|
|
||||||
// scoreADAM += adamModel.score();
|
|
||||||
// }
|
|
||||||
|
|
||||||
// std::cout << "ACCURACY, AVG, SGD: " << 100 * scoreSGD/TRIAL_NUM << "%" << std::endl;
|
|
||||||
|
|
||||||
// std::cout << std::endl;
|
|
||||||
|
|
||||||
// std::cout << "ACCURACY, AVG, ADAM: " << 100 * scoreADAM/TRIAL_NUM << "%" << std::endl;
|
|
||||||
|
|
||||||
|
|
||||||
// std::cout << "Total epoch num: 300" << std::endl;
|
|
||||||
// std::cout << "Method: 1st Order w/ Jacobians" << std::endl;
|
|
||||||
|
|
||||||
// LinReg model(alg.transpose(inputSet), outputSet); // Can use Lasso, Ridge, ElasticNet Reg
|
|
||||||
|
|
||||||
// model.gradientDescent(0.001, 300, 0);
|
|
||||||
|
|
||||||
|
|
||||||
// std::cout << "--------------------------------------------" << std::endl;
|
|
||||||
// std::cout << "Total epoch num: 300" << std::endl;
|
|
||||||
// std::cout << "Method: Newtonian 2nd Order w/ Hessians" << std::endl;
|
|
||||||
// LinReg model2(alg.transpose(inputSet), outputSet);
|
|
||||||
|
|
||||||
// model2.NewtonRaphson(1.5, 300, 0);
|
|
||||||
|
|
||||||
|
|
||||||
// // LOGISTIC REGRESSION
|
|
||||||
// auto [inputSet, outputSet] = data.load rastCancer();
|
|
||||||
// LogReg model(inputSet, outputSet);
|
|
||||||
// model.SGD(0.001, 100000, 0);
|
|
||||||
// alg.printVector(model.modelSetTest(inputSet));
|
|
||||||
// std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl;
|
|
||||||
|
|
||||||
// // PROBIT REGRESSION
|
|
||||||
// std::vector<std::vector<real_t>> inputSet;
|
|
||||||
// std::vector<real_t> outputSet;
|
|
||||||
// data.setData(30, "/Users/marcmelikyan/Desktop/Data/BreastCancer.csv", inputSet, outputSet);
|
|
||||||
// ProbitReg model(inputSet, outputSet);
|
|
||||||
// model.SGD(0.001, 10000, 1);
|
|
||||||
// alg.printVector(model.modelSetTest(inputSet));
|
|
||||||
// std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl;
|
|
||||||
|
|
||||||
// // CLOGLOG REGRESSION
|
|
||||||
// std::vector<std::vector<real_t>> inputSet = {{1,2,3,4,5,6,7,8}, {0,0,0,0,1,1,1,1}};
|
|
||||||
// std::vector<real_t> outputSet = {0,0,0,0,1,1,1,1};
|
|
||||||
// CLogLogReg model(alg.transpose(inputSet), outputSet);
|
|
||||||
// model.SGD(0.1, 10000, 0);
|
|
||||||
// alg.printVector(model.modelSetTest(alg.transpose(inputSet)));
|
|
||||||
// std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl;
|
|
||||||
|
|
||||||
// // EXPREG REGRESSION
|
|
||||||
// std::vector<std::vector<real_t>> inputSet = {{0,1,2,3,4}};
|
|
||||||
// std::vector<real_t> outputSet = {1,2,4,8,16};
|
|
||||||
// ExpReg model(alg.transpose(inputSet), outputSet);
|
|
||||||
// model.SGD(0.001, 10000, 0);
|
|
||||||
// alg.printVector(model.modelSetTest(alg.transpose(inputSet)));
|
|
||||||
// std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl;
|
|
||||||
|
|
||||||
// // TANH REGRESSION
|
|
||||||
// std::vector<std::vector<real_t>> inputSet = {{4,3,0,-3,-4}, {0,0,0,1,1}};
|
|
||||||
// std::vector<real_t> outputSet = {1,1,0,-1,-1};
|
|
||||||
// TanhReg model(alg.transpose(inputSet), outputSet);
|
|
||||||
// model.SGD(0.1, 10000, 0);
|
|
||||||
// alg.printVector(model.modelSetTest(alg.transpose(inputSet)));
|
|
||||||
// std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl;
|
|
||||||
|
|
||||||
// // SOFTMAX REGRESSION
|
|
||||||
// auto [inputSet, outputSet] = data.loadIris();
|
|
||||||
// SoftmaxReg model(inputSet, outputSet);
|
|
||||||
// model.SGD(0.1, 10000, 1);
|
|
||||||
// alg.printMatrix(model.modelSetTest(inputSet));
|
|
||||||
// std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl;
|
|
||||||
|
|
||||||
// // SUPPORT VECTOR CLASSIFICATION
|
|
||||||
// auto [inputSet, outputSet] = data.loadBreastCancerSVC();
|
|
||||||
// SVC model(inputSet, outputSet, 1);
|
|
||||||
// model.SGD(0.00001, 100000, 1);
|
|
||||||
// alg.printVector(model.modelSetTest(inputSet));
|
|
||||||
// std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl;
|
|
||||||
|
|
||||||
// SoftmaxReg model(inputSet, outputSet);
|
|
||||||
// model.SGD(0.001, 20000, 0);
|
|
||||||
// alg.printMatrix(model.modelSetTest(inputSet));
|
|
||||||
|
|
||||||
// // MLP
|
|
||||||
// std::vector<std::vector<real_t>> inputSet = {{0,0,1,1}, {0,1,0,1}};
|
|
||||||
// inputSet = alg.transpose(inputSet);
|
|
||||||
// std::vector<real_t> outputSet = {0,1,1,0};
|
|
||||||
|
|
||||||
// MLP model(inputSet, outputSet, 2);
|
|
||||||
// model.gradientDescent(0.1, 10000, 0);
|
|
||||||
// alg.printVector(model.modelSetTest(inputSet));
|
|
||||||
// std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl;
|
|
||||||
|
|
||||||
// // SOFTMAX NETWORK
|
|
||||||
// auto [inputSet, outputSet] = data.loadWine();
|
|
||||||
// SoftmaxNet model(inputSet, outputSet, 1);
|
|
||||||
// model.gradientDescent(0.01, 100000, 1);
|
|
||||||
// alg.printMatrix(model.modelSetTest(inputSet));
|
|
||||||
// std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl;
|
|
||||||
|
|
||||||
// // AUTOENCODER
|
|
||||||
// std::vector<std::vector<real_t>> inputSet = {{1,2,3,4,5,6,7,8,9,10}, {3,5,9,12,15,18,21,24,27,30}};
|
|
||||||
// AutoEncoder model(alg.transpose(inputSet), 5);
|
|
||||||
// model.SGD(0.001, 300000, 0);
|
|
||||||
// alg.printMatrix(model.modelSetTest(alg.transpose(inputSet)));
|
|
||||||
// std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl;
|
|
||||||
|
|
||||||
// DYNAMICALLY SIZED ANN
|
|
||||||
// Possible Weight Init Methods: Default, Uniform, HeNormal, HeUniform, XavierNormal, XavierUniform
|
|
||||||
// Possible Activations: Linear, Sigmoid, Swish, Softplus, Softsign, CLogLog, Ar{Sinh, Cosh, Tanh, Csch, Sech, Coth}, GaussianCDF, GELU, UnitStep
|
|
||||||
// Possible Loss Functions: MSE, RMSE, MBE, LogLoss, CrossEntropy, HingeLoss
|
|
||||||
// std::vector<std::vector<real_t>> inputSet = {{0,0,1,1}, {0,1,0,1}};
|
|
||||||
// std::vector<real_t> outputSet = {0,1,1,0};
|
|
||||||
// ANN ann(alg.transpose(inputSet), outputSet);
|
|
||||||
// ann.addLayer(2, "Cosh");
|
|
||||||
// ann.addOutputLayer("Sigmoid", "LogLoss");
|
|
||||||
|
|
||||||
|
|
||||||
// ann.AMSGrad(0.1, 10000, 1, 0.9, 0.999, 0.000001, 1);
|
|
||||||
// ann.Adadelta(1, 1000, 2, 0.9, 0.000001, 1);
|
|
||||||
// ann.Momentum(0.1, 8000, 2, 0.9, true, 1);
|
|
||||||
|
|
||||||
//ann.setLearningRateScheduler("Step", 0.5, 1000);
|
|
||||||
// ann.gradientDescent(0.01, 30000);
|
|
||||||
// alg.printVector(ann.modelSetTest(alg.transpose(inputSet)));
|
|
||||||
// std::cout << "ACCURACY: " << 100 * ann.score() << "%" << std::endl;
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> outputSet = {{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20},
|
|
||||||
{2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40}};
|
|
||||||
|
|
||||||
WGAN gan(2, alg.transpose(outputSet)); // our gan is a wasserstein gan (wgan)
|
|
||||||
gan.addLayer(5, "Sigmoid");
|
|
||||||
gan.addLayer(2, "RELU");
|
|
||||||
gan.addLayer(5, "Sigmoid");
|
|
||||||
gan.addOutputLayer(); // User can specify weight init- if necessary.
|
|
||||||
gan.gradientDescent(0.1, 55000, 0);
|
|
||||||
std::cout << "GENERATED INPUT: (Gaussian-sampled noise):" << std::endl;
|
|
||||||
alg.printMatrix(gan.generateExample(100));
|
|
||||||
|
|
||||||
|
|
||||||
// typedef std::vector<std::vector<real_t>> Matrix;
|
|
||||||
// typedef std::vector<real_t> Vector;
|
|
||||||
|
|
||||||
// Matrix inputSet = {{0,0}, {0,1}, {1,0}, {1,1}}; // XOR
|
|
||||||
// Vector outputSet = {0,1,1,0};
|
|
||||||
|
|
||||||
// ANN ann(inputSet, outputSet);
|
|
||||||
// ann.addLayer(5, "Sigmoid");
|
|
||||||
// ann.addLayer(8, "Sigmoid"); // Add more layers as needed.
|
|
||||||
// ann.addOutputLayer("Sigmoid", "LogLoss");
|
|
||||||
// ann.gradientDescent(1, 20000, 1);
|
|
||||||
|
|
||||||
// Vector predictions = ann.modelSetTest(inputSet);
|
|
||||||
// alg.printVector(predictions); // Testing out the model's preds for train set.
|
|
||||||
// std::cout << "ACCURACY: " << 100 * ann.score() << "%" << std::endl; // Accuracy.
|
|
||||||
|
|
||||||
// // DYNAMICALLY SIZED MANN (Multidimensional Output ANN)
|
|
||||||
// std::vector<std::vector<real_t>> inputSet = {{1,2,3},{2,4,6},{3,6,9},{4,8,12}};
|
|
||||||
// std::vector<std::vector<real_t>> outputSet = {{1,5}, {2,10}, {3,15}, {4,20}};
|
|
||||||
|
|
||||||
// MANN mann(inputSet, outputSet);
|
|
||||||
// mann.addOutputLayer("Linear", "MSE");
|
|
||||||
// mann.gradientDescent(0.001, 80000, 0);
|
|
||||||
// alg.printMatrix(mann.modelSetTest(inputSet));
|
|
||||||
// std::cout << "ACCURACY: " << 100 * mann.score() << "%" << std::endl;
|
|
||||||
|
|
||||||
// std::vector<std::vector<real_t>> inputSet;
|
|
||||||
// std::vector<real_t> tempOutputSet;
|
|
||||||
// data.setData(4, "/Users/marcmelikyan/Desktop/Data/Iris.csv", inputSet, tempOutputSet);
|
|
||||||
// std::vector<std::vector<real_t>> outputSet = data.oneHotRep(tempOutputSet, 3);
|
|
||||||
|
|
||||||
// TRAIN TEST SPLIT CHECK
|
|
||||||
// std::vector<std::vector<real_t>> inputSet1 = {{1,2,3,4,5,6,7,8,9,10}, {3,5,9,12,15,18,21,24,27,30}};
|
|
||||||
// std::vector<std::vector<real_t>> outputSet1 = {{2,4,6,8,10,12,14,16,18,20}};
|
|
||||||
// auto [inputSet, outputSet, inputTestSet, outputTestSet] = data.trainTestSplit(alg.transpose(inputSet1), alg.transpose(outputSet1), 0.2);
|
|
||||||
// alg.printMatrix(inputSet);
|
|
||||||
// alg.printMatrix(outputSet);
|
|
||||||
// alg.printMatrix(inputTestSet);
|
|
||||||
// alg.printMatrix(outputTestSet);
|
|
||||||
|
|
||||||
|
|
||||||
// alg.printMatrix(inputSet);
|
|
||||||
// alg.printMatrix(outputSet);
|
|
||||||
|
|
||||||
// MANN mann(inputSet, outputSet);
|
|
||||||
// mann.addLayer(100, "RELU", "XavierNormal");
|
|
||||||
// mann.addOutputLayer("Softmax", "CrossEntropy", "XavierNormal");
|
|
||||||
// mann.gradientDescent(0.1, 80000, 1);
|
|
||||||
// alg.printMatrix(mann.modelSetTest(inputSet));
|
|
||||||
// std::cout << "ACCURACY: " << 100 * mann.score() << "%" << std::endl;
|
|
||||||
|
|
||||||
// // NAIVE BAYES
|
|
||||||
// std::vector<std::vector<real_t>> inputSet = {{1,1,1,1,1}, {0,0,1,1,1}, {0,0,1,0,1}};
|
|
||||||
// std::vector<real_t> outputSet = {0,1,0,1,1};
|
|
||||||
|
|
||||||
// MultinomialNB MNB(alg.transpose(inputSet), outputSet, 2);
|
|
||||||
// alg.printVector(MNB.modelSetTest(alg.transpose(inputSet)));
|
|
||||||
|
|
||||||
// BernoulliNB BNB(alg.transpose(inputSet), outputSet);
|
|
||||||
// alg.printVector(BNB.modelSetTest(alg.transpose(inputSet)));
|
|
||||||
|
|
||||||
// GaussianNB GNB(alg.transpose(inputSet), outputSet, 2);
|
|
||||||
// alg.printVector(GNB.modelSetTest(alg.transpose(inputSet)));
|
|
||||||
|
|
||||||
// // KMeans
|
|
||||||
// std::vector<std::vector<real_t>> inputSet = {{32, 0, 7}, {2, 28, 17}, {0, 9, 23}};
|
|
||||||
// KMeans kmeans(inputSet, 3, "KMeans++");
|
|
||||||
// kmeans.train(3, 1);
|
|
||||||
// std::cout << std::endl;
|
|
||||||
// alg.printMatrix(kmeans.modelSetTest(inputSet)); // Returns the assigned centroids to each of the respective training examples
|
|
||||||
// std::cout << std::endl;
|
|
||||||
// alg.printVector(kmeans.silhouette_scores());
|
|
||||||
|
|
||||||
// // kNN
|
|
||||||
// std::vector<std::vector<real_t>> inputSet = {{1,2,3,4,5,6,7,8}, {0,0,0,0,1,1,1,1}};
|
|
||||||
// std::vector<real_t> outputSet = {0,0,0,0,1,1,1,1};
|
|
||||||
// kNN knn(alg.transpose(inputSet), outputSet, 8);
|
|
||||||
// alg.printVector(knn.modelSetTest(alg.transpose(inputSet)));
|
|
||||||
// std::cout << "ACCURACY: " << 100 * knn.score() << "%" << std::endl;
|
|
||||||
|
|
||||||
|
|
||||||
// // CONVOLUTION, POOLING, ETC..
|
|
||||||
// std::vector<std::vector<real_t>> input = {
|
|
||||||
// {1},
|
|
||||||
// };
|
|
||||||
|
|
||||||
// std::vector<std::vector<std::vector<real_t>>> tensorSet;
|
|
||||||
// tensorSet.push_back(input);
|
|
||||||
// tensorSet.push_back(input);
|
|
||||||
// tensorSet.push_back(input);
|
|
||||||
|
|
||||||
// alg.printTensor(data.rgb2xyz(tensorSet));
|
|
||||||
|
|
||||||
// std::vector<std::vector<real_t>> input = {
|
|
||||||
// {62,55,55,54,49,48,47,55},
|
|
||||||
// {62,57,54,52,48,47,48,53},
|
|
||||||
// {61,60,52,49,48,47,49,54},
|
|
||||||
// {63,61,60,60,63,65,68,65},
|
|
||||||
// {67,67,70,74,79,85,91,92},
|
|
||||||
// {82,95,101,106,114,115,112,117},
|
|
||||||
// {96,111,115,119,128,128,130,127},
|
|
||||||
// {109,121,127,133,139,141,140,133},
|
|
||||||
// };
|
|
||||||
|
|
||||||
// Transforms trans;
|
|
||||||
|
|
||||||
// alg.printMatrix(trans.discreteCosineTransform(input));
|
|
||||||
|
|
||||||
// alg.printMatrix(conv.convolve(input, conv.getPrewittVertical(), 1)); // Can use padding
|
|
||||||
// alg.printMatrix(conv.pool(input, 4, 4, "Max")); // Can use Max, Min, or Average pooling.
|
|
||||||
|
|
||||||
// std::vector<std::vector<std::vector<real_t>>> tensorSet;
|
|
||||||
// tensorSet.push_back(input);
|
|
||||||
// tensorSet.push_back(input);
|
|
||||||
// alg.printVector(conv.globalPool(tensorSet, "Average")); // Can use Max, Min, or Average global pooling.
|
|
||||||
|
|
||||||
// std::vector<std::vector<real_t>> laplacian = {{1, 1, 1}, {1, -4, 1}, {1, 1, 1}};
|
|
||||||
// alg.printMatrix(conv.convolve(conv.gaussianFilter2D(5, 1), laplacian, 1));
|
|
||||||
|
|
||||||
|
|
||||||
// // PCA, SVD, eigenvalues & eigenvectors
|
|
||||||
// std::vector<std::vector<real_t>> inputSet = {{1,1}, {1,1}};
|
|
||||||
// auto [Eigenvectors, Eigenvalues] = alg.eig(inputSet);
|
|
||||||
// std::cout << "Eigenvectors:" << std::endl;
|
|
||||||
// alg.printMatrix(Eigenvectors);
|
|
||||||
// std::cout << std::endl;
|
|
||||||
// std::cout << "Eigenvalues:" << std::endl;
|
|
||||||
// alg.printMatrix(Eigenvalues);
|
|
||||||
|
|
||||||
// auto [U, S, Vt] = alg.SVD(inputSet);
|
|
||||||
|
|
||||||
// // PCA done using Jacobi's method to approximate eigenvalues and eigenvectors.
|
|
||||||
// PCA dr(inputSet, 1); // 1 dimensional representation.
|
|
||||||
// std::cout << std::endl;
|
|
||||||
// std::cout << "Dimensionally reduced representation:" << std::endl;
|
|
||||||
// alg.printMatrix(dr.principalComponents());
|
|
||||||
// std::cout << "SCORE: " << dr.score() << std::endl;
|
|
||||||
|
|
||||||
|
|
||||||
// // NLP/DATA
|
|
||||||
// std::string verbText = "I am appearing and thinking, as well as conducting.";
|
|
||||||
// std::cout << "Stemming Example:" << std::endl;
|
|
||||||
// std::cout << data.stemming(verbText) << std::endl;
|
|
||||||
// std::cout << std::endl;
|
|
||||||
|
|
||||||
// std::vector<std::string> sentences = {"He is a good boy", "She is a good girl", "The boy and girl are good"};
|
|
||||||
// std::cout << "Bag of Words Example:" << std::endl;
|
|
||||||
// alg.printMatrix(data.BOW(sentences, "Default"));
|
|
||||||
// std::cout << std::endl;
|
|
||||||
// std::cout << "TFIDF Example:" << std::endl;
|
|
||||||
// alg.printMatrix(data.TFIDF(sentences));
|
|
||||||
// std::cout << std::endl;
|
|
||||||
|
|
||||||
// std::cout << "Tokenization:" << std::endl;
|
|
||||||
// alg.printVector(data.tokenize(verbText));
|
|
||||||
// std::cout << std::endl;
|
|
||||||
|
|
||||||
// std::cout << "Word2Vec:" << std::endl;
|
|
||||||
// std::string textArchive = {"He is a good boy. She is a good girl. The boy and girl are good."};
|
|
||||||
// std::vector<std::string> corpus = data.splitSentences(textArchive);
|
|
||||||
// auto [wordEmbeddings, wordList] = data.word2Vec(corpus, "CBOW", 2, 2, 0.1, 10000); // Can use either CBOW or Skip-n-gram.
|
|
||||||
// alg.printMatrix(wordEmbeddings);
|
|
||||||
// std::cout << std::endl;
|
|
||||||
|
|
||||||
// std::vector<std::string> textArchive = {"pizza", "pizza hamburger cookie", "hamburger", "ramen", "sushi", "ramen sushi"};
|
|
||||||
|
|
||||||
// alg.printMatrix(data.LSA(textArchive, 2));
|
|
||||||
// //alg.printMatrix(data.BOW(textArchive, "Default"));
|
|
||||||
// std::cout << std::endl;
|
|
||||||
|
|
||||||
|
|
||||||
// std::vector<std::vector<real_t>> inputSet = {{1,2},{2,3},{3,4},{4,5},{5,6}};
|
|
||||||
// std::cout << "Feature Scaling Example:" << std::endl;
|
|
||||||
// alg.printMatrix(data.featureScaling(inputSet));
|
|
||||||
// std::cout << std::endl;
|
|
||||||
|
|
||||||
// std::cout << "Mean Centering Example:" << std::endl;
|
|
||||||
// alg.printMatrix(data.meanCentering(inputSet));
|
|
||||||
// std::cout << std::endl;
|
|
||||||
|
|
||||||
// std::cout << "Mean Normalization Example:" << std::endl;
|
|
||||||
// alg.printMatrix(data.meanNormalization(inputSet));
|
|
||||||
// std::cout << std::endl;
|
|
||||||
|
|
||||||
// // Outlier Finder
|
|
||||||
// std::vector<real_t> inputSet = {1,2,3,4,5,6,7,8,9,23554332523523};
|
|
||||||
// OutlierFinder outlierFinder(2); // Any datapoint outside of 2 stds from the mean is marked as an outlier.
|
|
||||||
// alg.printVector(outlierFinder.modelTest(inputSet));
|
|
||||||
|
|
||||||
// // Testing new Functions
|
|
||||||
// real_t z_s = 0.001;
|
|
||||||
// std::cout << avn.logit(z_s) << std::endl;
|
|
||||||
// std::cout << avn.logit(z_s, 1) << std::endl;
|
|
||||||
|
|
||||||
// std::vector<real_t> z_v = {0.001};
|
|
||||||
// alg.printVector(avn.logit(z_v));
|
|
||||||
// alg.printVector(avn.logit(z_v, 1));
|
|
||||||
|
|
||||||
// std::vector<std::vector<real_t>> Z_m = {{0.001}};
|
|
||||||
// alg.printMatrix(avn.logit(Z_m));
|
|
||||||
// alg.printMatrix(avn.logit(Z_m, 1));
|
|
||||||
|
|
||||||
// std::cout << alg.trace({{1,2}, {3,4}}) << std::endl;
|
|
||||||
// alg.printMatrix(alg.pinverse({{1,2}, {3,4}}));
|
|
||||||
// alg.printMatrix(alg.diag({1,2,3,4,5}));
|
|
||||||
// alg.printMatrix(alg.kronecker_product({{1,2,3,4,5}}, {{6,7,8,9,10}}));
|
|
||||||
// alg.printMatrix(alg.matrixPower({{5,5},{5,5}}, 2));
|
|
||||||
// alg.printVector(alg.solve({{1,1}, {1.5, 4.0}}, {2200, 5050}));
|
|
||||||
|
|
||||||
// std::vector<std::vector<real_t>> matrixOfCubes = {{1,2,64,27}};
|
|
||||||
// std::vector<real_t> vectorOfCubes = {1,2,64,27};
|
|
||||||
// alg.printMatrix(alg.cbrt(matrixOfCubes));
|
|
||||||
// alg.printVector(alg.cbrt(vectorOfCubes));
|
|
||||||
// std::cout << alg.max({{1,2,3,4,5}, {6,5,3,4,1}, {9,9,9,9,9}}) << std::endl;
|
|
||||||
// std::cout << alg.min({{1,2,3,4,5}, {6,5,3,4,1}, {9,9,9,9,9}}) << std::endl;
|
|
||||||
|
|
||||||
// std::vector<real_t> chicken;
|
|
||||||
// data.getImage("../../Data/apple.jpeg", chicken);
|
|
||||||
// alg.printVector(chicken);
|
|
||||||
|
|
||||||
// std::vector<std::vector<real_t>> P = {{12, -51, 4}, {6, 167, -68}, {-4, 24, -41}};
|
|
||||||
// alg.printMatrix(P);
|
|
||||||
|
|
||||||
// alg.printMatrix(alg.gramSchmidtProcess(P));
|
|
||||||
|
|
||||||
// auto [Q, R] = alg.QRD(P); // It works!
|
|
||||||
|
|
||||||
// alg.printMatrix(Q);
|
|
||||||
|
|
||||||
// alg.printMatrix(R);
|
|
||||||
|
|
||||||
// // Checking positive-definiteness checker. For Cholesky Decomp.
|
|
||||||
// std::vector<std::vector<real_t>> A =
|
|
||||||
// {
|
|
||||||
// {1,-1,-1,-1},
|
|
||||||
// {-1,2,2,2},
|
|
||||||
// {-1,2,3,1},
|
|
||||||
// {-1,2,1,4}
|
|
||||||
// };
|
|
||||||
|
|
||||||
// std::cout << std::boolalpha << alg.positiveDefiniteChecker(A) << std::endl;
|
|
||||||
// auto [L, Lt] = alg.chol(A); // works.
|
|
||||||
// alg.printMatrix(L);
|
|
||||||
// alg.printMatrix(Lt);
|
|
||||||
|
|
||||||
// Checks for numerical analysis class.
|
|
||||||
NumericalAnalysis numAn;
|
|
||||||
|
|
||||||
//std::cout << numAn.quadraticApproximation(f, 0, 1) << std::endl;
|
|
||||||
|
|
||||||
// std::cout << numAn.cubicApproximation(f, 0, 1.001) << std::endl;
|
|
||||||
|
|
||||||
// std::cout << f(1.001) << std::endl;
|
|
||||||
|
|
||||||
// std::cout << numAn.quadraticApproximation(f_mv, {0, 0, 0}, {1, 1, 1}) << std::endl;
|
|
||||||
|
|
||||||
// std::cout << numAn.numDiff(&f, 1) << std::endl;
|
|
||||||
// std::cout << numAn.newtonRaphsonMethod(&f, 1, 1000) << std::endl;
|
|
||||||
//std::cout << numAn.invQuadraticInterpolation(&f, {100, 2,1.5}, 10) << std::endl;
|
|
||||||
|
|
||||||
// std::cout << numAn.numDiff(&f_mv, {1, 1}, 1) << std::endl; // Derivative w.r.t. x.
|
|
||||||
|
|
||||||
// alg.printVector(numAn.jacobian(&f_mv, {1, 1}));
|
|
||||||
|
|
||||||
//std::cout << numAn.numDiff_2(&f, 2) << std::endl;
|
|
||||||
|
|
||||||
//std::cout << numAn.numDiff_3(&f, 2) << std::endl;
|
|
||||||
|
|
||||||
// std::cout << numAn.numDiff_2(&f_mv, {2, 2, 500}, 2, 2) << std::endl;
|
|
||||||
//std::cout << numAn.numDiff_3(&f_mv, {2, 1000, 130}, 0, 0, 0) << std::endl;
|
|
||||||
|
|
||||||
// alg.printTensor(numAn.thirdOrderTensor(&f_mv, {1, 1, 1}));
|
|
||||||
// std::cout << "Our Hessian." << std::endl;
|
|
||||||
// alg.printMatrix(numAn.hessian(&f_mv, {2, 2, 500}));
|
|
||||||
|
|
||||||
// std::cout << numAn.laplacian(f_mv, {1,1,1}) << std::endl;
|
|
||||||
|
|
||||||
// std::vector<std::vector<std::vector<real_t>>> tensor;
|
|
||||||
// tensor.push_back({{1,2}, {1,2}, {1,2}});
|
|
||||||
// tensor.push_back({{1,2}, {1,2}, {1,2}});
|
|
||||||
|
|
||||||
// alg.printTensor(tensor);
|
|
||||||
|
|
||||||
// alg.printMatrix(alg.tensor_vec_mult(tensor, {1,2}));
|
|
||||||
|
|
||||||
// std::cout << numAn.cubicApproximation(f_mv, {0, 0, 0}, {1, 1, 1}) << std::endl;
|
|
||||||
|
|
||||||
// std::cout << numAn.eulerianMethod(f_prime, {1, 1}, 1.5, 0.000001) << std::endl;
|
|
||||||
|
|
||||||
// std::cout << numAn.eulerianMethod(f_prime_2var, {2, 3}, 2.5, 0.00000001) << std::endl;
|
|
||||||
|
|
||||||
// alg.printMatrix(conv.dx(A));
|
|
||||||
// alg.printMatrix(conv.dy(A));
|
|
||||||
|
|
||||||
// alg.printMatrix(conv.gradOrientation(A));
|
|
||||||
|
|
||||||
// std::vector<std::vector<real_t>> A =
|
|
||||||
// {
|
|
||||||
// {1,0,0,0},
|
|
||||||
// {0,0,0,0},
|
|
||||||
// {0,0,0,0},
|
|
||||||
// {0,0,0,1}
|
|
||||||
// };
|
|
||||||
|
|
||||||
// std::vector<std::vector<std::string>> h = conv.harrisCornerDetection(A);
|
|
||||||
|
|
||||||
// for(int i = 0; i < h.size(); i++){
|
|
||||||
// for(int j = 0; j < h[i].size(); j++){
|
|
||||||
// std::cout << h[i][j] << " ";
|
|
||||||
// }
|
|
||||||
// std::cout << std::endl;
|
|
||||||
// } // Harris detector works. Life is good!
|
|
||||||
|
|
||||||
// std::vector<real_t> a = {3,4,4};
|
|
||||||
// std::vector<real_t> b = {4,4,4};
|
|
||||||
// alg.printVector(alg.cross(a,b));
|
|
||||||
|
|
||||||
//SUPPORT VECTOR CLASSIFICATION (kernel method)
|
|
||||||
// std::vector<std::vector<real_t>> inputSet;
|
|
||||||
// std::vector<real_t> outputSet;
|
|
||||||
// data.setData(30, "/Users/marcmelikyan/Desktop/Data/BreastCancerSVM.csv", inputSet, outputSet);
|
|
||||||
|
|
||||||
// std::vector<std::vector<real_t>> inputSet;
|
|
||||||
// std::vector<real_t> outputSet;
|
|
||||||
// data.setData(4, "/Users/marcmelikyan/Desktop/Data/IrisSVM.csv", inputSet, outputSet);
|
|
||||||
|
|
||||||
// DualSVC kernelSVM(inputSet, outputSet, 1000);
|
|
||||||
// kernelSVM.gradientDescent(0.0001, 20, 1);
|
|
||||||
|
|
||||||
// std::vector<std::vector<real_t>> linearlyIndependentMat =
|
|
||||||
|
|
||||||
// {
|
|
||||||
// {1,2,3,4},
|
|
||||||
// {234538495,4444,6111,55}
|
|
||||||
// };
|
|
||||||
|
|
||||||
// std::cout << "True of false: linearly independent?: " << std::boolalpha << alg.linearIndependenceChecker(linearlyIndependentMat) << std::endl;
|
|
||||||
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
@ -1,953 +0,0 @@
|
|||||||
//
|
|
||||||
// Activation.cpp
|
|
||||||
//
|
|
||||||
// Created by Marc Melikyan on 1/16/21.
|
|
||||||
//
|
|
||||||
|
|
||||||
#include "activation_old.h"
|
|
||||||
#include "../lin_alg/lin_alg_old.h"
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
#include <cmath>
|
|
||||||
#include <iostream>
|
|
||||||
|
|
||||||
#ifndef M_PI
|
|
||||||
#define M_PI 3.141592653
|
|
||||||
#endif
|
|
||||||
|
|
||||||
real_t MLPPActivationOld::linear(real_t z, bool deriv) {
|
|
||||||
if (deriv) {
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
return z;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPActivationOld::linear(std::vector<real_t> z, bool deriv) {
|
|
||||||
if (deriv) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
return alg.onevec(z.size());
|
|
||||||
}
|
|
||||||
return z;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPActivationOld::linear(std::vector<std::vector<real_t>> z, bool deriv) {
|
|
||||||
if (deriv) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
return alg.onemat(z.size(), z[0].size());
|
|
||||||
}
|
|
||||||
return z;
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPActivationOld::sigmoid(real_t z, bool deriv) {
|
|
||||||
if (deriv) {
|
|
||||||
return sigmoid(z) * (1 - sigmoid(z));
|
|
||||||
}
|
|
||||||
return 1 / (1 + exp(-z));
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPActivationOld::sigmoid(std::vector<real_t> z, bool deriv) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
if (deriv) {
|
|
||||||
return alg.subtraction(sigmoid(z), alg.hadamard_product(sigmoid(z), sigmoid(z)));
|
|
||||||
}
|
|
||||||
return alg.elementWiseDivision(alg.onevec(z.size()), alg.addition(alg.onevec(z.size()), alg.exp(alg.scalarMultiply(-1, z))));
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPActivationOld::sigmoid(std::vector<std::vector<real_t>> z, bool deriv) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
if (deriv) {
|
|
||||||
return alg.subtraction(sigmoid(z), alg.hadamard_product(sigmoid(z), sigmoid(z)));
|
|
||||||
}
|
|
||||||
return alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), alg.addition(alg.onemat(z.size(), z[0].size()), alg.exp(alg.scalarMultiply(-1, z))));
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPActivationOld::softmax(std::vector<real_t> z, bool deriv) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
std::vector<real_t> a;
|
|
||||||
a.resize(z.size());
|
|
||||||
std::vector<real_t> expZ = alg.exp(z);
|
|
||||||
real_t sum = 0;
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < z.size(); i++) {
|
|
||||||
sum += expZ[i];
|
|
||||||
}
|
|
||||||
for (uint32_t i = 0; i < z.size(); i++) {
|
|
||||||
a[i] = expZ[i] / sum;
|
|
||||||
}
|
|
||||||
return a;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPActivationOld::softmax(std::vector<std::vector<real_t>> z, bool deriv) {
|
|
||||||
std::vector<std::vector<real_t>> a;
|
|
||||||
a.resize(z.size());
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < z.size(); i++) {
|
|
||||||
a[i] = softmax(z[i]);
|
|
||||||
}
|
|
||||||
return a;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPActivationOld::adjSoftmax(std::vector<real_t> z) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
std::vector<real_t> a;
|
|
||||||
real_t C = -*std::max_element(z.begin(), z.end());
|
|
||||||
z = alg.scalarAdd(C, z);
|
|
||||||
|
|
||||||
return softmax(z);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPActivationOld::adjSoftmax(std::vector<std::vector<real_t>> z) {
|
|
||||||
std::vector<std::vector<real_t>> a;
|
|
||||||
a.resize(z.size());
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < z.size(); i++) {
|
|
||||||
a[i] = adjSoftmax(z[i]);
|
|
||||||
}
|
|
||||||
return a;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPActivationOld::softmaxDeriv(std::vector<real_t> z) {
|
|
||||||
std::vector<std::vector<real_t>> deriv;
|
|
||||||
std::vector<real_t> a = softmax(z);
|
|
||||||
deriv.resize(a.size());
|
|
||||||
for (uint32_t i = 0; i < deriv.size(); i++) {
|
|
||||||
deriv[i].resize(a.size());
|
|
||||||
}
|
|
||||||
for (uint32_t i = 0; i < a.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < z.size(); j++) {
|
|
||||||
if (i == j) {
|
|
||||||
deriv[i][j] = a[i] * (1 - a[i]);
|
|
||||||
} else {
|
|
||||||
deriv[i][j] = -a[i] * a[j];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return deriv;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> MLPPActivationOld::softmaxDeriv(std::vector<std::vector<real_t>> z) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> deriv;
|
|
||||||
std::vector<std::vector<real_t>> a = softmax(z);
|
|
||||||
|
|
||||||
deriv.resize(a.size());
|
|
||||||
for (uint32_t i = 0; i < deriv.size(); i++) {
|
|
||||||
deriv[i].resize(a.size());
|
|
||||||
}
|
|
||||||
for (uint32_t i = 0; i < a.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < z.size(); j++) {
|
|
||||||
if (i == j) {
|
|
||||||
deriv[i][j] = alg.subtraction(a[i], alg.hadamard_product(a[i], a[i]));
|
|
||||||
} else {
|
|
||||||
deriv[i][j] = alg.scalarMultiply(-1, alg.hadamard_product(a[i], a[j]));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return deriv;
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPActivationOld::softplus(real_t z, bool deriv) {
|
|
||||||
if (deriv) {
|
|
||||||
return sigmoid(z);
|
|
||||||
}
|
|
||||||
return std::log(1 + exp(z));
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPActivationOld::softplus(std::vector<real_t> z, bool deriv) {
|
|
||||||
if (deriv) {
|
|
||||||
return sigmoid(z);
|
|
||||||
}
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
return alg.log(alg.addition(alg.onevec(z.size()), alg.exp(z)));
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPActivationOld::softplus(std::vector<std::vector<real_t>> z, bool deriv) {
|
|
||||||
if (deriv) {
|
|
||||||
return sigmoid(z);
|
|
||||||
}
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
return alg.log(alg.addition(alg.onemat(z.size(), z[0].size()), alg.exp(z)));
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPActivationOld::softsign(real_t z, bool deriv) {
|
|
||||||
if (deriv) {
|
|
||||||
return 1 / ((1 + abs(z)) * (1 + abs(z)));
|
|
||||||
}
|
|
||||||
return z / (1 + abs(z));
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPActivationOld::softsign(std::vector<real_t> z, bool deriv) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
if (deriv) {
|
|
||||||
return alg.elementWiseDivision(alg.onevec(z.size()), alg.exponentiate(alg.addition(alg.onevec(z.size()), alg.abs(z)), 2));
|
|
||||||
}
|
|
||||||
return alg.elementWiseDivision(z, alg.addition(alg.onevec(z.size()), alg.abs(z)));
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPActivationOld::softsign(std::vector<std::vector<real_t>> z, bool deriv) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
if (deriv) {
|
|
||||||
return alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), alg.exponentiate(alg.addition(alg.onemat(z.size(), z[0].size()), alg.abs(z)), 2));
|
|
||||||
}
|
|
||||||
return alg.elementWiseDivision(z, alg.addition(alg.onemat(z.size(), z[0].size()), alg.abs(z)));
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPActivationOld::gaussianCDF(real_t z, bool deriv) {
|
|
||||||
if (deriv) {
|
|
||||||
return (1 / sqrt(2 * M_PI)) * exp(-z * z / 2);
|
|
||||||
}
|
|
||||||
return 0.5 * (1 + erf(z / sqrt(2)));
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPActivationOld::gaussianCDF(std::vector<real_t> z, bool deriv) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
if (deriv) {
|
|
||||||
return alg.scalarMultiply(1 / sqrt(2 * M_PI), alg.exp(alg.scalarMultiply(-1 / 2, alg.hadamard_product(z, z))));
|
|
||||||
}
|
|
||||||
return alg.scalarMultiply(0.5, alg.addition(alg.onevec(z.size()), alg.erf(alg.scalarMultiply(1 / sqrt(2), z))));
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPActivationOld::gaussianCDF(std::vector<std::vector<real_t>> z, bool deriv) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
if (deriv) {
|
|
||||||
return alg.scalarMultiply(1 / sqrt(2 * M_PI), alg.exp(alg.scalarMultiply(-1 / 2, alg.hadamard_product(z, z))));
|
|
||||||
}
|
|
||||||
return alg.scalarMultiply(0.5, alg.addition(alg.onemat(z.size(), z[0].size()), alg.erf(alg.scalarMultiply(1 / sqrt(2), z))));
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPActivationOld::cloglog(real_t z, bool deriv) {
|
|
||||||
if (deriv) {
|
|
||||||
return exp(z - exp(z));
|
|
||||||
}
|
|
||||||
return 1 - exp(-exp(z));
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPActivationOld::cloglog(std::vector<real_t> z, bool deriv) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
if (deriv) {
|
|
||||||
return alg.exp(alg.scalarMultiply(-1, alg.exp(z)));
|
|
||||||
}
|
|
||||||
return alg.scalarMultiply(-1, alg.scalarAdd(-1, alg.exp(alg.scalarMultiply(-1, alg.exp(z)))));
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPActivationOld::cloglog(std::vector<std::vector<real_t>> z, bool deriv) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
if (deriv) {
|
|
||||||
return alg.exp(alg.scalarMultiply(-1, alg.exp(z)));
|
|
||||||
}
|
|
||||||
return alg.scalarMultiply(-1, alg.scalarAdd(-1, alg.exp(alg.scalarMultiply(-1, alg.exp(z)))));
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPActivationOld::logit(real_t z, bool deriv) {
|
|
||||||
if (deriv) {
|
|
||||||
return 1 / z - 1 / (z - 1);
|
|
||||||
}
|
|
||||||
return std::log(z / (1 - z));
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPActivationOld::logit(std::vector<real_t> z, bool deriv) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
if (deriv) {
|
|
||||||
return alg.subtraction(alg.elementWiseDivision(alg.onevec(z.size()), z), alg.elementWiseDivision(alg.onevec(z.size()), alg.subtraction(z, alg.onevec(z.size()))));
|
|
||||||
}
|
|
||||||
return alg.log(alg.elementWiseDivision(z, alg.subtraction(alg.onevec(z.size()), z)));
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPActivationOld::logit(std::vector<std::vector<real_t>> z, bool deriv) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
if (deriv) {
|
|
||||||
return alg.subtraction(alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), z), alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), alg.subtraction(z, alg.onemat(z.size(), z[0].size()))));
|
|
||||||
}
|
|
||||||
return alg.log(alg.elementWiseDivision(z, alg.subtraction(alg.onemat(z.size(), z[0].size()), z)));
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPActivationOld::unitStep(real_t z, bool deriv) {
|
|
||||||
if (deriv) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
return z < 0 ? 0 : 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPActivationOld::unitStep(std::vector<real_t> z, bool deriv) {
|
|
||||||
if (deriv) {
|
|
||||||
std::vector<real_t> lderiv;
|
|
||||||
lderiv.resize(z.size());
|
|
||||||
for (uint32_t i = 0; i < z.size(); i++) {
|
|
||||||
lderiv[i] = unitStep(z[i], true);
|
|
||||||
}
|
|
||||||
return lderiv;
|
|
||||||
}
|
|
||||||
std::vector<real_t> a;
|
|
||||||
a.resize(z.size());
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < a.size(); i++) {
|
|
||||||
a[i] = unitStep(z[i]);
|
|
||||||
}
|
|
||||||
return a;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPActivationOld::unitStep(std::vector<std::vector<real_t>> z, bool deriv) {
|
|
||||||
if (deriv) {
|
|
||||||
std::vector<std::vector<real_t>> lderiv;
|
|
||||||
lderiv.resize(z.size());
|
|
||||||
for (uint32_t i = 0; i < z.size(); i++) {
|
|
||||||
lderiv[i] = unitStep(z[i], true);
|
|
||||||
}
|
|
||||||
return lderiv;
|
|
||||||
}
|
|
||||||
std::vector<std::vector<real_t>> a;
|
|
||||||
a.resize(z.size());
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < a.size(); i++) {
|
|
||||||
a[i] = unitStep(z[i]);
|
|
||||||
}
|
|
||||||
return a;
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPActivationOld::swish(real_t z, bool deriv) {
|
|
||||||
if (deriv) {
|
|
||||||
return swish(z) + sigmoid(z) * (1 - swish(z));
|
|
||||||
}
|
|
||||||
return z * sigmoid(z);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPActivationOld::swish(std::vector<real_t> z, bool deriv) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
if (deriv) {
|
|
||||||
alg.addition(swish(z), alg.subtraction(sigmoid(z), alg.hadamard_product(sigmoid(z), swish(z))));
|
|
||||||
}
|
|
||||||
return alg.hadamard_product(z, sigmoid(z));
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPActivationOld::swish(std::vector<std::vector<real_t>> z, bool deriv) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
if (deriv) {
|
|
||||||
alg.addition(swish(z), alg.subtraction(sigmoid(z), alg.hadamard_product(sigmoid(z), swish(z))));
|
|
||||||
}
|
|
||||||
return alg.hadamard_product(z, sigmoid(z));
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPActivationOld::mish(real_t z, bool deriv) {
|
|
||||||
if (deriv) {
|
|
||||||
return sech(softplus(z)) * sech(softplus(z)) * z * sigmoid(z) + mish(z) / z;
|
|
||||||
}
|
|
||||||
return z * tanh(softplus(z));
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPActivationOld::mish(std::vector<real_t> z, bool deriv) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
if (deriv) {
|
|
||||||
return alg.addition(alg.hadamard_product(alg.hadamard_product(alg.hadamard_product(sech(softplus(z)), sech(softplus(z))), z), sigmoid(z)), alg.elementWiseDivision(mish(z), z));
|
|
||||||
}
|
|
||||||
return alg.hadamard_product(z, tanh(softplus(z)));
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPActivationOld::mish(std::vector<std::vector<real_t>> z, bool deriv) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
if (deriv) {
|
|
||||||
return alg.addition(alg.hadamard_product(alg.hadamard_product(alg.hadamard_product(sech(softplus(z)), sech(softplus(z))), z), sigmoid(z)), alg.elementWiseDivision(mish(z), z));
|
|
||||||
}
|
|
||||||
return alg.hadamard_product(z, tanh(softplus(z)));
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPActivationOld::sinc(real_t z, bool deriv) {
|
|
||||||
if (deriv) {
|
|
||||||
return (z * std::cos(z) - std::sin(z)) / (z * z);
|
|
||||||
}
|
|
||||||
return std::sin(z) / z;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPActivationOld::sinc(std::vector<real_t> z, bool deriv) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
if (deriv) {
|
|
||||||
return alg.elementWiseDivision(alg.subtraction(alg.hadamard_product(z, alg.cos(z)), alg.sin(z)), alg.hadamard_product(z, z));
|
|
||||||
}
|
|
||||||
return alg.elementWiseDivision(alg.sin(z), z);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPActivationOld::sinc(std::vector<std::vector<real_t>> z, bool deriv) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
if (deriv) {
|
|
||||||
return alg.elementWiseDivision(alg.subtraction(alg.hadamard_product(z, alg.cos(z)), alg.sin(z)), alg.hadamard_product(z, z));
|
|
||||||
}
|
|
||||||
return alg.elementWiseDivision(alg.sin(z), z);
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPActivationOld::RELU(real_t z, bool deriv) {
|
|
||||||
if (deriv) {
|
|
||||||
if (z <= 0) {
|
|
||||||
return 0;
|
|
||||||
} else {
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return fmax(0, z);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPActivationOld::RELU(std::vector<real_t> z, bool deriv) {
|
|
||||||
if (deriv) {
|
|
||||||
std::vector<real_t> lderiv;
|
|
||||||
lderiv.resize(z.size());
|
|
||||||
for (uint32_t i = 0; i < z.size(); i++) {
|
|
||||||
lderiv[i] = RELU(z[i], true);
|
|
||||||
}
|
|
||||||
return lderiv;
|
|
||||||
}
|
|
||||||
std::vector<real_t> a;
|
|
||||||
a.resize(z.size());
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < a.size(); i++) {
|
|
||||||
a[i] = RELU(z[i]);
|
|
||||||
}
|
|
||||||
return a;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPActivationOld::RELU(std::vector<std::vector<real_t>> z, bool deriv) {
|
|
||||||
if (deriv) {
|
|
||||||
std::vector<std::vector<real_t>> lderiv;
|
|
||||||
lderiv.resize(z.size());
|
|
||||||
for (uint32_t i = 0; i < z.size(); i++) {
|
|
||||||
lderiv[i] = RELU(z[i], true);
|
|
||||||
}
|
|
||||||
return lderiv;
|
|
||||||
}
|
|
||||||
std::vector<std::vector<real_t>> a;
|
|
||||||
a.resize(z.size());
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < a.size(); i++) {
|
|
||||||
a[i] = RELU(z[i]);
|
|
||||||
}
|
|
||||||
return a;
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPActivationOld::leakyReLU(real_t z, real_t c, bool deriv) {
|
|
||||||
if (deriv) {
|
|
||||||
if (z <= 0) {
|
|
||||||
return c;
|
|
||||||
} else {
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return fmax(c * z, z);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPActivationOld::leakyReLU(std::vector<real_t> z, real_t c, bool deriv) {
|
|
||||||
if (deriv) {
|
|
||||||
std::vector<real_t> lderiv;
|
|
||||||
lderiv.resize(z.size());
|
|
||||||
for (uint32_t i = 0; i < z.size(); i++) {
|
|
||||||
lderiv[i] = leakyReLU(z[i], c, true);
|
|
||||||
}
|
|
||||||
return lderiv;
|
|
||||||
}
|
|
||||||
std::vector<real_t> a;
|
|
||||||
a.resize(z.size());
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < a.size(); i++) {
|
|
||||||
a[i] = leakyReLU(z[i], c);
|
|
||||||
}
|
|
||||||
return a;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPActivationOld::leakyReLU(std::vector<std::vector<real_t>> z, real_t c, bool deriv) {
|
|
||||||
if (deriv) {
|
|
||||||
std::vector<std::vector<real_t>> lderiv;
|
|
||||||
lderiv.resize(z.size());
|
|
||||||
for (uint32_t i = 0; i < z.size(); i++) {
|
|
||||||
lderiv[i] = leakyReLU(z[i], c, true);
|
|
||||||
}
|
|
||||||
return lderiv;
|
|
||||||
}
|
|
||||||
std::vector<std::vector<real_t>> a;
|
|
||||||
a.resize(z.size());
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < a.size(); i++) {
|
|
||||||
a[i] = leakyReLU(z[i], c);
|
|
||||||
}
|
|
||||||
return a;
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPActivationOld::ELU(real_t z, real_t c, bool deriv) {
|
|
||||||
if (deriv) {
|
|
||||||
if (z <= 0) {
|
|
||||||
return c * exp(z);
|
|
||||||
} else {
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (z >= 0) {
|
|
||||||
return z;
|
|
||||||
} else {
|
|
||||||
return c * (exp(z) - 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPActivationOld::ELU(std::vector<real_t> z, real_t c, bool deriv) {
|
|
||||||
if (deriv) {
|
|
||||||
std::vector<real_t> lderiv;
|
|
||||||
lderiv.resize(z.size());
|
|
||||||
for (uint32_t i = 0; i < z.size(); i++) {
|
|
||||||
lderiv[i] = ELU(z[i], c, true);
|
|
||||||
}
|
|
||||||
return lderiv;
|
|
||||||
}
|
|
||||||
std::vector<real_t> a;
|
|
||||||
a.resize(z.size());
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < a.size(); i++) {
|
|
||||||
a[i] = ELU(z[i], c);
|
|
||||||
}
|
|
||||||
return a;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPActivationOld::ELU(std::vector<std::vector<real_t>> z, real_t c, bool deriv) {
|
|
||||||
if (deriv) {
|
|
||||||
std::vector<std::vector<real_t>> lderiv;
|
|
||||||
lderiv.resize(z.size());
|
|
||||||
for (uint32_t i = 0; i < z.size(); i++) {
|
|
||||||
lderiv[i] = ELU(z[i], c, true);
|
|
||||||
}
|
|
||||||
return lderiv;
|
|
||||||
}
|
|
||||||
std::vector<std::vector<real_t>> a;
|
|
||||||
a.resize(z.size());
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < a.size(); i++) {
|
|
||||||
a[i] = ELU(z[i], c);
|
|
||||||
}
|
|
||||||
return a;
|
|
||||||
}

real_t MLPPActivationOld::SELU(real_t z, real_t lambda, real_t c, bool deriv) {
	if (deriv) {
		return ELU(z, c, true);
	}
	return lambda * ELU(z, c);
}

std::vector<real_t> MLPPActivationOld::SELU(std::vector<real_t> z, real_t lambda, real_t c, bool deriv) {
	if (deriv) {
		std::vector<real_t> lderiv;
		lderiv.resize(z.size());
		for (uint32_t i = 0; i < z.size(); i++) {
			lderiv[i] = SELU(z[i], lambda, c, true);
		}
		return lderiv;
	}
	std::vector<real_t> a;
	a.resize(z.size());

	for (uint32_t i = 0; i < a.size(); i++) {
		a[i] = SELU(z[i], lambda, c);
	}
	return a;
}

std::vector<std::vector<real_t>> MLPPActivationOld::SELU(std::vector<std::vector<real_t>> z, real_t lambda, real_t c, bool deriv) {
	if (deriv) {
		std::vector<std::vector<real_t>> lderiv;
		lderiv.resize(z.size());
		for (uint32_t i = 0; i < z.size(); i++) {
			lderiv[i] = SELU(z[i], lambda, c, true);
		}
		return lderiv;
	}
	std::vector<std::vector<real_t>> a;
	a.resize(z.size());

	for (uint32_t i = 0; i < a.size(); i++) {
		a[i] = SELU(z[i], lambda, c);
	}
	return a;
}

real_t MLPPActivationOld::GELU(real_t z, bool deriv) {
	if (deriv) {
		return 0.5 * tanh(0.0356774 * std::pow(z, 3) + 0.797885 * z) + (0.0535161 * std::pow(z, 3) + 0.398942 * z) * std::pow(sech(0.0356774 * std::pow(z, 3) + 0.797885 * z), 2) + 0.5;
	}
	return 0.5 * z * (1 + tanh(sqrt(2 / M_PI) * (z + 0.044715 * std::pow(z, 3))));
}

std::vector<real_t> MLPPActivationOld::GELU(std::vector<real_t> z, bool deriv) {
	if (deriv) {
		std::vector<real_t> lderiv;
		lderiv.resize(z.size());
		for (uint32_t i = 0; i < z.size(); i++) {
			lderiv[i] = GELU(z[i], true);
		}
		return lderiv;
	}
	std::vector<real_t> a;
	a.resize(z.size());

	for (uint32_t i = 0; i < a.size(); i++) {
		a[i] = GELU(z[i]);
	}
	return a;
}

std::vector<std::vector<real_t>> MLPPActivationOld::GELU(std::vector<std::vector<real_t>> z, bool deriv) {
	if (deriv) {
		std::vector<std::vector<real_t>> lderiv;
		lderiv.resize(z.size());
		for (uint32_t i = 0; i < z.size(); i++) {
			lderiv[i] = GELU(z[i], true);
		}
		return lderiv;
	}
	std::vector<std::vector<real_t>> a;
	a.resize(z.size());

	for (uint32_t i = 0; i < a.size(); i++) {
		a[i] = GELU(z[i]);
	}
	return a;
}

real_t MLPPActivationOld::sign(real_t z, bool deriv) {
	if (deriv) {
		return 0;
	}
	if (z < 0) {
		return -1;
	} else if (z == 0) {
		return 0;
	} else {
		return 1;
	}
}

std::vector<real_t> MLPPActivationOld::sign(std::vector<real_t> z, bool deriv) {
	if (deriv) {
		std::vector<real_t> lderiv;
		lderiv.resize(z.size());
		for (uint32_t i = 0; i < z.size(); i++) {
			lderiv[i] = sign(z[i], true);
		}
		return lderiv;
	}
	std::vector<real_t> a;
	a.resize(z.size());

	for (uint32_t i = 0; i < a.size(); i++) {
		a[i] = sign(z[i]);
	}
	return a;
}

std::vector<std::vector<real_t>> MLPPActivationOld::sign(std::vector<std::vector<real_t>> z, bool deriv) {
	if (deriv) {
		std::vector<std::vector<real_t>> lderiv;
		lderiv.resize(z.size());
		for (uint32_t i = 0; i < z.size(); i++) {
			lderiv[i] = sign(z[i], true);
		}
		return lderiv;
	}
	std::vector<std::vector<real_t>> a;
	a.resize(z.size());

	for (uint32_t i = 0; i < a.size(); i++) {
		a[i] = sign(z[i]);
	}
	return a;
}

real_t MLPPActivationOld::sinh(real_t z, bool deriv) {
	if (deriv) {
		return cosh(z);
	}
	return 0.5 * (exp(z) - exp(-z));
}

std::vector<real_t> MLPPActivationOld::sinh(std::vector<real_t> z, bool deriv) {
	if (deriv) {
		return cosh(z);
	}
	MLPPLinAlgOld alg;
	return alg.scalarMultiply(0.5, alg.subtraction(alg.exp(z), alg.exp(alg.scalarMultiply(-1, z))));
}

std::vector<std::vector<real_t>> MLPPActivationOld::sinh(std::vector<std::vector<real_t>> z, bool deriv) {
	if (deriv) {
		return cosh(z);
	}
	MLPPLinAlgOld alg;
	return alg.scalarMultiply(0.5, alg.subtraction(alg.exp(z), alg.exp(alg.scalarMultiply(-1, z))));
}

real_t MLPPActivationOld::cosh(real_t z, bool deriv) {
	if (deriv) {
		return sinh(z);
	}
	return 0.5 * (exp(z) + exp(-z));
}

std::vector<real_t> MLPPActivationOld::cosh(std::vector<real_t> z, bool deriv) {
	if (deriv) {
		return sinh(z);
	}
	MLPPLinAlgOld alg;
	return alg.scalarMultiply(0.5, alg.addition(alg.exp(z), alg.exp(alg.scalarMultiply(-1, z))));
}

std::vector<std::vector<real_t>> MLPPActivationOld::cosh(std::vector<std::vector<real_t>> z, bool deriv) {
	if (deriv) {
		return sinh(z);
	}
	MLPPLinAlgOld alg;
	return alg.scalarMultiply(0.5, alg.addition(alg.exp(z), alg.exp(alg.scalarMultiply(-1, z))));
}

real_t MLPPActivationOld::tanh(real_t z, bool deriv) {
	if (deriv) {
		return 1 - tanh(z) * tanh(z);
	}
	return (exp(z) - exp(-z)) / (exp(z) + exp(-z));
}

std::vector<real_t> MLPPActivationOld::tanh(std::vector<real_t> z, bool deriv) {
	MLPPLinAlgOld alg;
	if (deriv) {
		return alg.scalarMultiply(-1, alg.scalarAdd(-1, alg.hadamard_product(tanh(z), tanh(z))));
	}
	return alg.elementWiseDivision(alg.subtraction(alg.exp(z), alg.exp(alg.scalarMultiply(-1, z))), alg.addition(alg.exp(z), alg.exp(alg.scalarMultiply(-1, z))));
}

std::vector<std::vector<real_t>> MLPPActivationOld::tanh(std::vector<std::vector<real_t>> z, bool deriv) {
	MLPPLinAlgOld alg;
	if (deriv) {
		return alg.scalarMultiply(-1, alg.scalarAdd(-1, alg.hadamard_product(tanh(z), tanh(z))));
	}

	return alg.elementWiseDivision(alg.subtraction(alg.exp(z), alg.exp(alg.scalarMultiply(-1, z))), alg.addition(alg.exp(z), alg.exp(alg.scalarMultiply(-1, z))));
}

real_t MLPPActivationOld::csch(real_t z, bool deriv) {
	if (deriv) {
		return -csch(z) * coth(z);
	}
	return 1 / sinh(z);
}

std::vector<real_t> MLPPActivationOld::csch(std::vector<real_t> z, bool deriv) {
	MLPPLinAlgOld alg;
	if (deriv) {
		return alg.hadamard_product(alg.scalarMultiply(-1, csch(z)), coth(z));
	}
	return alg.elementWiseDivision(alg.onevec(z.size()), sinh(z));
}

std::vector<std::vector<real_t>> MLPPActivationOld::csch(std::vector<std::vector<real_t>> z, bool deriv) {
	MLPPLinAlgOld alg;
	if (deriv) {
		return alg.hadamard_product(alg.scalarMultiply(-1, csch(z)), coth(z));
	}
	return alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), sinh(z));
}

real_t MLPPActivationOld::sech(real_t z, bool deriv) {
	if (deriv) {
		return -sech(z) * tanh(z);
	}
	return 1 / cosh(z);
}

std::vector<real_t> MLPPActivationOld::sech(std::vector<real_t> z, bool deriv) {
	MLPPLinAlgOld alg;
	if (deriv) {
		return alg.hadamard_product(alg.scalarMultiply(-1, sech(z)), tanh(z));
	}
	return alg.elementWiseDivision(alg.onevec(z.size()), cosh(z));

	// return activation(z, deriv, static_cast<void (*)(real_t, bool)>(&sech));
}

std::vector<std::vector<real_t>> MLPPActivationOld::sech(std::vector<std::vector<real_t>> z, bool deriv) {
	MLPPLinAlgOld alg;
	if (deriv) {
		return alg.hadamard_product(alg.scalarMultiply(-1, sech(z)), tanh(z));
	}
	return alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), cosh(z));

	// return activation(z, deriv, static_cast<void (*)(real_t, bool)>(&sech));
}

real_t MLPPActivationOld::coth(real_t z, bool deriv) {
	if (deriv) {
		return -csch(z) * csch(z);
	}
	return 1 / tanh(z);
}

std::vector<real_t> MLPPActivationOld::coth(std::vector<real_t> z, bool deriv) {
	MLPPLinAlgOld alg;
	if (deriv) {
		return alg.hadamard_product(alg.scalarMultiply(-1, csch(z)), csch(z));
	}
	return alg.elementWiseDivision(alg.onevec(z.size()), tanh(z));
}

std::vector<std::vector<real_t>> MLPPActivationOld::coth(std::vector<std::vector<real_t>> z, bool deriv) {
	MLPPLinAlgOld alg;
	if (deriv) {
		return alg.hadamard_product(alg.scalarMultiply(-1, csch(z)), csch(z));
	}
	return alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), tanh(z));
}

real_t MLPPActivationOld::arsinh(real_t z, bool deriv) {
	if (deriv) {
		return 1 / sqrt(z * z + 1);
	}
	return std::log(z + sqrt(z * z + 1));
}

std::vector<real_t> MLPPActivationOld::arsinh(std::vector<real_t> z, bool deriv) {
	MLPPLinAlgOld alg;
	if (deriv) {
		return alg.elementWiseDivision(alg.onevec(z.size()), alg.sqrt(alg.addition(alg.hadamard_product(z, z), alg.onevec(z.size()))));
	}
	return alg.log(alg.addition(z, alg.sqrt(alg.addition(alg.hadamard_product(z, z), alg.onevec(z.size())))));
}

std::vector<std::vector<real_t>> MLPPActivationOld::arsinh(std::vector<std::vector<real_t>> z, bool deriv) {
	MLPPLinAlgOld alg;
	if (deriv) {
		return alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), alg.sqrt(alg.addition(alg.hadamard_product(z, z), alg.onemat(z.size(), z[0].size()))));
	}
	return alg.log(alg.addition(z, alg.sqrt(alg.addition(alg.hadamard_product(z, z), alg.onemat(z.size(), z[0].size())))));
}

real_t MLPPActivationOld::arcosh(real_t z, bool deriv) {
	if (deriv) {
		return 1 / sqrt(z * z - 1);
	}
	return std::log(z + sqrt(z * z - 1));
}

std::vector<real_t> MLPPActivationOld::arcosh(std::vector<real_t> z, bool deriv) {
	MLPPLinAlgOld alg;
	if (deriv) {
		return alg.elementWiseDivision(alg.onevec(z.size()), alg.sqrt(alg.subtraction(alg.hadamard_product(z, z), alg.onevec(z.size()))));
	}
	return alg.log(alg.addition(z, alg.sqrt(alg.subtraction(alg.hadamard_product(z, z), alg.onevec(z.size())))));
}

std::vector<std::vector<real_t>> MLPPActivationOld::arcosh(std::vector<std::vector<real_t>> z, bool deriv) {
	MLPPLinAlgOld alg;
	if (deriv) {
		return alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), alg.sqrt(alg.subtraction(alg.hadamard_product(z, z), alg.onemat(z.size(), z[0].size()))));
	}
	return alg.log(alg.addition(z, alg.sqrt(alg.subtraction(alg.hadamard_product(z, z), alg.onemat(z.size(), z[0].size())))));
}

real_t MLPPActivationOld::artanh(real_t z, bool deriv) {
	if (deriv) {
		return 1 / (1 - z * z);
	}
	return 0.5 * std::log((1 + z) / (1 - z));
}

std::vector<real_t> MLPPActivationOld::artanh(std::vector<real_t> z, bool deriv) {
	MLPPLinAlgOld alg;
	if (deriv) {
		return alg.elementWiseDivision(alg.onevec(z.size()), alg.subtraction(alg.onevec(z.size()), alg.hadamard_product(z, z)));
	}
	return alg.scalarMultiply(0.5, alg.log(alg.elementWiseDivision(alg.addition(alg.onevec(z.size()), z), alg.subtraction(alg.onevec(z.size()), z))));
}

std::vector<std::vector<real_t>> MLPPActivationOld::artanh(std::vector<std::vector<real_t>> z, bool deriv) {
	MLPPLinAlgOld alg;
	if (deriv) {
		return alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), alg.subtraction(alg.onemat(z.size(), z[0].size()), alg.hadamard_product(z, z)));
	}
	return alg.scalarMultiply(0.5, alg.log(alg.elementWiseDivision(alg.addition(alg.onemat(z.size(), z[0].size()), z), alg.subtraction(alg.onemat(z.size(), z[0].size()), z))));
}

real_t MLPPActivationOld::arcsch(real_t z, bool deriv) {
	if (deriv) {
		return -1 / ((z * z) * sqrt(1 + (1 / (z * z))));
	}
	return std::log(sqrt(1 + (1 / (z * z))) + (1 / z));
}

std::vector<real_t> MLPPActivationOld::arcsch(std::vector<real_t> z, bool deriv) {
	MLPPLinAlgOld alg;
	if (deriv) {
		return alg.elementWiseDivision(alg.full(z.size(), -1), alg.hadamard_product(alg.hadamard_product(z, z), alg.sqrt(alg.addition(alg.onevec(z.size()), alg.elementWiseDivision(alg.onevec(z.size()), alg.hadamard_product(z, z))))));
	}
	return alg.log(alg.addition(alg.sqrt(alg.addition(alg.onevec(z.size()), alg.elementWiseDivision(alg.onevec(z.size()), alg.hadamard_product(z, z)))), alg.elementWiseDivision(alg.onevec(z.size()), z)));
}

std::vector<std::vector<real_t>> MLPPActivationOld::arcsch(std::vector<std::vector<real_t>> z, bool deriv) {
	MLPPLinAlgOld alg;
	if (deriv) {
		return alg.elementWiseDivision(alg.full(z.size(), z[0].size(), -1), alg.hadamard_product(alg.hadamard_product(z, z), alg.sqrt(alg.addition(alg.onemat(z.size(), z[0].size()), alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), alg.hadamard_product(z, z))))));
	}
	return alg.log(alg.addition(alg.sqrt(alg.addition(alg.onemat(z.size(), z[0].size()), alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), alg.hadamard_product(z, z)))), alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), z)));
}

real_t MLPPActivationOld::arsech(real_t z, bool deriv) {
	if (deriv) {
		return -1 / (z * sqrt(1 - z * z));
	}
	return std::log((1 / z) + ((1 / z) + 1) * ((1 / z) - 1));
}

std::vector<real_t> MLPPActivationOld::arsech(std::vector<real_t> z, bool deriv) {
	MLPPLinAlgOld alg;
	if (deriv) {
		return alg.elementWiseDivision(alg.full(z.size(), -1), alg.hadamard_product(z, alg.sqrt(alg.subtraction(alg.onevec(z.size()), alg.hadamard_product(z, z)))));
	}
	return alg.log(alg.addition(alg.elementWiseDivision(alg.onevec(z.size()), z), alg.hadamard_product(alg.addition(alg.elementWiseDivision(alg.onevec(z.size()), z), alg.onevec(z.size())), alg.subtraction(alg.elementWiseDivision(alg.onevec(z.size()), z), alg.onevec(z.size())))));
}

std::vector<std::vector<real_t>> MLPPActivationOld::arsech(std::vector<std::vector<real_t>> z, bool deriv) {
	MLPPLinAlgOld alg;
	if (deriv) {
		return alg.elementWiseDivision(alg.full(z.size(), z[0].size(), -1), alg.hadamard_product(z, alg.sqrt(alg.subtraction(alg.onemat(z.size(), z[0].size()), alg.hadamard_product(z, z)))));
	}
	return alg.log(alg.addition(alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), z), alg.hadamard_product(alg.addition(alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), z), alg.onemat(z.size(), z[0].size())), alg.subtraction(alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), z), alg.onemat(z.size(), z[0].size())))));
}

real_t MLPPActivationOld::arcoth(real_t z, bool deriv) {
	if (deriv) {
		return 1 / (1 - z * z);
	}
	return 0.5 * std::log((1 + z) / (z - 1));
}

std::vector<real_t> MLPPActivationOld::arcoth(std::vector<real_t> z, bool deriv) {
	MLPPLinAlgOld alg;
	if (deriv) {
		return alg.elementWiseDivision(alg.onevec(z.size()), alg.subtraction(alg.onevec(z.size()), alg.hadamard_product(z, z)));
	}
	return alg.scalarMultiply(0.5, alg.log(alg.elementWiseDivision(alg.addition(alg.onevec(z.size()), z), alg.subtraction(z, alg.onevec(z.size())))));
}

std::vector<std::vector<real_t>> MLPPActivationOld::arcoth(std::vector<std::vector<real_t>> z, bool deriv) {
	MLPPLinAlgOld alg;
	if (deriv) {
		return alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), alg.subtraction(alg.onemat(z.size(), z[0].size()), alg.hadamard_product(z, z)));
	}
	return alg.scalarMultiply(0.5, alg.log(alg.elementWiseDivision(alg.addition(alg.onemat(z.size(), z[0].size()), z), alg.subtraction(z, alg.onemat(z.size(), z[0].size())))));
}

// TO DO: Implement this template activation
std::vector<real_t> MLPPActivationOld::activation(std::vector<real_t> z, bool deriv, real_t (*function)(real_t, bool)) {
	if (deriv) {
		std::vector<real_t> lderiv;
		lderiv.resize(z.size());
		for (uint32_t i = 0; i < z.size(); i++) {
			lderiv[i] = function(z[i], true);
		}
		return lderiv;
	}
	std::vector<real_t> a;
	a.resize(z.size());
	for (uint32_t i = 0; i < z.size(); i++) {
		a[i] = function(z[i], deriv);
	}
	return a;
}
@ -1,146 +0,0 @@
#ifndef MLPP_ACTIVATION_OLD_H
#define MLPP_ACTIVATION_OLD_H

//
//  Activation.hpp
//
//  Created by Marc Melikyan on 1/16/21.
//

#include "core/math/math_defs.h"
#include "core/int_types.h"

#include <vector>

class MLPPActivationOld {
public:
	real_t linear(real_t z, bool deriv = false);
	std::vector<real_t> linear(std::vector<real_t> z, bool deriv = false);
	std::vector<std::vector<real_t>> linear(std::vector<std::vector<real_t>> z, bool deriv = false);

	real_t sigmoid(real_t z, bool deriv = false);
	std::vector<real_t> sigmoid(std::vector<real_t> z, bool deriv = false);
	std::vector<std::vector<real_t>> sigmoid(std::vector<std::vector<real_t>> z, bool deriv = false);

	std::vector<real_t> softmax(std::vector<real_t> z, bool deriv = false);
	std::vector<std::vector<real_t>> softmax(std::vector<std::vector<real_t>> z, bool deriv = false);

	std::vector<real_t> adjSoftmax(std::vector<real_t> z);
	std::vector<std::vector<real_t>> adjSoftmax(std::vector<std::vector<real_t>> z);

	std::vector<std::vector<real_t>> softmaxDeriv(std::vector<real_t> z);
	std::vector<std::vector<std::vector<real_t>>> softmaxDeriv(std::vector<std::vector<real_t>> z);

	real_t softplus(real_t z, bool deriv = false);
	std::vector<real_t> softplus(std::vector<real_t> z, bool deriv = false);
	std::vector<std::vector<real_t>> softplus(std::vector<std::vector<real_t>> z, bool deriv = false);

	real_t softsign(real_t z, bool deriv = false);
	std::vector<real_t> softsign(std::vector<real_t> z, bool deriv = false);
	std::vector<std::vector<real_t>> softsign(std::vector<std::vector<real_t>> z, bool deriv = false);

	real_t gaussianCDF(real_t z, bool deriv = false);
	std::vector<real_t> gaussianCDF(std::vector<real_t> z, bool deriv = false);
	std::vector<std::vector<real_t>> gaussianCDF(std::vector<std::vector<real_t>> z, bool deriv = false);

	real_t cloglog(real_t z, bool deriv = false);
	std::vector<real_t> cloglog(std::vector<real_t> z, bool deriv = false);
	std::vector<std::vector<real_t>> cloglog(std::vector<std::vector<real_t>> z, bool deriv = false);

	real_t logit(real_t z, bool deriv = false);
	std::vector<real_t> logit(std::vector<real_t> z, bool deriv = false);
	std::vector<std::vector<real_t>> logit(std::vector<std::vector<real_t>> z, bool deriv = false);

	real_t unitStep(real_t z, bool deriv = false);
	std::vector<real_t> unitStep(std::vector<real_t> z, bool deriv = false);
	std::vector<std::vector<real_t>> unitStep(std::vector<std::vector<real_t>> z, bool deriv = false);

	real_t swish(real_t z, bool deriv = false);
	std::vector<real_t> swish(std::vector<real_t> z, bool deriv = false);
	std::vector<std::vector<real_t>> swish(std::vector<std::vector<real_t>> z, bool deriv = false);

	real_t mish(real_t z, bool deriv = false);
	std::vector<real_t> mish(std::vector<real_t> z, bool deriv = false);
	std::vector<std::vector<real_t>> mish(std::vector<std::vector<real_t>> z, bool deriv = false);

	real_t sinc(real_t z, bool deriv = false);
	std::vector<real_t> sinc(std::vector<real_t> z, bool deriv = false);
	std::vector<std::vector<real_t>> sinc(std::vector<std::vector<real_t>> z, bool deriv = false);

	real_t RELU(real_t z, bool deriv = false);
	std::vector<real_t> RELU(std::vector<real_t> z, bool deriv = false);
	std::vector<std::vector<real_t>> RELU(std::vector<std::vector<real_t>> z, bool deriv = false);

	real_t leakyReLU(real_t z, real_t c, bool deriv = false);
	std::vector<real_t> leakyReLU(std::vector<real_t> z, real_t c, bool deriv = false);
	std::vector<std::vector<real_t>> leakyReLU(std::vector<std::vector<real_t>> z, real_t c, bool deriv = false);

	real_t ELU(real_t z, real_t c, bool deriv = false);
	std::vector<real_t> ELU(std::vector<real_t> z, real_t c, bool deriv = false);
	std::vector<std::vector<real_t>> ELU(std::vector<std::vector<real_t>> z, real_t c, bool deriv = false);

	real_t SELU(real_t z, real_t lambda, real_t c, bool deriv = false);
	std::vector<real_t> SELU(std::vector<real_t> z, real_t lambda, real_t c, bool deriv = false);
	std::vector<std::vector<real_t>> SELU(std::vector<std::vector<real_t>>, real_t lambda, real_t c, bool deriv = false);

	real_t GELU(real_t z, bool deriv = false);
	std::vector<real_t> GELU(std::vector<real_t> z, bool deriv = false);
	std::vector<std::vector<real_t>> GELU(std::vector<std::vector<real_t>> z, bool deriv = false);

	real_t sign(real_t z, bool deriv = false);
	std::vector<real_t> sign(std::vector<real_t> z, bool deriv = false);
	std::vector<std::vector<real_t>> sign(std::vector<std::vector<real_t>> z, bool deriv = false);

	real_t sinh(real_t z, bool deriv = false);
	std::vector<real_t> sinh(std::vector<real_t> z, bool deriv = false);
	std::vector<std::vector<real_t>> sinh(std::vector<std::vector<real_t>> z, bool deriv = false);

	real_t cosh(real_t z, bool deriv = false);
	std::vector<real_t> cosh(std::vector<real_t> z, bool deriv = false);
	std::vector<std::vector<real_t>> cosh(std::vector<std::vector<real_t>> z, bool deriv = false);

	real_t tanh(real_t z, bool deriv = false);
	std::vector<real_t> tanh(std::vector<real_t> z, bool deriv = false);
	std::vector<std::vector<real_t>> tanh(std::vector<std::vector<real_t>> z, bool deriv = false);

	real_t csch(real_t z, bool deriv = false);
	std::vector<real_t> csch(std::vector<real_t> z, bool deriv = false);
	std::vector<std::vector<real_t>> csch(std::vector<std::vector<real_t>> z, bool deriv = false);

	real_t sech(real_t z, bool deriv = false);
	std::vector<real_t> sech(std::vector<real_t> z, bool deriv = false);
	std::vector<std::vector<real_t>> sech(std::vector<std::vector<real_t>> z, bool deriv = false);

	real_t coth(real_t z, bool deriv = false);
	std::vector<real_t> coth(std::vector<real_t> z, bool deriv = false);
	std::vector<std::vector<real_t>> coth(std::vector<std::vector<real_t>> z, bool deriv = false);

	real_t arsinh(real_t z, bool deriv = false);
	std::vector<real_t> arsinh(std::vector<real_t> z, bool deriv = false);
	std::vector<std::vector<real_t>> arsinh(std::vector<std::vector<real_t>> z, bool deriv = false);

	real_t arcosh(real_t z, bool deriv = false);
	std::vector<real_t> arcosh(std::vector<real_t> z, bool deriv = false);
	std::vector<std::vector<real_t>> arcosh(std::vector<std::vector<real_t>> z, bool deriv = false);

	real_t artanh(real_t z, bool deriv = false);
	std::vector<real_t> artanh(std::vector<real_t> z, bool deriv = false);
	std::vector<std::vector<real_t>> artanh(std::vector<std::vector<real_t>> z, bool deriv = false);

	real_t arcsch(real_t z, bool deriv = false);
	std::vector<real_t> arcsch(std::vector<real_t> z, bool deriv = false);
	std::vector<std::vector<real_t>> arcsch(std::vector<std::vector<real_t>> z, bool deriv = false);

	real_t arsech(real_t z, bool deriv = false);
	std::vector<real_t> arsech(std::vector<real_t> z, bool deriv = false);
	std::vector<std::vector<real_t>> arsech(std::vector<std::vector<real_t>> z, bool deriv = false);

	real_t arcoth(real_t z, bool deriv = false);
	std::vector<real_t> arcoth(std::vector<real_t> z, bool deriv = false);
	std::vector<std::vector<real_t>> arcoth(std::vector<std::vector<real_t>> z, bool deriv = false);

	std::vector<real_t> activation(std::vector<real_t> z, bool deriv, real_t (*function)(real_t, bool));
};

#endif /* Activation_hpp */
@ -1,808 +0,0 @@
//
//  ANN.cpp
//
//  Created by Marc Melikyan on 11/4/20.
//

#include "ann_old.h"
#include "../activation/activation_old.h"
#include "../cost/cost_old.h"
#include "../lin_alg/lin_alg_old.h"
#include "../regularization/reg_old.h"
#include "../utilities/utilities.h"

#include <cmath>
#include <iostream>
#include <random>

MLPPANNOld::MLPPANNOld(std::vector<std::vector<real_t>> p_inputSet, std::vector<real_t> p_outputSet) {
	inputSet = p_inputSet;
	outputSet = p_outputSet;

	n = inputSet.size();
	k = inputSet[0].size();
	lrScheduler = "None";
	decayConstant = 0;
	dropRate = 0;
}

MLPPANNOld::~MLPPANNOld() {
	delete outputLayer;
}

std::vector<real_t> MLPPANNOld::modelSetTest(std::vector<std::vector<real_t>> X) {
	if (!network.empty()) {
		network[0].input = X;
		network[0].forwardPass();

		for (uint32_t i = 1; i < network.size(); i++) {
			network[i].input = network[i - 1].a;
			network[i].forwardPass();
		}
		outputLayer->input = network[network.size() - 1].a;
	} else {
		outputLayer->input = X;
	}
	outputLayer->forwardPass();
	return outputLayer->a;
}

real_t MLPPANNOld::modelTest(std::vector<real_t> x) {
	if (!network.empty()) {
		network[0].Test(x);
		for (uint32_t i = 1; i < network.size(); i++) {
			network[i].Test(network[i - 1].a_test);
		}
		outputLayer->Test(network[network.size() - 1].a_test);
	} else {
		outputLayer->Test(x);
	}
	return outputLayer->a_test;
}

void MLPPANNOld::gradientDescent(real_t learning_rate, int max_epoch, bool UI) {
	MLPPLinAlgOld alg;
	real_t cost_prev = 0;
	int epoch = 1;
	forwardPass();
	real_t initial_learning_rate = learning_rate;

	alg.printMatrix(network[network.size() - 1].weights);
	while (true) {
		learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
		cost_prev = Cost(y_hat, outputSet);

		auto grads = computeGradients(y_hat, outputSet);
		auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
		auto outputWGrad = std::get<1>(grads);

		cumulativeHiddenLayerWGrad = alg.scalarMultiply(learning_rate / n, cumulativeHiddenLayerWGrad);
		outputWGrad = alg.scalarMultiply(learning_rate / n, outputWGrad);
		updateParameters(cumulativeHiddenLayerWGrad, outputWGrad, learning_rate); // subject to change. may want bias to have this matrix too.

		std::cout << learning_rate << std::endl;

		forwardPass();

		if (UI) {
			MLPPANNOld::UI(epoch, cost_prev, y_hat, outputSet);
		}

		epoch++;
		if (epoch > max_epoch) {
			break;
		}
	}
}

void MLPPANNOld::SGD(real_t learning_rate, int max_epoch, bool UI) {
	MLPPLinAlgOld alg;

	real_t cost_prev = 0;
	int epoch = 1;
	real_t initial_learning_rate = learning_rate;

	while (true) {
		learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);

		std::random_device rd;
		std::default_random_engine generator(rd());
		std::uniform_int_distribution<int> distribution(0, int(n - 1));
		int outputIndex = distribution(generator);

		std::vector<real_t> y_hat = modelSetTest({ inputSet[outputIndex] });
		cost_prev = Cost({ y_hat }, { outputSet[outputIndex] });

		auto grads = computeGradients(y_hat, { outputSet[outputIndex] });
		auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
		auto outputWGrad = std::get<1>(grads);

		cumulativeHiddenLayerWGrad = alg.scalarMultiply(learning_rate / n, cumulativeHiddenLayerWGrad);
		outputWGrad = alg.scalarMultiply(learning_rate / n, outputWGrad);

		updateParameters(cumulativeHiddenLayerWGrad, outputWGrad, learning_rate); // subject to change. may want bias to have this matrix too.
		y_hat = modelSetTest({ inputSet[outputIndex] });

		if (UI) {
			MLPPANNOld::UI(epoch, cost_prev, y_hat, { outputSet[outputIndex] });
		}

		epoch++;
		if (epoch > max_epoch) {
			break;
		}
	}
	forwardPass();
}

void MLPPANNOld::MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI) {
	MLPPLinAlgOld alg;

	real_t cost_prev = 0;
	int epoch = 1;
	real_t initial_learning_rate = learning_rate;

	// Creating the mini-batches
	int n_mini_batch = n / mini_batch_size;
	// always evaluate the result
	// always do forward pass only ONCE at end.

	auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
	auto inputMiniBatches = std::get<0>(batches);
	auto outputMiniBatches = std::get<1>(batches);

	while (true) {
		learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
		for (int i = 0; i < n_mini_batch; i++) {
			std::vector<real_t> y_hat = modelSetTest(inputMiniBatches[i]);
			cost_prev = Cost(y_hat, outputMiniBatches[i]);

			auto grads = computeGradients(y_hat, outputMiniBatches[i]);
			auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
			auto outputWGrad = std::get<1>(grads);

			cumulativeHiddenLayerWGrad = alg.scalarMultiply(learning_rate / n, cumulativeHiddenLayerWGrad);
			outputWGrad = alg.scalarMultiply(learning_rate / n, outputWGrad);

			updateParameters(cumulativeHiddenLayerWGrad, outputWGrad, learning_rate); // subject to change. may want bias to have this matrix too.
			y_hat = modelSetTest(inputMiniBatches[i]);

			if (UI) {
				MLPPANNOld::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]);
			}
		}
		epoch++;
		if (epoch > max_epoch) {
			break;
		}
	}
	forwardPass();
}

void MLPPANNOld::Momentum(real_t learning_rate, int max_epoch, int mini_batch_size, real_t gamma, bool NAG, bool UI) {
	MLPPLinAlgOld alg;

	real_t cost_prev = 0;
	int epoch = 1;
	real_t initial_learning_rate = learning_rate;

	// Creating the mini-batches
	int n_mini_batch = n / mini_batch_size;
	// always evaluate the result
	// always do forward pass only ONCE at end.

	auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
	auto inputMiniBatches = std::get<0>(batches);
	auto outputMiniBatches = std::get<1>(batches);

	// Initializing necessary components for Adam.
	std::vector<std::vector<std::vector<real_t>>> v_hidden;

	std::vector<real_t> v_output;
	while (true) {
		learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
		for (int i = 0; i < n_mini_batch; i++) {
			std::vector<real_t> y_hat = modelSetTest(inputMiniBatches[i]);
			cost_prev = Cost(y_hat, outputMiniBatches[i]);

			auto grads = computeGradients(y_hat, outputMiniBatches[i]);
			auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
			auto outputWGrad = std::get<1>(grads);

			if (!network.empty() && v_hidden.empty()) { // Initing our tensor
				v_hidden = alg.resize(v_hidden, cumulativeHiddenLayerWGrad);
			}

			if (v_output.empty()) {
				v_output.resize(outputWGrad.size());
			}

			if (NAG) { // "Aposterori" calculation
				updateParameters(v_hidden, v_output, 0); // DON'T update bias.
			}

			v_hidden = alg.addition(alg.scalarMultiply(gamma, v_hidden), alg.scalarMultiply(learning_rate / n, cumulativeHiddenLayerWGrad));

			v_output = alg.addition(alg.scalarMultiply(gamma, v_output), alg.scalarMultiply(learning_rate / n, outputWGrad));

			updateParameters(v_hidden, v_output, learning_rate); // subject to change. may want bias to have this matrix too.
			y_hat = modelSetTest(inputMiniBatches[i]);

			if (UI) {
				MLPPANNOld::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]);
			}
		}
		epoch++;
		if (epoch > max_epoch) {
			break;
		}
	}
	forwardPass();
}

void MLPPANNOld::Adagrad(real_t learning_rate, int max_epoch, int mini_batch_size, real_t e, bool UI) {
	MLPPLinAlgOld alg;

	real_t cost_prev = 0;
	int epoch = 1;
	real_t initial_learning_rate = learning_rate;

	// Creating the mini-batches
	int n_mini_batch = n / mini_batch_size;
	// always evaluate the result
	// always do forward pass only ONCE at end.

	auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
	auto inputMiniBatches = std::get<0>(batches);
	auto outputMiniBatches = std::get<1>(batches);

	// Initializing necessary components for Adam.
	std::vector<std::vector<std::vector<real_t>>> v_hidden;

	std::vector<real_t> v_output;
	while (true) {
		learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
		for (int i = 0; i < n_mini_batch; i++) {
			std::vector<real_t> y_hat = modelSetTest(inputMiniBatches[i]);
			cost_prev = Cost(y_hat, outputMiniBatches[i]);

			auto grads = computeGradients(y_hat, outputMiniBatches[i]);
			auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
			auto outputWGrad = std::get<1>(grads);

			if (!network.empty() && v_hidden.empty()) { // Initing our tensor
				v_hidden = alg.resize(v_hidden, cumulativeHiddenLayerWGrad);
			}

			if (v_output.empty()) {
				v_output.resize(outputWGrad.size());
			}

			v_hidden = alg.addition(v_hidden, alg.exponentiate(cumulativeHiddenLayerWGrad, 2));

			v_output = alg.addition(v_output, alg.exponentiate(outputWGrad, 2));

			std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(cumulativeHiddenLayerWGrad, alg.scalarAdd(e, alg.sqrt(v_hidden))));
			std::vector<real_t> outputLayerUpdation = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(outputWGrad, alg.scalarAdd(e, alg.sqrt(v_output))));

			updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
			y_hat = modelSetTest(inputMiniBatches[i]);

			if (UI) {
				MLPPANNOld::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]);
			}
		}
		epoch++;
		if (epoch > max_epoch) {
			break;
		}
	}
	forwardPass();
}

void MLPPANNOld::Adadelta(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t e, bool UI) {
	MLPPLinAlgOld alg;

	real_t cost_prev = 0;
	int epoch = 1;
	real_t initial_learning_rate = learning_rate;

	// Creating the mini-batches
	int n_mini_batch = n / mini_batch_size;
	// always evaluate the result
	// always do forward pass only ONCE at end.

	auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
	auto inputMiniBatches = std::get<0>(batches);
	auto outputMiniBatches = std::get<1>(batches);

	// Initializing necessary components for Adam.
	std::vector<std::vector<std::vector<real_t>>> v_hidden;

	std::vector<real_t> v_output;
	while (true) {
		learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
		for (int i = 0; i < n_mini_batch; i++) {
			std::vector<real_t> y_hat = modelSetTest(inputMiniBatches[i]);
			cost_prev = Cost(y_hat, outputMiniBatches[i]);

			auto grads = computeGradients(y_hat, outputMiniBatches[i]);
			auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
			auto outputWGrad = std::get<1>(grads);

			if (!network.empty() && v_hidden.empty()) { // Initing our tensor
				v_hidden = alg.resize(v_hidden, cumulativeHiddenLayerWGrad);
			}

			if (v_output.empty()) {
				v_output.resize(outputWGrad.size());
			}

			v_hidden = alg.addition(alg.scalarMultiply(1 - b1, v_hidden), alg.scalarMultiply(b1, alg.exponentiate(cumulativeHiddenLayerWGrad, 2)));

			v_output = alg.addition(v_output, alg.exponentiate(outputWGrad, 2));

			std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(cumulativeHiddenLayerWGrad, alg.scalarAdd(e, alg.sqrt(v_hidden))));
			std::vector<real_t> outputLayerUpdation = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(outputWGrad, alg.scalarAdd(e, alg.sqrt(v_output))));

			updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
			y_hat = modelSetTest(inputMiniBatches[i]);

			if (UI) {
				MLPPANNOld::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]);
			}
		}
		epoch++;
		if (epoch > max_epoch) {
			break;
		}
	}
	forwardPass();
}

void MLPPANNOld::Adam(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI) {
	MLPPLinAlgOld alg;

	real_t cost_prev = 0;
	int epoch = 1;
	real_t initial_learning_rate = learning_rate;

	// Creating the mini-batches
	int n_mini_batch = n / mini_batch_size;
	// always evaluate the result
	// always do forward pass only ONCE at end.

	auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
	auto inputMiniBatches = std::get<0>(batches);
	auto outputMiniBatches = std::get<1>(batches);

	// Initializing necessary components for Adam.
	std::vector<std::vector<std::vector<real_t>>> m_hidden;
	std::vector<std::vector<std::vector<real_t>>> v_hidden;

	std::vector<real_t> m_output;
	std::vector<real_t> v_output;
	while (true) {
		learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
		for (int i = 0; i < n_mini_batch; i++) {
			std::vector<real_t> y_hat = modelSetTest(inputMiniBatches[i]);
			cost_prev = Cost(y_hat, outputMiniBatches[i]);

			auto grads = computeGradients(y_hat, outputMiniBatches[i]);
			auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
			auto outputWGrad = std::get<1>(grads);

			if (!network.empty() && m_hidden.empty() && v_hidden.empty()) { // Initing our tensor
				m_hidden = alg.resize(m_hidden, cumulativeHiddenLayerWGrad);
				v_hidden = alg.resize(v_hidden, cumulativeHiddenLayerWGrad);
			}

			if (m_output.empty() && v_output.empty()) {
				m_output.resize(outputWGrad.size());
				v_output.resize(outputWGrad.size());
			}

			m_hidden = alg.addition(alg.scalarMultiply(b1, m_hidden), alg.scalarMultiply(1 - b1, cumulativeHiddenLayerWGrad));
			v_hidden = alg.addition(alg.scalarMultiply(b2, v_hidden), alg.scalarMultiply(1 - b2, alg.exponentiate(cumulativeHiddenLayerWGrad, 2)));

			m_output = alg.addition(alg.scalarMultiply(b1, m_output), alg.scalarMultiply(1 - b1, outputWGrad));
			v_output = alg.addition(alg.scalarMultiply(b2, v_output), alg.scalarMultiply(1 - b2, alg.exponentiate(outputWGrad, 2)));

			std::vector<std::vector<std::vector<real_t>>> m_hidden_hat = alg.scalarMultiply(1 / (1 - std::pow(b1, epoch)), m_hidden);
			std::vector<std::vector<std::vector<real_t>>> v_hidden_hat = alg.scalarMultiply(1 / (1 - std::pow(b2, epoch)), v_hidden);

			std::vector<real_t> m_output_hat = alg.scalarMultiply(1 / (1 - std::pow(b1, epoch)), m_output);
			std::vector<real_t> v_output_hat = alg.scalarMultiply(1 / (1 - std::pow(b2, epoch)), v_output);

			std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_hidden_hat, alg.scalarAdd(e, alg.sqrt(v_hidden_hat))));
			std::vector<real_t> outputLayerUpdation = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_output_hat, alg.scalarAdd(e, alg.sqrt(v_output_hat))));

			updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
			y_hat = modelSetTest(inputMiniBatches[i]);

			if (UI) {
				MLPPANNOld::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]);
			}
		}
		epoch++;
		if (epoch > max_epoch) {
			break;
		}
	}
	forwardPass();
}

void MLPPANNOld::Adamax(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI) {
	MLPPLinAlgOld alg;

	real_t cost_prev = 0;
	int epoch = 1;
	real_t initial_learning_rate = learning_rate;

	// Creating the mini-batches
	int n_mini_batch = n / mini_batch_size;
	// always evaluate the result
	// always do forward pass only ONCE at end.

	auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
	auto inputMiniBatches = std::get<0>(batches);
	auto outputMiniBatches = std::get<1>(batches);

	// Initializing necessary components for Adam.
	std::vector<std::vector<std::vector<real_t>>> m_hidden;
	std::vector<std::vector<std::vector<real_t>>> u_hidden;

	std::vector<real_t> m_output;
	std::vector<real_t> u_output;
	while (true) {
		learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
		for (int i = 0; i < n_mini_batch; i++) {
			std::vector<real_t> y_hat = modelSetTest(inputMiniBatches[i]);
			cost_prev = Cost(y_hat, outputMiniBatches[i]);

			auto grads = computeGradients(y_hat, outputMiniBatches[i]);
			auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
			auto outputWGrad = std::get<1>(grads);

			if (!network.empty() && m_hidden.empty() && u_hidden.empty()) { // Initing our tensor
				m_hidden = alg.resize(m_hidden, cumulativeHiddenLayerWGrad);
				u_hidden = alg.resize(u_hidden, cumulativeHiddenLayerWGrad);
			}

			if (m_output.empty() && u_output.empty()) {
				m_output.resize(outputWGrad.size());
				u_output.resize(outputWGrad.size());
			}

			m_hidden = alg.addition(alg.scalarMultiply(b1, m_hidden), alg.scalarMultiply(1 - b1, cumulativeHiddenLayerWGrad));
			u_hidden = alg.max(alg.scalarMultiply(b2, u_hidden), alg.abs(cumulativeHiddenLayerWGrad));

			m_output = alg.addition(alg.scalarMultiply(b1, m_output), alg.scalarMultiply(1 - b1, outputWGrad));
			u_output = alg.max(alg.scalarMultiply(b2, u_output), alg.abs(outputWGrad));

			std::vector<std::vector<std::vector<real_t>>> m_hidden_hat = alg.scalarMultiply(1 / (1 - std::pow(b1, epoch)), m_hidden);

			std::vector<real_t> m_output_hat = alg.scalarMultiply(1 / (1 - std::pow(b1, epoch)), m_output);

			std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_hidden_hat, alg.scalarAdd(e, u_hidden)));
			std::vector<real_t> outputLayerUpdation = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_output_hat, alg.scalarAdd(e, u_output)));

			updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
			y_hat = modelSetTest(inputMiniBatches[i]);

			if (UI) {
				MLPPANNOld::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]);
			}
		}
		epoch++;
		if (epoch > max_epoch) {
			break;
		}
	}
	forwardPass();
}

void MLPPANNOld::Nadam(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI) {
	MLPPLinAlgOld alg;

	real_t cost_prev = 0;
	int epoch = 1;
	real_t initial_learning_rate = learning_rate;

	// Creating the mini-batches
	int n_mini_batch = n / mini_batch_size;
	// always evaluate the result
	// always do forward pass only ONCE at end.

	auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
	auto inputMiniBatches = std::get<0>(batches);
	auto outputMiniBatches = std::get<1>(batches);

	// Initializing necessary components for Adam.
	std::vector<std::vector<std::vector<real_t>>> m_hidden;
	std::vector<std::vector<std::vector<real_t>>> v_hidden;

	std::vector<real_t> m_output;
	std::vector<real_t> v_output;
	while (true) {
		learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
		for (int i = 0; i < n_mini_batch; i++) {
			std::vector<real_t> y_hat = modelSetTest(inputMiniBatches[i]);
			cost_prev = Cost(y_hat, outputMiniBatches[i]);

			auto grads = computeGradients(y_hat, outputMiniBatches[i]);
			auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
			auto outputWGrad = std::get<1>(grads);

			if (!network.empty() && m_hidden.empty() && v_hidden.empty()) { // Initing our tensor
				m_hidden = alg.resize(m_hidden, cumulativeHiddenLayerWGrad);
				v_hidden = alg.resize(v_hidden, cumulativeHiddenLayerWGrad);
			}

			if (m_output.empty() && v_output.empty()) {
				m_output.resize(outputWGrad.size());
				v_output.resize(outputWGrad.size());
			}

			m_hidden = alg.addition(alg.scalarMultiply(b1, m_hidden), alg.scalarMultiply(1 - b1, cumulativeHiddenLayerWGrad));
			v_hidden = alg.addition(alg.scalarMultiply(b2, v_hidden), alg.scalarMultiply(1 - b2, alg.exponentiate(cumulativeHiddenLayerWGrad, 2)));

			m_output = alg.addition(alg.scalarMultiply(b1, m_output), alg.scalarMultiply(1 - b1, outputWGrad));
			v_output = alg.addition(alg.scalarMultiply(b2, v_output), alg.scalarMultiply(1 - b2, alg.exponentiate(outputWGrad, 2)));

			std::vector<std::vector<std::vector<real_t>>> m_hidden_hat = alg.scalarMultiply(1 / (1 - std::pow(b1, epoch)), m_hidden);
			std::vector<std::vector<std::vector<real_t>>> v_hidden_hat = alg.scalarMultiply(1 / (1 - std::pow(b2, epoch)), v_hidden);
			std::vector<std::vector<std::vector<real_t>>> m_hidden_final = alg.addition(alg.scalarMultiply(b1, m_hidden_hat), alg.scalarMultiply((1 - b1) / (1 - std::pow(b1, epoch)), cumulativeHiddenLayerWGrad));

			std::vector<real_t> m_output_hat = alg.scalarMultiply(1 / (1 - std::pow(b1, epoch)), m_output);
			std::vector<real_t> v_output_hat = alg.scalarMultiply(1 / (1 - std::pow(b2, epoch)), v_output);
			std::vector<real_t> m_output_final = alg.addition(alg.scalarMultiply(b1, m_output_hat), alg.scalarMultiply((1 - b1) / (1 - std::pow(b1, epoch)), outputWGrad));

			std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_hidden_final, alg.scalarAdd(e, alg.sqrt(v_hidden_hat))));
			std::vector<real_t> outputLayerUpdation = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_output_final, alg.scalarAdd(e, alg.sqrt(v_output_hat))));

			updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too.
			y_hat = modelSetTest(inputMiniBatches[i]);

			if (UI) {
				MLPPANNOld::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]);
			}
		}
		epoch++;
		if (epoch > max_epoch) {
			break;
		}
	}
	forwardPass();
}
|
|
||||||
|
|
||||||
void MLPPANNOld::AMSGrad(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
|
|
||||||
real_t cost_prev = 0;
|
|
||||||
int epoch = 1;
|
|
||||||
real_t initial_learning_rate = learning_rate;
|
|
||||||
|
|
||||||
// Creating the mini-batches
|
|
||||||
int n_mini_batch = n / mini_batch_size;
|
|
||||||
// always evaluate the result
|
|
||||||
// always do forward pass only ONCE at end.
|
|
||||||
|
|
||||||
auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
|
|
||||||
auto inputMiniBatches = std::get<0>(batches);
|
|
||||||
auto outputMiniBatches = std::get<1>(batches);
|
|
||||||
|
|
||||||
// Initializing necessary components for Adam.
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> m_hidden;
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> v_hidden;
|
|
||||||
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> v_hidden_hat;
|
|
||||||
|
|
||||||
std::vector<real_t> m_output;
|
|
||||||
std::vector<real_t> v_output;
|
|
||||||
|
|
||||||
std::vector<real_t> v_output_hat;
|
|
||||||
while (true) {
|
|
||||||
learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate);
|
|
||||||
for (int i = 0; i < n_mini_batch; i++) {
|
|
||||||
std::vector<real_t> y_hat = modelSetTest(inputMiniBatches[i]);
|
|
||||||
cost_prev = Cost(y_hat, outputMiniBatches[i]);
|
|
||||||
|
|
||||||
auto grads = computeGradients(y_hat, outputMiniBatches[i]);
|
|
||||||
auto cumulativeHiddenLayerWGrad = std::get<0>(grads);
|
|
||||||
auto outputWGrad = std::get<1>(grads);
|
|
||||||
|
|
||||||
if (!network.empty() && m_hidden.empty() && v_hidden.empty()) { // Initing our tensor
|
|
||||||
m_hidden = alg.resize(m_hidden, cumulativeHiddenLayerWGrad);
|
|
||||||
v_hidden = alg.resize(v_hidden, cumulativeHiddenLayerWGrad);
|
|
||||||
v_hidden_hat = alg.resize(v_hidden_hat, cumulativeHiddenLayerWGrad);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (m_output.empty() && v_output.empty()) {
|
|
||||||
m_output.resize(outputWGrad.size());
|
|
||||||
v_output.resize(outputWGrad.size());
|
|
||||||
v_output_hat.resize(outputWGrad.size());
|
|
||||||
}
|
|
||||||
|
|
||||||
m_hidden = alg.addition(alg.scalarMultiply(b1, m_hidden), alg.scalarMultiply(1 - b1, cumulativeHiddenLayerWGrad));
|
|
||||||
v_hidden = alg.addition(alg.scalarMultiply(b2, v_hidden), alg.scalarMultiply(1 - b2, alg.exponentiate(cumulativeHiddenLayerWGrad, 2)));
|
|
||||||
|
|
||||||
m_output = alg.addition(alg.scalarMultiply(b1, m_output), alg.scalarMultiply(1 - b1, outputWGrad));
|
|
||||||
v_output = alg.addition(alg.scalarMultiply(b2, v_output), alg.scalarMultiply(1 - b2, alg.exponentiate(outputWGrad, 2)));
|
|
||||||
|
|
||||||
v_hidden_hat = alg.max(v_hidden_hat, v_hidden);
|
|
||||||
|
|
||||||
v_output_hat = alg.max(v_output_hat, v_output);
|
|
||||||
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_hidden, alg.scalarAdd(e, alg.sqrt(v_hidden_hat))));
|
|
||||||
std::vector<real_t> outputLayerUpdation = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_output, alg.scalarAdd(e, alg.sqrt(v_output_hat))));
|
|
||||||
|
|
||||||
updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // Subject to change; we may want the bias to use this matrix too.
|
|
||||||
y_hat = modelSetTest(inputMiniBatches[i]);
|
|
||||||
|
|
||||||
if (UI) {
|
|
||||||
MLPPANNOld::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
epoch++;
|
|
||||||
if (epoch > max_epoch) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
forwardPass();
|
|
||||||
}
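// For reference, the tensor-wise AMSGrad update above reduces, per scalar parameter, to the
// minimal sketch below (illustration only; amsgrad_scalar_step is a hypothetical free function,
// not a member of this class, and it assumes the gradient g is summed over the mini-batch,
// which is why the member function scales by learning_rate / n).
inline void amsgrad_scalar_step(real_t &w, real_t &m, real_t &v, real_t &v_hat,
		real_t g, real_t learning_rate, real_t b1, real_t b2, real_t e) {
	m = b1 * m + (1 - b1) * g; // First-moment (mean) estimate.
	v = b2 * v + (1 - b2) * g * g; // Second-moment (uncentered variance) estimate.
	v_hat = (v > v_hat) ? v : v_hat; // AMSGrad: keep a non-decreasing denominator.
	w -= learning_rate * m / (std::sqrt(v_hat) + e); // Same form as sqrt(v_hat) + e above.
}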
|
|
||||||
|
|
||||||
real_t MLPPANNOld::score() {
|
|
||||||
MLPPUtilities util;
|
|
||||||
forwardPass();
|
|
||||||
return util.performance(y_hat, outputSet);
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPANNOld::save(std::string fileName) {
|
|
||||||
MLPPUtilities util;
|
|
||||||
if (!network.empty()) {
|
|
||||||
util.saveParameters(fileName, network[0].weights, network[0].bias, false, 1);
|
|
||||||
for (uint32_t i = 1; i < network.size(); i++) {
|
|
||||||
util.saveParameters(fileName, network[i].weights, network[i].bias, true, i + 1);
|
|
||||||
}
|
|
||||||
util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, true, network.size() + 1);
|
|
||||||
} else {
|
|
||||||
util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, false, network.size() + 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPANNOld::setLearningRateScheduler(std::string type, real_t decayConstant) {
|
|
||||||
lrScheduler = type;
|
|
||||||
MLPPANNOld::decayConstant = decayConstant;
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPANNOld::setLearningRateScheduler(std::string type, real_t decayConstant, real_t dropRate) {
|
|
||||||
lrScheduler = type;
|
|
||||||
MLPPANNOld::decayConstant = decayConstant;
|
|
||||||
MLPPANNOld::dropRate = dropRate;
|
|
||||||
}
|
|
||||||
|
|
||||||
// https://en.wikipedia.org/wiki/Learning_rate
|
|
||||||
// Learning Rate Decay (C2W2L09) - Andrew Ng - Deep Learning Specialization
|
|
||||||
real_t MLPPANNOld::applyLearningRateScheduler(real_t learningRate, real_t decayConstant, real_t epoch, real_t dropRate) {
|
|
||||||
if (lrScheduler == "Time") {
|
|
||||||
return learningRate / (1 + decayConstant * epoch);
|
|
||||||
} else if (lrScheduler == "Epoch") {
|
|
||||||
return learningRate * (decayConstant / std::sqrt(epoch));
|
|
||||||
} else if (lrScheduler == "Step") {
|
|
||||||
return learningRate * std::pow(decayConstant, int((1 + epoch) / dropRate)); // Utilizing an explicit int conversion implicitly takes the floor.
|
|
||||||
} else if (lrScheduler == "Exponential") {
|
|
||||||
return learningRate * std::exp(-decayConstant * epoch);
|
|
||||||
}
|
|
||||||
return learningRate;
|
|
||||||
}
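// A rough worked example of the schedulers above (illustrative values only): with an
// initial learning rate of 0.1, decayConstant = 0.5, dropRate = 10 and epoch = 20,
//   "Time":        0.1 / (1 + 0.5 * 20)                = 0.1 / 11    ~ 0.0091
//   "Epoch":       0.1 * (0.5 / std::sqrt(20))         ~ 0.0112
//   "Step":        0.1 * std::pow(0.5, int(21 / 10.0)) = 0.1 * 0.25  = 0.025
//   "Exponential": 0.1 * std::exp(-0.5 * 20)           ~ 4.5e-6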
|
|
||||||
|
|
||||||
void MLPPANNOld::addLayer(int n_hidden, std::string activation, std::string weightInit, std::string reg, real_t lambda, real_t alpha) {
|
|
||||||
if (network.empty()) {
|
|
||||||
network.push_back(MLPPOldHiddenLayer(n_hidden, activation, inputSet, weightInit, reg, lambda, alpha));
|
|
||||||
network[0].forwardPass();
|
|
||||||
} else {
|
|
||||||
network.push_back(MLPPOldHiddenLayer(n_hidden, activation, network[network.size() - 1].a, weightInit, reg, lambda, alpha));
|
|
||||||
network[network.size() - 1].forwardPass();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPANNOld::addOutputLayer(std::string activation, std::string loss, std::string weightInit, std::string reg, real_t lambda, real_t alpha) {
|
|
||||||
if (!network.empty()) {
|
|
||||||
outputLayer = new MLPPOldOutputLayer(network[network.size() - 1].n_hidden, activation, loss, network[network.size() - 1].a, weightInit, reg, lambda, alpha);
|
|
||||||
} else {
|
|
||||||
outputLayer = new MLPPOldOutputLayer(k, activation, loss, inputSet, weightInit, reg, lambda, alpha);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPANNOld::Cost(std::vector<real_t> y_hat, std::vector<real_t> y) {
|
|
||||||
MLPPRegOld regularization;
|
|
||||||
class MLPPCostOld cost;
|
|
||||||
real_t totalRegTerm = 0;
|
|
||||||
|
|
||||||
auto cost_function = outputLayer->cost_map[outputLayer->cost];
|
|
||||||
if (!network.empty()) {
|
|
||||||
for (uint32_t i = 0; i < network.size() - 1; i++) {
|
|
||||||
totalRegTerm += regularization.regTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return (cost.*cost_function)(y_hat, y) + totalRegTerm + regularization.regTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg);
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPANNOld::forwardPass() {
|
|
||||||
if (!network.empty()) {
|
|
||||||
network[0].input = inputSet;
|
|
||||||
network[0].forwardPass();
|
|
||||||
|
|
||||||
for (uint32_t i = 1; i < network.size(); i++) {
|
|
||||||
network[i].input = network[i - 1].a;
|
|
||||||
network[i].forwardPass();
|
|
||||||
}
|
|
||||||
outputLayer->input = network[network.size() - 1].a;
|
|
||||||
} else {
|
|
||||||
outputLayer->input = inputSet;
|
|
||||||
}
|
|
||||||
outputLayer->forwardPass();
|
|
||||||
y_hat = outputLayer->a;
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPANNOld::updateParameters(std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations, std::vector<real_t> outputLayerUpdation, real_t learning_rate) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
|
|
||||||
outputLayer->weights = alg.subtraction(outputLayer->weights, outputLayerUpdation);
|
|
||||||
outputLayer->bias -= learning_rate * alg.sum_elements(outputLayer->delta) / n;
|
|
||||||
|
|
||||||
if (!network.empty()) {
|
|
||||||
network[network.size() - 1].weights = alg.subtraction(network[network.size() - 1].weights, hiddenLayerUpdations[0]);
|
|
||||||
network[network.size() - 1].bias = alg.subtractMatrixRows(network[network.size() - 1].bias, alg.scalarMultiply(learning_rate / n, network[network.size() - 1].delta));
|
|
||||||
|
|
||||||
for (int i = network.size() - 2; i >= 0; i--) {
|
|
||||||
network[i].weights = alg.subtraction(network[i].weights, hiddenLayerUpdations[(network.size() - 2) - i + 1]);
|
|
||||||
network[i].bias = alg.subtractMatrixRows(network[i].bias, alg.scalarMultiply(learning_rate / n, network[i].delta));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::tuple<std::vector<std::vector<std::vector<real_t>>>, std::vector<real_t>> MLPPANNOld::computeGradients(std::vector<real_t> y_hat, std::vector<real_t> outputSet) {
|
|
||||||
// std::cout << "BEGIN" << std::endl;
|
|
||||||
class MLPPCostOld cost;
|
|
||||||
MLPPActivationOld avn;
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPRegOld regularization;
|
|
||||||
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> cumulativeHiddenLayerWGrad; // Tensor containing ALL hidden grads.
|
|
||||||
|
|
||||||
auto costDeriv = outputLayer->costDeriv_map[outputLayer->cost];
|
|
||||||
auto outputAvn = outputLayer->activation_map[outputLayer->activation];
|
|
||||||
outputLayer->delta = alg.hadamard_product((cost.*costDeriv)(y_hat, outputSet), (avn.*outputAvn)(outputLayer->z, 1));
|
|
||||||
std::vector<real_t> outputWGrad = alg.mat_vec_mult(alg.transpose(outputLayer->input), outputLayer->delta);
|
|
||||||
outputWGrad = alg.addition(outputWGrad, regularization.regDerivTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg));
|
|
||||||
|
|
||||||
if (!network.empty()) {
|
|
||||||
auto hiddenLayerAvn = network[network.size() - 1].activation_map[network[network.size() - 1].activation];
|
|
||||||
network[network.size() - 1].delta = alg.hadamard_product(alg.outerProduct(outputLayer->delta, outputLayer->weights), (avn.*hiddenLayerAvn)(network[network.size() - 1].z, 1));
|
|
||||||
std::vector<std::vector<real_t>> hiddenLayerWGrad = alg.matmult(alg.transpose(network[network.size() - 1].input), network[network.size() - 1].delta);
|
|
||||||
|
|
||||||
cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[network.size() - 1].weights, network[network.size() - 1].lambda, network[network.size() - 1].alpha, network[network.size() - 1].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
|
|
||||||
|
|
||||||
for (int i = network.size() - 2; i >= 0; i--) {
|
|
||||||
hiddenLayerAvn = network[i].activation_map[network[i].activation];
|
|
||||||
network[i].delta = alg.hadamard_product(alg.matmult(network[i + 1].delta, alg.transpose(network[i + 1].weights)), (avn.*hiddenLayerAvn)(network[i].z, 1));
|
|
||||||
hiddenLayerWGrad = alg.matmult(alg.transpose(network[i].input), network[i].delta);
|
|
||||||
cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return { cumulativeHiddenLayerWGrad, outputWGrad };
|
|
||||||
}
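// In equation form, the computation above is the standard backpropagation recursion
// (a sketch; ⊙ denotes the element-wise Hadamard product and f' the activation derivative):
//   delta_out = dCost/dy_hat ⊙ f'(z_out),             outputWGrad   = input_out^T * delta_out
//   delta_l   = (delta_{l+1} * W_{l+1}^T) ⊙ f'(z_l),  hiddenWGrad_l = input_l^T * delta_l
// Note that cumulativeHiddenLayerWGrad is filled from the LAST hidden layer down to the
// first, which is why updateParameters() walks it back to front.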
|
|
||||||
|
|
||||||
void MLPPANNOld::UI(int epoch, real_t cost_prev, std::vector<real_t> y_hat, std::vector<real_t> outputSet) {
|
|
||||||
MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
|
|
||||||
std::cout << "Layer " << network.size() + 1 << ": " << std::endl;
|
|
||||||
MLPPUtilities::UI(outputLayer->weights, outputLayer->bias);
|
|
||||||
if (!network.empty()) {
|
|
||||||
for (int i = network.size() - 1; i >= 0; i--) {
|
|
||||||
std::cout << "Layer " << i + 1 << ": " << std::endl;
|
|
||||||
MLPPUtilities::UI(network[i].weights, network[i].bias);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
@ -1,73 +0,0 @@
#ifndef MLPP_ANN_OLD_H
|
|
||||||
#define MLPP_ANN_OLD_H
|
|
||||||
|
|
||||||
//
|
|
||||||
// ANN.hpp
|
|
||||||
//
|
|
||||||
// Created by Marc Melikyan on 11/4/20.
|
|
||||||
//
|
|
||||||
|
|
||||||
#include "core/math/math_defs.h"
|
|
||||||
|
|
||||||
#include "../hidden_layer/hidden_layer.h"
|
|
||||||
#include "../output_layer/output_layer.h"
|
|
||||||
|
|
||||||
#include "../hidden_layer/hidden_layer_old.h"
|
|
||||||
#include "../output_layer/output_layer_old.h"
|
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <tuple>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
class MLPPANNOld {
|
|
||||||
public:
|
|
||||||
MLPPANNOld(std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet);
|
|
||||||
~MLPPANNOld();
|
|
||||||
std::vector<real_t> modelSetTest(std::vector<std::vector<real_t>> X);
|
|
||||||
real_t modelTest(std::vector<real_t> x);
|
|
||||||
void gradientDescent(real_t learning_rate, int max_epoch, bool UI = false);
|
|
||||||
void SGD(real_t learning_rate, int max_epoch, bool UI = false);
|
|
||||||
void MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI = false);
|
|
||||||
void Momentum(real_t learning_rate, int max_epoch, int mini_batch_size, real_t gamma, bool NAG, bool UI = false);
|
|
||||||
void Adagrad(real_t learning_rate, int max_epoch, int mini_batch_size, real_t e, bool UI = false);
|
|
||||||
void Adadelta(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t e, bool UI = false);
|
|
||||||
void Adam(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI = false);
|
|
||||||
void Adamax(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI = false);
|
|
||||||
void Nadam(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI = false);
|
|
||||||
void AMSGrad(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI = false);
|
|
||||||
real_t score();
|
|
||||||
void save(std::string fileName);
|
|
||||||
|
|
||||||
void setLearningRateScheduler(std::string type, real_t decayConstant);
|
|
||||||
void setLearningRateScheduler(std::string type, real_t decayConstant, real_t dropRate);
|
|
||||||
|
|
||||||
void addLayer(int n_hidden, std::string activation, std::string weightInit = "Default", std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5);
|
|
||||||
void addOutputLayer(std::string activation, std::string loss, std::string weightInit = "Default", std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5);
|
|
||||||
|
|
||||||
private:
|
|
||||||
real_t applyLearningRateScheduler(real_t learningRate, real_t decayConstant, real_t epoch, real_t dropRate);
|
|
||||||
|
|
||||||
real_t Cost(std::vector<real_t> y_hat, std::vector<real_t> y);
|
|
||||||
|
|
||||||
void forwardPass();
|
|
||||||
void updateParameters(std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations, std::vector<real_t> outputLayerUpdation, real_t learning_rate);
|
|
||||||
std::tuple<std::vector<std::vector<std::vector<real_t>>>, std::vector<real_t>> computeGradients(std::vector<real_t> y_hat, std::vector<real_t> outputSet);
|
|
||||||
|
|
||||||
void UI(int epoch, real_t cost_prev, std::vector<real_t> y_hat, std::vector<real_t> outputSet);
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> inputSet;
|
|
||||||
std::vector<real_t> outputSet;
|
|
||||||
std::vector<real_t> y_hat;
|
|
||||||
|
|
||||||
std::vector<MLPPOldHiddenLayer> network;
|
|
||||||
MLPPOldOutputLayer *outputLayer;
|
|
||||||
|
|
||||||
int n;
|
|
||||||
int k;
|
|
||||||
|
|
||||||
std::string lrScheduler;
|
|
||||||
real_t decayConstant;
|
|
||||||
real_t dropRate;
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif /* ANN_hpp */
@ -1,264 +0,0 @@
//
|
|
||||||
// AutoEncoder.cpp
|
|
||||||
//
|
|
||||||
// Created by Marc Melikyan on 11/4/20.
|
|
||||||
//
|
|
||||||
|
|
||||||
#include "auto_encoder_old.h"
|
|
||||||
|
|
||||||
#include "../activation/activation_old.h"
|
|
||||||
#include "../cost/cost_old.h"
|
|
||||||
#include "../lin_alg/lin_alg_old.h"
|
|
||||||
#include "../utilities/utilities.h"
|
|
||||||
|
|
||||||
#include <iostream>
|
|
||||||
#include <random>
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPAutoEncoderOld::modelSetTest(std::vector<std::vector<real_t>> X) {
|
|
||||||
return Evaluate(X);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPAutoEncoderOld::modelTest(std::vector<real_t> x) {
|
|
||||||
return Evaluate(x);
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPAutoEncoderOld::gradientDescent(real_t learning_rate, int max_epoch, bool UI) {
|
|
||||||
MLPPActivationOld avn;
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
real_t cost_prev = 0;
|
|
||||||
int epoch = 1;
|
|
||||||
forwardPass();
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
cost_prev = Cost(y_hat, inputSet);
|
|
||||||
|
|
||||||
// Calculating the errors
|
|
||||||
std::vector<std::vector<real_t>> error = alg.subtraction(y_hat, inputSet);
|
|
||||||
|
|
||||||
// Calculating the weight/bias gradients for layer 2
|
|
||||||
std::vector<std::vector<real_t>> D2_1 = alg.matmult(alg.transpose(a2), error);
|
|
||||||
|
|
||||||
// weights and bias updation for layer 2
|
|
||||||
weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate / n, D2_1));
|
|
||||||
|
|
||||||
// Calculating the bias gradients for layer 2
|
|
||||||
bias2 = alg.subtractMatrixRows(bias2, alg.scalarMultiply(learning_rate, error));
|
|
||||||
|
|
||||||
//Calculating the weight/bias for layer 1
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> D1_1 = alg.matmult(error, alg.transpose(weights2));
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, 1));
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> D1_3 = alg.matmult(alg.transpose(inputSet), D1_2);
|
|
||||||
|
|
||||||
// weight and bias updation for layer 1
|
|
||||||
weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate / n, D1_3));
|
|
||||||
|
|
||||||
bias1 = alg.subtractMatrixRows(bias1, alg.scalarMultiply(learning_rate / n, D1_2));
|
|
||||||
|
|
||||||
forwardPass();
|
|
||||||
|
|
||||||
// UI PORTION
|
|
||||||
if (UI) {
|
|
||||||
MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, inputSet));
|
|
||||||
std::cout << "Layer 1:" << std::endl;
|
|
||||||
MLPPUtilities::UI(weights1, bias1);
|
|
||||||
std::cout << "Layer 2:" << std::endl;
|
|
||||||
MLPPUtilities::UI(weights2, bias2);
|
|
||||||
}
|
|
||||||
epoch++;
|
|
||||||
|
|
||||||
if (epoch > max_epoch) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
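// Sketch of where the updates above come from, in the code's own notation (mini-batch size n):
//   error = y_hat - X                               (derivative of the MSE reconstruction cost, up to a constant)
//   dW2   = a2^T * error,                           applied as weights2 -= learning_rate / n * dW2
//   dW1   = X^T * ((error * W2^T) ⊙ sigmoid'(z2)),  applied as weights1 -= learning_rate / n * dW1
// The bias steps use the analogous sums of the per-example deltas.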
|
|
||||||
|
|
||||||
void MLPPAutoEncoderOld::SGD(real_t learning_rate, int max_epoch, bool UI) {
|
|
||||||
MLPPActivationOld avn;
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
real_t cost_prev = 0;
|
|
||||||
int epoch = 1;
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
std::random_device rd;
|
|
||||||
std::default_random_engine generator(rd());
|
|
||||||
std::uniform_int_distribution<int> distribution(0, int(n - 1));
|
|
||||||
int outputIndex = distribution(generator);
|
|
||||||
|
|
||||||
std::vector<real_t> y_hat = Evaluate(inputSet[outputIndex]);
|
|
||||||
auto prop_res = propagate(inputSet[outputIndex]);
|
|
||||||
auto z2 = std::get<0>(prop_res);
|
|
||||||
auto a2 = std::get<1>(prop_res);
|
|
||||||
|
|
||||||
cost_prev = Cost({ y_hat }, { inputSet[outputIndex] });
|
|
||||||
std::vector<real_t> error = alg.subtraction(y_hat, inputSet[outputIndex]);
|
|
||||||
|
|
||||||
// Weight updation for layer 2
|
|
||||||
std::vector<std::vector<real_t>> D2_1 = alg.outerProduct(error, a2);
|
|
||||||
weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate, alg.transpose(D2_1)));
|
|
||||||
|
|
||||||
// Bias updation for layer 2
|
|
||||||
bias2 = alg.subtraction(bias2, alg.scalarMultiply(learning_rate, error));
|
|
||||||
|
|
||||||
// Weight updation for layer 1
|
|
||||||
std::vector<real_t> D1_1 = alg.mat_vec_mult(weights2, error);
|
|
||||||
std::vector<real_t> D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, 1));
|
|
||||||
std::vector<std::vector<real_t>> D1_3 = alg.outerProduct(inputSet[outputIndex], D1_2);
|
|
||||||
|
|
||||||
weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate, D1_3));
|
|
||||||
// Bias updation for layer 1
|
|
||||||
|
|
||||||
bias1 = alg.subtraction(bias1, alg.scalarMultiply(learning_rate, D1_2));
|
|
||||||
|
|
||||||
y_hat = Evaluate(inputSet[outputIndex]);
|
|
||||||
if (UI) {
|
|
||||||
MLPPUtilities::CostInfo(epoch, cost_prev, Cost({ y_hat }, { inputSet[outputIndex] }));
|
|
||||||
std::cout << "Layer 1:" << std::endl;
|
|
||||||
MLPPUtilities::UI(weights1, bias1);
|
|
||||||
std::cout << "Layer 2:" << std::endl;
|
|
||||||
MLPPUtilities::UI(weights2, bias2);
|
|
||||||
}
|
|
||||||
epoch++;
|
|
||||||
|
|
||||||
if (epoch > max_epoch) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
forwardPass();
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPAutoEncoderOld::MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI) {
|
|
||||||
MLPPActivationOld avn;
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
real_t cost_prev = 0;
|
|
||||||
int epoch = 1;
|
|
||||||
|
|
||||||
// Creating the mini-batches
|
|
||||||
int n_mini_batch = n / mini_batch_size;
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> inputMiniBatches = MLPPUtilities::createMiniBatches(inputSet, n_mini_batch);
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
for (int i = 0; i < n_mini_batch; i++) {
|
|
||||||
std::vector<std::vector<real_t>> y_hat = Evaluate(inputMiniBatches[i]);
|
|
||||||
|
|
||||||
auto prop_res = propagate(inputMiniBatches[i]);
|
|
||||||
auto z2 = std::get<0>(prop_res);
|
|
||||||
auto a2 = std::get<1>(prop_res);
|
|
||||||
|
|
||||||
cost_prev = Cost(y_hat, inputMiniBatches[i]);
|
|
||||||
|
|
||||||
// Calculating the errors
|
|
||||||
std::vector<std::vector<real_t>> error = alg.subtraction(y_hat, inputMiniBatches[i]);
|
|
||||||
|
|
||||||
// Calculating the weight/bias gradients for layer 2
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> D2_1 = alg.matmult(alg.transpose(a2), error);
|
|
||||||
|
|
||||||
// weights and bias updation for layer 2
|
|
||||||
weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate / inputMiniBatches[i].size(), D2_1));
|
|
||||||
|
|
||||||
// Bias Updation for layer 2
|
|
||||||
bias2 = alg.subtractMatrixRows(bias2, alg.scalarMultiply(learning_rate, error));
|
|
||||||
|
|
||||||
//Calculating the weight/bias for layer 1
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> D1_1 = alg.matmult(error, alg.transpose(weights2));
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, 1));
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> D1_3 = alg.matmult(alg.transpose(inputMiniBatches[i]), D1_2);
|
|
||||||
|
|
||||||
// weight and bias updation for layer 1
|
|
||||||
weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate / inputMiniBatches[i].size(), D1_3));
|
|
||||||
|
|
||||||
bias1 = alg.subtractMatrixRows(bias1, alg.scalarMultiply(learning_rate / inputMiniBatches[i].size(), D1_2));
|
|
||||||
|
|
||||||
y_hat = Evaluate(inputMiniBatches[i]);
|
|
||||||
|
|
||||||
if (UI) {
|
|
||||||
MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, inputMiniBatches[i]));
|
|
||||||
std::cout << "Layer 1:" << std::endl;
|
|
||||||
MLPPUtilities::UI(weights1, bias1);
|
|
||||||
std::cout << "Layer 2:" << std::endl;
|
|
||||||
MLPPUtilities::UI(weights2, bias2);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
epoch++;
|
|
||||||
if (epoch > max_epoch) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
forwardPass();
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPAutoEncoderOld::score() {
|
|
||||||
MLPPUtilities util;
|
|
||||||
return util.performance(y_hat, inputSet);
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPAutoEncoderOld::save(std::string fileName) {
|
|
||||||
MLPPUtilities util;
|
|
||||||
util.saveParameters(fileName, weights1, bias1, 0, 1);
|
|
||||||
util.saveParameters(fileName, weights2, bias2, 1, 2);
|
|
||||||
}
|
|
||||||
|
|
||||||
MLPPAutoEncoderOld::MLPPAutoEncoderOld(std::vector<std::vector<real_t>> pinputSet, int pn_hidden) {
|
|
||||||
inputSet = pinputSet;
|
|
||||||
n_hidden = pn_hidden;
|
|
||||||
n = inputSet.size();
|
|
||||||
k = inputSet[0].size();
|
|
||||||
|
|
||||||
y_hat.resize(inputSet.size());
|
|
||||||
|
|
||||||
weights1 = MLPPUtilities::weightInitialization(k, n_hidden);
|
|
||||||
weights2 = MLPPUtilities::weightInitialization(n_hidden, k);
|
|
||||||
bias1 = MLPPUtilities::biasInitialization(n_hidden);
|
|
||||||
bias2 = MLPPUtilities::biasInitialization(k);
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPAutoEncoderOld::Cost(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y) {
|
|
||||||
class MLPPCostOld cost;
|
|
||||||
return cost.MSE(y_hat, inputSet);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPAutoEncoderOld::Evaluate(std::vector<std::vector<real_t>> X) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPActivationOld avn;
|
|
||||||
std::vector<std::vector<real_t>> z2 = alg.mat_vec_add(alg.matmult(X, weights1), bias1);
|
|
||||||
std::vector<std::vector<real_t>> a2 = avn.sigmoid(z2);
|
|
||||||
return alg.mat_vec_add(alg.matmult(a2, weights2), bias2);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::tuple<std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>> MLPPAutoEncoderOld::propagate(std::vector<std::vector<real_t>> X) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPActivationOld avn;
|
|
||||||
std::vector<std::vector<real_t>> z2 = alg.mat_vec_add(alg.matmult(X, weights1), bias1);
|
|
||||||
std::vector<std::vector<real_t>> a2 = avn.sigmoid(z2);
|
|
||||||
return { z2, a2 };
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPAutoEncoderOld::Evaluate(std::vector<real_t> x) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPActivationOld avn;
|
|
||||||
std::vector<real_t> z2 = alg.addition(alg.mat_vec_mult(alg.transpose(weights1), x), bias1);
|
|
||||||
std::vector<real_t> a2 = avn.sigmoid(z2);
|
|
||||||
return alg.addition(alg.mat_vec_mult(alg.transpose(weights2), a2), bias2);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::tuple<std::vector<real_t>, std::vector<real_t>> MLPPAutoEncoderOld::propagate(std::vector<real_t> x) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPActivationOld avn;
|
|
||||||
std::vector<real_t> z2 = alg.addition(alg.mat_vec_mult(alg.transpose(weights1), x), bias1);
|
|
||||||
std::vector<real_t> a2 = avn.sigmoid(z2);
|
|
||||||
return { z2, a2 };
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPAutoEncoderOld::forwardPass() {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPActivationOld avn;
|
|
||||||
z2 = alg.mat_vec_add(alg.matmult(inputSet, weights1), bias1);
|
|
||||||
a2 = avn.sigmoid(z2);
|
|
||||||
y_hat = alg.mat_vec_add(alg.matmult(a2, weights2), bias2);
|
|
||||||
}
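// In compact form, the forward pass above is a single sigmoid hidden layer with a linear
// reconstruction, trained against MSE(y_hat, X) (a sketch in the code's own notation):
//   z2    = X * W1 + b1
//   a2    = sigmoid(z2)
//   y_hat = a2 * W2 + b2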
@ -1,58 +0,0 @@
|
|
||||||
#ifndef MLPP_AUTO_ENCODER_OLD_H
|
|
||||||
#define MLPP_AUTO_ENCODER_OLD_H
|
|
||||||
|
|
||||||
//
|
|
||||||
// AutoEncoder.hpp
|
|
||||||
//
|
|
||||||
// Created by Marc Melikyan on 11/4/20.
|
|
||||||
//
|
|
||||||
|
|
||||||
#include "core/math/math_defs.h"
|
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <tuple>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
class MLPPAutoEncoderOld {
|
|
||||||
public:
|
|
||||||
std::vector<std::vector<real_t>> modelSetTest(std::vector<std::vector<real_t>> X);
|
|
||||||
std::vector<real_t> modelTest(std::vector<real_t> x);
|
|
||||||
|
|
||||||
void gradientDescent(real_t learning_rate, int max_epoch, bool UI = false);
|
|
||||||
void SGD(real_t learning_rate, int max_epoch, bool UI = false);
|
|
||||||
void MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI = false);
|
|
||||||
|
|
||||||
real_t score();
|
|
||||||
|
|
||||||
void save(std::string fileName);
|
|
||||||
|
|
||||||
MLPPAutoEncoderOld(std::vector<std::vector<real_t>> inputSet, int n_hidden);
|
|
||||||
|
|
||||||
private:
|
|
||||||
real_t Cost(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y);
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> Evaluate(std::vector<std::vector<real_t>> X);
|
|
||||||
std::tuple<std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>> propagate(std::vector<std::vector<real_t>> X);
|
|
||||||
std::vector<real_t> Evaluate(std::vector<real_t> x);
|
|
||||||
std::tuple<std::vector<real_t>, std::vector<real_t>> propagate(std::vector<real_t> x);
|
|
||||||
void forwardPass();
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> inputSet;
|
|
||||||
std::vector<std::vector<real_t>> y_hat;
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> weights1;
|
|
||||||
std::vector<std::vector<real_t>> weights2;
|
|
||||||
|
|
||||||
std::vector<real_t> bias1;
|
|
||||||
std::vector<real_t> bias2;
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> z2;
|
|
||||||
std::vector<std::vector<real_t>> a2;
|
|
||||||
|
|
||||||
int n;
|
|
||||||
int k;
|
|
||||||
int n_hidden;
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif /* AutoEncoder_hpp */
@ -1,179 +0,0 @@
//
|
|
||||||
// BernoulliNB.cpp
|
|
||||||
//
|
|
||||||
// Created by Marc Melikyan on 1/17/21.
|
|
||||||
//
|
|
||||||
|
|
||||||
#include "bernoulli_nb_old.h"
|
|
||||||
#include "../data/data.h"
|
|
||||||
#include "../lin_alg/lin_alg_old.h"
|
|
||||||
#include "../utilities/utilities.h"
|
|
||||||
|
|
||||||
#include <iostream>
|
|
||||||
#include <random>
|
|
||||||
|
|
||||||
MLPPBernoulliNBOld::MLPPBernoulliNBOld(std::vector<std::vector<real_t>> p_inputSet, std::vector<real_t> p_outputSet) {
|
|
||||||
inputSet = p_inputSet;
|
|
||||||
outputSet = p_outputSet;
|
|
||||||
class_num = 2;
|
|
||||||
|
|
||||||
y_hat.resize(outputSet.size());
|
|
||||||
Evaluate();
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPBernoulliNBOld::modelSetTest(std::vector<std::vector<real_t>> X) {
|
|
||||||
std::vector<real_t> y_hat;
|
|
||||||
for (uint32_t i = 0; i < X.size(); i++) {
|
|
||||||
y_hat.push_back(modelTest(X[i]));
|
|
||||||
}
|
|
||||||
return y_hat;
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPBernoulliNBOld::modelTest(std::vector<real_t> x) {
|
|
||||||
real_t score_0 = 1;
|
|
||||||
real_t score_1 = 1;
|
|
||||||
|
|
||||||
std::vector<int> foundIndices;
|
|
||||||
|
|
||||||
for (uint32_t j = 0; j < x.size(); j++) {
|
|
||||||
for (uint32_t k = 0; k < vocab.size(); k++) {
|
|
||||||
if (x[j] == vocab[k]) {
|
|
||||||
score_0 *= theta[0][vocab[k]];
|
|
||||||
score_1 *= theta[1][vocab[k]];
|
|
||||||
|
|
||||||
foundIndices.push_back(k);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < vocab.size(); i++) {
|
|
||||||
bool found = false;
|
|
||||||
for (uint32_t j = 0; j < foundIndices.size(); j++) {
|
|
||||||
if (vocab[i] == vocab[foundIndices[j]]) {
|
|
||||||
found = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!found) {
|
|
||||||
score_0 *= 1 - theta[0][vocab[i]];
|
|
||||||
score_1 *= 1 - theta[1][vocab[i]];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
score_0 *= prior_0;
|
|
||||||
score_1 *= prior_1;
|
|
||||||
|
|
||||||
// Assigning the training example to a class
|
|
||||||
|
|
||||||
if (score_0 > score_1) {
|
|
||||||
return 0;
|
|
||||||
} else {
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
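// modelTest() above evaluates the usual Bernoulli naive Bayes score for each class
// (a sketch; theta[c][w] is the estimated probability that word w occurs in class c):
//   score_c = Pr(class = c) * product over all w in vocab of
//                 theta[c][w]       if w occurs in x,
//                 1 - theta[c][w]   otherwise.
// The example is assigned to whichever class yields the larger score.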
|
|
||||||
|
|
||||||
real_t MLPPBernoulliNBOld::score() {
|
|
||||||
MLPPUtilities util;
|
|
||||||
return util.performance(y_hat, outputSet);
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPBernoulliNBOld::computeVocab() {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPData data;
|
|
||||||
vocab = data.vecToSet<real_t>(alg.flatten(inputSet));
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPBernoulliNBOld::computeTheta() {
|
|
||||||
// Resizing theta for the sake of ease & proper access of the elements.
|
|
||||||
theta.resize(class_num);
|
|
||||||
|
|
||||||
// Setting all values in the hashmap to 0 by default.
|
|
||||||
for (int i = class_num - 1; i >= 0; i--) {
|
|
||||||
for (uint32_t j = 0; j < vocab.size(); j++) {
|
|
||||||
theta[i][vocab[j]] = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < inputSet.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < inputSet[0].size(); j++) {
|
|
||||||
theta[outputSet[i]][inputSet[i][j]]++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < theta.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < theta[i].size(); j++) {
|
|
||||||
if (i == 0) {
|
|
||||||
theta[i][j] /= prior_0 * y_hat.size();
|
|
||||||
} else {
|
|
||||||
theta[i][j] /= prior_1 * y_hat.size();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPBernoulliNBOld::Evaluate() {
|
|
||||||
for (uint32_t i = 0; i < outputSet.size(); i++) {
|
|
||||||
// Pr(B | A) * Pr(A)
|
|
||||||
real_t score_0 = 1;
|
|
||||||
real_t score_1 = 1;
|
|
||||||
|
|
||||||
real_t sum = 0;
|
|
||||||
for (uint32_t ii = 0; ii < outputSet.size(); ii++) {
|
|
||||||
if (outputSet[ii] == 1) {
|
|
||||||
sum += outputSet[ii];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Easy computation of priors, i.e. Pr(C_k)
|
|
||||||
prior_1 = sum / y_hat.size();
|
|
||||||
prior_0 = 1 - prior_1;
|
|
||||||
|
|
||||||
// Evaluating Theta...
|
|
||||||
computeTheta();
|
|
||||||
|
|
||||||
// Evaluating the vocab set...
|
|
||||||
computeVocab();
|
|
||||||
|
|
||||||
std::vector<int> foundIndices;
|
|
||||||
|
|
||||||
for (uint32_t j = 0; j < inputSet.size(); j++) {
|
|
||||||
for (uint32_t k = 0; k < vocab.size(); k++) {
|
|
||||||
if (inputSet[i][j] == vocab[k]) {
|
|
||||||
score_0 += std::log(theta[0][vocab[k]]);
|
|
||||||
score_1 += std::log(theta[1][vocab[k]]);
|
|
||||||
|
|
||||||
foundIndices.push_back(k);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (uint32_t ii = 0; ii < vocab.size(); ii++) {
|
|
||||||
bool found = false;
|
|
||||||
for (uint32_t j = 0; j < foundIndices.size(); j++) {
|
|
||||||
if (vocab[ii] == vocab[foundIndices[j]]) {
|
|
||||||
found = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!found) {
|
|
||||||
score_0 += std::log(1 - theta[0][vocab[ii]]);
|
|
||||||
score_1 += std::log(1 - theta[1][vocab[ii]]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
score_0 += std::log(prior_0);
|
|
||||||
score_1 += std::log(prior_1);
|
|
||||||
|
|
||||||
score_0 = exp(score_0);
|
|
||||||
score_1 = exp(score_1);
|
|
||||||
|
|
||||||
std::cout << score_0 << std::endl;
|
|
||||||
std::cout << score_1 << std::endl;
|
|
||||||
|
|
||||||
// Assigning the training example to a class
|
|
||||||
|
|
||||||
if (score_0 > score_1) {
|
|
||||||
y_hat[i] = 0;
|
|
||||||
} else {
|
|
||||||
y_hat[i] = 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
@ -1,42 +0,0 @@
|
|
||||||
#ifndef MLPP_BERNOULLI_NB_OLD_H
|
|
||||||
#define MLPP_BERNOULLI_NB_OLD_H
|
|
||||||
|
|
||||||
//
|
|
||||||
// BernoulliNB.hpp
|
|
||||||
//
|
|
||||||
// Created by Marc Melikyan on 1/17/21.
|
|
||||||
//
|
|
||||||
|
|
||||||
#include "core/math/math_defs.h"
|
|
||||||
|
|
||||||
#include <map>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
class MLPPBernoulliNBOld {
|
|
||||||
public:
|
|
||||||
MLPPBernoulliNBOld(std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet);
|
|
||||||
std::vector<real_t> modelSetTest(std::vector<std::vector<real_t>> X);
|
|
||||||
real_t modelTest(std::vector<real_t> x);
|
|
||||||
real_t score();
|
|
||||||
|
|
||||||
private:
|
|
||||||
void computeVocab();
|
|
||||||
void computeTheta();
|
|
||||||
void Evaluate();
|
|
||||||
|
|
||||||
// Model Params
|
|
||||||
real_t prior_1 = 0;
|
|
||||||
real_t prior_0 = 0;
|
|
||||||
|
|
||||||
std::vector<std::map<real_t, int>> theta;
|
|
||||||
std::vector<real_t> vocab;
|
|
||||||
int class_num;
|
|
||||||
|
|
||||||
// Datasets
|
|
||||||
std::vector<std::vector<real_t>> inputSet;
|
|
||||||
std::vector<real_t> outputSet;
|
|
||||||
std::vector<real_t> y_hat;
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif /* BernoulliNB_hpp */
@ -1,224 +0,0 @@
//
|
|
||||||
// CLogLogReg.cpp
|
|
||||||
//
|
|
||||||
// Created by Marc Melikyan on 10/2/20.
|
|
||||||
//
|
|
||||||
|
|
||||||
#include "c_log_log_reg_old.h"
|
|
||||||
|
|
||||||
#include "../activation/activation_old.h"
|
|
||||||
#include "../cost/cost_old.h"
|
|
||||||
#include "../lin_alg/lin_alg_old.h"
|
|
||||||
#include "../regularization/reg_old.h"
|
|
||||||
#include "../utilities/utilities.h"
|
|
||||||
|
|
||||||
#include <iostream>
|
|
||||||
#include <random>
|
|
||||||
|
|
||||||
MLPPCLogLogRegOld::MLPPCLogLogRegOld(std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet, std::string reg, real_t lambda, real_t alpha) :
|
|
||||||
inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha) {
|
|
||||||
y_hat.resize(n);
|
|
||||||
weights = MLPPUtilities::weightInitialization(k);
|
|
||||||
bias = MLPPUtilities::biasInitialization();
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPCLogLogRegOld::modelSetTest(std::vector<std::vector<real_t>> X) {
|
|
||||||
return Evaluate(X);
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPCLogLogRegOld::modelTest(std::vector<real_t> x) {
|
|
||||||
return Evaluate(x);
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPCLogLogRegOld::gradientDescent(real_t learning_rate, int max_epoch, bool UI) {
|
|
||||||
MLPPActivationOld avn;
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPRegOld regularization;
|
|
||||||
real_t cost_prev = 0;
|
|
||||||
int epoch = 1;
|
|
||||||
forwardPass();
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
cost_prev = Cost(y_hat, outputSet);
|
|
||||||
|
|
||||||
std::vector<real_t> error = alg.subtraction(y_hat, outputSet);
|
|
||||||
|
|
||||||
// Calculating the weight gradients
|
|
||||||
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate / n, alg.mat_vec_mult(alg.transpose(inputSet), alg.hadamard_product(error, avn.cloglog(z, 1)))));
|
|
||||||
weights = regularization.regWeights(weights, lambda, alpha, reg);
|
|
||||||
|
|
||||||
// Calculating the bias gradients
|
|
||||||
bias -= learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.cloglog(z, 1))) / n;
|
|
||||||
|
|
||||||
forwardPass();
|
|
||||||
|
|
||||||
if (UI) {
|
|
||||||
MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
|
|
||||||
MLPPUtilities::UI(weights, bias);
|
|
||||||
}
|
|
||||||
epoch++;
|
|
||||||
|
|
||||||
if (epoch > max_epoch) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPCLogLogRegOld::MLE(real_t learning_rate, int max_epoch, bool UI) {
|
|
||||||
MLPPActivationOld avn;
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPRegOld regularization;
|
|
||||||
real_t cost_prev = 0;
|
|
||||||
int epoch = 1;
|
|
||||||
forwardPass();
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
cost_prev = Cost(y_hat, outputSet);
|
|
||||||
|
|
||||||
std::vector<real_t> error = alg.subtraction(y_hat, outputSet);
|
|
||||||
|
|
||||||
weights = alg.addition(weights, alg.scalarMultiply(learning_rate / n, alg.mat_vec_mult(alg.transpose(inputSet), alg.hadamard_product(error, avn.cloglog(z, 1)))));
|
|
||||||
weights = regularization.regWeights(weights, lambda, alpha, reg);
|
|
||||||
|
|
||||||
// Calculating the bias gradients
|
|
||||||
bias += learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.cloglog(z, 1))) / n;
|
|
||||||
forwardPass();
|
|
||||||
|
|
||||||
if (UI) {
|
|
||||||
MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
|
|
||||||
MLPPUtilities::UI(weights, bias);
|
|
||||||
}
|
|
||||||
epoch++;
|
|
||||||
|
|
||||||
if (epoch > max_epoch) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPCLogLogRegOld::SGD(real_t learning_rate, int max_epoch, bool UI) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPRegOld regularization;
|
|
||||||
real_t cost_prev = 0;
|
|
||||||
int epoch = 1;
|
|
||||||
forwardPass();
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
std::random_device rd;
|
|
||||||
std::default_random_engine generator(rd());
|
|
||||||
std::uniform_int_distribution<int> distribution(0, int(n - 1));
|
|
||||||
int outputIndex = distribution(generator);
|
|
||||||
|
|
||||||
real_t y_hat = Evaluate(inputSet[outputIndex]);
|
|
||||||
real_t z = propagate(inputSet[outputIndex]);
|
|
||||||
cost_prev = Cost({ y_hat }, { outputSet[outputIndex] });
|
|
||||||
|
|
||||||
real_t error = y_hat - outputSet[outputIndex];
|
|
||||||
|
|
||||||
// Weight Updation
|
|
||||||
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate * error * exp(z - exp(z)), inputSet[outputIndex]));
|
|
||||||
weights = regularization.regWeights(weights, lambda, alpha, reg);
|
|
||||||
|
|
||||||
// Bias updation
|
|
||||||
bias -= learning_rate * error * exp(z - exp(z));
|
|
||||||
|
|
||||||
y_hat = Evaluate({ inputSet[outputIndex] });
|
|
||||||
|
|
||||||
if (UI) {
|
|
||||||
MLPPUtilities::CostInfo(epoch, cost_prev, Cost({ y_hat }, { outputSet[outputIndex] }));
|
|
||||||
MLPPUtilities::UI(weights, bias);
|
|
||||||
}
|
|
||||||
epoch++;
|
|
||||||
|
|
||||||
if (epoch > max_epoch) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
forwardPass();
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPCLogLogRegOld::MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI) {
|
|
||||||
MLPPActivationOld avn;
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPRegOld regularization;
|
|
||||||
real_t cost_prev = 0;
|
|
||||||
int epoch = 1;
|
|
||||||
|
|
||||||
// Creating the mini-batches
|
|
||||||
int n_mini_batch = n / mini_batch_size;
|
|
||||||
auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
|
|
||||||
auto inputMiniBatches = std::get<0>(batches);
|
|
||||||
auto outputMiniBatches = std::get<1>(batches);
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
for (int i = 0; i < n_mini_batch; i++) {
|
|
||||||
std::vector<real_t> y_hat = Evaluate(inputMiniBatches[i]);
|
|
||||||
std::vector<real_t> z = propagate(inputMiniBatches[i]);
|
|
||||||
cost_prev = Cost(y_hat, outputMiniBatches[i]);
|
|
||||||
|
|
||||||
std::vector<real_t> error = alg.subtraction(y_hat, outputMiniBatches[i]);
|
|
||||||
|
|
||||||
// Calculating the weight gradients
|
|
||||||
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate / n, alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), alg.hadamard_product(error, avn.cloglog(z, 1)))));
|
|
||||||
weights = regularization.regWeights(weights, lambda, alpha, reg);
|
|
||||||
|
|
||||||
// Calculating the bias gradients
|
|
||||||
bias -= learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.cloglog(z, 1))) / n;
|
|
||||||
|
|
||||||
forwardPass();
|
|
||||||
|
|
||||||
y_hat = Evaluate(inputMiniBatches[i]);
|
|
||||||
|
|
||||||
if (UI) {
|
|
||||||
MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
|
|
||||||
MLPPUtilities::UI(weights, bias);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
epoch++;
|
|
||||||
if (epoch > max_epoch) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
forwardPass();
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPCLogLogRegOld::score() {
|
|
||||||
MLPPUtilities util;
|
|
||||||
return util.performance(y_hat, outputSet);
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPCLogLogRegOld::Cost(std::vector<real_t> y_hat, std::vector<real_t> y) {
|
|
||||||
MLPPRegOld regularization;
|
|
||||||
class MLPPCostOld cost;
|
|
||||||
return cost.MSE(y_hat, y) + regularization.regTerm(weights, lambda, alpha, reg);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPCLogLogRegOld::Evaluate(std::vector<std::vector<real_t>> X) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPActivationOld avn;
|
|
||||||
return avn.cloglog(alg.scalarAdd(bias, alg.mat_vec_mult(X, weights)));
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPCLogLogRegOld::propagate(std::vector<std::vector<real_t>> X) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
return alg.scalarAdd(bias, alg.mat_vec_mult(X, weights));
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPCLogLogRegOld::Evaluate(std::vector<real_t> x) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPActivationOld avn;
|
|
||||||
return avn.cloglog(alg.dot(weights, x) + bias);
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPCLogLogRegOld::propagate(std::vector<real_t> x) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
return alg.dot(weights, x) + bias;
|
|
||||||
}
|
|
||||||
|
|
||||||
// cloglog ( wTx + b )
|
|
||||||
void MLPPCLogLogRegOld::forwardPass() {
|
|
||||||
MLPPActivationOld avn;
|
|
||||||
|
|
||||||
z = propagate(inputSet);
|
|
||||||
y_hat = avn.cloglog(z);
|
|
||||||
}
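// The cloglog link used throughout this class and its derivative (the source of the
// exp(z - exp(z)) factor in the SGD updates above) can be written as:
//   y_hat = cloglog(z) = 1 - exp(-exp(z)),  with z = w^T x + b
//   dy_hat/dz = exp(z) * exp(-exp(z)) = exp(z - exp(z))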
@ -1,54 +0,0 @@
|
|
||||||
#ifndef MLPP_C_LOG_LOG_REG_OLD_H
|
|
||||||
#define MLPP_C_LOG_LOG_REG_OLD_H
|
|
||||||
|
|
||||||
//
|
|
||||||
// CLogLogReg.hpp
|
|
||||||
//
|
|
||||||
// Created by Marc Melikyan on 10/2/20.
|
|
||||||
//
|
|
||||||
|
|
||||||
#include "core/math/math_defs.h"
|
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
class MLPPCLogLogRegOld {
|
|
||||||
public:
|
|
||||||
MLPPCLogLogRegOld(std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet, std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5);
|
|
||||||
std::vector<real_t> modelSetTest(std::vector<std::vector<real_t>> X);
|
|
||||||
real_t modelTest(std::vector<real_t> x);
|
|
||||||
void gradientDescent(real_t learning_rate, int max_epoch, bool UI = false);
|
|
||||||
void MLE(real_t learning_rate, int max_epoch, bool UI = false);
|
|
||||||
void SGD(real_t learning_rate, int max_epoch, bool UI = false);
|
|
||||||
void MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI = false);
|
|
||||||
real_t score();
|
|
||||||
|
|
||||||
private:
|
|
||||||
void weightInitialization(int k);
|
|
||||||
void biasInitialization();
|
|
||||||
real_t Cost(std::vector<real_t> y_hat, std::vector<real_t> y);
|
|
||||||
|
|
||||||
std::vector<real_t> Evaluate(std::vector<std::vector<real_t>> X);
|
|
||||||
std::vector<real_t> propagate(std::vector<std::vector<real_t>> X);
|
|
||||||
real_t Evaluate(std::vector<real_t> x);
|
|
||||||
real_t propagate(std::vector<real_t> x);
|
|
||||||
void forwardPass();
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> inputSet;
|
|
||||||
std::vector<real_t> outputSet;
|
|
||||||
std::vector<real_t> y_hat;
|
|
||||||
std::vector<real_t> z;
|
|
||||||
std::vector<real_t> weights;
|
|
||||||
real_t bias;
|
|
||||||
|
|
||||||
int n;
|
|
||||||
int k;
|
|
||||||
|
|
||||||
// Regularization Params
|
|
||||||
std::string reg;
|
|
||||||
real_t lambda;
|
|
||||||
real_t alpha; /* This is the controlling param for Elastic Net*/
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif /* CLogLogReg_hpp */
@ -1,378 +0,0 @@
//
|
|
||||||
// Convolutions.cpp
|
|
||||||
//
|
|
||||||
// Created by Marc Melikyan on 4/6/21.
|
|
||||||
//
|
|
||||||
|
|
||||||
#include "../convolutions/convolutions_old.h"
|
|
||||||
|
|
||||||
#include "../lin_alg/lin_alg_old.h"
|
|
||||||
#include "../stat/stat_old.h"
|
|
||||||
#include <cmath>
|
|
||||||
#include <iostream>
|
|
||||||
|
|
||||||
#ifndef M_PI
|
|
||||||
#define M_PI 3.141592653
|
|
||||||
#endif
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPConvolutionsOld::convolve_2d(std::vector<std::vector<real_t>> input, std::vector<std::vector<real_t>> filter, int S, int P) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
std::vector<std::vector<real_t>> feature_map;
|
|
||||||
uint32_t N = input.size();
|
|
||||||
uint32_t F = filter.size();
|
|
||||||
uint32_t map_size = (N - F + 2 * P) / S + 1; // Integer division floors this by definition - thanks, C++!
|
|
||||||
|
|
||||||
if (P != 0) {
|
|
||||||
std::vector<std::vector<real_t>> padded_input;
|
|
||||||
padded_input.resize(N + 2 * P);
|
|
||||||
for (uint32_t i = 0; i < padded_input.size(); i++) {
|
|
||||||
padded_input[i].resize(N + 2 * P);
|
|
||||||
}
|
|
||||||
for (uint32_t i = 0; i < padded_input.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < padded_input[i].size(); j++) {
|
|
||||||
if (i - P < 0 || j - P < 0 || i - P > input.size() - 1 || j - P > input[0].size() - 1) {
|
|
||||||
padded_input[i][j] = 0;
|
|
||||||
} else {
|
|
||||||
padded_input[i][j] = input[i - P][j - P];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
input.resize(padded_input.size());
|
|
||||||
for (uint32_t i = 0; i < padded_input.size(); i++) {
|
|
||||||
input[i].resize(padded_input[i].size());
|
|
||||||
}
|
|
||||||
input = padded_input;
|
|
||||||
}
|
|
||||||
|
|
||||||
feature_map.resize(map_size);
|
|
||||||
for (uint32_t i = 0; i < map_size; i++) {
|
|
||||||
feature_map[i].resize(map_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < map_size; i++) {
|
|
||||||
for (uint32_t j = 0; j < map_size; j++) {
|
|
||||||
std::vector<real_t> convolving_input;
|
|
||||||
for (uint32_t k = 0; k < F; k++) {
|
|
||||||
for (uint32_t p = 0; p < F; p++) {
|
|
||||||
if (i == 0 && j == 0) {
|
|
||||||
convolving_input.push_back(input[i + k][j + p]);
|
|
||||||
} else if (i == 0) {
|
|
||||||
convolving_input.push_back(input[i + k][j + (S - 1) + p]);
|
|
||||||
} else if (j == 0) {
|
|
||||||
convolving_input.push_back(input[i + (S - 1) + k][j + p]);
|
|
||||||
} else {
|
|
||||||
convolving_input.push_back(input[i + (S - 1) + k][j + (S - 1) + p]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
feature_map[i][j] = alg.dot(convolving_input, alg.flatten(filter));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return feature_map;
|
|
||||||
}
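// A quick worked example of the map_size formula above (illustrative numbers only):
// for a 5x5 input, a 3x3 filter, padding P = 1 and stride S = 1,
//   map_size = (N - F + 2 * P) / S + 1 = (5 - 3 + 2) / 1 + 1 = 5,
// i.e. a "same"-sized 5x5 feature map; with P = 0 it would shrink to 3x3.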
|
|
||||||
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> MLPPConvolutionsOld::convolve_3d(std::vector<std::vector<std::vector<real_t>>> input, std::vector<std::vector<std::vector<real_t>>> filter, int S, int P) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> feature_map;
|
|
||||||
uint32_t N = input[0].size();
|
|
||||||
uint32_t F = filter[0].size();
|
|
||||||
uint32_t C = filter.size() / input.size();
|
|
||||||
uint32_t map_size = (N - F + 2 * P) / S + 1; // This is computed as ⌊map_size⌋ by def.
|
|
||||||
|
|
||||||
if (P != 0) {
|
|
||||||
for (uint32_t c = 0; c < input.size(); c++) {
|
|
||||||
std::vector<std::vector<real_t>> padded_input;
|
|
||||||
padded_input.resize(N + 2 * P);
|
|
||||||
for (uint32_t i = 0; i < padded_input.size(); i++) {
|
|
||||||
padded_input[i].resize(N + 2 * P);
|
|
||||||
}
|
|
||||||
for (uint32_t i = 0; i < padded_input.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < padded_input[i].size(); j++) {
|
|
||||||
if (i - P < 0 || j - P < 0 || i - P > input[c].size() - 1 || j - P > input[c][0].size() - 1) {
|
|
||||||
padded_input[i][j] = 0;
|
|
||||||
} else {
|
|
||||||
padded_input[i][j] = input[c][i - P][j - P];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
input[c].resize(padded_input.size());
|
|
||||||
for (uint32_t i = 0; i < padded_input.size(); i++) {
|
|
||||||
input[c][i].resize(padded_input[i].size());
|
|
||||||
}
|
|
||||||
input[c] = padded_input;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
feature_map.resize(C);
|
|
||||||
for (uint32_t i = 0; i < feature_map.size(); i++) {
|
|
||||||
feature_map[i].resize(map_size);
|
|
||||||
for (uint32_t j = 0; j < feature_map[i].size(); j++) {
|
|
||||||
feature_map[i][j].resize(map_size);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (uint32_t c = 0; c < C; c++) {
|
|
||||||
for (uint32_t i = 0; i < map_size; i++) {
|
|
||||||
for (uint32_t j = 0; j < map_size; j++) {
|
|
||||||
std::vector<real_t> convolving_input;
|
|
||||||
for (uint32_t t = 0; t < input.size(); t++) {
|
|
||||||
for (uint32_t k = 0; k < F; k++) {
|
|
||||||
for (uint32_t p = 0; p < F; p++) {
|
|
||||||
if (i == 0 && j == 0) {
|
|
||||||
convolving_input.push_back(input[t][i + k][j + p]);
|
|
||||||
} else if (i == 0) {
|
|
||||||
convolving_input.push_back(input[t][i + k][j + (S - 1) + p]);
|
|
||||||
} else if (j == 0) {
|
|
||||||
convolving_input.push_back(input[t][i + (S - 1) + k][j + p]);
|
|
||||||
} else {
|
|
||||||
convolving_input.push_back(input[t][i + (S - 1) + k][j + (S - 1) + p]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
feature_map[c][i][j] = alg.dot(convolving_input, alg.flatten(filter));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return feature_map;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPConvolutionsOld::pool_2d(std::vector<std::vector<real_t>> input, int F, int S, std::string type) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
std::vector<std::vector<real_t>> pooled_map;
|
|
||||||
uint32_t N = input.size();
|
|
||||||
uint32_t map_size = floor((N - F) / S + 1);
|
|
||||||
|
|
||||||
pooled_map.resize(map_size);
|
|
||||||
for (uint32_t i = 0; i < map_size; i++) {
|
|
||||||
pooled_map[i].resize(map_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < map_size; i++) {
|
|
||||||
for (uint32_t j = 0; j < map_size; j++) {
|
|
||||||
std::vector<real_t> pooling_input;
|
|
||||||
for (int k = 0; k < F; k++) {
|
|
||||||
for (int p = 0; p < F; p++) {
|
|
||||||
if (i == 0 && j == 0) {
|
|
||||||
pooling_input.push_back(input[i + k][j + p]);
|
|
||||||
} else if (i == 0) {
|
|
||||||
pooling_input.push_back(input[i + k][j + (S - 1) + p]);
|
|
||||||
} else if (j == 0) {
|
|
||||||
pooling_input.push_back(input[i + (S - 1) + k][j + p]);
|
|
||||||
} else {
|
|
||||||
pooling_input.push_back(input[i + (S - 1) + k][j + (S - 1) + p]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (type == "Average") {
|
|
||||||
MLPPStatOld stat;
|
|
||||||
pooled_map[i][j] = stat.mean(pooling_input);
|
|
||||||
} else if (type == "Min") {
|
|
||||||
pooled_map[i][j] = alg.min(pooling_input);
|
|
||||||
} else {
|
|
||||||
pooled_map[i][j] = alg.max(pooling_input);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return pooled_map;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> MLPPConvolutionsOld::pool_3d(std::vector<std::vector<std::vector<real_t>>> input, int F, int S, std::string type) {
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> pooled_map;
|
|
||||||
for (uint32_t i = 0; i < input.size(); i++) {
|
|
||||||
pooled_map.push_back(pool_2d(input[i], F, S, type));
|
|
||||||
}
|
|
||||||
return pooled_map;
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPConvolutionsOld::global_pool_2d(std::vector<std::vector<real_t>> input, std::string type) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
if (type == "Average") {
|
|
||||||
MLPPStatOld stat;
|
|
||||||
return stat.mean(alg.flatten(input));
|
|
||||||
} else if (type == "Min") {
|
|
||||||
return alg.min(alg.flatten(input));
|
|
||||||
} else {
|
|
||||||
return alg.max(alg.flatten(input));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPConvolutionsOld::global_pool_3d(std::vector<std::vector<std::vector<real_t>>> input, std::string type) {
|
|
||||||
std::vector<real_t> pooled_map;
|
|
||||||
for (uint32_t i = 0; i < input.size(); i++) {
|
|
||||||
pooled_map.push_back(global_pool_2d(input[i], type));
|
|
||||||
}
|
|
||||||
return pooled_map;
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPConvolutionsOld::gaussian_2d(real_t x, real_t y, real_t std) {
|
|
||||||
real_t std_sq = std * std;
|
|
||||||
return 1 / (2 * M_PI * std_sq) * std::exp(-(x * x + y * y) / (2 * std_sq)); // Gaussian density: exp(-(x^2 + y^2) / (2 * sigma^2)).
|
|
||||||
}
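// Quick sanity check of the density above (with the exponent divided by 2 * std_sq):
// at the origin the exponential term is 1, so gaussian_2d(0, 0, 1) = 1 / (2 * pi) ~ 0.159,
// and the value decays with distance, e.g. gaussian_2d(1, 0, 1) ~ 0.159 * exp(-0.5) ~ 0.097.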
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPConvolutionsOld::gaussian_filter_2d(int size, real_t std) {
|
|
||||||
std::vector<std::vector<real_t>> filter;
|
|
||||||
filter.resize(size);
|
|
||||||
for (uint32_t i = 0; i < filter.size(); i++) {
|
|
||||||
filter[i].resize(size);
|
|
||||||
}
|
|
||||||
for (int i = 0; i < size; i++) {
|
|
||||||
for (int j = 0; j < size; j++) {
|
|
||||||
filter[i][j] = gaussian_2d(i - (size - 1) / 2, (size - 1) / 2 - j, std);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return filter;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
Indeed a filter could have been used for this purpose, but I decided that it would've just
been easier to carry out the calculation explicitly, mainly because it is more informative,
and also because my convolution algorithm is only built for filters with equally sized
heights and widths.
*/
|
|
||||||
std::vector<std::vector<real_t>> MLPPConvolutionsOld::dx(std::vector<std::vector<real_t>> input) {
|
|
||||||
std::vector<std::vector<real_t>> deriv; // We assume a gray scale image.
|
|
||||||
deriv.resize(input.size());
|
|
||||||
for (uint32_t i = 0; i < deriv.size(); i++) {
|
|
||||||
deriv[i].resize(input[i].size());
|
|
||||||
}
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < input.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < input[i].size(); j++) {
|
|
||||||
if (j != 0 && j != input.size() - 1) {
|
|
||||||
deriv[i][j] = input[i][j + 1] - input[i][j - 1];
|
|
||||||
} else if (j == 0) {
|
|
||||||
deriv[i][j] = input[i][j + 1] - 0; // Implicit zero-padding
|
|
||||||
} else {
|
|
||||||
deriv[i][j] = 0 - input[i][j - 1]; // Implicit zero-padding
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return deriv;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPConvolutionsOld::dy(std::vector<std::vector<real_t>> input) {
|
|
||||||
std::vector<std::vector<real_t>> deriv;
|
|
||||||
deriv.resize(input.size());
|
|
||||||
for (uint32_t i = 0; i < deriv.size(); i++) {
|
|
||||||
deriv[i].resize(input[i].size());
|
|
||||||
}
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < input.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < input[i].size(); j++) {
|
|
||||||
if (i != 0 && i != input.size() - 1) {
|
|
||||||
deriv[i][j] = input[i - 1][j] - input[i + 1][j];
|
|
||||||
} else if (i == 0) {
|
|
||||||
deriv[i][j] = 0 - input[i + 1][j]; // Implicit zero-padding
|
|
||||||
} else {
|
|
||||||
deriv[i][j] = input[i - 1][j] - 0; // Implicit zero-padding
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return deriv;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPConvolutionsOld::grad_magnitude(std::vector<std::vector<real_t>> input) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
std::vector<std::vector<real_t>> x_deriv_2 = alg.hadamard_product(dx(input), dx(input));
|
|
||||||
std::vector<std::vector<real_t>> y_deriv_2 = alg.hadamard_product(dy(input), dy(input));
|
|
||||||
return alg.sqrt(alg.addition(x_deriv_2, y_deriv_2));
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPConvolutionsOld::grad_orientation(std::vector<std::vector<real_t>> input) {
|
|
||||||
std::vector<std::vector<real_t>> deriv;
|
|
||||||
deriv.resize(input.size());
|
|
||||||
for (uint32_t i = 0; i < deriv.size(); i++) {
|
|
||||||
deriv[i].resize(input[i].size());
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> x_deriv = dx(input);
|
|
||||||
std::vector<std::vector<real_t>> y_deriv = dy(input);
|
|
||||||
for (uint32_t i = 0; i < deriv.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < deriv[i].size(); j++) {
|
|
||||||
deriv[i][j] = std::atan2(y_deriv[i][j], x_deriv[i][j]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return deriv;
|
|
||||||
}
|
|
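Per pixel, grad_magnitude and grad_orientation compute

\lVert \nabla I \rVert = \sqrt{I_x^2 + I_y^2}, \qquad \theta = \operatorname{atan2}(I_y, I_x),

where I_x and I_y are the dx and dy outputs above.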
||||||
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> MLPPConvolutionsOld::compute_m(std::vector<std::vector<real_t>> input) {
|
|
||||||
real_t const SIGMA = 1;
|
|
||||||
int const GAUSSIAN_SIZE = 3;
|
|
||||||
|
|
||||||
int const GAUSSIAN_PADDING = ((input.size() - 1) + GAUSSIAN_SIZE - input.size()) / 2; // 'Same' convolution: this padding keeps the output the same size as the input.
|
|
||||||
std::cout << GAUSSIAN_PADDING << std::endl;
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
std::vector<std::vector<real_t>> x_deriv = dx(input);
|
|
||||||
std::vector<std::vector<real_t>> y_deriv = dy(input);
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> gaussian_filter = gaussian_filter_2d(GAUSSIAN_SIZE, SIGMA); // Sigma of 1, size of 3.
|
|
||||||
std::vector<std::vector<real_t>> xx_deriv = convolve_2d(alg.hadamard_product(x_deriv, x_deriv), gaussian_filter, 1, GAUSSIAN_PADDING);
|
|
||||||
std::vector<std::vector<real_t>> yy_deriv = convolve_2d(alg.hadamard_product(y_deriv, y_deriv), gaussian_filter, 1, GAUSSIAN_PADDING);
|
|
||||||
std::vector<std::vector<real_t>> xy_deriv = convolve_2d(alg.hadamard_product(x_deriv, y_deriv), gaussian_filter, 1, GAUSSIAN_PADDING);
|
|
||||||
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> M = { xx_deriv, yy_deriv, xy_deriv };
|
|
||||||
return M;
|
|
||||||
}
|
|
||||||
std::vector<std::vector<std::string>> MLPPConvolutionsOld::harris_corner_detection(std::vector<std::vector<real_t>> input) {
|
|
||||||
real_t const k = 0.05; // Empirically determined; k is typically chosen in [0.04, 0.06], with 0.05 the conventional default.
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> M = compute_m(input);
|
|
||||||
std::vector<std::vector<real_t>> det = alg.subtraction(alg.hadamard_product(M[0], M[1]), alg.hadamard_product(M[2], M[2]));
|
|
||||||
std::vector<std::vector<real_t>> trace = alg.addition(M[0], M[1]);
|
|
||||||
|
|
||||||
// This is a matrix rather than a scalar because xx_deriv, yy_deriv, and xy_deriv are themselves matrices.
|
|
||||||
std::vector<std::vector<real_t>> r = alg.subtraction(det, alg.scalarMultiply(k, alg.hadamard_product(trace, trace)));
|
|
||||||
std::vector<std::vector<std::string>> imageTypes;
|
|
||||||
imageTypes.resize(r.size());
|
|
||||||
alg.printMatrix(r);
|
|
||||||
for (uint32_t i = 0; i < r.size(); i++) {
|
|
||||||
imageTypes[i].resize(r[i].size());
|
|
||||||
for (uint32_t j = 0; j < r[i].size(); j++) {
|
|
||||||
if (r[i][j] > 0) {
|
|
||||||
imageTypes[i][j] = "C";
|
|
||||||
} else if (r[i][j] < 0) {
|
|
||||||
imageTypes[i][j] = "E";
|
|
||||||
} else {
|
|
||||||
imageTypes[i][j] = "N";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return imageTypes;
|
|
||||||
}
|
|
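Written out, the per-pixel Harris response computed above is

R = \det(M) - k\, \operatorname{tr}(M)^2 = (I_{xx} I_{yy} - I_{xy}^2) - k\, (I_{xx} + I_{yy})^2,

with R > 0 labelled a corner ("C"), R < 0 an edge ("E"), and R = 0 flat ("N").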
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPConvolutionsOld::get_prewitt_horizontal() {
|
|
||||||
return _prewitt_horizontal;
|
|
||||||
}
|
|
||||||
std::vector<std::vector<real_t>> MLPPConvolutionsOld::get_prewitt_vertical() {
|
|
||||||
return _prewitt_vertical;
|
|
||||||
}
|
|
||||||
std::vector<std::vector<real_t>> MLPPConvolutionsOld::get_sobel_horizontal() {
|
|
||||||
return _sobel_horizontal;
|
|
||||||
}
|
|
||||||
std::vector<std::vector<real_t>> MLPPConvolutionsOld::get_sobel_vertical() {
|
|
||||||
return _sobel_vertical;
|
|
||||||
}
|
|
||||||
std::vector<std::vector<real_t>> MLPPConvolutionsOld::get_scharr_horizontal() {
|
|
||||||
return _scharr_horizontal;
|
|
||||||
}
|
|
||||||
std::vector<std::vector<real_t>> MLPPConvolutionsOld::get_scharr_vertical() {
|
|
||||||
return _scharr_vertical;
|
|
||||||
}
|
|
||||||
std::vector<std::vector<real_t>> MLPPConvolutionsOld::get_roberts_horizontal() {
|
|
||||||
return _roberts_horizontal;
|
|
||||||
}
|
|
||||||
std::vector<std::vector<real_t>> MLPPConvolutionsOld::get_roberts_vertical() {
|
|
||||||
return _roberts_vertical;
|
|
||||||
}
|
|
||||||
|
|
||||||
MLPPConvolutionsOld::MLPPConvolutionsOld() {
|
|
||||||
_prewitt_horizontal = { { 1, 1, 1 }, { 0, 0, 0 }, { -1, -1, -1 } };
|
|
||||||
_prewitt_vertical = { { 1, 0, -1 }, { 1, 0, -1 }, { 1, 0, -1 } };
|
|
||||||
_sobel_horizontal = { { 1, 2, 1 }, { 0, 0, 0 }, { -1, -2, -1 } };
|
|
||||||
_sobel_vertical = { { -1, 0, 1 }, { -2, 0, 2 }, { -1, 0, 1 } };
|
|
||||||
_scharr_horizontal = { { 3, 10, 3 }, { 0, 0, 0 }, { -3, -10, -3 } };
|
|
||||||
_scharr_vertical = { { 3, 0, -3 }, { 10, 0, -10 }, { 3, 0, -3 } };
|
|
||||||
_roberts_horizontal = { { 0, 1 }, { -1, 0 } };
|
|
||||||
_roberts_vertical = { { 1, 0 }, { 0, -1 } };
|
|
||||||
}
|
|
@ -1,56 +0,0 @@
|
|||||||
|
|
||||||
#ifndef MLPP_CONVOLUTIONS_OLD_H
|
|
||||||
#define MLPP_CONVOLUTIONS_OLD_H
|
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include "core/math/math_defs.h"
|
|
||||||
#include "core/int_types.h"
|
|
||||||
|
|
||||||
class MLPPConvolutionsOld {
|
|
||||||
public:
|
|
||||||
std::vector<std::vector<real_t>> convolve_2d(std::vector<std::vector<real_t>> input, std::vector<std::vector<real_t>> filter, int S, int P = 0);
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> convolve_3d(std::vector<std::vector<std::vector<real_t>>> input, std::vector<std::vector<std::vector<real_t>>> filter, int S, int P = 0);
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> pool_2d(std::vector<std::vector<real_t>> input, int F, int S, std::string type);
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> pool_3d(std::vector<std::vector<std::vector<real_t>>> input, int F, int S, std::string type);
|
|
||||||
|
|
||||||
real_t global_pool_2d(std::vector<std::vector<real_t>> input, std::string type);
|
|
||||||
std::vector<real_t> global_pool_3d(std::vector<std::vector<std::vector<real_t>>> input, std::string type);
|
|
||||||
|
|
||||||
real_t gaussian_2d(real_t x, real_t y, real_t std);
|
|
||||||
std::vector<std::vector<real_t>> gaussian_filter_2d(int size, real_t std);
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> dx(std::vector<std::vector<real_t>> input);
|
|
||||||
std::vector<std::vector<real_t>> dy(std::vector<std::vector<real_t>> input);
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> grad_magnitude(std::vector<std::vector<real_t>> input);
|
|
||||||
std::vector<std::vector<real_t>> grad_orientation(std::vector<std::vector<real_t>> input);
|
|
||||||
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> compute_m(std::vector<std::vector<real_t>> input);
|
|
||||||
std::vector<std::vector<std::string>> harris_corner_detection(std::vector<std::vector<real_t>> input);
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> get_prewitt_horizontal();
|
|
||||||
std::vector<std::vector<real_t>> get_prewitt_vertical();
|
|
||||||
std::vector<std::vector<real_t>> get_sobel_horizontal();
|
|
||||||
std::vector<std::vector<real_t>> get_sobel_vertical();
|
|
||||||
std::vector<std::vector<real_t>> get_scharr_horizontal();
|
|
||||||
std::vector<std::vector<real_t>> get_scharr_vertical();
|
|
||||||
std::vector<std::vector<real_t>> get_roberts_horizontal();
|
|
||||||
std::vector<std::vector<real_t>> get_roberts_vertical();
|
|
||||||
|
|
||||||
MLPPConvolutionsOld();
|
|
||||||
|
|
||||||
protected:
|
|
||||||
std::vector<std::vector<real_t>> _prewitt_horizontal;
|
|
||||||
std::vector<std::vector<real_t>> _prewitt_vertical;
|
|
||||||
std::vector<std::vector<real_t>> _sobel_horizontal;
|
|
||||||
std::vector<std::vector<real_t>> _sobel_vertical;
|
|
||||||
std::vector<std::vector<real_t>> _scharr_horizontal;
|
|
||||||
std::vector<std::vector<real_t>> _scharr_vertical;
|
|
||||||
std::vector<std::vector<real_t>> _roberts_horizontal;
|
|
||||||
std::vector<std::vector<real_t>> _roberts_vertical;
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif // MLPP_CONVOLUTIONS_OLD_H
|
|
@ -1,395 +0,0 @@
|
|||||||
//
|
|
||||||
// Reg.cpp
|
|
||||||
//
|
|
||||||
// Created by Marc Melikyan on 1/16/21.
|
|
||||||
//
|
|
||||||
|
|
||||||
#include "cost_old.h"
|
|
||||||
#include "../lin_alg/lin_alg_old.h"
|
|
||||||
#include "../regularization/reg_old.h"
|
|
||||||
#include <cmath>
|
|
||||||
#include <iostream>
|
|
||||||
|
|
||||||
real_t MLPPCostOld::MSE(std::vector<real_t> y_hat, std::vector<real_t> y) {
|
|
||||||
real_t sum = 0;
|
|
||||||
for (uint32_t i = 0; i < y_hat.size(); i++) {
|
|
||||||
sum += (y_hat[i] - y[i]) * (y_hat[i] - y[i]);
|
|
||||||
}
|
|
||||||
return sum / (2 * y_hat.size());
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPCostOld::MSE(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y) {
|
|
||||||
real_t sum = 0;
|
|
||||||
for (uint32_t i = 0; i < y_hat.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < y_hat[i].size(); j++) {
|
|
||||||
sum += (y_hat[i][j] - y[i][j]) * (y_hat[i][j] - y[i][j]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return sum / (2 * y_hat.size());
|
|
||||||
}
|
|
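Both overloads compute the half mean squared error

\text{MSE}(\hat{y}, y) = \frac{1}{2n} \sum_i (\hat{y}_i - y_i)^2, \qquad n = \texttt{y\_hat.size()},

whose gradient, up to the 1/n factor, is the residual \hat{y} - y returned by MSEDeriv below.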
||||||
|
|
||||||
std::vector<real_t> MLPPCostOld::MSEDeriv(std::vector<real_t> y_hat, std::vector<real_t> y) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
return alg.subtraction(y_hat, y);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPCostOld::MSEDeriv(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
return alg.subtraction(y_hat, y);
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPCostOld::RMSE(std::vector<real_t> y_hat, std::vector<real_t> y) {
|
|
||||||
real_t sum = 0;
|
|
||||||
for (uint32_t i = 0; i < y_hat.size(); i++) {
|
|
||||||
sum += (y_hat[i] - y[i]) * (y_hat[i] - y[i]);
|
|
||||||
}
|
|
||||||
return sqrt(sum / y_hat.size());
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPCostOld::RMSE(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y) {
|
|
||||||
real_t sum = 0;
|
|
||||||
for (uint32_t i = 0; i < y_hat.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < y_hat[i].size(); j++) {
|
|
||||||
sum += (y_hat[i][j] - y[i][j]) * (y_hat[i][j] - y[i][j]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return sqrt(sum / y_hat.size());
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPCostOld::RMSEDeriv(std::vector<real_t> y_hat, std::vector<real_t> y) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
return alg.scalarMultiply(1 / (2 * sqrt(MSE(y_hat, y))), MSEDeriv(y_hat, y));
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPCostOld::RMSEDeriv(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
return alg.scalarMultiply(1 / (2 * sqrt(MSE(y_hat, y))), MSEDeriv(y_hat, y));
|
|
||||||
}
|
|
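RMSEDeriv is just the chain rule applied to the square root:

\frac{\partial}{\partial \hat{y}} \sqrt{\text{MSE}(\hat{y}, y)} = \frac{1}{2\sqrt{\text{MSE}(\hat{y}, y)}} \cdot \frac{\partial\, \text{MSE}(\hat{y}, y)}{\partial \hat{y}}.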
||||||
|
|
||||||
real_t MLPPCostOld::MAE(std::vector<real_t> y_hat, std::vector<real_t> y) {
|
|
||||||
real_t sum = 0;
|
|
||||||
for (uint32_t i = 0; i < y_hat.size(); i++) {
|
|
||||||
sum += abs((y_hat[i] - y[i]));
|
|
||||||
}
|
|
||||||
return sum / y_hat.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPCostOld::MAE(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y) {
|
|
||||||
real_t sum = 0;
|
|
||||||
for (uint32_t i = 0; i < y_hat.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < y_hat[i].size(); j++) {
|
|
||||||
sum += abs((y_hat[i][j] - y[i][j]));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return sum / y_hat.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPCostOld::MAEDeriv(std::vector<real_t> y_hat, std::vector<real_t> y) {
|
|
||||||
std::vector<real_t> deriv;
|
|
||||||
deriv.resize(y_hat.size());
|
|
||||||
for (uint32_t i = 0; i < deriv.size(); i++) {
|
|
||||||
if (y_hat[i] < 0) {
|
|
||||||
deriv[i] = -1;
|
|
||||||
} else if (y_hat[i] == 0) {
|
|
||||||
deriv[i] = 0;
|
|
||||||
} else {
|
|
||||||
deriv[i] = 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return deriv;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPCostOld::MAEDeriv(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y) {
|
|
||||||
std::vector<std::vector<real_t>> deriv;
|
|
||||||
deriv.resize(y_hat.size());
|
|
||||||
for (uint32_t i = 0; i < deriv.size(); i++) {
|
|
||||||
deriv[i].resize(y_hat[i].size());
|
|
||||||
}
|
|
||||||
for (uint32_t i = 0; i < deriv.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < deriv[i].size(); j++) {
|
|
||||||
if (y_hat[i][j] < 0) {
|
|
||||||
deriv[i][j] = -1;
|
|
||||||
} else if (y_hat[i][j] == 0) {
|
|
||||||
deriv[i][j] = 0;
|
|
||||||
} else {
|
|
||||||
deriv[i][j] = 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return deriv;
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPCostOld::MBE(std::vector<real_t> y_hat, std::vector<real_t> y) {
|
|
||||||
real_t sum = 0;
|
|
||||||
for (uint32_t i = 0; i < y_hat.size(); i++) {
|
|
||||||
sum += (y_hat[i] - y[i]);
|
|
||||||
}
|
|
||||||
return sum / y_hat.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPCostOld::MBE(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y) {
|
|
||||||
real_t sum = 0;
|
|
||||||
for (uint32_t i = 0; i < y_hat.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < y_hat[i].size(); j++) {
|
|
||||||
sum += (y_hat[i][j] - y[i][j]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return sum / y_hat.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPCostOld::MBEDeriv(std::vector<real_t> y_hat, std::vector<real_t> y) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
return alg.onevec(y_hat.size());
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPCostOld::MBEDeriv(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
return alg.onemat(y_hat.size(), y_hat[0].size());
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPCostOld::LogLoss(std::vector<real_t> y_hat, std::vector<real_t> y) {
|
|
||||||
real_t sum = 0;
|
|
||||||
real_t eps = 1e-8;
|
|
||||||
for (uint32_t i = 0; i < y_hat.size(); i++) {
|
|
||||||
sum += -(y[i] * std::log(y_hat[i] + eps) + (1 - y[i]) * std::log(1 - y_hat[i] + eps));
|
|
||||||
}
|
|
||||||
|
|
||||||
return sum / y_hat.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPCostOld::LogLoss(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y) {
|
|
||||||
real_t sum = 0;
|
|
||||||
real_t eps = 1e-8;
|
|
||||||
for (uint32_t i = 0; i < y_hat.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < y_hat[i].size(); j++) {
|
|
||||||
sum += -(y[i][j] * std::log(y_hat[i][j] + eps) + (1 - y[i][j]) * std::log(1 - y_hat[i][j] + eps));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return sum / y_hat.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPCostOld::LogLossDeriv(std::vector<real_t> y_hat, std::vector<real_t> y) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
return alg.addition(alg.scalarMultiply(-1, alg.elementWiseDivision(y, y_hat)), alg.elementWiseDivision(alg.scalarMultiply(-1, alg.scalarAdd(-1, y)), alg.scalarMultiply(-1, alg.scalarAdd(-1, y_hat))));
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPCostOld::LogLossDeriv(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
return alg.addition(alg.scalarMultiply(-1, alg.elementWiseDivision(y, y_hat)), alg.elementWiseDivision(alg.scalarMultiply(-1, alg.scalarAdd(-1, y)), alg.scalarMultiply(-1, alg.scalarAdd(-1, y_hat))));
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPCostOld::CrossEntropy(std::vector<real_t> y_hat, std::vector<real_t> y) {
|
|
||||||
real_t sum = 0;
|
|
||||||
for (uint32_t i = 0; i < y_hat.size(); i++) {
|
|
||||||
sum += y[i] * std::log(y_hat[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
return -1 * sum;
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPCostOld::CrossEntropy(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y) {
|
|
||||||
real_t sum = 0;
|
|
||||||
for (uint32_t i = 0; i < y_hat.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < y_hat[i].size(); j++) {
|
|
||||||
sum += y[i][j] * std::log(y_hat[i][j]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return -1 * sum;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPCostOld::CrossEntropyDeriv(std::vector<real_t> y_hat, std::vector<real_t> y) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
return alg.scalarMultiply(-1, alg.elementWiseDivision(y, y_hat));
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPCostOld::CrossEntropyDeriv(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
return alg.scalarMultiply(-1, alg.elementWiseDivision(y, y_hat));
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPCostOld::HuberLoss(std::vector<real_t> y_hat, std::vector<real_t> y, real_t delta) {
|
|
||||||
real_t sum = 0;
|
|
||||||
for (uint32_t i = 0; i < y_hat.size(); i++) {
|
|
||||||
if (abs(y[i] - y_hat[i]) <= delta) {
|
|
||||||
sum += (y[i] - y_hat[i]) * (y[i] - y_hat[i]);
|
|
||||||
} else {
|
|
||||||
sum += 2 * delta * abs(y[i] - y_hat[i]) - delta * delta;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return sum;
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPCostOld::HuberLoss(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y, real_t delta) {
|
|
||||||
real_t sum = 0;
|
|
||||||
for (uint32_t i = 0; i < y_hat.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < y_hat[i].size(); j++) {
|
|
||||||
if (abs(y[i][j] - y_hat[i][j]) <= delta) {
|
|
||||||
sum += (y[i][j] - y_hat[i][j]) * (y[i][j] - y_hat[i][j]);
|
|
||||||
} else {
|
|
||||||
sum += 2 * delta * abs(y[i][j] - y_hat[i][j]) - delta * delta;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return sum;
|
|
||||||
}
|
|
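As implemented, each term of this (unaveraged) sum is

L_\delta(r) = \begin{cases} r^2, & |r| \le \delta \\ 2\delta |r| - \delta^2, & |r| > \delta \end{cases}, \qquad r = y - \hat{y},

i.e. twice the conventional Huber loss (\tfrac{1}{2} r^2 and \delta |r| - \tfrac{1}{2}\delta^2).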
||||||
|
|
||||||
std::vector<real_t> MLPPCostOld::HuberLossDeriv(std::vector<real_t> y_hat, std::vector<real_t> y, real_t delta) {
|
|
||||||
std::vector<real_t> deriv;
|
|
||||||
deriv.resize(y_hat.size());
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < y_hat.size(); i++) {
|
|
||||||
if (abs(y[i] - y_hat[i]) <= delta) {
|
|
||||||
deriv[i] = -(y[i] - y_hat[i]);
|
|
||||||
} else {
|
|
||||||
if (y_hat[i] > 0 || y_hat[i] < 0) {
|
|
||||||
deriv[i] = 2 * delta * (y_hat[i] / abs(y_hat[i]));
|
|
||||||
} else {
|
|
||||||
deriv[i] = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return deriv;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPCostOld::HuberLossDeriv(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y, real_t delta) {
|
|
||||||
std::vector<std::vector<real_t>> deriv;
|
|
||||||
deriv.resize(y_hat.size());
|
|
||||||
for (uint32_t i = 0; i < deriv.size(); i++) {
|
|
||||||
deriv[i].resize(y_hat[i].size());
|
|
||||||
}
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < y_hat.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < y_hat[i].size(); j++) {
|
|
||||||
if (abs(y[i][j] - y_hat[i][j]) <= delta) {
|
|
||||||
deriv[i][j] = -(y[i][j] - y_hat[i][j]);
|
|
||||||
} else {
|
|
||||||
if (y_hat[i][j] > 0 || y_hat[i][j] < 0) {
|
|
||||||
deriv[i][j] = 2 * delta * (y_hat[i][j] / abs(y_hat[i][j]));
|
|
||||||
} else {
|
|
||||||
deriv[i][j] = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return deriv;
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPCostOld::HingeLoss(std::vector<real_t> y_hat, std::vector<real_t> y) {
|
|
||||||
real_t sum = 0;
|
|
||||||
for (uint32_t i = 0; i < y_hat.size(); i++) {
|
|
||||||
sum += fmax(0, 1 - y[i] * y_hat[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
return sum / y_hat.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPCostOld::HingeLoss(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y) {
|
|
||||||
real_t sum = 0;
|
|
||||||
for (uint32_t i = 0; i < y_hat.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < y_hat[i].size(); j++) {
|
|
||||||
sum += fmax(0, 1 - y[i][j] * y_hat[i][j]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return sum / y_hat.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPCostOld::HingeLossDeriv(std::vector<real_t> y_hat, std::vector<real_t> y) {
|
|
||||||
std::vector<real_t> deriv;
|
|
||||||
deriv.resize(y_hat.size());
|
|
||||||
for (uint32_t i = 0; i < y_hat.size(); i++) {
|
|
||||||
if (1 - y[i] * y_hat[i] > 0) {
|
|
||||||
deriv[i] = -y[i];
|
|
||||||
} else {
|
|
||||||
deriv[i] = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return deriv;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPCostOld::HingeLossDeriv(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y) {
|
|
||||||
std::vector<std::vector<real_t>> deriv;
deriv.resize(y_hat.size());
for (uint32_t i = 0; i < deriv.size(); i++) {
	deriv[i].resize(y_hat[i].size());
}
|
|
||||||
for (uint32_t i = 0; i < y_hat.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < y_hat[i].size(); j++) {
|
|
||||||
if (1 - y[i][j] * y_hat[i][j] > 0) {
|
|
||||||
deriv[i][j] = -y[i][j];
|
|
||||||
} else {
|
|
||||||
deriv[i][j] = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return deriv;
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPCostOld::WassersteinLoss(std::vector<real_t> y_hat, std::vector<real_t> y) {
|
|
||||||
real_t sum = 0;
|
|
||||||
for (uint32_t i = 0; i < y_hat.size(); i++) {
|
|
||||||
sum += y_hat[i] * y[i];
|
|
||||||
}
|
|
||||||
return -sum / y_hat.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPCostOld::WassersteinLoss(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y) {
|
|
||||||
real_t sum = 0;
|
|
||||||
for (uint32_t i = 0; i < y_hat.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < y_hat[i].size(); j++) {
|
|
||||||
sum += y_hat[i][j] * y[i][j];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return -sum / y_hat.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPCostOld::WassersteinLossDeriv(std::vector<real_t> y_hat, std::vector<real_t> y) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
return alg.scalarMultiply(-1, y); // Simple.
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPCostOld::WassersteinLossDeriv(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
return alg.scalarMultiply(-1, y); // Simple.
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPCostOld::HingeLoss(std::vector<real_t> y_hat, std::vector<real_t> y, std::vector<real_t> weights, real_t C) {
|
|
||||||
MLPPRegOld regularization;
|
|
||||||
return C * HingeLoss(y_hat, y) + regularization.regTerm(weights, 1, 0, "Ridge");
|
|
||||||
}
|
|
||||||
real_t MLPPCostOld::HingeLoss(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y, std::vector<std::vector<real_t>> weights, real_t C) {
|
|
||||||
MLPPRegOld regularization;
|
|
||||||
return C * HingeLoss(y_hat, y) + regularization.regTerm(weights, 1, 0, "Ridge");
|
|
||||||
}
|
|
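These regularized overloads correspond to a soft-margin SVM objective of the form

J(w) = C \cdot \frac{1}{n} \sum_i \max(0, 1 - y_i \hat{y}_i) + \lambda \lVert w \rVert_2^2,

where the ridge penalty comes from regTerm(weights, 1, 0, "Ridge"); the exact scaling of that term (the lambda and any 1/2 factor) is defined by MLPPRegOld, so the formula above is only indicative.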
||||||
|
|
||||||
std::vector<real_t> MLPPCostOld::HingeLossDeriv(std::vector<real_t> y_hat, std::vector<real_t> y, real_t C) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
return alg.scalarMultiply(C, HingeLossDeriv(y_hat, y));
|
|
||||||
}
|
|
||||||
std::vector<std::vector<real_t>> MLPPCostOld::HingeLossDeriv(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y, real_t C) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
return alg.scalarMultiply(C, HingeLossDeriv(y_hat, y));
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPCostOld::dualFormSVM(std::vector<real_t> alpha, std::vector<std::vector<real_t>> X, std::vector<real_t> y) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
std::vector<std::vector<real_t>> Y = alg.diag(y); // Y is a diagonal matrix. Y[i][j] = y[i] if i == j, else Y[i][j] = 0. Yt = Y.
|
|
||||||
std::vector<std::vector<real_t>> K = alg.matmult(X, alg.transpose(X)); // TO DO: DON'T forget to add non-linear kernelizations.
|
|
||||||
std::vector<std::vector<real_t>> Q = alg.matmult(alg.matmult(alg.transpose(Y), K), Y);
|
|
||||||
real_t alphaQ = alg.matmult(alg.matmult({ alpha }, Q), alg.transpose({ alpha }))[0][0];
|
|
||||||
std::vector<real_t> one = alg.onevec(alpha.size());
|
|
||||||
|
|
||||||
return -alg.dot(one, alpha) + 0.5 * alphaQ;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPCostOld::dualFormSVMDeriv(std::vector<real_t> alpha, std::vector<std::vector<real_t>> X, std::vector<real_t> y) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
std::vector<std::vector<real_t>> Y = alg.zeromat(y.size(), y.size());
|
|
||||||
for (uint32_t i = 0; i < y.size(); i++) {
|
|
||||||
Y[i][i] = y[i]; // Y is a diagonal matrix. Y[i][j] = y[i] if i == j, else Y[i][j] = 0. Yt = Y.
|
|
||||||
}
|
|
||||||
std::vector<std::vector<real_t>> K = alg.matmult(X, alg.transpose(X)); // TO DO: DON'T forget to add non-linear kernelizations.
|
|
||||||
std::vector<std::vector<real_t>> Q = alg.matmult(alg.matmult(alg.transpose(Y), K), Y);
|
|
||||||
std::vector<real_t> alphaQDeriv = alg.mat_vec_mult(Q, alpha);
|
|
||||||
std::vector<real_t> one = alg.onevec(alpha.size());
|
|
||||||
|
|
||||||
return alg.subtraction(alphaQDeriv, one);
|
|
||||||
}
|
|
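In matrix form, dualFormSVM evaluates the negated Wolfe dual

W(\alpha) = -\mathbf{1}^{\top} \alpha + \tfrac{1}{2}\, \alpha^{\top} Q\, \alpha, \qquad Q = Y K Y, \quad K = X X^{\top},

and dualFormSVMDeriv returns its gradient Q\alpha - \mathbf{1}; minimizing W is equivalent to maximizing the usual dual \sum_i \alpha_i - \tfrac{1}{2}\alpha^{\top} Q \alpha.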
@ -1,85 +0,0 @@
|
|||||||
|
|
||||||
#ifndef MLPP_COST_OLD_H
|
|
||||||
#define MLPP_COST_OLD_H
|
|
||||||
|
|
||||||
//
|
|
||||||
// Cost.hpp
|
|
||||||
//
|
|
||||||
// Created by Marc Melikyan on 1/16/21.
|
|
||||||
//
|
|
||||||
|
|
||||||
#include "core/math/math_defs.h"
|
|
||||||
#include "core/int_types.h"
|
|
||||||
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
class MLPPCostOld {
|
|
||||||
public:
|
|
||||||
// Regression Costs
|
|
||||||
real_t MSE(std::vector<real_t> y_hat, std::vector<real_t> y);
|
|
||||||
real_t MSE(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y);
|
|
||||||
|
|
||||||
std::vector<real_t> MSEDeriv(std::vector<real_t> y_hat, std::vector<real_t> y);
|
|
||||||
std::vector<std::vector<real_t>> MSEDeriv(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y);
|
|
||||||
|
|
||||||
real_t RMSE(std::vector<real_t> y_hat, std::vector<real_t> y);
|
|
||||||
real_t RMSE(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y);
|
|
||||||
|
|
||||||
std::vector<real_t> RMSEDeriv(std::vector<real_t> y_hat, std::vector<real_t> y);
|
|
||||||
std::vector<std::vector<real_t>> RMSEDeriv(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y);
|
|
||||||
|
|
||||||
real_t MAE(std::vector<real_t> y_hat, std::vector<real_t> y);
|
|
||||||
real_t MAE(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y);
|
|
||||||
|
|
||||||
std::vector<real_t> MAEDeriv(std::vector<real_t> y_hat, std::vector<real_t> y);
|
|
||||||
std::vector<std::vector<real_t>> MAEDeriv(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y);
|
|
||||||
|
|
||||||
real_t MBE(std::vector<real_t> y_hat, std::vector<real_t> y);
|
|
||||||
real_t MBE(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y);
|
|
||||||
|
|
||||||
std::vector<real_t> MBEDeriv(std::vector<real_t> y_hat, std::vector<real_t> y);
|
|
||||||
std::vector<std::vector<real_t>> MBEDeriv(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y);
|
|
||||||
|
|
||||||
// Classification Costs
|
|
||||||
real_t LogLoss(std::vector<real_t> y_hat, std::vector<real_t> y);
|
|
||||||
real_t LogLoss(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y);
|
|
||||||
|
|
||||||
std::vector<real_t> LogLossDeriv(std::vector<real_t> y_hat, std::vector<real_t> y);
|
|
||||||
std::vector<std::vector<real_t>> LogLossDeriv(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y);
|
|
||||||
|
|
||||||
real_t CrossEntropy(std::vector<real_t> y_hat, std::vector<real_t> y);
|
|
||||||
real_t CrossEntropy(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y);
|
|
||||||
|
|
||||||
std::vector<real_t> CrossEntropyDeriv(std::vector<real_t> y_hat, std::vector<real_t> y);
|
|
||||||
std::vector<std::vector<real_t>> CrossEntropyDeriv(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y);
|
|
||||||
|
|
||||||
real_t HuberLoss(std::vector<real_t> y_hat, std::vector<real_t> y, real_t delta);
|
|
||||||
real_t HuberLoss(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y, real_t delta);
|
|
||||||
|
|
||||||
std::vector<real_t> HuberLossDeriv(std::vector<real_t> y_hat, std::vector<real_t> y, real_t delta);
|
|
||||||
std::vector<std::vector<real_t>> HuberLossDeriv(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y, real_t delta);
|
|
||||||
|
|
||||||
real_t HingeLoss(std::vector<real_t> y_hat, std::vector<real_t> y);
|
|
||||||
real_t HingeLoss(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y);
|
|
||||||
|
|
||||||
std::vector<real_t> HingeLossDeriv(std::vector<real_t> y_hat, std::vector<real_t> y);
|
|
||||||
std::vector<std::vector<real_t>> HingeLossDeriv(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y);
|
|
||||||
|
|
||||||
real_t HingeLoss(std::vector<real_t> y_hat, std::vector<real_t> y, std::vector<real_t> weights, real_t C);
|
|
||||||
real_t HingeLoss(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y, std::vector<std::vector<real_t>> weights, real_t C);
|
|
||||||
|
|
||||||
std::vector<real_t> HingeLossDeriv(std::vector<real_t> y_hat, std::vector<real_t> y, real_t C);
|
|
||||||
std::vector<std::vector<real_t>> HingeLossDeriv(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y, real_t C);
|
|
||||||
|
|
||||||
real_t WassersteinLoss(std::vector<real_t> y_hat, std::vector<real_t> y);
|
|
||||||
real_t WassersteinLoss(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y);
|
|
||||||
|
|
||||||
std::vector<real_t> WassersteinLossDeriv(std::vector<real_t> y_hat, std::vector<real_t> y);
|
|
||||||
std::vector<std::vector<real_t>> WassersteinLossDeriv(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y);
|
|
||||||
|
|
||||||
real_t dualFormSVM(std::vector<real_t> alpha, std::vector<std::vector<real_t>> X, std::vector<real_t> y); // TO DO: DON'T forget to add non-linear kernelizations.
|
|
||||||
|
|
||||||
std::vector<real_t> dualFormSVMDeriv(std::vector<real_t> alpha, std::vector<std::vector<real_t>> X, std::vector<real_t> y);
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif /* MLPP_COST_OLD_H */
|
|
@ -13,7 +13,6 @@
|
|||||||
#include "../stat/stat.h"
|
#include "../stat/stat.h"
|
||||||
|
|
||||||
#include "../softmax_net/softmax_net.h"
|
#include "../softmax_net/softmax_net.h"
|
||||||
#include "data_old.h"
|
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
@ -1,833 +0,0 @@
|
|||||||
//
|
|
||||||
// Data.cpp
|
|
||||||
// MLP
|
|
||||||
//
|
|
||||||
// Created by Marc Melikyan on 11/4/20.
|
|
||||||
//
|
|
||||||
|
|
||||||
#include "data_old.h"
|
|
||||||
|
|
||||||
#include "core/os/file_access.h"
|
|
||||||
|
|
||||||
#include "../lin_alg/lin_alg_old.h"
|
|
||||||
#include "../softmax_net/softmax_net_old.h"
|
|
||||||
#include "../stat/stat_old.h"
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
#include <cmath>
|
|
||||||
#include <fstream>
|
|
||||||
#include <iostream>
|
|
||||||
#include <random>
|
|
||||||
#include <sstream>
|
|
||||||
|
|
||||||
// Loading Datasets
|
|
||||||
std::tuple<std::vector<std::vector<real_t>>, std::vector<real_t>> MLPPDataOld::loadBreastCancer() {
|
|
||||||
const int BREAST_CANCER_SIZE = 30; // k = 30
|
|
||||||
std::vector<std::vector<real_t>> inputSet;
|
|
||||||
std::vector<real_t> outputSet;
|
|
||||||
|
|
||||||
setData(BREAST_CANCER_SIZE, "MLPP/Data/Datasets/BreastCancer.csv", inputSet, outputSet);
|
|
||||||
return { inputSet, outputSet };
|
|
||||||
}
|
|
||||||
|
|
||||||
std::tuple<std::vector<std::vector<real_t>>, std::vector<real_t>> MLPPDataOld::loadBreastCancerSVC() {
|
|
||||||
const int BREAST_CANCER_SIZE = 30; // k = 30
|
|
||||||
std::vector<std::vector<real_t>> inputSet;
|
|
||||||
std::vector<real_t> outputSet;
|
|
||||||
|
|
||||||
setData(BREAST_CANCER_SIZE, "MLPP/Data/Datasets/BreastCancerSVM.csv", inputSet, outputSet);
|
|
||||||
return { inputSet, outputSet };
|
|
||||||
}
|
|
||||||
|
|
||||||
std::tuple<std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>> MLPPDataOld::loadIris() {
|
|
||||||
const int IRIS_SIZE = 4;
|
|
||||||
const int ONE_HOT_NUM = 3;
|
|
||||||
std::vector<std::vector<real_t>> inputSet;
|
|
||||||
std::vector<real_t> tempOutputSet;
|
|
||||||
|
|
||||||
setData(IRIS_SIZE, "/Users/marcmelikyan/Desktop/Data/Iris.csv", inputSet, tempOutputSet);
|
|
||||||
std::vector<std::vector<real_t>> outputSet = oneHotRep(tempOutputSet, ONE_HOT_NUM);
|
|
||||||
return { inputSet, outputSet };
|
|
||||||
}
|
|
||||||
|
|
||||||
std::tuple<std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>> MLPPDataOld::loadWine() {
|
|
||||||
const int WINE_SIZE = 4;
|
|
||||||
const int ONE_HOT_NUM = 3;
|
|
||||||
std::vector<std::vector<real_t>> inputSet;
|
|
||||||
std::vector<real_t> tempOutputSet;
|
|
||||||
|
|
||||||
setData(WINE_SIZE, "MLPP/Data/Datasets/Iris.csv", inputSet, tempOutputSet);
|
|
||||||
std::vector<std::vector<real_t>> outputSet = oneHotRep(tempOutputSet, ONE_HOT_NUM);
|
|
||||||
return { inputSet, outputSet };
|
|
||||||
}
|
|
||||||
|
|
||||||
std::tuple<std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>> MLPPDataOld::loadMnistTrain() {
|
|
||||||
const int MNIST_SIZE = 784;
|
|
||||||
const int ONE_HOT_NUM = 10;
|
|
||||||
std::vector<std::vector<real_t>> inputSet;
|
|
||||||
std::vector<real_t> tempOutputSet;
|
|
||||||
|
|
||||||
setData(MNIST_SIZE, "MLPP/Data/Datasets/MnistTrain.csv", inputSet, tempOutputSet);
|
|
||||||
std::vector<std::vector<real_t>> outputSet = oneHotRep(tempOutputSet, ONE_HOT_NUM);
|
|
||||||
return { inputSet, outputSet };
|
|
||||||
}
|
|
||||||
|
|
||||||
std::tuple<std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>> MLPPDataOld::loadMnistTest() {
|
|
||||||
const int MNIST_SIZE = 784;
|
|
||||||
const int ONE_HOT_NUM = 10;
|
|
||||||
std::vector<std::vector<real_t>> inputSet;
|
|
||||||
std::vector<real_t> tempOutputSet;
|
|
||||||
|
|
||||||
setData(MNIST_SIZE, "MLPP/Data/Datasets/MnistTest.csv", inputSet, tempOutputSet);
|
|
||||||
std::vector<std::vector<real_t>> outputSet = oneHotRep(tempOutputSet, ONE_HOT_NUM);
|
|
||||||
return { inputSet, outputSet };
|
|
||||||
}
|
|
||||||
|
|
||||||
std::tuple<std::vector<std::vector<real_t>>, std::vector<real_t>> MLPPDataOld::loadCaliforniaHousing() {
|
|
||||||
const int CALIFORNIA_HOUSING_SIZE = 13; // k = 30
|
|
||||||
std::vector<std::vector<real_t>> inputSet;
|
|
||||||
std::vector<real_t> outputSet;
|
|
||||||
|
|
||||||
setData(CALIFORNIA_HOUSING_SIZE, "MLPP/Data/Datasets/CaliforniaHousing.csv", inputSet, outputSet);
|
|
||||||
return { inputSet, outputSet };
|
|
||||||
}
|
|
||||||
|
|
||||||
std::tuple<std::vector<real_t>, std::vector<real_t>> MLPPDataOld::loadFiresAndCrime() {
|
|
||||||
std::vector<real_t> inputSet; // k is implicitly 1.
|
|
||||||
std::vector<real_t> outputSet;
|
|
||||||
|
|
||||||
setData("MLPP/Data/Datasets/FiresAndCrime.csv", inputSet, outputSet);
|
|
||||||
return { inputSet, outputSet };
|
|
||||||
}
|
|
||||||
|
|
||||||
// Note that inputs and outputs should be pairs (technically), but this
|
|
||||||
// implementation will separate them. (My implementation keeps them tied together.)
|
|
||||||
// It is not yet clear whether this separation is intentional (or whether it stems from something like a compiler-specific difference).
|
|
||||||
std::tuple<std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>> MLPPDataOld::trainTestSplit(std::vector<std::vector<real_t>> inputSet, std::vector<std::vector<real_t>> outputSet, real_t testSize) {
|
|
||||||
std::random_device rd;
|
|
||||||
std::default_random_engine generator(rd());
|
|
||||||
|
|
||||||
std::shuffle(inputSet.begin(), inputSet.end(), generator); // inputSet random shuffle
|
|
||||||
std::shuffle(outputSet.begin(), outputSet.end(), generator); // outputSet random shuffle
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> inputTestSet;
|
|
||||||
std::vector<std::vector<real_t>> outputTestSet;
|
|
||||||
|
|
||||||
int testInputNumber = testSize * inputSet.size(); // implicit usage of floor
|
|
||||||
int testOutputNumber = testSize * outputSet.size(); // implicit usage of floor
|
|
||||||
|
|
||||||
for (int i = 0; i < testInputNumber; i++) {
|
|
||||||
inputTestSet.push_back(inputSet[i]);
|
|
||||||
inputSet.erase(inputSet.begin());
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int i = 0; i < testOutputNumber; i++) {
|
|
||||||
outputTestSet.push_back(outputSet[i]);
|
|
||||||
outputSet.erase(outputSet.begin());
|
|
||||||
}
|
|
||||||
|
|
||||||
return { inputSet, outputSet, inputTestSet, outputTestSet };
|
|
||||||
}
|
|
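A minimal sketch of a pairing-preserving variant (hypothetical helper, not part of this class): draw one index permutation and apply it to both sets so that every input row keeps its output row; assumes <numeric> for std::iota.

std::tuple<std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>> pairedTrainTestSplit(std::vector<std::vector<real_t>> inputSet, std::vector<std::vector<real_t>> outputSet, real_t testSize) {
	std::vector<uint32_t> indices(inputSet.size());
	std::iota(indices.begin(), indices.end(), 0); // 0, 1, ..., n - 1
	std::random_device rd;
	std::default_random_engine generator(rd());
	std::shuffle(indices.begin(), indices.end(), generator); // one permutation shared by both sets

	uint32_t testNumber = testSize * inputSet.size(); // implicit usage of floor, as above
	std::vector<std::vector<real_t>> inputTrainSet, outputTrainSet, inputTestSet, outputTestSet;
	for (uint32_t i = 0; i < indices.size(); i++) {
		if (i < testNumber) {
			inputTestSet.push_back(inputSet[indices[i]]);
			outputTestSet.push_back(outputSet[indices[i]]);
		} else {
			inputTrainSet.push_back(inputSet[indices[i]]);
			outputTrainSet.push_back(outputSet[indices[i]]);
		}
	}
	return { inputTrainSet, outputTrainSet, inputTestSet, outputTestSet };
}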
||||||
|
|
||||||
// MULTIVARIATE SUPERVISED
|
|
||||||
|
|
||||||
void MLPPDataOld::setData(int k, std::string fileName, std::vector<std::vector<real_t>> &inputSet, std::vector<real_t> &outputSet) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
std::string inputTemp;
|
|
||||||
std::string outputTemp;
|
|
||||||
|
|
||||||
inputSet.resize(k);
|
|
||||||
|
|
||||||
std::ifstream dataFile(fileName);
|
|
||||||
if (!dataFile.is_open()) {
|
|
||||||
std::cout << fileName << " failed to open." << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string line;
|
|
||||||
while (std::getline(dataFile, line)) {
|
|
||||||
std::stringstream ss(line);
|
|
||||||
|
|
||||||
for (int i = 0; i < k; i++) {
|
|
||||||
std::getline(ss, inputTemp, ',');
|
|
||||||
inputSet[i].push_back(std::stod(inputTemp));
|
|
||||||
}
|
|
||||||
|
|
||||||
std::getline(ss, outputTemp, ',');
|
|
||||||
outputSet.push_back(std::stod(outputTemp));
|
|
||||||
}
|
|
||||||
inputSet = alg.transpose(inputSet);
|
|
||||||
dataFile.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPDataOld::printData(std::vector<std::string> inputName, std::string outputName, std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
inputSet = alg.transpose(inputSet);
|
|
||||||
for (uint32_t i = 0; i < inputSet.size(); i++) {
|
|
||||||
std::cout << inputName[i] << std::endl;
|
|
||||||
for (uint32_t j = 0; j < inputSet[i].size(); j++) {
|
|
||||||
std::cout << inputSet[i][j] << std::endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::cout << outputName << std::endl;
|
|
||||||
for (uint32_t i = 0; i < outputSet.size(); i++) {
|
|
||||||
std::cout << outputSet[i] << std::endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// UNSUPERVISED
|
|
||||||
|
|
||||||
void MLPPDataOld::setData(int k, std::string fileName, std::vector<std::vector<real_t>> &inputSet) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
std::string inputTemp;
|
|
||||||
|
|
||||||
inputSet.resize(k);
|
|
||||||
|
|
||||||
std::ifstream dataFile(fileName);
|
|
||||||
if (!dataFile.is_open()) {
|
|
||||||
std::cout << fileName << " failed to open." << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string line;
|
|
||||||
while (std::getline(dataFile, line)) {
|
|
||||||
std::stringstream ss(line);
|
|
||||||
|
|
||||||
for (int i = 0; i < k; i++) {
|
|
||||||
std::getline(ss, inputTemp, ',');
|
|
||||||
inputSet[i].push_back(std::stod(inputTemp));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
inputSet = alg.transpose(inputSet);
|
|
||||||
dataFile.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPDataOld::printData(std::vector<std::string> inputName, std::vector<std::vector<real_t>> inputSet) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
inputSet = alg.transpose(inputSet);
|
|
||||||
for (uint32_t i = 0; i < inputSet.size(); i++) {
|
|
||||||
std::cout << inputName[i] << std::endl;
|
|
||||||
for (uint32_t j = 0; j < inputSet[i].size(); j++) {
|
|
||||||
std::cout << inputSet[i][j] << std::endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// SIMPLE
|
|
||||||
|
|
||||||
void MLPPDataOld::setData(std::string fileName, std::vector<real_t> &inputSet, std::vector<real_t> &outputSet) {
|
|
||||||
std::string inputTemp, outputTemp;
|
|
||||||
|
|
||||||
std::ifstream dataFile(fileName);
|
|
||||||
if (!dataFile.is_open()) {
|
|
||||||
std::cout << "The file failed to open." << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string line;
|
|
||||||
|
|
||||||
while (std::getline(dataFile, line)) {
|
|
||||||
std::stringstream ss(line);
|
|
||||||
|
|
||||||
std::getline(ss, inputTemp, ',');
|
|
||||||
std::getline(ss, outputTemp, ',');
|
|
||||||
|
|
||||||
inputSet.push_back(std::stod(inputTemp));
|
|
||||||
outputSet.push_back(std::stod(outputTemp));
|
|
||||||
}
|
|
||||||
|
|
||||||
dataFile.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPDataOld::printData(std::string &inputName, std::string &outputName, std::vector<real_t> &inputSet, std::vector<real_t> &outputSet) {
|
|
||||||
std::cout << inputName << std::endl;
|
|
||||||
for (uint32_t i = 0; i < inputSet.size(); i++) {
|
|
||||||
std::cout << inputSet[i] << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::cout << outputName << std::endl;
|
|
||||||
for (uint32_t i = 0; i < inputSet.size(); i++) {
|
|
||||||
std::cout << outputSet[i] << std::endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Images
|
|
||||||
std::vector<std::vector<real_t>> MLPPDataOld::rgb2gray(std::vector<std::vector<std::vector<real_t>>> input) {
|
|
||||||
std::vector<std::vector<real_t>> grayScale;
|
|
||||||
grayScale.resize(input[0].size());
|
|
||||||
for (uint32_t i = 0; i < grayScale.size(); i++) {
|
|
||||||
grayScale[i].resize(input[0][i].size());
|
|
||||||
}
|
|
||||||
for (uint32_t i = 0; i < grayScale.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < grayScale[i].size(); j++) {
|
|
||||||
grayScale[i][j] = 0.299 * input[0][i][j] + 0.587 * input[1][i][j] + 0.114 * input[2][i][j];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return grayScale;
|
|
||||||
}
|
|
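rgb2gray (and the Y channel of rgb2ycbcr below) uses the standard BT.601 luma weights: Y = 0.299 R + 0.587 G + 0.114 B.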
||||||
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> MLPPDataOld::rgb2ycbcr(std::vector<std::vector<std::vector<real_t>>> input) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> YCbCr;
|
|
||||||
YCbCr = alg.resize(YCbCr, input);
|
|
||||||
for (uint32_t i = 0; i < YCbCr[0].size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < YCbCr[0][i].size(); j++) {
|
|
||||||
YCbCr[0][i][j] = 0.299 * input[0][i][j] + 0.587 * input[1][i][j] + 0.114 * input[2][i][j];
|
|
||||||
YCbCr[1][i][j] = -0.169 * input[0][i][j] - 0.331 * input[1][i][j] + 0.500 * input[2][i][j];
|
|
||||||
YCbCr[2][i][j] = 0.500 * input[0][i][j] - 0.419 * input[1][i][j] - 0.081 * input[2][i][j];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return YCbCr;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Conversion formulas available here:
|
|
||||||
// https://www.rapidtables.com/convert/color/rgb-to-hsv.html
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> MLPPDataOld::rgb2hsv(std::vector<std::vector<std::vector<real_t>>> input) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> HSV;
|
|
||||||
HSV = alg.resize(HSV, input);
|
|
||||||
for (uint32_t i = 0; i < HSV[0].size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < HSV[0][i].size(); j++) {
|
|
||||||
real_t rPrime = input[0][i][j] / 255;
|
|
||||||
real_t gPrime = input[1][i][j] / 255;
|
|
||||||
real_t bPrime = input[2][i][j] / 255;
|
|
||||||
|
|
||||||
real_t cMax = alg.max({ rPrime, gPrime, bPrime });
|
|
||||||
real_t cMin = alg.min({ rPrime, gPrime, bPrime });
|
|
||||||
real_t delta = cMax - cMin;
|
|
||||||
|
|
||||||
// H calculation.
|
|
||||||
if (delta == 0) {
|
|
||||||
HSV[0][i][j] = 0;
|
|
||||||
} else {
|
|
||||||
if (cMax == rPrime) {
|
|
||||||
HSV[0][i][j] = 60 * fmod(((gPrime - bPrime) / delta), 6);
|
|
||||||
} else if (cMax == gPrime) {
|
|
||||||
HSV[0][i][j] = 60 * ((bPrime - rPrime) / delta + 2);
|
|
||||||
} else { // cMax == bPrime
|
|
||||||
HSV[0][i][j] = 60 * ((rPrime - gPrime) / delta + 6);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// S calculation.
|
|
||||||
if (cMax == 0) {
|
|
||||||
HSV[1][i][j] = 0;
|
|
||||||
} else {
|
|
||||||
HSV[1][i][j] = delta / cMax;
|
|
||||||
}
|
|
||||||
|
|
||||||
// V calculation.
|
|
||||||
HSV[2][i][j] = cMax;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return HSV;
|
|
||||||
}
|
|
||||||
|
|
||||||
// http://machinethatsees.blogspot.com/2013/07/how-to-convert-rgb-to-xyz-or-vice-versa.html
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> MLPPDataOld::rgb2xyz(std::vector<std::vector<std::vector<real_t>>> input) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> XYZ;
|
|
||||||
XYZ = alg.resize(XYZ, input);
|
|
||||||
std::vector<std::vector<real_t>> RGB2XYZ = { { 0.4124564, 0.3575761, 0.1804375 }, { 0.2126726, 0.7151522, 0.0721750 }, { 0.0193339, 0.1191920, 0.9503041 } };
|
|
||||||
return alg.vector_wise_tensor_product(input, RGB2XYZ);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> MLPPDataOld::xyz2rgb(std::vector<std::vector<std::vector<real_t>>> input) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> XYZ;
|
|
||||||
XYZ = alg.resize(XYZ, input);
|
|
||||||
std::vector<std::vector<real_t>> RGB2XYZ = alg.inverse({ { 0.4124564, 0.3575761, 0.1804375 }, { 0.2126726, 0.7151522, 0.0721750 }, { 0.0193339, 0.1191920, 0.9503041 } });
|
|
||||||
return alg.vector_wise_tensor_product(input, RGB2XYZ);
|
|
||||||
}
|
|
||||||
|
|
||||||
// TEXT-BASED & NLP
|
|
||||||
std::string MLPPDataOld::toLower(std::string text) {
|
|
||||||
for (uint32_t i = 0; i < text.size(); i++) {
|
|
||||||
text[i] = tolower(text[i]);
|
|
||||||
}
|
|
||||||
return text;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<char> MLPPDataOld::split(std::string text) {
|
|
||||||
std::vector<char> split_data;
|
|
||||||
for (uint32_t i = 0; i < text.size(); i++) {
|
|
||||||
split_data.push_back(text[i]);
|
|
||||||
}
|
|
||||||
return split_data;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::string> MLPPDataOld::splitSentences(std::string data) {
|
|
||||||
std::vector<std::string> sentences;
|
|
||||||
std::string currentStr = "";
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < data.length(); i++) {
|
|
||||||
currentStr.push_back(data[i]);
|
|
||||||
if (data[i] == '.' && data[i + 1] != '.') {
|
|
||||||
sentences.push_back(currentStr);
|
|
||||||
currentStr = "";
|
|
||||||
i++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return sentences;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::string> MLPPDataOld::removeSpaces(std::vector<std::string> data) {
|
|
||||||
for (uint32_t i = 0; i < data.size(); i++) {
|
|
||||||
data[i].erase(std::remove(data[i].begin(), data[i].end(), ' '), data[i].end()); // Erase-remove idiom: strip every space in place.
|
|
||||||
}
|
|
||||||
return data;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::string> MLPPDataOld::removeNullByte(std::vector<std::string> data) {
|
|
||||||
for (uint32_t i = 0; i < data.size(); i++) {
|
|
||||||
if (data[i] == "\0") {
|
|
||||||
data.erase(data.begin() + i);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return data;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::string> MLPPDataOld::segment(std::string text) {
|
|
||||||
std::vector<std::string> segmented_data;
|
|
||||||
int prev_delim = 0;
|
|
||||||
for (uint32_t i = 0; i < text.length(); i++) {
|
|
||||||
if (text[i] == ' ') {
|
|
||||||
segmented_data.push_back(text.substr(prev_delim, i - prev_delim));
|
|
||||||
prev_delim = i + 1;
|
|
||||||
} else if (text[i] == ',' || text[i] == '!' || text[i] == '.' || text[i] == '-') {
|
|
||||||
segmented_data.push_back(text.substr(prev_delim, i - prev_delim));
|
|
||||||
std::string punc;
|
|
||||||
punc.push_back(text[i]);
|
|
||||||
segmented_data.push_back(punc);
|
|
||||||
prev_delim = i + 2;
|
|
||||||
i++;
|
|
||||||
} else if (i == text.length() - 1) {
|
|
||||||
segmented_data.push_back(text.substr(prev_delim, text.length() - prev_delim)); // Append the trailing segment after the last delimiter.
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return segmented_data;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPDataOld::tokenize(std::string text) {
|
|
||||||
int max_num = 0;
|
|
||||||
bool new_num = true;
|
|
||||||
std::vector<std::string> segmented_data = segment(text);
|
|
||||||
std::vector<real_t> tokenized_data;
|
|
||||||
tokenized_data.resize(segmented_data.size());
|
|
||||||
for (uint32_t i = 0; i < segmented_data.size(); i++) {
|
|
||||||
for (int j = i - 1; j >= 0; j--) {
|
|
||||||
if (segmented_data[i] == segmented_data[j]) {
|
|
||||||
tokenized_data[i] = tokenized_data[j];
|
|
||||||
new_num = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!new_num) {
|
|
||||||
new_num = true;
|
|
||||||
} else {
|
|
||||||
max_num++;
|
|
||||||
tokenized_data[i] = max_num;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return tokenized_data;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::string> MLPPDataOld::removeStopWords(std::string text) {
|
|
||||||
std::vector<std::string> stopWords = { "i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", "yours", "yourself", "yourselves", "he", "him", "his", "himself", "she", "her", "hers", "herself", "it", "its", "itself", "they", "them", "their", "theirs", "themselves", "what", "which", "who", "whom", "this", "that", "these", "those", "am", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had", "having", "do", "does", "did", "doing", "a", "an", "the", "and", "but", "if", "or", "because", "as", "until", "while", "of", "at", "by", "for", "with", "about", "against", "between", "into", "through", "during", "before", "after", "above", "below", "to", "from", "up", "down", "in", "out", "on", "off", "over", "under", "again", "further", "then", "once", "here", "there", "when", "where", "why", "how", "all", "any", "both", "each", "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so", "than", "too", "very", "s", "t", "can", "will", "just", "don", "should", "now" };
|
|
||||||
std::vector<std::string> segmented_data = removeSpaces(segment(toLower(text)));
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < stopWords.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < segmented_data.size(); j++) {
|
|
||||||
if (segmented_data[j] == stopWords[i]) {
|
|
||||||
segmented_data.erase(segmented_data.begin() + j);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return segmented_data;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::string> MLPPDataOld::removeStopWords(std::vector<std::string> segmented_data) {
|
|
||||||
std::vector<std::string> stopWords = { "i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", "yours", "yourself", "yourselves", "he", "him", "his", "himself", "she", "her", "hers", "herself", "it", "its", "itself", "they", "them", "their", "theirs", "themselves", "what", "which", "who", "whom", "this", "that", "these", "those", "am", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had", "having", "do", "does", "did", "doing", "a", "an", "the", "and", "but", "if", "or", "because", "as", "until", "while", "of", "at", "by", "for", "with", "about", "against", "between", "into", "through", "during", "before", "after", "above", "below", "to", "from", "up", "down", "in", "out", "on", "off", "over", "under", "again", "further", "then", "once", "here", "there", "when", "where", "why", "how", "all", "any", "both", "each", "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so", "than", "too", "very", "s", "t", "can", "will", "just", "don", "should", "now" };
|
|
||||||
for (uint32_t i = 0; i < segmented_data.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < stopWords.size(); j++) {
|
|
||||||
if (segmented_data[i] == stopWords[j]) {
|
|
||||||
segmented_data.erase(segmented_data.begin() + i);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return segmented_data;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string MLPPDataOld::stemming(std::string text) {
|
|
||||||
// Our list of suffixes which we use to compare against
|
|
||||||
std::vector<std::string> suffixes = { "eer", "er", "ion", "ity", "ment", "ness", "or", "sion", "ship", "th", "able", "ible", "al", "ant", "ary", "ful", "ic", "ious", "ous", "ive", "less", "y", "ed", "en", "ing", "ize", "ise", "ly", "ward", "wise" };
|
|
||||||
int padding_size = 4;
|
|
||||||
char padding = ' '; // our padding
|
|
||||||
|
|
||||||
for (int i = 0; i < padding_size; i++) {
|
|
||||||
text += padding; // ' ' will be our padding value
|
|
||||||
}
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < text.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < suffixes.size(); j++) {
|
|
||||||
if (text.substr(i, suffixes[j].length()) == suffixes[j] && (text[i + suffixes[j].length()] == ' ' || text[i + suffixes[j].length()] == ',' || text[i + suffixes[j].length()] == '-' || text[i + suffixes[j].length()] == '.' || text[i + suffixes[j].length()] == '!')) {
|
|
||||||
text.erase(i, suffixes[j].length());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return text;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPDataOld::BOW(std::vector<std::string> sentences, std::string type) {
|
|
||||||
/*
|
|
||||||
STEPS OF BOW:
|
|
||||||
1) To lowercase (done by removeStopWords function by def)
|
|
||||||
2) Removing stop words
|
|
||||||
3) Obtain a list of the used words
|
|
||||||
4) Create a one hot encoded vector of the words and sentences
|
|
||||||
5) Sentence.size() x list.size() matrix
|
|
||||||
*/
|
|
||||||
|
|
||||||
std::vector<std::string> wordList = removeNullByte(removeStopWords(createWordList(sentences)));
|
|
||||||
|
|
||||||
std::vector<std::vector<std::string>> segmented_sentences;
|
|
||||||
segmented_sentences.resize(sentences.size());
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < sentences.size(); i++) {
|
|
||||||
segmented_sentences[i] = removeStopWords(sentences[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> bow;
|
|
||||||
|
|
||||||
bow.resize(sentences.size());
|
|
||||||
for (uint32_t i = 0; i < bow.size(); i++) {
|
|
||||||
bow[i].resize(wordList.size());
|
|
||||||
}
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < segmented_sentences.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < segmented_sentences[i].size(); j++) {
|
|
||||||
for (uint32_t k = 0; k < wordList.size(); k++) {
|
|
||||||
if (segmented_sentences[i][j] == wordList[k]) {
|
|
||||||
if (type == "Binary") {
|
|
||||||
bow[i][k] = 1;
|
|
||||||
} else {
|
|
||||||
bow[i][k]++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return bow;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPDataOld::TFIDF(std::vector<std::string> sentences) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
std::vector<std::string> wordList = removeNullByte(removeStopWords(createWordList(sentences)));
|
|
||||||
|
|
||||||
std::vector<std::vector<std::string>> segmented_sentences;
|
|
||||||
segmented_sentences.resize(sentences.size());
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < sentences.size(); i++) {
|
|
||||||
segmented_sentences[i] = removeStopWords(sentences[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> TF;
|
|
||||||
std::vector<int> frequency;
|
|
||||||
frequency.resize(wordList.size());
|
|
||||||
TF.resize(segmented_sentences.size());
|
|
||||||
for (uint32_t i = 0; i < TF.size(); i++) {
|
|
||||||
TF[i].resize(wordList.size());
|
|
||||||
}
|
|
||||||
for (uint32_t i = 0; i < segmented_sentences.size(); i++) {
|
|
||||||
std::vector<bool> present(wordList.size(), false);
|
|
||||||
for (uint32_t j = 0; j < segmented_sentences[i].size(); j++) {
|
|
||||||
for (uint32_t k = 0; k < wordList.size(); k++) {
|
|
||||||
if (segmented_sentences[i][j] == wordList[k]) {
|
|
||||||
TF[i][k]++;
|
|
||||||
if (!present[k]) {
|
|
||||||
frequency[k]++;
|
|
||||||
present[k] = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
TF[i] = alg.scalarMultiply(real_t(1) / real_t(segmented_sentences[i].size()), TF[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> IDF;
|
|
||||||
IDF.resize(frequency.size());
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < IDF.size(); i++) {
|
|
||||||
IDF[i] = std::log((real_t)segmented_sentences.size() / (real_t)frequency[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> TFIDF;
|
|
||||||
TFIDF.resize(segmented_sentences.size());
|
|
||||||
for (uint32_t i = 0; i < TFIDF.size(); i++) {
|
|
||||||
TFIDF[i].resize(wordList.size());
|
|
||||||
}
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < TFIDF.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < TFIDF[i].size(); j++) {
|
|
||||||
TFIDF[i][j] = TF[i][j] * IDF[j];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return TFIDF;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::tuple<std::vector<std::vector<real_t>>, std::vector<std::string>> MLPPDataOld::word2Vec(std::vector<std::string> sentences, std::string type, int windowSize, int dimension, real_t learning_rate, int max_epoch) {
|
|
||||||
std::vector<std::string> wordList = removeNullByte(removeStopWords(createWordList(sentences)));
|
|
||||||
|
|
||||||
std::vector<std::vector<std::string>> segmented_sentences;
|
|
||||||
segmented_sentences.resize(sentences.size());
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < sentences.size(); i++) {
|
|
||||||
segmented_sentences[i] = removeStopWords(sentences[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::string> inputStrings;
|
|
||||||
std::vector<std::string> outputStrings;
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < segmented_sentences.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < segmented_sentences[i].size(); j++) {
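// Pair the word at position j with every word within windowSize positions on either side to form (input, output) training examples.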
for (int k = windowSize; k > 0; k--) {
|
|
||||||
int jmk = (int)j - k;
|
|
||||||
|
|
||||||
if (jmk >= 0) {
|
|
||||||
inputStrings.push_back(segmented_sentences[i][j]);
|
|
||||||
|
|
||||||
outputStrings.push_back(segmented_sentences[i][jmk]);
|
|
||||||
}
|
|
||||||
if (j + k <= segmented_sentences[i].size() - 1) {
|
|
||||||
inputStrings.push_back(segmented_sentences[i][j]);
|
|
||||||
outputStrings.push_back(segmented_sentences[i][j + k]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32_t inputSize = inputStrings.size();
|
|
||||||
|
|
||||||
inputStrings.insert(inputStrings.end(), outputStrings.begin(), outputStrings.end());
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> BOW = MLPPDataOld::BOW(inputStrings, "Binary");
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> inputSet;
|
|
||||||
std::vector<std::vector<real_t>> outputSet;
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < inputSize; i++) {
|
|
||||||
inputSet.push_back(BOW[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (uint32_t i = inputSize; i < BOW.size(); i++) {
|
|
||||||
outputSet.push_back(BOW[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
MLPPSoftmaxNetOld *model;
|
|
||||||
|
|
||||||
if (type == "Skipgram") {
|
|
||||||
model = new MLPPSoftmaxNetOld(outputSet, inputSet, dimension);
|
|
||||||
} else { // else = CBOW. We treat it as the default.
|
|
||||||
model = new MLPPSoftmaxNetOld(inputSet, outputSet, dimension);
|
|
||||||
}
|
|
||||||
|
|
||||||
model->gradientDescent(learning_rate, max_epoch, false);
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> wordEmbeddings = model->getEmbeddings();
|
|
||||||
delete model;
|
|
||||||
return { wordEmbeddings, wordList };
|
|
||||||
}
|
|
||||||
|
|
||||||
struct WordsToVecResult {
|
|
||||||
std::vector<std::vector<real_t>> word_embeddings;
|
|
||||||
std::vector<std::string> word_list;
|
|
||||||
};
|
|
||||||
|
|
||||||
MLPPDataOld::WordsToVecResult MLPPDataOld::word_to_vec(std::vector<std::string> sentences, std::string type, int windowSize, int dimension, real_t learning_rate, int max_epoch) {
|
|
||||||
WordsToVecResult res;
|
|
||||||
|
|
||||||
res.word_list = removeNullByte(removeStopWords(createWordList(sentences)));
|
|
||||||
|
|
||||||
std::vector<std::vector<std::string>> segmented_sentences;
|
|
||||||
segmented_sentences.resize(sentences.size());
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < sentences.size(); i++) {
|
|
||||||
segmented_sentences[i] = removeStopWords(sentences[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::string> inputStrings;
|
|
||||||
std::vector<std::string> outputStrings;
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < segmented_sentences.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < segmented_sentences[i].size(); j++) {
|
|
||||||
for (int k = windowSize; k > 0; k--) {
|
|
||||||
int jmk = (int)j - k; // Cast to int so positions before the window start go negative instead of wrapping around.

if (jmk >= 0) {
inputStrings.push_back(segmented_sentences[i][j]);
outputStrings.push_back(segmented_sentences[i][jmk]);
}
|
|
||||||
if (j + k <= segmented_sentences[i].size() - 1) {
|
|
||||||
inputStrings.push_back(segmented_sentences[i][j]);
|
|
||||||
outputStrings.push_back(segmented_sentences[i][j + k]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32_t inputSize = inputStrings.size();
|
|
||||||
|
|
||||||
inputStrings.insert(inputStrings.end(), outputStrings.begin(), outputStrings.end());
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> BOW = MLPPDataOld::BOW(inputStrings, "Binary");
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> inputSet;
|
|
||||||
std::vector<std::vector<real_t>> outputSet;
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < inputSize; i++) {
|
|
||||||
inputSet.push_back(BOW[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (uint32_t i = inputSize; i < BOW.size(); i++) {
|
|
||||||
outputSet.push_back(BOW[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
MLPPSoftmaxNetOld *model;
|
|
||||||
|
|
||||||
if (type == "Skipgram") {
|
|
||||||
model = new MLPPSoftmaxNetOld(outputSet, inputSet, dimension);
|
|
||||||
} else { // else = CBOW. We treat it as the default.
|
|
||||||
model = new MLPPSoftmaxNetOld(inputSet, outputSet, dimension);
|
|
||||||
}
|
|
||||||
|
|
||||||
model->gradientDescent(learning_rate, max_epoch, false);
|
|
||||||
|
|
||||||
res.word_embeddings = model->getEmbeddings();
|
|
||||||
delete model;
|
|
||||||
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPDataOld::LSA(std::vector<std::string> sentences, int dim) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
std::vector<std::vector<real_t>> docWordData = BOW(sentences, "Binary");
|
|
||||||
|
|
||||||
MLPPLinAlgOld::SVDResultOld svr_res = alg.SVD(docWordData);
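// Truncated SVD: keep only the top dim singular values and the corresponding rows of Vt to form the latent semantic space.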
std::vector<std::vector<real_t>> S_trunc = alg.zeromat(dim, dim);
|
|
||||||
std::vector<std::vector<real_t>> Vt_trunc;
|
|
||||||
for (int i = 0; i < dim; i++) {
|
|
||||||
S_trunc[i][i] = svr_res.S[i][i];
|
|
||||||
Vt_trunc.push_back(svr_res.Vt[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> embeddings = alg.matmult(S_trunc, Vt_trunc);
|
|
||||||
return embeddings;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::string> MLPPDataOld::createWordList(std::vector<std::string> sentences) {
|
|
||||||
std::string combinedText = "";
|
|
||||||
for (uint32_t i = 0; i < sentences.size(); i++) {
|
|
||||||
if (i != 0) {
|
|
||||||
combinedText += " ";
|
|
||||||
}
|
|
||||||
combinedText += sentences[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
return removeSpaces(vecToSet(removeStopWords(combinedText)));
|
|
||||||
}
|
|
||||||
|
|
||||||
// EXTRA
|
|
||||||
void MLPPDataOld::setInputNames(std::string fileName, std::vector<std::string> &inputNames) {
|
|
||||||
std::string inputNameTemp;
|
|
||||||
std::ifstream dataFile(fileName);
|
|
||||||
if (!dataFile.is_open()) {
|
|
||||||
std::cout << fileName << " failed to open." << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
while (std::getline(dataFile, inputNameTemp)) {
|
|
||||||
inputNames.push_back(inputNameTemp);
|
|
||||||
}
|
|
||||||
|
|
||||||
dataFile.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPDataOld::featureScaling(std::vector<std::vector<real_t>> X) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
X = alg.transpose(X);
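// Min-max scaling per feature: (x - min) / (max - min); the matrix is transposed so that each row is one feature.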
std::vector<real_t> max_elements, min_elements;
|
|
||||||
max_elements.resize(X.size());
|
|
||||||
min_elements.resize(X.size());
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < X.size(); i++) {
|
|
||||||
max_elements[i] = alg.max(X[i]);
|
|
||||||
min_elements[i] = alg.min(X[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < X.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < X[i].size(); j++) {
|
|
||||||
X[i][j] = (X[i][j] - min_elements[i]) / (max_elements[i] - min_elements[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return alg.transpose(X);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPDataOld::meanNormalization(std::vector<std::vector<real_t>> X) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPStatOld stat;
|
|
||||||
// (X_j - mu_j) / std_j, for every j
|
|
||||||
|
|
||||||
X = meanCentering(X);
|
|
||||||
for (uint32_t i = 0; i < X.size(); i++) {
|
|
||||||
X[i] = alg.scalarMultiply(1 / stat.standardDeviation(X[i]), X[i]);
|
|
||||||
}
|
|
||||||
return X;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPDataOld::meanCentering(std::vector<std::vector<real_t>> X) {
|
|
||||||
MLPPStatOld stat;
|
|
||||||
for (uint32_t i = 0; i < X.size(); i++) {
|
|
||||||
real_t mean_i = stat.mean(X[i]);
|
|
||||||
for (uint32_t j = 0; j < X[i].size(); j++) {
|
|
||||||
X[i][j] -= mean_i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return X;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPDataOld::oneHotRep(std::vector<real_t> tempOutputSet, int n_class) {
|
|
||||||
std::vector<std::vector<real_t>> outputSet;
|
|
||||||
outputSet.resize(tempOutputSet.size());
|
|
||||||
for (uint32_t i = 0; i < tempOutputSet.size(); i++) {
|
|
||||||
for (int j = 0; j <= n_class - 1; j++) {
|
|
||||||
if (tempOutputSet[i] == j) {
|
|
||||||
outputSet[i].push_back(1);
|
|
||||||
} else {
|
|
||||||
outputSet[i].push_back(0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return outputSet;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPDataOld::reverseOneHot(std::vector<std::vector<real_t>> tempOutputSet) {
|
|
||||||
std::vector<real_t> outputSet;
|
|
||||||
//uint32_t n_class = tempOutputSet[0].size();
|
|
||||||
for (uint32_t i = 0; i < tempOutputSet.size(); i++) {
|
|
||||||
int current_class = 1;
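// The recovered class index is 1-based: a 1 in column j yields class j + 1.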
for (uint32_t j = 0; j < tempOutputSet[i].size(); j++) {
|
|
||||||
if (tempOutputSet[i][j] == 1) {
|
|
||||||
break;
|
|
||||||
} else {
|
|
||||||
current_class++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
outputSet.push_back(current_class);
|
|
||||||
}
|
|
||||||
|
|
||||||
return outputSet;
|
|
||||||
}
@ -1,110 +0,0 @@
#ifndef MLPP_DATA_OLD_H
|
|
||||||
#define MLPP_DATA_OLD_H
|
|
||||||
|
|
||||||
//
|
|
||||||
// Data.hpp
|
|
||||||
// MLP
|
|
||||||
//
|
|
||||||
// Created by Marc Melikyan on 11/4/20.
|
|
||||||
//
|
|
||||||
|
|
||||||
#include "core/math/math_defs.h"
|
|
||||||
#include "core/int_types.h"
|
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <tuple>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
class MLPPDataOld {
|
|
||||||
public:
|
|
||||||
// Load Datasets
|
|
||||||
std::tuple<std::vector<std::vector<real_t>>, std::vector<real_t>> loadBreastCancer();
|
|
||||||
std::tuple<std::vector<std::vector<real_t>>, std::vector<real_t>> loadBreastCancerSVC();
|
|
||||||
std::tuple<std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>> loadIris();
|
|
||||||
std::tuple<std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>> loadWine();
|
|
||||||
std::tuple<std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>> loadMnistTrain();
|
|
||||||
std::tuple<std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>> loadMnistTest();
|
|
||||||
std::tuple<std::vector<std::vector<real_t>>, std::vector<real_t>> loadCaliforniaHousing();
|
|
||||||
std::tuple<std::vector<real_t>, std::vector<real_t>> loadFiresAndCrime();
|
|
||||||
|
|
||||||
std::tuple<std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>> trainTestSplit(std::vector<std::vector<real_t>> inputSet, std::vector<std::vector<real_t>> outputSet, real_t testSize);
|
|
||||||
|
|
||||||
// Supervised
|
|
||||||
void setData(int k, std::string fileName, std::vector<std::vector<real_t>> &inputSet, std::vector<real_t> &outputSet);
|
|
||||||
void printData(std::vector<std::string> inputName, std::string outputName, std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet);
|
|
||||||
|
|
||||||
// Unsupervised
|
|
||||||
void setData(int k, std::string fileName, std::vector<std::vector<real_t>> &inputSet);
|
|
||||||
void printData(std::vector<std::string> inputName, std::vector<std::vector<real_t>> inputSet);
|
|
||||||
|
|
||||||
// Simple
|
|
||||||
void setData(std::string fileName, std::vector<real_t> &inputSet, std::vector<real_t> &outputSet);
|
|
||||||
void printData(std::string &inputName, std::string &outputName, std::vector<real_t> &inputSet, std::vector<real_t> &outputSet);
|
|
||||||
|
|
||||||
// Images
|
|
||||||
std::vector<std::vector<real_t>> rgb2gray(std::vector<std::vector<std::vector<real_t>>> input);
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> rgb2ycbcr(std::vector<std::vector<std::vector<real_t>>> input);
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> rgb2hsv(std::vector<std::vector<std::vector<real_t>>> input);
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> rgb2xyz(std::vector<std::vector<std::vector<real_t>>> input);
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> xyz2rgb(std::vector<std::vector<std::vector<real_t>>> input);
|
|
||||||
|
|
||||||
// Text-Based & NLP
|
|
||||||
std::string toLower(std::string text);
|
|
||||||
std::vector<char> split(std::string text);
|
|
||||||
std::vector<std::string> splitSentences(std::string data);
|
|
||||||
std::vector<std::string> removeSpaces(std::vector<std::string> data);
|
|
||||||
std::vector<std::string> removeNullByte(std::vector<std::string> data);
|
|
||||||
std::vector<std::string> segment(std::string text);
|
|
||||||
std::vector<real_t> tokenize(std::string text);
|
|
||||||
std::vector<std::string> removeStopWords(std::string text);
|
|
||||||
std::vector<std::string> removeStopWords(std::vector<std::string> segmented_data);
|
|
||||||
|
|
||||||
std::string stemming(std::string text);
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> BOW(std::vector<std::string> sentences, std::string = "Default");
|
|
||||||
std::vector<std::vector<real_t>> TFIDF(std::vector<std::string> sentences);
|
|
||||||
|
|
||||||
std::tuple<std::vector<std::vector<real_t>>, std::vector<std::string>> word2Vec(std::vector<std::string> sentences, std::string type, int windowSize, int dimension, real_t learning_rate, int max_epoch);
|
|
||||||
|
|
||||||
struct WordsToVecResult {
|
|
||||||
std::vector<std::vector<real_t>> word_embeddings;
|
|
||||||
std::vector<std::string> word_list;
|
|
||||||
};
|
|
||||||
|
|
||||||
WordsToVecResult word_to_vec(std::vector<std::string> sentences, std::string type, int windowSize, int dimension, real_t learning_rate, int max_epoch);
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> LSA(std::vector<std::string> sentences, int dim);
|
|
||||||
|
|
||||||
std::vector<std::string> createWordList(std::vector<std::string> sentences);
|
|
||||||
|
|
||||||
// Extra
|
|
||||||
void setInputNames(std::string fileName, std::vector<std::string> &inputNames);
|
|
||||||
std::vector<std::vector<real_t>> featureScaling(std::vector<std::vector<real_t>> X);
|
|
||||||
std::vector<std::vector<real_t>> meanNormalization(std::vector<std::vector<real_t>> X);
|
|
||||||
std::vector<std::vector<real_t>> meanCentering(std::vector<std::vector<real_t>> X);
|
|
||||||
std::vector<std::vector<real_t>> oneHotRep(std::vector<real_t> tempOutputSet, int n_class);
|
|
||||||
std::vector<real_t> reverseOneHot(std::vector<std::vector<real_t>> tempOutputSet);
|
|
||||||
|
|
||||||
template <class T>
|
|
||||||
std::vector<T> vecToSet(std::vector<T> inputSet) {
|
|
||||||
std::vector<T> setInputSet;
|
|
||||||
for (uint32_t i = 0; i < inputSet.size(); i++) {
|
|
||||||
bool new_element = true;
|
|
||||||
for (uint32_t j = 0; j < setInputSet.size(); j++) {
|
|
||||||
if (setInputSet[j] == inputSet[i]) {
|
|
||||||
new_element = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (new_element) {
|
|
||||||
setInputSet.push_back(inputSet[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return setInputSet;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected:
|
|
||||||
static void _bind_methods();
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif /* Data_hpp */
@ -1,244 +0,0 @@
//
|
|
||||||
// DualSVC.cpp
|
|
||||||
//
|
|
||||||
// Created by Marc Melikyan on 10/2/20.
|
|
||||||
//
|
|
||||||
|
|
||||||
#include "dual_svc_old.h"
|
|
||||||
#include "../activation/activation_old.h"
|
|
||||||
#include "../cost/cost_old.h"
|
|
||||||
#include "../lin_alg/lin_alg_old.h"
|
|
||||||
#include "../regularization/reg_old.h"
|
|
||||||
#include "../utilities/utilities.h"
|
|
||||||
|
|
||||||
#include <iostream>
|
|
||||||
#include <random>
|
|
||||||
|
|
||||||
MLPPDualSVCOld::MLPPDualSVCOld(std::vector<std::vector<real_t>> p_inputSet, std::vector<real_t> p_outputSet, real_t p_C, std::string p_kernel) {
|
|
||||||
inputSet = p_inputSet;
|
|
||||||
outputSet = p_outputSet;
|
|
||||||
n = p_inputSet.size();
|
|
||||||
k = p_inputSet[0].size();
|
|
||||||
C = p_C;
|
|
||||||
kernel = p_kernel;
|
|
||||||
|
|
||||||
y_hat.resize(n);
|
|
||||||
bias = MLPPUtilities::biasInitialization();
|
|
||||||
alpha = MLPPUtilities::weightInitialization(n); // One alpha per training example; these are the Lagrange multipliers of the dual problem.
|
|
||||||
K = kernelFunction(inputSet, inputSet, kernel); // For now this is unused. When non-linear kernels are added, the K will be manipulated.
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPDualSVCOld::modelSetTest(std::vector<std::vector<real_t>> X) {
|
|
||||||
return Evaluate(X);
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPDualSVCOld::modelTest(std::vector<real_t> x) {
|
|
||||||
return Evaluate(x);
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPDualSVCOld::gradientDescent(real_t learning_rate, int max_epoch, bool UI) {
|
|
||||||
class MLPPCostOld cost;
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
real_t cost_prev = 0;
|
|
||||||
int epoch = 1;
|
|
||||||
forwardPass();
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
cost_prev = Cost(alpha, inputSet, outputSet);
|
|
||||||
|
|
||||||
alpha = alg.subtraction(alpha, alg.scalarMultiply(learning_rate, cost.dualFormSVMDeriv(alpha, inputSet, outputSet)));
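// Gradient step on the dual objective; alphaProjection() below clips the multipliers back into the box [0, C].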
alphaProjection();
|
|
||||||
|
|
||||||
// Calculating the bias
|
|
||||||
real_t biasGradient = 0;
|
|
||||||
for (uint32_t i = 0; i < alpha.size(); i++) {
if (alpha[i] < C && alpha[i] > 0) {
real_t sum = 0;
for (uint32_t j = 0; j < alpha.size(); j++) {
if (alpha[j] > 0) {
sum += alpha[j] * outputSet[j] * alg.dot(inputSet[j], inputSet[i]); // TO DO: DON'T forget to add non-linear kernelizations.
}
}
// Estimate the bias from the first free support vector (0 < alpha_i < C).
biasGradient = (1 - outputSet[i] * sum) / outputSet[i];
break;
}
}
|
|
||||||
bias -= biasGradient * learning_rate;
|
|
||||||
|
|
||||||
forwardPass();
|
|
||||||
|
|
||||||
// UI PORTION
|
|
||||||
if (UI) {
|
|
||||||
MLPPUtilities::CostInfo(epoch, cost_prev, Cost(alpha, inputSet, outputSet));
|
|
||||||
MLPPUtilities::UI(alpha, bias);
|
|
||||||
std::cout << score() << std::endl; // TO DO: DELETE THIS.
|
|
||||||
}
|
|
||||||
epoch++;
|
|
||||||
|
|
||||||
if (epoch > max_epoch) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// void MLPPDualSVCOld::SGD(real_t learning_rate, int max_epoch, bool UI){
|
|
||||||
// class MLPPCostOld cost;
|
|
||||||
// MLPPActivationOld avn;
|
|
||||||
// MLPPLinAlgOld alg;
|
|
||||||
// MLPPRegOld regularization;
|
|
||||||
|
|
||||||
// real_t cost_prev = 0;
|
|
||||||
// int epoch = 1;
|
|
||||||
|
|
||||||
// while(true){
|
|
||||||
// std::random_device rd;
|
|
||||||
// std::default_random_engine generator(rd());
|
|
||||||
// std::uniform_int_distribution<int> distribution(0, int(n - 1));
|
|
||||||
// int outputIndex = distribution(generator);
|
|
||||||
|
|
||||||
// cost_prev = Cost(alpha, inputSet[outputIndex], outputSet[outputIndex]);
|
|
||||||
|
|
||||||
// // Bias updation
|
|
||||||
// bias -= learning_rate * costDeriv;
|
|
||||||
|
|
||||||
// y_hat = Evaluate({inputSet[outputIndex]});
|
|
||||||
|
|
||||||
// if(UI) {
|
|
||||||
// MLPPUtilities::CostInfo(epoch, cost_prev, Cost(alpha));
|
|
||||||
// MLPPUtilities::UI(weights, bias);
|
|
||||||
// }
|
|
||||||
// epoch++;
|
|
||||||
|
|
||||||
// if(epoch > max_epoch) { break; }
|
|
||||||
// }
|
|
||||||
// forwardPass();
|
|
||||||
// }
|
|
||||||
|
|
||||||
// void MLPPDualSVCOld::MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI){
|
|
||||||
// class MLPPCostOld cost;
|
|
||||||
// MLPPActivationOld avn;
|
|
||||||
// MLPPLinAlgOld alg;
|
|
||||||
// MLPPRegOld regularization;
|
|
||||||
// real_t cost_prev = 0;
|
|
||||||
// int epoch = 1;
|
|
||||||
|
|
||||||
// // Creating the mini-batches
|
|
||||||
// int n_mini_batch = n/mini_batch_size;
|
|
||||||
// auto [inputMiniBatches, outputMiniBatches] = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
|
|
||||||
|
|
||||||
// while(true){
|
|
||||||
// for(int i = 0; i < n_mini_batch; i++){
|
|
||||||
// std::vector<real_t> y_hat = Evaluate(inputMiniBatches[i]);
|
|
||||||
// std::vector<real_t> z = propagate(inputMiniBatches[i]);
|
|
||||||
// cost_prev = Cost(z, outputMiniBatches[i], weights, C);
|
|
||||||
|
|
||||||
// // Calculating the weight gradients
|
|
||||||
// weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), cost.HingeLossDeriv(z, outputMiniBatches[i], C))));
|
|
||||||
// weights = regularization.regWeights(weights, learning_rate/n, 0, "Ridge");
|
|
||||||
|
|
||||||
// // Calculating the bias gradients
|
|
||||||
// bias -= learning_rate * alg.sum_elements(cost.HingeLossDeriv(y_hat, outputMiniBatches[i], C)) / n;
|
|
||||||
|
|
||||||
// forwardPass();
|
|
||||||
|
|
||||||
// y_hat = Evaluate(inputMiniBatches[i]);
|
|
||||||
|
|
||||||
// if(UI) {
|
|
||||||
// MLPPUtilities::CostInfo(epoch, cost_prev, Cost(z, outputMiniBatches[i], weights, C));
|
|
||||||
// MLPPUtilities::UI(weights, bias);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// epoch++;
|
|
||||||
// if(epoch > max_epoch) { break; }
|
|
||||||
// }
|
|
||||||
// forwardPass();
|
|
||||||
// }
|
|
||||||
|
|
||||||
real_t MLPPDualSVCOld::score() {
|
|
||||||
MLPPUtilities util;
|
|
||||||
return util.performance(y_hat, outputSet);
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPDualSVCOld::save(std::string fileName) {
|
|
||||||
MLPPUtilities util;
|
|
||||||
util.saveParameters(fileName, alpha, bias);
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPDualSVCOld::Cost(std::vector<real_t> alpha, std::vector<std::vector<real_t>> X, std::vector<real_t> y) {
|
|
||||||
class MLPPCostOld cost;
|
|
||||||
return cost.dualFormSVM(alpha, X, y);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPDualSVCOld::Evaluate(std::vector<std::vector<real_t>> X) {
|
|
||||||
MLPPActivationOld avn;
|
|
||||||
return avn.sign(propagate(X));
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPDualSVCOld::propagate(std::vector<std::vector<real_t>> X) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
std::vector<real_t> z;
|
|
||||||
for (uint32_t i = 0; i < X.size(); i++) {
|
|
||||||
real_t sum = 0;
|
|
||||||
for (uint32_t j = 0; j < alpha.size(); j++) {
|
|
||||||
if (alpha[j] != 0) {
|
|
||||||
sum += alpha[j] * outputSet[j] * alg.dot(inputSet[j], X[i]); // TO DO: DON'T forget to add non-linear kernelizations.
|
|
||||||
}
|
|
||||||
}
|
|
||||||
sum += bias;
|
|
||||||
z.push_back(sum);
|
|
||||||
}
|
|
||||||
return z;
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPDualSVCOld::Evaluate(std::vector<real_t> x) {
|
|
||||||
MLPPActivationOld avn;
|
|
||||||
return avn.sign(propagate(x));
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPDualSVCOld::propagate(std::vector<real_t> x) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
real_t z = 0;
|
|
||||||
for (uint32_t j = 0; j < alpha.size(); j++) {
|
|
||||||
if (alpha[j] != 0) {
|
|
||||||
z += alpha[j] * outputSet[j] * alg.dot(inputSet[j], x); // TO DO: DON'T forget to add non-linear kernelizations.
|
|
||||||
}
|
|
||||||
}
|
|
||||||
z += bias;
|
|
||||||
return z;
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPDualSVCOld::forwardPass() {
|
|
||||||
MLPPActivationOld avn;
|
|
||||||
|
|
||||||
z = propagate(inputSet);
|
|
||||||
y_hat = avn.sign(z);
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPDualSVCOld::alphaProjection() {
|
|
||||||
for (uint32_t i = 0; i < alpha.size(); i++) {
|
|
||||||
if (alpha[i] > C) {
|
|
||||||
alpha[i] = C;
|
|
||||||
} else if (alpha[i] < 0) {
|
|
||||||
alpha[i] = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPDualSVCOld::kernelFunction(std::vector<real_t> u, std::vector<real_t> v, std::string kernel) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
if (kernel == "Linear") {
|
|
||||||
return alg.dot(u, v);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPDualSVCOld::kernelFunction(std::vector<std::vector<real_t>> A, std::vector<std::vector<real_t>> B, std::string kernel) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
if (kernel == "Linear") {
|
|
||||||
return alg.matmult(A, alg.transpose(B));
|
|
||||||
}
|
|
||||||
|
|
||||||
return std::vector<std::vector<real_t>>();
|
|
||||||
}
@ -1,69 +0,0 @@
#ifndef MLPP_DUAL_SVC_OLD_H
|
|
||||||
#define MLPP_DUAL_SVC_OLD_H
|
|
||||||
|
|
||||||
//
|
|
||||||
// DualSVC.hpp
|
|
||||||
//
|
|
||||||
// Created by Marc Melikyan on 10/2/20.
|
|
||||||
//
|
|
||||||
// http://disp.ee.ntu.edu.tw/~pujols/Support%20Vector%20Machine.pdf
|
|
||||||
// http://ciml.info/dl/v0_99/ciml-v0_99-ch11.pdf
|
|
||||||
// These were excellent for building practical intuition behind the dual formulation.
|
|
||||||
|
|
||||||
#include "core/math/math_defs.h"
|
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
class MLPPDualSVCOld {
|
|
||||||
public:
|
|
||||||
MLPPDualSVCOld(std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet, real_t C, std::string kernel = "Linear");
|
|
||||||
MLPPDualSVCOld(std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet, real_t C, std::string kernel, real_t p, real_t c);
|
|
||||||
|
|
||||||
std::vector<real_t> modelSetTest(std::vector<std::vector<real_t>> X);
|
|
||||||
real_t modelTest(std::vector<real_t> x);
|
|
||||||
void gradientDescent(real_t learning_rate, int max_epoch, bool UI = false);
|
|
||||||
void SGD(real_t learning_rate, int max_epoch, bool UI = false);
|
|
||||||
void MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI = false);
|
|
||||||
real_t score();
|
|
||||||
void save(std::string fileName);
|
|
||||||
|
|
||||||
private:
|
|
||||||
void init();
|
|
||||||
|
|
||||||
real_t Cost(std::vector<real_t> alpha, std::vector<std::vector<real_t>> X, std::vector<real_t> y);
|
|
||||||
|
|
||||||
std::vector<real_t> Evaluate(std::vector<std::vector<real_t>> X);
|
|
||||||
std::vector<real_t> propagate(std::vector<std::vector<real_t>> X);
|
|
||||||
real_t Evaluate(std::vector<real_t> x);
|
|
||||||
real_t propagate(std::vector<real_t> x);
|
|
||||||
void forwardPass();
|
|
||||||
|
|
||||||
void alphaProjection();
|
|
||||||
|
|
||||||
real_t kernelFunction(std::vector<real_t> v, std::vector<real_t> u, std::string kernel);
|
|
||||||
std::vector<std::vector<real_t>> kernelFunction(std::vector<std::vector<real_t>> U, std::vector<std::vector<real_t>> V, std::string kernel);
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> inputSet;
|
|
||||||
std::vector<real_t> outputSet;
|
|
||||||
std::vector<real_t> z;
|
|
||||||
std::vector<real_t> y_hat;
|
|
||||||
real_t bias;
|
|
||||||
|
|
||||||
std::vector<real_t> alpha;
|
|
||||||
std::vector<std::vector<real_t>> K;
|
|
||||||
|
|
||||||
real_t C;
|
|
||||||
int n;
|
|
||||||
int k;
|
|
||||||
|
|
||||||
std::string kernel;
|
|
||||||
real_t p; // Poly
|
|
||||||
real_t c; // Poly
|
|
||||||
|
|
||||||
// UI Portion
|
|
||||||
void UI(int epoch, real_t cost_prev);
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif /* DualSVC_hpp */
@ -1,247 +0,0 @@
//
|
|
||||||
// ExpReg.cpp
|
|
||||||
//
|
|
||||||
// Created by Marc Melikyan on 10/2/20.
|
|
||||||
//
|
|
||||||
|
|
||||||
#include "exp_reg_old.h"
|
|
||||||
|
|
||||||
#include "../cost/cost_old.h"
|
|
||||||
#include "../lin_alg/lin_alg_old.h"
|
|
||||||
#include "../regularization/reg_old.h"
|
|
||||||
#include "../stat/stat_old.h"
|
|
||||||
#include "../utilities/utilities.h"
|
|
||||||
|
|
||||||
#include <iostream>
|
|
||||||
#include <random>
|
|
||||||
|
|
||||||
MLPPExpRegOld::MLPPExpRegOld(std::vector<std::vector<real_t>> p_inputSet, std::vector<real_t> p_outputSet, std::string p_reg, real_t p_lambda, real_t p_alpha) {
|
|
||||||
inputSet = p_inputSet;
|
|
||||||
outputSet = p_outputSet;
|
|
||||||
n = p_inputSet.size();
|
|
||||||
k = p_inputSet[0].size();
|
|
||||||
reg = p_reg;
|
|
||||||
lambda = p_lambda;
|
|
||||||
alpha = p_alpha;
|
|
||||||
|
|
||||||
y_hat.resize(n);
|
|
||||||
weights = MLPPUtilities::weightInitialization(k);
|
|
||||||
initial = MLPPUtilities::weightInitialization(k);
|
|
||||||
bias = MLPPUtilities::biasInitialization();
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPExpRegOld::modelSetTest(std::vector<std::vector<real_t>> X) {
|
|
||||||
return Evaluate(X);
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPExpRegOld::modelTest(std::vector<real_t> x) {
|
|
||||||
return Evaluate(x);
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPExpRegOld::gradientDescent(real_t learning_rate, int max_epoch, bool UI) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPRegOld regularization;
|
|
||||||
real_t cost_prev = 0;
|
|
||||||
int epoch = 1;
|
|
||||||
forwardPass();
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
cost_prev = Cost(y_hat, outputSet);
|
|
||||||
|
|
||||||
std::vector<real_t> error = alg.subtraction(y_hat, outputSet);
|
|
||||||
|
|
||||||
for (int i = 0; i < k; i++) {
|
|
||||||
// Calculating the weight gradient
|
|
||||||
real_t sum = 0;
|
|
||||||
for (int j = 0; j < n; j++) {
|
|
||||||
sum += error[j] * inputSet[j][i] * std::pow(weights[i], inputSet[j][i] - 1);
|
|
||||||
}
|
|
||||||
real_t w_gradient = sum / n;
|
|
||||||
|
|
||||||
// Calculating the initial gradient
|
|
||||||
real_t sum2 = 0;
|
|
||||||
for (int j = 0; j < n; j++) {
|
|
||||||
sum2 += error[j] * std::pow(weights[i], inputSet[j][i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t i_gradient = sum2 / n;
|
|
||||||
|
|
||||||
// Weight/initial updation
|
|
||||||
weights[i] -= learning_rate * w_gradient;
|
|
||||||
initial[i] -= learning_rate * i_gradient;
|
|
||||||
}
|
|
||||||
weights = regularization.regWeights(weights, lambda, alpha, reg);
|
|
||||||
|
|
||||||
// Calculating the bias gradient
|
|
||||||
real_t sum = 0;
|
|
||||||
for (int j = 0; j < n; j++) {
|
|
||||||
sum += (y_hat[j] - outputSet[j]);
|
|
||||||
}
|
|
||||||
real_t b_gradient = sum / n;
|
|
||||||
|
|
||||||
// bias updation
|
|
||||||
bias -= learning_rate * b_gradient;
|
|
||||||
forwardPass();
|
|
||||||
|
|
||||||
if (UI) {
|
|
||||||
MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
|
|
||||||
MLPPUtilities::UI(weights, bias);
|
|
||||||
}
|
|
||||||
epoch++;
|
|
||||||
|
|
||||||
if (epoch > max_epoch) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPExpRegOld::SGD(real_t learning_rate, int max_epoch, bool UI) {
|
|
||||||
MLPPRegOld regularization;
|
|
||||||
real_t cost_prev = 0;
|
|
||||||
int epoch = 1;
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
std::random_device rd;
|
|
||||||
std::default_random_engine generator(rd());
|
|
||||||
std::uniform_int_distribution<int> distribution(0, int(n - 1));
|
|
||||||
int outputIndex = distribution(generator);
|
|
||||||
|
|
||||||
real_t y_hat = Evaluate(inputSet[outputIndex]);
|
|
||||||
cost_prev = Cost({ y_hat }, { outputSet[outputIndex] });
|
|
||||||
|
|
||||||
for (int i = 0; i < k; i++) {
|
|
||||||
// Calculating the weight gradients
|
|
||||||
|
|
||||||
real_t w_gradient = (y_hat - outputSet[outputIndex]) * inputSet[outputIndex][i] * std::pow(weights[i], inputSet[outputIndex][i] - 1);
|
|
||||||
real_t i_gradient = (y_hat - outputSet[outputIndex]) * std::pow(weights[i], inputSet[outputIndex][i]);
|
|
||||||
|
|
||||||
// Weight/initial updation
|
|
||||||
weights[i] -= learning_rate * w_gradient;
|
|
||||||
initial[i] -= learning_rate * i_gradient;
|
|
||||||
}
|
|
||||||
weights = regularization.regWeights(weights, lambda, alpha, reg);
|
|
||||||
|
|
||||||
// Calculating the bias gradients
|
|
||||||
real_t b_gradient = (y_hat - outputSet[outputIndex]);
|
|
||||||
|
|
||||||
// Bias updation
|
|
||||||
bias -= learning_rate * b_gradient;
|
|
||||||
y_hat = Evaluate({ inputSet[outputIndex] });
|
|
||||||
|
|
||||||
if (UI) {
|
|
||||||
MLPPUtilities::CostInfo(epoch, cost_prev, Cost({ y_hat }, { outputSet[outputIndex] }));
|
|
||||||
MLPPUtilities::UI(weights, bias);
|
|
||||||
}
|
|
||||||
epoch++;
|
|
||||||
|
|
||||||
if (epoch > max_epoch) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
forwardPass();
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPExpRegOld::MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPRegOld regularization;
|
|
||||||
real_t cost_prev = 0;
|
|
||||||
int epoch = 1;
|
|
||||||
|
|
||||||
// Creating the mini-batches
|
|
||||||
int n_mini_batch = n / mini_batch_size;
|
|
||||||
auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
|
|
||||||
auto inputMiniBatches = std::get<0>(batches);
|
|
||||||
auto outputMiniBatches = std::get<1>(batches);
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
for (int i = 0; i < n_mini_batch; i++) {
|
|
||||||
std::vector<real_t> y_hat = Evaluate(inputMiniBatches[i]);
|
|
||||||
cost_prev = Cost(y_hat, outputMiniBatches[i]);
|
|
||||||
std::vector<real_t> error = alg.subtraction(y_hat, outputMiniBatches[i]);
|
|
||||||
|
|
||||||
for (int j = 0; j < k; j++) {
|
|
||||||
// Calculating the weight gradient
|
|
||||||
real_t sum = 0;
|
|
||||||
for (uint32_t k = 0; k < outputMiniBatches[i].size(); k++) {
|
|
||||||
sum += error[k] * inputMiniBatches[i][k][j] * std::pow(weights[j], inputMiniBatches[i][k][j] - 1);
|
|
||||||
}
|
|
||||||
real_t w_gradient = sum / outputMiniBatches[i].size();
|
|
||||||
|
|
||||||
// Calculating the initial gradient
|
|
||||||
real_t sum2 = 0;
|
|
||||||
for (uint32_t k = 0; k < outputMiniBatches[i].size(); k++) {
|
|
||||||
sum2 += error[k] * std::pow(weights[j], inputMiniBatches[i][k][j]);
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t i_gradient = sum2 / outputMiniBatches[i].size();
|
|
||||||
|
|
||||||
// Weight/initial updation
|
|
||||||
weights[j] -= learning_rate * w_gradient;
|
|
||||||
initial[j] -= learning_rate * i_gradient;
|
|
||||||
}
|
|
||||||
weights = regularization.regWeights(weights, lambda, alpha, reg);
|
|
||||||
|
|
||||||
// Calculating the bias gradient
|
|
||||||
//real_t sum = 0;
|
|
||||||
//for (uint32_t j = 0; j < outputMiniBatches[i].size(); j++) {
|
|
||||||
// sum += (y_hat[j] - outputMiniBatches[i][j]);
|
|
||||||
//}
|
|
||||||
|
|
||||||
//real_t b_gradient = sum / outputMiniBatches[i].size();
|
|
||||||
y_hat = Evaluate(inputMiniBatches[i]);
|
|
||||||
|
|
||||||
if (UI) {
|
|
||||||
MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
|
|
||||||
MLPPUtilities::UI(weights, bias);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
epoch++;
|
|
||||||
if (epoch > max_epoch) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
forwardPass();
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPExpRegOld::score() {
|
|
||||||
MLPPUtilities util;
|
|
||||||
return util.performance(y_hat, outputSet);
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPExpRegOld::save(std::string fileName) {
|
|
||||||
MLPPUtilities util;
|
|
||||||
util.saveParameters(fileName, weights, initial, bias);
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPExpRegOld::Cost(std::vector<real_t> y_hat, std::vector<real_t> y) {
|
|
||||||
MLPPRegOld regularization;
|
|
||||||
class MLPPCostOld cost;
|
|
||||||
return cost.MSE(y_hat, y) + regularization.regTerm(weights, lambda, alpha, reg);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPExpRegOld::Evaluate(std::vector<std::vector<real_t>> X) {
|
|
||||||
std::vector<real_t> y_hat;
|
|
||||||
y_hat.resize(X.size());
|
|
||||||
for (uint32_t i = 0; i < X.size(); i++) {
|
|
||||||
y_hat[i] = 0;
|
|
||||||
for (uint32_t j = 0; j < X[i].size(); j++) {
|
|
||||||
y_hat[i] += initial[j] * std::pow(weights[j], X[i][j]);
|
|
||||||
}
|
|
||||||
y_hat[i] += bias;
|
|
||||||
}
|
|
||||||
return y_hat;
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPExpRegOld::Evaluate(std::vector<real_t> x) {
|
|
||||||
real_t y_hat = 0;
|
|
||||||
for (uint32_t i = 0; i < x.size(); i++) {
|
|
||||||
y_hat += initial[i] * std::pow(weights[i], x[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
return y_hat + bias;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Model: y_hat = sum_i initial[i] * weights[i]^x_i + bias
|
|
||||||
void MLPPExpRegOld::forwardPass() {
|
|
||||||
y_hat = Evaluate(inputSet);
|
|
||||||
}
@ -1,50 +0,0 @@
#ifndef MLPP_EXP_REG_OLD_H
|
|
||||||
#define MLPP_EXP_REG_OLD_H
|
|
||||||
|
|
||||||
//
|
|
||||||
// ExpReg.hpp
|
|
||||||
//
|
|
||||||
// Created by Marc Melikyan on 10/2/20.
|
|
||||||
//
|
|
||||||
|
|
||||||
#include "core/math/math_defs.h"
|
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
class MLPPExpRegOld {
|
|
||||||
public:
|
|
||||||
MLPPExpRegOld(std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet, std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5);
|
|
||||||
std::vector<real_t> modelSetTest(std::vector<std::vector<real_t>> X);
|
|
||||||
real_t modelTest(std::vector<real_t> x);
|
|
||||||
void gradientDescent(real_t learning_rate, int max_epoch, bool UI = false);
void SGD(real_t learning_rate, int max_epoch, bool UI = false);
void MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI = false);
|
|
||||||
real_t score();
|
|
||||||
void save(std::string fileName);
|
|
||||||
|
|
||||||
private:
|
|
||||||
real_t Cost(std::vector<real_t> y_hat, std::vector<real_t> y);
|
|
||||||
|
|
||||||
std::vector<real_t> Evaluate(std::vector<std::vector<real_t>> X);
|
|
||||||
real_t Evaluate(std::vector<real_t> x);
|
|
||||||
void forwardPass();
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> inputSet;
|
|
||||||
std::vector<real_t> outputSet;
|
|
||||||
std::vector<real_t> y_hat;
|
|
||||||
std::vector<real_t> weights;
|
|
||||||
std::vector<real_t> initial;
|
|
||||||
real_t bias;
|
|
||||||
|
|
||||||
int n;
|
|
||||||
int k;
|
|
||||||
|
|
||||||
// Regularization Params
|
|
||||||
std::string reg;
|
|
||||||
real_t lambda;
|
|
||||||
real_t alpha; /* This is the controlling param for Elastic Net */
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif /* ExpReg_hpp */
@ -1,287 +0,0 @@
//
|
|
||||||
// GAN.cpp
|
|
||||||
//
|
|
||||||
// Created by Marc Melikyan on 11/4/20.
|
|
||||||
//
|
|
||||||
|
|
||||||
#include "gan_old.h"
|
|
||||||
#include "../activation/activation_old.h"
|
|
||||||
#include "../cost/cost_old.h"
|
|
||||||
#include "../lin_alg/lin_alg_old.h"
|
|
||||||
#include "../regularization/reg_old.h"
|
|
||||||
#include "../utilities/utilities.h"
|
|
||||||
|
|
||||||
#include <cmath>
|
|
||||||
#include <iostream>
|
|
||||||
|
|
||||||
MLPPGANOld::MLPPGANOld(real_t k, std::vector<std::vector<real_t>> outputSet) :
|
|
||||||
outputSet(outputSet), n(outputSet.size()), k(k) {
|
|
||||||
}
|
|
||||||
|
|
||||||
MLPPGANOld::~MLPPGANOld() {
|
|
||||||
delete outputLayer;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPGANOld::generateExample(int n) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
return modelSetTestGenerator(alg.gaussianNoise(n, k));
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPGANOld::gradientDescent(real_t learning_rate, int max_epoch, bool UI) {
|
|
||||||
class MLPPCost cost;
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
real_t cost_prev = 0;
|
|
||||||
int epoch = 1;
|
|
||||||
forwardPass();
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
cost_prev = Cost(y_hat, alg.onevec(n));
|
|
||||||
|
|
||||||
// Training of the discriminator.
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> generatorInputSet = alg.gaussianNoise(n, k);
|
|
||||||
std::vector<std::vector<real_t>> discriminatorInputSet = modelSetTestGenerator(generatorInputSet);
|
|
||||||
discriminatorInputSet.insert(discriminatorInputSet.end(), outputSet.begin(), outputSet.end()); // Fake + real inputs.
|
|
||||||
|
|
||||||
std::vector<real_t> y_hat = modelSetTestDiscriminator(discriminatorInputSet);
|
|
||||||
std::vector<real_t> outputSet = alg.zerovec(n);
|
|
||||||
std::vector<real_t> outputSetReal = alg.onevec(n);
|
|
||||||
outputSet.insert(outputSet.end(), outputSetReal.begin(), outputSetReal.end()); // Fake + real output scores.
|
|
||||||
|
|
||||||
auto dgrads = computeDiscriminatorGradients(y_hat, outputSet);
|
|
||||||
auto cumulativeDiscriminatorHiddenLayerWGrad = std::get<0>(dgrads);
|
|
||||||
auto outputDiscriminatorWGrad = std::get<1>(dgrads);
|
|
||||||
|
|
||||||
cumulativeDiscriminatorHiddenLayerWGrad = alg.scalarMultiply(learning_rate / n, cumulativeDiscriminatorHiddenLayerWGrad);
|
|
||||||
outputDiscriminatorWGrad = alg.scalarMultiply(learning_rate / n, outputDiscriminatorWGrad);
|
|
||||||
updateDiscriminatorParameters(cumulativeDiscriminatorHiddenLayerWGrad, outputDiscriminatorWGrad, learning_rate);
|
|
||||||
|
|
||||||
// Training of the generator.
|
|
||||||
generatorInputSet = alg.gaussianNoise(n, k);
|
|
||||||
discriminatorInputSet = modelSetTestGenerator(generatorInputSet);
|
|
||||||
y_hat = modelSetTestDiscriminator(discriminatorInputSet);
|
|
||||||
outputSet = alg.onevec(n);
|
|
||||||
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> cumulativeGeneratorHiddenLayerWGrad = computeGeneratorGradients(y_hat, outputSet);
|
|
||||||
cumulativeGeneratorHiddenLayerWGrad = alg.scalarMultiply(learning_rate / n, cumulativeGeneratorHiddenLayerWGrad);
|
|
||||||
updateGeneratorParameters(cumulativeGeneratorHiddenLayerWGrad, learning_rate);
|
|
||||||
|
|
||||||
forwardPass();
|
|
||||||
if (UI) {
|
|
||||||
MLPPGANOld::UI(epoch, cost_prev, MLPPGANOld::y_hat, alg.onevec(n));
|
|
||||||
}
|
|
||||||
|
|
||||||
epoch++;
|
|
||||||
if (epoch > max_epoch) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPGANOld::score() {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPUtilities util;
|
|
||||||
forwardPass();
|
|
||||||
return util.performance(y_hat, alg.onevec(n));
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPGANOld::save(std::string fileName) {
|
|
||||||
MLPPUtilities util;
|
|
||||||
if (!network.empty()) {
|
|
||||||
util.saveParameters(fileName, network[0].weights, network[0].bias, false, 1);
|
|
||||||
for (uint32_t i = 1; i < network.size(); i++) {
|
|
||||||
util.saveParameters(fileName, network[i].weights, network[i].bias, true, i + 1);
|
|
||||||
}
|
|
||||||
util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, true, network.size() + 1);
|
|
||||||
} else {
|
|
||||||
util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, false, network.size() + 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPGANOld::addLayer(int n_hidden, std::string activation, std::string weightInit, std::string reg, real_t lambda, real_t alpha) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
if (network.empty()) {
|
|
||||||
network.push_back(MLPPOldHiddenLayer(n_hidden, activation, alg.gaussianNoise(n, k), weightInit, reg, lambda, alpha));
|
|
||||||
network[0].forwardPass();
|
|
||||||
} else {
|
|
||||||
network.push_back(MLPPOldHiddenLayer(n_hidden, activation, network[network.size() - 1].a, weightInit, reg, lambda, alpha));
|
|
||||||
network[network.size() - 1].forwardPass();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPGANOld::addOutputLayer(std::string weightInit, std::string reg, real_t lambda, real_t alpha) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
if (!network.empty()) {
|
|
||||||
outputLayer = new MLPPOldOutputLayer(network[network.size() - 1].n_hidden, "Sigmoid", "LogLoss", network[network.size() - 1].a, weightInit, reg, lambda, alpha);
|
|
||||||
} else {
|
|
||||||
outputLayer = new MLPPOldOutputLayer(k, "Sigmoid", "LogLoss", alg.gaussianNoise(n, k), weightInit, reg, lambda, alpha);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPGANOld::modelSetTestGenerator(std::vector<std::vector<real_t>> X) {
|
|
||||||
if (!network.empty()) {
|
|
||||||
network[0].input = X;
|
|
||||||
network[0].forwardPass();
|
|
||||||
|
|
||||||
for (uint32_t i = 1; i <= network.size() / 2; i++) {
|
|
||||||
network[i].input = network[i - 1].a;
|
|
||||||
network[i].forwardPass();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return network[network.size() / 2].a;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPGANOld::modelSetTestDiscriminator(std::vector<std::vector<real_t>> X) {
|
|
||||||
if (!network.empty()) {
|
|
||||||
for (uint32_t i = network.size() / 2 + 1; i < network.size(); i++) {
|
|
||||||
if (i == network.size() / 2 + 1) {
|
|
||||||
network[i].input = X;
|
|
||||||
} else {
|
|
||||||
network[i].input = network[i - 1].a;
|
|
||||||
}
|
|
||||||
network[i].forwardPass();
|
|
||||||
}
|
|
||||||
outputLayer->input = network[network.size() - 1].a;
|
|
||||||
}
|
|
||||||
outputLayer->forwardPass();
|
|
||||||
return outputLayer->a;
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPGANOld::Cost(std::vector<real_t> y_hat, std::vector<real_t> y) {
|
|
||||||
MLPPRegOld regularization;
|
|
||||||
class MLPPCostOld cost;
|
|
||||||
real_t totalRegTerm = 0;
|
|
||||||
|
|
||||||
auto cost_function = outputLayer->cost_map[outputLayer->cost];
|
|
||||||
if (!network.empty()) {
|
|
||||||
for (uint32_t i = 0; i < network.size() - 1; i++) {
|
|
||||||
totalRegTerm += regularization.regTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return (cost.*cost_function)(y_hat, y) + totalRegTerm + regularization.regTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg);
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPGANOld::forwardPass() {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
if (!network.empty()) {
|
|
||||||
network[0].input = alg.gaussianNoise(n, k);
|
|
||||||
network[0].forwardPass();
|
|
||||||
|
|
||||||
for (uint32_t i = 1; i < network.size(); i++) {
|
|
||||||
network[i].input = network[i - 1].a;
|
|
||||||
network[i].forwardPass();
|
|
||||||
}
|
|
||||||
outputLayer->input = network[network.size() - 1].a;
|
|
||||||
} else { // Should never happen, though.
|
|
||||||
outputLayer->input = alg.gaussianNoise(n, k);
|
|
||||||
}
|
|
||||||
outputLayer->forwardPass();
|
|
||||||
y_hat = outputLayer->a;
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPGANOld::updateDiscriminatorParameters(std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations, std::vector<real_t> outputLayerUpdation, real_t learning_rate) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
|
|
||||||
outputLayer->weights = alg.subtraction(outputLayer->weights, outputLayerUpdation);
|
|
||||||
outputLayer->bias -= learning_rate * alg.sum_elements(outputLayer->delta) / n;
|
|
||||||
|
|
||||||
if (!network.empty()) {
|
|
||||||
network[network.size() - 1].weights = alg.subtraction(network[network.size() - 1].weights, hiddenLayerUpdations[0]);
|
|
||||||
network[network.size() - 1].bias = alg.subtractMatrixRows(network[network.size() - 1].bias, alg.scalarMultiply(learning_rate / n, network[network.size() - 1].delta));
|
|
||||||
|
|
||||||
for (int i = static_cast<int>(network.size()) - 2; i > static_cast<int>(network.size()) / 2; i--) {
|
|
||||||
network[i].weights = alg.subtraction(network[i].weights, hiddenLayerUpdations[(network.size() - 2) - i + 1]);
|
|
||||||
network[i].bias = alg.subtractMatrixRows(network[i].bias, alg.scalarMultiply(learning_rate / n, network[i].delta));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPGANOld::updateGeneratorParameters(std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations, real_t learning_rate) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
|
|
||||||
if (!network.empty()) {
|
|
||||||
for (int i = network.size() / 2; i >= 0; i--) {
|
|
||||||
//std::cout << network[i].weights.size() << "x" << network[i].weights[0].size() << std::endl;
|
|
||||||
//std::cout << hiddenLayerUpdations[(network.size() - 2) - i + 1].size() << "x" << hiddenLayerUpdations[(network.size() - 2) - i + 1][0].size() << std::endl;
|
|
||||||
network[i].weights = alg.subtraction(network[i].weights, hiddenLayerUpdations[(network.size() - 2) - i + 1]);
|
|
||||||
network[i].bias = alg.subtractMatrixRows(network[i].bias, alg.scalarMultiply(learning_rate / n, network[i].delta));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::tuple<std::vector<std::vector<std::vector<real_t>>>, std::vector<real_t>> MLPPGANOld::computeDiscriminatorGradients(std::vector<real_t> y_hat, std::vector<real_t> outputSet) {
|
|
||||||
class MLPPCostOld cost;
|
|
||||||
MLPPActivationOld avn;
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPRegOld regularization;
|
|
||||||
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> cumulativeHiddenLayerWGrad; // Tensor containing ALL hidden grads.
|
|
||||||
|
|
||||||
auto costDeriv = outputLayer->costDeriv_map[outputLayer->cost];
|
|
||||||
auto outputAvn = outputLayer->activation_map[outputLayer->activation];
|
|
||||||
outputLayer->delta = alg.hadamard_product((cost.*costDeriv)(y_hat, outputSet), (avn.*outputAvn)(outputLayer->z, 1));
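// delta = dC/dy_hat (elementwise) * activation'(z); the output-layer weight gradient below is input^T * delta.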
std::vector<real_t> outputWGrad = alg.mat_vec_mult(alg.transpose(outputLayer->input), outputLayer->delta);
|
|
||||||
outputWGrad = alg.addition(outputWGrad, regularization.regDerivTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg));
|
|
||||||
|
|
||||||
if (!network.empty()) {
|
|
||||||
auto hiddenLayerAvn = network[network.size() - 1].activation_map[network[network.size() - 1].activation];
|
|
||||||
|
|
||||||
network[network.size() - 1].delta = alg.hadamard_product(alg.outerProduct(outputLayer->delta, outputLayer->weights), (avn.*hiddenLayerAvn)(network[network.size() - 1].z, 1));
|
|
||||||
std::vector<std::vector<real_t>> hiddenLayerWGrad = alg.matmult(alg.transpose(network[network.size() - 1].input), network[network.size() - 1].delta);
|
|
||||||
|
|
||||||
cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[network.size() - 1].weights, network[network.size() - 1].lambda, network[network.size() - 1].alpha, network[network.size() - 1].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
|
|
||||||
|
|
||||||
//std::cout << "HIDDENLAYER FIRST:" << hiddenLayerWGrad.size() << "x" << hiddenLayerWGrad[0].size() << std::endl;
|
|
||||||
//std::cout << "WEIGHTS SECOND:" << network[network.size() - 1].weights.size() << "x" << network[network.size() - 1].weights[0].size() << std::endl;
|
|
||||||
|
|
||||||
for (int i = static_cast<int>(network.size()) - 2; i > static_cast<int>(network.size()) / 2; i--) {
|
|
||||||
hiddenLayerAvn = network[i].activation_map[network[i].activation];
|
|
||||||
network[i].delta = alg.hadamard_product(alg.matmult(network[i + 1].delta, alg.transpose(network[i + 1].weights)), (avn.*hiddenLayerAvn)(network[i].z, 1));
|
|
||||||
hiddenLayerWGrad = alg.matmult(alg.transpose(network[i].input), network[i].delta);
|
|
||||||
|
|
||||||
cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return { cumulativeHiddenLayerWGrad, outputWGrad };
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> MLPPGANOld::computeGeneratorGradients(std::vector<real_t> y_hat, std::vector<real_t> outputSet) {
|
|
||||||
class MLPPCostOld cost;
|
|
||||||
MLPPActivationOld avn;
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPRegOld regularization;
|
|
||||||
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> cumulativeHiddenLayerWGrad; // Tensor containing ALL hidden grads.
|
|
||||||
|
|
||||||
auto costDeriv = outputLayer->costDeriv_map[outputLayer->cost];
|
|
||||||
auto outputAvn = outputLayer->activation_map[outputLayer->activation];
|
|
||||||
outputLayer->delta = alg.hadamard_product((cost.*costDeriv)(y_hat, outputSet), (avn.*outputAvn)(outputLayer->z, 1));
|
|
||||||
std::vector<real_t> outputWGrad = alg.mat_vec_mult(alg.transpose(outputLayer->input), outputLayer->delta);
|
|
||||||
outputWGrad = alg.addition(outputWGrad, regularization.regDerivTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg));
|
|
||||||
if (!network.empty()) {
|
|
||||||
auto hiddenLayerAvn = network[network.size() - 1].activation_map[network[network.size() - 1].activation];
|
|
||||||
network[network.size() - 1].delta = alg.hadamard_product(alg.outerProduct(outputLayer->delta, outputLayer->weights), (avn.*hiddenLayerAvn)(network[network.size() - 1].z, 1));
|
|
||||||
std::vector<std::vector<real_t>> hiddenLayerWGrad = alg.matmult(alg.transpose(network[network.size() - 1].input), network[network.size() - 1].delta);
|
|
||||||
cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[network.size() - 1].weights, network[network.size() - 1].lambda, network[network.size() - 1].alpha, network[network.size() - 1].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
|
|
||||||
|
|
||||||
for (int i = network.size() - 2; i >= 0; i--) {
|
|
||||||
hiddenLayerAvn = network[i].activation_map[network[i].activation];
|
|
||||||
network[i].delta = alg.hadamard_product(alg.matmult(network[i + 1].delta, alg.transpose(network[i + 1].weights)), (avn.*hiddenLayerAvn)(network[i].z, 1));
|
|
||||||
hiddenLayerWGrad = alg.matmult(alg.transpose(network[i].input), network[i].delta);
|
|
||||||
cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return cumulativeHiddenLayerWGrad;
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPGANOld::UI(int epoch, real_t cost_prev, std::vector<real_t> y_hat, std::vector<real_t> outputSet) {
|
|
||||||
MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
|
|
||||||
std::cout << "Layer " << network.size() + 1 << ": " << std::endl;
|
|
||||||
MLPPUtilities::UI(outputLayer->weights, outputLayer->bias);
|
|
||||||
if (!network.empty()) {
|
|
||||||
for (int i = network.size() - 1; i >= 0; i--) {
|
|
||||||
std::cout << "Layer " << i + 1 << ": " << std::endl;
|
|
||||||
MLPPUtilities::UI(network[i].weights, network[i].bias);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
@ -1,59 +0,0 @@
#ifndef MLPP_GAN_OLD_hpp
|
|
||||||
#define MLPP_GAN_OLD_hpp
|
|
||||||
|
|
||||||
//
|
|
||||||
// GAN.hpp
|
|
||||||
//
|
|
||||||
// Created by Marc Melikyan on 11/4/20.
|
|
||||||
//
|
|
||||||
|
|
||||||
#include "core/math/math_defs.h"
|
|
||||||
|
|
||||||
#include "../hidden_layer/hidden_layer.h"
|
|
||||||
#include "../output_layer/output_layer.h"
|
|
||||||
|
|
||||||
#include "../hidden_layer/hidden_layer_old.h"
|
|
||||||
#include "../output_layer/output_layer_old.h"
|
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <tuple>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
class MLPPGANOld {
|
|
||||||
public:
|
|
||||||
MLPPGANOld(real_t k, std::vector<std::vector<real_t>> outputSet);
|
|
||||||
~MLPPGANOld();
|
|
||||||
std::vector<std::vector<real_t>> generateExample(int n);
|
|
||||||
void gradientDescent(real_t learning_rate, int max_epoch, bool UI = false);
|
|
||||||
real_t score();
|
|
||||||
void save(std::string fileName);
|
|
||||||
|
|
||||||
void addLayer(int n_hidden, std::string activation, std::string weightInit = "Default", std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5);
|
|
||||||
void addOutputLayer(std::string weightInit = "Default", std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5);
|
|
||||||
|
|
||||||
private:
|
|
||||||
std::vector<std::vector<real_t>> modelSetTestGenerator(std::vector<std::vector<real_t>> X); // Evaluator for the generator of the gan.
|
|
||||||
std::vector<real_t> modelSetTestDiscriminator(std::vector<std::vector<real_t>> X); // Evaluator for the discriminator of the gan.
|
|
||||||
|
|
||||||
real_t Cost(std::vector<real_t> y_hat, std::vector<real_t> y);
|
|
||||||
|
|
||||||
void forwardPass();
|
|
||||||
void updateDiscriminatorParameters(std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations, std::vector<real_t> outputLayerUpdation, real_t learning_rate);
|
|
||||||
void updateGeneratorParameters(std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations, real_t learning_rate);
|
|
||||||
std::tuple<std::vector<std::vector<std::vector<real_t>>>, std::vector<real_t>> computeDiscriminatorGradients(std::vector<real_t> y_hat, std::vector<real_t> outputSet);
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> computeGeneratorGradients(std::vector<real_t> y_hat, std::vector<real_t> outputSet);
|
|
||||||
|
|
||||||
void UI(int epoch, real_t cost_prev, std::vector<real_t> y_hat, std::vector<real_t> outputSet);
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> outputSet;
|
|
||||||
std::vector<real_t> y_hat;
|
|
||||||
|
|
||||||
std::vector<MLPPOldHiddenLayer> network;
|
|
||||||
MLPPOldOutputLayer *outputLayer;
|
|
||||||
|
|
||||||
int n;
|
|
||||||
int k;
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif /* GAN_hpp */
|
|
@ -1,61 +0,0 @@
//
// GaussMarkovChecker.cpp
//
// Created by Marc Melikyan on 11/13/20.
//

#include "gauss_markov_checker_old.h"
#include "../stat/stat_old.h"
#include "core/int_types.h"
#include <iostream>

void MLPPGaussMarkovCheckerOld::checkGMConditions(std::vector<real_t> eps) {
	bool condition1 = arithmeticMean(eps);
	bool condition2 = homoscedasticity(eps);
	bool condition3 = exogeneity(eps);

	if (condition1 && condition2 && condition3) {
		std::cout << "Gauss-Markov conditions were not violated. You may use OLS to obtain a BLUE estimator" << std::endl;
	} else {
		std::cout << "A test of the expected value of 0 of the error terms returned " << std::boolalpha << condition1 << ", a test of homoscedasticity has returned " << std::boolalpha << condition2 << ", and a test of exogeneity has returned " << std::boolalpha << condition3 << "." << std::endl;
	}
}

bool MLPPGaussMarkovCheckerOld::arithmeticMean(std::vector<real_t> eps) {
	MLPPStatOld stat;
	if (stat.mean(eps) == 0) {
		return true;
	} else {
		return false;
	}
}

bool MLPPGaussMarkovCheckerOld::homoscedasticity(std::vector<real_t> eps) {
	MLPPStatOld stat;
	real_t currentVar = (eps[0] - stat.mean(eps)) * (eps[0] - stat.mean(eps)) / eps.size();
	for (uint32_t i = 0; i < eps.size(); i++) {
		if (currentVar != (eps[i] - stat.mean(eps)) * (eps[i] - stat.mean(eps)) / eps.size()) {
			return false;
		}
	}

	return true;
}

bool MLPPGaussMarkovCheckerOld::exogeneity(std::vector<real_t> eps) {
	MLPPStatOld stat;
	for (uint32_t i = 0; i < eps.size(); i++) {
		for (uint32_t j = 0; j < eps.size(); j++) {
			if (i != j) {
				if ((eps[i] - stat.mean(eps)) * (eps[j] - stat.mean(eps)) / eps.size() != 0) {
					return false;
				}
			}
		}
	}

	return true;
}

void MLPPGaussMarkovCheckerOld::_bind_methods() {
}
@ -1,29 +0,0 @@
#ifndef MLPP_GAUSS_MARKOV_CHECKER_OLD_H
#define MLPP_GAUSS_MARKOV_CHECKER_OLD_H

//
// GaussMarkovChecker.hpp
//
// Created by Marc Melikyan on 11/13/20.
//

#include "core/math/math_defs.h"

#include <string>
#include <vector>

class MLPPGaussMarkovCheckerOld {
public:
	void checkGMConditions(std::vector<real_t> eps);

	// Independent, 3 Gauss-Markov Conditions
	bool arithmeticMean(std::vector<real_t> eps); // 1) Arithmetic Mean of 0.
	bool homoscedasticity(std::vector<real_t> eps); // 2) Homoscedasticity
	bool exogeneity(std::vector<real_t> eps); // 3) Cov of any 2 non-equal eps values = 0.

protected:
	static void _bind_methods();
};

#endif /* GaussMarkovChecker_hpp */
@ -1,93 +0,0 @@
|
|||||||
//
|
|
||||||
// GaussianNB.cpp
|
|
||||||
//
|
|
||||||
// Created by Marc Melikyan on 1/17/21.
|
|
||||||
//
|
|
||||||
|
|
||||||
#include "gaussian_nb_old.h"
|
|
||||||
|
|
||||||
#include "../lin_alg/lin_alg_old.h"
|
|
||||||
#include "../stat/stat_old.h"
|
|
||||||
#include "../utilities/utilities.h"
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
#include <iostream>
|
|
||||||
#include <random>
|
|
||||||
|
|
||||||
#ifndef M_PI
|
|
||||||
#define M_PI 3.14159265358979323846
|
|
||||||
#endif
|
|
||||||
|
|
||||||
MLPPGaussianNBOld::MLPPGaussianNBOld(std::vector<std::vector<real_t>> p_inputSet, std::vector<real_t> p_outputSet, int p_class_num) {
|
|
||||||
inputSet = p_inputSet;
|
|
||||||
outputSet = p_outputSet;
|
|
||||||
class_num = p_class_num;
|
|
||||||
|
|
||||||
y_hat.resize(outputSet.size());
|
|
||||||
Evaluate();
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPGaussianNBOld::modelSetTest(std::vector<std::vector<real_t>> X) {
|
|
||||||
std::vector<real_t> y_hat;
|
|
||||||
for (uint32_t i = 0; i < X.size(); i++) {
|
|
||||||
y_hat.push_back(modelTest(X[i]));
|
|
||||||
}
|
|
||||||
return y_hat;
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPGaussianNBOld::modelTest(std::vector<real_t> x) {
|
|
||||||
real_t score[class_num];
|
|
||||||
real_t y_hat_i = 1;
|
|
||||||
for (int i = class_num - 1; i >= 0; i--) {
|
|
||||||
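// Accumulate log(prior * Gaussian likelihood) for class i; the class with the largest resulting score is returned below.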
y_hat_i += std::log(priors[i] * (1 / sqrt(2 * M_PI * sigma[i] * sigma[i])) * exp(-(x[i] - mu[i]) * (x[i] - mu[i]) / (2 * sigma[i] * sigma[i])));
|
|
||||||
score[i] = exp(y_hat_i);
|
|
||||||
}
|
|
||||||
return std::distance(score, std::max_element(score, score + sizeof(score) / sizeof(real_t)));
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPGaussianNBOld::score() {
|
|
||||||
MLPPUtilities util;
|
|
||||||
return util.performance(y_hat, outputSet);
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPGaussianNBOld::Evaluate() {
|
|
||||||
MLPPStatOld stat;
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
|
|
||||||
// Computing mu_k_y and sigma_k_y
|
|
||||||
mu.resize(class_num);
|
|
||||||
sigma.resize(class_num);
|
|
||||||
for (int i = class_num - 1; i >= 0; i--) {
|
|
||||||
std::vector<real_t> set;
|
|
||||||
for (uint32_t j = 0; j < inputSet.size(); j++) {
|
|
||||||
for (uint32_t k = 0; k < inputSet[j].size(); k++) {
|
|
||||||
if (outputSet[j] == i) {
|
|
||||||
set.push_back(inputSet[j][k]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
mu[i] = stat.mean(set);
|
|
||||||
sigma[i] = stat.standardDeviation(set);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Priors
|
|
||||||
priors.resize(class_num);
|
|
||||||
for (uint32_t i = 0; i < outputSet.size(); i++) {
|
|
||||||
priors[int(outputSet[i])]++;
|
|
||||||
}
|
|
||||||
priors = alg.scalarMultiply(real_t(1) / real_t(outputSet.size()), priors);
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < outputSet.size(); i++) {
|
|
||||||
real_t score[class_num];
|
|
||||||
real_t y_hat_i = 1;
|
|
||||||
for (int j = class_num - 1; j >= 0; j--) {
|
|
||||||
for (uint32_t k = 0; k < inputSet[i].size(); k++) {
|
|
||||||
y_hat_i += std::log(priors[j] * (1 / sqrt(2 * M_PI * sigma[j] * sigma[j])) * exp(-(inputSet[i][k] - mu[j]) * (inputSet[i][k] - mu[j]) / (2 * sigma[j] * sigma[j])));
|
|
||||||
}
|
|
||||||
score[j] = exp(y_hat_i);
|
|
||||||
std::cout << score[j] << std::endl;
|
|
||||||
}
|
|
||||||
y_hat[i] = std::distance(score, std::max_element(score, score + sizeof(score) / sizeof(real_t)));
|
|
||||||
std::cout << std::distance(score, std::max_element(score, score + sizeof(score) / sizeof(real_t))) << std::endl;
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,37 +0,0 @@
|
|||||||
|
|
||||||
#ifndef MLPP_GAUSSIAN_NB_OLD_H
|
|
||||||
#define MLPP_GAUSSIAN_NB_OLD_H
|
|
||||||
|
|
||||||
//
|
|
||||||
// GaussianNB.hpp
|
|
||||||
//
|
|
||||||
// Created by Marc Melikyan on 1/17/21.
|
|
||||||
//
|
|
||||||
|
|
||||||
#include "core/math/math_defs.h"
|
|
||||||
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
class MLPPGaussianNBOld {
|
|
||||||
public:
|
|
||||||
MLPPGaussianNBOld(std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet, int class_num);
|
|
||||||
std::vector<real_t> modelSetTest(std::vector<std::vector<real_t>> X);
|
|
||||||
real_t modelTest(std::vector<real_t> x);
|
|
||||||
real_t score();
|
|
||||||
|
|
||||||
private:
|
|
||||||
void Evaluate();
|
|
||||||
|
|
||||||
int class_num;
|
|
||||||
|
|
||||||
std::vector<real_t> priors;
|
|
||||||
std::vector<real_t> mu;
|
|
||||||
std::vector<real_t> sigma;
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> inputSet;
|
|
||||||
std::vector<real_t> outputSet;
|
|
||||||
|
|
||||||
std::vector<real_t> y_hat;
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif /* GaussianNB_hpp */
|
|
@ -1,118 +0,0 @@
|
|||||||
//
|
|
||||||
// HiddenLayer.cpp
|
|
||||||
//
|
|
||||||
// Created by Marc Melikyan on 11/4/20.
|
|
||||||
//
|
|
||||||
|
|
||||||
#include "hidden_layer_old.h"
|
|
||||||
#include "../activation/activation.h"
|
|
||||||
#include "../lin_alg/lin_alg_old.h"
|
|
||||||
|
|
||||||
#include <iostream>
|
|
||||||
#include <random>
|
|
||||||
|
|
||||||
MLPPOldHiddenLayer::MLPPOldHiddenLayer(int p_n_hidden, std::string p_activation, std::vector<std::vector<real_t>> p_input, std::string p_weightInit, std::string p_reg, real_t p_lambda, real_t p_alpha) {
|
|
||||||
n_hidden = p_n_hidden;
|
|
||||||
activation = p_activation;
|
|
||||||
input = p_input;
|
|
||||||
weightInit = p_weightInit;
|
|
||||||
reg = p_reg;
|
|
||||||
lambda = p_lambda;
|
|
||||||
alpha = p_alpha;
|
|
||||||
|
|
||||||
weights = MLPPUtilities::weightInitialization(input[0].size(), n_hidden, weightInit);
|
|
||||||
bias = MLPPUtilities::biasInitialization(n_hidden);
|
|
||||||
|
|
||||||
activation_map["Linear"] = &MLPPActivationOld::linear;
|
|
||||||
activationTest_map["Linear"] = &MLPPActivationOld::linear;
|
|
||||||
|
|
||||||
activation_map["Sigmoid"] = &MLPPActivationOld::sigmoid;
|
|
||||||
activationTest_map["Sigmoid"] = &MLPPActivationOld::sigmoid;
|
|
||||||
|
|
||||||
activation_map["Swish"] = &MLPPActivationOld::swish;
|
|
||||||
activationTest_map["Swish"] = &MLPPActivationOld::swish;
|
|
||||||
|
|
||||||
activation_map["Mish"] = &MLPPActivationOld::mish;
|
|
||||||
activationTest_map["Mish"] = &MLPPActivationOld::mish;
|
|
||||||
|
|
||||||
activation_map["SinC"] = &MLPPActivationOld::sinc;
|
|
||||||
activationTest_map["SinC"] = &MLPPActivationOld::sinc;
|
|
||||||
|
|
||||||
activation_map["Softplus"] = &MLPPActivationOld::softplus;
|
|
||||||
activationTest_map["Softplus"] = &MLPPActivationOld::softplus;
|
|
||||||
|
|
||||||
activation_map["Softsign"] = &MLPPActivationOld::softsign;
|
|
||||||
activationTest_map["Softsign"] = &MLPPActivationOld::softsign;
|
|
||||||
|
|
||||||
activation_map["CLogLog"] = &MLPPActivationOld::cloglog;
|
|
||||||
activationTest_map["CLogLog"] = &MLPPActivationOld::cloglog;
|
|
||||||
|
|
||||||
activation_map["Logit"] = &MLPPActivationOld::logit;
|
|
||||||
activationTest_map["Logit"] = &MLPPActivationOld::logit;
|
|
||||||
|
|
||||||
activation_map["GaussianCDF"] = &MLPPActivationOld::gaussianCDF;
|
|
||||||
activationTest_map["GaussianCDF"] = &MLPPActivationOld::gaussianCDF;
|
|
||||||
|
|
||||||
activation_map["RELU"] = &MLPPActivationOld::RELU;
|
|
||||||
activationTest_map["RELU"] = &MLPPActivationOld::RELU;
|
|
||||||
|
|
||||||
activation_map["GELU"] = &MLPPActivationOld::GELU;
|
|
||||||
activationTest_map["GELU"] = &MLPPActivationOld::GELU;
|
|
||||||
|
|
||||||
activation_map["Sign"] = &MLPPActivationOld::sign;
|
|
||||||
activationTest_map["Sign"] = &MLPPActivationOld::sign;
|
|
||||||
|
|
||||||
activation_map["UnitStep"] = &MLPPActivationOld::unitStep;
|
|
||||||
activationTest_map["UnitStep"] = &MLPPActivationOld::unitStep;
|
|
||||||
|
|
||||||
activation_map["Sinh"] = &MLPPActivationOld::sinh;
|
|
||||||
activationTest_map["Sinh"] = &MLPPActivationOld::sinh;
|
|
||||||
|
|
||||||
activation_map["Cosh"] = &MLPPActivationOld::cosh;
|
|
||||||
activationTest_map["Cosh"] = &MLPPActivationOld::cosh;
|
|
||||||
|
|
||||||
activation_map["Tanh"] = &MLPPActivationOld::tanh;
|
|
||||||
activationTest_map["Tanh"] = &MLPPActivationOld::tanh;
|
|
||||||
|
|
||||||
activation_map["Csch"] = &MLPPActivationOld::csch;
|
|
||||||
activationTest_map["Csch"] = &MLPPActivationOld::csch;
|
|
||||||
|
|
||||||
activation_map["Sech"] = &MLPPActivationOld::sech;
|
|
||||||
activationTest_map["Sech"] = &MLPPActivationOld::sech;
|
|
||||||
|
|
||||||
activation_map["Coth"] = &MLPPActivationOld::coth;
|
|
||||||
activationTest_map["Coth"] = &MLPPActivationOld::coth;
|
|
||||||
|
|
||||||
activation_map["Arsinh"] = &MLPPActivationOld::arsinh;
|
|
||||||
activationTest_map["Arsinh"] = &MLPPActivationOld::arsinh;
|
|
||||||
|
|
||||||
activation_map["Arcosh"] = &MLPPActivationOld::arcosh;
|
|
||||||
activationTest_map["Arcosh"] = &MLPPActivationOld::arcosh;
|
|
||||||
|
|
||||||
activation_map["Artanh"] = &MLPPActivationOld::artanh;
|
|
||||||
activationTest_map["Artanh"] = &MLPPActivationOld::artanh;
|
|
||||||
|
|
||||||
activation_map["Arcsch"] = &MLPPActivationOld::arcsch;
|
|
||||||
activationTest_map["Arcsch"] = &MLPPActivationOld::arcsch;
|
|
||||||
|
|
||||||
activation_map["Arsech"] = &MLPPActivationOld::arsech;
|
|
||||||
activationTest_map["Arsech"] = &MLPPActivationOld::arsech;
|
|
||||||
|
|
||||||
activation_map["Arcoth"] = &MLPPActivationOld::arcoth;
|
|
||||||
activationTest_map["Arcoth"] = &MLPPActivationOld::arcoth;
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPOldHiddenLayer::forwardPass() {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPActivationOld avn;
|
|
||||||
|
|
||||||
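// Pre-activation: z = input * weights + bias (bias added row-wise); the configured activation is then applied to produce a.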
z = alg.mat_vec_add(alg.matmult(input, weights), bias);
|
|
||||||
a = (avn.*activation_map[activation])(z, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPOldHiddenLayer::Test(std::vector<real_t> x) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPActivationOld avn;
|
|
||||||
z_test = alg.addition(alg.mat_vec_mult(alg.transpose(weights), x), bias);
|
|
||||||
a_test = (avn.*activationTest_map[activation])(z_test, false);
|
|
||||||
}
|
|
@ -1,61 +0,0 @@
|
|||||||
|
|
||||||
#ifndef MLPP_HIDDEN_LAYER_OLD_H
|
|
||||||
#define MLPP_HIDDEN_LAYER_OLD_H
|
|
||||||
|
|
||||||
//
|
|
||||||
// HiddenLayer.hpp
|
|
||||||
//
|
|
||||||
// Created by Marc Melikyan on 11/4/20.
|
|
||||||
//
|
|
||||||
|
|
||||||
#include "core/math/math_defs.h"
|
|
||||||
#include "core/string/ustring.h"
|
|
||||||
|
|
||||||
#include "core/object/reference.h"
|
|
||||||
|
|
||||||
#include "../activation/activation_old.h"
|
|
||||||
#include "../regularization/reg.h"
|
|
||||||
#include "../utilities/utilities.h"
|
|
||||||
|
|
||||||
#include "../lin_alg/mlpp_matrix.h"
|
|
||||||
#include "../lin_alg/mlpp_vector.h"
|
|
||||||
|
|
||||||
#include <map>
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
class MLPPOldHiddenLayer {
|
|
||||||
public:
|
|
||||||
MLPPOldHiddenLayer(int n_hidden, std::string activation, std::vector<std::vector<real_t>> input, std::string weightInit, std::string reg, real_t lambda, real_t alpha);
|
|
||||||
|
|
||||||
int n_hidden;
|
|
||||||
std::string activation;
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> input;
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> weights;
|
|
||||||
std::vector<real_t> bias;
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> z;
|
|
||||||
std::vector<std::vector<real_t>> a;
|
|
||||||
|
|
||||||
std::map<std::string, std::vector<std::vector<real_t>> (MLPPActivationOld::*)(std::vector<std::vector<real_t>>, bool)> activation_map;
|
|
||||||
std::map<std::string, std::vector<real_t> (MLPPActivationOld::*)(std::vector<real_t>, bool)> activationTest_map;
|
|
||||||
|
|
||||||
std::vector<real_t> z_test;
|
|
||||||
std::vector<real_t> a_test;
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> delta;
|
|
||||||
|
|
||||||
// Regularization Params
|
|
||||||
std::string reg;
|
|
||||||
real_t lambda; /* Regularization Parameter */
|
|
||||||
real_t alpha; /* This is the controlling param for Elastic Net*/
|
|
||||||
|
|
||||||
std::string weightInit;
|
|
||||||
|
|
||||||
void forwardPass();
|
|
||||||
void Test(std::vector<real_t> x);
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif /* HiddenLayer_hpp */
|
|
@ -1,20 +0,0 @@
//
// HypothesisTesting.cpp
//
// Created by Marc Melikyan on 3/10/21.
//

#include "hypothesis_testing_old.h"

std::tuple<bool, real_t> MLPPHypothesisTestingOld::chiSquareTest(std::vector<real_t> observed, std::vector<real_t> expected) {
	//real_t df = observed.size() - 1; // These are our degrees of freedom
	//real_t sum = 0;
	//for (uint32_t i = 0; i < observed.size(); i++) {
	//	sum += (observed[i] - expected[i]) * (observed[i] - expected[i]) / expected[i];
	//}

	return std::tuple<bool, real_t>();
}

void MLPPHypothesisTestingOld::_bind_methods() {
}
@ -1,25 +0,0 @@
#ifndef MLPP_HYPOTHESIS_TESTING_OLD_H
#define MLPP_HYPOTHESIS_TESTING_OLD_H

//
// HypothesisTesting.hpp
//
// Created by Marc Melikyan on 3/10/21.
//

#include "core/math/math_defs.h"
#include "core/int_types.h"

#include <tuple>
#include <vector>

class MLPPHypothesisTestingOld {
public:
	std::tuple<bool, real_t> chiSquareTest(std::vector<real_t> observed, std::vector<real_t> expected);

protected:
	static void _bind_methods();
};

#endif /* HypothesisTesting_hpp */
File diff suppressed because it is too large
@ -1,230 +0,0 @@
|
|||||||
|
|
||||||
#ifndef MLPP_LIN_ALG_OLD_H
|
|
||||||
#define MLPP_LIN_ALG_OLD_H
|
|
||||||
|
|
||||||
//
|
|
||||||
// LinAlg.hpp
|
|
||||||
//
|
|
||||||
// Created by Marc Melikyan on 1/8/21.
|
|
||||||
//
|
|
||||||
|
|
||||||
//TODO Methods here should probably use error macros in a way where they get disabled in non-tools(?) (maybe release?) builds
|
|
||||||
|
|
||||||
#include "core/math/math_defs.h"
|
|
||||||
|
|
||||||
#include <tuple>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
class MLPPLinAlgOld {
|
|
||||||
public:
|
|
||||||
// MATRIX FUNCTIONS
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> gramMatrix(std::vector<std::vector<real_t>> A);
|
|
||||||
|
|
||||||
bool linearIndependenceChecker(std::vector<std::vector<real_t>> A);
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> gaussianNoise(int n, int m);
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> addition(std::vector<std::vector<real_t>> A, std::vector<std::vector<real_t>> B);
|
|
||||||
std::vector<std::vector<real_t>> subtraction(std::vector<std::vector<real_t>> A, std::vector<std::vector<real_t>> B);
|
|
||||||
std::vector<std::vector<real_t>> matmult(std::vector<std::vector<real_t>> A, std::vector<std::vector<real_t>> B);
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> hadamard_product(std::vector<std::vector<real_t>> A, std::vector<std::vector<real_t>> B);
|
|
||||||
std::vector<std::vector<real_t>> kronecker_product(std::vector<std::vector<real_t>> A, std::vector<std::vector<real_t>> B);
|
|
||||||
std::vector<std::vector<real_t>> elementWiseDivision(std::vector<std::vector<real_t>> A, std::vector<std::vector<real_t>> B);
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> transpose(std::vector<std::vector<real_t>> A);
|
|
||||||
std::vector<std::vector<real_t>> scalarMultiply(real_t scalar, std::vector<std::vector<real_t>> A);
|
|
||||||
std::vector<std::vector<real_t>> scalarAdd(real_t scalar, std::vector<std::vector<real_t>> A);
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> log(std::vector<std::vector<real_t>> A);
|
|
||||||
std::vector<std::vector<real_t>> log10(std::vector<std::vector<real_t>> A);
|
|
||||||
std::vector<std::vector<real_t>> exp(std::vector<std::vector<real_t>> A);
|
|
||||||
std::vector<std::vector<real_t>> erf(std::vector<std::vector<real_t>> A);
|
|
||||||
std::vector<std::vector<real_t>> exponentiate(std::vector<std::vector<real_t>> A, real_t p);
|
|
||||||
std::vector<std::vector<real_t>> sqrt(std::vector<std::vector<real_t>> A);
|
|
||||||
std::vector<std::vector<real_t>> cbrt(std::vector<std::vector<real_t>> A);
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> matrixPower(std::vector<std::vector<real_t>> A, int n);
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> abs(std::vector<std::vector<real_t>> A);
|
|
||||||
|
|
||||||
real_t det(std::vector<std::vector<real_t>> A, int d);
|
|
||||||
|
|
||||||
real_t trace(std::vector<std::vector<real_t>> A);
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> cofactor(std::vector<std::vector<real_t>> A, int n, int i, int j);
|
|
||||||
std::vector<std::vector<real_t>> adjoint(std::vector<std::vector<real_t>> A);
|
|
||||||
std::vector<std::vector<real_t>> inverse(std::vector<std::vector<real_t>> A);
|
|
||||||
std::vector<std::vector<real_t>> pinverse(std::vector<std::vector<real_t>> A);
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> zeromat(int n, int m);
|
|
||||||
std::vector<std::vector<real_t>> onemat(int n, int m);
|
|
||||||
std::vector<std::vector<real_t>> full(int n, int m, int k);
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> sin(std::vector<std::vector<real_t>> A);
|
|
||||||
std::vector<std::vector<real_t>> cos(std::vector<std::vector<real_t>> A);
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> rotate(std::vector<std::vector<real_t>> A, real_t theta, int axis = -1);
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> max(std::vector<std::vector<real_t>> A, std::vector<std::vector<real_t>> B);
|
|
||||||
real_t max(std::vector<std::vector<real_t>> A);
|
|
||||||
real_t min(std::vector<std::vector<real_t>> A);
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> round(std::vector<std::vector<real_t>> A);
|
|
||||||
|
|
||||||
real_t norm_2(std::vector<std::vector<real_t>> A);
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> identity(real_t d);
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> cov(std::vector<std::vector<real_t>> A);
|
|
||||||
|
|
||||||
std::tuple<std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>> eig(std::vector<std::vector<real_t>> A);
|
|
||||||
|
|
||||||
struct EigenResultOld {
|
|
||||||
std::vector<std::vector<real_t>> eigen_vectors;
|
|
||||||
std::vector<std::vector<real_t>> eigen_values;
|
|
||||||
};
|
|
||||||
|
|
||||||
EigenResultOld eigen_old(std::vector<std::vector<real_t>> A);
|
|
||||||
|
|
||||||
struct SVDResultOld {
|
|
||||||
std::vector<std::vector<real_t>> U;
|
|
||||||
std::vector<std::vector<real_t>> S;
|
|
||||||
std::vector<std::vector<real_t>> Vt;
|
|
||||||
};
|
|
||||||
|
|
||||||
SVDResultOld SVD(std::vector<std::vector<real_t>> A);
|
|
||||||
|
|
||||||
std::vector<real_t> vectorProjection(std::vector<real_t> a, std::vector<real_t> b);
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> gramSchmidtProcess(std::vector<std::vector<real_t>> A);
|
|
||||||
|
|
||||||
std::tuple<std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>> QRD(std::vector<std::vector<real_t>> A);
|
|
||||||
|
|
||||||
struct QRDResult {
|
|
||||||
std::vector<std::vector<real_t>> Q;
|
|
||||||
std::vector<std::vector<real_t>> R;
|
|
||||||
};
|
|
||||||
|
|
||||||
QRDResult qrd(std::vector<std::vector<real_t>> A);
|
|
||||||
|
|
||||||
std::tuple<std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>> chol(std::vector<std::vector<real_t>> A);
|
|
||||||
|
|
||||||
struct CholeskyResult {
|
|
||||||
std::vector<std::vector<real_t>> L;
|
|
||||||
std::vector<std::vector<real_t>> Lt;
|
|
||||||
};
|
|
||||||
|
|
||||||
CholeskyResult cholesky(std::vector<std::vector<real_t>> A);
|
|
||||||
|
|
||||||
real_t sum_elements(std::vector<std::vector<real_t>> A);
|
|
||||||
|
|
||||||
std::vector<real_t> flatten(std::vector<std::vector<real_t>> A);
|
|
||||||
|
|
||||||
std::vector<real_t> solve(std::vector<std::vector<real_t>> A, std::vector<real_t> b);
|
|
||||||
|
|
||||||
bool positiveDefiniteChecker(std::vector<std::vector<real_t>> A);
|
|
||||||
|
|
||||||
bool negativeDefiniteChecker(std::vector<std::vector<real_t>> A);
|
|
||||||
|
|
||||||
bool zeroEigenvalue(std::vector<std::vector<real_t>> A);
|
|
||||||
|
|
||||||
void printMatrix(std::vector<std::vector<real_t>> A);
|
|
||||||
|
|
||||||
// VECTOR FUNCTIONS
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> outerProduct(std::vector<real_t> a, std::vector<real_t> b); // This multiplies a, bT
|
|
||||||
std::vector<real_t> hadamard_product(std::vector<real_t> a, std::vector<real_t> b);
|
|
||||||
|
|
||||||
std::vector<real_t> elementWiseDivision(std::vector<real_t> a, std::vector<real_t> b);
|
|
||||||
|
|
||||||
std::vector<real_t> scalarMultiply(real_t scalar, std::vector<real_t> a);
|
|
||||||
|
|
||||||
std::vector<real_t> scalarAdd(real_t scalar, std::vector<real_t> a);
|
|
||||||
|
|
||||||
std::vector<real_t> addition(std::vector<real_t> a, std::vector<real_t> b);
|
|
||||||
|
|
||||||
std::vector<real_t> subtraction(std::vector<real_t> a, std::vector<real_t> b);
|
|
||||||
|
|
||||||
std::vector<real_t> subtractMatrixRows(std::vector<real_t> a, std::vector<std::vector<real_t>> B);
|
|
||||||
|
|
||||||
std::vector<real_t> log(std::vector<real_t> a);
|
|
||||||
std::vector<real_t> log10(std::vector<real_t> a);
|
|
||||||
std::vector<real_t> exp(std::vector<real_t> a);
|
|
||||||
std::vector<real_t> erf(std::vector<real_t> a);
|
|
||||||
std::vector<real_t> exponentiate(std::vector<real_t> a, real_t p);
|
|
||||||
std::vector<real_t> sqrt(std::vector<real_t> a);
|
|
||||||
std::vector<real_t> cbrt(std::vector<real_t> a);
|
|
||||||
|
|
||||||
real_t dot(std::vector<real_t> a, std::vector<real_t> b);
|
|
||||||
|
|
||||||
std::vector<real_t> cross(std::vector<real_t> a, std::vector<real_t> b);
|
|
||||||
|
|
||||||
std::vector<real_t> abs(std::vector<real_t> a);
|
|
||||||
|
|
||||||
std::vector<real_t> zerovec(int n);
|
|
||||||
std::vector<real_t> onevec(int n);
|
|
||||||
std::vector<real_t> full(int n, int k);
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> diag(std::vector<real_t> a);
|
|
||||||
|
|
||||||
std::vector<real_t> sin(std::vector<real_t> a);
|
|
||||||
std::vector<real_t> cos(std::vector<real_t> a);
|
|
||||||
|
|
||||||
std::vector<real_t> max(std::vector<real_t> a, std::vector<real_t> b);
|
|
||||||
|
|
||||||
real_t max(std::vector<real_t> a);
|
|
||||||
|
|
||||||
real_t min(std::vector<real_t> a);
|
|
||||||
|
|
||||||
std::vector<real_t> round(std::vector<real_t> a);
|
|
||||||
|
|
||||||
real_t euclideanDistance(std::vector<real_t> a, std::vector<real_t> b);
|
|
||||||
|
|
||||||
real_t norm_2(std::vector<real_t> a);
|
|
||||||
|
|
||||||
real_t norm_sq(std::vector<real_t> a);
|
|
||||||
|
|
||||||
real_t sum_elements(std::vector<real_t> a);
|
|
||||||
|
|
||||||
real_t cosineSimilarity(std::vector<real_t> a, std::vector<real_t> b);
|
|
||||||
|
|
||||||
void printVector(std::vector<real_t> a);
|
|
||||||
|
|
||||||
// MATRIX-VECTOR FUNCTIONS
|
|
||||||
std::vector<std::vector<real_t>> mat_vec_add(std::vector<std::vector<real_t>> A, std::vector<real_t> b);
|
|
||||||
std::vector<real_t> mat_vec_mult(std::vector<std::vector<real_t>> A, std::vector<real_t> b);
|
|
||||||
|
|
||||||
// TENSOR FUNCTIONS
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> addition(std::vector<std::vector<std::vector<real_t>>> A, std::vector<std::vector<std::vector<real_t>>> B);
|
|
||||||
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> elementWiseDivision(std::vector<std::vector<std::vector<real_t>>> A, std::vector<std::vector<std::vector<real_t>>> B);
|
|
||||||
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> sqrt(std::vector<std::vector<std::vector<real_t>>> A);
|
|
||||||
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> exponentiate(std::vector<std::vector<std::vector<real_t>>> A, real_t p);
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> tensor_vec_mult(std::vector<std::vector<std::vector<real_t>>> A, std::vector<real_t> b);
|
|
||||||
|
|
||||||
std::vector<real_t> flatten(std::vector<std::vector<std::vector<real_t>>> A);
|
|
||||||
|
|
||||||
void printTensor(std::vector<std::vector<std::vector<real_t>>> A);
|
|
||||||
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> scalarMultiply(real_t scalar, std::vector<std::vector<std::vector<real_t>>> A);
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> scalarAdd(real_t scalar, std::vector<std::vector<std::vector<real_t>>> A);
|
|
||||||
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> resize(std::vector<std::vector<std::vector<real_t>>> A, std::vector<std::vector<std::vector<real_t>>> B);
|
|
||||||
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> hadamard_product(std::vector<std::vector<std::vector<real_t>>> A, std::vector<std::vector<std::vector<real_t>>> B);
|
|
||||||
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> max(std::vector<std::vector<std::vector<real_t>>> A, std::vector<std::vector<std::vector<real_t>>> B);
|
|
||||||
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> abs(std::vector<std::vector<std::vector<real_t>>> A);
|
|
||||||
|
|
||||||
real_t norm_2(std::vector<std::vector<std::vector<real_t>>> A);
|
|
||||||
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> vector_wise_tensor_product(std::vector<std::vector<std::vector<real_t>>> A, std::vector<std::vector<real_t>> B);
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif /* LinAlg_hpp */
|
|
@ -1,598 +0,0 @@
|
|||||||
//
|
|
||||||
// LinReg.cpp
|
|
||||||
//
|
|
||||||
// Created by Marc Melikyan on 10/2/20.
|
|
||||||
//
|
|
||||||
|
|
||||||
#include "lin_reg_old.h"
|
|
||||||
|
|
||||||
#include "../cost/cost_old.h"
|
|
||||||
#include "../lin_alg/lin_alg_old.h"
|
|
||||||
#include "../regularization/reg_old.h"
|
|
||||||
#include "../stat/stat_old.h"
|
|
||||||
#include "../utilities/utilities.h"
|
|
||||||
|
|
||||||
#include <cmath>
|
|
||||||
#include <iostream>
|
|
||||||
#include <random>
|
|
||||||
|
|
||||||
MLPPLinRegOld::MLPPLinRegOld(std::vector<std::vector<real_t>> p_inputSet, std::vector<real_t> p_outputSet, std::string p_reg, real_t p_lambda, real_t p_alpha) {
|
|
||||||
inputSet = p_inputSet;
|
|
||||||
outputSet = p_outputSet;
|
|
||||||
n = p_inputSet.size();
|
|
||||||
k = p_inputSet[0].size();
|
|
||||||
reg = p_reg;
|
|
||||||
lambda = p_lambda;
|
|
||||||
alpha = p_alpha;
|
|
||||||
|
|
||||||
y_hat.resize(n);
|
|
||||||
|
|
||||||
weights = MLPPUtilities::weightInitialization(k);
|
|
||||||
bias = MLPPUtilities::biasInitialization();
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPLinRegOld::modelSetTest(std::vector<std::vector<real_t>> X) {
|
|
||||||
return Evaluate(X);
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPLinRegOld::modelTest(std::vector<real_t> x) {
|
|
||||||
return Evaluate(x);
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPLinRegOld::NewtonRaphson(real_t learning_rate, int max_epoch, bool UI) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPRegOld regularization;
|
|
||||||
real_t cost_prev = 0;
|
|
||||||
int epoch = 1;
|
|
||||||
forwardPass();
|
|
||||||
while (true) {
|
|
||||||
cost_prev = Cost(y_hat, outputSet);
|
|
||||||
|
|
||||||
std::vector<real_t> error = alg.subtraction(y_hat, outputSet);
|
|
||||||
|
|
||||||
// Calculating the weight gradients (2nd derivative)
|
|
||||||
std::vector<real_t> first_derivative = alg.mat_vec_mult(alg.transpose(inputSet), error);
|
|
||||||
std::vector<std::vector<real_t>> second_derivative = alg.matmult(alg.transpose(inputSet), inputSet);
|
|
||||||
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate / n, alg.mat_vec_mult(alg.transpose(alg.inverse(second_derivative)), first_derivative)));
|
|
||||||
weights = regularization.regWeights(weights, lambda, alpha, reg);
|
|
||||||
|
|
||||||
// Calculating the bias gradients (2nd derivative)
|
|
||||||
bias -= learning_rate * alg.sum_elements(error) / n; // We keep this the same. The 2nd derivative is just [1].
|
|
||||||
forwardPass();
|
|
||||||
|
|
||||||
if (UI) {
|
|
||||||
MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
|
|
||||||
MLPPUtilities::UI(weights, bias);
|
|
||||||
}
|
|
||||||
epoch++;
|
|
||||||
if (epoch > max_epoch) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPLinRegOld::gradientDescent(real_t learning_rate, int max_epoch, bool UI) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPRegOld regularization;
|
|
||||||
real_t cost_prev = 0;
|
|
||||||
int epoch = 1;
|
|
||||||
forwardPass();
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
cost_prev = Cost(y_hat, outputSet);
|
|
||||||
|
|
||||||
std::vector<real_t> error = alg.subtraction(y_hat, outputSet);
|
|
||||||
|
|
||||||
// Calculating the weight gradients
|
|
||||||
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate / n, alg.mat_vec_mult(alg.transpose(inputSet), error)));
|
|
||||||
weights = regularization.regWeights(weights, lambda, alpha, reg);
|
|
||||||
|
|
||||||
// Calculating the bias gradients
|
|
||||||
bias -= learning_rate * alg.sum_elements(error) / n;
|
|
||||||
forwardPass();
|
|
||||||
|
|
||||||
if (UI) {
|
|
||||||
MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
|
|
||||||
MLPPUtilities::UI(weights, bias);
|
|
||||||
}
|
|
||||||
epoch++;
|
|
||||||
if (epoch > max_epoch) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPLinRegOld::SGD(real_t learning_rate, int max_epoch, bool UI) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPRegOld regularization;
|
|
||||||
real_t cost_prev = 0;
|
|
||||||
int epoch = 1;
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
std::random_device rd;
|
|
||||||
std::default_random_engine generator(rd());
|
|
||||||
std::uniform_int_distribution<int> distribution(0, int(n - 1));
|
|
||||||
int outputIndex = distribution(generator);
|
|
||||||
|
|
||||||
real_t y_hat = Evaluate(inputSet[outputIndex]);
|
|
||||||
cost_prev = Cost({ y_hat }, { outputSet[outputIndex] });
|
|
||||||
|
|
||||||
real_t error = y_hat - outputSet[outputIndex];
|
|
||||||
|
|
||||||
// Weight update
|
|
||||||
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate * error, inputSet[outputIndex]));
|
|
||||||
weights = regularization.regWeights(weights, lambda, alpha, reg);
|
|
||||||
|
|
||||||
// Bias update
|
|
||||||
bias -= learning_rate * error;
|
|
||||||
|
|
||||||
y_hat = Evaluate({ inputSet[outputIndex] });
|
|
||||||
|
|
||||||
if (UI) {
|
|
||||||
MLPPUtilities::CostInfo(epoch, cost_prev, Cost({ y_hat }, { outputSet[outputIndex] }));
|
|
||||||
MLPPUtilities::UI(weights, bias);
|
|
||||||
}
|
|
||||||
epoch++;
|
|
||||||
|
|
||||||
if (epoch > max_epoch) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
forwardPass();
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPLinRegOld::MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPRegOld regularization;
|
|
||||||
real_t cost_prev = 0;
|
|
||||||
int epoch = 1;
|
|
||||||
|
|
||||||
// Creating the mini-batches
|
|
||||||
int n_mini_batch = n / mini_batch_size;
|
|
||||||
auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
|
|
||||||
auto inputMiniBatches = std::get<0>(batches);
|
|
||||||
auto outputMiniBatches = std::get<1>(batches);
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
for (int i = 0; i < n_mini_batch; i++) {
|
|
||||||
std::vector<real_t> y_hat = Evaluate(inputMiniBatches[i]);
|
|
||||||
cost_prev = Cost(y_hat, outputMiniBatches[i]);
|
|
||||||
|
|
||||||
std::vector<real_t> error = alg.subtraction(y_hat, outputMiniBatches[i]);
|
|
||||||
|
|
||||||
// Calculating the weight gradients
|
|
||||||
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate / outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error)));
|
|
||||||
weights = regularization.regWeights(weights, lambda, alpha, reg);
|
|
||||||
|
|
||||||
// Calculating the bias gradients
|
|
||||||
bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size();
|
|
||||||
y_hat = Evaluate(inputMiniBatches[i]);
|
|
||||||
|
|
||||||
if (UI) {
|
|
||||||
MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
|
|
||||||
MLPPUtilities::UI(weights, bias);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
epoch++;
|
|
||||||
if (epoch > max_epoch) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
forwardPass();
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPLinRegOld::Momentum(real_t learning_rate, int max_epoch, int mini_batch_size, real_t gamma, bool UI) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPRegOld regularization;
|
|
||||||
real_t cost_prev = 0;
|
|
||||||
int epoch = 1;
|
|
||||||
|
|
||||||
// Creating the mini-batches
|
|
||||||
int n_mini_batch = n / mini_batch_size;
|
|
||||||
auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
|
|
||||||
auto inputMiniBatches = std::get<0>(batches);
|
|
||||||
auto outputMiniBatches = std::get<1>(batches);
|
|
||||||
|
|
||||||
// Initializing necessary components for Momentum.
|
|
||||||
std::vector<real_t> v = alg.zerovec(weights.size());
|
|
||||||
while (true) {
|
|
||||||
for (int i = 0; i < n_mini_batch; i++) {
|
|
||||||
std::vector<real_t> y_hat = Evaluate(inputMiniBatches[i]);
|
|
||||||
cost_prev = Cost(y_hat, outputMiniBatches[i]);
|
|
||||||
|
|
||||||
std::vector<real_t> error = alg.subtraction(y_hat, outputMiniBatches[i]);
|
|
||||||
|
|
||||||
// Calculating the weight gradients
|
|
||||||
std::vector<real_t> gradient = alg.scalarMultiply(real_t(1) / outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error));
|
|
||||||
std::vector<real_t> RegDerivTerm = regularization.regDerivTerm(weights, lambda, alpha, reg);
|
|
||||||
std::vector<real_t> weight_grad = alg.addition(gradient, RegDerivTerm); // Weight_grad_final
|
|
||||||
|
|
||||||
v = alg.addition(alg.scalarMultiply(gamma, v), alg.scalarMultiply(learning_rate, weight_grad));
|
|
||||||
|
|
||||||
weights = alg.subtraction(weights, v);
|
|
||||||
|
|
||||||
// Calculating the bias gradients
|
|
||||||
bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); // As normal
|
|
||||||
y_hat = Evaluate(inputMiniBatches[i]);
|
|
||||||
|
|
||||||
if (UI) {
|
|
||||||
MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
|
|
||||||
MLPPUtilities::UI(weights, bias);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
epoch++;
|
|
||||||
if (epoch > max_epoch) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
forwardPass();
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPLinRegOld::NAG(real_t learning_rate, int max_epoch, int mini_batch_size, real_t gamma, bool UI) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPRegOld regularization;
|
|
||||||
real_t cost_prev = 0;
|
|
||||||
int epoch = 1;
|
|
||||||
|
|
||||||
// Creating the mini-batches
|
|
||||||
int n_mini_batch = n / mini_batch_size;
|
|
||||||
auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
|
|
||||||
auto inputMiniBatches = std::get<0>(batches);
|
|
||||||
auto outputMiniBatches = std::get<1>(batches);
|
|
||||||
|
|
||||||
// Initializing necessary components for Momentum.
|
|
||||||
std::vector<real_t> v = alg.zerovec(weights.size());
|
|
||||||
while (true) {
|
|
||||||
for (int i = 0; i < n_mini_batch; i++) {
|
|
||||||
weights = alg.subtraction(weights, alg.scalarMultiply(gamma, v)); // "A posteriori" calculation
|
|
||||||
|
|
||||||
std::vector<real_t> y_hat = Evaluate(inputMiniBatches[i]);
|
|
||||||
cost_prev = Cost(y_hat, outputMiniBatches[i]);
|
|
||||||
|
|
||||||
std::vector<real_t> error = alg.subtraction(y_hat, outputMiniBatches[i]);
|
|
||||||
|
|
||||||
// Calculating the weight gradients
|
|
||||||
std::vector<real_t> gradient = alg.scalarMultiply(real_t(1) / outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error));
|
|
||||||
std::vector<real_t> RegDerivTerm = regularization.regDerivTerm(weights, lambda, alpha, reg);
|
|
||||||
std::vector<real_t> weight_grad = alg.addition(gradient, RegDerivTerm); // Weight_grad_final
|
|
||||||
|
|
||||||
v = alg.addition(alg.scalarMultiply(gamma, v), alg.scalarMultiply(learning_rate, weight_grad));
|
|
||||||
|
|
||||||
weights = alg.subtraction(weights, v);
|
|
||||||
|
|
||||||
// Calculating the bias gradients
|
|
||||||
bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); // As normal
|
|
||||||
y_hat = Evaluate(inputMiniBatches[i]);
|
|
||||||
|
|
||||||
if (UI) {
|
|
||||||
MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
|
|
||||||
MLPPUtilities::UI(weights, bias);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
epoch++;
|
|
||||||
if (epoch > max_epoch) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
forwardPass();
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPLinRegOld::Adagrad(real_t learning_rate, int max_epoch, int mini_batch_size, real_t e, bool UI) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPRegOld regularization;
|
|
||||||
real_t cost_prev = 0;
|
|
||||||
int epoch = 1;
|
|
||||||
|
|
||||||
// Creating the mini-batches
|
|
||||||
int n_mini_batch = n / mini_batch_size;
|
|
||||||
auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
|
|
||||||
auto inputMiniBatches = std::get<0>(batches);
|
|
||||||
auto outputMiniBatches = std::get<1>(batches);
|
|
||||||
|
|
||||||
// Initializing necessary components for Adagrad.
|
|
||||||
std::vector<real_t> v = alg.zerovec(weights.size());
|
|
||||||
while (true) {
|
|
||||||
for (int i = 0; i < n_mini_batch; i++) {
|
|
||||||
std::vector<real_t> y_hat = Evaluate(inputMiniBatches[i]);
|
|
||||||
cost_prev = Cost(y_hat, outputMiniBatches[i]);
|
|
||||||
|
|
||||||
std::vector<real_t> error = alg.subtraction(y_hat, outputMiniBatches[i]);
|
|
||||||
|
|
||||||
// Calculating the weight gradients
|
|
||||||
std::vector<real_t> gradient = alg.scalarMultiply(real_t(1) / outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error));
|
|
||||||
std::vector<real_t> RegDerivTerm = regularization.regDerivTerm(weights, lambda, alpha, reg);
|
|
||||||
std::vector<real_t> weight_grad = alg.addition(gradient, RegDerivTerm); // Weight_grad_final
|
|
||||||
|
|
||||||
v = alg.hadamard_product(weight_grad, weight_grad);
|
|
||||||
|
|
||||||
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, alg.elementWiseDivision(weight_grad, alg.sqrt(alg.scalarAdd(e, v)))));
|
|
||||||
|
|
||||||
// Calculating the bias gradients
|
|
||||||
bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); // As normal
|
|
||||||
y_hat = Evaluate(inputMiniBatches[i]);
|
|
||||||
|
|
||||||
if (UI) {
|
|
||||||
MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
|
|
||||||
MLPPUtilities::UI(weights, bias);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
epoch++;
|
|
||||||
if (epoch > max_epoch) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
forwardPass();
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPLinRegOld::Adadelta(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t e, bool UI) {
|
|
||||||
// Adagrad upgrade. Momentum is applied.
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPRegOld regularization;
|
|
||||||
real_t cost_prev = 0;
|
|
||||||
int epoch = 1;
|
|
||||||
|
|
||||||
// Creating the mini-batches
|
|
||||||
int n_mini_batch = n / mini_batch_size;
|
|
||||||
auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
|
|
||||||
auto inputMiniBatches = std::get<0>(batches);
|
|
||||||
auto outputMiniBatches = std::get<1>(batches);
|
|
||||||
|
|
||||||
// Initializing necessary components for Adagrad.
|
|
||||||
std::vector<real_t> v = alg.zerovec(weights.size());
|
|
||||||
while (true) {
|
|
||||||
for (int i = 0; i < n_mini_batch; i++) {
|
|
||||||
std::vector<real_t> y_hat = Evaluate(inputMiniBatches[i]);
|
|
||||||
cost_prev = Cost(y_hat, outputMiniBatches[i]);
|
|
||||||
|
|
||||||
std::vector<real_t> error = alg.subtraction(y_hat, outputMiniBatches[i]);
|
|
||||||
|
|
||||||
// Calculating the weight gradients
|
|
||||||
std::vector<real_t> gradient = alg.scalarMultiply(real_t(1) / outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error));
|
|
||||||
std::vector<real_t> RegDerivTerm = regularization.regDerivTerm(weights, lambda, alpha, reg);
|
|
||||||
std::vector<real_t> weight_grad = alg.addition(gradient, RegDerivTerm); // Weight_grad_final
|
|
||||||
|
|
||||||
v = alg.addition(alg.scalarMultiply(b1, v), alg.scalarMultiply(1 - b1, alg.hadamard_product(weight_grad, weight_grad)));
|
|
||||||
|
|
||||||
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, alg.elementWiseDivision(weight_grad, alg.sqrt(alg.scalarAdd(e, v)))));
|
|
||||||
|
|
||||||
// Calculating the bias gradients
|
|
||||||
bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); // As normal
|
|
||||||
y_hat = Evaluate(inputMiniBatches[i]);
|
|
||||||
|
|
||||||
if (UI) {
|
|
||||||
MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
|
|
||||||
MLPPUtilities::UI(weights, bias);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
epoch++;
|
|
||||||
if (epoch > max_epoch) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
forwardPass();
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPLinRegOld::Adam(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPRegOld regularization;
|
|
||||||
real_t cost_prev = 0;
|
|
||||||
int epoch = 1;
|
|
||||||
|
|
||||||
// Creating the mini-batches
|
|
||||||
int n_mini_batch = n / mini_batch_size;
|
|
||||||
auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
|
|
||||||
auto inputMiniBatches = std::get<0>(batches);
|
|
||||||
auto outputMiniBatches = std::get<1>(batches);
|
|
||||||
|
|
||||||
// Initializing necessary components for Adam.
|
|
||||||
std::vector<real_t> m = alg.zerovec(weights.size());
|
|
||||||
|
|
||||||
std::vector<real_t> v = alg.zerovec(weights.size());
|
|
||||||
while (true) {
|
|
||||||
for (int i = 0; i < n_mini_batch; i++) {
|
|
||||||
std::vector<real_t> y_hat = Evaluate(inputMiniBatches[i]);
|
|
||||||
cost_prev = Cost(y_hat, outputMiniBatches[i]);
|
|
||||||
|
|
||||||
std::vector<real_t> error = alg.subtraction(y_hat, outputMiniBatches[i]);
|
|
||||||
|
|
||||||
// Calculating the weight gradients
|
|
||||||
std::vector<real_t> gradient = alg.scalarMultiply(real_t(1) / outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error));
|
|
||||||
std::vector<real_t> RegDerivTerm = regularization.regDerivTerm(weights, lambda, alpha, reg);
|
|
||||||
std::vector<real_t> weight_grad = alg.addition(gradient, RegDerivTerm); // Weight_grad_final
|
|
||||||
|
|
||||||
m = alg.addition(alg.scalarMultiply(b1, m), alg.scalarMultiply(1 - b1, weight_grad));
|
|
||||||
v = alg.addition(alg.scalarMultiply(b2, v), alg.scalarMultiply(1 - b2, alg.exponentiate(weight_grad, 2)));
|
|
||||||
|
|
||||||
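// Bias-corrected moment estimates: m_hat = m / (1 - b1^t), v_hat = v / (1 - b2^t).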
std::vector<real_t> m_hat = alg.scalarMultiply(1 / (1 - pow(b1, epoch)), m);
|
|
||||||
std::vector<real_t> v_hat = alg.scalarMultiply(1 / (1 - pow(b2, epoch)), v);
|
|
||||||
|
|
||||||
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, alg.elementWiseDivision(m_hat, alg.scalarAdd(e, alg.sqrt(v_hat)))));
|
|
||||||
|
|
||||||
// Calculating the bias gradients
|
|
||||||
bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); // As normal
|
|
||||||
y_hat = Evaluate(inputMiniBatches[i]);
|
|
||||||
|
|
||||||
if (UI) {
|
|
||||||
MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
|
|
||||||
MLPPUtilities::UI(weights, bias);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
epoch++;
|
|
||||||
if (epoch > max_epoch) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
forwardPass();
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPLinRegOld::Adamax(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPRegOld regularization;
|
|
||||||
real_t cost_prev = 0;
|
|
||||||
int epoch = 1;
|
|
||||||
|
|
||||||
// Creating the mini-batches
|
|
||||||
int n_mini_batch = n / mini_batch_size;
|
|
||||||
auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
|
|
||||||
auto inputMiniBatches = std::get<0>(batches);
|
|
||||||
auto outputMiniBatches = std::get<1>(batches);
|
|
||||||
|
|
||||||
std::vector<real_t> m = alg.zerovec(weights.size());
|
|
||||||
|
|
||||||
std::vector<real_t> u = alg.zerovec(weights.size());
|
|
||||||
while (true) {
|
|
||||||
for (int i = 0; i < n_mini_batch; i++) {
|
|
||||||
std::vector<real_t> y_hat = Evaluate(inputMiniBatches[i]);
|
|
||||||
cost_prev = Cost(y_hat, outputMiniBatches[i]);
|
|
||||||
|
|
||||||
std::vector<real_t> error = alg.subtraction(y_hat, outputMiniBatches[i]);
|
|
||||||
|
|
||||||
// Calculating the weight gradients
|
|
||||||
std::vector<real_t> gradient = alg.scalarMultiply(real_t(1) / outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error));
|
|
||||||
std::vector<real_t> RegDerivTerm = regularization.regDerivTerm(weights, lambda, alpha, reg);
|
|
||||||
std::vector<real_t> weight_grad = alg.addition(gradient, RegDerivTerm); // Weight_grad_final
|
|
||||||
|
|
||||||
m = alg.addition(alg.scalarMultiply(b1, m), alg.scalarMultiply(1 - b1, weight_grad));
|
|
||||||
u = alg.max(alg.scalarMultiply(b2, u), alg.abs(weight_grad));
|
|
||||||
|
|
||||||
std::vector<real_t> m_hat = alg.scalarMultiply(1 / (1 - pow(b1, epoch)), m);
|
|
||||||
|
|
||||||
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, alg.elementWiseDivision(m_hat, u)));
|
|
||||||
|
|
||||||
// Calculating the bias gradients
|
|
||||||
bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); // As normal
|
|
||||||
y_hat = Evaluate(inputMiniBatches[i]);
|
|
||||||
|
|
||||||
if (UI) {
|
|
||||||
MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
|
|
||||||
MLPPUtilities::UI(weights, bias);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
epoch++;
|
|
||||||
if (epoch > max_epoch) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
forwardPass();
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPLinRegOld::Nadam(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPRegOld regularization;
|
|
||||||
real_t cost_prev = 0;
|
|
||||||
int epoch = 1;
|
|
||||||
|
|
||||||
// Creating the mini-batches
|
|
||||||
int n_mini_batch = n / mini_batch_size;
|
|
||||||
auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
|
|
||||||
auto inputMiniBatches = std::get<0>(batches);
|
|
||||||
auto outputMiniBatches = std::get<1>(batches);
|
|
||||||
|
|
||||||
// Initializing necessary components for Adam.
|
|
||||||
std::vector<real_t> m = alg.zerovec(weights.size());
|
|
||||||
std::vector<real_t> v = alg.zerovec(weights.size());
|
|
||||||
std::vector<real_t> m_final = alg.zerovec(weights.size());
|
|
||||||
while (true) {
|
|
||||||
for (int i = 0; i < n_mini_batch; i++) {
|
|
||||||
std::vector<real_t> y_hat = Evaluate(inputMiniBatches[i]);
|
|
||||||
cost_prev = Cost(y_hat, outputMiniBatches[i]);
|
|
||||||
|
|
||||||
std::vector<real_t> error = alg.subtraction(y_hat, outputMiniBatches[i]);
|
|
||||||
|
|
||||||
// Calculating the weight gradients
|
|
||||||
std::vector<real_t> gradient = alg.scalarMultiply(real_t(1) / outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error));
|
|
||||||
std::vector<real_t> RegDerivTerm = regularization.regDerivTerm(weights, lambda, alpha, reg);
|
|
||||||
std::vector<real_t> weight_grad = alg.addition(gradient, RegDerivTerm); // Weight_grad_final
|
|
||||||
|
|
||||||
m = alg.addition(alg.scalarMultiply(b1, m), alg.scalarMultiply(1 - b1, weight_grad));
|
|
||||||
v = alg.addition(alg.scalarMultiply(b2, v), alg.scalarMultiply(1 - b2, alg.exponentiate(weight_grad, 2)));
|
|
||||||
m_final = alg.addition(alg.scalarMultiply(b1, m), alg.scalarMultiply((1 - b1) / (1 - pow(b1, epoch)), weight_grad));
|
|
||||||
|
|
||||||
std::vector<real_t> m_hat = alg.scalarMultiply(1 / (1 - pow(b1, epoch)), m);
|
|
||||||
std::vector<real_t> v_hat = alg.scalarMultiply(1 / (1 - pow(b2, epoch)), v);
|
|
||||||
|
|
||||||
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, alg.elementWiseDivision(m_final, alg.scalarAdd(e, alg.sqrt(v_hat)))));
|
|
||||||
|
|
||||||
// Calculating the bias gradients
|
|
||||||
bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); // As normal
|
|
||||||
y_hat = Evaluate(inputMiniBatches[i]);
|
|
||||||
|
|
||||||
if (UI) {
|
|
||||||
MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
|
|
||||||
MLPPUtilities::UI(weights, bias);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
epoch++;
|
|
||||||
if (epoch > max_epoch) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
forwardPass();
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPLinRegOld::normalEquation() {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPStatOld stat;
|
|
||||||
std::vector<real_t> x_means;
|
|
||||||
std::vector<std::vector<real_t>> inputSetT = alg.transpose(inputSet);
|
|
||||||
|
|
||||||
x_means.resize(inputSetT.size());
|
|
||||||
for (uint32_t i = 0; i < inputSetT.size(); i++) {
|
|
||||||
x_means[i] = (stat.mean(inputSetT[i]));
|
|
||||||
}
|
|
||||||
|
|
||||||
//try {
|
|
||||||
std::vector<real_t> temp;
|
|
||||||
temp.resize(k);
|
|
||||||
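// Closed-form OLS estimate w = (X^T X)^-1 X^T y, computed first to check that X^T X is invertible.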
temp = alg.mat_vec_mult(alg.inverse(alg.matmult(alg.transpose(inputSet), inputSet)), alg.mat_vec_mult(alg.transpose(inputSet), outputSet));
|
|
||||||
if (std::isnan(temp[0])) {
|
|
||||||
//throw 99;
|
|
||||||
//TODO ERR_FAIL_COND
|
|
||||||
std::cout << "ERR: Resulting matrix was noninvertible/degenerate, and so the normal equation could not be performed. Try utilizing gradient descent." << std::endl;
|
|
||||||
return;
|
|
||||||
} else {
|
|
||||||
if (reg == "Ridge") {
|
|
||||||
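// Ridge closed form: w = (X^T X + lambda * I)^-1 X^T y.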
weights = alg.mat_vec_mult(alg.inverse(alg.addition(alg.matmult(alg.transpose(inputSet), inputSet), alg.scalarMultiply(lambda, alg.identity(k)))), alg.mat_vec_mult(alg.transpose(inputSet), outputSet));
|
|
||||||
} else {
|
|
||||||
weights = alg.mat_vec_mult(alg.inverse(alg.matmult(alg.transpose(inputSet), inputSet)), alg.mat_vec_mult(alg.transpose(inputSet), outputSet));
|
|
||||||
}
|
|
||||||
|
|
||||||
bias = stat.mean(outputSet) - alg.dot(weights, x_means);
|
|
||||||
|
|
||||||
forwardPass();
|
|
||||||
}
|
|
||||||
//} catch (int err_num) {
|
|
||||||
// std::cout << "ERR " << err_num << ": Resulting matrix was noninvertible/degenerate, and so the normal equation could not be performed. Try utilizing gradient descent." << std::endl;
|
|
||||||
//}
|
|
||||||
}
|
|
||||||

real_t MLPPLinRegOld::score() {
	MLPPUtilities util;
	return util.performance(y_hat, outputSet);
}

void MLPPLinRegOld::save(std::string fileName) {
	MLPPUtilities util;
	util.saveParameters(fileName, weights, bias);
}

real_t MLPPLinRegOld::Cost(std::vector<real_t> y_hat, std::vector<real_t> y) {
	MLPPRegOld regularization;
	class MLPPCostOld cost;
	return cost.MSE(y_hat, y) + regularization.regTerm(weights, lambda, alpha, reg);
}

std::vector<real_t> MLPPLinRegOld::Evaluate(std::vector<std::vector<real_t>> X) {
	MLPPLinAlgOld alg;
	return alg.scalarAdd(bias, alg.mat_vec_mult(X, weights));
}

real_t MLPPLinRegOld::Evaluate(std::vector<real_t> x) {
	MLPPLinAlgOld alg;
	return alg.dot(weights, x) + bias;
}

// wTx + b
void MLPPLinRegOld::forwardPass() {
	y_hat = Evaluate(inputSet);
}

@ -1,60 +0,0 @@

#ifndef MLPP_LIN_REG_OLD_H
#define MLPP_LIN_REG_OLD_H

//
// LinReg.hpp
//
// Created by Marc Melikyan on 10/2/20.
//

#include "core/math/math_defs.h"

#include <string>
#include <vector>

class MLPPLinRegOld {
public:
	MLPPLinRegOld(std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet, std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5);
	std::vector<real_t> modelSetTest(std::vector<std::vector<real_t>> X);
	real_t modelTest(std::vector<real_t> x);
	void NewtonRaphson(real_t learning_rate, int max_epoch, bool UI);
	void gradientDescent(real_t learning_rate, int max_epoch, bool UI = false);
	void SGD(real_t learning_rate, int max_epoch, bool UI = false);

	void Momentum(real_t learning_rate, int max_epoch, int mini_batch_size, real_t gamma, bool UI = false);
	void NAG(real_t learning_rate, int max_epoch, int mini_batch_size, real_t gamma, bool UI = false);
	void Adagrad(real_t learning_rate, int max_epoch, int mini_batch_size, real_t e, bool UI = false);
	void Adadelta(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t e, bool UI = false);
	void Adam(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI = false);
	void Adamax(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI = false);
	void Nadam(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI = false);

	void MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI = false);
	void normalEquation();
	real_t score();
	void save(std::string fileName);

private:
	real_t Cost(std::vector<real_t> y_hat, std::vector<real_t> y);

	std::vector<real_t> Evaluate(std::vector<std::vector<real_t>> X);
	real_t Evaluate(std::vector<real_t> x);
	void forwardPass();

	std::vector<std::vector<real_t>> inputSet;
	std::vector<real_t> outputSet;
	std::vector<real_t> y_hat;
	std::vector<real_t> weights;
	real_t bias;

	int n;
	int k;

	// Regularization Params
	std::string reg;
	int lambda;
	int alpha; /* This is the controlling param for Elastic Net*/
};

#endif /* LinReg_hpp */

@ -1,213 +0,0 @@

//
// LogReg.cpp
//
// Created by Marc Melikyan on 10/2/20.
//

#include "log_reg_old.h"

#include "../activation/activation_old.h"
#include "../cost/cost_old.h"
#include "../lin_alg/lin_alg_old.h"
#include "../regularization/reg_old.h"
#include "../utilities/utilities.h"

#include <iostream>
#include <random>

MLPPLogRegOld::MLPPLogRegOld(std::vector<std::vector<real_t>> pinputSet, std::vector<real_t> poutputSet, std::string preg, real_t plambda, real_t palpha) {
	inputSet = pinputSet;
	outputSet = poutputSet;
	n = pinputSet.size();
	k = pinputSet[0].size();
	reg = preg;
	lambda = plambda;
	alpha = palpha;

	y_hat.resize(n);
	weights = MLPPUtilities::weightInitialization(k);
	bias = MLPPUtilities::biasInitialization();
}

std::vector<real_t> MLPPLogRegOld::modelSetTest(std::vector<std::vector<real_t>> X) {
	return Evaluate(X);
}

real_t MLPPLogRegOld::modelTest(std::vector<real_t> x) {
	return Evaluate(x);
}

void MLPPLogRegOld::gradientDescent(real_t learning_rate, int max_epoch, bool UI) {
	MLPPLinAlgOld alg;
	MLPPRegOld regularization;
	real_t cost_prev = 0;
	int epoch = 1;
	forwardPass();

	while (true) {
		cost_prev = Cost(y_hat, outputSet);

		std::vector<real_t> error = alg.subtraction(y_hat, outputSet);

		// Calculating the weight gradients
		weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate / n, alg.mat_vec_mult(alg.transpose(inputSet), error)));
		weights = regularization.regWeights(weights, lambda, alpha, reg);

		// Calculating the bias gradients
		bias -= learning_rate * alg.sum_elements(error) / n;
		forwardPass();

		if (UI) {
			MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
			MLPPUtilities::UI(weights, bias);
		}
		epoch++;

		if (epoch > max_epoch) {
			break;
		}
	}
}

void MLPPLogRegOld::MLE(real_t learning_rate, int max_epoch, bool UI) {
	MLPPLinAlgOld alg;
	MLPPRegOld regularization;
	real_t cost_prev = 0;
	int epoch = 1;
	forwardPass();

	while (true) {
		cost_prev = Cost(y_hat, outputSet);

		std::vector<real_t> error = alg.subtraction(outputSet, y_hat);

		// Calculating the weight gradients
		weights = alg.addition(weights, alg.scalarMultiply(learning_rate / n, alg.mat_vec_mult(alg.transpose(inputSet), error)));
		weights = regularization.regWeights(weights, lambda, alpha, reg);

		// Calculating the bias gradients
		bias += learning_rate * alg.sum_elements(error) / n;
		forwardPass();

		if (UI) {
			MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
			MLPPUtilities::UI(weights, bias);
		}
		epoch++;
		if (epoch > max_epoch) {
			break;
		}
	}
}

void MLPPLogRegOld::SGD(real_t learning_rate, int max_epoch, bool UI) {
	MLPPLinAlgOld alg;
	MLPPRegOld regularization;
	real_t cost_prev = 0;
	int epoch = 1;

	while (true) {
		std::random_device rd;
		std::default_random_engine generator(rd());
		std::uniform_int_distribution<int> distribution(0, int(n - 1));
		int outputIndex = distribution(generator);

		real_t y_hat = Evaluate(inputSet[outputIndex]);
		cost_prev = Cost({ y_hat }, { outputSet[outputIndex] });

		real_t error = y_hat - outputSet[outputIndex];

		// Weight updation
		weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate * error, inputSet[outputIndex]));
		weights = regularization.regWeights(weights, lambda, alpha, reg);

		// Bias updation
		bias -= learning_rate * error;

		y_hat = Evaluate({ inputSet[outputIndex] });

		if (UI) {
			MLPPUtilities::CostInfo(epoch, cost_prev, Cost({ y_hat }, { outputSet[outputIndex] }));
			MLPPUtilities::UI(weights, bias);
		}
		epoch++;

		if (epoch > max_epoch) {
			break;
		}
	}
	forwardPass();
}

void MLPPLogRegOld::MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI) {
	MLPPLinAlgOld alg;
	MLPPRegOld regularization;
	real_t cost_prev = 0;
	int epoch = 1;

	// Creating the mini-batches
	int n_mini_batch = n / mini_batch_size;
	auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
	auto inputMiniBatches = std::get<0>(batches);
	auto outputMiniBatches = std::get<1>(batches);

	while (true) {
		for (int i = 0; i < n_mini_batch; i++) {
			std::vector<real_t> y_hat = Evaluate(inputMiniBatches[i]);
			cost_prev = Cost(y_hat, outputMiniBatches[i]);

			std::vector<real_t> error = alg.subtraction(y_hat, outputMiniBatches[i]);

			// Calculating the weight gradients
			weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate / outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error)));
			weights = regularization.regWeights(weights, lambda, alpha, reg);

			// Calculating the bias gradients
			bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size();
			y_hat = Evaluate(inputMiniBatches[i]);

			if (UI) {
				MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
				MLPPUtilities::UI(weights, bias);
			}
		}
		epoch++;
		if (epoch > max_epoch) {
			break;
		}
	}
	forwardPass();
}

real_t MLPPLogRegOld::score() {
	MLPPUtilities util;
	return util.performance(y_hat, outputSet);
}

void MLPPLogRegOld::save(std::string fileName) {
	MLPPUtilities util;
	util.saveParameters(fileName, weights, bias);
}

real_t MLPPLogRegOld::Cost(std::vector<real_t> y_hat, std::vector<real_t> y) {
	MLPPRegOld regularization;
	class MLPPCostOld cost;
	return cost.LogLoss(y_hat, y) + regularization.regTerm(weights, lambda, alpha, reg);
}

std::vector<real_t> MLPPLogRegOld::Evaluate(std::vector<std::vector<real_t>> X) {
	MLPPLinAlgOld alg;
	MLPPActivationOld avn;
	return avn.sigmoid(alg.scalarAdd(bias, alg.mat_vec_mult(X, weights)));
}

real_t MLPPLogRegOld::Evaluate(std::vector<real_t> x) {
	MLPPLinAlgOld alg;
	MLPPActivationOld avn;
	return avn.sigmoid(alg.dot(weights, x) + bias);
}

// sigmoid ( wTx + b )
void MLPPLogRegOld::forwardPass() {
	y_hat = Evaluate(inputSet);
}
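As a reading aid, the full-batch update above is the usual log-loss gradient step for \hat{y} = \sigma(Xw + b), with \sigma the sigmoid and \eta the learning rate; MLE() is the same step with the sign flipped, i.e. gradient ascent on the likelihood:

w \leftarrow w - \frac{\eta}{n} X^\top (\hat{y} - y), \qquad b \leftarrow b - \frac{\eta}{n} \sum_i (\hat{y}_i - y_i)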

@ -1,51 +0,0 @@

#ifndef MLPP_LOG_REG_OLD_H
#define MLPP_LOG_REG_OLD_H

//
// LogReg.hpp
//
// Created by Marc Melikyan on 10/2/20.
//

#include "core/math/math_defs.h"

#include <string>
#include <vector>

class MLPPLogRegOld {
public:
	MLPPLogRegOld(std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet, std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5);
	std::vector<real_t> modelSetTest(std::vector<std::vector<real_t>> X);
	real_t modelTest(std::vector<real_t> x);
	void gradientDescent(real_t learning_rate, int max_epoch, bool UI = false);
	void MLE(real_t learning_rate, int max_epoch, bool UI = false);
	void SGD(real_t learning_rate, int max_epoch, bool UI = false);
	void MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI = false);
	real_t score();
	void save(std::string fileName);

private:
	real_t Cost(std::vector<real_t> y_hat, std::vector<real_t> y);

	std::vector<real_t> Evaluate(std::vector<std::vector<real_t>> X);
	real_t Evaluate(std::vector<real_t> x);
	void forwardPass();

	std::vector<std::vector<real_t>> inputSet;
	std::vector<real_t> outputSet;
	std::vector<real_t> y_hat;
	std::vector<real_t> weights;
	real_t bias;

	int n;
	int k;
	//real_t learning_rate;

	// Regularization Params
	std::string reg;
	real_t lambda; /* Regularization Parameter */
	real_t alpha; /* This is the controlling param for Elastic Net*/
};

#endif /* LogReg_hpp */

@ -1,189 +0,0 @@

//
// MANN.cpp
//
// Created by Marc Melikyan on 11/4/20.
//

#include "mann_old.h"

#include "../activation/activation_old.h"
#include "../cost/cost_old.h"
#include "../lin_alg/lin_alg_old.h"
#include "../regularization/reg_old.h"
#include "../utilities/utilities.h"

#include <iostream>

MLPPMANNOld::MLPPMANNOld(std::vector<std::vector<real_t>> inputSet, std::vector<std::vector<real_t>> outputSet) :
		inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), n_output(outputSet[0].size()) {
}

MLPPMANNOld::~MLPPMANNOld() {
	delete outputLayer;
}

std::vector<std::vector<real_t>> MLPPMANNOld::modelSetTest(std::vector<std::vector<real_t>> X) {
	if (!network.empty()) {
		network[0].input = X;
		network[0].forwardPass();

		for (uint32_t i = 1; i < network.size(); i++) {
			network[i].input = network[i - 1].a;
			network[i].forwardPass();
		}
		outputLayer->input = network[network.size() - 1].a;
	} else {
		outputLayer->input = X;
	}
	outputLayer->forwardPass();
	return outputLayer->a;
}

std::vector<real_t> MLPPMANNOld::modelTest(std::vector<real_t> x) {
	if (!network.empty()) {
		network[0].Test(x);
		for (uint32_t i = 1; i < network.size(); i++) {
			network[i].Test(network[i - 1].a_test);
		}
		outputLayer->Test(network[network.size() - 1].a_test);
	} else {
		outputLayer->Test(x);
	}
	return outputLayer->a_test;
}

void MLPPMANNOld::gradientDescent(real_t learning_rate, int max_epoch, bool UI) {
	class MLPPCostOld cost;
	MLPPActivationOld avn;
	MLPPLinAlgOld alg;
	MLPPRegOld regularization;

	real_t cost_prev = 0;
	int epoch = 1;
	forwardPass();

	while (true) {
		cost_prev = Cost(y_hat, outputSet);

		if (outputLayer->activation == "Softmax") {
			outputLayer->delta = alg.subtraction(y_hat, outputSet);
		} else {
			auto costDeriv = outputLayer->costDeriv_map[outputLayer->cost];
			auto outputAvn = outputLayer->activation_map[outputLayer->activation];
			outputLayer->delta = alg.hadamard_product((cost.*costDeriv)(y_hat, outputSet), (avn.*outputAvn)(outputLayer->z, 1));
		}

		std::vector<std::vector<real_t>> outputWGrad = alg.matmult(alg.transpose(outputLayer->input), outputLayer->delta);

		outputLayer->weights = alg.subtraction(outputLayer->weights, alg.scalarMultiply(learning_rate / n, outputWGrad));
		outputLayer->weights = regularization.regWeights(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg);
		outputLayer->bias = alg.subtractMatrixRows(outputLayer->bias, alg.scalarMultiply(learning_rate / n, outputLayer->delta));

		if (!network.empty()) {
			auto hiddenLayerAvn = network[network.size() - 1].activation_map[network[network.size() - 1].activation];
			network[network.size() - 1].delta = alg.hadamard_product(alg.matmult(outputLayer->delta, alg.transpose(outputLayer->weights)), (avn.*hiddenLayerAvn)(network[network.size() - 1].z, 1));
			std::vector<std::vector<real_t>> hiddenLayerWGrad = alg.matmult(alg.transpose(network[network.size() - 1].input), network[network.size() - 1].delta);

			network[network.size() - 1].weights = alg.subtraction(network[network.size() - 1].weights, alg.scalarMultiply(learning_rate / n, hiddenLayerWGrad));
			network[network.size() - 1].weights = regularization.regWeights(network[network.size() - 1].weights, network[network.size() - 1].lambda, network[network.size() - 1].alpha, network[network.size() - 1].reg);
			network[network.size() - 1].bias = alg.subtractMatrixRows(network[network.size() - 1].bias, alg.scalarMultiply(learning_rate / n, network[network.size() - 1].delta));

			for (int i = network.size() - 2; i >= 0; i--) {
				hiddenLayerAvn = network[i].activation_map[network[i].activation];
				network[i].delta = alg.hadamard_product(alg.matmult(network[i + 1].delta, network[i + 1].weights), (avn.*hiddenLayerAvn)(network[i].z, 1));
				hiddenLayerWGrad = alg.matmult(alg.transpose(network[i].input), network[i].delta);
				network[i].weights = alg.subtraction(network[i].weights, alg.scalarMultiply(learning_rate / n, hiddenLayerWGrad));
				network[i].weights = regularization.regWeights(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg);
				network[i].bias = alg.subtractMatrixRows(network[i].bias, alg.scalarMultiply(learning_rate / n, network[i].delta));
			}
		}

		forwardPass();

		if (UI) {
			MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
			std::cout << "Layer " << network.size() + 1 << ": " << std::endl;
			MLPPUtilities::UI(outputLayer->weights, outputLayer->bias);
			if (!network.empty()) {
				std::cout << "Layer " << network.size() << ": " << std::endl;
				for (int i = network.size() - 1; i >= 0; i--) {
					std::cout << "Layer " << i + 1 << ": " << std::endl;
					MLPPUtilities::UI(network[i].weights, network[i].bias);
				}
			}
		}

		epoch++;
		if (epoch > max_epoch) {
			break;
		}
	}
}

real_t MLPPMANNOld::score() {
	MLPPUtilities util;
	forwardPass();
	return util.performance(y_hat, outputSet);
}

void MLPPMANNOld::save(std::string fileName) {
	MLPPUtilities util;
	if (!network.empty()) {
		util.saveParameters(fileName, network[0].weights, network[0].bias, 0, 1);
		for (uint32_t i = 1; i < network.size(); i++) {
			util.saveParameters(fileName, network[i].weights, network[i].bias, 1, i + 1);
		}
		util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, 1, network.size() + 1);
	} else {
		util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, 0, network.size() + 1);
	}
}

void MLPPMANNOld::addLayer(int n_hidden, std::string activation, std::string weightInit, std::string reg, real_t lambda, real_t alpha) {
	if (network.empty()) {
		network.push_back(MLPPOldHiddenLayer(n_hidden, activation, inputSet, weightInit, reg, lambda, alpha));
		network[0].forwardPass();
	} else {
		network.push_back(MLPPOldHiddenLayer(n_hidden, activation, network[network.size() - 1].a, weightInit, reg, lambda, alpha));
		network[network.size() - 1].forwardPass();
	}
}

void MLPPMANNOld::addOutputLayer(std::string activation, std::string loss, std::string weightInit, std::string reg, real_t lambda, real_t alpha) {
	if (!network.empty()) {
		outputLayer = new MLPPOldMultiOutputLayer(n_output, network[0].n_hidden, activation, loss, network[network.size() - 1].a, weightInit, reg, lambda, alpha);
	} else {
		outputLayer = new MLPPOldMultiOutputLayer(n_output, k, activation, loss, inputSet, weightInit, reg, lambda, alpha);
	}
}

real_t MLPPMANNOld::Cost(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y) {
	MLPPRegOld regularization;
	class MLPPCostOld cost;
	real_t totalRegTerm = 0;

	auto cost_function = outputLayer->cost_map[outputLayer->cost];
	if (!network.empty()) {
		for (uint32_t i = 0; i < network.size() - 1; i++) {
			totalRegTerm += regularization.regTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg);
		}
	}
	return (cost.*cost_function)(y_hat, y) + totalRegTerm + regularization.regTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg);
}

void MLPPMANNOld::forwardPass() {
	if (!network.empty()) {
		network[0].input = inputSet;
		network[0].forwardPass();

		for (uint32_t i = 1; i < network.size(); i++) {
			network[i].input = network[i - 1].a;
			network[i].forwardPass();
		}
		outputLayer->input = network[network.size() - 1].a;
	} else {
		outputLayer->input = inputSet;
	}
	outputLayer->forwardPass();
	y_hat = outputLayer->a;
}
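One aside on the gradientDescent() branch above: the "Softmax" case hard-codes the familiar softmax-with-cross-entropy simplification of the output delta, while the generic branch composes the stored cost derivative with the activation derivative; roughly, with C the cost, f the output activation and Z_out the pre-activation:

\delta_{\text{out}} = \hat{Y} - Y \quad\text{(softmax case)}, \qquad \delta_{\text{out}} = C'(\hat{Y}, Y) \odot f'(Z_{\text{out}}) \quad\text{(otherwise)}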

@ -1,51 +0,0 @@

#ifndef MLPP_MANN_OLD_H
#define MLPP_MANN_OLD_H

//
// MANN.hpp
//
// Created by Marc Melikyan on 11/4/20.
//

#include "core/math/math_defs.h"

#include "../hidden_layer/hidden_layer.h"
#include "../multi_output_layer/multi_output_layer.h"

#include "../hidden_layer/hidden_layer_old.h"
#include "../multi_output_layer/multi_output_layer_old.h"

#include <string>
#include <vector>

class MLPPMANNOld {
public:
	MLPPMANNOld(std::vector<std::vector<real_t>> inputSet, std::vector<std::vector<real_t>> outputSet);
	~MLPPMANNOld();
	std::vector<std::vector<real_t>> modelSetTest(std::vector<std::vector<real_t>> X);
	std::vector<real_t> modelTest(std::vector<real_t> x);
	void gradientDescent(real_t learning_rate, int max_epoch, bool UI = false);
	real_t score();
	void save(std::string fileName);

	void addLayer(int n_hidden, std::string activation, std::string weightInit = "Default", std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5);
	void addOutputLayer(std::string activation, std::string loss, std::string weightInit = "Default", std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5);

private:
	real_t Cost(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y);
	void forwardPass();

	std::vector<std::vector<real_t>> inputSet;
	std::vector<std::vector<real_t>> outputSet;
	std::vector<std::vector<real_t>> y_hat;

	std::vector<MLPPOldHiddenLayer> network;
	MLPPOldMultiOutputLayer *outputLayer;

	int n;
	int k;
	int n_output;
};

#endif /* MANN_hpp */

@ -1,287 +0,0 @@

//
// MLP.cpp
//
// Created by Marc Melikyan on 11/4/20.
//

#include "mlp_old.h"

#include "core/log/logger.h"

#include "../activation/activation_old.h"
#include "../cost/cost_old.h"
#include "../lin_alg/lin_alg_old.h"
#include "../regularization/reg_old.h"
#include "../utilities/utilities.h"

#include <iostream>
#include <random>

MLPPMLPOld::MLPPMLPOld(std::vector<std::vector<real_t>> p_inputSet, std::vector<real_t> p_outputSet, int p_n_hidden, std::string p_reg, real_t p_lambda, real_t p_alpha) {
	inputSet = p_inputSet;
	outputSet = p_outputSet;
	n_hidden = p_n_hidden;
	n = p_inputSet.size();
	k = p_inputSet[0].size();
	reg = p_reg;
	lambda = p_lambda;
	alpha = p_alpha;

	y_hat.resize(n);

	weights1 = MLPPUtilities::weightInitialization(k, n_hidden);
	weights2 = MLPPUtilities::weightInitialization(n_hidden);
	bias1 = MLPPUtilities::biasInitialization(n_hidden);
	bias2 = MLPPUtilities::biasInitialization();
}

std::vector<real_t> MLPPMLPOld::modelSetTest(std::vector<std::vector<real_t>> X) {
	return Evaluate(X);
}

real_t MLPPMLPOld::modelTest(std::vector<real_t> x) {
	return Evaluate(x);
}

void MLPPMLPOld::gradientDescent(real_t learning_rate, int max_epoch, bool UI) {
	MLPPActivationOld avn;
	MLPPLinAlgOld alg;
	MLPPRegOld regularization;
	real_t cost_prev = 0;
	int epoch = 1;
	forwardPass();

	while (true) {
		cost_prev = Cost(y_hat, outputSet);

		// Calculating the errors
		std::vector<real_t> error = alg.subtraction(y_hat, outputSet);

		// Calculating the weight/bias gradients for layer 2

		std::vector<real_t> D2_1 = alg.mat_vec_mult(alg.transpose(a2), error);

		// weights and bias updation for layer 2
		weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate / n, D2_1));
		weights2 = regularization.regWeights(weights2, lambda, alpha, reg);

		bias2 -= learning_rate * alg.sum_elements(error) / n;

		// Calculating the weight/bias for layer 1

		std::vector<std::vector<real_t>> D1_1;
		D1_1.resize(n);

		D1_1 = alg.outerProduct(error, weights2);

		std::vector<std::vector<real_t>> D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, true));

		std::vector<std::vector<real_t>> D1_3 = alg.matmult(alg.transpose(inputSet), D1_2);

		// weight and bias updation for layer 1
		weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate / n, D1_3));
		weights1 = regularization.regWeights(weights1, lambda, alpha, reg);

		bias1 = alg.subtractMatrixRows(bias1, alg.scalarMultiply(learning_rate / n, D1_2));

		forwardPass();

		// UI PORTION
		if (UI) {
			MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
			std::cout << "Layer 1:" << std::endl;
			MLPPUtilities::UI(weights1, bias1);
			std::cout << "Layer 2:" << std::endl;
			MLPPUtilities::UI(weights2, bias2);
		}
		epoch++;

		if (epoch > max_epoch) {
			break;
		}
	}
}

void MLPPMLPOld::SGD(real_t learning_rate, int max_epoch, bool UI) {
	MLPPActivationOld avn;
	MLPPLinAlgOld alg;
	MLPPRegOld regularization;
	real_t cost_prev = 0;
	int epoch = 1;

	while (true) {
		std::random_device rd;
		std::default_random_engine generator(rd());
		std::uniform_int_distribution<int> distribution(0, int(n - 1));
		int outputIndex = distribution(generator);

		real_t y_hat = Evaluate(inputSet[outputIndex]);
		auto propagate_result = propagate(inputSet[outputIndex]);
		auto z2 = std::get<0>(propagate_result);
		auto a2 = std::get<1>(propagate_result);
		cost_prev = Cost({ y_hat }, { outputSet[outputIndex] });
		real_t error = y_hat - outputSet[outputIndex];

		// Weight updation for layer 2
		std::vector<real_t> D2_1 = alg.scalarMultiply(error, a2);
		weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate, D2_1));
		weights2 = regularization.regWeights(weights2, lambda, alpha, reg);

		// Bias updation for layer 2
		bias2 -= learning_rate * error;

		// Weight updation for layer 1
		std::vector<real_t> D1_1 = alg.scalarMultiply(error, weights2);
		std::vector<real_t> D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, true));
		std::vector<std::vector<real_t>> D1_3 = alg.outerProduct(inputSet[outputIndex], D1_2);

		weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate, D1_3));
		weights1 = regularization.regWeights(weights1, lambda, alpha, reg);
		// Bias updation for layer 1

		bias1 = alg.subtraction(bias1, alg.scalarMultiply(learning_rate, D1_2));

		y_hat = Evaluate(inputSet[outputIndex]);
		if (UI) {
			MLPPUtilities::CostInfo(epoch, cost_prev, Cost({ y_hat }, { outputSet[outputIndex] }));
			std::cout << "Layer 1:" << std::endl;
			MLPPUtilities::UI(weights1, bias1);
			std::cout << "Layer 2:" << std::endl;
			MLPPUtilities::UI(weights2, bias2);
		}
		epoch++;

		if (epoch > max_epoch) {
			break;
		}
	}
	forwardPass();
}

void MLPPMLPOld::MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI) {
	MLPPActivationOld avn;
	MLPPLinAlgOld alg;
	MLPPRegOld regularization;
	real_t cost_prev = 0;
	int epoch = 1;

	// Creating the mini-batches
	int n_mini_batch = n / mini_batch_size;
	auto minibatches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
	auto inputMiniBatches = std::get<0>(minibatches);
	auto outputMiniBatches = std::get<1>(minibatches);

	while (true) {
		for (int i = 0; i < n_mini_batch; i++) {
			std::vector<real_t> y_hat = Evaluate(inputMiniBatches[i]);
			auto propagate_result = propagate(inputMiniBatches[i]);
			auto z2 = std::get<0>(propagate_result);
			auto a2 = std::get<1>(propagate_result);

			cost_prev = Cost(y_hat, outputMiniBatches[i]);

			// Calculating the errors
			std::vector<real_t> error = alg.subtraction(y_hat, outputMiniBatches[i]);

			// Calculating the weight/bias gradients for layer 2

			std::vector<real_t> D2_1 = alg.mat_vec_mult(alg.transpose(a2), error);

			// weights and bias updation for layer 2
			weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate / outputMiniBatches[i].size(), D2_1));
			weights2 = regularization.regWeights(weights2, lambda, alpha, reg);

			// Calculating the bias gradients for layer 2
			//real_t b_gradient = alg.sum_elements(error);

			// Bias Updation for layer 2
			bias2 -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size();

			//Calculating the weight/bias for layer 1

			std::vector<std::vector<real_t>> D1_1 = alg.outerProduct(error, weights2);

			std::vector<std::vector<real_t>> D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, true));

			std::vector<std::vector<real_t>> D1_3 = alg.matmult(alg.transpose(inputMiniBatches[i]), D1_2);

			// weight and bias updation for layer 1
			weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate / outputMiniBatches[i].size(), D1_3));
			weights1 = regularization.regWeights(weights1, lambda, alpha, reg);

			bias1 = alg.subtractMatrixRows(bias1, alg.scalarMultiply(learning_rate / outputMiniBatches[i].size(), D1_2));

			y_hat = Evaluate(inputMiniBatches[i]);

			if (UI) {
				MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
				std::cout << "Layer 1:" << std::endl;
				MLPPUtilities::UI(weights1, bias1);
				std::cout << "Layer 2:" << std::endl;
				MLPPUtilities::UI(weights2, bias2);
			}
		}
		epoch++;
		if (epoch > max_epoch) {
			break;
		}
	}
	forwardPass();
}

real_t MLPPMLPOld::score() {
	MLPPUtilities util;
	return util.performance(y_hat, outputSet);
}

void MLPPMLPOld::save(std::string fileName) {
	MLPPUtilities util;
	util.saveParameters(fileName, weights1, bias1, false, 1);
	util.saveParameters(fileName, weights2, bias2, true, 2);
}

real_t MLPPMLPOld::Cost(std::vector<real_t> y_hat, std::vector<real_t> y) {
	MLPPRegOld regularization;
	class MLPPCostOld cost;
	return cost.LogLoss(y_hat, y) + regularization.regTerm(weights2, lambda, alpha, reg) + regularization.regTerm(weights1, lambda, alpha, reg);
}

std::vector<real_t> MLPPMLPOld::Evaluate(std::vector<std::vector<real_t>> X) {
	MLPPLinAlgOld alg;
	MLPPActivationOld avn;
	std::vector<std::vector<real_t>> z2 = alg.mat_vec_add(alg.matmult(X, weights1), bias1);
	std::vector<std::vector<real_t>> a2 = avn.sigmoid(z2);
	return avn.sigmoid(alg.scalarAdd(bias2, alg.mat_vec_mult(a2, weights2)));
}

std::tuple<std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>> MLPPMLPOld::propagate(std::vector<std::vector<real_t>> X) {
	MLPPLinAlgOld alg;
	MLPPActivationOld avn;
	std::vector<std::vector<real_t>> z2 = alg.mat_vec_add(alg.matmult(X, weights1), bias1);
	std::vector<std::vector<real_t>> a2 = avn.sigmoid(z2);
	return { z2, a2 };
}

real_t MLPPMLPOld::Evaluate(std::vector<real_t> x) {
	MLPPLinAlgOld alg;
	MLPPActivationOld avn;
	std::vector<real_t> z2 = alg.addition(alg.mat_vec_mult(alg.transpose(weights1), x), bias1);
	std::vector<real_t> a2 = avn.sigmoid(z2);
	return avn.sigmoid(alg.dot(weights2, a2) + bias2);
}

std::tuple<std::vector<real_t>, std::vector<real_t>> MLPPMLPOld::propagate(std::vector<real_t> x) {
	MLPPLinAlgOld alg;
	MLPPActivationOld avn;
	std::vector<real_t> z2 = alg.addition(alg.mat_vec_mult(alg.transpose(weights1), x), bias1);
	std::vector<real_t> a2 = avn.sigmoid(z2);
	return { z2, a2 };
}

void MLPPMLPOld::forwardPass() {
	MLPPLinAlgOld alg;
	MLPPActivationOld avn;
	z2 = alg.mat_vec_add(alg.matmult(inputSet, weights1), bias1);
	a2 = avn.sigmoid(z2);
	y_hat = avn.sigmoid(alg.scalarAdd(bias2, alg.mat_vec_mult(a2, weights2)));
}
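For orientation, the full-batch branch above is plain backpropagation for a one-hidden-layer sigmoid network; writing e = \hat{y} - y for the output error, the gradients it forms are roughly

\nabla_{w_2} = a_2^\top e, \qquad \nabla_{W_1} = X^\top \big( (e \, w_2^\top) \odot \sigma'(z_2) \big),

with the weights moved by -\eta / n times these quantities and the biases updated from the corresponding error sums.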

@ -1,70 +0,0 @@

#ifndef MLPP_MLP_OLD_H
#define MLPP_MLP_OLD_H

//
// MLP.hpp
//
// Created by Marc Melikyan on 11/4/20.
//

#include "core/containers/vector.h"
#include "core/math/math_defs.h"
#include "core/string/ustring.h"
#include "core/variant/variant.h"

#include "core/object/reference.h"

#include "../regularization/reg.h"

#include "../lin_alg/mlpp_matrix.h"
#include "../lin_alg/mlpp_vector.h"

#include <map>
#include <string>
#include <vector>

class MLPPMLPOld {
public:
	MLPPMLPOld(std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet, int n_hidden, std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5);
	std::vector<real_t> modelSetTest(std::vector<std::vector<real_t>> X);
	real_t modelTest(std::vector<real_t> x);
	void gradientDescent(real_t learning_rate, int max_epoch, bool UI = false);
	void SGD(real_t learning_rate, int max_epoch, bool UI = false);
	void MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI = false);
	real_t score();
	void save(std::string fileName);

private:
	real_t Cost(std::vector<real_t> y_hat, std::vector<real_t> y);

	std::vector<real_t> Evaluate(std::vector<std::vector<real_t>> X);
	std::tuple<std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>> propagate(std::vector<std::vector<real_t>> X);
	real_t Evaluate(std::vector<real_t> x);
	std::tuple<std::vector<real_t>, std::vector<real_t>> propagate(std::vector<real_t> x);
	void forwardPass();

	std::vector<std::vector<real_t>> inputSet;
	std::vector<real_t> outputSet;
	std::vector<real_t> y_hat;

	std::vector<std::vector<real_t>> weights1;
	std::vector<real_t> weights2;

	std::vector<real_t> bias1;
	real_t bias2;

	std::vector<std::vector<real_t>> z2;
	std::vector<std::vector<real_t>> a2;

	int n;
	int k;
	int n_hidden;

	// Regularization Params
	std::string reg;
	real_t lambda; /* Regularization Parameter */
	real_t alpha; /* This is the controlling param for Elastic Net*/
};

#endif /* MLP_hpp */

@ -1,139 +0,0 @@

//
// MultiOutputLayer.cpp
//
// Created by Marc Melikyan on 11/4/20.
//

#include "multi_output_layer_old.h"
#include "../lin_alg/lin_alg_old.h"
#include "../utilities/utilities.h"

#include <iostream>
#include <random>

MLPPOldMultiOutputLayer::MLPPOldMultiOutputLayer(int p_n_output, int p_n_hidden, std::string p_activation, std::string p_cost, std::vector<std::vector<real_t>> p_input, std::string p_weightInit, std::string p_reg, real_t p_lambda, real_t p_alpha) {
	n_output = p_n_output;
	n_hidden = p_n_hidden;
	activation = p_activation;
	cost = p_cost;
	input = p_input;
	weightInit = p_weightInit;
	reg = p_reg;
	lambda = p_lambda;
	alpha = p_alpha;

	weights = MLPPUtilities::weightInitialization(n_hidden, n_output, weightInit);
	bias = MLPPUtilities::biasInitialization(n_output);

	activation_map["Linear"] = &MLPPActivationOld::linear;
	activationTest_map["Linear"] = &MLPPActivationOld::linear;

	activation_map["Sigmoid"] = &MLPPActivationOld::sigmoid;
	activationTest_map["Sigmoid"] = &MLPPActivationOld::sigmoid;

	activation_map["Softmax"] = &MLPPActivationOld::softmax;
	activationTest_map["Softmax"] = &MLPPActivationOld::softmax;

	activation_map["Swish"] = &MLPPActivationOld::swish;
	activationTest_map["Swish"] = &MLPPActivationOld::swish;

	activation_map["Mish"] = &MLPPActivationOld::mish;
	activationTest_map["Mish"] = &MLPPActivationOld::mish;

	activation_map["SinC"] = &MLPPActivationOld::sinc;
	activationTest_map["SinC"] = &MLPPActivationOld::sinc;

	activation_map["Softplus"] = &MLPPActivationOld::softplus;
	activationTest_map["Softplus"] = &MLPPActivationOld::softplus;

	activation_map["Softsign"] = &MLPPActivationOld::softsign;
	activationTest_map["Softsign"] = &MLPPActivationOld::softsign;

	activation_map["CLogLog"] = &MLPPActivationOld::cloglog;
	activationTest_map["CLogLog"] = &MLPPActivationOld::cloglog;

	activation_map["Logit"] = &MLPPActivationOld::logit;
	activationTest_map["Logit"] = &MLPPActivationOld::logit;

	activation_map["GaussianCDF"] = &MLPPActivationOld::gaussianCDF;
	activationTest_map["GaussianCDF"] = &MLPPActivationOld::gaussianCDF;

	activation_map["RELU"] = &MLPPActivationOld::RELU;
	activationTest_map["RELU"] = &MLPPActivationOld::RELU;

	activation_map["GELU"] = &MLPPActivationOld::GELU;
	activationTest_map["GELU"] = &MLPPActivationOld::GELU;

	activation_map["Sign"] = &MLPPActivationOld::sign;
	activationTest_map["Sign"] = &MLPPActivationOld::sign;

	activation_map["UnitStep"] = &MLPPActivationOld::unitStep;
	activationTest_map["UnitStep"] = &MLPPActivationOld::unitStep;

	activation_map["Sinh"] = &MLPPActivationOld::sinh;
	activationTest_map["Sinh"] = &MLPPActivationOld::sinh;

	activation_map["Cosh"] = &MLPPActivationOld::cosh;
	activationTest_map["Cosh"] = &MLPPActivationOld::cosh;

	activation_map["Tanh"] = &MLPPActivationOld::tanh;
	activationTest_map["Tanh"] = &MLPPActivationOld::tanh;

	activation_map["Csch"] = &MLPPActivationOld::csch;
	activationTest_map["Csch"] = &MLPPActivationOld::csch;

	activation_map["Sech"] = &MLPPActivationOld::sech;
	activationTest_map["Sech"] = &MLPPActivationOld::sech;

	activation_map["Coth"] = &MLPPActivationOld::coth;
	activationTest_map["Coth"] = &MLPPActivationOld::coth;

	activation_map["Arsinh"] = &MLPPActivationOld::arsinh;
	activationTest_map["Arsinh"] = &MLPPActivationOld::arsinh;

	activation_map["Arcosh"] = &MLPPActivationOld::arcosh;
	activationTest_map["Arcosh"] = &MLPPActivationOld::arcosh;

	activation_map["Artanh"] = &MLPPActivationOld::artanh;
	activationTest_map["Artanh"] = &MLPPActivationOld::artanh;

	activation_map["Arcsch"] = &MLPPActivationOld::arcsch;
	activationTest_map["Arcsch"] = &MLPPActivationOld::arcsch;

	activation_map["Arsech"] = &MLPPActivationOld::arsech;
	activationTest_map["Arsech"] = &MLPPActivationOld::arsech;

	activation_map["Arcoth"] = &MLPPActivationOld::arcoth;
	activationTest_map["Arcoth"] = &MLPPActivationOld::arcoth;

	costDeriv_map["MSE"] = &MLPPCostOld::MSEDeriv;
	cost_map["MSE"] = &MLPPCostOld::MSE;
	costDeriv_map["RMSE"] = &MLPPCostOld::RMSEDeriv;
	cost_map["RMSE"] = &MLPPCostOld::RMSE;
	costDeriv_map["MAE"] = &MLPPCostOld::MAEDeriv;
	cost_map["MAE"] = &MLPPCostOld::MAE;
	costDeriv_map["MBE"] = &MLPPCostOld::MBEDeriv;
	cost_map["MBE"] = &MLPPCostOld::MBE;
	costDeriv_map["LogLoss"] = &MLPPCostOld::LogLossDeriv;
	cost_map["LogLoss"] = &MLPPCostOld::LogLoss;
	costDeriv_map["CrossEntropy"] = &MLPPCostOld::CrossEntropyDeriv;
	cost_map["CrossEntropy"] = &MLPPCostOld::CrossEntropy;
	costDeriv_map["HingeLoss"] = &MLPPCostOld::HingeLossDeriv;
	cost_map["HingeLoss"] = &MLPPCostOld::HingeLoss;
	costDeriv_map["WassersteinLoss"] = &MLPPCostOld::HingeLossDeriv;
	cost_map["WassersteinLoss"] = &MLPPCostOld::HingeLoss;
}

void MLPPOldMultiOutputLayer::forwardPass() {
	MLPPLinAlgOld alg;
	MLPPActivationOld avn;
	z = alg.mat_vec_add(alg.matmult(input, weights), bias);
	a = (avn.*activation_map[activation])(z, false);
}

void MLPPOldMultiOutputLayer::Test(std::vector<real_t> x) {
	MLPPLinAlgOld alg;
	MLPPActivationOld avn;
	z_test = alg.addition(alg.mat_vec_mult(alg.transpose(weights), x), bias);
	a_test = (avn.*activationTest_map[activation])(z_test, false);
}

@ -1,66 +0,0 @@

#ifndef MLPP_MULTI_OUTPUT_LAYER_OLD_H
#define MLPP_MULTI_OUTPUT_LAYER_OLD_H

//
// MultiOutputLayer.hpp
//
// Created by Marc Melikyan on 11/4/20.
//

#include "core/math/math_defs.h"
#include "core/string/ustring.h"

#include "core/object/reference.h"

#include "../activation/activation_old.h"
#include "../cost/cost_old.h"
#include "../regularization/reg.h"
#include "../utilities/utilities.h"

#include "../lin_alg/mlpp_matrix.h"
#include "../lin_alg/mlpp_vector.h"

#include <map>
#include <string>
#include <vector>

class MLPPOldMultiOutputLayer {
public:
	MLPPOldMultiOutputLayer(int n_output, int n_hidden, std::string activation, std::string cost, std::vector<std::vector<real_t>> input, std::string weightInit, std::string reg, real_t lambda, real_t alpha);

	int n_output;
	int n_hidden;
	std::string activation;
	std::string cost;

	std::vector<std::vector<real_t>> input;

	std::vector<std::vector<real_t>> weights;
	std::vector<real_t> bias;

	std::vector<std::vector<real_t>> z;
	std::vector<std::vector<real_t>> a;

	std::map<std::string, std::vector<std::vector<real_t>> (MLPPActivationOld::*)(std::vector<std::vector<real_t>>, bool)> activation_map;
	std::map<std::string, std::vector<real_t> (MLPPActivationOld::*)(std::vector<real_t>, bool)> activationTest_map;
	std::map<std::string, real_t (MLPPCostOld::*)(std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>)> cost_map;
	std::map<std::string, std::vector<std::vector<real_t>> (MLPPCostOld::*)(std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>)> costDeriv_map;

	std::vector<real_t> z_test;
	std::vector<real_t> a_test;

	std::vector<std::vector<real_t>> delta;

	// Regularization Params
	std::string reg;
	real_t lambda; /* Regularization Parameter */
	real_t alpha; /* This is the controlling param for Elastic Net*/

	std::string weightInit;

	void forwardPass();
	void Test(std::vector<real_t> x);
};

#endif /* MultiOutputLayer_hpp */

@ -1,121 +0,0 @@

//
// MultinomialNB.cpp
//
// Created by Marc Melikyan on 1/17/21.
//

#include "multinomial_nb_old.h"

#include "../lin_alg/lin_alg_old.h"
#include "../utilities/utilities.h"

#include <algorithm>
#include <iostream>
#include <random>

MLPPMultinomialNBOld::MLPPMultinomialNBOld(std::vector<std::vector<real_t>> pinputSet, std::vector<real_t> poutputSet, int pclass_num) {
	inputSet = pinputSet;
	outputSet = poutputSet;
	class_num = pclass_num;

	y_hat.resize(outputSet.size());
	Evaluate();
}

std::vector<real_t> MLPPMultinomialNBOld::modelSetTest(std::vector<std::vector<real_t>> X) {
	std::vector<real_t> y_hat;
	for (uint32_t i = 0; i < X.size(); i++) {
		y_hat.push_back(modelTest(X[i]));
	}
	return y_hat;
}

real_t MLPPMultinomialNBOld::modelTest(std::vector<real_t> x) {
	real_t score[class_num];
	computeTheta();

	for (uint32_t j = 0; j < x.size(); j++) {
		for (uint32_t k = 0; k < vocab.size(); k++) {
			if (x[j] == vocab[k]) {
				for (int p = class_num - 1; p >= 0; p--) {
					score[p] += std::log(theta[p][vocab[k]]);
				}
			}
		}
	}

	for (uint32_t i = 0; i < priors.size(); i++) {
		score[i] += std::log(priors[i]);
	}

	return std::distance(score, std::max_element(score, score + sizeof(score) / sizeof(real_t)));
}

real_t MLPPMultinomialNBOld::score() {
	MLPPUtilities util;
	return util.performance(y_hat, outputSet);
}

void MLPPMultinomialNBOld::computeTheta() {
	// Resizing theta for the sake of ease & proper access of the elements.
	theta.resize(class_num);

	// Setting all values in the hashmap by default to 0.
	for (int i = class_num - 1; i >= 0; i--) {
		for (uint32_t j = 0; j < vocab.size(); j++) {
			theta[i][vocab[j]] = 0;
		}
	}

	for (uint32_t i = 0; i < inputSet.size(); i++) {
		for (uint32_t j = 0; j < inputSet[0].size(); j++) {
			theta[outputSet[i]][inputSet[i][j]]++;
		}
	}

	for (uint32_t i = 0; i < theta.size(); i++) {
		for (uint32_t j = 0; j < theta[i].size(); j++) {
			theta[i][j] /= priors[i] * y_hat.size();
		}
	}
}

void MLPPMultinomialNBOld::Evaluate() {
	MLPPLinAlgOld alg;
	for (uint32_t i = 0; i < outputSet.size(); i++) {
		// Pr(B | A) * Pr(A)
		real_t score[class_num];

		// Easy computation of priors, i.e. Pr(C_k)
		priors.resize(class_num);
		for (uint32_t ii = 0; ii < outputSet.size(); ii++) {
			priors[int(outputSet[ii])]++;
		}
		priors = alg.scalarMultiply(real_t(1) / real_t(outputSet.size()), priors);

		// Evaluating Theta...
		computeTheta();

		for (uint32_t j = 0; j < inputSet.size(); j++) {
			for (uint32_t k = 0; k < vocab.size(); k++) {
				if (inputSet[i][j] == vocab[k]) {
					for (int p = class_num - 1; p >= 0; p--) {
						score[p] += std::log(theta[i][vocab[k]]);
					}
				}
			}
		}

		for (uint32_t ii = 0; ii < priors.size(); ii++) {
			score[ii] += std::log(priors[ii]);
			score[ii] = exp(score[ii]);
		}

		for (int ii = 0; ii < 2; ii++) {
			std::cout << score[ii] << std::endl;
		}

		// Assigning the training example's y_hat to a class
		y_hat[i] = std::distance(score, std::max_element(score, score + sizeof(score) / sizeof(real_t)));
	}
}
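For context, the per-class score accumulated above is the multinomial naive Bayes log-posterior up to the shared evidence term, with priors holding P(C_k) and theta the per-class token frequencies:

\log P(C_k \mid x) \propto \log P(C_k) + \sum_j \log \theta_{k, x_j}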

@ -1,40 +0,0 @@

#ifndef MLPP_MULTINOMIAL_NB_OLD_H
#define MLPP_MULTINOMIAL_NB_OLD_H

//
// MultinomialNB.hpp
//
// Created by Marc Melikyan on 1/17/21.
//

#include "core/math/math_defs.h"

#include <map>
#include <vector>

class MLPPMultinomialNBOld {
public:
	MLPPMultinomialNBOld(std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet, int class_num);
	std::vector<real_t> modelSetTest(std::vector<std::vector<real_t>> X);
	real_t modelTest(std::vector<real_t> x);
	real_t score();

private:
	void computeTheta();
	void Evaluate();

	// Model Params
	std::vector<real_t> priors;

	std::vector<std::map<real_t, int>> theta;
	std::vector<real_t> vocab;
	int class_num;

	// Datasets
	std::vector<std::vector<real_t>> inputSet;
	std::vector<real_t> outputSet;
	std::vector<real_t> y_hat;
};

#endif /* MultinomialNB_hpp */

@ -1,300 +0,0 @@

//
// NumericalAnalysis.cpp
//
// Created by Marc Melikyan on 11/13/20.
//

#include "numerical_analysis_old.h"
#include "../lin_alg/lin_alg_old.h"

#include <climits>
#include <cmath>
#include <iostream>
#include <string>

real_t MLPPNumericalAnalysisOld::numDiff(real_t (*function)(real_t), real_t x) {
	real_t eps = 1e-10;
	return (function(x + eps) - function(x)) / eps; // This is just the formal def. of the derivative.
}

real_t MLPPNumericalAnalysisOld::numDiff_2(real_t (*function)(real_t), real_t x) {
	real_t eps = 1e-5;
	return (function(x + 2 * eps) - 2 * function(x + eps) + function(x)) / (eps * eps);
}

real_t MLPPNumericalAnalysisOld::numDiff_3(real_t (*function)(real_t), real_t x) {
	real_t eps = 1e-5;
	real_t t1 = function(x + 3 * eps) - 2 * function(x + 2 * eps) + function(x + eps);
	real_t t2 = function(x + 2 * eps) - 2 * function(x + eps) + function(x);
	return (t1 - t2) / (eps * eps * eps);
}
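Written out, these are one-sided forward-difference approximations of the first three derivatives:

f'(x) \approx \frac{f(x+\varepsilon) - f(x)}{\varepsilon}, \qquad f''(x) \approx \frac{f(x+2\varepsilon) - 2f(x+\varepsilon) + f(x)}{\varepsilon^2}, \qquad f'''(x) \approx \frac{f(x+3\varepsilon) - 3f(x+2\varepsilon) + 3f(x+\varepsilon) - f(x)}{\varepsilon^3}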
|
|
||||||
real_t MLPPNumericalAnalysisOld::constantApproximation(real_t (*function)(real_t), real_t c) {
|
|
||||||
return function(c);
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPNumericalAnalysisOld::linearApproximation(real_t (*function)(real_t), real_t c, real_t x) {
|
|
||||||
return constantApproximation(function, c) + numDiff(function, c) * (x - c);
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPNumericalAnalysisOld::quadraticApproximation(real_t (*function)(real_t), real_t c, real_t x) {
|
|
||||||
return linearApproximation(function, c, x) + 0.5 * numDiff_2(function, c) * (x - c) * (x - c);
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPNumericalAnalysisOld::cubicApproximation(real_t (*function)(real_t), real_t c, real_t x) {
|
|
||||||
return quadraticApproximation(function, c, x) + (1 / 6) * numDiff_3(function, c) * (x - c) * (x - c) * (x - c);
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPNumericalAnalysisOld::numDiff(real_t (*function)(std::vector<real_t>), std::vector<real_t> x, int axis) {
|
|
||||||
// For multivariable function analysis.
|
|
||||||
// This will be used for calculating Jacobian vectors.
|
|
||||||
// Diffrentiate with respect to indicated axis. (0, 1, 2 ...)
|
|
||||||
real_t eps = 1e-10;
|
|
||||||
std::vector<real_t> x_eps = x;
|
|
||||||
x_eps[axis] += eps;
|
|
||||||
|
|
||||||
return (function(x_eps) - function(x)) / eps;
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPNumericalAnalysisOld::numDiff_2(real_t (*function)(std::vector<real_t>), std::vector<real_t> x, int axis1, int axis2) {
|
|
||||||
//For Hessians.
|
|
||||||
real_t eps = 1e-5;
|
|
||||||
|
|
||||||
std::vector<real_t> x_pp = x;
|
|
||||||
x_pp[axis1] += eps;
|
|
||||||
x_pp[axis2] += eps;
|
|
||||||
|
|
||||||
std::vector<real_t> x_np = x;
|
|
||||||
x_np[axis2] += eps;
|
|
||||||
|
|
||||||
std::vector<real_t> x_pn = x;
|
|
||||||
x_pn[axis1] += eps;
|
|
||||||
|
|
||||||
return (function(x_pp) - function(x_np) - function(x_pn) + function(x)) / (eps * eps);
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPNumericalAnalysisOld::numDiff_3(real_t (*function)(std::vector<real_t>), std::vector<real_t> x, int axis1, int axis2, int axis3) {
|
|
||||||
// For third order derivative tensors.
|
|
||||||
// NOTE: Approximations do not appear to be accurate for sinusodial functions...
|
|
||||||
// Should revisit this later.
|
|
||||||
real_t eps = 1e-5;
|
|
||||||
|
|
||||||
std::vector<real_t> x_ppp = x;
|
|
||||||
x_ppp[axis1] += eps;
|
|
||||||
x_ppp[axis2] += eps;
|
|
||||||
x_ppp[axis3] += eps;
|
|
||||||
|
|
||||||
std::vector<real_t> x_npp = x;
|
|
||||||
x_npp[axis2] += eps;
|
|
||||||
x_npp[axis3] += eps;
|
|
||||||
|
|
||||||
std::vector<real_t> x_pnp = x;
|
|
||||||
x_pnp[axis1] += eps;
|
|
||||||
x_pnp[axis3] += eps;
|
|
||||||
|
|
||||||
std::vector<real_t> x_nnp = x;
|
|
||||||
x_nnp[axis3] += eps;
|
|
||||||
|
|
||||||
std::vector<real_t> x_ppn = x;
|
|
||||||
x_ppn[axis1] += eps;
|
|
||||||
x_ppn[axis2] += eps;
|
|
||||||
|
|
||||||
std::vector<real_t> x_npn = x;
|
|
||||||
x_npn[axis2] += eps;
|
|
||||||
|
|
||||||
std::vector<real_t> x_pnn = x;
|
|
||||||
x_pnn[axis1] += eps;
|
|
||||||
|
|
||||||
real_t thirdAxis = function(x_ppp) - function(x_npp) - function(x_pnp) + function(x_nnp);
|
|
||||||
real_t noThirdAxis = function(x_ppn) - function(x_npn) - function(x_pnn) + function(x);
|
|
||||||
return (thirdAxis - noThirdAxis) / (eps * eps * eps);
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPNumericalAnalysisOld::newtonRaphsonMethod(real_t (*function)(real_t), real_t x_0, real_t epoch_num) {
|
|
||||||
real_t x = x_0;
|
|
||||||
for (int i = 0; i < epoch_num; i++) {
|
|
||||||
x -= function(x) / numDiff(function, x);
|
|
||||||
}
|
|
||||||
return x;
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPNumericalAnalysisOld::halleyMethod(real_t (*function)(real_t), real_t x_0, real_t epoch_num) {
|
|
||||||
real_t x = x_0;
|
|
||||||
for (int i = 0; i < epoch_num; i++) {
|
|
||||||
x -= ((2 * function(x) * numDiff(function, x)) / (2 * numDiff(function, x) * numDiff(function, x) - function(x) * numDiff_2(function, x)));
|
|
||||||
}
|
|
||||||
return x;
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPNumericalAnalysisOld::invQuadraticInterpolation(real_t (*function)(real_t), std::vector<real_t> x_0, int epoch_num) {
|
|
||||||
real_t x = 0;
|
|
||||||
std::vector<real_t> currentThree = x_0;
|
|
||||||
for (int i = 0; i < epoch_num; i++) {
|
|
||||||
real_t t1 = ((function(currentThree[1]) * function(currentThree[2])) / ((function(currentThree[0]) - function(currentThree[1])) * (function(currentThree[0]) - function(currentThree[2])))) * currentThree[0];
|
|
||||||
real_t t2 = ((function(currentThree[0]) * function(currentThree[2])) / ((function(currentThree[1]) - function(currentThree[0])) * (function(currentThree[1]) - function(currentThree[2])))) * currentThree[1];
|
|
||||||
real_t t3 = ((function(currentThree[0]) * function(currentThree[1])) / ((function(currentThree[2]) - function(currentThree[0])) * (function(currentThree[2]) - function(currentThree[1])))) * currentThree[2];
|
|
||||||
x = t1 + t2 + t3;
|
|
||||||
|
|
||||||
currentThree.erase(currentThree.begin());
|
|
||||||
currentThree.push_back(x);
|
|
||||||
}
|
|
||||||
return x;
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPNumericalAnalysisOld::eulerianMethod(real_t (*derivative)(real_t), std::vector<real_t> q_0, real_t p, real_t h) {
|
|
||||||
int max_epoch = static_cast<int>((p - q_0[0]) / h);
|
|
||||||
real_t x = q_0[0];
|
|
||||||
real_t y = q_0[1];
|
|
||||||
for (int i = 0; i < max_epoch; i++) {
|
|
||||||
y = y + h * derivative(x);
|
|
||||||
x += h;
|
|
||||||
}
|
|
||||||
return y;
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPNumericalAnalysisOld::eulerianMethod(real_t (*derivative)(std::vector<real_t>), std::vector<real_t> q_0, real_t p, real_t h) {
|
|
||||||
int max_epoch = static_cast<int>((p - q_0[0]) / h);
|
|
||||||
real_t x = q_0[0];
|
|
||||||
real_t y = q_0[1];
|
|
||||||
for (int i = 0; i < max_epoch; i++) {
|
|
||||||
y = y + h * derivative({ x, y });
|
|
||||||
x += h;
|
|
||||||
}
|
|
||||||
return y;
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPNumericalAnalysisOld::growthMethod(real_t C, real_t k, real_t t) {
|
|
||||||
/*
|
|
||||||
dP/dt = kP
|
|
||||||
dP/P = kdt
|
|
||||||
integral(1/P)dP = integral(k) dt
|
|
||||||
ln|P| = kt + C_initial
|
|
||||||
|P| = e^(kt + C_initial)
|
|
||||||
|P| = e^(C_initial) * e^(kt)
|
|
||||||
P = +/- e^(C_initial) * e^(kt)
|
|
||||||
P = C * e^(kt)
|
|
||||||
*/
|
|
||||||
|
|
||||||
// auto growthFunction = [&C, &k](real_t t) { return C * exp(k * t); };
|
|
||||||
return C * std::exp(k * t);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPNumericalAnalysisOld::jacobian(real_t (*function)(std::vector<real_t>), std::vector<real_t> x) {
|
|
||||||
std::vector<real_t> jacobian;
|
|
||||||
jacobian.resize(x.size());
|
|
||||||
for (uint32_t i = 0; i < jacobian.size(); i++) {
|
|
||||||
jacobian[i] = numDiff(function, x, i); // Derivative w.r.t axis i evaluated at x. For all x_i.
|
|
||||||
}
|
|
||||||
return jacobian;
|
|
||||||
}
|
|
||||||
std::vector<std::vector<real_t>> MLPPNumericalAnalysisOld::hessian(real_t (*function)(std::vector<real_t>), std::vector<real_t> x) {
|
|
||||||
std::vector<std::vector<real_t>> hessian;
|
|
||||||
hessian.resize(x.size());
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < hessian.size(); i++) {
|
|
||||||
hessian[i].resize(x.size());
|
|
||||||
}
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < hessian.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < hessian[i].size(); j++) {
|
|
||||||
hessian[i][j] = numDiff_2(function, x, i, j);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return hessian;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> MLPPNumericalAnalysisOld::thirdOrderTensor(real_t (*function)(std::vector<real_t>), std::vector<real_t> x) {
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> tensor;
|
|
||||||
tensor.resize(x.size());
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < tensor.size(); i++) {
|
|
||||||
tensor[i].resize(x.size());
|
|
||||||
for (uint32_t j = 0; j < tensor[i].size(); j++) {
|
|
||||||
tensor[i][j].resize(x.size());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < tensor.size(); i++) { // O(n^3) time complexity :(
|
|
||||||
for (uint32_t j = 0; j < tensor[i].size(); j++) {
|
|
||||||
for (uint32_t k = 0; k < tensor[i][j].size(); k++)
|
|
||||||
tensor[i][j][k] = numDiff_3(function, x, i, j, k);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return tensor;
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPNumericalAnalysisOld::constantApproximation(real_t (*function)(std::vector<real_t>), std::vector<real_t> c) {
|
|
||||||
return function(c);
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPNumericalAnalysisOld::linearApproximation(real_t (*function)(std::vector<real_t>), std::vector<real_t> c, std::vector<real_t> x) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
return constantApproximation(function, c) + alg.matmult(alg.transpose({ jacobian(function, c) }), { alg.subtraction(x, c) })[0][0];
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPNumericalAnalysisOld::quadraticApproximation(real_t (*function)(std::vector<real_t>), std::vector<real_t> c, std::vector<real_t> x) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
return linearApproximation(function, c, x) + 0.5 * alg.matmult({ (alg.subtraction(x, c)) }, alg.matmult(hessian(function, c), alg.transpose({ alg.subtraction(x, c) })))[0][0];
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPNumericalAnalysisOld::cubicApproximation(real_t (*function)(std::vector<real_t>), std::vector<real_t> c, std::vector<real_t> x) {
|
|
||||||
/*
|
|
||||||
Not completely sure as the literature seldom discusses the third order taylor approximation,
|
|
||||||
in particular for multivariate cases, but ostensibly, the matrix/tensor/vector multiplies
|
|
||||||
should look something like this:
|
|
||||||
|
|
||||||
(N x N x N) (N x 1) [tensor vector mult] => (N x N x 1) => (N x N)
|
|
||||||
Perform remaining multiplies as done for the 2nd order approximation.
|
|
||||||
Result is a scalar.
|
|
||||||
*/
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
std::vector<std::vector<real_t>> resultMat = alg.tensor_vec_mult(thirdOrderTensor(function, c), alg.subtraction(x, c));
|
|
||||||
real_t resultScalar = alg.matmult({ (alg.subtraction(x, c)) }, alg.matmult(resultMat, alg.transpose({ alg.subtraction(x, c) })))[0][0];
|
|
||||||
|
|
||||||
return quadraticApproximation(function, c, x) + (1 / 6) * resultScalar;
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPNumericalAnalysisOld::laplacian(real_t (*function)(std::vector<real_t>), std::vector<real_t> x) {
|
|
||||||
std::vector<std::vector<real_t>> hessian_matrix = hessian(function, x);
|
|
||||||
real_t laplacian = 0;
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < hessian_matrix.size(); i++) {
|
|
||||||
laplacian += hessian_matrix[i][i]; // homogenous 2nd derivs w.r.t i, then i
|
|
||||||
}
|
|
||||||
|
|
||||||
return laplacian;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string MLPPNumericalAnalysisOld::secondPartialDerivativeTest(real_t (*function)(std::vector<real_t>), std::vector<real_t> x) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
std::vector<std::vector<real_t>> hessianMatrix = hessian(function, x);
|
|
||||||
/*
|
|
||||||
The reason we do this is because the 2nd partial derivative test is less conclusive for functions of variables greater than
|
|
||||||
2, and the calculations specific to the bivariate case are less computationally intensive.
|
|
||||||
*/
|
|
||||||
if (x.size() == 2) {
|
|
||||||
real_t det = alg.det(hessianMatrix, hessianMatrix.size());
|
|
||||||
real_t secondDerivative = numDiff_2(function, x, 0, 0);
|
|
||||||
if (secondDerivative > 0 && det > 0) {
|
|
||||||
return "min";
|
|
||||||
} else if (secondDerivative < 0 && det > 0) {
|
|
||||||
return "max";
|
|
||||||
} else if (det < 0) {
|
|
||||||
return "saddle";
|
|
||||||
} else {
|
|
||||||
return "test was inconclusive";
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (alg.positiveDefiniteChecker(hessianMatrix)) {
|
|
||||||
return "min";
|
|
||||||
} else if (alg.negativeDefiniteChecker(hessianMatrix)) {
|
|
||||||
return "max";
|
|
||||||
} else if (!alg.zeroEigenvalue(hessianMatrix)) {
|
|
||||||
return "saddle";
|
|
||||||
} else {
|
|
||||||
return "test was inconclusive";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,59 +0,0 @@
|
|||||||
|
|
||||||
#ifndef MLPP_NUMERICAL_ANALYSIS_OLD_H
|
|
||||||
#define MLPP_NUMERICAL_ANALYSIS_OLD_H
|
|
||||||
|
|
||||||
//
|
|
||||||
// NumericalAnalysis.hpp
|
|
||||||
//
|
|
||||||
//
|
|
||||||
|
|
||||||
#include "core/math/math_defs.h"
|
|
||||||
|
|
||||||
#include "core/object/reference.h"
|
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
class MLPPNumericalAnalysisOld {
|
|
||||||
public:
|
|
||||||
/* A numerical method for derivatives is used. This may be subject to change,
|
|
||||||
as an analytical method for calculating derivatives will most likely be used in
|
|
||||||
the future.
|
|
||||||
*/
|
|
||||||
real_t numDiff(real_t (*function)(real_t), real_t x);
|
|
||||||
real_t numDiff_2(real_t (*function)(real_t), real_t x);
|
|
||||||
real_t numDiff_3(real_t (*function)(real_t), real_t x);
|
|
||||||
|
|
||||||
real_t constantApproximation(real_t (*function)(real_t), real_t c);
|
|
||||||
real_t linearApproximation(real_t (*function)(real_t), real_t c, real_t x);
|
|
||||||
real_t quadraticApproximation(real_t (*function)(real_t), real_t c, real_t x);
|
|
||||||
real_t cubicApproximation(real_t (*function)(real_t), real_t c, real_t x);
|
|
||||||
|
|
||||||
real_t numDiff(real_t (*function)(std::vector<real_t>), std::vector<real_t> x, int axis);
|
|
||||||
real_t numDiff_2(real_t (*function)(std::vector<real_t>), std::vector<real_t> x, int axis1, int axis2);
|
|
||||||
real_t numDiff_3(real_t (*function)(std::vector<real_t>), std::vector<real_t> x, int axis1, int axis2, int axis3);
|
|
||||||
|
|
||||||
real_t newtonRaphsonMethod(real_t (*function)(real_t), real_t x_0, real_t epoch_num);
|
|
||||||
real_t halleyMethod(real_t (*function)(real_t), real_t x_0, real_t epoch_num);
|
|
||||||
real_t invQuadraticInterpolation(real_t (*function)(real_t), std::vector<real_t> x_0, int epoch_num);
|
|
||||||
|
|
||||||
real_t eulerianMethod(real_t (*derivative)(real_t), std::vector<real_t> q_0, real_t p, real_t h); // Euler's method for solving diffrential equations.
|
|
||||||
real_t eulerianMethod(real_t (*derivative)(std::vector<real_t>), std::vector<real_t> q_0, real_t p, real_t h); // Euler's method for solving diffrential equations.
|
|
||||||
|
|
||||||
real_t growthMethod(real_t C, real_t k, real_t t); // General growth-based diffrential equations can be solved by seperation of variables.
|
|
||||||
|
|
||||||
std::vector<real_t> jacobian(real_t (*function)(std::vector<real_t>), std::vector<real_t> x); // Indeed, for functions with scalar outputs the Jacobians will be vectors.
|
|
||||||
std::vector<std::vector<real_t>> hessian(real_t (*function)(std::vector<real_t>), std::vector<real_t> x);
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> thirdOrderTensor(real_t (*function)(std::vector<real_t>), std::vector<real_t> x);
|
|
||||||
|
|
||||||
real_t constantApproximation(real_t (*function)(std::vector<real_t>), std::vector<real_t> c);
|
|
||||||
real_t linearApproximation(real_t (*function)(std::vector<real_t>), std::vector<real_t> c, std::vector<real_t> x);
|
|
||||||
real_t quadraticApproximation(real_t (*function)(std::vector<real_t>), std::vector<real_t> c, std::vector<real_t> x);
|
|
||||||
real_t cubicApproximation(real_t (*function)(std::vector<real_t>), std::vector<real_t> c, std::vector<real_t> x);
|
|
||||||
|
|
||||||
real_t laplacian(real_t (*function)(std::vector<real_t>), std::vector<real_t> x); // laplacian
|
|
||||||
|
|
||||||
std::string secondPartialDerivativeTest(real_t (*function)(std::vector<real_t>), std::vector<real_t> x);
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif /* NumericalAnalysis_hpp */
|
|
@ -1,42 +0,0 @@
|
|||||||
//
|
|
||||||
// OutlierFinder.cpp
|
|
||||||
//
|
|
||||||
// Created by Marc Melikyan on 11/13/20.
|
|
||||||
//
|
|
||||||
|
|
||||||
#include "outlier_finder_old.h"
|
|
||||||
|
|
||||||
#include "../stat/stat_old.h"
|
|
||||||
#include <iostream>
|
|
||||||
|
|
||||||
|
|
||||||
MLPPOutlierFinderOld::MLPPOutlierFinderOld(int threshold) :
|
|
||||||
threshold(threshold) {
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPOutlierFinderOld::modelSetTest(std::vector<std::vector<real_t>> inputSet) {
|
|
||||||
MLPPStatOld stat;
|
|
||||||
std::vector<std::vector<real_t>> outliers;
|
|
||||||
outliers.resize(inputSet.size());
|
|
||||||
for (uint32_t i = 0; i < inputSet.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < inputSet[i].size(); j++) {
|
|
||||||
real_t z = (inputSet[i][j] - stat.mean(inputSet[i])) / stat.standardDeviation(inputSet[i]);
|
|
||||||
if (abs(z) > threshold) {
|
|
||||||
outliers[i].push_back(inputSet[i][j]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return outliers;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPOutlierFinderOld::modelTest(std::vector<real_t> inputSet) {
|
|
||||||
MLPPStatOld stat;
|
|
||||||
std::vector<real_t> outliers;
|
|
||||||
for (uint32_t i = 0; i < inputSet.size(); i++) {
|
|
||||||
real_t z = (inputSet[i] - stat.mean(inputSet)) / stat.standardDeviation(inputSet);
|
|
||||||
if (abs(z) > threshold) {
|
|
||||||
outliers.push_back(inputSet[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return outliers;
|
|
||||||
}
|
|
@ -1,30 +0,0 @@
|
|||||||
|
|
||||||
#ifndef MLPP_OUTLIER_FINDER_OLD_H
|
|
||||||
#define MLPP_OUTLIER_FINDER_OLD_H
|
|
||||||
|
|
||||||
//
|
|
||||||
// OutlierFinder.hpp
|
|
||||||
//
|
|
||||||
// Created by Marc Melikyan on 11/13/20.
|
|
||||||
//
|
|
||||||
|
|
||||||
#include "core/math/math_defs.h"
|
|
||||||
#include "core/int_types.h"
|
|
||||||
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
|
|
||||||
class MLPPOutlierFinderOld {
|
|
||||||
public:
|
|
||||||
// Cnstr
|
|
||||||
MLPPOutlierFinderOld(int threshold);
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> modelSetTest(std::vector<std::vector<real_t>> inputSet);
|
|
||||||
std::vector<real_t> modelTest(std::vector<real_t> inputSet);
|
|
||||||
|
|
||||||
// Variables required
|
|
||||||
int threshold;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
#endif /* OutlierFinder_hpp */
|
|
@ -1,135 +0,0 @@
|
|||||||
//
|
|
||||||
// OutputLayer.cpp
|
|
||||||
//
|
|
||||||
// Created by Marc Melikyan on 11/4/20.
|
|
||||||
//
|
|
||||||
|
|
||||||
#include "output_layer_old.h"
|
|
||||||
#include "../lin_alg/lin_alg_old.h"
|
|
||||||
#include "../utilities/utilities.h"
|
|
||||||
|
|
||||||
#include <iostream>
|
|
||||||
#include <random>
|
|
||||||
|
|
||||||
MLPPOldOutputLayer::MLPPOldOutputLayer(int p_n_hidden, std::string p_activation, std::string p_cost, std::vector<std::vector<real_t>> p_input, std::string p_weightInit, std::string p_reg, real_t p_lambda, real_t p_alpha) {
|
|
||||||
n_hidden = p_n_hidden;
|
|
||||||
activation = p_activation;
|
|
||||||
cost = p_cost;
|
|
||||||
input = p_input;
|
|
||||||
weightInit = p_weightInit;
|
|
||||||
reg = p_reg;
|
|
||||||
lambda = p_lambda;
|
|
||||||
alpha = p_alpha;
|
|
||||||
|
|
||||||
weights = MLPPUtilities::weightInitialization(n_hidden, weightInit);
|
|
||||||
bias = MLPPUtilities::biasInitialization();
|
|
||||||
|
|
||||||
activation_map["Linear"] = &MLPPActivationOld::linear;
|
|
||||||
activationTest_map["Linear"] = &MLPPActivationOld::linear;
|
|
||||||
|
|
||||||
activation_map["Sigmoid"] = &MLPPActivationOld::sigmoid;
|
|
||||||
activationTest_map["Sigmoid"] = &MLPPActivationOld::sigmoid;
|
|
||||||
|
|
||||||
activation_map["Swish"] = &MLPPActivationOld::swish;
|
|
||||||
activationTest_map["Swish"] = &MLPPActivationOld::swish;
|
|
||||||
|
|
||||||
activation_map["Mish"] = &MLPPActivationOld::mish;
|
|
||||||
activationTest_map["Mish"] = &MLPPActivationOld::mish;
|
|
||||||
|
|
||||||
activation_map["SinC"] = &MLPPActivationOld::sinc;
|
|
||||||
activationTest_map["SinC"] = &MLPPActivationOld::sinc;
|
|
||||||
|
|
||||||
activation_map["Softplus"] = &MLPPActivationOld::softplus;
|
|
||||||
activationTest_map["Softplus"] = &MLPPActivationOld::softplus;
|
|
||||||
|
|
||||||
activation_map["Softsign"] = &MLPPActivationOld::softsign;
|
|
||||||
activationTest_map["Softsign"] = &MLPPActivationOld::softsign;
|
|
||||||
|
|
||||||
activation_map["CLogLog"] = &MLPPActivationOld::cloglog;
|
|
||||||
activationTest_map["CLogLog"] = &MLPPActivationOld::cloglog;
|
|
||||||
|
|
||||||
activation_map["Logit"] = &MLPPActivationOld::logit;
|
|
||||||
activationTest_map["Logit"] = &MLPPActivationOld::logit;
|
|
||||||
|
|
||||||
activation_map["GaussianCDF"] = &MLPPActivationOld::gaussianCDF;
|
|
||||||
activationTest_map["GaussianCDF"] = &MLPPActivationOld::gaussianCDF;
|
|
||||||
|
|
||||||
activation_map["RELU"] = &MLPPActivationOld::RELU;
|
|
||||||
activationTest_map["RELU"] = &MLPPActivationOld::RELU;
|
|
||||||
|
|
||||||
activation_map["GELU"] = &MLPPActivationOld::GELU;
|
|
||||||
activationTest_map["GELU"] = &MLPPActivationOld::GELU;
|
|
||||||
|
|
||||||
activation_map["Sign"] = &MLPPActivationOld::sign;
|
|
||||||
activationTest_map["Sign"] = &MLPPActivationOld::sign;
|
|
||||||
|
|
||||||
activation_map["UnitStep"] = &MLPPActivationOld::unitStep;
|
|
||||||
activationTest_map["UnitStep"] = &MLPPActivationOld::unitStep;
|
|
||||||
|
|
||||||
activation_map["Sinh"] = &MLPPActivationOld::sinh;
|
|
||||||
activationTest_map["Sinh"] = &MLPPActivationOld::sinh;
|
|
||||||
|
|
||||||
activation_map["Cosh"] = &MLPPActivationOld::cosh;
|
|
||||||
activationTest_map["Cosh"] = &MLPPActivationOld::cosh;
|
|
||||||
|
|
||||||
activation_map["Tanh"] = &MLPPActivationOld::tanh;
|
|
||||||
activationTest_map["Tanh"] = &MLPPActivationOld::tanh;
|
|
||||||
|
|
||||||
activation_map["Csch"] = &MLPPActivationOld::csch;
|
|
||||||
activationTest_map["Csch"] = &MLPPActivationOld::csch;
|
|
||||||
|
|
||||||
activation_map["Sech"] = &MLPPActivationOld::sech;
|
|
||||||
activationTest_map["Sech"] = &MLPPActivationOld::sech;
|
|
||||||
|
|
||||||
activation_map["Coth"] = &MLPPActivationOld::coth;
|
|
||||||
activationTest_map["Coth"] = &MLPPActivationOld::coth;
|
|
||||||
|
|
||||||
activation_map["Arsinh"] = &MLPPActivationOld::arsinh;
|
|
||||||
activationTest_map["Arsinh"] = &MLPPActivationOld::arsinh;
|
|
||||||
|
|
||||||
activation_map["Arcosh"] = &MLPPActivationOld::arcosh;
|
|
||||||
activationTest_map["Arcosh"] = &MLPPActivationOld::arcosh;
|
|
||||||
|
|
||||||
activation_map["Artanh"] = &MLPPActivationOld::artanh;
|
|
||||||
activationTest_map["Artanh"] = &MLPPActivationOld::artanh;
|
|
||||||
|
|
||||||
activation_map["Arcsch"] = &MLPPActivationOld::arcsch;
|
|
||||||
activationTest_map["Arcsch"] = &MLPPActivationOld::arcsch;
|
|
||||||
|
|
||||||
activation_map["Arsech"] = &MLPPActivationOld::arsech;
|
|
||||||
activationTest_map["Arsech"] = &MLPPActivationOld::arsech;
|
|
||||||
|
|
||||||
activation_map["Arcoth"] = &MLPPActivationOld::arcoth;
|
|
||||||
activationTest_map["Arcoth"] = &MLPPActivationOld::arcoth;
|
|
||||||
|
|
||||||
costDeriv_map["MSE"] = &MLPPCostOld::MSEDeriv;
|
|
||||||
cost_map["MSE"] = &MLPPCostOld::MSE;
|
|
||||||
costDeriv_map["RMSE"] = &MLPPCostOld::RMSEDeriv;
|
|
||||||
cost_map["RMSE"] = &MLPPCostOld::RMSE;
|
|
||||||
costDeriv_map["MAE"] = &MLPPCostOld::MAEDeriv;
|
|
||||||
cost_map["MAE"] = &MLPPCostOld::MAE;
|
|
||||||
costDeriv_map["MBE"] = &MLPPCostOld::MBEDeriv;
|
|
||||||
cost_map["MBE"] = &MLPPCostOld::MBE;
|
|
||||||
costDeriv_map["LogLoss"] = &MLPPCostOld::LogLossDeriv;
|
|
||||||
cost_map["LogLoss"] = &MLPPCostOld::LogLoss;
|
|
||||||
costDeriv_map["CrossEntropy"] = &MLPPCostOld::CrossEntropyDeriv;
|
|
||||||
cost_map["CrossEntropy"] = &MLPPCostOld::CrossEntropy;
|
|
||||||
costDeriv_map["HingeLoss"] = &MLPPCostOld::HingeLossDeriv;
|
|
||||||
cost_map["HingeLoss"] = &MLPPCostOld::HingeLoss;
|
|
||||||
costDeriv_map["WassersteinLoss"] = &MLPPCostOld::HingeLossDeriv;
|
|
||||||
cost_map["WassersteinLoss"] = &MLPPCostOld::HingeLoss;
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPOldOutputLayer::forwardPass() {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPActivationOld avn;
|
|
||||||
z = alg.scalarAdd(bias, alg.mat_vec_mult(input, weights));
|
|
||||||
a = (avn.*activation_map[activation])(z, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPOldOutputLayer::Test(std::vector<real_t> x) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPActivationOld avn;
|
|
||||||
z_test = alg.dot(weights, x) + bias;
|
|
||||||
a_test = (avn.*activationTest_map[activation])(z_test, false);
|
|
||||||
}
|
|
@ -1,65 +0,0 @@
|
|||||||
|
|
||||||
#ifndef MLPP_OUTPUT_LAYER_OLD_H
|
|
||||||
#define MLPP_OUTPUT_LAYER_OLD_H
|
|
||||||
|
|
||||||
//
|
|
||||||
// OutputLayer.hpp
|
|
||||||
//
|
|
||||||
// Created by Marc Melikyan on 11/4/20.
|
|
||||||
//
|
|
||||||
|
|
||||||
#include "core/math/math_defs.h"
|
|
||||||
#include "core/string/ustring.h"
|
|
||||||
|
|
||||||
#include "core/object/reference.h"
|
|
||||||
|
|
||||||
#include "../activation/activation_old.h"
|
|
||||||
#include "../cost/cost_old.h"
|
|
||||||
#include "../regularization/reg.h"
|
|
||||||
#include "../utilities/utilities.h"
|
|
||||||
|
|
||||||
#include "../lin_alg/mlpp_matrix.h"
|
|
||||||
#include "../lin_alg/mlpp_vector.h"
|
|
||||||
|
|
||||||
#include <map>
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
class MLPPOldOutputLayer {
|
|
||||||
public:
|
|
||||||
MLPPOldOutputLayer(int n_hidden, std::string activation, std::string cost, std::vector<std::vector<real_t>> input, std::string weightInit, std::string reg, real_t lambda, real_t alpha);
|
|
||||||
|
|
||||||
int n_hidden;
|
|
||||||
std::string activation;
|
|
||||||
std::string cost;
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> input;
|
|
||||||
|
|
||||||
std::vector<real_t> weights;
|
|
||||||
real_t bias;
|
|
||||||
|
|
||||||
std::vector<real_t> z;
|
|
||||||
std::vector<real_t> a;
|
|
||||||
|
|
||||||
std::map<std::string, std::vector<real_t> (MLPPActivationOld::*)(std::vector<real_t>, bool)> activation_map;
|
|
||||||
std::map<std::string, real_t (MLPPActivationOld::*)(real_t, bool)> activationTest_map;
|
|
||||||
std::map<std::string, real_t (MLPPCostOld::*)(std::vector<real_t>, std::vector<real_t>)> cost_map;
|
|
||||||
std::map<std::string, std::vector<real_t> (MLPPCostOld::*)(std::vector<real_t>, std::vector<real_t>)> costDeriv_map;
|
|
||||||
|
|
||||||
real_t z_test;
|
|
||||||
real_t a_test;
|
|
||||||
|
|
||||||
std::vector<real_t> delta;
|
|
||||||
|
|
||||||
// Regularization Params
|
|
||||||
std::string reg;
|
|
||||||
real_t lambda; /* Regularization Parameter */
|
|
||||||
real_t alpha; /* This is the controlling param for Elastic Net*/
|
|
||||||
|
|
||||||
std::string weightInit;
|
|
||||||
|
|
||||||
void forwardPass();
|
|
||||||
void Test(std::vector<real_t> x);
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif /* OutputLayer_hpp */
|
|
@ -1,59 +0,0 @@
|
|||||||
//
|
|
||||||
// PCA.cpp
|
|
||||||
//
|
|
||||||
// Created by Marc Melikyan on 10/2/20.
|
|
||||||
//
|
|
||||||
|
|
||||||
#include "pca_old.h"
|
|
||||||
#include "../data/data_old.h"
|
|
||||||
#include "../lin_alg/lin_alg_old.h"
|
|
||||||
|
|
||||||
#include <iostream>
|
|
||||||
#include <random>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
MLPPPCAOld::MLPPPCAOld(std::vector<std::vector<real_t>> inputSet, int k) :
|
|
||||||
inputSet(inputSet), k(k) {
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPPCAOld::principalComponents() {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPDataOld data;
|
|
||||||
|
|
||||||
MLPPLinAlgOld::SVDResultOld svr_res = alg.SVD(alg.cov(inputSet));
|
|
||||||
X_normalized = data.meanCentering(inputSet);
|
|
||||||
U_reduce.resize(svr_res.U.size());
|
|
||||||
for (int i = 0; i < k; i++) {
|
|
||||||
for (uint32_t j = 0; j < svr_res.U.size(); j++) {
|
|
||||||
U_reduce[j].push_back(svr_res.U[j][i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Z = alg.matmult(alg.transpose(U_reduce), X_normalized);
|
|
||||||
return Z;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Simply tells us the percentage of variance maintained.
|
|
||||||
real_t MLPPPCAOld::score() {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
std::vector<std::vector<real_t>> X_approx = alg.matmult(U_reduce, Z);
|
|
||||||
real_t num = 0;
|
|
||||||
real_t den = 0;
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < X_normalized.size(); i++) {
|
|
||||||
num += alg.norm_sq(alg.subtraction(X_normalized[i], X_approx[i]));
|
|
||||||
}
|
|
||||||
|
|
||||||
num /= X_normalized.size();
|
|
||||||
for (uint32_t i = 0; i < X_normalized.size(); i++) {
|
|
||||||
den += alg.norm_sq(X_normalized[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
den /= X_normalized.size();
|
|
||||||
if (den == 0) {
|
|
||||||
den += 1e-10; // For numerical sanity as to not recieve a domain error
|
|
||||||
}
|
|
||||||
|
|
||||||
return 1 - num / den;
|
|
||||||
}
|
|
||||||
|
|
@ -1,31 +0,0 @@
|
|||||||
|
|
||||||
#ifndef MLPP_PCA_OLD_H
|
|
||||||
#define MLPP_PCA_OLD_H
|
|
||||||
|
|
||||||
//
|
|
||||||
// PCA.hpp
|
|
||||||
//
|
|
||||||
// Created by Marc Melikyan on 10/2/20.
|
|
||||||
//
|
|
||||||
|
|
||||||
#include "core/math/math_defs.h"
|
|
||||||
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
|
|
||||||
class MLPPPCAOld {
|
|
||||||
public:
|
|
||||||
MLPPPCAOld(std::vector<std::vector<real_t>> inputSet, int k);
|
|
||||||
std::vector<std::vector<real_t>> principalComponents();
|
|
||||||
real_t score();
|
|
||||||
|
|
||||||
private:
|
|
||||||
std::vector<std::vector<real_t>> inputSet;
|
|
||||||
std::vector<std::vector<real_t>> X_normalized;
|
|
||||||
std::vector<std::vector<real_t>> U_reduce;
|
|
||||||
std::vector<std::vector<real_t>> Z;
|
|
||||||
int k;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
#endif /* PCA_hpp */
|
|
@ -1,248 +0,0 @@
|
|||||||
//
|
|
||||||
// ProbitReg.cpp
|
|
||||||
//
|
|
||||||
// Created by Marc Melikyan on 10/2/20.
|
|
||||||
//
|
|
||||||
|
|
||||||
#include "probit_reg_old.h"
|
|
||||||
#include "../activation/activation_old.h"
|
|
||||||
#include "../cost/cost_old.h"
|
|
||||||
#include "../lin_alg/lin_alg_old.h"
|
|
||||||
#include "../regularization/reg_old.h"
|
|
||||||
#include "../utilities/utilities.h"
|
|
||||||
|
|
||||||
#include <iostream>
|
|
||||||
#include <random>
|
|
||||||
|
|
||||||
#ifndef M_PI
|
|
||||||
#define M_PI 3.141592653
|
|
||||||
#endif
|
|
||||||
|
|
||||||
MLPPProbitRegOld::MLPPProbitRegOld(std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet, std::string reg, real_t lambda, real_t alpha) :
|
|
||||||
inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha) {
|
|
||||||
y_hat.resize(n);
|
|
||||||
weights = MLPPUtilities::weightInitialization(k);
|
|
||||||
bias = MLPPUtilities::biasInitialization();
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPProbitRegOld::modelSetTest(std::vector<std::vector<real_t>> X) {
|
|
||||||
return Evaluate(X);
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPProbitRegOld::modelTest(std::vector<real_t> x) {
|
|
||||||
return Evaluate(x);
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPProbitRegOld::gradientDescent(real_t learning_rate, int max_epoch, bool UI) {
|
|
||||||
MLPPActivationOld avn;
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPRegOld regularization;
|
|
||||||
real_t cost_prev = 0;
|
|
||||||
int epoch = 1;
|
|
||||||
forwardPass();
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
cost_prev = Cost(y_hat, outputSet);
|
|
||||||
|
|
||||||
std::vector<real_t> error = alg.subtraction(y_hat, outputSet);
|
|
||||||
|
|
||||||
// Calculating the weight gradients
|
|
||||||
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate / n, alg.mat_vec_mult(alg.transpose(inputSet), alg.hadamard_product(error, avn.gaussianCDF(z, 1)))));
|
|
||||||
weights = regularization.regWeights(weights, lambda, alpha, reg);
|
|
||||||
|
|
||||||
// Calculating the bias gradients
|
|
||||||
bias -= learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.gaussianCDF(z, 1))) / n;
|
|
||||||
forwardPass();
|
|
||||||
|
|
||||||
if (UI) {
|
|
||||||
MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
|
|
||||||
MLPPUtilities::UI(weights, bias);
|
|
||||||
}
|
|
||||||
epoch++;
|
|
||||||
|
|
||||||
if (epoch > max_epoch) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPProbitRegOld::MLE(real_t learning_rate, int max_epoch, bool UI) {
|
|
||||||
MLPPActivationOld avn;
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPRegOld regularization;
|
|
||||||
real_t cost_prev = 0;
|
|
||||||
int epoch = 1;
|
|
||||||
forwardPass();
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
cost_prev = Cost(y_hat, outputSet);
|
|
||||||
|
|
||||||
std::vector<real_t> error = alg.subtraction(outputSet, y_hat);
|
|
||||||
|
|
||||||
// Calculating the weight gradients
|
|
||||||
weights = alg.addition(weights, alg.scalarMultiply(learning_rate / n, alg.mat_vec_mult(alg.transpose(inputSet), alg.hadamard_product(error, avn.gaussianCDF(z, 1)))));
|
|
||||||
weights = regularization.regWeights(weights, lambda, alpha, reg);
|
|
||||||
|
|
||||||
// Calculating the bias gradients
|
|
||||||
bias += learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.gaussianCDF(z, 1))) / n;
|
|
||||||
forwardPass();
|
|
||||||
|
|
||||||
if (UI) {
|
|
||||||
MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
|
|
||||||
MLPPUtilities::UI(weights, bias);
|
|
||||||
}
|
|
||||||
epoch++;
|
|
||||||
|
|
||||||
if (epoch > max_epoch) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPProbitRegOld::SGD(real_t learning_rate, int max_epoch, bool UI) {
|
|
||||||
// NOTE: ∂y_hat/∂z is sparse
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPRegOld regularization;
|
|
||||||
real_t cost_prev = 0;
|
|
||||||
int epoch = 1;
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
std::random_device rd;
|
|
||||||
std::default_random_engine generator(rd());
|
|
||||||
std::uniform_int_distribution<int> distribution(0, int(n - 1));
|
|
||||||
int outputIndex = distribution(generator);
|
|
||||||
|
|
||||||
real_t y_hat = Evaluate(inputSet[outputIndex]);
|
|
||||||
real_t z = propagate(inputSet[outputIndex]);
|
|
||||||
cost_prev = Cost({ y_hat }, { outputSet[outputIndex] });
|
|
||||||
|
|
||||||
real_t error = y_hat - outputSet[outputIndex];
|
|
||||||
|
|
||||||
// Weight Updation
|
|
||||||
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate * error * ((1 / sqrt(2 * M_PI)) * exp(-z * z / 2)), inputSet[outputIndex]));
|
|
||||||
weights = regularization.regWeights(weights, lambda, alpha, reg);
|
|
||||||
|
|
||||||
// Bias updation
|
|
||||||
bias -= learning_rate * error * ((1 / sqrt(2 * M_PI)) * exp(-z * z / 2));
|
|
||||||
|
|
||||||
y_hat = Evaluate({ inputSet[outputIndex] });
|
|
||||||
|
|
||||||
if (UI) {
|
|
||||||
MLPPUtilities::CostInfo(epoch, cost_prev, Cost({ y_hat }, { outputSet[outputIndex] }));
|
|
||||||
MLPPUtilities::UI(weights, bias);
|
|
||||||
}
|
|
||||||
epoch++;
|
|
||||||
|
|
||||||
if (epoch > max_epoch) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
forwardPass();
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPProbitRegOld::MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI) {
|
|
||||||
MLPPActivationOld avn;
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPRegOld regularization;
|
|
||||||
real_t cost_prev = 0;
|
|
||||||
int epoch = 1;
|
|
||||||
|
|
||||||
// Creating the mini-batches
|
|
||||||
int n_mini_batch = n / mini_batch_size;
|
|
||||||
auto createMiniBatchesResult = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
|
|
||||||
auto inputMiniBatches = std::get<0>(createMiniBatchesResult);
|
|
||||||
auto outputMiniBatches = std::get<1>(createMiniBatchesResult);
|
|
||||||
|
|
||||||
// Creating the mini-batches
|
|
||||||
for (int i = 0; i < n_mini_batch; i++) {
|
|
||||||
std::vector<std::vector<real_t>> currentInputSet;
|
|
||||||
std::vector<real_t> currentOutputSet;
|
|
||||||
for (int j = 0; j < n / n_mini_batch; j++) {
|
|
||||||
currentInputSet.push_back(inputSet[n / n_mini_batch * i + j]);
|
|
||||||
currentOutputSet.push_back(outputSet[n / n_mini_batch * i + j]);
|
|
||||||
}
|
|
||||||
inputMiniBatches.push_back(currentInputSet);
|
|
||||||
outputMiniBatches.push_back(currentOutputSet);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (real_t(n) / real_t(n_mini_batch) - int(n / n_mini_batch) != 0) {
|
|
||||||
for (int i = 0; i < n - n / n_mini_batch * n_mini_batch; i++) {
|
|
||||||
inputMiniBatches[n_mini_batch - 1].push_back(inputSet[n / n_mini_batch * n_mini_batch + i]);
|
|
||||||
outputMiniBatches[n_mini_batch - 1].push_back(outputSet[n / n_mini_batch * n_mini_batch + i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
for (int i = 0; i < n_mini_batch; i++) {
|
|
||||||
std::vector<real_t> y_hat = Evaluate(inputMiniBatches[i]);
|
|
||||||
std::vector<real_t> z = propagate(inputMiniBatches[i]);
|
|
||||||
cost_prev = Cost(y_hat, outputMiniBatches[i]);
|
|
||||||
|
|
||||||
std::vector<real_t> error = alg.subtraction(y_hat, outputMiniBatches[i]);
|
|
||||||
|
|
||||||
// Calculating the weight gradients
|
|
||||||
weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate / outputMiniBatches.size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), alg.hadamard_product(error, avn.gaussianCDF(z, 1)))));
|
|
||||||
weights = regularization.regWeights(weights, lambda, alpha, reg);
|
|
||||||
|
|
||||||
// Calculating the bias gradients
|
|
||||||
bias -= learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.gaussianCDF(z, 1))) / outputMiniBatches.size();
|
|
||||||
y_hat = Evaluate(inputMiniBatches[i]);
|
|
||||||
|
|
||||||
if (UI) {
|
|
||||||
MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
|
|
||||||
MLPPUtilities::UI(weights, bias);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
epoch++;
|
|
||||||
if (epoch > max_epoch) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
forwardPass();
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPProbitRegOld::score() {
|
|
||||||
MLPPUtilities util;
|
|
||||||
return util.performance(y_hat, outputSet);
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPProbitRegOld::save(std::string fileName) {
|
|
||||||
MLPPUtilities util;
|
|
||||||
util.saveParameters(fileName, weights, bias);
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPProbitRegOld::Cost(std::vector<real_t> y_hat, std::vector<real_t> y) {
|
|
||||||
MLPPRegOld regularization;
|
|
||||||
class MLPPCostOld cost;
|
|
||||||
return cost.MSE(y_hat, y) + regularization.regTerm(weights, lambda, alpha, reg);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPProbitRegOld::Evaluate(std::vector<std::vector<real_t>> X) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPActivationOld avn;
|
|
||||||
return avn.gaussianCDF(alg.scalarAdd(bias, alg.mat_vec_mult(X, weights)));
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPProbitRegOld::propagate(std::vector<std::vector<real_t>> X) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
return alg.scalarAdd(bias, alg.mat_vec_mult(X, weights));
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPProbitRegOld::Evaluate(std::vector<real_t> x) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPActivationOld avn;
|
|
||||||
return avn.gaussianCDF(alg.dot(weights, x) + bias);
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPProbitRegOld::propagate(std::vector<real_t> x) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
return alg.dot(weights, x) + bias;
|
|
||||||
}
|
|
||||||
|
|
||||||
// gaussianCDF ( wTx + b )
|
|
||||||
void MLPPProbitRegOld::forwardPass() {
|
|
||||||
MLPPActivationOld avn;
|
|
||||||
|
|
||||||
z = propagate(inputSet);
|
|
||||||
y_hat = avn.gaussianCDF(z);
|
|
||||||
}
|
|
@ -1,53 +0,0 @@
|
|||||||
|
|
||||||
#ifndef MLPP_PROBIT_REG_OLD_H
|
|
||||||
#define MLPP_PROBIT_REG_OLD_H
|
|
||||||
|
|
||||||
//
|
|
||||||
// ProbitReg.hpp
|
|
||||||
//
|
|
||||||
// Created by Marc Melikyan on 10/2/20.
|
|
||||||
//
|
|
||||||
|
|
||||||
#include "core/math/math_defs.h"
|
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
class MLPPProbitRegOld {
|
|
||||||
public:
|
|
||||||
MLPPProbitRegOld(std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet, std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5);
|
|
||||||
std::vector<real_t> modelSetTest(std::vector<std::vector<real_t>> X);
|
|
||||||
real_t modelTest(std::vector<real_t> x);
|
|
||||||
void gradientDescent(real_t learning_rate, int max_epoch = 0, bool UI = false);
|
|
||||||
void MLE(real_t learning_rate, int max_epoch = 0, bool UI = false);
|
|
||||||
void SGD(real_t learning_rate, int max_epoch = 0, bool UI = false);
|
|
||||||
void MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI = false);
|
|
||||||
real_t score();
|
|
||||||
void save(std::string fileName);
|
|
||||||
|
|
||||||
private:
|
|
||||||
real_t Cost(std::vector<real_t> y_hat, std::vector<real_t> y);
|
|
||||||
|
|
||||||
std::vector<real_t> Evaluate(std::vector<std::vector<real_t>> X);
|
|
||||||
std::vector<real_t> propagate(std::vector<std::vector<real_t>> X);
|
|
||||||
real_t Evaluate(std::vector<real_t> x);
|
|
||||||
real_t propagate(std::vector<real_t> x);
|
|
||||||
void forwardPass();
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> inputSet;
|
|
||||||
std::vector<real_t> outputSet;
|
|
||||||
std::vector<real_t> z;
|
|
||||||
std::vector<real_t> y_hat;
|
|
||||||
std::vector<real_t> weights;
|
|
||||||
real_t bias;
|
|
||||||
|
|
||||||
int n;
|
|
||||||
int k;
|
|
||||||
|
|
||||||
// Regularization Params
|
|
||||||
std::string reg;
|
|
||||||
real_t lambda;
|
|
||||||
real_t alpha; /* This is the controlling param for Elastic Net*/
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif /* ProbitReg_hpp */
|
|
@ -1,166 +0,0 @@
|
|||||||
//
|
|
||||||
// Reg.cpp
|
|
||||||
//
|
|
||||||
// Created by Marc Melikyan on 1/16/21.
|
|
||||||
//
|
|
||||||
|
|
||||||
#include "reg_old.h"
|
|
||||||
|
|
||||||
#include "core/math/math_defs.h"
|
|
||||||
|
|
||||||
#include "../activation/activation_old.h"
|
|
||||||
#include "../lin_alg/lin_alg_old.h"
|
|
||||||
|
|
||||||
#include <iostream>
|
|
||||||
#include <random>
|
|
||||||
|
|
||||||
real_t MLPPRegOld::regTerm(std::vector<real_t> weights, real_t lambda, real_t alpha, std::string p_reg) {
|
|
||||||
if (p_reg == "Ridge") {
|
|
||||||
real_t reg = 0;
|
|
||||||
for (uint32_t i = 0; i < weights.size(); i++) {
|
|
||||||
reg += weights[i] * weights[i];
|
|
||||||
}
|
|
||||||
return reg * lambda / 2;
|
|
||||||
} else if (p_reg == "Lasso") {
|
|
||||||
real_t reg = 0;
|
|
||||||
for (uint32_t i = 0; i < weights.size(); i++) {
|
|
||||||
reg += abs(weights[i]);
|
|
||||||
}
|
|
||||||
return reg * lambda;
|
|
||||||
} else if (p_reg == "ElasticNet") {
|
|
||||||
real_t reg = 0;
|
|
||||||
for (uint32_t i = 0; i < weights.size(); i++) {
|
|
||||||
reg += alpha * abs(weights[i]); // Lasso Reg
|
|
||||||
reg += ((1 - alpha) / 2) * weights[i] * weights[i]; // Ridge Reg
|
|
||||||
}
|
|
||||||
return reg * lambda;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPRegOld::regTerm(std::vector<std::vector<real_t>> weights, real_t lambda, real_t alpha, std::string p_reg) {
|
|
||||||
if (p_reg == "Ridge") {
|
|
||||||
real_t reg = 0;
|
|
||||||
for (uint32_t i = 0; i < weights.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < weights[i].size(); j++) {
|
|
||||||
reg += weights[i][j] * weights[i][j];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return reg * lambda / 2;
|
|
||||||
} else if (p_reg == "Lasso") {
|
|
||||||
real_t reg = 0;
|
|
||||||
for (uint32_t i = 0; i < weights.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < weights[i].size(); j++) {
|
|
||||||
reg += abs(weights[i][j]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return reg * lambda;
|
|
||||||
} else if (p_reg == "ElasticNet") {
|
|
||||||
real_t reg = 0;
|
|
||||||
for (uint32_t i = 0; i < weights.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < weights[i].size(); j++) {
|
|
||||||
reg += alpha * abs(weights[i][j]); // Lasso Reg
|
|
||||||
reg += ((1 - alpha) / 2) * weights[i][j] * weights[i][j]; // Ridge Reg
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return reg * lambda;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPRegOld::regWeights(std::vector<real_t> weights, real_t lambda, real_t alpha, std::string reg) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
if (reg == "WeightClipping") {
|
|
||||||
return regDerivTerm(weights, lambda, alpha, reg);
|
|
||||||
}
|
|
||||||
return alg.subtraction(weights, regDerivTerm(weights, lambda, alpha, reg));
|
|
||||||
// for(int i = 0; i < weights.size(); i++){
|
|
||||||
// weights[i] -= regDerivTerm(weights, lambda, alpha, reg, i);
|
|
||||||
// }
|
|
||||||
// return weights;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPRegOld::regWeights(std::vector<std::vector<real_t>> weights, real_t lambda, real_t alpha, std::string reg) {
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
if (reg == "WeightClipping") {
|
|
||||||
return regDerivTerm(weights, lambda, alpha, reg);
|
|
||||||
}
|
|
||||||
return alg.subtraction(weights, regDerivTerm(weights, lambda, alpha, reg));
|
|
||||||
// for(int i = 0; i < weights.size(); i++){
|
|
||||||
// for(int j = 0; j < weights[i].size(); j++){
|
|
||||||
// weights[i][j] -= regDerivTerm(weights, lambda, alpha, reg, i, j);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// return weights;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPRegOld::regDerivTerm(std::vector<real_t> weights, real_t lambda, real_t alpha, std::string reg) {
|
|
||||||
std::vector<real_t> regDeriv;
|
|
||||||
regDeriv.resize(weights.size());
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < regDeriv.size(); i++) {
|
|
||||||
regDeriv[i] = regDerivTerm(weights, lambda, alpha, reg, i);
|
|
||||||
}
|
|
||||||
return regDeriv;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPRegOld::regDerivTerm(std::vector<std::vector<real_t>> weights, real_t lambda, real_t alpha, std::string reg) {
|
|
||||||
std::vector<std::vector<real_t>> regDeriv;
|
|
||||||
regDeriv.resize(weights.size());
|
|
||||||
for (uint32_t i = 0; i < regDeriv.size(); i++) {
|
|
||||||
regDeriv[i].resize(weights[0].size());
|
|
||||||
}
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < regDeriv.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < regDeriv[i].size(); j++) {
|
|
||||||
regDeriv[i][j] = regDerivTerm(weights, lambda, alpha, reg, i, j);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return regDeriv;
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPRegOld::regDerivTerm(std::vector<real_t> weights, real_t lambda, real_t alpha, std::string reg, int j) {
|
|
||||||
MLPPActivationOld act;
|
|
||||||
if (reg == "Ridge") {
|
|
||||||
return lambda * weights[j];
|
|
||||||
} else if (reg == "Lasso") {
|
|
||||||
return lambda * act.sign(weights[j]);
|
|
||||||
} else if (reg == "ElasticNet") {
|
|
||||||
return alpha * lambda * act.sign(weights[j]) + (1 - alpha) * lambda * weights[j];
|
|
||||||
} else if (reg == "WeightClipping") { // Preparation for Wasserstein GANs.
|
|
||||||
// We assume lambda is the lower clipping threshold, while alpha is the higher clipping threshold.
|
|
||||||
// alpha > lambda.
|
|
||||||
if (weights[j] > alpha) {
|
|
||||||
return alpha;
|
|
||||||
} else if (weights[j] < lambda) {
|
|
||||||
return lambda;
|
|
||||||
} else {
|
|
||||||
return weights[j];
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPRegOld::regDerivTerm(std::vector<std::vector<real_t>> weights, real_t lambda, real_t alpha, std::string reg, int i, int j) {
|
|
||||||
MLPPActivationOld act;
|
|
||||||
if (reg == "Ridge") {
|
|
||||||
return lambda * weights[i][j];
|
|
||||||
} else if (reg == "Lasso") {
|
|
||||||
return lambda * act.sign(weights[i][j]);
|
|
||||||
} else if (reg == "ElasticNet") {
|
|
||||||
return alpha * lambda * act.sign(weights[i][j]) + (1 - alpha) * lambda * weights[i][j];
|
|
||||||
} else if (reg == "WeightClipping") { // Preparation for Wasserstein GANs.
|
|
||||||
// We assume lambda is the lower clipping threshold, while alpha is the higher clipping threshold.
|
|
||||||
// alpha > lambda.
|
|
||||||
if (weights[i][j] > alpha) {
|
|
||||||
return alpha;
|
|
||||||
} else if (weights[i][j] < lambda) {
|
|
||||||
return lambda;
|
|
||||||
} else {
|
|
||||||
return weights[i][j];
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,33 +0,0 @@
|
|||||||
|
|
||||||
|
|
||||||
#ifndef MLPP_REG_OLD_H
|
|
||||||
#define MLPP_REG_OLD_H
|
|
||||||
|
|
||||||
//
|
|
||||||
// Reg.hpp
|
|
||||||
//
|
|
||||||
// Created by Marc Melikyan on 1/16/21.
|
|
||||||
//
|
|
||||||
|
|
||||||
#include "core/math/math_defs.h"
|
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
class MLPPRegOld {
|
|
||||||
public:
|
|
||||||
real_t regTerm(std::vector<real_t> weights, real_t lambda, real_t alpha, std::string reg);
|
|
||||||
real_t regTerm(std::vector<std::vector<real_t>> weights, real_t lambda, real_t alpha, std::string reg);
|
|
||||||
|
|
||||||
std::vector<real_t> regWeights(std::vector<real_t> weights, real_t lambda, real_t alpha, std::string reg);
|
|
||||||
std::vector<std::vector<real_t>> regWeights(std::vector<std::vector<real_t>> weights, real_t lambda, real_t alpha, std::string reg);
|
|
||||||
|
|
||||||
std::vector<real_t> regDerivTerm(std::vector<real_t> weights, real_t lambda, real_t alpha, std::string reg);
|
|
||||||
std::vector<std::vector<real_t>> regDerivTerm(std::vector<std::vector<real_t>>, real_t lambda, real_t alpha, std::string reg);
|
|
||||||
|
|
||||||
private:
|
|
||||||
real_t regDerivTerm(std::vector<real_t> weights, real_t lambda, real_t alpha, std::string reg, int j);
|
|
||||||
real_t regDerivTerm(std::vector<std::vector<real_t>> weights, real_t lambda, real_t alpha, std::string reg, int i, int j);
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif /* Reg_hpp */
|
|
@ -1,309 +0,0 @@
|
|||||||
//
|
|
||||||
// SoftmaxNet.cpp
|
|
||||||
//
|
|
||||||
// Created by Marc Melikyan on 10/2/20.
|
|
||||||
//
|
|
||||||
|
|
||||||
#include "softmax_net_old.h"
|
|
||||||
|
|
||||||
#include "../activation/activation_old.h"
|
|
||||||
#include "../cost/cost_old.h"
|
|
||||||
#include "../data/data.h"
|
|
||||||
#include "../lin_alg/lin_alg_old.h"
|
|
||||||
#include "../regularization/reg_old.h"
|
|
||||||
#include "../utilities/utilities.h"
|
|
||||||
|
|
||||||
#include <iostream>
|
|
||||||
#include <random>
|
|
||||||
|
|
||||||
MLPPSoftmaxNetOld::MLPPSoftmaxNetOld(std::vector<std::vector<real_t>> pinputSet, std::vector<std::vector<real_t>> poutputSet, int pn_hidden, std::string preg, real_t plambda, real_t palpha) {
|
|
||||||
inputSet = pinputSet;
|
|
||||||
outputSet = poutputSet;
|
|
||||||
n = pinputSet.size();
|
|
||||||
k = pinputSet[0].size();
|
|
||||||
n_hidden = pn_hidden;
|
|
||||||
n_class = poutputSet[0].size();
|
|
||||||
reg = preg;
|
|
||||||
lambda = plambda;
|
|
||||||
alpha = palpha;
|
|
||||||
|
|
||||||
y_hat.resize(n);
|
|
||||||
|
|
||||||
weights1 = MLPPUtilities::weightInitialization(k, n_hidden);
|
|
||||||
weights2 = MLPPUtilities::weightInitialization(n_hidden, n_class);
|
|
||||||
bias1 = MLPPUtilities::biasInitialization(n_hidden);
|
|
||||||
bias2 = MLPPUtilities::biasInitialization(n_class);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPSoftmaxNetOld::modelTest(std::vector<real_t> x) {
|
|
||||||
return Evaluate(x);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPSoftmaxNetOld::modelSetTest(std::vector<std::vector<real_t>> X) {
|
|
||||||
return Evaluate(X);
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPSoftmaxNetOld::gradientDescent(real_t learning_rate, int max_epoch, bool UI) {
|
|
||||||
MLPPActivationOld avn;
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPRegOld regularization;
|
|
||||||
real_t cost_prev = 0;
|
|
||||||
int epoch = 1;
|
|
||||||
forwardPass();
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
cost_prev = Cost(y_hat, outputSet);
|
|
||||||
|
|
||||||
// Calculating the errors
|
|
||||||
std::vector<std::vector<real_t>> error = alg.subtraction(y_hat, outputSet);
|
|
||||||
|
|
||||||
// Calculating the weight/bias gradients for layer 2
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> D2_1 = alg.matmult(alg.transpose(a2), error);
|
|
||||||
|
|
||||||
// weights and bias updation for layer 2
|
|
||||||
weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate, D2_1));
|
|
||||||
weights2 = regularization.regWeights(weights2, lambda, alpha, reg);
|
|
||||||
|
|
||||||
bias2 = alg.subtractMatrixRows(bias2, alg.scalarMultiply(learning_rate, error));
|
|
||||||
|
|
||||||
//Calculating the weight/bias for layer 1
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> D1_1 = alg.matmult(error, alg.transpose(weights2));
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, 1));
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> D1_3 = alg.matmult(alg.transpose(inputSet), D1_2);
|
|
||||||
|
|
||||||
// weight an bias updation for layer 1
|
|
||||||
weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate, D1_3));
|
|
||||||
weights1 = regularization.regWeights(weights1, lambda, alpha, reg);
|
|
||||||
|
|
||||||
bias1 = alg.subtractMatrixRows(bias1, alg.scalarMultiply(learning_rate, D1_2));
|
|
||||||
|
|
||||||
forwardPass();
|
|
||||||
|
|
||||||
// UI PORTION
|
|
||||||
if (UI) {
|
|
||||||
MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
|
|
||||||
std::cout << "Layer 1:" << std::endl;
|
|
||||||
MLPPUtilities::UI(weights1, bias1);
|
|
||||||
std::cout << "Layer 2:" << std::endl;
|
|
||||||
MLPPUtilities::UI(weights2, bias2);
|
|
||||||
}
|
|
||||||
epoch++;
|
|
||||||
|
|
||||||
if (epoch > max_epoch) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPSoftmaxNetOld::SGD(real_t learning_rate, int max_epoch, bool UI) {
|
|
||||||
MLPPActivationOld avn;
|
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPRegOld regularization;
|
|
||||||
real_t cost_prev = 0;
|
|
||||||
int epoch = 1;
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
std::random_device rd;
|
|
||||||
std::default_random_engine generator(rd());
|
|
||||||
		std::uniform_int_distribution<int> distribution(0, int(n - 1));
		int outputIndex = distribution(generator);

		std::vector<real_t> y_hat = Evaluate(inputSet[outputIndex]);

		auto prop_res = propagate(inputSet[outputIndex]);
		auto z2 = std::get<0>(prop_res);
		auto a2 = std::get<1>(prop_res);

		cost_prev = Cost({ y_hat }, { outputSet[outputIndex] });
		std::vector<real_t> error = alg.subtraction(y_hat, outputSet[outputIndex]);

		// Weight updation for layer 2
		std::vector<std::vector<real_t>> D2_1 = alg.outerProduct(error, a2);
		weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate, alg.transpose(D2_1)));
		weights2 = regularization.regWeights(weights2, lambda, alpha, reg);

		// Bias updation for layer 2
		bias2 = alg.subtraction(bias2, alg.scalarMultiply(learning_rate, error));

		// Weight updation for layer 1
		std::vector<real_t> D1_1 = alg.mat_vec_mult(weights2, error);
		std::vector<real_t> D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, true));
		std::vector<std::vector<real_t>> D1_3 = alg.outerProduct(inputSet[outputIndex], D1_2);

		weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate, D1_3));
		weights1 = regularization.regWeights(weights1, lambda, alpha, reg);

		// Bias updation for layer 1
		bias1 = alg.subtraction(bias1, alg.scalarMultiply(learning_rate, D1_2));

		y_hat = Evaluate(inputSet[outputIndex]);
		if (UI) {
			MLPPUtilities::CostInfo(epoch, cost_prev, Cost({ y_hat }, { outputSet[outputIndex] }));
			std::cout << "Layer 1:" << std::endl;
			MLPPUtilities::UI(weights1, bias1);
			std::cout << "Layer 2:" << std::endl;
			MLPPUtilities::UI(weights2, bias2);
		}
		epoch++;

		if (epoch > max_epoch) {
			break;
		}
	}
	forwardPass();
}

void MLPPSoftmaxNetOld::MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI) {
	MLPPActivationOld avn;
	MLPPLinAlgOld alg;
	MLPPRegOld regularization;
	real_t cost_prev = 0;
	int epoch = 1;

	// Creating the mini-batches
	int n_mini_batch = n / mini_batch_size;

	auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
	auto inputMiniBatches = std::get<0>(batches);
	auto outputMiniBatches = std::get<1>(batches);

	// Creating the mini-batches
	for (int i = 0; i < n_mini_batch; i++) {
		std::vector<std::vector<real_t>> currentInputSet;
		std::vector<std::vector<real_t>> currentOutputSet;
		for (int j = 0; j < n / n_mini_batch; j++) {
			currentInputSet.push_back(inputSet[n / n_mini_batch * i + j]);
			currentOutputSet.push_back(outputSet[n / n_mini_batch * i + j]);
		}
		inputMiniBatches.push_back(currentInputSet);
		outputMiniBatches.push_back(currentOutputSet);
	}

	if (real_t(n) / real_t(n_mini_batch) - int(n / n_mini_batch) != 0) {
		for (int i = 0; i < n - n / n_mini_batch * n_mini_batch; i++) {
			inputMiniBatches[n_mini_batch - 1].push_back(inputSet[n / n_mini_batch * n_mini_batch + i]);
			outputMiniBatches[n_mini_batch - 1].push_back(outputSet[n / n_mini_batch * n_mini_batch + i]);
		}
	}

	while (true) {
		for (int i = 0; i < n_mini_batch; i++) {
			std::vector<std::vector<real_t>> y_hat = Evaluate(inputMiniBatches[i]);

			auto propagate_res = propagate(inputMiniBatches[i]);
			auto z2 = std::get<0>(propagate_res);
			auto a2 = std::get<1>(propagate_res);

			cost_prev = Cost(y_hat, outputMiniBatches[i]);

			// Calculating the errors
			std::vector<std::vector<real_t>> error = alg.subtraction(y_hat, outputMiniBatches[i]);

			// Calculating the weight/bias gradients for layer 2
			std::vector<std::vector<real_t>> D2_1 = alg.matmult(alg.transpose(a2), error);

			// weights and bias updation for layser 2
			weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate, D2_1));
			weights2 = regularization.regWeights(weights2, lambda, alpha, reg);

			// Bias Updation for layer 2
			bias2 = alg.subtractMatrixRows(bias2, alg.scalarMultiply(learning_rate, error));

			//Calculating the weight/bias for layer 1
			std::vector<std::vector<real_t>> D1_1 = alg.matmult(error, alg.transpose(weights2));
			std::vector<std::vector<real_t>> D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, 1));
			std::vector<std::vector<real_t>> D1_3 = alg.matmult(alg.transpose(inputMiniBatches[i]), D1_2);

			// weight an bias updation for layer 1
			weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate, D1_3));
			weights1 = regularization.regWeights(weights1, lambda, alpha, reg);

			bias1 = alg.subtractMatrixRows(bias1, alg.scalarMultiply(learning_rate, D1_2));

			y_hat = Evaluate(inputMiniBatches[i]);

			if (UI) {
				MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
				std::cout << "Layer 1:" << std::endl;
				MLPPUtilities::UI(weights1, bias1);
				std::cout << "Layer 2:" << std::endl;
				MLPPUtilities::UI(weights2, bias2);
			}
		}
		epoch++;
		if (epoch > max_epoch) {
			break;
		}
	}
	forwardPass();
}

real_t MLPPSoftmaxNetOld::score() {
	MLPPUtilities util;
	return util.performance(y_hat, outputSet);
}

void MLPPSoftmaxNetOld::save(std::string fileName) {
	MLPPUtilities util;
	util.saveParameters(fileName, weights1, bias1, 0, 1);
	util.saveParameters(fileName, weights2, bias2, 1, 2);
}

std::vector<std::vector<real_t>> MLPPSoftmaxNetOld::getEmbeddings() {
	return weights1;
}

real_t MLPPSoftmaxNetOld::Cost(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y) {
	MLPPRegOld regularization;
	MLPPData data;
	class MLPPCostOld cost;
	return cost.CrossEntropy(y_hat, y) + regularization.regTerm(weights1, lambda, alpha, reg) + regularization.regTerm(weights2, lambda, alpha, reg);
}

std::vector<std::vector<real_t>> MLPPSoftmaxNetOld::Evaluate(std::vector<std::vector<real_t>> X) {
	MLPPLinAlgOld alg;
	MLPPActivationOld avn;
	std::vector<std::vector<real_t>> z2 = alg.mat_vec_add(alg.matmult(X, weights1), bias1);
	std::vector<std::vector<real_t>> a2 = avn.sigmoid(z2);
	return avn.adjSoftmax(alg.mat_vec_add(alg.matmult(a2, weights2), bias2));
}

std::tuple<std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>> MLPPSoftmaxNetOld::propagate(std::vector<std::vector<real_t>> X) {
	MLPPLinAlgOld alg;
	MLPPActivationOld avn;
	std::vector<std::vector<real_t>> z2 = alg.mat_vec_add(alg.matmult(X, weights1), bias1);
	std::vector<std::vector<real_t>> a2 = avn.sigmoid(z2);
	return { z2, a2 };
}

std::vector<real_t> MLPPSoftmaxNetOld::Evaluate(std::vector<real_t> x) {
	MLPPLinAlgOld alg;
	MLPPActivationOld avn;
	std::vector<real_t> z2 = alg.addition(alg.mat_vec_mult(alg.transpose(weights1), x), bias1);
	std::vector<real_t> a2 = avn.sigmoid(z2);
	return avn.adjSoftmax(alg.addition(alg.mat_vec_mult(alg.transpose(weights2), a2), bias2));
}

std::tuple<std::vector<real_t>, std::vector<real_t>> MLPPSoftmaxNetOld::propagate(std::vector<real_t> x) {
	MLPPLinAlgOld alg;
	MLPPActivationOld avn;
	std::vector<real_t> z2 = alg.addition(alg.mat_vec_mult(alg.transpose(weights1), x), bias1);
	std::vector<real_t> a2 = avn.sigmoid(z2);
	return { z2, a2 };
}

void MLPPSoftmaxNetOld::forwardPass() {
	MLPPLinAlgOld alg;
	MLPPActivationOld avn;
	z2 = alg.mat_vec_add(alg.matmult(inputSet, weights1), bias1);
	a2 = avn.sigmoid(z2);
	y_hat = avn.adjSoftmax(alg.mat_vec_add(alg.matmult(a2, weights2), bias2));
}
@ -1,60 +0,0 @@
#ifndef MLPP_SOFTMAX_NET_OLD_H
#define MLPP_SOFTMAX_NET_OLD_H

//
//  SoftmaxNet.hpp
//
//  Created by Marc Melikyan on 10/2/20.
//

#include "core/math/math_defs.h"

#include <string>
#include <vector>

class MLPPSoftmaxNetOld {
public:
	MLPPSoftmaxNetOld(std::vector<std::vector<real_t>> inputSet, std::vector<std::vector<real_t>> outputSet, int n_hidden, std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5);
	std::vector<real_t> modelTest(std::vector<real_t> x);
	std::vector<std::vector<real_t>> modelSetTest(std::vector<std::vector<real_t>> X);
	void gradientDescent(real_t learning_rate, int max_epoch, bool UI = false);
	void SGD(real_t learning_rate, int max_epoch, bool UI = false);
	void MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI = false);
	real_t score();
	void save(std::string fileName);

	std::vector<std::vector<real_t>> getEmbeddings(); // This class is used (mostly) for word2Vec. This function returns our embeddings.

private:
	real_t Cost(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y);

	std::vector<std::vector<real_t>> Evaluate(std::vector<std::vector<real_t>> X);
	std::tuple<std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>> propagate(std::vector<std::vector<real_t>> X);
	std::vector<real_t> Evaluate(std::vector<real_t> x);
	std::tuple<std::vector<real_t>, std::vector<real_t>> propagate(std::vector<real_t> x);
	void forwardPass();

	std::vector<std::vector<real_t>> inputSet;
	std::vector<std::vector<real_t>> outputSet;
	std::vector<std::vector<real_t>> y_hat;

	std::vector<std::vector<real_t>> weights1;
	std::vector<std::vector<real_t>> weights2;

	std::vector<real_t> bias1;
	std::vector<real_t> bias2;

	std::vector<std::vector<real_t>> z2;
	std::vector<std::vector<real_t>> a2;

	int n;
	int k;
	int n_class;
	int n_hidden;

	// Regularization Params
	std::string reg;
	real_t lambda;
	real_t alpha; /* This is the controlling param for Elastic Net*/
};

#endif /* SoftmaxNet_hpp */
@ -1,193 +0,0 @@
//
//  SoftmaxReg.cpp
//
//  Created by Marc Melikyan on 10/2/20.
//

#include "softmax_reg_old.h"
#include "../activation/activation_old.h"
#include "../cost/cost_old.h"
#include "../lin_alg/lin_alg_old.h"
#include "../regularization/reg_old.h"
#include "../utilities/utilities.h"

#include <iostream>
#include <random>

MLPPSoftmaxRegOld::MLPPSoftmaxRegOld(std::vector<std::vector<real_t>> inputSet, std::vector<std::vector<real_t>> outputSet, std::string reg, real_t lambda, real_t alpha) :
		inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), n_class(outputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha) {
	y_hat.resize(n);
	weights = MLPPUtilities::weightInitialization(k, n_class);
	bias = MLPPUtilities::biasInitialization(n_class);
}

std::vector<real_t> MLPPSoftmaxRegOld::modelTest(std::vector<real_t> x) {
	return Evaluate(x);
}

std::vector<std::vector<real_t>> MLPPSoftmaxRegOld::modelSetTest(std::vector<std::vector<real_t>> X) {
	return Evaluate(X);
}

void MLPPSoftmaxRegOld::gradientDescent(real_t learning_rate, int max_epoch, bool UI) {
	MLPPLinAlgOld alg;
	MLPPRegOld regularization;
	real_t cost_prev = 0;
	int epoch = 1;
	forwardPass();

	while (true) {
		cost_prev = Cost(y_hat, outputSet);
		std::vector<std::vector<real_t>> error = alg.subtraction(y_hat, outputSet);

		//Calculating the weight gradients
		std::vector<std::vector<real_t>> w_gradient = alg.matmult(alg.transpose(inputSet), error);

		//Weight updation
		weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, w_gradient));
		weights = regularization.regWeights(weights, lambda, alpha, reg);

		// Calculating the bias gradients
		//real_t b_gradient = alg.sum_elements(error);

		// Bias Updation
		bias = alg.subtractMatrixRows(bias, alg.scalarMultiply(learning_rate, error));

		forwardPass();

		// UI PORTION
		if (UI) {
			MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
			MLPPUtilities::UI(weights, bias);
		}
		epoch++;

		if (epoch > max_epoch) {
			break;
		}
	}
}

void MLPPSoftmaxRegOld::SGD(real_t learning_rate, int max_epoch, bool UI) {
	MLPPLinAlgOld alg;
	MLPPRegOld regularization;
	real_t cost_prev = 0;
	int epoch = 1;

	while (true) {
		std::random_device rd;
		std::default_random_engine generator(rd());
		std::uniform_int_distribution<int> distribution(0, int(n - 1));
		real_t outputIndex = distribution(generator);

		std::vector<real_t> y_hat = Evaluate(inputSet[outputIndex]);
		cost_prev = Cost({ y_hat }, { outputSet[outputIndex] });

		// Calculating the weight gradients
		std::vector<std::vector<real_t>> w_gradient = alg.outerProduct(inputSet[outputIndex], alg.subtraction(y_hat, outputSet[outputIndex]));

		// Weight Updation
		weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, w_gradient));
		weights = regularization.regWeights(weights, lambda, alpha, reg);

		// Calculating the bias gradients
		std::vector<real_t> b_gradient = alg.subtraction(y_hat, outputSet[outputIndex]);

		// Bias updation
		bias = alg.subtraction(bias, alg.scalarMultiply(learning_rate, b_gradient));

		//y_hat = Evaluate({ inputSet[outputIndex] });
		y_hat = Evaluate(inputSet[outputIndex]);

		if (UI) {
			MLPPUtilities::CostInfo(epoch, cost_prev, Cost({ y_hat }, { outputSet[outputIndex] }));
			MLPPUtilities::UI(weights, bias);
		}
		epoch++;

		if (epoch > max_epoch) {
			break;
		}
	}
	forwardPass();
}

void MLPPSoftmaxRegOld::MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI) {
	MLPPLinAlgOld alg;
	MLPPRegOld regularization;
	real_t cost_prev = 0;
	int epoch = 1;

	// Creating the mini-batches
	int n_mini_batch = n / mini_batch_size;
	auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
	auto inputMiniBatches = std::get<0>(batches);
	auto outputMiniBatches = std::get<1>(batches);

	while (true) {
		for (int i = 0; i < n_mini_batch; i++) {
			std::vector<std::vector<real_t>> y_hat = Evaluate(inputMiniBatches[i]);
			cost_prev = Cost(y_hat, outputMiniBatches[i]);

			std::vector<std::vector<real_t>> error = alg.subtraction(y_hat, outputMiniBatches[i]);

			// Calculating the weight gradients
			std::vector<std::vector<real_t>> w_gradient = alg.matmult(alg.transpose(inputMiniBatches[i]), error);

			//Weight updation
			weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, w_gradient));
			weights = regularization.regWeights(weights, lambda, alpha, reg);

			// Calculating the bias gradients
			bias = alg.subtractMatrixRows(bias, alg.scalarMultiply(learning_rate, error));
			y_hat = Evaluate(inputMiniBatches[i]);

			if (UI) {
				MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
				MLPPUtilities::UI(weights, bias);
			}
		}
		epoch++;
		if (epoch > max_epoch) {
			break;
		}
	}
	forwardPass();
}

real_t MLPPSoftmaxRegOld::score() {
	MLPPUtilities util;
	return util.performance(y_hat, outputSet);
}

void MLPPSoftmaxRegOld::save(std::string fileName) {
	MLPPUtilities util;
	util.saveParameters(fileName, weights, bias);
}

real_t MLPPSoftmaxRegOld::Cost(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y) {
	MLPPRegOld regularization;
	class MLPPCostOld cost;
	return cost.CrossEntropy(y_hat, y) + regularization.regTerm(weights, lambda, alpha, reg);
}

std::vector<real_t> MLPPSoftmaxRegOld::Evaluate(std::vector<real_t> x) {
	MLPPLinAlgOld alg;
	MLPPActivationOld avn;
	return avn.softmax(alg.addition(bias, alg.mat_vec_mult(alg.transpose(weights), x)));
}

std::vector<std::vector<real_t>> MLPPSoftmaxRegOld::Evaluate(std::vector<std::vector<real_t>> X) {
	MLPPLinAlgOld alg;
	MLPPActivationOld avn;

	return avn.softmax(alg.mat_vec_add(alg.matmult(X, weights), bias));
}

// softmax ( wTx + b )
void MLPPSoftmaxRegOld::forwardPass() {
	MLPPLinAlgOld alg;
	MLPPActivationOld avn;

	y_hat = avn.softmax(alg.mat_vec_add(alg.matmult(inputSet, weights), bias));
}
@ -1,50 +0,0 @@
#ifndef MLPP_SOFTMAX_REG_OLD_H
#define MLPP_SOFTMAX_REG_OLD_H

//
//  SoftmaxReg.hpp
//
//  Created by Marc Melikyan on 10/2/20.
//

#include "core/math/math_defs.h"

#include <string>
#include <vector>

class MLPPSoftmaxRegOld {
public:
	MLPPSoftmaxRegOld(std::vector<std::vector<real_t>> inputSet, std::vector<std::vector<real_t>> outputSet, std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5);
	std::vector<real_t> modelTest(std::vector<real_t> x);
	std::vector<std::vector<real_t>> modelSetTest(std::vector<std::vector<real_t>> X);
	void gradientDescent(real_t learning_rate, int max_epoch, bool UI = false);
	void SGD(real_t learning_rate, int max_epoch, bool UI = false);
	void MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI = false);
	real_t score();
	void save(std::string fileName);

private:
	real_t Cost(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y);

	std::vector<std::vector<real_t>> Evaluate(std::vector<std::vector<real_t>> X);
	std::vector<real_t> Evaluate(std::vector<real_t> x);
	void forwardPass();

	std::vector<std::vector<real_t>> inputSet;
	std::vector<std::vector<real_t>> outputSet;
	std::vector<std::vector<real_t>> y_hat;
	std::vector<std::vector<real_t>> weights;
	std::vector<real_t> bias;

	int n;
	int k;
	int n_class;

	// Regularization Params
	std::string reg;
	real_t lambda;
	real_t alpha; /* This is the controlling param for Elastic Net*/
};

#endif /* SoftmaxReg_hpp */
@ -1,215 +0,0 @@
//
//  Stat.cpp
//
//  Created by Marc Melikyan on 9/29/20.
//

#include "stat_old.h"
#include "../activation/activation_old.h"
#include "../data/data.h"
#include "../lin_alg/lin_alg_old.h"
#include <algorithm>
#include <cmath>
#include <map>

#include <iostream>

real_t MLPPStatOld::b0Estimation(const std::vector<real_t> &x, const std::vector<real_t> &y) {
	return mean(y) - b1Estimation(x, y) * mean(x);
}

real_t MLPPStatOld::b1Estimation(const std::vector<real_t> &x, const std::vector<real_t> &y) {
	return covariance(x, y) / variance(x);
}

real_t MLPPStatOld::mean(const std::vector<real_t> &x) {
	real_t sum = 0;
	for (uint32_t i = 0; i < x.size(); i++) {
		sum += x[i];
	}
	return sum / x.size();
}

real_t MLPPStatOld::median(std::vector<real_t> x) {
	real_t center = real_t(x.size()) / real_t(2);
	sort(x.begin(), x.end());
	if (x.size() % 2 == 0) {
		return mean({ x[center - 1], x[center] });
	} else {
		return x[center - 1 + 0.5];
	}
}

std::vector<real_t> MLPPStatOld::mode(const std::vector<real_t> &x) {
	MLPPData data;
	std::vector<real_t> x_set = data.vecToSet(x);
	std::map<real_t, int> element_num;
	for (uint32_t i = 0; i < x_set.size(); i++) {
		element_num[x[i]] = 0;
	}
	for (uint32_t i = 0; i < x.size(); i++) {
		element_num[x[i]]++;
	}
	std::vector<real_t> modes;
	real_t max_num = element_num[x_set[0]];
	for (uint32_t i = 0; i < x_set.size(); i++) {
		if (element_num[x_set[i]] > max_num) {
			max_num = element_num[x_set[i]];
			modes.clear();
			modes.push_back(x_set[i]);
		} else if (element_num[x_set[i]] == max_num) {
			modes.push_back(x_set[i]);
		}
	}
	return modes;
}

real_t MLPPStatOld::range(const std::vector<real_t> &x) {
	MLPPLinAlgOld alg;
	return alg.max(x) - alg.min(x);
}

real_t MLPPStatOld::midrange(const std::vector<real_t> &x) {
	return range(x) / 2;
}

real_t MLPPStatOld::absAvgDeviation(const std::vector<real_t> &x) {
	real_t sum = 0;
	for (uint32_t i = 0; i < x.size(); i++) {
		sum += std::abs(x[i] - mean(x));
	}
	return sum / x.size();
}

real_t MLPPStatOld::standardDeviation(const std::vector<real_t> &x) {
	return std::sqrt(variance(x));
}

real_t MLPPStatOld::variance(const std::vector<real_t> &x) {
	real_t sum = 0;
	for (uint32_t i = 0; i < x.size(); i++) {
		sum += (x[i] - mean(x)) * (x[i] - mean(x));
	}
	return sum / (x.size() - 1);
}

real_t MLPPStatOld::covariance(const std::vector<real_t> &x, const std::vector<real_t> &y) {
	real_t sum = 0;
	for (uint32_t i = 0; i < x.size(); i++) {
		sum += (x[i] - mean(x)) * (y[i] - mean(y));
	}
	return sum / (x.size() - 1);
}

real_t MLPPStatOld::correlation(const std::vector<real_t> &x, const std::vector<real_t> &y) {
	return covariance(x, y) / (standardDeviation(x) * standardDeviation(y));
}

real_t MLPPStatOld::R2(const std::vector<real_t> &x, const std::vector<real_t> &y) {
	return correlation(x, y) * correlation(x, y);
}

real_t MLPPStatOld::chebyshevIneq(const real_t k) {
	// X may or may not belong to a Gaussian Distribution
	return 1 - 1 / (k * k);
}

real_t MLPPStatOld::weightedMean(const std::vector<real_t> &x, const std::vector<real_t> &weights) {
	real_t sum = 0;
	real_t weights_sum = 0;
	for (uint32_t i = 0; i < x.size(); i++) {
		sum += x[i] * weights[i];
		weights_sum += weights[i];
	}
	return sum / weights_sum;
}

real_t MLPPStatOld::geometricMean(const std::vector<real_t> &x) {
	real_t product = 1;
	for (uint32_t i = 0; i < x.size(); i++) {
		product *= x[i];
	}
	return std::pow(product, 1.0 / x.size());
}

real_t MLPPStatOld::harmonicMean(const std::vector<real_t> &x) {
	real_t sum = 0;
	for (uint32_t i = 0; i < x.size(); i++) {
		sum += 1 / x[i];
	}
	return x.size() / sum;
}

real_t MLPPStatOld::RMS(const std::vector<real_t> &x) {
	real_t sum = 0;
	for (uint32_t i = 0; i < x.size(); i++) {
		sum += x[i] * x[i];
	}
	return sqrt(sum / x.size());
}

real_t MLPPStatOld::powerMean(const std::vector<real_t> &x, const real_t p) {
	real_t sum = 0;
	for (uint32_t i = 0; i < x.size(); i++) {
		sum += std::pow(x[i], p);
	}
	return std::pow(sum / x.size(), 1 / p);
}

real_t MLPPStatOld::lehmerMean(const std::vector<real_t> &x, const real_t p) {
	real_t num = 0;
	real_t den = 0;
	for (uint32_t i = 0; i < x.size(); i++) {
		num += std::pow(x[i], p);
		den += std::pow(x[i], p - 1);
	}
	return num / den;
}

real_t MLPPStatOld::weightedLehmerMean(const std::vector<real_t> &x, const std::vector<real_t> &weights, const real_t p) {
	real_t num = 0;
	real_t den = 0;
	for (uint32_t i = 0; i < x.size(); i++) {
		num += weights[i] * std::pow(x[i], p);
		den += weights[i] * std::pow(x[i], p - 1);
	}
	return num / den;
}

real_t MLPPStatOld::heronianMean(const real_t A, const real_t B) {
	return (A + sqrt(A * B) + B) / 3;
}

real_t MLPPStatOld::contraHarmonicMean(const std::vector<real_t> &x) {
	return lehmerMean(x, 2);
}

real_t MLPPStatOld::heinzMean(const real_t A, const real_t B, const real_t x) {
	return (std::pow(A, x) * std::pow(B, 1 - x) + std::pow(A, 1 - x) * std::pow(B, x)) / 2;
}

real_t MLPPStatOld::neumanSandorMean(const real_t a, const real_t b) {
	MLPPActivationOld avn;
	return (a - b) / 2 * avn.arsinh((a - b) / (a + b));
}

real_t MLPPStatOld::stolarskyMean(const real_t x, const real_t y, const real_t p) {
	if (x == y) {
		return x;
	}
	return std::pow((std::pow(x, p) - std::pow(y, p)) / (p * (x - y)), 1 / (p - 1));
}

real_t MLPPStatOld::identricMean(const real_t x, const real_t y) {
	if (x == y) {
		return x;
	}
	return (1 / M_E) * std::pow(std::pow(x, x) / std::pow(y, y), 1 / (x - y));
}

real_t MLPPStatOld::logMean(const real_t x, const real_t y) {
	if (x == y) {
		return x;
	}
	return (y - x) / (log(y) - std::log(x));
}
@ -1,52 +0,0 @@
#ifndef MLPP_STAT_OLD_H
#define MLPP_STAT_OLD_H

//
//  Stat.hpp
//
//  Created by Marc Melikyan on 9/29/20.
//

#include "core/math/math_defs.h"

#include <vector>

class MLPPStatOld {
public:
	// These functions are for univariate lin reg module- not for users.
	real_t b0Estimation(const std::vector<real_t> &x, const std::vector<real_t> &y);
	real_t b1Estimation(const std::vector<real_t> &x, const std::vector<real_t> &y);

	// Statistical Functions
	real_t mean(const std::vector<real_t> &x);
	real_t median(std::vector<real_t> x);
	std::vector<real_t> mode(const std::vector<real_t> &x);
	real_t range(const std::vector<real_t> &x);
	real_t midrange(const std::vector<real_t> &x);
	real_t absAvgDeviation(const std::vector<real_t> &x);
	real_t standardDeviation(const std::vector<real_t> &x);
	real_t variance(const std::vector<real_t> &x);
	real_t covariance(const std::vector<real_t> &x, const std::vector<real_t> &y);
	real_t correlation(const std::vector<real_t> &x, const std::vector<real_t> &y);
	real_t R2(const std::vector<real_t> &x, const std::vector<real_t> &y);
	real_t chebyshevIneq(const real_t k);

	// Extras
	real_t weightedMean(const std::vector<real_t> &x, const std::vector<real_t> &weights);
	real_t geometricMean(const std::vector<real_t> &x);
	real_t harmonicMean(const std::vector<real_t> &x);
	real_t RMS(const std::vector<real_t> &x);
	real_t powerMean(const std::vector<real_t> &x, const real_t p);
	real_t lehmerMean(const std::vector<real_t> &x, const real_t p);
	real_t weightedLehmerMean(const std::vector<real_t> &x, const std::vector<real_t> &weights, const real_t p);
	real_t contraHarmonicMean(const std::vector<real_t> &x);
	real_t heronianMean(const real_t A, const real_t B);
	real_t heinzMean(const real_t A, const real_t B, const real_t x);
	real_t neumanSandorMean(const real_t a, const real_t b);
	real_t stolarskyMean(const real_t x, const real_t y, const real_t p);
	real_t identricMean(const real_t x, const real_t y);
	real_t logMean(const real_t x, const real_t y);
};

#endif /* Stat_hpp */
@ -1,198 +0,0 @@
//
//  SVC.cpp
//
//  Created by Marc Melikyan on 10/2/20.
//

#include "svc_old.h"
#include "../activation/activation_old.h"
#include "../cost/cost_old.h"
#include "../lin_alg/lin_alg_old.h"
#include "../regularization/reg_old.h"
#include "../utilities/utilities.h"

#include <iostream>
#include <random>

std::vector<real_t> MLPPSVCOld::modelSetTest(std::vector<std::vector<real_t>> X) {
	return Evaluate(X);
}

real_t MLPPSVCOld::modelTest(std::vector<real_t> x) {
	return Evaluate(x);
}

void MLPPSVCOld::gradientDescent(real_t learning_rate, int max_epoch, bool UI) {
	class MLPPCostOld cost;
	MLPPLinAlgOld alg;
	MLPPRegOld regularization;
	real_t cost_prev = 0;
	int epoch = 1;
	forwardPass();

	while (true) {
		cost_prev = Cost(y_hat, outputSet, weights, C);

		weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate / n, alg.mat_vec_mult(alg.transpose(inputSet), cost.HingeLossDeriv(z, outputSet, C))));
		weights = regularization.regWeights(weights, learning_rate / n, 0, "Ridge");

		// Calculating the bias gradients
		bias += learning_rate * alg.sum_elements(cost.HingeLossDeriv(y_hat, outputSet, C)) / n;

		forwardPass();

		// UI PORTION
		if (UI) {
			MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet, weights, C));
			MLPPUtilities::UI(weights, bias);
		}
		epoch++;

		if (epoch > max_epoch) {
			break;
		}
	}
}

void MLPPSVCOld::SGD(real_t learning_rate, int max_epoch, bool UI) {
	class MLPPCostOld cost;
	MLPPLinAlgOld alg;
	MLPPRegOld regularization;

	real_t cost_prev = 0;
	int epoch = 1;

	while (true) {
		std::random_device rd;
		std::default_random_engine generator(rd());
		std::uniform_int_distribution<int> distribution(0, int(n - 1));
		int outputIndex = distribution(generator);

		//real_t y_hat = Evaluate(inputSet[outputIndex]);
		real_t z = propagate(inputSet[outputIndex]);
		cost_prev = Cost({ z }, { outputSet[outputIndex] }, weights, C);

		real_t costDeriv = cost.HingeLossDeriv(std::vector<real_t>({ z }), std::vector<real_t>({ outputSet[outputIndex] }), C)[0]; // Explicit conversion to avoid ambiguity with overloaded function. Error occured on Ubuntu.

		// Weight Updation
		weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate * costDeriv, inputSet[outputIndex]));
		weights = regularization.regWeights(weights, learning_rate, 0, "Ridge");

		// Bias updation
		bias -= learning_rate * costDeriv;

		//y_hat = Evaluate({ inputSet[outputIndex] });

		if (UI) {
			MLPPUtilities::CostInfo(epoch, cost_prev, Cost({ z }, { outputSet[outputIndex] }, weights, C));
			MLPPUtilities::UI(weights, bias);
		}

		epoch++;

		if (epoch > max_epoch) {
			break;
		}
	}
	forwardPass();
}

void MLPPSVCOld::MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI) {
	class MLPPCostOld cost;
	MLPPLinAlgOld alg;
	MLPPRegOld regularization;
	real_t cost_prev = 0;
	int epoch = 1;

	// Creating the mini-batches
	int n_mini_batch = n / mini_batch_size;
	auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
	auto inputMiniBatches = std::get<0>(batches);
	auto outputMiniBatches = std::get<1>(batches);

	while (true) {
		for (int i = 0; i < n_mini_batch; i++) {
			std::vector<real_t> y_hat = Evaluate(inputMiniBatches[i]);
			std::vector<real_t> z = propagate(inputMiniBatches[i]);
			cost_prev = Cost(z, outputMiniBatches[i], weights, C);

			// Calculating the weight gradients
			weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate / n, alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), cost.HingeLossDeriv(z, outputMiniBatches[i], C))));
			weights = regularization.regWeights(weights, learning_rate / n, 0, "Ridge");

			// Calculating the bias gradients
			bias -= learning_rate * alg.sum_elements(cost.HingeLossDeriv(y_hat, outputMiniBatches[i], C)) / n;

			forwardPass();

			y_hat = Evaluate(inputMiniBatches[i]);

			if (UI) {
				MLPPUtilities::CostInfo(epoch, cost_prev, Cost(z, outputMiniBatches[i], weights, C));
				MLPPUtilities::UI(weights, bias);
			}
		}
		epoch++;
		if (epoch > max_epoch) {
			break;
		}
	}
	forwardPass();
}

real_t MLPPSVCOld::score() {
	MLPPUtilities util;
	return util.performance(y_hat, outputSet);
}

void MLPPSVCOld::save(std::string fileName) {
	MLPPUtilities util;
	util.saveParameters(fileName, weights, bias);
}

MLPPSVCOld::MLPPSVCOld(std::vector<std::vector<real_t>> p_inputSet, std::vector<real_t> p_outputSet, real_t p_C) {
	inputSet = p_inputSet;
	outputSet = p_outputSet;
	n = inputSet.size();
	k = inputSet[0].size();
	C = p_C;

	y_hat.resize(n);
	weights = MLPPUtilities::weightInitialization(k);
	bias = MLPPUtilities::biasInitialization();
}

real_t MLPPSVCOld::Cost(std::vector<real_t> z, std::vector<real_t> y, std::vector<real_t> weights, real_t C) {
	class MLPPCostOld cost;
	return cost.HingeLoss(z, y, weights, C);
}

std::vector<real_t> MLPPSVCOld::Evaluate(std::vector<std::vector<real_t>> X) {
	MLPPLinAlgOld alg;
	MLPPActivationOld avn;
	return avn.sign(alg.scalarAdd(bias, alg.mat_vec_mult(X, weights)));
}

std::vector<real_t> MLPPSVCOld::propagate(std::vector<std::vector<real_t>> X) {
	MLPPLinAlgOld alg;
	return alg.scalarAdd(bias, alg.mat_vec_mult(X, weights));
}

real_t MLPPSVCOld::Evaluate(std::vector<real_t> x) {
	MLPPLinAlgOld alg;
	MLPPActivationOld avn;
	return avn.sign(alg.dot(weights, x) + bias);
}

real_t MLPPSVCOld::propagate(std::vector<real_t> x) {
	MLPPLinAlgOld alg;
	return alg.dot(weights, x) + bias;
}

// sign ( wTx + b )
void MLPPSVCOld::forwardPass() {
	MLPPActivationOld avn;

	z = propagate(inputSet);
	y_hat = avn.sign(z);
}
@ -1,55 +0,0 @@
#ifndef MLPP_SVC_OLD_H
#define MLPP_SVC_OLD_H

//
//  SVC.hpp
//
//  Created by Marc Melikyan on 10/2/20.
//

// https://towardsdatascience.com/svm-implementation-from-scratch-python-2db2fc52e5c2
// Illustratd a practical definition of the Hinge Loss function and its gradient when optimizing with SGD.

#include "core/math/math_defs.h"

#include <string>
#include <vector>

class MLPPSVCOld {
public:
	std::vector<real_t> modelSetTest(std::vector<std::vector<real_t>> X);
	real_t modelTest(std::vector<real_t> x);
	void gradientDescent(real_t learning_rate, int max_epoch, bool UI = false);
	void SGD(real_t learning_rate, int max_epoch, bool UI = false);
	void MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI = false);
	real_t score();
	void save(std::string fileName);

	MLPPSVCOld(std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet, real_t C);

private:
	real_t Cost(std::vector<real_t> y_hat, std::vector<real_t> y, std::vector<real_t> weights, real_t C);

	std::vector<real_t> Evaluate(std::vector<std::vector<real_t>> X);
	std::vector<real_t> propagate(std::vector<std::vector<real_t>> X);
	real_t Evaluate(std::vector<real_t> x);
	real_t propagate(std::vector<real_t> x);
	void forwardPass();

	std::vector<std::vector<real_t>> inputSet;
	std::vector<real_t> outputSet;
	std::vector<real_t> z;
	std::vector<real_t> y_hat;
	std::vector<real_t> weights;
	real_t bias;

	real_t C;
	int n;
	int k;

	// UI Portion
	void UI(int epoch, real_t cost_prev);
};

#endif /* SVC_hpp */
@ -1,196 +0,0 @@
//
//  TanhReg.cpp
//
//  Created by Marc Melikyan on 10/2/20.
//

#include "tanh_reg_old.h"

#include "../activation/activation_old.h"
#include "../cost/cost_old.h"
#include "../lin_alg/lin_alg_old.h"
#include "../regularization/reg_old.h"
#include "../utilities/utilities.h"

#include <iostream>
#include <random>

MLPPTanhRegOld::MLPPTanhRegOld(std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet, std::string reg, real_t lambda, real_t alpha) :
		inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha) {
	y_hat.resize(n);
	weights = MLPPUtilities::weightInitialization(k);
	bias = MLPPUtilities::biasInitialization();
}

std::vector<real_t> MLPPTanhRegOld::modelSetTest(std::vector<std::vector<real_t>> X) {
	return Evaluate(X);
}

real_t MLPPTanhRegOld::modelTest(std::vector<real_t> x) {
	return Evaluate(x);
}

void MLPPTanhRegOld::gradientDescent(real_t learning_rate, int max_epoch, bool UI) {
	MLPPActivationOld avn;
	MLPPLinAlgOld alg;
	MLPPRegOld regularization;
	real_t cost_prev = 0;
	int epoch = 1;
	forwardPass();

	while (true) {
		cost_prev = Cost(y_hat, outputSet);

		std::vector<real_t> error = alg.subtraction(y_hat, outputSet);

		weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate / n, alg.mat_vec_mult(alg.transpose(inputSet), alg.hadamard_product(error, avn.tanh(z, 1)))));
		weights = regularization.regWeights(weights, lambda, alpha, reg);

		// Calculating the bias gradients
		bias -= learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.tanh(z, 1))) / n;

		forwardPass();

		// UI PORTION
		if (UI) {
			MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
			MLPPUtilities::UI(weights, bias);
		}
		epoch++;

		if (epoch > max_epoch) {
			break;
		}
	}
}

void MLPPTanhRegOld::SGD(real_t learning_rate, int max_epoch, bool UI) {
	MLPPLinAlgOld alg;
	MLPPRegOld regularization;
	real_t cost_prev = 0;
	int epoch = 1;

	while (true) {
		std::random_device rd;
		std::default_random_engine generator(rd());
		std::uniform_int_distribution<int> distribution(0, int(n - 1));
		int outputIndex = distribution(generator);

		real_t y_hat = Evaluate(inputSet[outputIndex]);
		cost_prev = Cost({ y_hat }, { outputSet[outputIndex] });

		real_t error = y_hat - outputSet[outputIndex];

		// Weight Updation
		weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate * error * (1 - y_hat * y_hat), inputSet[outputIndex]));
		weights = regularization.regWeights(weights, lambda, alpha, reg);

		// Bias updation
		bias -= learning_rate * error * (1 - y_hat * y_hat);

		y_hat = Evaluate({ inputSet[outputIndex] });

		if (UI) {
			MLPPUtilities::CostInfo(epoch, cost_prev, Cost({ y_hat }, { outputSet[outputIndex] }));
			MLPPUtilities::UI(weights, bias);
		}
		epoch++;

		if (epoch > max_epoch) {
			break;
		}
	}
	forwardPass();
}

void MLPPTanhRegOld::MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI) {
	MLPPActivationOld avn;
	MLPPLinAlgOld alg;
	MLPPRegOld regularization;

	real_t cost_prev = 0;
	int epoch = 1;

	// Creating the mini-batches
	int n_mini_batch = n / mini_batch_size;
	auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
	auto inputMiniBatches = std::get<0>(batches);
	auto outputMiniBatches = std::get<1>(batches);

	while (true) {
		for (int i = 0; i < n_mini_batch; i++) {
			std::vector<real_t> y_hat = Evaluate(inputMiniBatches[i]);
			std::vector<real_t> z = propagate(inputMiniBatches[i]);
			cost_prev = Cost(y_hat, outputMiniBatches[i]);

			std::vector<real_t> error = alg.subtraction(y_hat, outputMiniBatches[i]);

			// Calculating the weight gradients
			weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate / n, alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), alg.hadamard_product(error, avn.tanh(z, 1)))));
			weights = regularization.regWeights(weights, lambda, alpha, reg);

			// Calculating the bias gradients
			bias -= learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.tanh(z, 1))) / n;

			forwardPass();

			y_hat = Evaluate(inputMiniBatches[i]);

			if (UI) {
				MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
				MLPPUtilities::UI(weights, bias);
			}
		}
		epoch++;
		if (epoch > max_epoch) {
			break;
		}
	}
	forwardPass();
}

real_t MLPPTanhRegOld::score() {
	MLPPUtilities util;
	return util.performance(y_hat, outputSet);
}

void MLPPTanhRegOld::save(std::string fileName) {
	MLPPUtilities util;
	util.saveParameters(fileName, weights, bias);
}

real_t MLPPTanhRegOld::Cost(std::vector<real_t> y_hat, std::vector<real_t> y) {
	MLPPRegOld regularization;
	class MLPPCostOld cost;
	return cost.MSE(y_hat, y) + regularization.regTerm(weights, lambda, alpha, reg);
}

std::vector<real_t> MLPPTanhRegOld::Evaluate(std::vector<std::vector<real_t>> X) {
	MLPPLinAlgOld alg;
	MLPPActivationOld avn;
	return avn.tanh(alg.scalarAdd(bias, alg.mat_vec_mult(X, weights)));
}

std::vector<real_t> MLPPTanhRegOld::propagate(std::vector<std::vector<real_t>> X) {
	MLPPLinAlgOld alg;
	return alg.scalarAdd(bias, alg.mat_vec_mult(X, weights));
}

real_t MLPPTanhRegOld::Evaluate(std::vector<real_t> x) {
	MLPPLinAlgOld alg;
	MLPPActivationOld avn;
	return avn.tanh(alg.dot(weights, x) + bias);
}

real_t MLPPTanhRegOld::propagate(std::vector<real_t> x) {
	MLPPLinAlgOld alg;
	return alg.dot(weights, x) + bias;
}

// Tanh ( wTx + b )
void MLPPTanhRegOld::forwardPass() {
	MLPPActivationOld avn;

	z = propagate(inputSet);
	y_hat = avn.tanh(z);
}
@ -1,55 +0,0 @@
#ifndef MLPP_TANH_REG_OLD_H
#define MLPP_TANH_REG_OLD_H

//
//  TanhReg.hpp
//
//  Created by Marc Melikyan on 10/2/20.
//

#include "core/math/math_defs.h"

#include <string>
#include <vector>

class MLPPTanhRegOld {
public:
	MLPPTanhRegOld(std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet, std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5);
	std::vector<real_t> modelSetTest(std::vector<std::vector<real_t>> X);
	real_t modelTest(std::vector<real_t> x);
	void gradientDescent(real_t learning_rate, int max_epoch, bool UI = false);
	void SGD(real_t learning_rate, int max_epoch, bool UI = false);
	void MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI = false);
	real_t score();
	void save(std::string fileName);

private:
	real_t Cost(std::vector<real_t> y_hat, std::vector<real_t> y);

	std::vector<real_t> Evaluate(std::vector<std::vector<real_t>> X);
	std::vector<real_t> propagate(std::vector<std::vector<real_t>> X);
	real_t Evaluate(std::vector<real_t> x);
	real_t propagate(std::vector<real_t> x);
	void forwardPass();

	std::vector<std::vector<real_t>> inputSet;
	std::vector<real_t> outputSet;
	std::vector<real_t> z;
	std::vector<real_t> y_hat;
	std::vector<real_t> weights;
	real_t bias;

	int n;
	int k;

	// UI Portion
	void UI(int epoch, real_t cost_prev);

	// Regularization Params
	std::string reg;
	real_t lambda;
	real_t alpha; /* This is the controlling param for Elastic Net*/
};

#endif /* TanhReg_hpp */
@ -1,58 +0,0 @@
//
//  Transforms.cpp
//
//  Created by Marc Melikyan on 11/13/20.
//

#include "transforms_old.h"
#include "../lin_alg/lin_alg_old.h"
#include "core/int_types.h"
#include <cmath>
#include <iostream>
#include <string>

#ifndef M_PI
#define M_PI 3.141592653
#endif

// DCT ii.
// https://www.mathworks.com/help/images/discrete-cosine-transform.html
std::vector<std::vector<real_t>> MLPPTransformsOld::discreteCosineTransform(std::vector<std::vector<real_t>> A) {
	MLPPLinAlgOld alg;
	A = alg.scalarAdd(-128, A); // Center around 0.

	std::vector<std::vector<real_t>> B;
	B.resize(A.size());
	for (uint32_t i = 0; i < B.size(); i++) {
		B[i].resize(A[i].size());
	}

	int M = A.size();

	for (uint32_t i = 0; i < B.size(); i++) {
		for (uint32_t j = 0; j < B[i].size(); j++) {
			real_t sum = 0;
			real_t alphaI;
			if (i == 0) {
				alphaI = 1 / std::sqrt(M);
			} else {
				alphaI = std::sqrt(real_t(2) / real_t(M));
			}
			real_t alphaJ;
			if (j == 0) {
				alphaJ = 1 / std::sqrt(M);
			} else {
				alphaJ = std::sqrt(real_t(2) / real_t(M));
			}

			for (uint32_t k = 0; k < B.size(); k++) {
				for (uint32_t f = 0; f < B[k].size(); f++) {
					sum += A[k][f] * std::cos((M_PI * i * (2 * k + 1)) / (2 * M)) * std::cos((M_PI * j * (2 * f + 1)) / (2 * M));
				}
			}
			B[i][j] = sum;
			B[i][j] *= alphaI * alphaJ;
		}
	}
	return B;
}
@ -1,20 +0,0 @@
#ifndef MLPP_TRANSFORMS_OLD_H
#define MLPP_TRANSFORMS_OLD_H

//
//  Transforms.hpp
//
//

#include "core/math/math_defs.h"

#include <string>
#include <vector>

class MLPPTransformsOld {
public:
	std::vector<std::vector<real_t>> discreteCosineTransform(std::vector<std::vector<real_t>> A);
};

#endif /* Transforms_hpp */
@ -1,34 +0,0 @@
//
//  UniLinReg.cpp
//
//  Created by Marc Melikyan on 9/29/20.
//

#include "uni_lin_reg_old.h"

#include "../lin_alg/lin_alg_old.h"
#include "../stat/stat_old.h"

#include <iostream>

// General Multivariate Linear Regression Model
// ŷ = b0 + b1x1 + b2x2 + ... + bkxk

// Univariate Linear Regression Model
// ŷ = b0 + b1x1

MLPPUniLinRegOld::MLPPUniLinRegOld(std::vector<real_t> x, std::vector<real_t> y) :
		inputSet(x), outputSet(y) {
	MLPPStatOld estimator;
	b1 = estimator.b1Estimation(inputSet, outputSet);
	b0 = estimator.b0Estimation(inputSet, outputSet);
}

std::vector<real_t> MLPPUniLinRegOld::modelSetTest(std::vector<real_t> x) {
	MLPPLinAlgOld alg;
	return alg.scalarAdd(b0, alg.scalarMultiply(b1, x));
}

real_t MLPPUniLinRegOld::modelTest(real_t input) {
	return b0 + b1 * input;
}
@ -1,29 +0,0 @@
#ifndef MLPP_UNI_LIN_REG_OLD_H
#define MLPP_UNI_LIN_REG_OLD_H

//
//  UniLinReg.hpp
//
//  Created by Marc Melikyan on 9/29/20.
//

#include "core/math/math_defs.h"

#include <vector>

class MLPPUniLinRegOld {
public:
	MLPPUniLinRegOld(std::vector<real_t> x, std::vector<real_t> y);
	std::vector<real_t> modelSetTest(std::vector<real_t> x);
	real_t modelTest(real_t x);

private:
	std::vector<real_t> inputSet;
	std::vector<real_t> outputSet;

	real_t b0;
	real_t b1;
};

#endif /* UniLinReg_hpp */
@ -1,399 +0,0 @@
//
//  Reg.cpp
//
//  Created by Marc Melikyan on 1/16/21.
//

#include "utilities_old.h"

#include <fstream>
#include <iostream>
#include <random>
#include <string>

std::vector<real_t> MLPPUtilitiesOld::weightInitialization(int n, std::string type) {
	std::random_device rd;
	std::default_random_engine generator(rd());

	std::vector<real_t> weights;
	for (int i = 0; i < n; i++) {
		if (type == "XavierNormal") {
			std::normal_distribution<real_t> distribution(0, sqrt(2 / (n + 1)));
			weights.push_back(distribution(generator));
		} else if (type == "XavierUniform") {
			std::uniform_real_distribution<real_t> distribution(-sqrt(6 / (n + 1)), sqrt(6 / (n + 1)));
			weights.push_back(distribution(generator));
		} else if (type == "HeNormal") {
			std::normal_distribution<real_t> distribution(0, sqrt(2 / n));
			weights.push_back(distribution(generator));
		} else if (type == "HeUniform") {
			std::uniform_real_distribution<real_t> distribution(-sqrt(6 / n), sqrt(6 / n));
			weights.push_back(distribution(generator));
		} else if (type == "LeCunNormal") {
			std::normal_distribution<real_t> distribution(0, sqrt(1 / n));
			weights.push_back(distribution(generator));
		} else if (type == "LeCunUniform") {
			std::uniform_real_distribution<real_t> distribution(-sqrt(3 / n), sqrt(3 / n));
			weights.push_back(distribution(generator));
		} else if (type == "Uniform") {
			std::uniform_real_distribution<real_t> distribution(-1 / sqrt(n), 1 / sqrt(n));
			weights.push_back(distribution(generator));
		} else {
			std::uniform_real_distribution<real_t> distribution(0, 1);
			weights.push_back(distribution(generator));
		}
	}
	return weights;
}

real_t MLPPUtilitiesOld::biasInitialization() {
	std::random_device rd;
	std::default_random_engine generator(rd());
	std::uniform_real_distribution<real_t> distribution(0, 1);

	return distribution(generator);
}

std::vector<std::vector<real_t>> MLPPUtilitiesOld::weightInitialization(int n, int m, std::string type) {
	std::random_device rd;
	std::default_random_engine generator(rd());

	std::vector<std::vector<real_t>> weights;
	weights.resize(n);

	for (int i = 0; i < n; i++) {
		for (int j = 0; j < m; j++) {
			if (type == "XavierNormal") {
				std::normal_distribution<real_t> distribution(0, sqrt(2 / (n + m)));
				weights[i].push_back(distribution(generator));
			} else if (type == "XavierUniform") {
				std::uniform_real_distribution<real_t> distribution(-sqrt(6 / (n + m)), sqrt(6 / (n + m)));
				weights[i].push_back(distribution(generator));
			} else if (type == "HeNormal") {
				std::normal_distribution<real_t> distribution(0, sqrt(2 / n));
				weights[i].push_back(distribution(generator));
			} else if (type == "HeUniform") {
				std::uniform_real_distribution<real_t> distribution(-sqrt(6 / n), sqrt(6 / n));
				weights[i].push_back(distribution(generator));
			} else if (type == "LeCunNormal") {
				std::normal_distribution<real_t> distribution(0, sqrt(1 / n));
				weights[i].push_back(distribution(generator));
			} else if (type == "LeCunUniform") {
				std::uniform_real_distribution<real_t> distribution(-sqrt(3 / n), sqrt(3 / n));
				weights[i].push_back(distribution(generator));
			} else if (type == "Uniform") {
				std::uniform_real_distribution<real_t> distribution(-1 / sqrt(n), 1 / sqrt(n));
				weights[i].push_back(distribution(generator));
			} else {
				std::uniform_real_distribution<real_t> distribution(0, 1);
				weights[i].push_back(distribution(generator));
			}
		}
	}
	return weights;
}

std::vector<real_t> MLPPUtilitiesOld::biasInitialization(int n) {
	std::vector<real_t> bias;
	std::random_device rd;
	std::default_random_engine generator(rd());
	std::uniform_real_distribution<real_t> distribution(0, 1);

	for (int i = 0; i < n; i++) {
		bias.push_back(distribution(generator));
	}
	return bias;
}

real_t MLPPUtilitiesOld::performance(std::vector<real_t> y_hat, std::vector<real_t> outputSet) {
	real_t correct = 0;
	for (uint32_t i = 0; i < y_hat.size(); i++) {
		if (std::round(y_hat[i]) == outputSet[i]) {
			correct++;
		}
	}
	return correct / y_hat.size();
}

real_t MLPPUtilitiesOld::performance(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y) {
	real_t correct = 0;
	for (uint32_t i = 0; i < y_hat.size(); i++) {
		uint32_t sub_correct = 0;
		for (uint32_t j = 0; j < y_hat[i].size(); j++) {
			if (std::round(y_hat[i][j]) == y[i][j]) {
				sub_correct++;
			}
			if (sub_correct == y_hat[0].size()) {
				correct++;
			}
		}
	}
	return correct / y_hat.size();
}

void MLPPUtilitiesOld::saveParameters(std::string fileName, std::vector<real_t> weights, real_t bias, bool app, int layer) {
	std::string layer_info = "";
	std::ofstream saveFile;

	if (layer > -1) {
		layer_info = " for layer " + std::to_string(layer);
	}

	if (app) {
		saveFile.open(fileName.c_str(), std::ios_base::app);
	} else {
		saveFile.open(fileName.c_str());
	}

	if (!saveFile.is_open()) {
		std::cout << fileName << " failed to open." << std::endl;
	}

	saveFile << "Weight(s)" << layer_info << std::endl;
	for (uint32_t i = 0; i < weights.size(); i++) {
		saveFile << weights[i] << std::endl;
	}
	saveFile << "Bias" << layer_info << std::endl;
	saveFile << bias << std::endl;

	saveFile.close();
}

void MLPPUtilitiesOld::saveParameters(std::string fileName, std::vector<real_t> weights, std::vector<real_t> initial, real_t bias, bool app, int layer) {
	std::string layer_info = "";
	std::ofstream saveFile;

	if (layer > -1) {
		layer_info = " for layer " + std::to_string(layer);
	}

	if (app) {
		saveFile.open(fileName.c_str(), std::ios_base::app);
	} else {
		saveFile.open(fileName.c_str());
	}

	if (!saveFile.is_open()) {
		std::cout << fileName << " failed to open." << std::endl;
	}

	saveFile << "Weight(s)" << layer_info << std::endl;
	for (uint32_t i = 0; i < weights.size(); i++) {
		saveFile << weights[i] << std::endl;
	}

	saveFile << "Initial(s)" << layer_info << std::endl;
	for (uint32_t i = 0; i < initial.size(); i++) {
		saveFile << initial[i] << std::endl;
	}

	saveFile << "Bias" << layer_info << std::endl;
	saveFile << bias << std::endl;

	saveFile.close();
}

void MLPPUtilitiesOld::saveParameters(std::string fileName, std::vector<std::vector<real_t>> weights, std::vector<real_t> bias, bool app, int layer) {
	std::string layer_info = "";
	std::ofstream saveFile;

	if (layer > -1) {
		layer_info = " for layer " + std::to_string(layer);
	}

	if (app) {
		saveFile.open(fileName.c_str(), std::ios_base::app);
|
|
||||||
} else {
|
|
||||||
saveFile.open(fileName.c_str());
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!saveFile.is_open()) {
|
|
||||||
std::cout << fileName << " failed to open." << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
saveFile << "Weight(s)" << layer_info << std::endl;
|
|
||||||
for (uint32_t i = 0; i < weights.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < weights[i].size(); j++) {
|
|
||||||
saveFile << weights[i][j] << std::endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
saveFile << "Bias(es)" << layer_info << std::endl;
|
|
||||||
for (uint32_t i = 0; i < bias.size(); i++) {
|
|
||||||
saveFile << bias[i] << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
saveFile.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPUtilitiesOld::UI(std::vector<real_t> weights, real_t bias) {
|
|
||||||
std::cout << "Values of the weight(s):" << std::endl;
|
|
||||||
for (uint32_t i = 0; i < weights.size(); i++) {
|
|
||||||
std::cout << weights[i] << std::endl;
|
|
||||||
}
|
|
||||||
std::cout << "Value of the bias:" << std::endl;
|
|
||||||
std::cout << bias << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPUtilitiesOld::UI(std::vector<std::vector<real_t>> weights, std::vector<real_t> bias) {
|
|
||||||
std::cout << "Values of the weight(s):" << std::endl;
|
|
||||||
for (uint32_t i = 0; i < weights.size(); i++) {
|
|
||||||
for (uint32_t j = 0; j < weights[i].size(); j++) {
|
|
||||||
std::cout << weights[i][j] << std::endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
std::cout << "Value of the biases:" << std::endl;
|
|
||||||
for (uint32_t i = 0; i < bias.size(); i++) {
|
|
||||||
std::cout << bias[i] << std::endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPUtilitiesOld::UI(std::vector<real_t> weights, std::vector<real_t> initial, real_t bias) {
|
|
||||||
std::cout << "Values of the weight(s):" << std::endl;
|
|
||||||
for (uint32_t i = 0; i < weights.size(); i++) {
|
|
||||||
std::cout << weights[i] << std::endl;
|
|
||||||
}
|
|
||||||
std::cout << "Values of the initial(s):" << std::endl;
|
|
||||||
for (uint32_t i = 0; i < initial.size(); i++) {
|
|
||||||
std::cout << initial[i] << std::endl;
|
|
||||||
}
|
|
||||||
std::cout << "Value of the bias:" << std::endl;
|
|
||||||
std::cout << bias << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
void MLPPUtilitiesOld::CostInfo(int epoch, real_t cost_prev, real_t Cost) {
|
|
||||||
std::cout << "-----------------------------------" << std::endl;
|
|
||||||
std::cout << "This is epoch: " << epoch << std::endl;
|
|
||||||
std::cout << "The cost function has been minimized by " << cost_prev - Cost << std::endl;
|
|
||||||
std::cout << "Current Cost:" << std::endl;
|
|
||||||
std::cout << Cost << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> MLPPUtilitiesOld::createMiniBatches(std::vector<std::vector<real_t>> inputSet, int n_mini_batch) {
|
|
||||||
int n = inputSet.size();
|
|
||||||
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> inputMiniBatches;
|
|
||||||
|
|
||||||
// Creating the mini-batches
|
|
||||||
for (int i = 0; i < n_mini_batch; i++) {
|
|
||||||
std::vector<std::vector<real_t>> currentInputSet;
|
|
||||||
for (int j = 0; j < n / n_mini_batch; j++) {
|
|
||||||
currentInputSet.push_back(inputSet[n / n_mini_batch * i + j]);
|
|
||||||
}
|
|
||||||
inputMiniBatches.push_back(currentInputSet);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (real_t(n) / real_t(n_mini_batch) - int(n / n_mini_batch) != 0) {
|
|
||||||
for (int i = 0; i < n - n / n_mini_batch * n_mini_batch; i++) {
|
|
||||||
inputMiniBatches[n_mini_batch - 1].push_back(inputSet[n / n_mini_batch * n_mini_batch + i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return inputMiniBatches;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::tuple<std::vector<std::vector<std::vector<real_t>>>, std::vector<std::vector<real_t>>> MLPPUtilitiesOld::createMiniBatches(std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet, int n_mini_batch) {
|
|
||||||
int n = inputSet.size();
|
|
||||||
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> inputMiniBatches;
|
|
||||||
std::vector<std::vector<real_t>> outputMiniBatches;
|
|
||||||
|
|
||||||
for (int i = 0; i < n_mini_batch; i++) {
|
|
||||||
std::vector<std::vector<real_t>> currentInputSet;
|
|
||||||
std::vector<real_t> currentOutputSet;
|
|
||||||
for (int j = 0; j < n / n_mini_batch; j++) {
|
|
||||||
currentInputSet.push_back(inputSet[n / n_mini_batch * i + j]);
|
|
||||||
currentOutputSet.push_back(outputSet[n / n_mini_batch * i + j]);
|
|
||||||
}
|
|
||||||
inputMiniBatches.push_back(currentInputSet);
|
|
||||||
outputMiniBatches.push_back(currentOutputSet);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (real_t(n) / real_t(n_mini_batch) - int(n / n_mini_batch) != 0) {
|
|
||||||
for (int i = 0; i < n - n / n_mini_batch * n_mini_batch; i++) {
|
|
||||||
inputMiniBatches[n_mini_batch - 1].push_back(inputSet[n / n_mini_batch * n_mini_batch + i]);
|
|
||||||
outputMiniBatches[n_mini_batch - 1].push_back(outputSet[n / n_mini_batch * n_mini_batch + i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return { inputMiniBatches, outputMiniBatches };
|
|
||||||
}
|
|
||||||
|
|
||||||
std::tuple<std::vector<std::vector<std::vector<real_t>>>, std::vector<std::vector<std::vector<real_t>>>> MLPPUtilitiesOld::createMiniBatches(std::vector<std::vector<real_t>> inputSet, std::vector<std::vector<real_t>> outputSet, int n_mini_batch) {
|
|
||||||
int n = inputSet.size();
|
|
||||||
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> inputMiniBatches;
|
|
||||||
std::vector<std::vector<std::vector<real_t>>> outputMiniBatches;
|
|
||||||
|
|
||||||
for (int i = 0; i < n_mini_batch; i++) {
|
|
||||||
std::vector<std::vector<real_t>> currentInputSet;
|
|
||||||
std::vector<std::vector<real_t>> currentOutputSet;
|
|
||||||
for (int j = 0; j < n / n_mini_batch; j++) {
|
|
||||||
currentInputSet.push_back(inputSet[n / n_mini_batch * i + j]);
|
|
||||||
currentOutputSet.push_back(outputSet[n / n_mini_batch * i + j]);
|
|
||||||
}
|
|
||||||
inputMiniBatches.push_back(currentInputSet);
|
|
||||||
outputMiniBatches.push_back(currentOutputSet);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (real_t(n) / real_t(n_mini_batch) - int(n / n_mini_batch) != 0) {
|
|
||||||
for (int i = 0; i < n - n / n_mini_batch * n_mini_batch; i++) {
|
|
||||||
inputMiniBatches[n_mini_batch - 1].push_back(inputSet[n / n_mini_batch * n_mini_batch + i]);
|
|
||||||
outputMiniBatches[n_mini_batch - 1].push_back(outputSet[n / n_mini_batch * n_mini_batch + i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return { inputMiniBatches, outputMiniBatches };
|
|
||||||
}
|
|
||||||
|
|
||||||
std::tuple<real_t, real_t, real_t, real_t> MLPPUtilitiesOld::TF_PN(std::vector<real_t> y_hat, std::vector<real_t> y) {
|
|
||||||
real_t TP = 0;
|
|
||||||
real_t FP = 0;
|
|
||||||
real_t TN = 0;
|
|
||||||
real_t FN = 0;
|
|
||||||
for (uint32_t i = 0; i < y_hat.size(); i++) {
|
|
||||||
if (y_hat[i] == y[i]) {
|
|
||||||
if (y_hat[i] == 1) {
|
|
||||||
TP++;
|
|
||||||
} else {
|
|
||||||
TN++;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (y_hat[i] == 1) {
|
|
||||||
FP++;
|
|
||||||
} else {
|
|
||||||
FN++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return { TP, FP, TN, FN };
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPUtilitiesOld::recall(std::vector<real_t> y_hat, std::vector<real_t> y) {
|
|
||||||
auto res = TF_PN(y_hat, y);
|
|
||||||
auto TP = std::get<0>(res);
|
|
||||||
//auto FP = std::get<1>(res);
|
|
||||||
//auto TN = std::get<2>(res);
|
|
||||||
auto FN = std::get<3>(res);
|
|
||||||
|
|
||||||
return TP / (TP + FN);
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPUtilitiesOld::precision(std::vector<real_t> y_hat, std::vector<real_t> y) {
|
|
||||||
auto res = TF_PN(y_hat, y);
|
|
||||||
auto TP = std::get<0>(res);
|
|
||||||
auto FP = std::get<1>(res);
|
|
||||||
//auto TN = std::get<2>(res);
|
|
||||||
//auto FN = std::get<3>(res);
|
|
||||||
|
|
||||||
return TP / (TP + FP);
|
|
||||||
}
|
|
||||||
|
|
||||||
real_t MLPPUtilitiesOld::accuracy(std::vector<real_t> y_hat, std::vector<real_t> y) {
|
|
||||||
auto res = TF_PN(y_hat, y);
|
|
||||||
auto TP = std::get<0>(res);
|
|
||||||
auto FP = std::get<1>(res);
|
|
||||||
auto TN = std::get<2>(res);
|
|
||||||
auto FN = std::get<3>(res);
|
|
||||||
|
|
||||||
return (TP + TN) / (TP + FP + FN + TN);
|
|
||||||
}
|
|
||||||
real_t MLPPUtilitiesOld::f1_score(std::vector<real_t> y_hat, std::vector<real_t> y) {
|
|
||||||
return 2 * precision(y_hat, y) * recall(y_hat, y) / (precision(y_hat, y) + recall(y_hat, y));
|
|
||||||
}
|
|
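The mini-batch helpers removed above slice n / n_mini_batch rows per batch and append any remainder rows to the last batch. The following standalone sketch is not part of this commit; the sizes are illustrative and plain double is used in place of real_t, but it shows the same slicing logic:

// --- illustration only, not part of this commit ---
#include <iostream>
#include <vector>

int main() {
	// 10 single-feature rows split into 3 mini-batches -> sizes 3, 3, 4.
	std::vector<std::vector<double>> input;
	for (int i = 0; i < 10; i++) {
		input.push_back({ double(i) });
	}

	int n = input.size();
	int n_mini_batch = 3;
	int batch_size = n / n_mini_batch; // integer division, here 3

	std::vector<std::vector<std::vector<double>>> batches(n_mini_batch);
	for (int i = 0; i < n_mini_batch; i++) {
		for (int j = 0; j < batch_size; j++) {
			batches[i].push_back(input[batch_size * i + j]);
		}
	}
	// Remainder rows (here 10 - 3 * 3 = 1) are appended to the last batch.
	for (int i = batch_size * n_mini_batch; i < n; i++) {
		batches[n_mini_batch - 1].push_back(input[i]);
	}

	for (const auto &b : batches) {
		std::cout << b.size() << std::endl; // prints 3, 3, 4
	}
	return 0;
}
// --- end illustration ---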
@ -1,54 +0,0 @@
#ifndef MLPP_UTILITIES_OLD_H
#define MLPP_UTILITIES_OLD_H

//
// Utilities.hpp
//
// Created by Marc Melikyan on 1/16/21.
//

#include "core/math/math_defs.h"

#include <string>
#include <tuple>
#include <vector>

class MLPPUtilitiesOld {
public:
	// Weight Init
	static std::vector<real_t> weightInitialization(int n, std::string type = "Default");
	static real_t biasInitialization();

	static std::vector<std::vector<real_t>> weightInitialization(int n, int m, std::string type = "Default");
	static std::vector<real_t> biasInitialization(int n);

	// Cost/Performance related Functions
	real_t performance(std::vector<real_t> y_hat, std::vector<real_t> y);
	real_t performance(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y);

	// Parameter Saving Functions
	void saveParameters(std::string fileName, std::vector<real_t> weights, real_t bias, bool app = false, int layer = -1);
	void saveParameters(std::string fileName, std::vector<real_t> weights, std::vector<real_t> initial, real_t bias, bool app = false, int layer = -1);
	void saveParameters(std::string fileName, std::vector<std::vector<real_t>> weights, std::vector<real_t> bias, bool app = false, int layer = -1);

	// Gradient Descent related
	static void UI(std::vector<real_t> weights, real_t bias);
	static void UI(std::vector<real_t> weights, std::vector<real_t> initial, real_t bias);
	static void UI(std::vector<std::vector<real_t>> weights, std::vector<real_t> bias);

	static void CostInfo(int epoch, real_t cost_prev, real_t Cost);

	static std::vector<std::vector<std::vector<real_t>>> createMiniBatches(std::vector<std::vector<real_t>> inputSet, int n_mini_batch);
	static std::tuple<std::vector<std::vector<std::vector<real_t>>>, std::vector<std::vector<real_t>>> createMiniBatches(std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet, int n_mini_batch);
	static std::tuple<std::vector<std::vector<std::vector<real_t>>>, std::vector<std::vector<std::vector<real_t>>>> createMiniBatches(std::vector<std::vector<real_t>> inputSet, std::vector<std::vector<real_t>> outputSet, int n_mini_batch);

	// F1 score, Precision/Recall, TP, FP, TN, FN, etc.
	std::tuple<real_t, real_t, real_t, real_t> TF_PN(std::vector<real_t> y_hat, std::vector<real_t> y); //TF_PN = "True", "False", "Positive", "Negative"
	real_t recall(std::vector<real_t> y_hat, std::vector<real_t> y);
	real_t precision(std::vector<real_t> y_hat, std::vector<real_t> y);
	real_t accuracy(std::vector<real_t> y_hat, std::vector<real_t> y);
	real_t f1_score(std::vector<real_t> y_hat, std::vector<real_t> y);
};

#endif /* Utilities_hpp */
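For reference, a hedged sketch of how this removed utilities API was typically driven while it was still compiled in. The "HeNormal" type string and the rounding behaviour of performance() come from the code above; the include path, the demo function, and the sample values are assumptions for illustration only:

// --- illustration only, not part of this commit ---
#include "mlpp/utilities/utilities_old.h" // assumed module-relative path

#include <vector>

void utilities_old_demo() {
	// He-normal init for a 3x4 weight matrix plus three uniform [0, 1) biases.
	std::vector<std::vector<real_t>> W = MLPPUtilitiesOld::weightInitialization(3, 4, "HeNormal");
	std::vector<real_t> b = MLPPUtilitiesOld::biasInitialization(3);

	// The scoring helpers are non-static, so they need an instance.
	MLPPUtilitiesOld util;
	std::vector<real_t> y_hat = { 0.9, 0.2, 0.7 };
	std::vector<real_t> y = { 1, 0, 1 };
	real_t acc = util.performance(y_hat, y); // y_hat is rounded, so this evaluates to 1.0

	// Persist the first row of weights together with its bias.
	util.saveParameters("params.txt", W[0], b[0]);
	(void)acc;
}
// --- end illustration ---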
@ -1,305 +0,0 @@
//
// WGAN.cpp
//
// Created by Marc Melikyan on 11/4/20.
//

#include "wgan_old.h"

#include "core/log/logger.h"

#include "../activation/activation_old.h"
#include "../cost/cost_old.h"
#include "../lin_alg/lin_alg_old.h"
#include "../regularization/reg_old.h"
#include "../utilities/utilities.h"

#include "core/object/method_bind_ext.gen.inc"

#include <cmath>
#include <iostream>

MLPPWGANOld::MLPPWGANOld(real_t k, std::vector<std::vector<real_t>> outputSet) :
		outputSet(outputSet), n(outputSet.size()), k(k) {
}

MLPPWGANOld::~MLPPWGANOld() {
	delete outputLayer;
}

std::vector<std::vector<real_t>> MLPPWGANOld::generateExample(int n) {
	MLPPLinAlgOld alg;
	return modelSetTestGenerator(alg.gaussianNoise(n, k));
}

void MLPPWGANOld::gradientDescent(real_t learning_rate, int max_epoch, bool UI) {
	MLPPLinAlgOld alg;
	real_t cost_prev = 0;
	int epoch = 1;
	forwardPass();

	const int CRITIC_INTERATIONS = 5; // Wasserstein GAN specific parameter.

	while (true) {
		cost_prev = Cost(y_hat, alg.onevec(n));

		std::vector<std::vector<real_t>> generatorInputSet;
		std::vector<std::vector<real_t>> discriminatorInputSet;

		std::vector<real_t> y_hat;
		std::vector<real_t> outputSet;

		// Training of the discriminator.
		for (int i = 0; i < CRITIC_INTERATIONS; i++) {
			generatorInputSet = alg.gaussianNoise(n, k);
			discriminatorInputSet = modelSetTestGenerator(generatorInputSet);
			discriminatorInputSet.insert(discriminatorInputSet.end(), MLPPWGANOld::outputSet.begin(), MLPPWGANOld::outputSet.end()); // Fake + real inputs.

			y_hat = modelSetTestDiscriminator(discriminatorInputSet);
			outputSet = alg.scalarMultiply(-1, alg.onevec(n)); // WGAN changes y_i = 1 and y_i = 0 to y_i = 1 and y_i = -1
			std::vector<real_t> outputSetReal = alg.onevec(n);
			outputSet.insert(outputSet.end(), outputSetReal.begin(), outputSetReal.end()); // Fake + real output scores.

			auto discriminator_gradient_results = computeDiscriminatorGradients(y_hat, outputSet);
			auto cumulativeDiscriminatorHiddenLayerWGrad = std::get<0>(discriminator_gradient_results);
			auto outputDiscriminatorWGrad = std::get<1>(discriminator_gradient_results);

			cumulativeDiscriminatorHiddenLayerWGrad = alg.scalarMultiply(learning_rate / n, cumulativeDiscriminatorHiddenLayerWGrad);
			outputDiscriminatorWGrad = alg.scalarMultiply(learning_rate / n, outputDiscriminatorWGrad);
			updateDiscriminatorParameters(cumulativeDiscriminatorHiddenLayerWGrad, outputDiscriminatorWGrad, learning_rate);
		}

		// Training of the generator.
		generatorInputSet = alg.gaussianNoise(n, k);
		discriminatorInputSet = modelSetTestGenerator(generatorInputSet);
		y_hat = modelSetTestDiscriminator(discriminatorInputSet);
		outputSet = alg.onevec(n);

		std::vector<std::vector<std::vector<real_t>>> cumulativeGeneratorHiddenLayerWGrad = computeGeneratorGradients(y_hat, outputSet);
		cumulativeGeneratorHiddenLayerWGrad = alg.scalarMultiply(learning_rate / n, cumulativeGeneratorHiddenLayerWGrad);
		updateGeneratorParameters(cumulativeGeneratorHiddenLayerWGrad, learning_rate);

		forwardPass();

		if (UI) {
			MLPPWGANOld::UI(epoch, cost_prev, MLPPWGANOld::y_hat, alg.onevec(n));
		}

		epoch++;
		if (epoch > max_epoch) {
			break;
		}
	}
}

real_t MLPPWGANOld::score() {
	MLPPLinAlgOld alg;
	MLPPUtilities util;
	forwardPass();
	return util.performance(y_hat, alg.onevec(n));
}

void MLPPWGANOld::save(std::string fileName) {
	MLPPUtilities util;
	if (!network.empty()) {
		util.saveParameters(fileName, network[0].weights, network[0].bias, 0, 1);
		for (uint32_t i = 1; i < network.size(); i++) {
			util.saveParameters(fileName, network[i].weights, network[i].bias, 1, i + 1);
		}
		util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, 1, network.size() + 1);
	} else {
		util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, 0, network.size() + 1);
	}
}

void MLPPWGANOld::addLayer(int n_hidden, std::string activation, std::string weightInit, std::string reg, real_t lambda, real_t alpha) {
	MLPPLinAlgOld alg;
	if (network.empty()) {
		network.push_back(MLPPOldHiddenLayer(n_hidden, activation, alg.gaussianNoise(n, k), weightInit, reg, lambda, alpha));
		network[0].forwardPass();
	} else {
		network.push_back(MLPPOldHiddenLayer(n_hidden, activation, network[network.size() - 1].a, weightInit, reg, lambda, alpha));
		network[network.size() - 1].forwardPass();
	}
}

void MLPPWGANOld::addOutputLayer(std::string weightInit, std::string reg, real_t lambda, real_t alpha) {
	MLPPLinAlgOld alg;
	if (!network.empty()) {
		outputLayer = new MLPPOldOutputLayer(network[network.size() - 1].n_hidden, "Linear", "WassersteinLoss", network[network.size() - 1].a, weightInit, "WeightClipping", -0.01, 0.01);
	} else { // Should never happen.
		outputLayer = new MLPPOldOutputLayer(k, "Linear", "WassersteinLoss", alg.gaussianNoise(n, k), weightInit, "WeightClipping", -0.01, 0.01);
	}
}

std::vector<std::vector<real_t>> MLPPWGANOld::modelSetTestGenerator(std::vector<std::vector<real_t>> X) {
	if (!network.empty()) {
		network[0].input = X;
		network[0].forwardPass();

		for (uint32_t i = 1; i <= network.size() / 2; i++) {
			network[i].input = network[i - 1].a;
			network[i].forwardPass();
		}
	}
	return network[network.size() / 2].a;
}

std::vector<real_t> MLPPWGANOld::modelSetTestDiscriminator(std::vector<std::vector<real_t>> X) {
	if (!network.empty()) {
		for (uint32_t i = network.size() / 2 + 1; i < network.size(); i++) {
			if (i == network.size() / 2 + 1) {
				network[i].input = X;
			} else {
				network[i].input = network[i - 1].a;
			}
			network[i].forwardPass();
		}
		outputLayer->input = network[network.size() - 1].a;
	}
	outputLayer->forwardPass();
	return outputLayer->a;
}

real_t MLPPWGANOld::Cost(std::vector<real_t> y_hat, std::vector<real_t> y) {
	MLPPRegOld regularization;
	class MLPPCostOld cost;
	real_t totalRegTerm = 0;

	auto cost_function = outputLayer->cost_map[outputLayer->cost];
	if (!network.empty()) {
		for (uint32_t i = 0; i < network.size() - 1; i++) {
			totalRegTerm += regularization.regTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg);
		}
	}
	return (cost.*cost_function)(y_hat, y) + totalRegTerm + regularization.regTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg);
}

void MLPPWGANOld::forwardPass() {
	MLPPLinAlgOld alg;
	if (!network.empty()) {
		network[0].input = alg.gaussianNoise(n, k);
		network[0].forwardPass();

		for (uint32_t i = 1; i < network.size(); i++) {
			network[i].input = network[i - 1].a;
			network[i].forwardPass();
		}
		outputLayer->input = network[network.size() - 1].a;
	} else { // Should never happen, though.
		outputLayer->input = alg.gaussianNoise(n, k);
	}
	outputLayer->forwardPass();
	y_hat = outputLayer->a;
}

void MLPPWGANOld::updateDiscriminatorParameters(std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations, std::vector<real_t> outputLayerUpdation, real_t learning_rate) {
	MLPPLinAlgOld alg;

	outputLayer->weights = alg.subtraction(outputLayer->weights, outputLayerUpdation);
	outputLayer->bias -= learning_rate * alg.sum_elements(outputLayer->delta) / n;

	if (!network.empty()) {
		network[network.size() - 1].weights = alg.subtraction(network[network.size() - 1].weights, hiddenLayerUpdations[0]);
		network[network.size() - 1].bias = alg.subtractMatrixRows(network[network.size() - 1].bias, alg.scalarMultiply(learning_rate / n, network[network.size() - 1].delta));

		for (uint32_t i = network.size() - 2; i > network.size() / 2; i--) {
			network[i].weights = alg.subtraction(network[i].weights, hiddenLayerUpdations[(network.size() - 2) - i + 1]);
			network[i].bias = alg.subtractMatrixRows(network[i].bias, alg.scalarMultiply(learning_rate / n, network[i].delta));
		}
	}
}

void MLPPWGANOld::updateGeneratorParameters(std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations, real_t learning_rate) {
	MLPPLinAlgOld alg;

	if (!network.empty()) {
		for (int ii = network.size() / 2; ii >= 0; ii--) {
			uint32_t i = static_cast<uint32_t>(ii);

			//std::cout << network[i].weights.size() << "x" << network[i].weights[0].size() << std::endl;
			//std::cout << hiddenLayerUpdations[(network.size() - 2) - i + 1].size() << "x" << hiddenLayerUpdations[(network.size() - 2) - i + 1][0].size() << std::endl;
			network[i].weights = alg.subtraction(network[i].weights, hiddenLayerUpdations[(network.size() - 2) - i + 1]);
			network[i].bias = alg.subtractMatrixRows(network[i].bias, alg.scalarMultiply(learning_rate / n, network[i].delta));
		}
	}
}

std::tuple<std::vector<std::vector<std::vector<real_t>>>, std::vector<real_t>> MLPPWGANOld::computeDiscriminatorGradients(std::vector<real_t> y_hat, std::vector<real_t> outputSet) {
	class MLPPCostOld cost;
	MLPPActivationOld avn;
	MLPPLinAlgOld alg;
	MLPPRegOld regularization;

	std::vector<std::vector<std::vector<real_t>>> cumulativeHiddenLayerWGrad; // Tensor containing ALL hidden grads.

	auto costDeriv = outputLayer->costDeriv_map[outputLayer->cost];
	auto outputAvn = outputLayer->activation_map[outputLayer->activation];
	outputLayer->delta = alg.hadamard_product((cost.*costDeriv)(y_hat, outputSet), (avn.*outputAvn)(outputLayer->z, 1));
	std::vector<real_t> outputWGrad = alg.mat_vec_mult(alg.transpose(outputLayer->input), outputLayer->delta);
	outputWGrad = alg.addition(outputWGrad, regularization.regDerivTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg));

	if (!network.empty()) {
		auto hiddenLayerAvn = network[network.size() - 1].activation_map[network[network.size() - 1].activation];
		network[network.size() - 1].delta = alg.hadamard_product(alg.outerProduct(outputLayer->delta, outputLayer->weights), (avn.*hiddenLayerAvn)(network[network.size() - 1].z, 1));
		std::vector<std::vector<real_t>> hiddenLayerWGrad = alg.matmult(alg.transpose(network[network.size() - 1].input), network[network.size() - 1].delta);

		cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[network.size() - 1].weights, network[network.size() - 1].lambda, network[network.size() - 1].alpha, network[network.size() - 1].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.

		//std::cout << "HIDDENLAYER FIRST:" << hiddenLayerWGrad.size() << "x" << hiddenLayerWGrad[0].size() << std::endl;
		//std::cout << "WEIGHTS SECOND:" << network[network.size() - 1].weights.size() << "x" << network[network.size() - 1].weights[0].size() << std::endl;

		for (uint32_t i = network.size() - 2; i > network.size() / 2; i--) {
			auto hiddenLayerAvnl = network[i].activation_map[network[i].activation];
			network[i].delta = alg.hadamard_product(alg.matmult(network[i + 1].delta, alg.transpose(network[i + 1].weights)), (avn.*hiddenLayerAvnl)(network[i].z, 1));
			std::vector<std::vector<real_t>> hiddenLayerWGradl = alg.matmult(alg.transpose(network[i].input), network[i].delta);

			cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGradl, regularization.regDerivTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
		}
	}
	return { cumulativeHiddenLayerWGrad, outputWGrad };
}

std::vector<std::vector<std::vector<real_t>>> MLPPWGANOld::computeGeneratorGradients(std::vector<real_t> y_hat, std::vector<real_t> outputSet) {
	class MLPPCostOld cost;
	MLPPActivationOld avn;
	MLPPLinAlgOld alg;
	MLPPRegOld regularization;

	std::vector<std::vector<std::vector<real_t>>> cumulativeHiddenLayerWGrad; // Tensor containing ALL hidden grads.

	auto costDeriv = outputLayer->costDeriv_map[outputLayer->cost];
	auto outputAvn = outputLayer->activation_map[outputLayer->activation];
	outputLayer->delta = alg.hadamard_product((cost.*costDeriv)(y_hat, outputSet), (avn.*outputAvn)(outputLayer->z, 1));
	std::vector<real_t> outputWGrad = alg.mat_vec_mult(alg.transpose(outputLayer->input), outputLayer->delta);
	outputWGrad = alg.addition(outputWGrad, regularization.regDerivTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg));
	if (!network.empty()) {
		auto hiddenLayerAvn = network[network.size() - 1].activation_map[network[network.size() - 1].activation];
		network[network.size() - 1].delta = alg.hadamard_product(alg.outerProduct(outputLayer->delta, outputLayer->weights), (avn.*hiddenLayerAvn)(network[network.size() - 1].z, 1));
		std::vector<std::vector<real_t>> hiddenLayerWGrad = alg.matmult(alg.transpose(network[network.size() - 1].input), network[network.size() - 1].delta);
		cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[network.size() - 1].weights, network[network.size() - 1].lambda, network[network.size() - 1].alpha, network[network.size() - 1].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.

		for (int ii = network.size() - 2; ii >= 0; ii--) {
			uint32_t i = static_cast<uint32_t>(ii);
			auto hiddenLayerAvnl = network[i].activation_map[network[i].activation];
			network[i].delta = alg.hadamard_product(alg.matmult(network[i + 1].delta, alg.transpose(network[i + 1].weights)), (avn.*hiddenLayerAvnl)(network[i].z, 1));
			std::vector<std::vector<real_t>> hiddenLayerWGradl = alg.matmult(alg.transpose(network[i].input), network[i].delta);
			cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGradl, regularization.regDerivTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
		}
	}
	return cumulativeHiddenLayerWGrad;
}

void MLPPWGANOld::UI(int epoch, real_t cost_prev, std::vector<real_t> y_hat, std::vector<real_t> outputSet) {
	MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
	std::cout << "Layer " << network.size() + 1 << ": " << std::endl;
	MLPPUtilities::UI(outputLayer->weights, outputLayer->bias);
	if (!network.empty()) {
		for (int ii = network.size() - 1; ii >= 0; ii--) {
			uint32_t i = static_cast<uint32_t>(ii);

			std::cout << "Layer " << i + 1 << ": " << std::endl;
			MLPPUtilities::UI(network[i].weights, network[i].bias);
		}
	}
}
@ -1,68 +0,0 @@
#ifndef MLPP_WGAN_OLD_H
#define MLPP_WGAN_OLD_H

//
// WGAN.hpp
//
// Created by Marc Melikyan on 11/4/20.
//

#include "core/containers/vector.h"
#include "core/math/math_defs.h"
#include "core/string/ustring.h"

#include "core/object/reference.h"

#include "../lin_alg/mlpp_matrix.h"
#include "../lin_alg/mlpp_vector.h"

#include "../hidden_layer/hidden_layer_old.h"
#include "../output_layer/output_layer_old.h"

#include "../activation/activation.h"
#include "../cost/cost.h"
#include "../regularization/reg_old.h"
#include "../utilities/utilities.h"

#include <string>
#include <tuple>
#include <vector>

class MLPPWGANOld {
public:
	MLPPWGANOld(real_t k, std::vector<std::vector<real_t>> outputSet);
	~MLPPWGANOld();
	std::vector<std::vector<real_t>> generateExample(int n);
	void gradientDescent(real_t learning_rate, int max_epoch, bool UI = false);
	real_t score();
	void save(std::string fileName);

	void addLayer(int n_hidden, std::string activation, std::string weightInit = "Default", std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5);
	void addOutputLayer(std::string weightInit = "Default", std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5);

private:
	std::vector<std::vector<real_t>> modelSetTestGenerator(std::vector<std::vector<real_t>> X); // Evaluator for the generator of the WGAN.
	std::vector<real_t> modelSetTestDiscriminator(std::vector<std::vector<real_t>> X); // Evaluator for the discriminator of the WGAN.

	real_t Cost(std::vector<real_t> y_hat, std::vector<real_t> y);

	void forwardPass();
	void updateDiscriminatorParameters(std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations, std::vector<real_t> outputLayerUpdation, real_t learning_rate);
	void updateGeneratorParameters(std::vector<std::vector<std::vector<real_t>>> hiddenLayerUpdations, real_t learning_rate);
	std::tuple<std::vector<std::vector<std::vector<real_t>>>, std::vector<real_t>> computeDiscriminatorGradients(std::vector<real_t> y_hat, std::vector<real_t> outputSet);
	std::vector<std::vector<std::vector<real_t>>> computeGeneratorGradients(std::vector<real_t> y_hat, std::vector<real_t> outputSet);

	void UI(int epoch, real_t cost_prev, std::vector<real_t> y_hat, std::vector<real_t> outputSet);

	std::vector<std::vector<real_t>> outputSet;
	std::vector<real_t> y_hat;

	std::vector<MLPPOldHiddenLayer> network;
	MLPPOldOutputLayer *outputLayer;

	int n;
	int k;
};

#endif /* WGAN_hpp */
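For reference, a hedged sketch of how the removed MLPPWGANOld class was driven. The generator/critic split of `network` and the behaviour of addOutputLayer() come from the implementation above; the layer sizes, activation name strings, hyperparameters, include path, and the demo function itself are illustrative assumptions, not taken from this commit:

// --- illustration only, not part of this commit ---
#include "mlpp/wgan/wgan_old.h" // assumed module-relative path

#include <iostream>
#include <vector>

void wgan_old_demo(const std::vector<std::vector<real_t>> &real_samples) {
	MLPPWGANOld gan(2 /* k: noise dimensionality */, real_samples);

	// First half of the hidden layers acts as the generator, second half as the critic.
	gan.addLayer(5, "Sigmoid");
	gan.addLayer(2, "RELU");
	gan.addLayer(5, "Sigmoid");
	gan.addOutputLayer(); // Linear activation + WassersteinLoss + weight clipping, per addOutputLayer() above.

	gan.gradientDescent(0.1, 1000, /*UI=*/false);

	std::cout << "score: " << gan.score() << std::endl;
	std::cout << "generated sample rows: " << gan.generateExample(10).size() << std::endl;
}
// --- end illustration ---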
@ -72,10 +72,6 @@ SOFTWARE.
 #ifdef TESTS_ENABLED
 #include "test/mlpp_matrix_tests.h"
 #include "test/mlpp_tests.h"
-
-#ifdef OLD_CLASSES_ENABLED
-#include "test/mlpp_tests_old.h"
-#endif
 #endif
 
 void register_pmlpp_types(ModuleRegistrationLevel p_level) {
@ -134,10 +130,6 @@ void register_pmlpp_types(ModuleRegistrationLevel p_level) {
 #ifdef TESTS_ENABLED
 	ClassDB::register_class<MLPPTests>();
 	ClassDB::register_class<MLPPMatrixTests>();
-
-#ifdef OLD_CLASSES_ENABLED
-	ClassDB::register_class<MLPPTestsOld>();
-#endif
 #endif
 }
 }
@ -80,6 +80,21 @@ void MLPPTestsOld::test_univariate_linear_regression() {
 }
 
 void MLPPTestsOld::test_multivariate_linear_regression_gradient_descent(bool ui) {
+	MLPPData data;
+	MLPPLinAlgOld alg;
+
+	Ref<MLPPDataSimple> ds = data.load_california_housing(_california_housing_data_path);
+
+	MLPPLinRegOld model_old(ds->get_input()->to_std_vector(), ds->get_output()->to_std_vector()); // Can use Lasso, Ridge, ElasticNet Reg
+	model_old.SGD(0.00000001, 300000, ui);
+	alg.printVector(model_old.modelSetTest(ds->get_input()->to_std_vector()));
+
+	//void Momentum(real_t learning_rate, int max_epoch, int mini_batch_size, real_t gamma, bool UI = false);
+	//void NAG(real_t learning_rate, int max_epoch, int mini_batch_size, real_t gamma, bool UI = false);
+	//void Adagrad(real_t learning_rate, int max_epoch, int mini_batch_size, real_t e, bool UI = false);
+	//void Adadelta(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t e, bool UI = false);
+	//void Adamax(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI = false);
+	//void Nadam(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI = false);
 }
 
 void MLPPTestsOld::test_multivariate_linear_regression_sgd(bool ui) {