From 62598551f4476dbb5f4767d2a3ef0f4ffb8d7525 Mon Sep 17 00:00:00 2001 From: Relintai Date: Sat, 30 Dec 2023 00:20:49 +0100 Subject: [PATCH] Removed the old classes. --- SCsub | 56 - main.cpp | 720 --------- mlpp/activation/activation_old.cpp | 953 ----------- mlpp/activation/activation_old.h | 146 -- mlpp/ann/ann_old.cpp | 808 ---------- mlpp/ann/ann_old.h | 73 - mlpp/auto_encoder/auto_encoder_old.cpp | 264 --- mlpp/auto_encoder/auto_encoder_old.h | 58 - mlpp/bernoulli_nb/bernoulli_nb_old.cpp | 179 --- mlpp/bernoulli_nb/bernoulli_nb_old.h | 42 - mlpp/c_log_log_reg/c_log_log_reg_old.cpp | 224 --- mlpp/c_log_log_reg/c_log_log_reg_old.h | 54 - mlpp/convolutions/convolutions_old.cpp | 378 ----- mlpp/convolutions/convolutions_old.h | 56 - mlpp/cost/cost_old.cpp | 395 ----- mlpp/cost/cost_old.h | 85 - mlpp/data/data.cpp | 1 - mlpp/data/data_old.cpp | 833 ---------- mlpp/data/data_old.h | 110 -- mlpp/dual_svc/dual_svc_old.cpp | 244 --- mlpp/dual_svc/dual_svc_old.h | 69 - mlpp/exp_reg/exp_reg_old.cpp | 247 --- mlpp/exp_reg/exp_reg_old.h | 50 - mlpp/gan/gan_old.cpp | 287 ---- mlpp/gan/gan_old.h | 59 - .../gauss_markov_checker_old.cpp | 61 - .../gauss_markov_checker_old.h | 29 - mlpp/gaussian_nb/gaussian_nb_old.cpp | 93 -- mlpp/gaussian_nb/gaussian_nb_old.h | 37 - mlpp/hidden_layer/hidden_layer_old.cpp | 118 -- mlpp/hidden_layer/hidden_layer_old.h | 61 - .../hypothesis_testing_old.cpp | 20 - .../hypothesis_testing_old.h | 25 - mlpp/lin_alg/lin_alg_old.cpp | 1410 ----------------- mlpp/lin_alg/lin_alg_old.h | 230 --- mlpp/lin_reg/lin_reg_old.cpp | 598 ------- mlpp/lin_reg/lin_reg_old.h | 60 - mlpp/log_reg/log_reg_old.cpp | 213 --- mlpp/log_reg/log_reg_old.h | 51 - mlpp/mann/mann_old.cpp | 189 --- mlpp/mann/mann_old.h | 51 - mlpp/mlp/mlp_old.cpp | 287 ---- mlpp/mlp/mlp_old.h | 70 - .../multi_output_layer_old.cpp | 139 -- .../multi_output_layer_old.h | 66 - mlpp/multinomial_nb/multinomial_nb_old.cpp | 121 -- mlpp/multinomial_nb/multinomial_nb_old.h | 40 - .../numerical_analysis_old.cpp | 300 ---- .../numerical_analysis_old.h | 59 - mlpp/outlier_finder/outlier_finder_old.cpp | 42 - mlpp/outlier_finder/outlier_finder_old.h | 30 - mlpp/output_layer/output_layer_old.cpp | 135 -- mlpp/output_layer/output_layer_old.h | 65 - mlpp/pca/pca_old.cpp | 59 - mlpp/pca/pca_old.h | 31 - mlpp/probit_reg/probit_reg_old.cpp | 248 --- mlpp/probit_reg/probit_reg_old.h | 53 - mlpp/regularization/reg_old.cpp | 166 -- mlpp/regularization/reg_old.h | 33 - mlpp/softmax_net/softmax_net_old.cpp | 309 ---- mlpp/softmax_net/softmax_net_old.h | 60 - mlpp/softmax_reg/softmax_reg_old.cpp | 193 --- mlpp/softmax_reg/softmax_reg_old.h | 50 - mlpp/stat/stat_old.cpp | 215 --- mlpp/stat/stat_old.h | 52 - mlpp/svc/svc_old.cpp | 198 --- mlpp/svc/svc_old.h | 55 - mlpp/tanh_reg/tanh_reg_old.cpp | 196 --- mlpp/tanh_reg/tanh_reg_old.h | 55 - mlpp/transforms/transforms_old.cpp | 58 - mlpp/transforms/transforms_old.h | 20 - mlpp/uni_lin_reg/uni_lin_reg_old.cpp | 34 - mlpp/uni_lin_reg/uni_lin_reg_old.h | 29 - mlpp/utilities/utilities_old.cpp | 399 ----- mlpp/utilities/utilities_old.h | 54 - mlpp/wgan/wgan_old.cpp | 305 ---- mlpp/wgan/wgan_old.h | 68 - register_types.cpp | 8 - test/mlpp_tests_old.cpp | 15 + 79 files changed, 15 insertions(+), 13939 deletions(-) delete mode 100644 main.cpp delete mode 100644 mlpp/activation/activation_old.cpp delete mode 100644 mlpp/activation/activation_old.h delete mode 100644 mlpp/ann/ann_old.cpp delete mode 100644 mlpp/ann/ann_old.h delete mode 100644 mlpp/auto_encoder/auto_encoder_old.cpp delete mode 100644 
mlpp/auto_encoder/auto_encoder_old.h delete mode 100644 mlpp/bernoulli_nb/bernoulli_nb_old.cpp delete mode 100644 mlpp/bernoulli_nb/bernoulli_nb_old.h delete mode 100644 mlpp/c_log_log_reg/c_log_log_reg_old.cpp delete mode 100644 mlpp/c_log_log_reg/c_log_log_reg_old.h delete mode 100644 mlpp/convolutions/convolutions_old.cpp delete mode 100644 mlpp/convolutions/convolutions_old.h delete mode 100644 mlpp/cost/cost_old.cpp delete mode 100644 mlpp/cost/cost_old.h delete mode 100644 mlpp/data/data_old.cpp delete mode 100644 mlpp/data/data_old.h delete mode 100644 mlpp/dual_svc/dual_svc_old.cpp delete mode 100644 mlpp/dual_svc/dual_svc_old.h delete mode 100644 mlpp/exp_reg/exp_reg_old.cpp delete mode 100644 mlpp/exp_reg/exp_reg_old.h delete mode 100644 mlpp/gan/gan_old.cpp delete mode 100644 mlpp/gan/gan_old.h delete mode 100644 mlpp/gauss_markov_checker/gauss_markov_checker_old.cpp delete mode 100644 mlpp/gauss_markov_checker/gauss_markov_checker_old.h delete mode 100644 mlpp/gaussian_nb/gaussian_nb_old.cpp delete mode 100644 mlpp/gaussian_nb/gaussian_nb_old.h delete mode 100644 mlpp/hidden_layer/hidden_layer_old.cpp delete mode 100644 mlpp/hidden_layer/hidden_layer_old.h delete mode 100644 mlpp/hypothesis_testing/hypothesis_testing_old.cpp delete mode 100644 mlpp/hypothesis_testing/hypothesis_testing_old.h delete mode 100644 mlpp/lin_alg/lin_alg_old.cpp delete mode 100644 mlpp/lin_alg/lin_alg_old.h delete mode 100644 mlpp/lin_reg/lin_reg_old.cpp delete mode 100644 mlpp/lin_reg/lin_reg_old.h delete mode 100644 mlpp/log_reg/log_reg_old.cpp delete mode 100644 mlpp/log_reg/log_reg_old.h delete mode 100644 mlpp/mann/mann_old.cpp delete mode 100644 mlpp/mann/mann_old.h delete mode 100644 mlpp/mlp/mlp_old.cpp delete mode 100644 mlpp/mlp/mlp_old.h delete mode 100644 mlpp/multi_output_layer/multi_output_layer_old.cpp delete mode 100644 mlpp/multi_output_layer/multi_output_layer_old.h delete mode 100644 mlpp/multinomial_nb/multinomial_nb_old.cpp delete mode 100644 mlpp/multinomial_nb/multinomial_nb_old.h delete mode 100644 mlpp/numerical_analysis/numerical_analysis_old.cpp delete mode 100644 mlpp/numerical_analysis/numerical_analysis_old.h delete mode 100644 mlpp/outlier_finder/outlier_finder_old.cpp delete mode 100644 mlpp/outlier_finder/outlier_finder_old.h delete mode 100644 mlpp/output_layer/output_layer_old.cpp delete mode 100644 mlpp/output_layer/output_layer_old.h delete mode 100644 mlpp/pca/pca_old.cpp delete mode 100644 mlpp/pca/pca_old.h delete mode 100644 mlpp/probit_reg/probit_reg_old.cpp delete mode 100644 mlpp/probit_reg/probit_reg_old.h delete mode 100644 mlpp/regularization/reg_old.cpp delete mode 100644 mlpp/regularization/reg_old.h delete mode 100644 mlpp/softmax_net/softmax_net_old.cpp delete mode 100644 mlpp/softmax_net/softmax_net_old.h delete mode 100644 mlpp/softmax_reg/softmax_reg_old.cpp delete mode 100644 mlpp/softmax_reg/softmax_reg_old.h delete mode 100644 mlpp/stat/stat_old.cpp delete mode 100644 mlpp/stat/stat_old.h delete mode 100644 mlpp/svc/svc_old.cpp delete mode 100644 mlpp/svc/svc_old.h delete mode 100644 mlpp/tanh_reg/tanh_reg_old.cpp delete mode 100644 mlpp/tanh_reg/tanh_reg_old.h delete mode 100644 mlpp/transforms/transforms_old.cpp delete mode 100644 mlpp/transforms/transforms_old.h delete mode 100644 mlpp/uni_lin_reg/uni_lin_reg_old.cpp delete mode 100644 mlpp/uni_lin_reg/uni_lin_reg_old.h delete mode 100644 mlpp/utilities/utilities_old.cpp delete mode 100644 mlpp/utilities/utilities_old.h delete mode 100644 mlpp/wgan/wgan_old.cpp delete mode 100644 
mlpp/wgan/wgan_old.h diff --git a/SCsub b/SCsub index dc9f23a..4640c0e 100644 --- a/SCsub +++ b/SCsub @@ -4,19 +4,11 @@ Import('env') module_env = env.Clone() -module_env.pmlpp_build_old_classes = True module_env.pmlpp_build_tests = True -if ARGUMENTS.get('pmlpp_build_old_classes', 'yes') == 'no': - module_env.pmlpp_build_old_classes = False - if ARGUMENTS.get('pmlpp_build_tests', 'yes') == 'no': module_env.pmlpp_build_tests = False -if env.msvc: - # Old classes can't build on MSVC - module_env.pmlpp_build_old_classes = False - sources = [ "register_types.cpp", @@ -73,54 +65,6 @@ if module_env.pmlpp_build_tests: "test/mlpp_matrix_tests.cpp", ] -if module_env.pmlpp_build_old_classes: - module_env.Prepend(CPPDEFINES=["OLD_CLASSES_ENABLED"]) - - sources += [ - "mlpp/wgan/wgan_old.cpp", - "mlpp/output_layer/output_layer_old.cpp", - "mlpp/multi_output_layer/multi_output_layer_old.cpp", - "mlpp/hidden_layer/hidden_layer_old.cpp", - "mlpp/mlp/mlp_old.cpp", - "mlpp/pca/pca_old.cpp", - "mlpp/uni_lin_reg/uni_lin_reg_old.cpp", - "mlpp/outlier_finder/outlier_finder_old.cpp", - "mlpp/probit_reg/probit_reg_old.cpp", - "mlpp/svc/svc_old.cpp", - "mlpp/softmax_reg/softmax_reg_old.cpp", - "mlpp/auto_encoder/auto_encoder_old.cpp", - "mlpp/tanh_reg/tanh_reg_old.cpp", - "mlpp/softmax_net/softmax_net_old.cpp", - "mlpp/multinomial_nb/multinomial_nb_old.cpp", - "mlpp/mann/mann_old.cpp", - "mlpp/log_reg/log_reg_old.cpp", - "mlpp/lin_reg/lin_reg_old.cpp", - "mlpp/gaussian_nb/gaussian_nb_old.cpp", - "mlpp/gan/gan_old.cpp", - "mlpp/exp_reg/exp_reg_old.cpp", - "mlpp/dual_svc/dual_svc_old.cpp", - "mlpp/c_log_log_reg/c_log_log_reg_old.cpp", - "mlpp/bernoulli_nb/bernoulli_nb_old.cpp", - "mlpp/ann/ann_old.cpp", - "mlpp/numerical_analysis/numerical_analysis_old.cpp", - "mlpp/regularization/reg_old.cpp", - "mlpp/gauss_markov_checker/gauss_markov_checker_old.cpp", - "mlpp/utilities/utilities_old.cpp", - "mlpp/transforms/transforms_old.cpp", - "mlpp/stat/stat_old.cpp", - "mlpp/lin_alg/lin_alg_old.cpp", - "mlpp/hypothesis_testing/hypothesis_testing_old.cpp", - "mlpp/data/data_old.cpp", - "mlpp/cost/cost_old.cpp", - "mlpp/convolutions/convolutions_old.cpp", - "mlpp/activation/activation_old.cpp", - ] - -if module_env.pmlpp_build_tests and module_env.pmlpp_build_old_classes: - sources += [ - "test/mlpp_tests_old.cpp", - ] - if ARGUMENTS.get('pmlpp_shared', 'no') == 'yes': # Shared lib compilation diff --git a/main.cpp b/main.cpp deleted file mode 100644 index e0b4b86..0000000 --- a/main.cpp +++ /dev/null @@ -1,720 +0,0 @@ -// -// main.cpp -// TEST_APP -// -// Created by Marc on 1/20/21. -// - -// THINGS CURRENTLY TO DO: -// POLYMORPHIC IMPLEMENTATION OF REGRESSION CLASSES -// EXTEND SGD/MBGD SUPPORT FOR DYN. 
SIZED ANN -// ADD LEAKYRELU, ELU, SELU TO ANN -// FIX VECTOR/MATRIX/TENSOR RESIZE ROUTINE - -// HYPOTHESIS TESTING CLASS -// GAUSS MARKOV CHECKER CLASS - -#include -#include -#include -#include -#include "MLPP/UniLinReg/UniLinReg.hpp" -#include "MLPP/LinReg/LinReg.hpp" -#include "MLPP/LogReg/LogReg.hpp" -#include "MLPP/CLogLogReg/CLogLogReg.hpp" -#include "MLPP/ExpReg/ExpReg.hpp" -#include "MLPP/ProbitReg/ProbitReg.hpp" -#include "MLPP/SoftmaxReg/SoftmaxReg.hpp" -#include "MLPP/TanhReg/TanhReg.hpp" -#include "MLPP/MLP/MLP.hpp" -#include "MLPP/SoftmaxNet/SoftmaxNet.hpp" -#include "MLPP/AutoEncoder/AutoEncoder.hpp" -#include "MLPP/ANN/ANN.hpp" -#include "MLPP/MANN/MANN.hpp" -#include "MLPP/MultinomialNB/MultinomialNB.hpp" -#include "MLPP/BernoulliNB/BernoulliNB.hpp" -#include "MLPP/GaussianNB/GaussianNB.hpp" -#include "MLPP/KMeans/KMeans.hpp" -#include "MLPP/kNN/kNN.hpp" -#include "MLPP/PCA/PCA.hpp" -#include "MLPP/OutlierFinder/OutlierFinder.hpp" -#include "MLPP/Stat/Stat.hpp" -#include "MLPP/LinAlg/LinAlg.hpp" -#include "MLPP/Activation/Activation.hpp" -#include "MLPP/Cost/Cost.hpp" -#include "MLPP/Data/Data.hpp" -#include "MLPP/Convolutions/Convolutions.hpp" -#include "MLPP/SVC/SVC.hpp" -#include "MLPP/NumericalAnalysis/NumericalAnalysis.hpp" -#include "MLPP/DualSVC/DualSVC.hpp" -#include "MLPP/GAN/GAN.hpp" -#include "MLPP/WGAN/WGAN.hpp" -#include "MLPP/Transforms/Transforms.hpp" - - -// real_t f(real_t x){ -// return x*x*x + 2*x - 2; -// } - -real_t f(real_t x){ - return sin(x); -} - -real_t f_prime(real_t x){ - return 2 * x; -} - -real_t f_prime_2var(std::vector x){ - return 2 * x[0] + x[1]; -} -/* - y = x^3 + 2x - 2 - y' = 3x^2 + 2 - y'' = 6x - y''(2) = 12 -*/ - -// real_t f_mv(std::vector x){ -// return x[0] * x[0] + x[0] * x[1] * x[1] + x[1] + 5; -// } - -/* - Where x, y = x[0], x[1], this function is defined as: - f(x, y) = x^2 + xy^2 + y + 5 - ∂f/∂x = 2x + 2y - ∂^2f/∂x∂y = 2 -*/ - -real_t f_mv(std::vector x){ - return x[0] * x[0] * x[0] + x[0] + x[1] * x[1] * x[1] * x[0] + x[2] * x[2] * x[1]; -} - -/* - Where x, y = x[0], x[1], this function is defined as: - f(x, y) = x^3 + x + xy^3 + yz^2 - - fy = 3xy^2 + 2yz - fyy = 6xy + 2z - fyyz = 2 - - ∂^2f/∂y^2 = 6xy + 2z - ∂^3f/∂y^3 = 6x - - ∂f/∂z = 2zy - ∂^2f/∂z^2 = 2y - ∂^3f/∂z^3 = 0 - - ∂f/∂x = 3x^2 + 1 + y^3 - ∂^2f/∂x^2 = 6x - ∂^3f/∂x^3 = 6 - - ∂f/∂z = 2zy - ∂^2f/∂z^2 = 2z - - ∂f/∂y = 3xy^2 - ∂^2f/∂y∂x = 3y^2 - -*/ - - -int main() { - - // // OBJECTS - MLPPStat stat; - MLPPLinAlg alg; - MLPPActivation avn; - MLPPCost cost; - MLPPData data; - MLPPConvolutions conv; - - // DATA SETS - // std::vector> inputSet = {{1,2,3,4,5,6,7,8,9,10}, {3,5,9,12,15,18,21,24,27,30}}; - // std::vector outputSet = {2,4,6,8,10,12,14,16,18,20}; - - // std::vector> inputSet = {{1,2,3,4,5,6,7,8}, {0,0,0,0,1,1,1,1}}; - // std::vector outputSet = {0,0,0,0,1,1,1,1}; - - // std::vector> inputSet = {{4,3,0,-3,-4}, {0,0,0,1,1}}; - // std::vector outputSet = {1,1,0,-1,-1}; - - // std::vector> inputSet = {{0,1,2,3,4}}; - // std::vector outputSet = {1,2,4,8,16}; - - //std::vector> inputSet = {{32, 0, 7}, {2, 28, 17}, {0, 9, 23}}; - - // std::vector> inputSet = {{1,1,0,0,1}, {0,0,1,1,1}, {0,1,1,0,1}}; - // std::vector outputSet = {0,1,0,1,1}; - - // std::vector> inputSet = {{0,0,1,1}, {0,1,0,1}}; - // std::vector outputSet = {0,1,1,0}; - - // // STATISTICS - // std::vector x = {1,2,3,4,5,6,7,8,9,10}; - // std::vector y = {10,9,8,7,6,5,4,3,2,1}; - // std::vector w = {0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1}; - - // std::cout << "Arithmetic Mean: " << stat.mean(x) 
<< std::endl; - // std::cout << "Median: " << stat.median(x) << std::endl; - // alg.printVector(x); - // alg.printVector(stat.mode(x)); - // std::cout << "Range: " << stat.range(x) << std::endl; - // std::cout << "Midrange: " << stat.midrange(x) << std::endl; - // std::cout << "Absolute Average Deviation: " << stat.absAvgDeviation(x) << std::endl; - // std::cout << "Standard Deviation: " << stat.standardDeviation(x) << std::endl; - // std::cout << "Variance: " << stat.variance(x) << std::endl; - // std::cout << "Covariance: " << stat.covariance(x, y) << std::endl; - // std::cout << "Correlation: " << stat.correlation(x, y) << std::endl; - // std::cout << "R^2: " << stat.R2(x, y) << std::endl; - // // Returns 1 - (1/k^2) - // std::cout << "Chebyshev Inequality: " << stat.chebyshevIneq(2) << std::endl; - // std::cout << "Weighted Mean: " << stat.weightedMean(x, w) << std::endl; - // std::cout << "Geometric Mean: " << stat.geometricMean(x) << std::endl; - // std::cout << "Harmonic Mean: " << stat.harmonicMean(x) << std::endl; - // std::cout << "Root Mean Square (Quadratic mean): " << stat.RMS(x) << std::endl; - // std::cout << "Power Mean (p = 5): " << stat.powerMean(x, 5) << std::endl; - // std::cout << "Lehmer Mean (p = 5): " << stat.lehmerMean(x, 5) << std::endl; - // std::cout << "Weighted Lehmer Mean (p = 5): " << stat.weightedLehmerMean(x, w, 5) << std::endl; - // std::cout << "Contraharmonic Mean: " << stat.contraHarmonicMean(x) << std::endl; - // std::cout << "Hernonian Mean: " << stat.heronianMean(1, 10) << std::endl; - // std::cout << "Heinz Mean (x = 1): " << stat.heinzMean(1, 10, 1) << std::endl; - // std::cout << "Neuman-Sandor Mean: " << stat.neumanSandorMean(1, 10) << std::endl; - // std::cout << "Stolarsky Mean (p = 5): " << stat.stolarskyMean(1, 10, 5) << std::endl; - // std::cout << "Identric Mean: " << stat.identricMean(1, 10) << std::endl; - // std::cout << "Logarithmic Mean: " << stat.logMean(1, 10) << std::endl; - // std::cout << "Absolute Average Deviation: " << stat.absAvgDeviation(x) << std::endl; - - // LINEAR ALGEBRA - // std::vector> square = {{1, 1}, {-1, 1}, {1, -1}, {-1, -1}}; - - // alg.printMatrix(alg.rotate(square, M_PI/4)); - - // std::vector> A = { - // {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, - // {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, - // }; - // std::vector a = {4, 3, 1, 3}; - // std::vector b = {3, 5, 6, 1}; - - // alg.printMatrix(alg.matmult(alg.transpose(A), A)); - // std::cout << std::endl; - // std::cout << alg.dot(a, b) << std::endl; - // std::cout << std::endl; - // alg.printMatrix(alg.hadamard_product(A, A)); - // std::cout << std::endl; - // alg.printMatrix(alg.identity(10)); - - // UNIVARIATE LINEAR REGRESSION - // Univariate, simple linear regression, case where k = 1 - // auto [inputSet, outputSet] = data.loadFiresAndCrime(); - // UniLinReg model(inputSet, outputSet); - // alg.printVector(model.modelSetTest(inputSet)); - - // // MULIVARIATE LINEAR REGRESSION - // auto [inputSet, outputSet] = data.loadCaliforniaHousing(); - - // LinReg model(inputSet, outputSet); // Can use Lasso, Ridge, ElasticNet Reg - - //model.gradientDescent(0.001, 30, 0); - //model.SGD(0.00000001, 300000, 1); - //model.MBGD(0.001, 10000, 2, 1); - //model.normalEquation(); - - // LinReg adamModel(alg.transpose(inputSet), outputSet); - // alg.printVector(model.modelSetTest(inputSet)); - // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; - - - // const int TRIAL_NUM = 1000; - - // real_t scoreSGD = 0; - // real_t scoreADAM = 0; - // for(int i = 0; i < TRIAL_NUM; 
i++){ - // LinReg model(alg.transpose(inputSet), outputSet); - // model.MBGD(0.001, 5, 1, 0); - // scoreSGD += model.score(); - - // LinReg adamModel(alg.transpose(inputSet), outputSet); - // adamModel.Adam(0.1, 5, 1, 0.9, 0.999, 1e-8, 0); // Change batch size = sgd, bgd - // scoreADAM += adamModel.score(); - // } - - // std::cout << "ACCURACY, AVG, SGD: " << 100 * scoreSGD/TRIAL_NUM << "%" << std::endl; - - // std::cout << std::endl; - - // std::cout << "ACCURACY, AVG, ADAM: " << 100 * scoreADAM/TRIAL_NUM << "%" << std::endl; - - - // std::cout << "Total epoch num: 300" << std::endl; - // std::cout << "Method: 1st Order w/ Jacobians" << std::endl; - - // LinReg model(alg.transpose(inputSet), outputSet); // Can use Lasso, Ridge, ElasticNet Reg - - // model.gradientDescent(0.001, 300, 0); - - - // std::cout << "--------------------------------------------" << std::endl; - // std::cout << "Total epoch num: 300" << std::endl; - // std::cout << "Method: Newtonian 2nd Order w/ Hessians" << std::endl; - // LinReg model2(alg.transpose(inputSet), outputSet); - - // model2.NewtonRaphson(1.5, 300, 0); - - - // // LOGISTIC REGRESSION - // auto [inputSet, outputSet] = data.loadBreastCancer(); - // LogReg model(inputSet, outputSet); - // model.SGD(0.001, 100000, 0); - // alg.printVector(model.modelSetTest(inputSet)); - // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; - - // // PROBIT REGRESSION - // std::vector<std::vector<real_t>> inputSet; - // std::vector<real_t> outputSet; - // data.setData(30, "/Users/marcmelikyan/Desktop/Data/BreastCancer.csv", inputSet, outputSet); - // ProbitReg model(inputSet, outputSet); - // model.SGD(0.001, 10000, 1); - // alg.printVector(model.modelSetTest(inputSet)); - // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; - - // // CLOGLOG REGRESSION - // std::vector<std::vector<real_t>> inputSet = {{1,2,3,4,5,6,7,8}, {0,0,0,0,1,1,1,1}}; - // std::vector<real_t> outputSet = {0,0,0,0,1,1,1,1}; - // CLogLogReg model(alg.transpose(inputSet), outputSet); - // model.SGD(0.1, 10000, 0); - // alg.printVector(model.modelSetTest(alg.transpose(inputSet))); - // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; - - // // EXPREG REGRESSION - // std::vector<std::vector<real_t>> inputSet = {{0,1,2,3,4}}; - // std::vector<real_t> outputSet = {1,2,4,8,16}; - // ExpReg model(alg.transpose(inputSet), outputSet); - // model.SGD(0.001, 10000, 0); - // alg.printVector(model.modelSetTest(alg.transpose(inputSet))); - // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; - - // // TANH REGRESSION - // std::vector<std::vector<real_t>> inputSet = {{4,3,0,-3,-4}, {0,0,0,1,1}}; - // std::vector<real_t> outputSet = {1,1,0,-1,-1}; - // TanhReg model(alg.transpose(inputSet), outputSet); - // model.SGD(0.1, 10000, 0); - // alg.printVector(model.modelSetTest(alg.transpose(inputSet))); - // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; - - // // SOFTMAX REGRESSION - // auto [inputSet, outputSet] = data.loadIris(); - // SoftmaxReg model(inputSet, outputSet); - // model.SGD(0.1, 10000, 1); - // alg.printMatrix(model.modelSetTest(inputSet)); - // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; - - // // SUPPORT VECTOR CLASSIFICATION - // auto [inputSet, outputSet] = data.loadBreastCancerSVC(); - // SVC model(inputSet, outputSet, 1); - // model.SGD(0.00001, 100000, 1); - // alg.printVector(model.modelSetTest(inputSet)); - // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; - - // SoftmaxReg model(inputSet, outputSet); - // model.SGD(0.001, 20000, 0); - //
alg.printMatrix(model.modelSetTest(inputSet)); - - // // MLP - // std::vector> inputSet = {{0,0,1,1}, {0,1,0,1}}; - // inputSet = alg.transpose(inputSet); - // std::vector outputSet = {0,1,1,0}; - - // MLP model(inputSet, outputSet, 2); - // model.gradientDescent(0.1, 10000, 0); - // alg.printVector(model.modelSetTest(inputSet)); - // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; - - // // SOFTMAX NETWORK - // auto [inputSet, outputSet] = data.loadWine(); - // SoftmaxNet model(inputSet, outputSet, 1); - // model.gradientDescent(0.01, 100000, 1); - // alg.printMatrix(model.modelSetTest(inputSet)); - // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; - - // // AUTOENCODER - // std::vector> inputSet = {{1,2,3,4,5,6,7,8,9,10}, {3,5,9,12,15,18,21,24,27,30}}; - // AutoEncoder model(alg.transpose(inputSet), 5); - // model.SGD(0.001, 300000, 0); - // alg.printMatrix(model.modelSetTest(alg.transpose(inputSet))); - // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; - - // DYNAMICALLY SIZED ANN - // Possible Weight Init Methods: Default, Uniform, HeNormal, HeUniform, XavierNormal, XavierUniform - // Possible Activations: Linear, Sigmoid, Swish, Softplus, Softsign, CLogLog, Ar{Sinh, Cosh, Tanh, Csch, Sech, Coth}, GaussianCDF, GELU, UnitStep - // Possible Loss Functions: MSE, RMSE, MBE, LogLoss, CrossEntropy, HingeLoss - // std::vector> inputSet = {{0,0,1,1}, {0,1,0,1}}; - // std::vector outputSet = {0,1,1,0}; - // ANN ann(alg.transpose(inputSet), outputSet); - // ann.addLayer(2, "Cosh"); - // ann.addOutputLayer("Sigmoid", "LogLoss"); - - - // ann.AMSGrad(0.1, 10000, 1, 0.9, 0.999, 0.000001, 1); - // ann.Adadelta(1, 1000, 2, 0.9, 0.000001, 1); - // ann.Momentum(0.1, 8000, 2, 0.9, true, 1); - - //ann.setLearningRateScheduler("Step", 0.5, 1000); - // ann.gradientDescent(0.01, 30000); - // alg.printVector(ann.modelSetTest(alg.transpose(inputSet))); - // std::cout << "ACCURACY: " << 100 * ann.score() << "%" << std::endl; - - std::vector> outputSet = {{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}, - {2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40}}; - - WGAN gan(2, alg.transpose(outputSet)); // our gan is a wasserstein gan (wgan) - gan.addLayer(5, "Sigmoid"); - gan.addLayer(2, "RELU"); - gan.addLayer(5, "Sigmoid"); - gan.addOutputLayer(); // User can specify weight init- if necessary. - gan.gradientDescent(0.1, 55000, 0); - std::cout << "GENERATED INPUT: (Gaussian-sampled noise):" << std::endl; - alg.printMatrix(gan.generateExample(100)); - - - // typedef std::vector> Matrix; - // typedef std::vector Vector; - - // Matrix inputSet = {{0,0}, {0,1}, {1,0}, {1,1}}; // XOR - // Vector outputSet = {0,1,1,0}; - - // ANN ann(inputSet, outputSet); - // ann.addLayer(5, "Sigmoid"); - // ann.addLayer(8, "Sigmoid"); // Add more layers as needed. - // ann.addOutputLayer("Sigmoid", "LogLoss"); - // ann.gradientDescent(1, 20000, 1); - - // Vector predictions = ann.modelSetTest(inputSet); - // alg.printVector(predictions); // Testing out the model's preds for train set. - // std::cout << "ACCURACY: " << 100 * ann.score() << "%" << std::endl; // Accuracy. 
- - // // DYNAMICALLY SIZED MANN (Multidimensional Output ANN) - // std::vector> inputSet = {{1,2,3},{2,4,6},{3,6,9},{4,8,12}}; - // std::vector> outputSet = {{1,5}, {2,10}, {3,15}, {4,20}}; - - // MANN mann(inputSet, outputSet); - // mann.addOutputLayer("Linear", "MSE"); - // mann.gradientDescent(0.001, 80000, 0); - // alg.printMatrix(mann.modelSetTest(inputSet)); - // std::cout << "ACCURACY: " << 100 * mann.score() << "%" << std::endl; - - // std::vector> inputSet; - // std::vector tempOutputSet; - // data.setData(4, "/Users/marcmelikyan/Desktop/Data/Iris.csv", inputSet, tempOutputSet); - // std::vector> outputSet = data.oneHotRep(tempOutputSet, 3); - - // TRAIN TEST SPLIT CHECK - // std::vector> inputSet1 = {{1,2,3,4,5,6,7,8,9,10}, {3,5,9,12,15,18,21,24,27,30}}; - // std::vector> outputSet1 = {{2,4,6,8,10,12,14,16,18,20}}; - // auto [inputSet, outputSet, inputTestSet, outputTestSet] = data.trainTestSplit(alg.transpose(inputSet1), alg.transpose(outputSet1), 0.2); - // alg.printMatrix(inputSet); - // alg.printMatrix(outputSet); - // alg.printMatrix(inputTestSet); - // alg.printMatrix(outputTestSet); - - - // alg.printMatrix(inputSet); - // alg.printMatrix(outputSet); - - // MANN mann(inputSet, outputSet); - // mann.addLayer(100, "RELU", "XavierNormal"); - // mann.addOutputLayer("Softmax", "CrossEntropy", "XavierNormal"); - // mann.gradientDescent(0.1, 80000, 1); - // alg.printMatrix(mann.modelSetTest(inputSet)); - // std::cout << "ACCURACY: " << 100 * mann.score() << "%" << std::endl; - - // // NAIVE BAYES - // std::vector> inputSet = {{1,1,1,1,1}, {0,0,1,1,1}, {0,0,1,0,1}}; - // std::vector outputSet = {0,1,0,1,1}; - - // MultinomialNB MNB(alg.transpose(inputSet), outputSet, 2); - // alg.printVector(MNB.modelSetTest(alg.transpose(inputSet))); - - // BernoulliNB BNB(alg.transpose(inputSet), outputSet); - // alg.printVector(BNB.modelSetTest(alg.transpose(inputSet))); - - // GaussianNB GNB(alg.transpose(inputSet), outputSet, 2); - // alg.printVector(GNB.modelSetTest(alg.transpose(inputSet))); - - // // KMeans - // std::vector> inputSet = {{32, 0, 7}, {2, 28, 17}, {0, 9, 23}}; - // KMeans kmeans(inputSet, 3, "KMeans++"); - // kmeans.train(3, 1); - // std::cout << std::endl; - // alg.printMatrix(kmeans.modelSetTest(inputSet)); // Returns the assigned centroids to each of the respective training examples - // std::cout << std::endl; - // alg.printVector(kmeans.silhouette_scores()); - - // // kNN - // std::vector> inputSet = {{1,2,3,4,5,6,7,8}, {0,0,0,0,1,1,1,1}}; - // std::vector outputSet = {0,0,0,0,1,1,1,1}; - // kNN knn(alg.transpose(inputSet), outputSet, 8); - // alg.printVector(knn.modelSetTest(alg.transpose(inputSet))); - // std::cout << "ACCURACY: " << 100 * knn.score() << "%" << std::endl; - - - // // CONVOLUTION, POOLING, ETC.. 
- // std::vector> input = { - // {1}, - // }; - - // std::vector>> tensorSet; - // tensorSet.push_back(input); - // tensorSet.push_back(input); - // tensorSet.push_back(input); - - // alg.printTensor(data.rgb2xyz(tensorSet)); - - // std::vector> input = { - // {62,55,55,54,49,48,47,55}, - // {62,57,54,52,48,47,48,53}, - // {61,60,52,49,48,47,49,54}, - // {63,61,60,60,63,65,68,65}, - // {67,67,70,74,79,85,91,92}, - // {82,95,101,106,114,115,112,117}, - // {96,111,115,119,128,128,130,127}, - // {109,121,127,133,139,141,140,133}, - // }; - - // Transforms trans; - - // alg.printMatrix(trans.discreteCosineTransform(input)); - - // alg.printMatrix(conv.convolve(input, conv.getPrewittVertical(), 1)); // Can use padding - // alg.printMatrix(conv.pool(input, 4, 4, "Max")); // Can use Max, Min, or Average pooling. - - // std::vector>> tensorSet; - // tensorSet.push_back(input); - // tensorSet.push_back(input); - // alg.printVector(conv.globalPool(tensorSet, "Average")); // Can use Max, Min, or Average global pooling. - - // std::vector> laplacian = {{1, 1, 1}, {1, -4, 1}, {1, 1, 1}}; - // alg.printMatrix(conv.convolve(conv.gaussianFilter2D(5, 1), laplacian, 1)); - - - // // PCA, SVD, eigenvalues & eigenvectors - // std::vector> inputSet = {{1,1}, {1,1}}; - // auto [Eigenvectors, Eigenvalues] = alg.eig(inputSet); - // std::cout << "Eigenvectors:" << std::endl; - // alg.printMatrix(Eigenvectors); - // std::cout << std::endl; - // std::cout << "Eigenvalues:" << std::endl; - // alg.printMatrix(Eigenvalues); - - // auto [U, S, Vt] = alg.SVD(inputSet); - - // // PCA done using Jacobi's method to approximate eigenvalues and eigenvectors. - // PCA dr(inputSet, 1); // 1 dimensional representation. - // std::cout << std::endl; - // std::cout << "Dimensionally reduced representation:" << std::endl; - // alg.printMatrix(dr.principalComponents()); - // std::cout << "SCORE: " << dr.score() << std::endl; - - - // // NLP/DATA - // std::string verbText = "I am appearing and thinking, as well as conducting."; - // std::cout << "Stemming Example:" << std::endl; - // std::cout << data.stemming(verbText) << std::endl; - // std::cout << std::endl; - - // std::vector sentences = {"He is a good boy", "She is a good girl", "The boy and girl are good"}; - // std::cout << "Bag of Words Example:" << std::endl; - // alg.printMatrix(data.BOW(sentences, "Default")); - // std::cout << std::endl; - // std::cout << "TFIDF Example:" << std::endl; - // alg.printMatrix(data.TFIDF(sentences)); - // std::cout << std::endl; - - // std::cout << "Tokenization:" << std::endl; - // alg.printVector(data.tokenize(verbText)); - // std::cout << std::endl; - - // std::cout << "Word2Vec:" << std::endl; - // std::string textArchive = {"He is a good boy. She is a good girl. The boy and girl are good."}; - // std::vector corpus = data.splitSentences(textArchive); - // auto [wordEmbeddings, wordList] = data.word2Vec(corpus, "CBOW", 2, 2, 0.1, 10000); // Can use either CBOW or Skip-n-gram. 
- // alg.printMatrix(wordEmbeddings); - // std::cout << std::endl; - - // std::vector textArchive = {"pizza", "pizza hamburger cookie", "hamburger", "ramen", "sushi", "ramen sushi"}; - - // alg.printMatrix(data.LSA(textArchive, 2)); - // //alg.printMatrix(data.BOW(textArchive, "Default")); - // std::cout << std::endl; - - - // std::vector> inputSet = {{1,2},{2,3},{3,4},{4,5},{5,6}}; - // std::cout << "Feature Scaling Example:" << std::endl; - // alg.printMatrix(data.featureScaling(inputSet)); - // std::cout << std::endl; - - // std::cout << "Mean Centering Example:" << std::endl; - // alg.printMatrix(data.meanCentering(inputSet)); - // std::cout << std::endl; - - // std::cout << "Mean Normalization Example:" << std::endl; - // alg.printMatrix(data.meanNormalization(inputSet)); - // std::cout << std::endl; - - // // Outlier Finder - // std::vector inputSet = {1,2,3,4,5,6,7,8,9,23554332523523}; - // OutlierFinder outlierFinder(2); // Any datapoint outside of 2 stds from the mean is marked as an outlier. - // alg.printVector(outlierFinder.modelTest(inputSet)); - - // // Testing new Functions - // real_t z_s = 0.001; - // std::cout << avn.logit(z_s) << std::endl; - // std::cout << avn.logit(z_s, 1) << std::endl; - - // std::vector z_v = {0.001}; - // alg.printVector(avn.logit(z_v)); - // alg.printVector(avn.logit(z_v, 1)); - - // std::vector> Z_m = {{0.001}}; - // alg.printMatrix(avn.logit(Z_m)); - // alg.printMatrix(avn.logit(Z_m, 1)); - - // std::cout << alg.trace({{1,2}, {3,4}}) << std::endl; - // alg.printMatrix(alg.pinverse({{1,2}, {3,4}})); - // alg.printMatrix(alg.diag({1,2,3,4,5})); - // alg.printMatrix(alg.kronecker_product({{1,2,3,4,5}}, {{6,7,8,9,10}})); - // alg.printMatrix(alg.matrixPower({{5,5},{5,5}}, 2)); - // alg.printVector(alg.solve({{1,1}, {1.5, 4.0}}, {2200, 5050})); - - // std::vector> matrixOfCubes = {{1,2,64,27}}; - // std::vector vectorOfCubes = {1,2,64,27}; - // alg.printMatrix(alg.cbrt(matrixOfCubes)); - // alg.printVector(alg.cbrt(vectorOfCubes)); - // std::cout << alg.max({{1,2,3,4,5}, {6,5,3,4,1}, {9,9,9,9,9}}) << std::endl; - // std::cout << alg.min({{1,2,3,4,5}, {6,5,3,4,1}, {9,9,9,9,9}}) << std::endl; - - // std::vector chicken; - // data.getImage("../../Data/apple.jpeg", chicken); - // alg.printVector(chicken); - - // std::vector> P = {{12, -51, 4}, {6, 167, -68}, {-4, 24, -41}}; - // alg.printMatrix(P); - - // alg.printMatrix(alg.gramSchmidtProcess(P)); - - // auto [Q, R] = alg.QRD(P); // It works! - - // alg.printMatrix(Q); - - // alg.printMatrix(R); - - // // Checking positive-definiteness checker. For Cholesky Decomp. - // std::vector> A = - // { - // {1,-1,-1,-1}, - // {-1,2,2,2}, - // {-1,2,3,1}, - // {-1,2,1,4} - // }; - - // std::cout << std::boolalpha << alg.positiveDefiniteChecker(A) << std::endl; - // auto [L, Lt] = alg.chol(A); // works. - // alg.printMatrix(L); - // alg.printMatrix(Lt); - - // Checks for numerical analysis class. - NumericalAnalysis numAn; - - //std::cout << numAn.quadraticApproximation(f, 0, 1) << std::endl; - - // std::cout << numAn.cubicApproximation(f, 0, 1.001) << std::endl; - - // std::cout << f(1.001) << std::endl; - - // std::cout << numAn.quadraticApproximation(f_mv, {0, 0, 0}, {1, 1, 1}) << std::endl; - - // std::cout << numAn.numDiff(&f, 1) << std::endl; - // std::cout << numAn.newtonRaphsonMethod(&f, 1, 1000) << std::endl; - //std::cout << numAn.invQuadraticInterpolation(&f, {100, 2,1.5}, 10) << std::endl; - - // std::cout << numAn.numDiff(&f_mv, {1, 1}, 1) << std::endl; // Derivative w.r.t. x. 
- - // alg.printVector(numAn.jacobian(&f_mv, {1, 1})); - - //std::cout << numAn.numDiff_2(&f, 2) << std::endl; - - //std::cout << numAn.numDiff_3(&f, 2) << std::endl; - - // std::cout << numAn.numDiff_2(&f_mv, {2, 2, 500}, 2, 2) << std::endl; - //std::cout << numAn.numDiff_3(&f_mv, {2, 1000, 130}, 0, 0, 0) << std::endl; - - // alg.printTensor(numAn.thirdOrderTensor(&f_mv, {1, 1, 1})); - // std::cout << "Our Hessian." << std::endl; - // alg.printMatrix(numAn.hessian(&f_mv, {2, 2, 500})); - - // std::cout << numAn.laplacian(f_mv, {1,1,1}) << std::endl; - - // std::vector>> tensor; - // tensor.push_back({{1,2}, {1,2}, {1,2}}); - // tensor.push_back({{1,2}, {1,2}, {1,2}}); - - // alg.printTensor(tensor); - - // alg.printMatrix(alg.tensor_vec_mult(tensor, {1,2})); - - // std::cout << numAn.cubicApproximation(f_mv, {0, 0, 0}, {1, 1, 1}) << std::endl; - - // std::cout << numAn.eulerianMethod(f_prime, {1, 1}, 1.5, 0.000001) << std::endl; - - // std::cout << numAn.eulerianMethod(f_prime_2var, {2, 3}, 2.5, 0.00000001) << std::endl; - - // alg.printMatrix(conv.dx(A)); - // alg.printMatrix(conv.dy(A)); - - // alg.printMatrix(conv.gradOrientation(A)); - - // std::vector> A = - // { - // {1,0,0,0}, - // {0,0,0,0}, - // {0,0,0,0}, - // {0,0,0,1} - // }; - - // std::vector> h = conv.harrisCornerDetection(A); - - // for(int i = 0; i < h.size(); i++){ - // for(int j = 0; j < h[i].size(); j++){ - // std::cout << h[i][j] << " "; - // } - // std::cout << std::endl; - // } // Harris detector works. Life is good! - - // std::vector a = {3,4,4}; - // std::vector b = {4,4,4}; - // alg.printVector(alg.cross(a,b)); - - //SUPPORT VECTOR CLASSIFICATION (kernel method) - // std::vector> inputSet; - // std::vector outputSet; - // data.setData(30, "/Users/marcmelikyan/Desktop/Data/BreastCancerSVM.csv", inputSet, outputSet); - - // std::vector> inputSet; - // std::vector outputSet; - // data.setData(4, "/Users/marcmelikyan/Desktop/Data/IrisSVM.csv", inputSet, outputSet); - - // DualSVC kernelSVM(inputSet, outputSet, 1000); - // kernelSVM.gradientDescent(0.0001, 20, 1); - - // std::vector> linearlyIndependentMat = - - // { - // {1,2,3,4}, - // {234538495,4444,6111,55} - // }; - - // std::cout << "True of false: linearly independent?: " << std::boolalpha << alg.linearIndependenceChecker(linearlyIndependentMat) << std::endl; - - - return 0; -} - diff --git a/mlpp/activation/activation_old.cpp b/mlpp/activation/activation_old.cpp deleted file mode 100644 index 17158ee..0000000 --- a/mlpp/activation/activation_old.cpp +++ /dev/null @@ -1,953 +0,0 @@ -// -// Activation.cpp -// -// Created by Marc Melikyan on 1/16/21. 
-// - -#include "activation_old.h" -#include "../lin_alg/lin_alg_old.h" - -#include -#include -#include - -#ifndef M_PI -#define M_PI 3.141592653 -#endif - -real_t MLPPActivationOld::linear(real_t z, bool deriv) { - if (deriv) { - return 1; - } - return z; -} - -std::vector<real_t> MLPPActivationOld::linear(std::vector<real_t> z, bool deriv) { - if (deriv) { - MLPPLinAlgOld alg; - return alg.onevec(z.size()); - } - return z; -} - -std::vector<std::vector<real_t>> MLPPActivationOld::linear(std::vector<std::vector<real_t>> z, bool deriv) { - if (deriv) { - MLPPLinAlgOld alg; - return alg.onemat(z.size(), z[0].size()); - } - return z; -} - -real_t MLPPActivationOld::sigmoid(real_t z, bool deriv) { - if (deriv) { - return sigmoid(z) * (1 - sigmoid(z)); - } - return 1 / (1 + exp(-z)); -} - -std::vector<real_t> MLPPActivationOld::sigmoid(std::vector<real_t> z, bool deriv) { - MLPPLinAlgOld alg; - if (deriv) { - return alg.subtraction(sigmoid(z), alg.hadamard_product(sigmoid(z), sigmoid(z))); - } - return alg.elementWiseDivision(alg.onevec(z.size()), alg.addition(alg.onevec(z.size()), alg.exp(alg.scalarMultiply(-1, z)))); -} - -std::vector<std::vector<real_t>> MLPPActivationOld::sigmoid(std::vector<std::vector<real_t>> z, bool deriv) { - MLPPLinAlgOld alg; - if (deriv) { - return alg.subtraction(sigmoid(z), alg.hadamard_product(sigmoid(z), sigmoid(z))); - } - return alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), alg.addition(alg.onemat(z.size(), z[0].size()), alg.exp(alg.scalarMultiply(-1, z)))); -} - -std::vector<real_t> MLPPActivationOld::softmax(std::vector<real_t> z, bool deriv) { - MLPPLinAlgOld alg; - std::vector<real_t> a; - a.resize(z.size()); - std::vector<real_t> expZ = alg.exp(z); - real_t sum = 0; - - for (uint32_t i = 0; i < z.size(); i++) { - sum += expZ[i]; - } - for (uint32_t i = 0; i < z.size(); i++) { - a[i] = expZ[i] / sum; - } - return a; -} - -std::vector<std::vector<real_t>> MLPPActivationOld::softmax(std::vector<std::vector<real_t>> z, bool deriv) { - std::vector<std::vector<real_t>> a; - a.resize(z.size()); - - for (uint32_t i = 0; i < z.size(); i++) { - a[i] = softmax(z[i]); - } - return a; -} - -std::vector<real_t> MLPPActivationOld::adjSoftmax(std::vector<real_t> z) { - MLPPLinAlgOld alg; - std::vector<real_t> a; - real_t C = -*std::max_element(z.begin(), z.end()); - z = alg.scalarAdd(C, z); - - return softmax(z); -} - -std::vector<std::vector<real_t>> MLPPActivationOld::adjSoftmax(std::vector<std::vector<real_t>> z) { - std::vector<std::vector<real_t>> a; - a.resize(z.size()); - - for (uint32_t i = 0; i < z.size(); i++) { - a[i] = adjSoftmax(z[i]); - } - return a; -} - -std::vector<std::vector<real_t>> MLPPActivationOld::softmaxDeriv(std::vector<real_t> z) { - std::vector<std::vector<real_t>> deriv; - std::vector<real_t> a = softmax(z); - deriv.resize(a.size()); - for (uint32_t i = 0; i < deriv.size(); i++) { - deriv[i].resize(a.size()); - } - for (uint32_t i = 0; i < a.size(); i++) { - for (uint32_t j = 0; j < z.size(); j++) { - if (i == j) { - deriv[i][j] = a[i] * (1 - a[i]); - } else { - deriv[i][j] = -a[i] * a[j]; - } - } - } - return deriv; -} - -std::vector<std::vector<std::vector<real_t>>> MLPPActivationOld::softmaxDeriv(std::vector<std::vector<real_t>> z) { - MLPPLinAlgOld alg; - std::vector<std::vector<std::vector<real_t>>> deriv; - std::vector<std::vector<real_t>> a = softmax(z); - - deriv.resize(a.size()); - for (uint32_t i = 0; i < deriv.size(); i++) { - deriv[i].resize(a.size()); - } - for (uint32_t i = 0; i < a.size(); i++) { - for (uint32_t j = 0; j < z.size(); j++) { - if (i == j) { - deriv[i][j] = alg.subtraction(a[i], alg.hadamard_product(a[i], a[i])); - } else { - deriv[i][j] = alg.scalarMultiply(-1, alg.hadamard_product(a[i], a[j])); - } - } - } - return deriv; -} - -real_t MLPPActivationOld::softplus(real_t z, bool deriv) { - if (deriv) { - return sigmoid(z); - } - return std::log(1 + exp(z)); -} - -std::vector<real_t> MLPPActivationOld::softplus(std::vector<real_t> z, bool deriv) { - if
(deriv) { - return sigmoid(z); - } - MLPPLinAlgOld alg; - return alg.log(alg.addition(alg.onevec(z.size()), alg.exp(z))); -} - -std::vector> MLPPActivationOld::softplus(std::vector> z, bool deriv) { - if (deriv) { - return sigmoid(z); - } - MLPPLinAlgOld alg; - return alg.log(alg.addition(alg.onemat(z.size(), z[0].size()), alg.exp(z))); -} - -real_t MLPPActivationOld::softsign(real_t z, bool deriv) { - if (deriv) { - return 1 / ((1 + abs(z)) * (1 + abs(z))); - } - return z / (1 + abs(z)); -} - -std::vector MLPPActivationOld::softsign(std::vector z, bool deriv) { - MLPPLinAlgOld alg; - if (deriv) { - return alg.elementWiseDivision(alg.onevec(z.size()), alg.exponentiate(alg.addition(alg.onevec(z.size()), alg.abs(z)), 2)); - } - return alg.elementWiseDivision(z, alg.addition(alg.onevec(z.size()), alg.abs(z))); -} - -std::vector> MLPPActivationOld::softsign(std::vector> z, bool deriv) { - MLPPLinAlgOld alg; - if (deriv) { - return alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), alg.exponentiate(alg.addition(alg.onemat(z.size(), z[0].size()), alg.abs(z)), 2)); - } - return alg.elementWiseDivision(z, alg.addition(alg.onemat(z.size(), z[0].size()), alg.abs(z))); -} - -real_t MLPPActivationOld::gaussianCDF(real_t z, bool deriv) { - if (deriv) { - return (1 / sqrt(2 * M_PI)) * exp(-z * z / 2); - } - return 0.5 * (1 + erf(z / sqrt(2))); -} - -std::vector MLPPActivationOld::gaussianCDF(std::vector z, bool deriv) { - MLPPLinAlgOld alg; - if (deriv) { - return alg.scalarMultiply(1 / sqrt(2 * M_PI), alg.exp(alg.scalarMultiply(-1 / 2, alg.hadamard_product(z, z)))); - } - return alg.scalarMultiply(0.5, alg.addition(alg.onevec(z.size()), alg.erf(alg.scalarMultiply(1 / sqrt(2), z)))); -} - -std::vector> MLPPActivationOld::gaussianCDF(std::vector> z, bool deriv) { - MLPPLinAlgOld alg; - if (deriv) { - return alg.scalarMultiply(1 / sqrt(2 * M_PI), alg.exp(alg.scalarMultiply(-1 / 2, alg.hadamard_product(z, z)))); - } - return alg.scalarMultiply(0.5, alg.addition(alg.onemat(z.size(), z[0].size()), alg.erf(alg.scalarMultiply(1 / sqrt(2), z)))); -} - -real_t MLPPActivationOld::cloglog(real_t z, bool deriv) { - if (deriv) { - return exp(z - exp(z)); - } - return 1 - exp(-exp(z)); -} - -std::vector MLPPActivationOld::cloglog(std::vector z, bool deriv) { - MLPPLinAlgOld alg; - if (deriv) { - return alg.exp(alg.scalarMultiply(-1, alg.exp(z))); - } - return alg.scalarMultiply(-1, alg.scalarAdd(-1, alg.exp(alg.scalarMultiply(-1, alg.exp(z))))); -} - -std::vector> MLPPActivationOld::cloglog(std::vector> z, bool deriv) { - MLPPLinAlgOld alg; - if (deriv) { - return alg.exp(alg.scalarMultiply(-1, alg.exp(z))); - } - return alg.scalarMultiply(-1, alg.scalarAdd(-1, alg.exp(alg.scalarMultiply(-1, alg.exp(z))))); -} - -real_t MLPPActivationOld::logit(real_t z, bool deriv) { - if (deriv) { - return 1 / z - 1 / (z - 1); - } - return std::log(z / (1 - z)); -} - -std::vector MLPPActivationOld::logit(std::vector z, bool deriv) { - MLPPLinAlgOld alg; - if (deriv) { - return alg.subtraction(alg.elementWiseDivision(alg.onevec(z.size()), z), alg.elementWiseDivision(alg.onevec(z.size()), alg.subtraction(z, alg.onevec(z.size())))); - } - return alg.log(alg.elementWiseDivision(z, alg.subtraction(alg.onevec(z.size()), z))); -} - -std::vector> MLPPActivationOld::logit(std::vector> z, bool deriv) { - MLPPLinAlgOld alg; - if (deriv) { - return alg.subtraction(alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), z), alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), alg.subtraction(z, alg.onemat(z.size(), 
z[0].size())))); - } - return alg.log(alg.elementWiseDivision(z, alg.subtraction(alg.onemat(z.size(), z[0].size()), z))); -} - -real_t MLPPActivationOld::unitStep(real_t z, bool deriv) { - if (deriv) { - return 0; - } - return z < 0 ? 0 : 1; -} - -std::vector MLPPActivationOld::unitStep(std::vector z, bool deriv) { - if (deriv) { - std::vector lderiv; - lderiv.resize(z.size()); - for (uint32_t i = 0; i < z.size(); i++) { - lderiv[i] = unitStep(z[i], true); - } - return lderiv; - } - std::vector a; - a.resize(z.size()); - - for (uint32_t i = 0; i < a.size(); i++) { - a[i] = unitStep(z[i]); - } - return a; -} - -std::vector> MLPPActivationOld::unitStep(std::vector> z, bool deriv) { - if (deriv) { - std::vector> lderiv; - lderiv.resize(z.size()); - for (uint32_t i = 0; i < z.size(); i++) { - lderiv[i] = unitStep(z[i], true); - } - return lderiv; - } - std::vector> a; - a.resize(z.size()); - - for (uint32_t i = 0; i < a.size(); i++) { - a[i] = unitStep(z[i]); - } - return a; -} - -real_t MLPPActivationOld::swish(real_t z, bool deriv) { - if (deriv) { - return swish(z) + sigmoid(z) * (1 - swish(z)); - } - return z * sigmoid(z); -} - -std::vector MLPPActivationOld::swish(std::vector z, bool deriv) { - MLPPLinAlgOld alg; - if (deriv) { - alg.addition(swish(z), alg.subtraction(sigmoid(z), alg.hadamard_product(sigmoid(z), swish(z)))); - } - return alg.hadamard_product(z, sigmoid(z)); -} - -std::vector> MLPPActivationOld::swish(std::vector> z, bool deriv) { - MLPPLinAlgOld alg; - if (deriv) { - alg.addition(swish(z), alg.subtraction(sigmoid(z), alg.hadamard_product(sigmoid(z), swish(z)))); - } - return alg.hadamard_product(z, sigmoid(z)); -} - -real_t MLPPActivationOld::mish(real_t z, bool deriv) { - if (deriv) { - return sech(softplus(z)) * sech(softplus(z)) * z * sigmoid(z) + mish(z) / z; - } - return z * tanh(softplus(z)); -} - -std::vector MLPPActivationOld::mish(std::vector z, bool deriv) { - MLPPLinAlgOld alg; - if (deriv) { - return alg.addition(alg.hadamard_product(alg.hadamard_product(alg.hadamard_product(sech(softplus(z)), sech(softplus(z))), z), sigmoid(z)), alg.elementWiseDivision(mish(z), z)); - } - return alg.hadamard_product(z, tanh(softplus(z))); -} - -std::vector> MLPPActivationOld::mish(std::vector> z, bool deriv) { - MLPPLinAlgOld alg; - if (deriv) { - return alg.addition(alg.hadamard_product(alg.hadamard_product(alg.hadamard_product(sech(softplus(z)), sech(softplus(z))), z), sigmoid(z)), alg.elementWiseDivision(mish(z), z)); - } - return alg.hadamard_product(z, tanh(softplus(z))); -} - -real_t MLPPActivationOld::sinc(real_t z, bool deriv) { - if (deriv) { - return (z * std::cos(z) - std::sin(z)) / (z * z); - } - return std::sin(z) / z; -} - -std::vector MLPPActivationOld::sinc(std::vector z, bool deriv) { - MLPPLinAlgOld alg; - if (deriv) { - return alg.elementWiseDivision(alg.subtraction(alg.hadamard_product(z, alg.cos(z)), alg.sin(z)), alg.hadamard_product(z, z)); - } - return alg.elementWiseDivision(alg.sin(z), z); -} - -std::vector> MLPPActivationOld::sinc(std::vector> z, bool deriv) { - MLPPLinAlgOld alg; - if (deriv) { - return alg.elementWiseDivision(alg.subtraction(alg.hadamard_product(z, alg.cos(z)), alg.sin(z)), alg.hadamard_product(z, z)); - } - return alg.elementWiseDivision(alg.sin(z), z); -} - -real_t MLPPActivationOld::RELU(real_t z, bool deriv) { - if (deriv) { - if (z <= 0) { - return 0; - } else { - return 1; - } - } - return fmax(0, z); -} - -std::vector MLPPActivationOld::RELU(std::vector z, bool deriv) { - if (deriv) { - std::vector lderiv; - 
lderiv.resize(z.size()); - for (uint32_t i = 0; i < z.size(); i++) { - lderiv[i] = RELU(z[i], true); - } - return lderiv; - } - std::vector a; - a.resize(z.size()); - - for (uint32_t i = 0; i < a.size(); i++) { - a[i] = RELU(z[i]); - } - return a; -} - -std::vector> MLPPActivationOld::RELU(std::vector> z, bool deriv) { - if (deriv) { - std::vector> lderiv; - lderiv.resize(z.size()); - for (uint32_t i = 0; i < z.size(); i++) { - lderiv[i] = RELU(z[i], true); - } - return lderiv; - } - std::vector> a; - a.resize(z.size()); - - for (uint32_t i = 0; i < a.size(); i++) { - a[i] = RELU(z[i]); - } - return a; -} - -real_t MLPPActivationOld::leakyReLU(real_t z, real_t c, bool deriv) { - if (deriv) { - if (z <= 0) { - return c; - } else { - return 1; - } - } - return fmax(c * z, z); -} - -std::vector MLPPActivationOld::leakyReLU(std::vector z, real_t c, bool deriv) { - if (deriv) { - std::vector lderiv; - lderiv.resize(z.size()); - for (uint32_t i = 0; i < z.size(); i++) { - lderiv[i] = leakyReLU(z[i], c, true); - } - return lderiv; - } - std::vector a; - a.resize(z.size()); - - for (uint32_t i = 0; i < a.size(); i++) { - a[i] = leakyReLU(z[i], c); - } - return a; -} - -std::vector> MLPPActivationOld::leakyReLU(std::vector> z, real_t c, bool deriv) { - if (deriv) { - std::vector> lderiv; - lderiv.resize(z.size()); - for (uint32_t i = 0; i < z.size(); i++) { - lderiv[i] = leakyReLU(z[i], c, true); - } - return lderiv; - } - std::vector> a; - a.resize(z.size()); - - for (uint32_t i = 0; i < a.size(); i++) { - a[i] = leakyReLU(z[i], c); - } - return a; -} - -real_t MLPPActivationOld::ELU(real_t z, real_t c, bool deriv) { - if (deriv) { - if (z <= 0) { - return c * exp(z); - } else { - return 1; - } - } - if (z >= 0) { - return z; - } else { - return c * (exp(z) - 1); - } -} - -std::vector MLPPActivationOld::ELU(std::vector z, real_t c, bool deriv) { - if (deriv) { - std::vector lderiv; - lderiv.resize(z.size()); - for (uint32_t i = 0; i < z.size(); i++) { - lderiv[i] = ELU(z[i], c, true); - } - return lderiv; - } - std::vector a; - a.resize(z.size()); - - for (uint32_t i = 0; i < a.size(); i++) { - a[i] = ELU(z[i], c); - } - return a; -} - -std::vector> MLPPActivationOld::ELU(std::vector> z, real_t c, bool deriv) { - if (deriv) { - std::vector> lderiv; - lderiv.resize(z.size()); - for (uint32_t i = 0; i < z.size(); i++) { - lderiv[i] = ELU(z[i], c, true); - } - return lderiv; - } - std::vector> a; - a.resize(z.size()); - - for (uint32_t i = 0; i < a.size(); i++) { - a[i] = ELU(z[i], c); - } - return a; -} - -real_t MLPPActivationOld::SELU(real_t z, real_t lambda, real_t c, bool deriv) { - if (deriv) { - return ELU(z, c, true); - } - return lambda * ELU(z, c); -} - -std::vector MLPPActivationOld::SELU(std::vector z, real_t lambda, real_t c, bool deriv) { - if (deriv) { - std::vector lderiv; - lderiv.resize(z.size()); - for (uint32_t i = 0; i < z.size(); i++) { - lderiv[i] = SELU(z[i], lambda, c, true); - } - return lderiv; - } - std::vector a; - a.resize(z.size()); - - for (uint32_t i = 0; i < a.size(); i++) { - a[i] = SELU(z[i], lambda, c); - } - return a; -} - -std::vector> MLPPActivationOld::SELU(std::vector> z, real_t lambda, real_t c, bool deriv) { - if (deriv) { - std::vector> lderiv; - lderiv.resize(z.size()); - for (uint32_t i = 0; i < z.size(); i++) { - lderiv[i] = SELU(z[i], lambda, c, true); - } - return lderiv; - } - std::vector> a; - a.resize(z.size()); - - for (uint32_t i = 0; i < a.size(); i++) { - a[i] = SELU(z[i], lambda, c); - } - return a; -} - -real_t MLPPActivationOld::GELU(real_t 
z, bool deriv) { - if (deriv) { - return 0.5 * tanh(0.0356774 * std::pow(z, 3) + 0.797885 * z) + (0.0535161 * std::pow(z, 3) + 0.398942 * z) * std::pow(sech(0.0356774 * std::pow(z, 3) + 0.797885 * z), 2) + 0.5; - } - return 0.5 * z * (1 + tanh(sqrt(2 / M_PI) * (z + 0.044715 * std::pow(z, 3)))); -} - -std::vector MLPPActivationOld::GELU(std::vector z, bool deriv) { - if (deriv) { - std::vector lderiv; - lderiv.resize(z.size()); - for (uint32_t i = 0; i < z.size(); i++) { - lderiv[i] = GELU(z[i], true); - } - return lderiv; - } - std::vector a; - a.resize(z.size()); - - for (uint32_t i = 0; i < a.size(); i++) { - a[i] = GELU(z[i]); - } - return a; -} - -std::vector> MLPPActivationOld::GELU(std::vector> z, bool deriv) { - if (deriv) { - std::vector> lderiv; - lderiv.resize(z.size()); - for (uint32_t i = 0; i < z.size(); i++) { - lderiv[i] = GELU(z[i], true); - } - return lderiv; - } - std::vector> a; - a.resize(z.size()); - - for (uint32_t i = 0; i < a.size(); i++) { - a[i] = GELU(z[i]); - } - return a; -} - -real_t MLPPActivationOld::sign(real_t z, bool deriv) { - if (deriv) { - return 0; - } - if (z < 0) { - return -1; - } else if (z == 0) { - return 0; - } else { - return 1; - } -} - -std::vector MLPPActivationOld::sign(std::vector z, bool deriv) { - if (deriv) { - std::vector lderiv; - lderiv.resize(z.size()); - for (uint32_t i = 0; i < z.size(); i++) { - lderiv[i] = sign(z[i], true); - } - return lderiv; - } - std::vector a; - a.resize(z.size()); - - for (uint32_t i = 0; i < a.size(); i++) { - a[i] = sign(z[i]); - } - return a; -} - -std::vector> MLPPActivationOld::sign(std::vector> z, bool deriv) { - if (deriv) { - std::vector> lderiv; - lderiv.resize(z.size()); - for (uint32_t i = 0; i < z.size(); i++) { - lderiv[i] = sign(z[i], true); - } - return lderiv; - } - std::vector> a; - a.resize(z.size()); - - for (uint32_t i = 0; i < a.size(); i++) { - a[i] = sign(z[i]); - } - return a; -} - -real_t MLPPActivationOld::sinh(real_t z, bool deriv) { - if (deriv) { - return cosh(z); - } - return 0.5 * (exp(z) - exp(-z)); -} - -std::vector MLPPActivationOld::sinh(std::vector z, bool deriv) { - if (deriv) { - return cosh(z); - } - MLPPLinAlgOld alg; - return alg.scalarMultiply(0.5, alg.subtraction(alg.exp(z), alg.exp(alg.scalarMultiply(-1, z)))); -} - -std::vector> MLPPActivationOld::sinh(std::vector> z, bool deriv) { - if (deriv) { - return cosh(z); - } - MLPPLinAlgOld alg; - return alg.scalarMultiply(0.5, alg.subtraction(alg.exp(z), alg.exp(alg.scalarMultiply(-1, z)))); -} - -real_t MLPPActivationOld::cosh(real_t z, bool deriv) { - if (deriv) { - return sinh(z); - } - return 0.5 * (exp(z) + exp(-z)); -} - -std::vector MLPPActivationOld::cosh(std::vector z, bool deriv) { - if (deriv) { - return sinh(z); - } - MLPPLinAlgOld alg; - return alg.scalarMultiply(0.5, alg.addition(alg.exp(z), alg.exp(alg.scalarMultiply(-1, z)))); -} - -std::vector> MLPPActivationOld::cosh(std::vector> z, bool deriv) { - if (deriv) { - return sinh(z); - } - MLPPLinAlgOld alg; - return alg.scalarMultiply(0.5, alg.addition(alg.exp(z), alg.exp(alg.scalarMultiply(-1, z)))); -} - -real_t MLPPActivationOld::tanh(real_t z, bool deriv) { - if (deriv) { - return 1 - tanh(z) * tanh(z); - } - return (exp(z) - exp(-z)) / (exp(z) + exp(-z)); -} - -std::vector MLPPActivationOld::tanh(std::vector z, bool deriv) { - MLPPLinAlgOld alg; - if (deriv) { - return alg.scalarMultiply(-1, alg.scalarAdd(-1, alg.hadamard_product(tanh(z), tanh(z)))); - } - return alg.elementWiseDivision(alg.subtraction(alg.exp(z), alg.exp(alg.scalarMultiply(-1, 
z))), alg.addition(alg.exp(z), alg.exp(alg.scalarMultiply(-1, z)))); -} - -std::vector> MLPPActivationOld::tanh(std::vector> z, bool deriv) { - MLPPLinAlgOld alg; - if (deriv) { - return alg.scalarMultiply(-1, alg.scalarAdd(-1, alg.hadamard_product(tanh(z), tanh(z)))); - } - - return alg.elementWiseDivision(alg.subtraction(alg.exp(z), alg.exp(alg.scalarMultiply(-1, z))), alg.addition(alg.exp(z), alg.exp(alg.scalarMultiply(-1, z)))); -} - -real_t MLPPActivationOld::csch(real_t z, bool deriv) { - if (deriv) { - return -csch(z) * coth(z); - } - return 1 / sinh(z); -} - -std::vector MLPPActivationOld::csch(std::vector z, bool deriv) { - MLPPLinAlgOld alg; - if (deriv) { - return alg.hadamard_product(alg.scalarMultiply(-1, csch(z)), coth(z)); - } - return alg.elementWiseDivision(alg.onevec(z.size()), sinh(z)); -} - -std::vector> MLPPActivationOld::csch(std::vector> z, bool deriv) { - MLPPLinAlgOld alg; - if (deriv) { - return alg.hadamard_product(alg.scalarMultiply(-1, csch(z)), coth(z)); - } - return alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), sinh(z)); -} - -real_t MLPPActivationOld::sech(real_t z, bool deriv) { - if (deriv) { - return -sech(z) * tanh(z); - } - return 1 / cosh(z); -} - -std::vector MLPPActivationOld::sech(std::vector z, bool deriv) { - MLPPLinAlgOld alg; - if (deriv) { - return alg.hadamard_product(alg.scalarMultiply(-1, sech(z)), tanh(z)); - } - return alg.elementWiseDivision(alg.onevec(z.size()), cosh(z)); - - // return activation(z, deriv, static_cast(&sech)); -} - -std::vector> MLPPActivationOld::sech(std::vector> z, bool deriv) { - MLPPLinAlgOld alg; - if (deriv) { - return alg.hadamard_product(alg.scalarMultiply(-1, sech(z)), tanh(z)); - } - return alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), cosh(z)); - - // return activation(z, deriv, static_cast(&sech)); -} - -real_t MLPPActivationOld::coth(real_t z, bool deriv) { - if (deriv) { - return -csch(z) * csch(z); - } - return 1 / tanh(z); -} - -std::vector MLPPActivationOld::coth(std::vector z, bool deriv) { - MLPPLinAlgOld alg; - if (deriv) { - return alg.hadamard_product(alg.scalarMultiply(-1, csch(z)), csch(z)); - } - return alg.elementWiseDivision(alg.onevec(z.size()), tanh(z)); -} - -std::vector> MLPPActivationOld::coth(std::vector> z, bool deriv) { - MLPPLinAlgOld alg; - if (deriv) { - return alg.hadamard_product(alg.scalarMultiply(-1, csch(z)), csch(z)); - } - return alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), tanh(z)); -} - -real_t MLPPActivationOld::arsinh(real_t z, bool deriv) { - if (deriv) { - return 1 / sqrt(z * z + 1); - } - return std::log(z + sqrt(z * z + 1)); -} - -std::vector MLPPActivationOld::arsinh(std::vector z, bool deriv) { - MLPPLinAlgOld alg; - if (deriv) { - return alg.elementWiseDivision(alg.onevec(z.size()), alg.sqrt(alg.addition(alg.hadamard_product(z, z), alg.onevec(z.size())))); - } - return alg.log(alg.addition(z, alg.sqrt(alg.addition(alg.hadamard_product(z, z), alg.onevec(z.size()))))); -} - -std::vector> MLPPActivationOld::arsinh(std::vector> z, bool deriv) { - MLPPLinAlgOld alg; - if (deriv) { - return alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), alg.sqrt(alg.addition(alg.hadamard_product(z, z), alg.onemat(z.size(), z[0].size())))); - } - return alg.log(alg.addition(z, alg.sqrt(alg.addition(alg.hadamard_product(z, z), alg.onemat(z.size(), z[0].size()))))); -} - -real_t MLPPActivationOld::arcosh(real_t z, bool deriv) { - if (deriv) { - return 1 / sqrt(z * z - 1); - } - return std::log(z + sqrt(z * z - 1)); -} - -std::vector 
MLPPActivationOld::arcosh(std::vector z, bool deriv) { - MLPPLinAlgOld alg; - if (deriv) { - return alg.elementWiseDivision(alg.onevec(z.size()), alg.sqrt(alg.subtraction(alg.hadamard_product(z, z), alg.onevec(z.size())))); - } - return alg.log(alg.addition(z, alg.sqrt(alg.subtraction(alg.hadamard_product(z, z), alg.onevec(z.size()))))); -} - -std::vector> MLPPActivationOld::arcosh(std::vector> z, bool deriv) { - MLPPLinAlgOld alg; - if (deriv) { - return alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), alg.sqrt(alg.subtraction(alg.hadamard_product(z, z), alg.onemat(z.size(), z[0].size())))); - } - return alg.log(alg.addition(z, alg.sqrt(alg.subtraction(alg.hadamard_product(z, z), alg.onemat(z.size(), z[0].size()))))); -} - -real_t MLPPActivationOld::artanh(real_t z, bool deriv) { - if (deriv) { - return 1 / (1 - z * z); - } - return 0.5 * std::log((1 + z) / (1 - z)); -} - -std::vector MLPPActivationOld::artanh(std::vector z, bool deriv) { - MLPPLinAlgOld alg; - if (deriv) { - return alg.elementWiseDivision(alg.onevec(z.size()), alg.subtraction(alg.onevec(z.size()), alg.hadamard_product(z, z))); - } - return alg.scalarMultiply(0.5, alg.log(alg.elementWiseDivision(alg.addition(alg.onevec(z.size()), z), alg.subtraction(alg.onevec(z.size()), z)))); -} - -std::vector> MLPPActivationOld::artanh(std::vector> z, bool deriv) { - MLPPLinAlgOld alg; - if (deriv) { - return alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), alg.subtraction(alg.onemat(z.size(), z[0].size()), alg.hadamard_product(z, z))); - } - return alg.scalarMultiply(0.5, alg.log(alg.elementWiseDivision(alg.addition(alg.onemat(z.size(), z[0].size()), z), alg.subtraction(alg.onemat(z.size(), z[0].size()), z)))); -} - -real_t MLPPActivationOld::arcsch(real_t z, bool deriv) { - if (deriv) { - return -1 / ((z * z) * sqrt(1 + (1 / (z * z)))); - } - return std::log(sqrt(1 + (1 / (z * z))) + (1 / z)); -} - -std::vector MLPPActivationOld::arcsch(std::vector z, bool deriv) { - MLPPLinAlgOld alg; - if (deriv) { - return alg.elementWiseDivision(alg.full(z.size(), -1), alg.hadamard_product(alg.hadamard_product(z, z), alg.sqrt(alg.addition(alg.onevec(z.size()), alg.elementWiseDivision(alg.onevec(z.size()), alg.hadamard_product(z, z)))))); - } - return alg.log(alg.addition(alg.sqrt(alg.addition(alg.onevec(z.size()), alg.elementWiseDivision(alg.onevec(z.size()), alg.hadamard_product(z, z)))), alg.elementWiseDivision(alg.onevec(z.size()), z))); -} - -std::vector> MLPPActivationOld::arcsch(std::vector> z, bool deriv) { - MLPPLinAlgOld alg; - if (deriv) { - return alg.elementWiseDivision(alg.full(z.size(), z[0].size(), -1), alg.hadamard_product(alg.hadamard_product(z, z), alg.sqrt(alg.addition(alg.onemat(z.size(), z[0].size()), alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), alg.hadamard_product(z, z)))))); - } - return alg.log(alg.addition(alg.sqrt(alg.addition(alg.onemat(z.size(), z[0].size()), alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), alg.hadamard_product(z, z)))), alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), z))); -} - -real_t MLPPActivationOld::arsech(real_t z, bool deriv) { - if (deriv) { - return -1 / (z * sqrt(1 - z * z)); - } - return std::log((1 / z) + ((1 / z) + 1) * ((1 / z) - 1)); -} - -std::vector MLPPActivationOld::arsech(std::vector z, bool deriv) { - MLPPLinAlgOld alg; - if (deriv) { - return alg.elementWiseDivision(alg.full(z.size(), -1), alg.hadamard_product(z, alg.sqrt(alg.subtraction(alg.onevec(z.size()), alg.hadamard_product(z, z))))); - } - return 
alg.log(alg.addition(alg.elementWiseDivision(alg.onevec(z.size()), z), alg.hadamard_product(alg.addition(alg.elementWiseDivision(alg.onevec(z.size()), z), alg.onevec(z.size())), alg.subtraction(alg.elementWiseDivision(alg.onevec(z.size()), z), alg.onevec(z.size()))))); -} - -std::vector> MLPPActivationOld::arsech(std::vector> z, bool deriv) { - MLPPLinAlgOld alg; - if (deriv) { - return alg.elementWiseDivision(alg.full(z.size(), z[0].size(), -1), alg.hadamard_product(z, alg.sqrt(alg.subtraction(alg.onemat(z.size(), z[0].size()), alg.hadamard_product(z, z))))); - } - return alg.log(alg.addition(alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), z), alg.hadamard_product(alg.addition(alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), z), alg.onemat(z.size(), z[0].size())), alg.subtraction(alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), z), alg.onemat(z.size(), z[0].size()))))); -} - -real_t MLPPActivationOld::arcoth(real_t z, bool deriv) { - if (deriv) { - return 1 / (1 - z * z); - } - return 0.5 * std::log((1 + z) / (z - 1)); -} - -std::vector MLPPActivationOld::arcoth(std::vector z, bool deriv) { - MLPPLinAlgOld alg; - if (deriv) { - return alg.elementWiseDivision(alg.onevec(z.size()), alg.subtraction(alg.onevec(z.size()), alg.hadamard_product(z, z))); - } - return alg.scalarMultiply(0.5, alg.log(alg.elementWiseDivision(alg.addition(alg.onevec(z.size()), z), alg.subtraction(z, alg.onevec(z.size()))))); -} - -std::vector> MLPPActivationOld::arcoth(std::vector> z, bool deriv) { - MLPPLinAlgOld alg; - if (deriv) { - return alg.elementWiseDivision(alg.onemat(z.size(), z[0].size()), alg.subtraction(alg.onemat(z.size(), z[0].size()), alg.hadamard_product(z, z))); - } - return alg.scalarMultiply(0.5, alg.log(alg.elementWiseDivision(alg.addition(alg.onemat(z.size(), z[0].size()), z), alg.subtraction(z, alg.onemat(z.size(), z[0].size()))))); -} - -// TO DO: Implement this template activation -std::vector MLPPActivationOld::activation(std::vector z, bool deriv, real_t (*function)(real_t, bool)) { - if (deriv) { - std::vector lderiv; - lderiv.resize(z.size()); - for (uint32_t i = 0; i < z.size(); i++) { - lderiv[i] = function(z[i], true); - } - return lderiv; - } - std::vector a; - a.resize(z.size()); - for (uint32_t i = 0; i < z.size(); i++) { - a[i] = function(z[i], deriv); - } - return a; -} diff --git a/mlpp/activation/activation_old.h b/mlpp/activation/activation_old.h deleted file mode 100644 index 176f2aa..0000000 --- a/mlpp/activation/activation_old.h +++ /dev/null @@ -1,146 +0,0 @@ - -#ifndef MLPP_ACTIVATION_OLD_H -#define MLPP_ACTIVATION_OLD_H - -// -// Activation.hpp -// -// Created by Marc Melikyan on 1/16/21. 
-// - -#include "core/math/math_defs.h" -#include "core/int_types.h" - -#include - -class MLPPActivationOld { -public: - real_t linear(real_t z, bool deriv = false); - std::vector linear(std::vector z, bool deriv = false); - std::vector> linear(std::vector> z, bool deriv = false); - - real_t sigmoid(real_t z, bool deriv = false); - std::vector sigmoid(std::vector z, bool deriv = false); - std::vector> sigmoid(std::vector> z, bool deriv = false); - - std::vector softmax(std::vector z, bool deriv = false); - std::vector> softmax(std::vector> z, bool deriv = false); - - std::vector adjSoftmax(std::vector z); - std::vector> adjSoftmax(std::vector> z); - - std::vector> softmaxDeriv(std::vector z); - std::vector>> softmaxDeriv(std::vector> z); - - real_t softplus(real_t z, bool deriv = false); - std::vector softplus(std::vector z, bool deriv = false); - std::vector> softplus(std::vector> z, bool deriv = false); - - real_t softsign(real_t z, bool deriv = false); - std::vector softsign(std::vector z, bool deriv = false); - std::vector> softsign(std::vector> z, bool deriv = false); - - real_t gaussianCDF(real_t z, bool deriv = false); - std::vector gaussianCDF(std::vector z, bool deriv = false); - std::vector> gaussianCDF(std::vector> z, bool deriv = false); - - real_t cloglog(real_t z, bool deriv = false); - std::vector cloglog(std::vector z, bool deriv = false); - std::vector> cloglog(std::vector> z, bool deriv = false); - - real_t logit(real_t z, bool deriv = false); - std::vector logit(std::vector z, bool deriv = false); - std::vector> logit(std::vector> z, bool deriv = false); - - real_t unitStep(real_t z, bool deriv = false); - std::vector unitStep(std::vector z, bool deriv = false); - std::vector> unitStep(std::vector> z, bool deriv = false); - - real_t swish(real_t z, bool deriv = false); - std::vector swish(std::vector z, bool deriv = false); - std::vector> swish(std::vector> z, bool deriv = false); - - real_t mish(real_t z, bool deriv = false); - std::vector mish(std::vector z, bool deriv = false); - std::vector> mish(std::vector> z, bool deriv = false); - - real_t sinc(real_t z, bool deriv = false); - std::vector sinc(std::vector z, bool deriv = false); - std::vector> sinc(std::vector> z, bool deriv = false); - - real_t RELU(real_t z, bool deriv = false); - std::vector RELU(std::vector z, bool deriv = false); - std::vector> RELU(std::vector> z, bool deriv = false); - - real_t leakyReLU(real_t z, real_t c, bool deriv = false); - std::vector leakyReLU(std::vector z, real_t c, bool deriv = false); - std::vector> leakyReLU(std::vector> z, real_t c, bool deriv = false); - - real_t ELU(real_t z, real_t c, bool deriv = false); - std::vector ELU(std::vector z, real_t c, bool deriv = false); - std::vector> ELU(std::vector> z, real_t c, bool deriv = false); - - real_t SELU(real_t z, real_t lambda, real_t c, bool deriv = false); - std::vector SELU(std::vector z, real_t lambda, real_t c, bool deriv = false); - std::vector> SELU(std::vector>, real_t lambda, real_t c, bool deriv = false); - - real_t GELU(real_t z, bool deriv = false); - std::vector GELU(std::vector z, bool deriv = false); - std::vector> GELU(std::vector> z, bool deriv = false); - - real_t sign(real_t z, bool deriv = false); - std::vector sign(std::vector z, bool deriv = false); - std::vector> sign(std::vector> z, bool deriv = false); - - real_t sinh(real_t z, bool deriv = false); - std::vector sinh(std::vector z, bool deriv = false); - std::vector> sinh(std::vector> z, bool deriv = false); - - real_t cosh(real_t z, bool deriv = 
false); - std::vector cosh(std::vector z, bool deriv = false); - std::vector> cosh(std::vector> z, bool deriv = false); - - real_t tanh(real_t z, bool deriv = false); - std::vector tanh(std::vector z, bool deriv = false); - std::vector> tanh(std::vector> z, bool deriv = false); - - real_t csch(real_t z, bool deriv = false); - std::vector csch(std::vector z, bool deriv = false); - std::vector> csch(std::vector> z, bool deriv = false); - - real_t sech(real_t z, bool deriv = false); - std::vector sech(std::vector z, bool deriv = false); - std::vector> sech(std::vector> z, bool deriv = false); - - real_t coth(real_t z, bool deriv = false); - std::vector coth(std::vector z, bool deriv = false); - std::vector> coth(std::vector> z, bool deriv = false); - - real_t arsinh(real_t z, bool deriv = false); - std::vector arsinh(std::vector z, bool deriv = false); - std::vector> arsinh(std::vector> z, bool deriv = false); - - real_t arcosh(real_t z, bool deriv = false); - std::vector arcosh(std::vector z, bool deriv = false); - std::vector> arcosh(std::vector> z, bool deriv = false); - - real_t artanh(real_t z, bool deriv = false); - std::vector artanh(std::vector z, bool deriv = false); - std::vector> artanh(std::vector> z, bool deriv = false); - - real_t arcsch(real_t z, bool deriv = false); - std::vector arcsch(std::vector z, bool deriv = false); - std::vector> arcsch(std::vector> z, bool deriv = false); - - real_t arsech(real_t z, bool deriv = false); - std::vector arsech(std::vector z, bool deriv = false); - std::vector> arsech(std::vector> z, bool deriv = false); - - real_t arcoth(real_t z, bool deriv = false); - std::vector arcoth(std::vector z, bool deriv = false); - std::vector> arcoth(std::vector> z, bool deriv = false); - - std::vector activation(std::vector z, bool deriv, real_t (*function)(real_t, bool)); -}; - -#endif /* Activation_hpp */ diff --git a/mlpp/ann/ann_old.cpp b/mlpp/ann/ann_old.cpp deleted file mode 100644 index 40381d6..0000000 --- a/mlpp/ann/ann_old.cpp +++ /dev/null @@ -1,808 +0,0 @@ -// -// ANN.cpp -// -// Created by Marc Melikyan on 11/4/20. 
-// - -#include "ann_old.h" -#include "../activation/activation_old.h" -#include "../cost/cost_old.h" -#include "../lin_alg/lin_alg_old.h" -#include "../regularization/reg_old.h" -#include "../utilities/utilities.h" - -#include -#include -#include - -MLPPANNOld::MLPPANNOld(std::vector> p_inputSet, std::vector p_outputSet) { - inputSet = p_inputSet; - outputSet = p_outputSet; - - n = inputSet.size(); - k = inputSet[0].size(); - lrScheduler = "None"; - decayConstant = 0; - dropRate = 0; -} - -MLPPANNOld::~MLPPANNOld() { - delete outputLayer; -} - -std::vector MLPPANNOld::modelSetTest(std::vector> X) { - if (!network.empty()) { - network[0].input = X; - network[0].forwardPass(); - - for (uint32_t i = 1; i < network.size(); i++) { - network[i].input = network[i - 1].a; - network[i].forwardPass(); - } - outputLayer->input = network[network.size() - 1].a; - } else { - outputLayer->input = X; - } - outputLayer->forwardPass(); - return outputLayer->a; -} - -real_t MLPPANNOld::modelTest(std::vector x) { - if (!network.empty()) { - network[0].Test(x); - for (uint32_t i = 1; i < network.size(); i++) { - network[i].Test(network[i - 1].a_test); - } - outputLayer->Test(network[network.size() - 1].a_test); - } else { - outputLayer->Test(x); - } - return outputLayer->a_test; -} - -void MLPPANNOld::gradientDescent(real_t learning_rate, int max_epoch, bool UI) { - MLPPLinAlgOld alg; - real_t cost_prev = 0; - int epoch = 1; - forwardPass(); - real_t initial_learning_rate = learning_rate; - - alg.printMatrix(network[network.size() - 1].weights); - while (true) { - learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate); - cost_prev = Cost(y_hat, outputSet); - - auto grads = computeGradients(y_hat, outputSet); - auto cumulativeHiddenLayerWGrad = std::get<0>(grads); - auto outputWGrad = std::get<1>(grads); - - cumulativeHiddenLayerWGrad = alg.scalarMultiply(learning_rate / n, cumulativeHiddenLayerWGrad); - outputWGrad = alg.scalarMultiply(learning_rate / n, outputWGrad); - updateParameters(cumulativeHiddenLayerWGrad, outputWGrad, learning_rate); // subject to change. may want bias to have this matrix too. - - std::cout << learning_rate << std::endl; - - forwardPass(); - - if (UI) { - MLPPANNOld::UI(epoch, cost_prev, y_hat, outputSet); - } - - epoch++; - if (epoch > max_epoch) { - break; - } - } -} - -void MLPPANNOld::SGD(real_t learning_rate, int max_epoch, bool UI) { - MLPPLinAlgOld alg; - - real_t cost_prev = 0; - int epoch = 1; - real_t initial_learning_rate = learning_rate; - - while (true) { - learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate); - - std::random_device rd; - std::default_random_engine generator(rd()); - std::uniform_int_distribution distribution(0, int(n - 1)); - int outputIndex = distribution(generator); - - std::vector y_hat = modelSetTest({ inputSet[outputIndex] }); - cost_prev = Cost({ y_hat }, { outputSet[outputIndex] }); - - auto grads = computeGradients(y_hat, { outputSet[outputIndex] }); - auto cumulativeHiddenLayerWGrad = std::get<0>(grads); - auto outputWGrad = std::get<1>(grads); - - cumulativeHiddenLayerWGrad = alg.scalarMultiply(learning_rate / n, cumulativeHiddenLayerWGrad); - outputWGrad = alg.scalarMultiply(learning_rate / n, outputWGrad); - - updateParameters(cumulativeHiddenLayerWGrad, outputWGrad, learning_rate); // subject to change. may want bias to have this matrix too. 
- y_hat = modelSetTest({ inputSet[outputIndex] }); - - if (UI) { - MLPPANNOld::UI(epoch, cost_prev, y_hat, { outputSet[outputIndex] }); - } - - epoch++; - if (epoch > max_epoch) { - break; - } - } - forwardPass(); -} - -void MLPPANNOld::MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI) { - MLPPLinAlgOld alg; - - real_t cost_prev = 0; - int epoch = 1; - real_t initial_learning_rate = learning_rate; - - // Creating the mini-batches - int n_mini_batch = n / mini_batch_size; - // always evaluate the result - // always do forward pass only ONCE at end. - - auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch); - auto inputMiniBatches = std::get<0>(batches); - auto outputMiniBatches = std::get<1>(batches); - - while (true) { - learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate); - for (int i = 0; i < n_mini_batch; i++) { - std::vector y_hat = modelSetTest(inputMiniBatches[i]); - cost_prev = Cost(y_hat, outputMiniBatches[i]); - - auto grads = computeGradients(y_hat, outputMiniBatches[i]); - auto cumulativeHiddenLayerWGrad = std::get<0>(grads); - auto outputWGrad = std::get<1>(grads); - - cumulativeHiddenLayerWGrad = alg.scalarMultiply(learning_rate / n, cumulativeHiddenLayerWGrad); - outputWGrad = alg.scalarMultiply(learning_rate / n, outputWGrad); - - updateParameters(cumulativeHiddenLayerWGrad, outputWGrad, learning_rate); // subject to change. may want bias to have this matrix too. - y_hat = modelSetTest(inputMiniBatches[i]); - - if (UI) { - MLPPANNOld::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]); - } - } - epoch++; - if (epoch > max_epoch) { - break; - } - } - forwardPass(); -} - -void MLPPANNOld::Momentum(real_t learning_rate, int max_epoch, int mini_batch_size, real_t gamma, bool NAG, bool UI) { - MLPPLinAlgOld alg; - - real_t cost_prev = 0; - int epoch = 1; - real_t initial_learning_rate = learning_rate; - - // Creating the mini-batches - int n_mini_batch = n / mini_batch_size; - // always evaluate the result - // always do forward pass only ONCE at end. - - auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch); - auto inputMiniBatches = std::get<0>(batches); - auto outputMiniBatches = std::get<1>(batches); - - // Initializing necessary components for Adam. - std::vector>> v_hidden; - - std::vector v_output; - while (true) { - learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate); - for (int i = 0; i < n_mini_batch; i++) { - std::vector y_hat = modelSetTest(inputMiniBatches[i]); - cost_prev = Cost(y_hat, outputMiniBatches[i]); - - auto grads = computeGradients(y_hat, outputMiniBatches[i]); - auto cumulativeHiddenLayerWGrad = std::get<0>(grads); - auto outputWGrad = std::get<1>(grads); - - if (!network.empty() && v_hidden.empty()) { // Initing our tensor - v_hidden = alg.resize(v_hidden, cumulativeHiddenLayerWGrad); - } - - if (v_output.empty()) { - v_output.resize(outputWGrad.size()); - } - - if (NAG) { // "Aposterori" calculation - updateParameters(v_hidden, v_output, 0); // DON'T update bias. - } - - v_hidden = alg.addition(alg.scalarMultiply(gamma, v_hidden), alg.scalarMultiply(learning_rate / n, cumulativeHiddenLayerWGrad)); - - v_output = alg.addition(alg.scalarMultiply(gamma, v_output), alg.scalarMultiply(learning_rate / n, outputWGrad)); - - updateParameters(v_hidden, v_output, learning_rate); // subject to change. may want bias to have this matrix too. 
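// [Editor's note: illustrative sketch, not part of the deleted sources; names are hypothetical.]
// The Momentum() loop above keeps one velocity per parameter and, when NAG is enabled,
// applies the velocity before the gradient is measured (the look-ahead commented above).
// The per-element recurrence is
//     v <- gamma * v + (learning_rate / n) * g,   w <- w - v.
#include <cstddef>
#include <vector>
using real_t = double; // assumption

inline void momentum_step(std::vector<real_t> &w, std::vector<real_t> &v,
		const std::vector<real_t> &g, real_t gamma, real_t learning_rate, real_t n) {
	for (std::size_t i = 0; i < w.size(); ++i) {
		v[i] = gamma * v[i] + (learning_rate / n) * g[i]; // decayed running velocity
		w[i] -= v[i];
	}
}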
- y_hat = modelSetTest(inputMiniBatches[i]); - - if (UI) { - MLPPANNOld::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]); - } - } - epoch++; - if (epoch > max_epoch) { - break; - } - } - forwardPass(); -} - -void MLPPANNOld::Adagrad(real_t learning_rate, int max_epoch, int mini_batch_size, real_t e, bool UI) { - MLPPLinAlgOld alg; - - real_t cost_prev = 0; - int epoch = 1; - real_t initial_learning_rate = learning_rate; - - // Creating the mini-batches - int n_mini_batch = n / mini_batch_size; - // always evaluate the result - // always do forward pass only ONCE at end. - - auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch); - auto inputMiniBatches = std::get<0>(batches); - auto outputMiniBatches = std::get<1>(batches); - - // Initializing necessary components for Adam. - std::vector>> v_hidden; - - std::vector v_output; - while (true) { - learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate); - for (int i = 0; i < n_mini_batch; i++) { - std::vector y_hat = modelSetTest(inputMiniBatches[i]); - cost_prev = Cost(y_hat, outputMiniBatches[i]); - - auto grads = computeGradients(y_hat, outputMiniBatches[i]); - auto cumulativeHiddenLayerWGrad = std::get<0>(grads); - auto outputWGrad = std::get<1>(grads); - - if (!network.empty() && v_hidden.empty()) { // Initing our tensor - v_hidden = alg.resize(v_hidden, cumulativeHiddenLayerWGrad); - } - - if (v_output.empty()) { - v_output.resize(outputWGrad.size()); - } - - v_hidden = alg.addition(v_hidden, alg.exponentiate(cumulativeHiddenLayerWGrad, 2)); - - v_output = alg.addition(v_output, alg.exponentiate(outputWGrad, 2)); - - std::vector>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(cumulativeHiddenLayerWGrad, alg.scalarAdd(e, alg.sqrt(v_hidden)))); - std::vector outputLayerUpdation = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(outputWGrad, alg.scalarAdd(e, alg.sqrt(v_output)))); - - updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too. - y_hat = modelSetTest(inputMiniBatches[i]); - - if (UI) { - MLPPANNOld::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]); - } - } - epoch++; - if (epoch > max_epoch) { - break; - } - } - forwardPass(); -} - -void MLPPANNOld::Adadelta(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t e, bool UI) { - MLPPLinAlgOld alg; - - real_t cost_prev = 0; - int epoch = 1; - real_t initial_learning_rate = learning_rate; - - // Creating the mini-batches - int n_mini_batch = n / mini_batch_size; - // always evaluate the result - // always do forward pass only ONCE at end. - - auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch); - auto inputMiniBatches = std::get<0>(batches); - auto outputMiniBatches = std::get<1>(batches); - - // Initializing necessary components for Adam. 
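// [Editor's note: illustrative sketch, not part of the deleted sources; names are hypothetical.]
// Adagrad(), just above, accumulates squared gradients and divides each step by their
// square root:  v <- v + g^2,  w <- w - (lr / n) * g / (e + sqrt(v)).
#include <cmath>
#include <cstddef>
#include <vector>
using real_t = double; // assumption

inline void adagrad_step(std::vector<real_t> &w, std::vector<real_t> &v,
		const std::vector<real_t> &g, real_t lr, real_t n, real_t e) {
	for (std::size_t i = 0; i < w.size(); ++i) {
		v[i] += g[i] * g[i];                             // lifetime sum of squared gradients
		w[i] -= (lr / n) * g[i] / (e + std::sqrt(v[i])); // per-parameter damped step
	}
}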
- std::vector>> v_hidden; - - std::vector v_output; - while (true) { - learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate); - for (int i = 0; i < n_mini_batch; i++) { - std::vector y_hat = modelSetTest(inputMiniBatches[i]); - cost_prev = Cost(y_hat, outputMiniBatches[i]); - - auto grads = computeGradients(y_hat, outputMiniBatches[i]); - auto cumulativeHiddenLayerWGrad = std::get<0>(grads); - auto outputWGrad = std::get<1>(grads); - - if (!network.empty() && v_hidden.empty()) { // Initing our tensor - v_hidden = alg.resize(v_hidden, cumulativeHiddenLayerWGrad); - } - - if (v_output.empty()) { - v_output.resize(outputWGrad.size()); - } - - v_hidden = alg.addition(alg.scalarMultiply(1 - b1, v_hidden), alg.scalarMultiply(b1, alg.exponentiate(cumulativeHiddenLayerWGrad, 2))); - - v_output = alg.addition(v_output, alg.exponentiate(outputWGrad, 2)); - - std::vector>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(cumulativeHiddenLayerWGrad, alg.scalarAdd(e, alg.sqrt(v_hidden)))); - std::vector outputLayerUpdation = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(outputWGrad, alg.scalarAdd(e, alg.sqrt(v_output)))); - - updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too. - y_hat = modelSetTest(inputMiniBatches[i]); - - if (UI) { - MLPPANNOld::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]); - } - } - epoch++; - if (epoch > max_epoch) { - break; - } - } - forwardPass(); -} - -void MLPPANNOld::Adam(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI) { - MLPPLinAlgOld alg; - - real_t cost_prev = 0; - int epoch = 1; - real_t initial_learning_rate = learning_rate; - - // Creating the mini-batches - int n_mini_batch = n / mini_batch_size; - // always evaluate the result - // always do forward pass only ONCE at end. - - auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch); - auto inputMiniBatches = std::get<0>(batches); - auto outputMiniBatches = std::get<1>(batches); - - // Initializing necessary components for Adam. 
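// [Editor's note: illustrative sketch, not part of the deleted sources; names are hypothetical.]
// In the Adadelta() body above, the hidden-layer cache is a decayed average,
// v <- (1 - b1) * v + b1 * g^2, while the output cache keeps plain accumulation; as
// written this behaves like RMSProp rather than classical Adadelta (there is no running
// average of squared parameter deltas). The decayed-average variant, element by element:
#include <cmath>
#include <cstddef>
#include <vector>
using real_t = double; // assumption

inline void rmsprop_like_step(std::vector<real_t> &w, std::vector<real_t> &v,
		const std::vector<real_t> &g, real_t b1, real_t lr, real_t n, real_t e) {
	for (std::size_t i = 0; i < w.size(); ++i) {
		v[i] = (1 - b1) * v[i] + b1 * g[i] * g[i];       // decayed squared-gradient cache
		w[i] -= (lr / n) * g[i] / (e + std::sqrt(v[i]));
	}
}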
- std::vector>> m_hidden; - std::vector>> v_hidden; - - std::vector m_output; - std::vector v_output; - while (true) { - learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate); - for (int i = 0; i < n_mini_batch; i++) { - std::vector y_hat = modelSetTest(inputMiniBatches[i]); - cost_prev = Cost(y_hat, outputMiniBatches[i]); - - auto grads = computeGradients(y_hat, outputMiniBatches[i]); - auto cumulativeHiddenLayerWGrad = std::get<0>(grads); - auto outputWGrad = std::get<1>(grads); - - if (!network.empty() && m_hidden.empty() && v_hidden.empty()) { // Initing our tensor - m_hidden = alg.resize(m_hidden, cumulativeHiddenLayerWGrad); - v_hidden = alg.resize(v_hidden, cumulativeHiddenLayerWGrad); - } - - if (m_output.empty() && v_output.empty()) { - m_output.resize(outputWGrad.size()); - v_output.resize(outputWGrad.size()); - } - - m_hidden = alg.addition(alg.scalarMultiply(b1, m_hidden), alg.scalarMultiply(1 - b1, cumulativeHiddenLayerWGrad)); - v_hidden = alg.addition(alg.scalarMultiply(b2, v_hidden), alg.scalarMultiply(1 - b2, alg.exponentiate(cumulativeHiddenLayerWGrad, 2))); - - m_output = alg.addition(alg.scalarMultiply(b1, m_output), alg.scalarMultiply(1 - b1, outputWGrad)); - v_output = alg.addition(alg.scalarMultiply(b2, v_output), alg.scalarMultiply(1 - b2, alg.exponentiate(outputWGrad, 2))); - - std::vector>> m_hidden_hat = alg.scalarMultiply(1 / (1 - std::pow(b1, epoch)), m_hidden); - std::vector>> v_hidden_hat = alg.scalarMultiply(1 / (1 - std::pow(b2, epoch)), v_hidden); - - std::vector m_output_hat = alg.scalarMultiply(1 / (1 - std::pow(b1, epoch)), m_output); - std::vector v_output_hat = alg.scalarMultiply(1 / (1 - std::pow(b2, epoch)), v_output); - - std::vector>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_hidden_hat, alg.scalarAdd(e, alg.sqrt(v_hidden_hat)))); - std::vector outputLayerUpdation = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_output_hat, alg.scalarAdd(e, alg.sqrt(v_output_hat)))); - - updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too. - y_hat = modelSetTest(inputMiniBatches[i]); - - if (UI) { - MLPPANNOld::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]); - } - } - epoch++; - if (epoch > max_epoch) { - break; - } - } - forwardPass(); -} - -void MLPPANNOld::Adamax(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI) { - MLPPLinAlgOld alg; - - real_t cost_prev = 0; - int epoch = 1; - real_t initial_learning_rate = learning_rate; - - // Creating the mini-batches - int n_mini_batch = n / mini_batch_size; - // always evaluate the result - // always do forward pass only ONCE at end. - - auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch); - auto inputMiniBatches = std::get<0>(batches); - auto outputMiniBatches = std::get<1>(batches); - - // Initializing necessary components for Adam. 
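// [Editor's note: illustrative sketch, not part of the deleted sources; names are hypothetical.]
// The Adam() body above maintains bias-corrected first and second moment estimates;
// element by element, at step t:
//     m <- b1*m + (1 - b1)*g          v <- b2*v + (1 - b2)*g^2
//     m_hat = m / (1 - b1^t)          v_hat = v / (1 - b2^t)
//     w <- w - (lr / n) * m_hat / (e + sqrt(v_hat))
#include <cmath>
#include <cstddef>
#include <vector>
using real_t = double; // assumption

inline void adam_step(std::vector<real_t> &w, std::vector<real_t> &m, std::vector<real_t> &v,
		const std::vector<real_t> &g, int t, real_t b1, real_t b2, real_t e,
		real_t lr, real_t n) {
	const real_t c1 = 1 - std::pow(b1, t); // bias-correction denominators
	const real_t c2 = 1 - std::pow(b2, t);
	for (std::size_t i = 0; i < w.size(); ++i) {
		m[i] = b1 * m[i] + (1 - b1) * g[i];
		v[i] = b2 * v[i] + (1 - b2) * g[i] * g[i];
		w[i] -= (lr / n) * (m[i] / c1) / (e + std::sqrt(v[i] / c2));
	}
}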
- std::vector>> m_hidden; - std::vector>> u_hidden; - - std::vector m_output; - std::vector u_output; - while (true) { - learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate); - for (int i = 0; i < n_mini_batch; i++) { - std::vector y_hat = modelSetTest(inputMiniBatches[i]); - cost_prev = Cost(y_hat, outputMiniBatches[i]); - - auto grads = computeGradients(y_hat, outputMiniBatches[i]); - auto cumulativeHiddenLayerWGrad = std::get<0>(grads); - auto outputWGrad = std::get<1>(grads); - - if (!network.empty() && m_hidden.empty() && u_hidden.empty()) { // Initing our tensor - m_hidden = alg.resize(m_hidden, cumulativeHiddenLayerWGrad); - u_hidden = alg.resize(u_hidden, cumulativeHiddenLayerWGrad); - } - - if (m_output.empty() && u_output.empty()) { - m_output.resize(outputWGrad.size()); - u_output.resize(outputWGrad.size()); - } - - m_hidden = alg.addition(alg.scalarMultiply(b1, m_hidden), alg.scalarMultiply(1 - b1, cumulativeHiddenLayerWGrad)); - u_hidden = alg.max(alg.scalarMultiply(b2, u_hidden), alg.abs(cumulativeHiddenLayerWGrad)); - - m_output = alg.addition(alg.scalarMultiply(b1, m_output), alg.scalarMultiply(1 - b1, outputWGrad)); - u_output = alg.max(alg.scalarMultiply(b2, u_output), alg.abs(outputWGrad)); - - std::vector>> m_hidden_hat = alg.scalarMultiply(1 / (1 - std::pow(b1, epoch)), m_hidden); - - std::vector m_output_hat = alg.scalarMultiply(1 / (1 - std::pow(b1, epoch)), m_output); - - std::vector>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_hidden_hat, alg.scalarAdd(e, u_hidden))); - std::vector outputLayerUpdation = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_output_hat, alg.scalarAdd(e, u_output))); - - updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too. - y_hat = modelSetTest(inputMiniBatches[i]); - - if (UI) { - MLPPANNOld::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]); - } - } - epoch++; - if (epoch > max_epoch) { - break; - } - } - forwardPass(); -} - -void MLPPANNOld::Nadam(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI) { - MLPPLinAlgOld alg; - - real_t cost_prev = 0; - int epoch = 1; - real_t initial_learning_rate = learning_rate; - - // Creating the mini-batches - int n_mini_batch = n / mini_batch_size; - // always evaluate the result - // always do forward pass only ONCE at end. - - auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch); - auto inputMiniBatches = std::get<0>(batches); - auto outputMiniBatches = std::get<1>(batches); - - // Initializing necessary components for Adam. 
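// [Editor's note: illustrative sketch, not part of the deleted sources; names are hypothetical.]
// Adamax(), above, swaps Adam's second moment for an infinity-norm estimate:
//     m <- b1*m + (1 - b1)*g,   u <- max(b2*u, |g|),
//     w <- w - (lr / n) * (m / (1 - b1^t)) / (e + u)
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>
using real_t = double; // assumption

inline void adamax_step(std::vector<real_t> &w, std::vector<real_t> &m, std::vector<real_t> &u,
		const std::vector<real_t> &g, int t, real_t b1, real_t b2, real_t e,
		real_t lr, real_t n) {
	const real_t c1 = 1 - std::pow(b1, t);
	for (std::size_t i = 0; i < w.size(); ++i) {
		m[i] = b1 * m[i] + (1 - b1) * g[i];
		u[i] = std::max(b2 * u[i], std::fabs(g[i])); // running infinity norm of the gradients
		w[i] -= (lr / n) * (m[i] / c1) / (e + u[i]);
	}
}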
- std::vector>> m_hidden; - std::vector>> v_hidden; - - std::vector m_output; - std::vector v_output; - while (true) { - learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate); - for (int i = 0; i < n_mini_batch; i++) { - std::vector y_hat = modelSetTest(inputMiniBatches[i]); - cost_prev = Cost(y_hat, outputMiniBatches[i]); - - auto grads = computeGradients(y_hat, outputMiniBatches[i]); - auto cumulativeHiddenLayerWGrad = std::get<0>(grads); - auto outputWGrad = std::get<1>(grads); - - if (!network.empty() && m_hidden.empty() && v_hidden.empty()) { // Initing our tensor - m_hidden = alg.resize(m_hidden, cumulativeHiddenLayerWGrad); - v_hidden = alg.resize(v_hidden, cumulativeHiddenLayerWGrad); - } - - if (m_output.empty() && v_output.empty()) { - m_output.resize(outputWGrad.size()); - v_output.resize(outputWGrad.size()); - } - - m_hidden = alg.addition(alg.scalarMultiply(b1, m_hidden), alg.scalarMultiply(1 - b1, cumulativeHiddenLayerWGrad)); - v_hidden = alg.addition(alg.scalarMultiply(b2, v_hidden), alg.scalarMultiply(1 - b2, alg.exponentiate(cumulativeHiddenLayerWGrad, 2))); - - m_output = alg.addition(alg.scalarMultiply(b1, m_output), alg.scalarMultiply(1 - b1, outputWGrad)); - v_output = alg.addition(alg.scalarMultiply(b2, v_output), alg.scalarMultiply(1 - b2, alg.exponentiate(outputWGrad, 2))); - - std::vector>> m_hidden_hat = alg.scalarMultiply(1 / (1 - std::pow(b1, epoch)), m_hidden); - std::vector>> v_hidden_hat = alg.scalarMultiply(1 / (1 - std::pow(b2, epoch)), v_hidden); - std::vector>> m_hidden_final = alg.addition(alg.scalarMultiply(b1, m_hidden_hat), alg.scalarMultiply((1 - b1) / (1 - std::pow(b1, epoch)), cumulativeHiddenLayerWGrad)); - - std::vector m_output_hat = alg.scalarMultiply(1 / (1 - std::pow(b1, epoch)), m_output); - std::vector v_output_hat = alg.scalarMultiply(1 / (1 - std::pow(b2, epoch)), v_output); - std::vector m_output_final = alg.addition(alg.scalarMultiply(b1, m_output_hat), alg.scalarMultiply((1 - b1) / (1 - std::pow(b1, epoch)), outputWGrad)); - - std::vector>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_hidden_final, alg.scalarAdd(e, alg.sqrt(v_hidden_hat)))); - std::vector outputLayerUpdation = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_output_final, alg.scalarAdd(e, alg.sqrt(v_output_hat)))); - - updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too. - y_hat = modelSetTest(inputMiniBatches[i]); - - if (UI) { - MLPPANNOld::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]); - } - } - epoch++; - if (epoch > max_epoch) { - break; - } - } - forwardPass(); -} - -void MLPPANNOld::AMSGrad(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI) { - MLPPLinAlgOld alg; - - real_t cost_prev = 0; - int epoch = 1; - real_t initial_learning_rate = learning_rate; - - // Creating the mini-batches - int n_mini_batch = n / mini_batch_size; - // always evaluate the result - // always do forward pass only ONCE at end. - - auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch); - auto inputMiniBatches = std::get<0>(batches); - auto outputMiniBatches = std::get<1>(batches); - - // Initializing necessary components for Adam. 
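// [Editor's note: illustrative sketch, not part of the deleted sources; names are hypothetical.]
// Nadam(), above, is Adam plus a Nesterov-style look-ahead on the bias-corrected first
// moment:  m_final = b1 * m_hat + ((1 - b1) / (1 - b1^t)) * g, then
//          w <- w - (lr / n) * m_final / (e + sqrt(v_hat)).
#include <cmath>
#include <cstddef>
#include <vector>
using real_t = double; // assumption

inline void nadam_step(std::vector<real_t> &w, std::vector<real_t> &m, std::vector<real_t> &v,
		const std::vector<real_t> &g, int t, real_t b1, real_t b2, real_t e,
		real_t lr, real_t n) {
	const real_t c1 = 1 - std::pow(b1, t);
	const real_t c2 = 1 - std::pow(b2, t);
	for (std::size_t i = 0; i < w.size(); ++i) {
		m[i] = b1 * m[i] + (1 - b1) * g[i];
		v[i] = b2 * v[i] + (1 - b2) * g[i] * g[i];
		const real_t m_final = b1 * (m[i] / c1) + ((1 - b1) / c1) * g[i]; // Nesterov look-ahead
		w[i] -= (lr / n) * m_final / (e + std::sqrt(v[i] / c2));
	}
}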
- std::vector>> m_hidden; - std::vector>> v_hidden; - - std::vector>> v_hidden_hat; - - std::vector m_output; - std::vector v_output; - - std::vector v_output_hat; - while (true) { - learning_rate = applyLearningRateScheduler(initial_learning_rate, decayConstant, epoch, dropRate); - for (int i = 0; i < n_mini_batch; i++) { - std::vector y_hat = modelSetTest(inputMiniBatches[i]); - cost_prev = Cost(y_hat, outputMiniBatches[i]); - - auto grads = computeGradients(y_hat, outputMiniBatches[i]); - auto cumulativeHiddenLayerWGrad = std::get<0>(grads); - auto outputWGrad = std::get<1>(grads); - - if (!network.empty() && m_hidden.empty() && v_hidden.empty()) { // Initing our tensor - m_hidden = alg.resize(m_hidden, cumulativeHiddenLayerWGrad); - v_hidden = alg.resize(v_hidden, cumulativeHiddenLayerWGrad); - v_hidden_hat = alg.resize(v_hidden_hat, cumulativeHiddenLayerWGrad); - } - - if (m_output.empty() && v_output.empty()) { - m_output.resize(outputWGrad.size()); - v_output.resize(outputWGrad.size()); - v_output_hat.resize(outputWGrad.size()); - } - - m_hidden = alg.addition(alg.scalarMultiply(b1, m_hidden), alg.scalarMultiply(1 - b1, cumulativeHiddenLayerWGrad)); - v_hidden = alg.addition(alg.scalarMultiply(b2, v_hidden), alg.scalarMultiply(1 - b2, alg.exponentiate(cumulativeHiddenLayerWGrad, 2))); - - m_output = alg.addition(alg.scalarMultiply(b1, m_output), alg.scalarMultiply(1 - b1, outputWGrad)); - v_output = alg.addition(alg.scalarMultiply(b2, v_output), alg.scalarMultiply(1 - b2, alg.exponentiate(outputWGrad, 2))); - - v_hidden_hat = alg.max(v_hidden_hat, v_hidden); - - v_output_hat = alg.max(v_output_hat, v_output); - - std::vector>> hiddenLayerUpdations = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_hidden, alg.scalarAdd(e, alg.sqrt(v_hidden_hat)))); - std::vector outputLayerUpdation = alg.scalarMultiply(learning_rate / n, alg.elementWiseDivision(m_output, alg.scalarAdd(e, alg.sqrt(v_output_hat)))); - - updateParameters(hiddenLayerUpdations, outputLayerUpdation, learning_rate); // subject to change. may want bias to have this matrix too. 
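// [Editor's note: illustrative sketch, not part of the deleted sources; names are hypothetical.]
// AMSGrad(), above, keeps the element-wise maximum of every second-moment estimate seen
// so far and, as written, applies no bias correction:
//     v_hat <- max(v_hat, v),   w <- w - (lr / n) * m / (e + sqrt(v_hat)).
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>
using real_t = double; // assumption

inline void amsgrad_step(std::vector<real_t> &w, std::vector<real_t> &m, std::vector<real_t> &v,
		std::vector<real_t> &v_hat, const std::vector<real_t> &g,
		real_t b1, real_t b2, real_t e, real_t lr, real_t n) {
	for (std::size_t i = 0; i < w.size(); ++i) {
		m[i] = b1 * m[i] + (1 - b1) * g[i];
		v[i] = b2 * v[i] + (1 - b2) * g[i] * g[i];
		v_hat[i] = std::max(v_hat[i], v[i]); // monotone cap on the second moment
		w[i] -= (lr / n) * m[i] / (e + std::sqrt(v_hat[i]));
	}
}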
- y_hat = modelSetTest(inputMiniBatches[i]); - - if (UI) { - MLPPANNOld::UI(epoch, cost_prev, y_hat, outputMiniBatches[i]); - } - } - epoch++; - if (epoch > max_epoch) { - break; - } - } - forwardPass(); -} - -real_t MLPPANNOld::score() { - MLPPUtilities util; - forwardPass(); - return util.performance(y_hat, outputSet); -} - -void MLPPANNOld::save(std::string fileName) { - MLPPUtilities util; - if (!network.empty()) { - util.saveParameters(fileName, network[0].weights, network[0].bias, false, 1); - for (uint32_t i = 1; i < network.size(); i++) { - util.saveParameters(fileName, network[i].weights, network[i].bias, true, i + 1); - } - util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, true, network.size() + 1); - } else { - util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, false, network.size() + 1); - } -} - -void MLPPANNOld::setLearningRateScheduler(std::string type, real_t decayConstant) { - lrScheduler = type; - MLPPANNOld::decayConstant = decayConstant; -} - -void MLPPANNOld::setLearningRateScheduler(std::string type, real_t decayConstant, real_t dropRate) { - lrScheduler = type; - MLPPANNOld::decayConstant = decayConstant; - MLPPANNOld::dropRate = dropRate; -} - -// https://en.wikipedia.org/wiki/Learning_rate -// Learning Rate Decay (C2W2L09) - Andrew Ng - Deep Learning Specialization -real_t MLPPANNOld::applyLearningRateScheduler(real_t learningRate, real_t decayConstant, real_t epoch, real_t dropRate) { - if (lrScheduler == "Time") { - return learningRate / (1 + decayConstant * epoch); - } else if (lrScheduler == "Epoch") { - return learningRate * (decayConstant / std::sqrt(epoch)); - } else if (lrScheduler == "Step") { - return learningRate * std::pow(decayConstant, int((1 + epoch) / dropRate)); // Utilizing an explicit int conversion implicitly takes the floor. 
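// [Editor's note: illustrative sketch, not part of the deleted sources; the free function
// below is hypothetical.] applyLearningRateScheduler(), whose "Exponential" branch
// continues just below, implements four common decay rules (epochs start at 1):
//     Time:        lr / (1 + decay * epoch)
//     Epoch:       lr * decay / sqrt(epoch)
//     Step:        lr * decay^floor((1 + epoch) / dropRate)   (int() truncation equals floor for positive values)
//     Exponential: lr * exp(-decay * epoch)
#include <cmath>
#include <string>

double scheduled_lr(const std::string &type, double lr, double decay, double epoch, double drop_rate) {
	if (type == "Time") {
		return lr / (1.0 + decay * epoch);
	} else if (type == "Epoch") {
		return lr * (decay / std::sqrt(epoch));
	} else if (type == "Step") {
		return lr * std::pow(decay, std::floor((1.0 + epoch) / drop_rate));
	} else if (type == "Exponential") {
		return lr * std::exp(-decay * epoch);
	}
	return lr; // "None"
}
// Example: scheduled_lr("Time", 0.1, 0.5, 4.0, 0.0) returns 0.1 / 3.0.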
- } else if (lrScheduler == "Exponential") { - return learningRate * std::exp(-decayConstant * epoch); - } - return learningRate; -} - -void MLPPANNOld::addLayer(int n_hidden, std::string activation, std::string weightInit, std::string reg, real_t lambda, real_t alpha) { - if (network.empty()) { - network.push_back(MLPPOldHiddenLayer(n_hidden, activation, inputSet, weightInit, reg, lambda, alpha)); - network[0].forwardPass(); - } else { - network.push_back(MLPPOldHiddenLayer(n_hidden, activation, network[network.size() - 1].a, weightInit, reg, lambda, alpha)); - network[network.size() - 1].forwardPass(); - } -} - -void MLPPANNOld::addOutputLayer(std::string activation, std::string loss, std::string weightInit, std::string reg, real_t lambda, real_t alpha) { - if (!network.empty()) { - outputLayer = new MLPPOldOutputLayer(network[network.size() - 1].n_hidden, activation, loss, network[network.size() - 1].a, weightInit, reg, lambda, alpha); - } else { - outputLayer = new MLPPOldOutputLayer(k, activation, loss, inputSet, weightInit, reg, lambda, alpha); - } -} - -real_t MLPPANNOld::Cost(std::vector y_hat, std::vector y) { - MLPPRegOld regularization; - class MLPPCostOld cost; - real_t totalRegTerm = 0; - - auto cost_function = outputLayer->cost_map[outputLayer->cost]; - if (!network.empty()) { - for (uint32_t i = 0; i < network.size() - 1; i++) { - totalRegTerm += regularization.regTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg); - } - } - return (cost.*cost_function)(y_hat, y) + totalRegTerm + regularization.regTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg); -} - -void MLPPANNOld::forwardPass() { - if (!network.empty()) { - network[0].input = inputSet; - network[0].forwardPass(); - - for (uint32_t i = 1; i < network.size(); i++) { - network[i].input = network[i - 1].a; - network[i].forwardPass(); - } - outputLayer->input = network[network.size() - 1].a; - } else { - outputLayer->input = inputSet; - } - outputLayer->forwardPass(); - y_hat = outputLayer->a; -} - -void MLPPANNOld::updateParameters(std::vector>> hiddenLayerUpdations, std::vector outputLayerUpdation, real_t learning_rate) { - MLPPLinAlgOld alg; - - outputLayer->weights = alg.subtraction(outputLayer->weights, outputLayerUpdation); - outputLayer->bias -= learning_rate * alg.sum_elements(outputLayer->delta) / n; - - if (!network.empty()) { - network[network.size() - 1].weights = alg.subtraction(network[network.size() - 1].weights, hiddenLayerUpdations[0]); - network[network.size() - 1].bias = alg.subtractMatrixRows(network[network.size() - 1].bias, alg.scalarMultiply(learning_rate / n, network[network.size() - 1].delta)); - - for (int i = network.size() - 2; i >= 0; i--) { - network[i].weights = alg.subtraction(network[i].weights, hiddenLayerUpdations[(network.size() - 2) - i + 1]); - network[i].bias = alg.subtractMatrixRows(network[i].bias, alg.scalarMultiply(learning_rate / n, network[i].delta)); - } - } -} - -std::tuple>>, std::vector> MLPPANNOld::computeGradients(std::vector y_hat, std::vector outputSet) { - // std::cout << "BEGIN" << std::endl; - class MLPPCostOld cost; - MLPPActivationOld avn; - MLPPLinAlgOld alg; - MLPPRegOld regularization; - - std::vector>> cumulativeHiddenLayerWGrad; // Tensor containing ALL hidden grads. 
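// [Editor's note: illustrative sketch, not part of the deleted sources; tiny_backprop and
// its parameters are hypothetical.] computeGradients(), continued below, applies the
// standard backpropagation recursion: the output delta is cost'(y_hat, y) times act'(z_out)
// element-wise, each earlier delta is (delta_next * W_next^T) times act'(z) element-wise,
// and each weight gradient is input^T * delta. In scalar form, for one sample, one
// sigmoid hidden unit and one sigmoid output with MSE cost:
#include <cmath>

static double sigma(double z) { return 1.0 / (1.0 + std::exp(-z)); }
static double sigma_prime(double z) { const double s = sigma(z); return s * (1.0 - s); }

struct TinyGrads { double dw_h, db_h, dw_out, db_out; };

TinyGrads tiny_backprop(double x, double y, double w_h, double b_h, double w_out, double b_out) {
	const double z_h = w_h * x + b_h;
	const double a_h = sigma(z_h);
	const double z_o = w_out * a_h + b_out;
	const double y_hat = sigma(z_o);
	const double delta_o = (y_hat - y) * sigma_prime(z_o);     // dC/dz_o for C = 0.5 * (y_hat - y)^2
	const double delta_h = delta_o * w_out * sigma_prime(z_h); // chain rule through the hidden unit
	return { delta_h * x, delta_h, delta_o * a_h, delta_o };
}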
- - auto costDeriv = outputLayer->costDeriv_map[outputLayer->cost]; - auto outputAvn = outputLayer->activation_map[outputLayer->activation]; - outputLayer->delta = alg.hadamard_product((cost.*costDeriv)(y_hat, outputSet), (avn.*outputAvn)(outputLayer->z, 1)); - std::vector outputWGrad = alg.mat_vec_mult(alg.transpose(outputLayer->input), outputLayer->delta); - outputWGrad = alg.addition(outputWGrad, regularization.regDerivTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg)); - - if (!network.empty()) { - auto hiddenLayerAvn = network[network.size() - 1].activation_map[network[network.size() - 1].activation]; - network[network.size() - 1].delta = alg.hadamard_product(alg.outerProduct(outputLayer->delta, outputLayer->weights), (avn.*hiddenLayerAvn)(network[network.size() - 1].z, 1)); - std::vector> hiddenLayerWGrad = alg.matmult(alg.transpose(network[network.size() - 1].input), network[network.size() - 1].delta); - - cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[network.size() - 1].weights, network[network.size() - 1].lambda, network[network.size() - 1].alpha, network[network.size() - 1].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well. - - for (int i = network.size() - 2; i >= 0; i--) { - hiddenLayerAvn = network[i].activation_map[network[i].activation]; - network[i].delta = alg.hadamard_product(alg.matmult(network[i + 1].delta, alg.transpose(network[i + 1].weights)), (avn.*hiddenLayerAvn)(network[i].z, 1)); - hiddenLayerWGrad = alg.matmult(alg.transpose(network[i].input), network[i].delta); - cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well. - } - } - return { cumulativeHiddenLayerWGrad, outputWGrad }; -} - -void MLPPANNOld::UI(int epoch, real_t cost_prev, std::vector y_hat, std::vector outputSet) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); - std::cout << "Layer " << network.size() + 1 << ": " << std::endl; - MLPPUtilities::UI(outputLayer->weights, outputLayer->bias); - if (!network.empty()) { - for (int i = network.size() - 1; i >= 0; i--) { - std::cout << "Layer " << i + 1 << ": " << std::endl; - MLPPUtilities::UI(network[i].weights, network[i].bias); - } - } -} diff --git a/mlpp/ann/ann_old.h b/mlpp/ann/ann_old.h deleted file mode 100644 index 1d30ae4..0000000 --- a/mlpp/ann/ann_old.h +++ /dev/null @@ -1,73 +0,0 @@ -#ifndef MLPP_ANN_OLD_H -#define MLPP_ANN_OLD_H - -// -// ANN.hpp -// -// Created by Marc Melikyan on 11/4/20. 
-// - -#include "core/math/math_defs.h" - -#include "../hidden_layer/hidden_layer.h" -#include "../output_layer/output_layer.h" - -#include "../hidden_layer/hidden_layer_old.h" -#include "../output_layer/output_layer_old.h" - -#include -#include -#include - -class MLPPANNOld { -public: - MLPPANNOld(std::vector> inputSet, std::vector outputSet); - ~MLPPANNOld(); - std::vector modelSetTest(std::vector> X); - real_t modelTest(std::vector x); - void gradientDescent(real_t learning_rate, int max_epoch, bool UI = false); - void SGD(real_t learning_rate, int max_epoch, bool UI = false); - void MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI = false); - void Momentum(real_t learning_rate, int max_epoch, int mini_batch_size, real_t gamma, bool NAG, bool UI = false); - void Adagrad(real_t learning_rate, int max_epoch, int mini_batch_size, real_t e, bool UI = false); - void Adadelta(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t e, bool UI = false); - void Adam(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI = false); - void Adamax(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI = false); - void Nadam(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI = false); - void AMSGrad(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI = false); - real_t score(); - void save(std::string fileName); - - void setLearningRateScheduler(std::string type, real_t decayConstant); - void setLearningRateScheduler(std::string type, real_t decayConstant, real_t dropRate); - - void addLayer(int n_hidden, std::string activation, std::string weightInit = "Default", std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5); - void addOutputLayer(std::string activation, std::string loss, std::string weightInit = "Default", std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5); - -private: - real_t applyLearningRateScheduler(real_t learningRate, real_t decayConstant, real_t epoch, real_t dropRate); - - real_t Cost(std::vector y_hat, std::vector y); - - void forwardPass(); - void updateParameters(std::vector>> hiddenLayerUpdations, std::vector outputLayerUpdation, real_t learning_rate); - std::tuple>>, std::vector> computeGradients(std::vector y_hat, std::vector outputSet); - - void UI(int epoch, real_t cost_prev, std::vector y_hat, std::vector outputSet); - - std::vector> inputSet; - std::vector outputSet; - std::vector y_hat; - - std::vector network; - MLPPOldOutputLayer *outputLayer; - - int n; - int k; - - std::string lrScheduler; - real_t decayConstant; - real_t dropRate; -}; - -#endif /* ANN_hpp */ \ No newline at end of file diff --git a/mlpp/auto_encoder/auto_encoder_old.cpp b/mlpp/auto_encoder/auto_encoder_old.cpp deleted file mode 100644 index 4b079c4..0000000 --- a/mlpp/auto_encoder/auto_encoder_old.cpp +++ /dev/null @@ -1,264 +0,0 @@ -// -// AutoEncoder.cpp -// -// Created by Marc Melikyan on 11/4/20. 
-// - -#include "auto_encoder_old.h" - -#include "../activation/activation_old.h" -#include "../cost/cost_old.h" -#include "../lin_alg/lin_alg_old.h" -#include "../utilities/utilities.h" - -#include -#include - -std::vector> MLPPAutoEncoderOld::modelSetTest(std::vector> X) { - return Evaluate(X); -} - -std::vector MLPPAutoEncoderOld::modelTest(std::vector x) { - return Evaluate(x); -} - -void MLPPAutoEncoderOld::gradientDescent(real_t learning_rate, int max_epoch, bool UI) { - MLPPActivationOld avn; - MLPPLinAlgOld alg; - real_t cost_prev = 0; - int epoch = 1; - forwardPass(); - - while (true) { - cost_prev = Cost(y_hat, inputSet); - - // Calculating the errors - std::vector> error = alg.subtraction(y_hat, inputSet); - - // Calculating the weight/bias gradients for layer 2 - std::vector> D2_1 = alg.matmult(alg.transpose(a2), error); - - // weights and bias updation for layer 2 - weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate / n, D2_1)); - - // Calculating the bias gradients for layer 2 - bias2 = alg.subtractMatrixRows(bias2, alg.scalarMultiply(learning_rate, error)); - - //Calculating the weight/bias for layer 1 - - std::vector> D1_1 = alg.matmult(error, alg.transpose(weights2)); - - std::vector> D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, 1)); - - std::vector> D1_3 = alg.matmult(alg.transpose(inputSet), D1_2); - - // weight an bias updation for layer 1 - weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate / n, D1_3)); - - bias1 = alg.subtractMatrixRows(bias1, alg.scalarMultiply(learning_rate / n, D1_2)); - - forwardPass(); - - // UI PORTION - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, inputSet)); - std::cout << "Layer 1:" << std::endl; - MLPPUtilities::UI(weights1, bias1); - std::cout << "Layer 2:" << std::endl; - MLPPUtilities::UI(weights2, bias2); - } - epoch++; - - if (epoch > max_epoch) { - break; - } - } -} - -void MLPPAutoEncoderOld::SGD(real_t learning_rate, int max_epoch, bool UI) { - MLPPActivationOld avn; - MLPPLinAlgOld alg; - real_t cost_prev = 0; - int epoch = 1; - - while (true) { - std::random_device rd; - std::default_random_engine generator(rd()); - std::uniform_int_distribution distribution(0, int(n - 1)); - int outputIndex = distribution(generator); - - std::vector y_hat = Evaluate(inputSet[outputIndex]); - auto prop_res = propagate(inputSet[outputIndex]); - auto z2 = std::get<0>(prop_res); - auto a2 = std::get<1>(prop_res); - - cost_prev = Cost({ y_hat }, { inputSet[outputIndex] }); - std::vector error = alg.subtraction(y_hat, inputSet[outputIndex]); - - // Weight updation for layer 2 - std::vector> D2_1 = alg.outerProduct(error, a2); - weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate, alg.transpose(D2_1))); - - // Bias updation for layer 2 - bias2 = alg.subtraction(bias2, alg.scalarMultiply(learning_rate, error)); - - // Weight updation for layer 1 - std::vector D1_1 = alg.mat_vec_mult(weights2, error); - std::vector D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, 1)); - std::vector> D1_3 = alg.outerProduct(inputSet[outputIndex], D1_2); - - weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate, D1_3)); - // Bias updation for layer 1 - - bias1 = alg.subtraction(bias1, alg.scalarMultiply(learning_rate, D1_2)); - - y_hat = Evaluate(inputSet[outputIndex]); - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost({ y_hat }, { inputSet[outputIndex] })); - std::cout << "Layer 1:" << std::endl; - MLPPUtilities::UI(weights1, bias1); - std::cout << "Layer 2:" << 
std::endl; - MLPPUtilities::UI(weights2, bias2); - } - epoch++; - - if (epoch > max_epoch) { - break; - } - } - forwardPass(); -} - -void MLPPAutoEncoderOld::MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI) { - MLPPActivationOld avn; - MLPPLinAlgOld alg; - real_t cost_prev = 0; - int epoch = 1; - - // Creating the mini-batches - int n_mini_batch = n / mini_batch_size; - std::vector>> inputMiniBatches = MLPPUtilities::createMiniBatches(inputSet, n_mini_batch); - - while (true) { - for (int i = 0; i < n_mini_batch; i++) { - std::vector> y_hat = Evaluate(inputMiniBatches[i]); - - auto prop_res = propagate(inputMiniBatches[i]); - auto z2 = std::get<0>(prop_res); - auto a2 = std::get<1>(prop_res); - - cost_prev = Cost(y_hat, inputMiniBatches[i]); - - // Calculating the errors - std::vector> error = alg.subtraction(y_hat, inputMiniBatches[i]); - - // Calculating the weight/bias gradients for layer 2 - - std::vector> D2_1 = alg.matmult(alg.transpose(a2), error); - - // weights and bias updation for layer 2 - weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate / inputMiniBatches[i].size(), D2_1)); - - // Bias Updation for layer 2 - bias2 = alg.subtractMatrixRows(bias2, alg.scalarMultiply(learning_rate, error)); - - //Calculating the weight/bias for layer 1 - - std::vector> D1_1 = alg.matmult(error, alg.transpose(weights2)); - - std::vector> D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, 1)); - - std::vector> D1_3 = alg.matmult(alg.transpose(inputMiniBatches[i]), D1_2); - - // weight an bias updation for layer 1 - weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate / inputMiniBatches[i].size(), D1_3)); - - bias1 = alg.subtractMatrixRows(bias1, alg.scalarMultiply(learning_rate / inputMiniBatches[i].size(), D1_2)); - - y_hat = Evaluate(inputMiniBatches[i]); - - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, inputMiniBatches[i])); - std::cout << "Layer 1:" << std::endl; - MLPPUtilities::UI(weights1, bias1); - std::cout << "Layer 2:" << std::endl; - MLPPUtilities::UI(weights2, bias2); - } - } - epoch++; - if (epoch > max_epoch) { - break; - } - } - forwardPass(); -} - -real_t MLPPAutoEncoderOld::score() { - MLPPUtilities util; - return util.performance(y_hat, inputSet); -} - -void MLPPAutoEncoderOld::save(std::string fileName) { - MLPPUtilities util; - util.saveParameters(fileName, weights1, bias1, 0, 1); - util.saveParameters(fileName, weights2, bias2, 1, 2); -} - -MLPPAutoEncoderOld::MLPPAutoEncoderOld(std::vector> pinputSet, int pn_hidden) { - inputSet = pinputSet; - n_hidden = pn_hidden; - n = inputSet.size(); - k = inputSet[0].size(); - - y_hat.resize(inputSet.size()); - - weights1 = MLPPUtilities::weightInitialization(k, n_hidden); - weights2 = MLPPUtilities::weightInitialization(n_hidden, k); - bias1 = MLPPUtilities::biasInitialization(n_hidden); - bias2 = MLPPUtilities::biasInitialization(k); -} - -real_t MLPPAutoEncoderOld::Cost(std::vector> y_hat, std::vector> y) { - class MLPPCostOld cost; - return cost.MSE(y_hat, inputSet); -} - -std::vector> MLPPAutoEncoderOld::Evaluate(std::vector> X) { - MLPPLinAlgOld alg; - MLPPActivationOld avn; - std::vector> z2 = alg.mat_vec_add(alg.matmult(X, weights1), bias1); - std::vector> a2 = avn.sigmoid(z2); - return alg.mat_vec_add(alg.matmult(a2, weights2), bias2); -} - -std::tuple>, std::vector>> MLPPAutoEncoderOld::propagate(std::vector> X) { - MLPPLinAlgOld alg; - MLPPActivationOld avn; - std::vector> z2 = alg.mat_vec_add(alg.matmult(X, weights1), bias1); - std::vector> a2 = 
avn.sigmoid(z2); - return { z2, a2 }; -} - -std::vector MLPPAutoEncoderOld::Evaluate(std::vector x) { - MLPPLinAlgOld alg; - MLPPActivationOld avn; - std::vector z2 = alg.addition(alg.mat_vec_mult(alg.transpose(weights1), x), bias1); - std::vector a2 = avn.sigmoid(z2); - return alg.addition(alg.mat_vec_mult(alg.transpose(weights2), a2), bias2); -} - -std::tuple, std::vector> MLPPAutoEncoderOld::propagate(std::vector x) { - MLPPLinAlgOld alg; - MLPPActivationOld avn; - std::vector z2 = alg.addition(alg.mat_vec_mult(alg.transpose(weights1), x), bias1); - std::vector a2 = avn.sigmoid(z2); - return { z2, a2 }; -} - -void MLPPAutoEncoderOld::forwardPass() { - MLPPLinAlgOld alg; - MLPPActivationOld avn; - z2 = alg.mat_vec_add(alg.matmult(inputSet, weights1), bias1); - a2 = avn.sigmoid(z2); - y_hat = alg.mat_vec_add(alg.matmult(a2, weights2), bias2); -} diff --git a/mlpp/auto_encoder/auto_encoder_old.h b/mlpp/auto_encoder/auto_encoder_old.h deleted file mode 100644 index adb6bf4..0000000 --- a/mlpp/auto_encoder/auto_encoder_old.h +++ /dev/null @@ -1,58 +0,0 @@ - -#ifndef MLPP_AUTO_ENCODER_OLD_H -#define MLPP_AUTO_ENCODER_OLD_H - -// -// AutoEncoder.hpp -// -// Created by Marc Melikyan on 11/4/20. -// - -#include "core/math/math_defs.h" - -#include -#include -#include - -class MLPPAutoEncoderOld { -public: - std::vector> modelSetTest(std::vector> X); - std::vector modelTest(std::vector x); - - void gradientDescent(real_t learning_rate, int max_epoch, bool UI = false); - void SGD(real_t learning_rate, int max_epoch, bool UI = false); - void MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI = false); - - real_t score(); - - void save(std::string fileName); - - MLPPAutoEncoderOld(std::vector> inputSet, int n_hidden); - -private: - real_t Cost(std::vector> y_hat, std::vector> y); - - std::vector> Evaluate(std::vector> X); - std::tuple>, std::vector>> propagate(std::vector> X); - std::vector Evaluate(std::vector x); - std::tuple, std::vector> propagate(std::vector x); - void forwardPass(); - - std::vector> inputSet; - std::vector> y_hat; - - std::vector> weights1; - std::vector> weights2; - - std::vector bias1; - std::vector bias2; - - std::vector> z2; - std::vector> a2; - - int n; - int k; - int n_hidden; -}; - -#endif /* AutoEncoder_hpp */ diff --git a/mlpp/bernoulli_nb/bernoulli_nb_old.cpp b/mlpp/bernoulli_nb/bernoulli_nb_old.cpp deleted file mode 100644 index 3c736b3..0000000 --- a/mlpp/bernoulli_nb/bernoulli_nb_old.cpp +++ /dev/null @@ -1,179 +0,0 @@ -// -// BernoulliNB.cpp -// -// Created by Marc Melikyan on 1/17/21. 
-// - -#include "bernoulli_nb_old.h" -#include "../data/data.h" -#include "../lin_alg/lin_alg_old.h" -#include "../utilities/utilities.h" - -#include -#include - -MLPPBernoulliNBOld::MLPPBernoulliNBOld(std::vector> p_inputSet, std::vector p_outputSet) { - inputSet = p_inputSet; - outputSet = p_outputSet; - class_num = 2; - - y_hat.resize(outputSet.size()); - Evaluate(); -} - -std::vector MLPPBernoulliNBOld::modelSetTest(std::vector> X) { - std::vector y_hat; - for (uint32_t i = 0; i < X.size(); i++) { - y_hat.push_back(modelTest(X[i])); - } - return y_hat; -} - -real_t MLPPBernoulliNBOld::modelTest(std::vector x) { - real_t score_0 = 1; - real_t score_1 = 1; - - std::vector foundIndices; - - for (uint32_t j = 0; j < x.size(); j++) { - for (uint32_t k = 0; k < vocab.size(); k++) { - if (x[j] == vocab[k]) { - score_0 *= theta[0][vocab[k]]; - score_1 *= theta[1][vocab[k]]; - - foundIndices.push_back(k); - } - } - } - - for (uint32_t i = 0; i < vocab.size(); i++) { - bool found = false; - for (uint32_t j = 0; j < foundIndices.size(); j++) { - if (vocab[i] == vocab[foundIndices[j]]) { - found = true; - } - } - if (!found) { - score_0 *= 1 - theta[0][vocab[i]]; - score_1 *= 1 - theta[1][vocab[i]]; - } - } - - score_0 *= prior_0; - score_1 *= prior_1; - - // Assigning the traning example to a class - - if (score_0 > score_1) { - return 0; - } else { - return 1; - } -} - -real_t MLPPBernoulliNBOld::score() { - MLPPUtilities util; - return util.performance(y_hat, outputSet); -} - -void MLPPBernoulliNBOld::computeVocab() { - MLPPLinAlgOld alg; - MLPPData data; - vocab = data.vecToSet(alg.flatten(inputSet)); -} - -void MLPPBernoulliNBOld::computeTheta() { - // Resizing theta for the sake of ease & proper access of the elements. - theta.resize(class_num); - - // Setting all values in the hasmap by default to 0. - for (int i = class_num - 1; i >= 0; i--) { - for (uint32_t j = 0; j < vocab.size(); j++) { - theta[i][vocab[j]] = 0; - } - } - - for (uint32_t i = 0; i < inputSet.size(); i++) { - for (uint32_t j = 0; j < inputSet[0].size(); j++) { - theta[outputSet[i]][inputSet[i][j]]++; - } - } - - for (uint32_t i = 0; i < theta.size(); i++) { - for (uint32_t j = 0; j < theta[i].size(); j++) { - if (i == 0) { - theta[i][j] /= prior_0 * y_hat.size(); - } else { - theta[i][j] /= prior_1 * y_hat.size(); - } - } - } -} - -void MLPPBernoulliNBOld::Evaluate() { - for (uint32_t i = 0; i < outputSet.size(); i++) { - // Pr(B | A) * Pr(A) - real_t score_0 = 1; - real_t score_1 = 1; - - real_t sum = 0; - for (uint32_t ii = 0; ii < outputSet.size(); ii++) { - if (outputSet[ii] == 1) { - sum += outputSet[ii]; - } - } - - // Easy computation of priors, i.e. Pr(C_k) - prior_1 = sum / y_hat.size(); - prior_0 = 1 - prior_1; - - // Evaluating Theta... - computeTheta(); - - // Evaluating the vocab set... 
- computeVocab(); - - std::vector foundIndices; - - for (uint32_t j = 0; j < inputSet.size(); j++) { - for (uint32_t k = 0; k < vocab.size(); k++) { - if (inputSet[i][j] == vocab[k]) { - score_0 += std::log(theta[0][vocab[k]]); - score_1 += std::log(theta[1][vocab[k]]); - - foundIndices.push_back(k); - } - } - } - - for (uint32_t ii = 0; ii < vocab.size(); ii++) { - bool found = false; - for (uint32_t j = 0; j < foundIndices.size(); j++) { - if (vocab[ii] == vocab[foundIndices[j]]) { - found = true; - } - } - if (!found) { - score_0 += std::log(1 - theta[0][vocab[ii]]); - score_1 += std::log(1 - theta[1][vocab[ii]]); - } - } - - score_0 += std::log(prior_0); - score_1 += std::log(prior_1); - - score_0 = exp(score_0); - score_1 = exp(score_1); - - std::cout << score_0 << std::endl; - std::cout << score_1 << std::endl; - - // Assigning the traning example to a class - - if (score_0 > score_1) { - y_hat[i] = 0; - } else { - y_hat[i] = 1; - } - } -} diff --git a/mlpp/bernoulli_nb/bernoulli_nb_old.h b/mlpp/bernoulli_nb/bernoulli_nb_old.h deleted file mode 100644 index fa32f75..0000000 --- a/mlpp/bernoulli_nb/bernoulli_nb_old.h +++ /dev/null @@ -1,42 +0,0 @@ - -#ifndef MLPP_BERNOULLI_NB_OLD_H -#define MLPP_BERNOULLI_NB_OLD_H - -// -// BernoulliNB.hpp -// -// Created by Marc Melikyan on 1/17/21. -// - -#include "core/math/math_defs.h" - -#include -#include - -class MLPPBernoulliNBOld { -public: - MLPPBernoulliNBOld(std::vector> inputSet, std::vector outputSet); - std::vector modelSetTest(std::vector> X); - real_t modelTest(std::vector x); - real_t score(); - -private: - void computeVocab(); - void computeTheta(); - void Evaluate(); - - // Model Params - real_t prior_1 = 0; - real_t prior_0 = 0; - - std::vector> theta; - std::vector vocab; - int class_num; - - // Datasets - std::vector> inputSet; - std::vector outputSet; - std::vector y_hat; -}; - -#endif /* BernoulliNB_hpp */ \ No newline at end of file diff --git a/mlpp/c_log_log_reg/c_log_log_reg_old.cpp b/mlpp/c_log_log_reg/c_log_log_reg_old.cpp deleted file mode 100644 index fb04268..0000000 --- a/mlpp/c_log_log_reg/c_log_log_reg_old.cpp +++ /dev/null @@ -1,224 +0,0 @@ -// -// CLogLogReg.cpp -// -// Created by Marc Melikyan on 10/2/20. 
-// - -#include "c_log_log_reg_old.h" - -#include "../activation/activation_old.h" -#include "../cost/cost_old.h" -#include "../lin_alg/lin_alg_old.h" -#include "../regularization/reg_old.h" -#include "../utilities/utilities.h" - -#include -#include - -MLPPCLogLogRegOld::MLPPCLogLogRegOld(std::vector> inputSet, std::vector outputSet, std::string reg, real_t lambda, real_t alpha) : - inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha) { - y_hat.resize(n); - weights = MLPPUtilities::weightInitialization(k); - bias = MLPPUtilities::biasInitialization(); -} - -std::vector MLPPCLogLogRegOld::modelSetTest(std::vector> X) { - return Evaluate(X); -} - -real_t MLPPCLogLogRegOld::modelTest(std::vector x) { - return Evaluate(x); -} - -void MLPPCLogLogRegOld::gradientDescent(real_t learning_rate, int max_epoch, bool UI) { - MLPPActivationOld avn; - MLPPLinAlgOld alg; - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - forwardPass(); - - while (true) { - cost_prev = Cost(y_hat, outputSet); - - std::vector error = alg.subtraction(y_hat, outputSet); - - // Calculating the weight gradients - weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate / n, alg.mat_vec_mult(alg.transpose(inputSet), alg.hadamard_product(error, avn.cloglog(z, 1))))); - weights = regularization.regWeights(weights, lambda, alpha, reg); - - // Calculating the bias gradients - bias -= learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.cloglog(z, 1))) / n; - - forwardPass(); - - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); - MLPPUtilities::UI(weights, bias); - } - epoch++; - - if (epoch > max_epoch) { - break; - } - } -} - -void MLPPCLogLogRegOld::MLE(real_t learning_rate, int max_epoch, bool UI) { - MLPPActivationOld avn; - MLPPLinAlgOld alg; - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - forwardPass(); - - while (true) { - cost_prev = Cost(y_hat, outputSet); - - std::vector error = alg.subtraction(y_hat, outputSet); - - weights = alg.addition(weights, alg.scalarMultiply(learning_rate / n, alg.mat_vec_mult(alg.transpose(inputSet), alg.hadamard_product(error, avn.cloglog(z, 1))))); - weights = regularization.regWeights(weights, lambda, alpha, reg); - - // Calculating the bias gradients - bias += learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.cloglog(z, 1))) / n; - forwardPass(); - - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); - MLPPUtilities::UI(weights, bias); - } - epoch++; - - if (epoch > max_epoch) { - break; - } - } -} - -void MLPPCLogLogRegOld::SGD(real_t learning_rate, int max_epoch, bool UI) { - MLPPLinAlgOld alg; - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - forwardPass(); - - while (true) { - std::random_device rd; - std::default_random_engine generator(rd()); - std::uniform_int_distribution distribution(0, int(n - 1)); - int outputIndex = distribution(generator); - - real_t y_hat = Evaluate(inputSet[outputIndex]); - real_t z = propagate(inputSet[outputIndex]); - cost_prev = Cost({ y_hat }, { outputSet[outputIndex] }); - - real_t error = y_hat - outputSet[outputIndex]; - - // Weight Updation - weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate * error * exp(z - exp(z)), inputSet[outputIndex])); - weights = regularization.regWeights(weights, lambda, alpha, reg); - - // Bias updation - bias -= learning_rate * error * exp(z - exp(z)); - - y_hat = Evaluate({ 
inputSet[outputIndex] }); - - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost({ y_hat }, { outputSet[outputIndex] })); - MLPPUtilities::UI(weights, bias); - } - epoch++; - - if (epoch > max_epoch) { - break; - } - } - forwardPass(); -} - -void MLPPCLogLogRegOld::MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI) { - MLPPActivationOld avn; - MLPPLinAlgOld alg; - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - - // Creating the mini-batches - int n_mini_batch = n / mini_batch_size; - auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch); - auto inputMiniBatches = std::get<0>(batches); - auto outputMiniBatches = std::get<1>(batches); - - while (true) { - for (int i = 0; i < n_mini_batch; i++) { - std::vector y_hat = Evaluate(inputMiniBatches[i]); - std::vector z = propagate(inputMiniBatches[i]); - cost_prev = Cost(y_hat, outputMiniBatches[i]); - - std::vector error = alg.subtraction(y_hat, outputMiniBatches[i]); - - // Calculating the weight gradients - weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate / n, alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), alg.hadamard_product(error, avn.cloglog(z, 1))))); - weights = regularization.regWeights(weights, lambda, alpha, reg); - - // Calculating the bias gradients - bias -= learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.cloglog(z, 1))) / n; - - forwardPass(); - - y_hat = Evaluate(inputMiniBatches[i]); - - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i])); - MLPPUtilities::UI(weights, bias); - } - } - epoch++; - if (epoch > max_epoch) { - break; - } - } - forwardPass(); -} - -real_t MLPPCLogLogRegOld::score() { - MLPPUtilities util; - return util.performance(y_hat, outputSet); -} - -real_t MLPPCLogLogRegOld::Cost(std::vector y_hat, std::vector y) { - MLPPRegOld regularization; - class MLPPCostOld cost; - return cost.MSE(y_hat, y) + regularization.regTerm(weights, lambda, alpha, reg); -} - -std::vector MLPPCLogLogRegOld::Evaluate(std::vector> X) { - MLPPLinAlgOld alg; - MLPPActivationOld avn; - return avn.cloglog(alg.scalarAdd(bias, alg.mat_vec_mult(X, weights))); -} - -std::vector MLPPCLogLogRegOld::propagate(std::vector> X) { - MLPPLinAlgOld alg; - return alg.scalarAdd(bias, alg.mat_vec_mult(X, weights)); -} - -real_t MLPPCLogLogRegOld::Evaluate(std::vector x) { - MLPPLinAlgOld alg; - MLPPActivationOld avn; - return avn.cloglog(alg.dot(weights, x) + bias); -} - -real_t MLPPCLogLogRegOld::propagate(std::vector x) { - MLPPLinAlgOld alg; - return alg.dot(weights, x) + bias; -} - -// cloglog ( wTx + b ) -void MLPPCLogLogRegOld::forwardPass() { - MLPPActivationOld avn; - - z = propagate(inputSet); - y_hat = avn.cloglog(z); -} diff --git a/mlpp/c_log_log_reg/c_log_log_reg_old.h b/mlpp/c_log_log_reg/c_log_log_reg_old.h deleted file mode 100644 index 5278299..0000000 --- a/mlpp/c_log_log_reg/c_log_log_reg_old.h +++ /dev/null @@ -1,54 +0,0 @@ - -#ifndef MLPP_C_LOG_LOG_REG_OLD_H -#define MLPP_C_LOG_LOG_REG_OLD_H - -// -// CLogLogReg.hpp -// -// Created by Marc Melikyan on 10/2/20. 
-// - -#include "core/math/math_defs.h" - -#include -#include - -class MLPPCLogLogRegOld { -public: - MLPPCLogLogRegOld(std::vector> inputSet, std::vector outputSet, std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5); - std::vector modelSetTest(std::vector> X); - real_t modelTest(std::vector x); - void gradientDescent(real_t learning_rate, int max_epoch, bool UI = false); - void MLE(real_t learning_rate, int max_epoch, bool UI = false); - void SGD(real_t learning_rate, int max_epoch, bool UI = false); - void MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI = false); - real_t score(); - -private: - void weightInitialization(int k); - void biasInitialization(); - real_t Cost(std::vector y_hat, std::vector y); - - std::vector Evaluate(std::vector> X); - std::vector propagate(std::vector> X); - real_t Evaluate(std::vector x); - real_t propagate(std::vector x); - void forwardPass(); - - std::vector> inputSet; - std::vector outputSet; - std::vector y_hat; - std::vector z; - std::vector weights; - real_t bias; - - int n; - int k; - - // Regularization Params - std::string reg; - real_t lambda; - real_t alpha; /* This is the controlling param for Elastic Net*/ -}; - -#endif /* CLogLogReg_hpp */ diff --git a/mlpp/convolutions/convolutions_old.cpp b/mlpp/convolutions/convolutions_old.cpp deleted file mode 100644 index b24e794..0000000 --- a/mlpp/convolutions/convolutions_old.cpp +++ /dev/null @@ -1,378 +0,0 @@ -// -// Convolutions.cpp -// -// Created by Marc Melikyan on 4/6/21. -// - -#include "../convolutions/convolutions_old.h" - -#include "../lin_alg/lin_alg_old.h" -#include "../stat/stat_old.h" -#include -#include - -#ifndef M_PI -#define M_PI 3.141592653 -#endif - -std::vector> MLPPConvolutionsOld::convolve_2d(std::vector> input, std::vector> filter, int S, int P) { - MLPPLinAlgOld alg; - std::vector> feature_map; - uint32_t N = input.size(); - uint32_t F = filter.size(); - uint32_t map_size = (N - F + 2 * P) / S + 1; // This is computed as ⌊map_size⌋ by def- thanks C++! 
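	// Illustrative sizing example: N = 6, F = 3, S = 1, P = 0 gives map_size = (6 - 3 + 0) / 1 + 1 = 4,
	// while N = 7, F = 3, S = 2, P = 0 gives (7 - 3) / 2 + 1 = 3; the integer division supplies the floor.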
- - if (P != 0) { - std::vector> padded_input; - padded_input.resize(N + 2 * P); - for (uint32_t i = 0; i < padded_input.size(); i++) { - padded_input[i].resize(N + 2 * P); - } - for (uint32_t i = 0; i < padded_input.size(); i++) { - for (uint32_t j = 0; j < padded_input[i].size(); j++) { - if (i - P < 0 || j - P < 0 || i - P > input.size() - 1 || j - P > input[0].size() - 1) { - padded_input[i][j] = 0; - } else { - padded_input[i][j] = input[i - P][j - P]; - } - } - } - input.resize(padded_input.size()); - for (uint32_t i = 0; i < padded_input.size(); i++) { - input[i].resize(padded_input[i].size()); - } - input = padded_input; - } - - feature_map.resize(map_size); - for (uint32_t i = 0; i < map_size; i++) { - feature_map[i].resize(map_size); - } - - for (uint32_t i = 0; i < map_size; i++) { - for (uint32_t j = 0; j < map_size; j++) { - std::vector convolving_input; - for (uint32_t k = 0; k < F; k++) { - for (uint32_t p = 0; p < F; p++) { - if (i == 0 && j == 0) { - convolving_input.push_back(input[i + k][j + p]); - } else if (i == 0) { - convolving_input.push_back(input[i + k][j + (S - 1) + p]); - } else if (j == 0) { - convolving_input.push_back(input[i + (S - 1) + k][j + p]); - } else { - convolving_input.push_back(input[i + (S - 1) + k][j + (S - 1) + p]); - } - } - } - feature_map[i][j] = alg.dot(convolving_input, alg.flatten(filter)); - } - } - return feature_map; -} - -std::vector>> MLPPConvolutionsOld::convolve_3d(std::vector>> input, std::vector>> filter, int S, int P) { - MLPPLinAlgOld alg; - std::vector>> feature_map; - uint32_t N = input[0].size(); - uint32_t F = filter[0].size(); - uint32_t C = filter.size() / input.size(); - uint32_t map_size = (N - F + 2 * P) / S + 1; // This is computed as ⌊map_size⌋ by def. - - if (P != 0) { - for (uint32_t c = 0; c < input.size(); c++) { - std::vector> padded_input; - padded_input.resize(N + 2 * P); - for (uint32_t i = 0; i < padded_input.size(); i++) { - padded_input[i].resize(N + 2 * P); - } - for (uint32_t i = 0; i < padded_input.size(); i++) { - for (uint32_t j = 0; j < padded_input[i].size(); j++) { - if (i - P < 0 || j - P < 0 || i - P > input[c].size() - 1 || j - P > input[c][0].size() - 1) { - padded_input[i][j] = 0; - } else { - padded_input[i][j] = input[c][i - P][j - P]; - } - } - } - input[c].resize(padded_input.size()); - for (uint32_t i = 0; i < padded_input.size(); i++) { - input[c][i].resize(padded_input[i].size()); - } - input[c] = padded_input; - } - } - - feature_map.resize(C); - for (uint32_t i = 0; i < feature_map.size(); i++) { - feature_map[i].resize(map_size); - for (uint32_t j = 0; j < feature_map[i].size(); j++) { - feature_map[i][j].resize(map_size); - } - } - - for (uint32_t c = 0; c < C; c++) { - for (uint32_t i = 0; i < map_size; i++) { - for (uint32_t j = 0; j < map_size; j++) { - std::vector convolving_input; - for (uint32_t t = 0; t < input.size(); t++) { - for (uint32_t k = 0; k < F; k++) { - for (uint32_t p = 0; p < F; p++) { - if (i == 0 && j == 0) { - convolving_input.push_back(input[t][i + k][j + p]); - } else if (i == 0) { - convolving_input.push_back(input[t][i + k][j + (S - 1) + p]); - } else if (j == 0) { - convolving_input.push_back(input[t][i + (S - 1) + k][j + p]); - } else { - convolving_input.push_back(input[t][i + (S - 1) + k][j + (S - 1) + p]); - } - } - } - } - feature_map[c][i][j] = alg.dot(convolving_input, alg.flatten(filter)); - } - } - } - return feature_map; -} - -std::vector> MLPPConvolutionsOld::pool_2d(std::vector> input, int F, int S, std::string type) { - MLPPLinAlgOld alg; - 
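	// pool_2d slides an F x F window over the input with stride S and reduces each window with
	// the requested statistic: "Average" takes the mean, "Min" the minimum, and any other type
	// falls through to the maximum. The pooled map is floor((N - F) / S + 1) cells per side,
	// matching the convolution sizing above with P = 0.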
std::vector> pooled_map; - uint32_t N = input.size(); - uint32_t map_size = floor((N - F) / S + 1); - - pooled_map.resize(map_size); - for (uint32_t i = 0; i < map_size; i++) { - pooled_map[i].resize(map_size); - } - - for (uint32_t i = 0; i < map_size; i++) { - for (uint32_t j = 0; j < map_size; j++) { - std::vector pooling_input; - for (int k = 0; k < F; k++) { - for (int p = 0; p < F; p++) { - if (i == 0 && j == 0) { - pooling_input.push_back(input[i + k][j + p]); - } else if (i == 0) { - pooling_input.push_back(input[i + k][j + (S - 1) + p]); - } else if (j == 0) { - pooling_input.push_back(input[i + (S - 1) + k][j + p]); - } else { - pooling_input.push_back(input[i + (S - 1) + k][j + (S - 1) + p]); - } - } - } - if (type == "Average") { - MLPPStatOld stat; - pooled_map[i][j] = stat.mean(pooling_input); - } else if (type == "Min") { - pooled_map[i][j] = alg.min(pooling_input); - } else { - pooled_map[i][j] = alg.max(pooling_input); - } - } - } - return pooled_map; -} - -std::vector>> MLPPConvolutionsOld::pool_3d(std::vector>> input, int F, int S, std::string type) { - std::vector>> pooled_map; - for (uint32_t i = 0; i < input.size(); i++) { - pooled_map.push_back(pool_2d(input[i], F, S, type)); - } - return pooled_map; -} - -real_t MLPPConvolutionsOld::global_pool_2d(std::vector> input, std::string type) { - MLPPLinAlgOld alg; - if (type == "Average") { - MLPPStatOld stat; - return stat.mean(alg.flatten(input)); - } else if (type == "Min") { - return alg.min(alg.flatten(input)); - } else { - return alg.max(alg.flatten(input)); - } -} - -std::vector MLPPConvolutionsOld::global_pool_3d(std::vector>> input, std::string type) { - std::vector pooled_map; - for (uint32_t i = 0; i < input.size(); i++) { - pooled_map.push_back(global_pool_2d(input[i], type)); - } - return pooled_map; -} - -real_t MLPPConvolutionsOld::gaussian_2d(real_t x, real_t y, real_t std) { - real_t std_sq = std * std; - return 1 / (2 * M_PI * std_sq) * std::exp(-(x * x + y * y) / 2 * std_sq); -} - -std::vector> MLPPConvolutionsOld::gaussian_filter_2d(int size, real_t std) { - std::vector> filter; - filter.resize(size); - for (uint32_t i = 0; i < filter.size(); i++) { - filter[i].resize(size); - } - for (int i = 0; i < size; i++) { - for (int j = 0; j < size; j++) { - filter[i][j] = gaussian_2d(i - (size - 1) / 2, (size - 1) / 2 - j, std); - } - } - return filter; -} - -/* -Indeed a filter could have been used for this purpose, but I decided that it would've just -been easier to carry out the calculation explicitly, mainly because it is more informative, -and also because my convolution algorithm is only built for filters with equally sized -heights and widths. -*/ -std::vector> MLPPConvolutionsOld::dx(std::vector> input) { - std::vector> deriv; // We assume a gray scale image. 
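	// Interior pixels get the horizontal central difference input[i][j + 1] - input[i][j - 1];
	// the first and last columns fall back to a one-sided difference by treating the missing
	// neighbour as zero (the implicit zero-padding noted below). dy() does the same vertically.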
- deriv.resize(input.size()); - for (uint32_t i = 0; i < deriv.size(); i++) { - deriv[i].resize(input[i].size()); - } - - for (uint32_t i = 0; i < input.size(); i++) { - for (uint32_t j = 0; j < input[i].size(); j++) { - if (j != 0 && j != input.size() - 1) { - deriv[i][j] = input[i][j + 1] - input[i][j - 1]; - } else if (j == 0) { - deriv[i][j] = input[i][j + 1] - 0; // Implicit zero-padding - } else { - deriv[i][j] = 0 - input[i][j - 1]; // Implicit zero-padding - } - } - } - return deriv; -} - -std::vector> MLPPConvolutionsOld::dy(std::vector> input) { - std::vector> deriv; - deriv.resize(input.size()); - for (uint32_t i = 0; i < deriv.size(); i++) { - deriv[i].resize(input[i].size()); - } - - for (uint32_t i = 0; i < input.size(); i++) { - for (uint32_t j = 0; j < input[i].size(); j++) { - if (i != 0 && i != input.size() - 1) { - deriv[i][j] = input[i - 1][j] - input[i + 1][j]; - } else if (i == 0) { - deriv[i][j] = 0 - input[i + 1][j]; // Implicit zero-padding - } else { - deriv[i][j] = input[i - 1][j] - 0; // Implicit zero-padding - } - } - } - return deriv; -} - -std::vector> MLPPConvolutionsOld::grad_magnitude(std::vector> input) { - MLPPLinAlgOld alg; - std::vector> x_deriv_2 = alg.hadamard_product(dx(input), dx(input)); - std::vector> y_deriv_2 = alg.hadamard_product(dy(input), dy(input)); - return alg.sqrt(alg.addition(x_deriv_2, y_deriv_2)); -} - -std::vector> MLPPConvolutionsOld::grad_orientation(std::vector> input) { - std::vector> deriv; - deriv.resize(input.size()); - for (uint32_t i = 0; i < deriv.size(); i++) { - deriv[i].resize(input[i].size()); - } - - std::vector> x_deriv = dx(input); - std::vector> y_deriv = dy(input); - for (uint32_t i = 0; i < deriv.size(); i++) { - for (uint32_t j = 0; j < deriv[i].size(); j++) { - deriv[i][j] = std::atan2(y_deriv[i][j], x_deriv[i][j]); - } - } - return deriv; -} - -std::vector>> MLPPConvolutionsOld::compute_m(std::vector> input) { - real_t const SIGMA = 1; - real_t const GAUSSIAN_SIZE = 3; - - real_t const GAUSSIAN_PADDING = ((input.size() - 1) + GAUSSIAN_SIZE - input.size()) / 2; // Convs must be same. - std::cout << GAUSSIAN_PADDING << std::endl; - MLPPLinAlgOld alg; - std::vector> x_deriv = dx(input); - std::vector> y_deriv = dy(input); - - std::vector> gaussian_filter = gaussian_filter_2d(GAUSSIAN_SIZE, SIGMA); // Sigma of 1, size of 3. - std::vector> xx_deriv = convolve_2d(alg.hadamard_product(x_deriv, x_deriv), gaussian_filter, 1, GAUSSIAN_PADDING); - std::vector> yy_deriv = convolve_2d(alg.hadamard_product(y_deriv, y_deriv), gaussian_filter, 1, GAUSSIAN_PADDING); - std::vector> xy_deriv = convolve_2d(alg.hadamard_product(x_deriv, y_deriv), gaussian_filter, 1, GAUSSIAN_PADDING); - - std::vector>> M = { xx_deriv, yy_deriv, xy_deriv }; - return M; -} -std::vector> MLPPConvolutionsOld::harris_corner_detection(std::vector> input) { - real_t const k = 0.05; // Empirically determined wherein k -> [0.04, 0.06], though conventionally 0.05 is typically used as well. - MLPPLinAlgOld alg; - std::vector>> M = compute_m(input); - std::vector> det = alg.subtraction(alg.hadamard_product(M[0], M[1]), alg.hadamard_product(M[2], M[2])); - std::vector> trace = alg.addition(M[0], M[1]); - - // The reason this is not a scalar is because xx_deriv, xy_deriv, yx_deriv, and yy_deriv are not scalars. 
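	// Per-pixel Harris response: R = det(M) - k * trace(M)^2, where det(M) = I_xx * I_yy - I_xy^2
	// and trace(M) = I_xx + I_yy. Below, pixels with R > 0 are labelled corners ("C"), R < 0
	// edges ("E"), and R == 0 neither ("N").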
- std::vector> r = alg.subtraction(det, alg.scalarMultiply(k, alg.hadamard_product(trace, trace))); - std::vector> imageTypes; - imageTypes.resize(r.size()); - alg.printMatrix(r); - for (uint32_t i = 0; i < r.size(); i++) { - imageTypes[i].resize(r[i].size()); - for (uint32_t j = 0; j < r[i].size(); j++) { - if (r[i][j] > 0) { - imageTypes[i][j] = "C"; - } else if (r[i][j] < 0) { - imageTypes[i][j] = "E"; - } else { - imageTypes[i][j] = "N"; - } - } - } - return imageTypes; -} - -std::vector> MLPPConvolutionsOld::get_prewitt_horizontal() { - return _prewitt_horizontal; -} -std::vector> MLPPConvolutionsOld::get_prewitt_vertical() { - return _prewitt_vertical; -} -std::vector> MLPPConvolutionsOld::get_sobel_horizontal() { - return _sobel_horizontal; -} -std::vector> MLPPConvolutionsOld::get_sobel_vertical() { - return _sobel_vertical; -} -std::vector> MLPPConvolutionsOld::get_scharr_horizontal() { - return _scharr_horizontal; -} -std::vector> MLPPConvolutionsOld::get_scharr_vertical() { - return _scharr_vertical; -} -std::vector> MLPPConvolutionsOld::get_roberts_horizontal() { - return _roberts_horizontal; -} -std::vector> MLPPConvolutionsOld::get_roberts_vertical() { - return _roberts_vertical; -} - -MLPPConvolutionsOld::MLPPConvolutionsOld() { - _prewitt_horizontal = { { 1, 1, 1 }, { 0, 0, 0 }, { -1, -1, -1 } }; - _prewitt_vertical = { { 1, 0, -1 }, { 1, 0, -1 }, { 1, 0, -1 } }; - _sobel_horizontal = { { 1, 2, 1 }, { 0, 0, 0 }, { -1, -2, -1 } }; - _sobel_vertical = { { -1, 0, 1 }, { -2, 0, 2 }, { -1, 0, 1 } }; - _scharr_horizontal = { { 3, 10, 3 }, { 0, 0, 0 }, { -3, -10, -3 } }; - _scharr_vertical = { { 3, 0, -3 }, { 10, 0, -10 }, { 3, 0, -3 } }; - _roberts_horizontal = { { 0, 1 }, { -1, 0 } }; - _roberts_vertical = { { 1, 0 }, { 0, -1 } }; -} diff --git a/mlpp/convolutions/convolutions_old.h b/mlpp/convolutions/convolutions_old.h deleted file mode 100644 index 693d272..0000000 --- a/mlpp/convolutions/convolutions_old.h +++ /dev/null @@ -1,56 +0,0 @@ - -#ifndef MLPP_CONVOLUTIONS_OLD_H -#define MLPP_CONVOLUTIONS_OLD_H - -#include -#include - -#include "core/math/math_defs.h" -#include "core/int_types.h" - -class MLPPConvolutionsOld { -public: - std::vector> convolve_2d(std::vector> input, std::vector> filter, int S, int P = 0); - std::vector>> convolve_3d(std::vector>> input, std::vector>> filter, int S, int P = 0); - - std::vector> pool_2d(std::vector> input, int F, int S, std::string type); - std::vector>> pool_3d(std::vector>> input, int F, int S, std::string type); - - real_t global_pool_2d(std::vector> input, std::string type); - std::vector global_pool_3d(std::vector>> input, std::string type); - - real_t gaussian_2d(real_t x, real_t y, real_t std); - std::vector> gaussian_filter_2d(int size, real_t std); - - std::vector> dx(std::vector> input); - std::vector> dy(std::vector> input); - - std::vector> grad_magnitude(std::vector> input); - std::vector> grad_orientation(std::vector> input); - - std::vector>> compute_m(std::vector> input); - std::vector> harris_corner_detection(std::vector> input); - - std::vector> get_prewitt_horizontal(); - std::vector> get_prewitt_vertical(); - std::vector> get_sobel_horizontal(); - std::vector> get_sobel_vertical(); - std::vector> get_scharr_horizontal(); - std::vector> get_scharr_vertical(); - std::vector> get_roberts_horizontal(); - std::vector> get_roberts_vertical(); - - MLPPConvolutionsOld(); - -protected: - std::vector> _prewitt_horizontal; - std::vector> _prewitt_vertical; - std::vector> _sobel_horizontal; - std::vector> _sobel_vertical; - 
std::vector> _scharr_horizontal; - std::vector> _scharr_vertical; - std::vector> _roberts_horizontal; - std::vector> _roberts_vertical; -}; - -#endif // Convolutions_hpp \ No newline at end of file diff --git a/mlpp/cost/cost_old.cpp b/mlpp/cost/cost_old.cpp deleted file mode 100644 index 90d36ec..0000000 --- a/mlpp/cost/cost_old.cpp +++ /dev/null @@ -1,395 +0,0 @@ -// -// Reg.cpp -// -// Created by Marc Melikyan on 1/16/21. -// - -#include "cost_old.h" -#include "../lin_alg/lin_alg_old.h" -#include "../regularization/reg_old.h" -#include -#include - -real_t MLPPCostOld::MSE(std::vector y_hat, std::vector y) { - real_t sum = 0; - for (uint32_t i = 0; i < y_hat.size(); i++) { - sum += (y_hat[i] - y[i]) * (y_hat[i] - y[i]); - } - return sum / 2 * y_hat.size(); -} - -real_t MLPPCostOld::MSE(std::vector> y_hat, std::vector> y) { - real_t sum = 0; - for (uint32_t i = 0; i < y_hat.size(); i++) { - for (uint32_t j = 0; j < y_hat[i].size(); j++) { - sum += (y_hat[i][j] - y[i][j]) * (y_hat[i][j] - y[i][j]); - } - } - return sum / 2 * y_hat.size(); -} - -std::vector MLPPCostOld::MSEDeriv(std::vector y_hat, std::vector y) { - MLPPLinAlgOld alg; - return alg.subtraction(y_hat, y); -} - -std::vector> MLPPCostOld::MSEDeriv(std::vector> y_hat, std::vector> y) { - MLPPLinAlgOld alg; - return alg.subtraction(y_hat, y); -} - -real_t MLPPCostOld::RMSE(std::vector y_hat, std::vector y) { - real_t sum = 0; - for (uint32_t i = 0; i < y_hat.size(); i++) { - sum += (y_hat[i] - y[i]) * (y_hat[i] - y[i]); - } - return sqrt(sum / y_hat.size()); -} - -real_t MLPPCostOld::RMSE(std::vector> y_hat, std::vector> y) { - real_t sum = 0; - for (uint32_t i = 0; i < y_hat.size(); i++) { - for (uint32_t j = 0; j < y_hat[i].size(); j++) { - sum += (y_hat[i][j] - y[i][j]) * (y_hat[i][j] - y[i][j]); - } - } - return sqrt(sum / y_hat.size()); -} - -std::vector MLPPCostOld::RMSEDeriv(std::vector y_hat, std::vector y) { - MLPPLinAlgOld alg; - return alg.scalarMultiply(1 / (2 * sqrt(MSE(y_hat, y))), MSEDeriv(y_hat, y)); -} - -std::vector> MLPPCostOld::RMSEDeriv(std::vector> y_hat, std::vector> y) { - MLPPLinAlgOld alg; - return alg.scalarMultiply(1 / (2 / sqrt(MSE(y_hat, y))), MSEDeriv(y_hat, y)); -} - -real_t MLPPCostOld::MAE(std::vector y_hat, std::vector y) { - real_t sum = 0; - for (uint32_t i = 0; i < y_hat.size(); i++) { - sum += abs((y_hat[i] - y[i])); - } - return sum / y_hat.size(); -} - -real_t MLPPCostOld::MAE(std::vector> y_hat, std::vector> y) { - real_t sum = 0; - for (uint32_t i = 0; i < y_hat.size(); i++) { - for (uint32_t j = 0; j < y_hat[i].size(); j++) { - sum += abs((y_hat[i][j] - y[i][j])); - } - } - return sum / y_hat.size(); -} - -std::vector MLPPCostOld::MAEDeriv(std::vector y_hat, std::vector y) { - std::vector deriv; - deriv.resize(y_hat.size()); - for (uint32_t i = 0; i < deriv.size(); i++) { - if (y_hat[i] < 0) { - deriv[i] = -1; - } else if (y_hat[i] == 0) { - deriv[i] = 0; - } else { - deriv[i] = 1; - } - } - return deriv; -} - -std::vector> MLPPCostOld::MAEDeriv(std::vector> y_hat, std::vector> y) { - std::vector> deriv; - deriv.resize(y_hat.size()); - for (uint32_t i = 0; i < deriv.size(); i++) { - deriv.resize(y_hat[i].size()); - } - for (uint32_t i = 0; i < deriv.size(); i++) { - for (uint32_t j = 0; j < deriv[i].size(); j++) { - if (y_hat[i][j] < 0) { - deriv[i][j] = -1; - } else if (y_hat[i][j] == 0) { - deriv[i][j] = 0; - } else { - deriv[i][j] = 1; - } - } - } - return deriv; -} - -real_t MLPPCostOld::MBE(std::vector y_hat, std::vector y) { - real_t sum = 0; - for (uint32_t i = 0; i < 
y_hat.size(); i++) { - sum += (y_hat[i] - y[i]); - } - return sum / y_hat.size(); -} - -real_t MLPPCostOld::MBE(std::vector> y_hat, std::vector> y) { - real_t sum = 0; - for (uint32_t i = 0; i < y_hat.size(); i++) { - for (uint32_t j = 0; j < y_hat[i].size(); j++) { - sum += (y_hat[i][j] - y[i][j]); - } - } - return sum / y_hat.size(); -} - -std::vector MLPPCostOld::MBEDeriv(std::vector y_hat, std::vector y) { - MLPPLinAlgOld alg; - return alg.onevec(y_hat.size()); -} - -std::vector> MLPPCostOld::MBEDeriv(std::vector> y_hat, std::vector> y) { - MLPPLinAlgOld alg; - return alg.onemat(y_hat.size(), y_hat[0].size()); -} - -real_t MLPPCostOld::LogLoss(std::vector y_hat, std::vector y) { - real_t sum = 0; - real_t eps = 1e-8; - for (uint32_t i = 0; i < y_hat.size(); i++) { - sum += -(y[i] * std::log(y_hat[i] + eps) + (1 - y[i]) * std::log(1 - y_hat[i] + eps)); - } - - return sum / y_hat.size(); -} - -real_t MLPPCostOld::LogLoss(std::vector> y_hat, std::vector> y) { - real_t sum = 0; - real_t eps = 1e-8; - for (uint32_t i = 0; i < y_hat.size(); i++) { - for (uint32_t j = 0; j < y_hat[i].size(); j++) { - sum += -(y[i][j] * std::log(y_hat[i][j] + eps) + (1 - y[i][j]) * std::log(1 - y_hat[i][j] + eps)); - } - } - - return sum / y_hat.size(); -} - -std::vector MLPPCostOld::LogLossDeriv(std::vector y_hat, std::vector y) { - MLPPLinAlgOld alg; - return alg.addition(alg.scalarMultiply(-1, alg.elementWiseDivision(y, y_hat)), alg.elementWiseDivision(alg.scalarMultiply(-1, alg.scalarAdd(-1, y)), alg.scalarMultiply(-1, alg.scalarAdd(-1, y_hat)))); -} - -std::vector> MLPPCostOld::LogLossDeriv(std::vector> y_hat, std::vector> y) { - MLPPLinAlgOld alg; - return alg.addition(alg.scalarMultiply(-1, alg.elementWiseDivision(y, y_hat)), alg.elementWiseDivision(alg.scalarMultiply(-1, alg.scalarAdd(-1, y)), alg.scalarMultiply(-1, alg.scalarAdd(-1, y_hat)))); -} - -real_t MLPPCostOld::CrossEntropy(std::vector y_hat, std::vector y) { - real_t sum = 0; - for (uint32_t i = 0; i < y_hat.size(); i++) { - sum += y[i] * std::log(y_hat[i]); - } - - return -1 * sum; -} - -real_t MLPPCostOld::CrossEntropy(std::vector> y_hat, std::vector> y) { - real_t sum = 0; - for (uint32_t i = 0; i < y_hat.size(); i++) { - for (uint32_t j = 0; j < y_hat[i].size(); j++) { - sum += y[i][j] * std::log(y_hat[i][j]); - } - } - - return -1 * sum; -} - -std::vector MLPPCostOld::CrossEntropyDeriv(std::vector y_hat, std::vector y) { - MLPPLinAlgOld alg; - return alg.scalarMultiply(-1, alg.elementWiseDivision(y, y_hat)); -} - -std::vector> MLPPCostOld::CrossEntropyDeriv(std::vector> y_hat, std::vector> y) { - MLPPLinAlgOld alg; - return alg.scalarMultiply(-1, alg.elementWiseDivision(y, y_hat)); -} - -real_t MLPPCostOld::HuberLoss(std::vector y_hat, std::vector y, real_t delta) { - real_t sum = 0; - for (uint32_t i = 0; i < y_hat.size(); i++) { - if (abs(y[i] - y_hat[i]) <= delta) { - sum += (y[i] - y_hat[i]) * (y[i] - y_hat[i]); - } else { - sum += 2 * delta * abs(y[i] - y_hat[i]) - delta * delta; - } - } - return sum; -} - -real_t MLPPCostOld::HuberLoss(std::vector> y_hat, std::vector> y, real_t delta) { - real_t sum = 0; - for (uint32_t i = 0; i < y_hat.size(); i++) { - for (uint32_t j = 0; j < y_hat[i].size(); j++) { - if (abs(y[i][j] - y_hat[i][j]) <= delta) { - sum += (y[i][j] - y_hat[i][j]) * (y[i][j] - y_hat[i][j]); - } else { - sum += 2 * delta * abs(y[i][j] - y_hat[i][j]) - delta * delta; - } - } - } - return sum; -} - -std::vector MLPPCostOld::HuberLossDeriv(std::vector y_hat, std::vector y, real_t delta) { - std::vector deriv; - 
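	// Inside the |y - y_hat| <= delta band the derivative is -(y - y_hat); outside it the value
	// is +/- 2 * delta, with the sign taken from y_hat itself rather than from the residual
	// (and 0 when y_hat is exactly 0). Note that deriv is resized to y_hat.size() just below and
	// then filled with push_back, so the returned vector ends up twice that length, with the
	// leading half left zero-initialised.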
deriv.resize(y_hat.size()); - - for (uint32_t i = 0; i < y_hat.size(); i++) { - if (abs(y[i] - y_hat[i]) <= delta) { - deriv.push_back(-(y[i] - y_hat[i])); - } else { - if (y_hat[i] > 0 || y_hat[i] < 0) { - deriv.push_back(2 * delta * (y_hat[i] / abs(y_hat[i]))); - } else { - deriv.push_back(0); - } - } - } - return deriv; -} - -std::vector> MLPPCostOld::HuberLossDeriv(std::vector> y_hat, std::vector> y, real_t delta) { - std::vector> deriv; - deriv.resize(y_hat.size()); - for (uint32_t i = 0; i < deriv.size(); i++) { - deriv[i].resize(y_hat[i].size()); - } - - for (uint32_t i = 0; i < y_hat.size(); i++) { - for (uint32_t j = 0; j < y_hat[i].size(); j++) { - if (abs(y[i][j] - y_hat[i][j]) <= delta) { - deriv[i].push_back(-(y[i][j] - y_hat[i][j])); - } else { - if (y_hat[i][j] > 0 || y_hat[i][j] < 0) { - deriv[i].push_back(2 * delta * (y_hat[i][j] / abs(y_hat[i][j]))); - } else { - deriv[i].push_back(0); - } - } - } - } - return deriv; -} - -real_t MLPPCostOld::HingeLoss(std::vector y_hat, std::vector y) { - real_t sum = 0; - for (uint32_t i = 0; i < y_hat.size(); i++) { - sum += fmax(0, 1 - y[i] * y_hat[i]); - } - - return sum / y_hat.size(); -} - -real_t MLPPCostOld::HingeLoss(std::vector> y_hat, std::vector> y) { - real_t sum = 0; - for (uint32_t i = 0; i < y_hat.size(); i++) { - for (uint32_t j = 0; j < y_hat[i].size(); j++) { - sum += fmax(0, 1 - y[i][j] * y_hat[i][j]); - } - } - - return sum / y_hat.size(); -} - -std::vector MLPPCostOld::HingeLossDeriv(std::vector y_hat, std::vector y) { - std::vector deriv; - deriv.resize(y_hat.size()); - for (uint32_t i = 0; i < y_hat.size(); i++) { - if (1 - y[i] * y_hat[i] > 0) { - deriv[i] = -y[i]; - } else { - deriv[i] = 0; - } - } - return deriv; -} - -std::vector> MLPPCostOld::HingeLossDeriv(std::vector> y_hat, std::vector> y) { - std::vector> deriv; - for (uint32_t i = 0; i < y_hat.size(); i++) { - for (uint32_t j = 0; j < y_hat[i].size(); j++) { - if (1 - y[i][j] * y_hat[i][j] > 0) { - deriv[i][j] = -y[i][j]; - } else { - deriv[i][j] = 0; - } - } - } - return deriv; -} - -real_t MLPPCostOld::WassersteinLoss(std::vector y_hat, std::vector y) { - real_t sum = 0; - for (uint32_t i = 0; i < y_hat.size(); i++) { - sum += y_hat[i] * y[i]; - } - return -sum / y_hat.size(); -} - -real_t MLPPCostOld::WassersteinLoss(std::vector> y_hat, std::vector> y) { - real_t sum = 0; - for (uint32_t i = 0; i < y_hat.size(); i++) { - for (uint32_t j = 0; j < y_hat[i].size(); j++) { - sum += y_hat[i][j] * y[i][j]; - } - } - return -sum / y_hat.size(); -} - -std::vector MLPPCostOld::WassersteinLossDeriv(std::vector y_hat, std::vector y) { - MLPPLinAlgOld alg; - return alg.scalarMultiply(-1, y); // Simple. -} - -std::vector> MLPPCostOld::WassersteinLossDeriv(std::vector> y_hat, std::vector> y) { - MLPPLinAlgOld alg; - return alg.scalarMultiply(-1, y); // Simple. 
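	// (The Wasserstein loss above is the negated mean of y_hat * y, so its gradient with respect
	// to y_hat is proportional to -y; the averaging factor is dropped here and left to be
	// absorbed by the learning rate.)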
-} - -real_t MLPPCostOld::HingeLoss(std::vector y_hat, std::vector y, std::vector weights, real_t C) { - MLPPRegOld regularization; - return C * HingeLoss(y_hat, y) + regularization.regTerm(weights, 1, 0, "Ridge"); -} -real_t MLPPCostOld::HingeLoss(std::vector> y_hat, std::vector> y, std::vector> weights, real_t C) { - MLPPRegOld regularization; - return C * HingeLoss(y_hat, y) + regularization.regTerm(weights, 1, 0, "Ridge"); -} - -std::vector MLPPCostOld::HingeLossDeriv(std::vector y_hat, std::vector y, real_t C) { - MLPPLinAlgOld alg; - return alg.scalarMultiply(C, HingeLossDeriv(y_hat, y)); -} -std::vector> MLPPCostOld::HingeLossDeriv(std::vector> y_hat, std::vector> y, real_t C) { - MLPPLinAlgOld alg; - return alg.scalarMultiply(C, HingeLossDeriv(y_hat, y)); -} - -real_t MLPPCostOld::dualFormSVM(std::vector alpha, std::vector> X, std::vector y) { - MLPPLinAlgOld alg; - std::vector> Y = alg.diag(y); // Y is a diagnoal matrix. Y[i][j] = y[i] if i = i, else Y[i][j] = 0. Yt = Y. - std::vector> K = alg.matmult(X, alg.transpose(X)); // TO DO: DON'T forget to add non-linear kernelizations. - std::vector> Q = alg.matmult(alg.matmult(alg.transpose(Y), K), Y); - real_t alphaQ = alg.matmult(alg.matmult({ alpha }, Q), alg.transpose({ alpha }))[0][0]; - std::vector one = alg.onevec(alpha.size()); - - return -alg.dot(one, alpha) + 0.5 * alphaQ; -} - -std::vector MLPPCostOld::dualFormSVMDeriv(std::vector alpha, std::vector> X, std::vector y) { - MLPPLinAlgOld alg; - std::vector> Y = alg.zeromat(y.size(), y.size()); - for (uint32_t i = 0; i < y.size(); i++) { - Y[i][i] = y[i]; // Y is a diagnoal matrix. Y[i][j] = y[i] if i = i, else Y[i][j] = 0. Yt = Y. - } - std::vector> K = alg.matmult(X, alg.transpose(X)); // TO DO: DON'T forget to add non-linear kernelizations. - std::vector> Q = alg.matmult(alg.matmult(alg.transpose(Y), K), Y); - std::vector alphaQDeriv = alg.mat_vec_mult(Q, alpha); - std::vector one = alg.onevec(alpha.size()); - - return alg.subtraction(alphaQDeriv, one); -} diff --git a/mlpp/cost/cost_old.h b/mlpp/cost/cost_old.h deleted file mode 100644 index c186a47..0000000 --- a/mlpp/cost/cost_old.h +++ /dev/null @@ -1,85 +0,0 @@ - -#ifndef MLPP_COST_OLD_H -#define MLPP_COST_OLD_H - -// -// Cost.hpp -// -// Created by Marc Melikyan on 1/16/21. 
-// - -#include "core/math/math_defs.h" -#include "core/int_types.h" - -#include - -class MLPPCostOld { -public: - // Regression Costs - real_t MSE(std::vector y_hat, std::vector y); - real_t MSE(std::vector> y_hat, std::vector> y); - - std::vector MSEDeriv(std::vector y_hat, std::vector y); - std::vector> MSEDeriv(std::vector> y_hat, std::vector> y); - - real_t RMSE(std::vector y_hat, std::vector y); - real_t RMSE(std::vector> y_hat, std::vector> y); - - std::vector RMSEDeriv(std::vector y_hat, std::vector y); - std::vector> RMSEDeriv(std::vector> y_hat, std::vector> y); - - real_t MAE(std::vector y_hat, std::vector y); - real_t MAE(std::vector> y_hat, std::vector> y); - - std::vector MAEDeriv(std::vector y_hat, std::vector y); - std::vector> MAEDeriv(std::vector> y_hat, std::vector> y); - - real_t MBE(std::vector y_hat, std::vector y); - real_t MBE(std::vector> y_hat, std::vector> y); - - std::vector MBEDeriv(std::vector y_hat, std::vector y); - std::vector> MBEDeriv(std::vector> y_hat, std::vector> y); - - // Classification Costs - real_t LogLoss(std::vector y_hat, std::vector y); - real_t LogLoss(std::vector> y_hat, std::vector> y); - - std::vector LogLossDeriv(std::vector y_hat, std::vector y); - std::vector> LogLossDeriv(std::vector> y_hat, std::vector> y); - - real_t CrossEntropy(std::vector y_hat, std::vector y); - real_t CrossEntropy(std::vector> y_hat, std::vector> y); - - std::vector CrossEntropyDeriv(std::vector y_hat, std::vector y); - std::vector> CrossEntropyDeriv(std::vector> y_hat, std::vector> y); - - real_t HuberLoss(std::vector y_hat, std::vector y, real_t delta); - real_t HuberLoss(std::vector> y_hat, std::vector> y, real_t delta); - - std::vector HuberLossDeriv(std::vector y_hat, std::vector y, real_t delta); - std::vector> HuberLossDeriv(std::vector> y_hat, std::vector> y, real_t delta); - - real_t HingeLoss(std::vector y_hat, std::vector y); - real_t HingeLoss(std::vector> y_hat, std::vector> y); - - std::vector HingeLossDeriv(std::vector y_hat, std::vector y); - std::vector> HingeLossDeriv(std::vector> y_hat, std::vector> y); - - real_t HingeLoss(std::vector y_hat, std::vector y, std::vector weights, real_t C); - real_t HingeLoss(std::vector> y_hat, std::vector> y, std::vector> weights, real_t C); - - std::vector HingeLossDeriv(std::vector y_hat, std::vector y, real_t C); - std::vector> HingeLossDeriv(std::vector> y_hat, std::vector> y, real_t C); - - real_t WassersteinLoss(std::vector y_hat, std::vector y); - real_t WassersteinLoss(std::vector> y_hat, std::vector> y); - - std::vector WassersteinLossDeriv(std::vector y_hat, std::vector y); - std::vector> WassersteinLossDeriv(std::vector> y_hat, std::vector> y); - - real_t dualFormSVM(std::vector alpha, std::vector> X, std::vector y); // TO DO: DON'T forget to add non-linear kernelizations. - - std::vector dualFormSVMDeriv(std::vector alpha, std::vector> X, std::vector y); -}; - -#endif /* Cost_hpp */ diff --git a/mlpp/data/data.cpp b/mlpp/data/data.cpp index 02e0e4e..ba831b2 100644 --- a/mlpp/data/data.cpp +++ b/mlpp/data/data.cpp @@ -13,7 +13,6 @@ #include "../stat/stat.h" #include "../softmax_net/softmax_net.h" -#include "data_old.h" #include #include diff --git a/mlpp/data/data_old.cpp b/mlpp/data/data_old.cpp deleted file mode 100644 index 8e3f602..0000000 --- a/mlpp/data/data_old.cpp +++ /dev/null @@ -1,833 +0,0 @@ -// -// Data.cpp -// MLP -// -// Created by Marc Melikyan on 11/4/20. 
-// - -#include "data_old.h" - -#include "core/os/file_access.h" - -#include "../lin_alg/lin_alg_old.h" -#include "../softmax_net/softmax_net_old.h" -#include "../stat/stat_old.h" - -#include -#include -#include -#include -#include -#include - -// Loading Datasets -std::tuple>, std::vector> MLPPDataOld::loadBreastCancer() { - const int BREAST_CANCER_SIZE = 30; // k = 30 - std::vector> inputSet; - std::vector outputSet; - - setData(BREAST_CANCER_SIZE, "MLPP/Data/Datasets/BreastCancer.csv", inputSet, outputSet); - return { inputSet, outputSet }; -} - -std::tuple>, std::vector> MLPPDataOld::loadBreastCancerSVC() { - const int BREAST_CANCER_SIZE = 30; // k = 30 - std::vector> inputSet; - std::vector outputSet; - - setData(BREAST_CANCER_SIZE, "MLPP/Data/Datasets/BreastCancerSVM.csv", inputSet, outputSet); - return { inputSet, outputSet }; -} - -std::tuple>, std::vector>> MLPPDataOld::loadIris() { - const int IRIS_SIZE = 4; - const int ONE_HOT_NUM = 3; - std::vector> inputSet; - std::vector tempOutputSet; - - setData(IRIS_SIZE, "/Users/marcmelikyan/Desktop/Data/Iris.csv", inputSet, tempOutputSet); - std::vector> outputSet = oneHotRep(tempOutputSet, ONE_HOT_NUM); - return { inputSet, outputSet }; -} - -std::tuple>, std::vector>> MLPPDataOld::loadWine() { - const int WINE_SIZE = 4; - const int ONE_HOT_NUM = 3; - std::vector> inputSet; - std::vector tempOutputSet; - - setData(WINE_SIZE, "MLPP/Data/Datasets/Iris.csv", inputSet, tempOutputSet); - std::vector> outputSet = oneHotRep(tempOutputSet, ONE_HOT_NUM); - return { inputSet, outputSet }; -} - -std::tuple>, std::vector>> MLPPDataOld::loadMnistTrain() { - const int MNIST_SIZE = 784; - const int ONE_HOT_NUM = 10; - std::vector> inputSet; - std::vector tempOutputSet; - - setData(MNIST_SIZE, "MLPP/Data/Datasets/MnistTrain.csv", inputSet, tempOutputSet); - std::vector> outputSet = oneHotRep(tempOutputSet, ONE_HOT_NUM); - return { inputSet, outputSet }; -} - -std::tuple>, std::vector>> MLPPDataOld::loadMnistTest() { - const int MNIST_SIZE = 784; - const int ONE_HOT_NUM = 10; - std::vector> inputSet; - std::vector tempOutputSet; - - setData(MNIST_SIZE, "MLPP/Data/Datasets/MnistTest.csv", inputSet, tempOutputSet); - std::vector> outputSet = oneHotRep(tempOutputSet, ONE_HOT_NUM); - return { inputSet, outputSet }; -} - -std::tuple>, std::vector> MLPPDataOld::loadCaliforniaHousing() { - const int CALIFORNIA_HOUSING_SIZE = 13; // k = 30 - std::vector> inputSet; - std::vector outputSet; - - setData(CALIFORNIA_HOUSING_SIZE, "MLPP/Data/Datasets/CaliforniaHousing.csv", inputSet, outputSet); - return { inputSet, outputSet }; -} - -std::tuple, std::vector> MLPPDataOld::loadFiresAndCrime() { - std::vector inputSet; // k is implicitly 1. - std::vector outputSet; - - setData("MLPP/Data/Datasets/FiresAndCrime.csv", inputSet, outputSet); - return { inputSet, outputSet }; -} - -// Note that inputs and outputs should be pairs (technically), but this -// implementation will separate them. (My implementation keeps them tied together.) 
-// Not yet sure whether this is intentional or not (or it's something like a compiler specific difference) -std::tuple>, std::vector>, std::vector>, std::vector>> MLPPDataOld::trainTestSplit(std::vector> inputSet, std::vector> outputSet, real_t testSize) { - std::random_device rd; - std::default_random_engine generator(rd()); - - std::shuffle(inputSet.begin(), inputSet.end(), generator); // inputSet random shuffle - std::shuffle(outputSet.begin(), outputSet.end(), generator); // outputSet random shuffle) - - std::vector> inputTestSet; - std::vector> outputTestSet; - - int testInputNumber = testSize * inputSet.size(); // implicit usage of floor - int testOutputNumber = testSize * outputSet.size(); // implicit usage of floor - - for (int i = 0; i < testInputNumber; i++) { - inputTestSet.push_back(inputSet[i]); - inputSet.erase(inputSet.begin()); - } - - for (int i = 0; i < testOutputNumber; i++) { - outputTestSet.push_back(outputSet[i]); - outputSet.erase(outputSet.begin()); - } - - return { inputSet, outputSet, inputTestSet, outputTestSet }; -} - -// MULTIVARIATE SUPERVISED - -void MLPPDataOld::setData(int k, std::string fileName, std::vector> &inputSet, std::vector &outputSet) { - MLPPLinAlgOld alg; - std::string inputTemp; - std::string outputTemp; - - inputSet.resize(k); - - std::ifstream dataFile(fileName); - if (!dataFile.is_open()) { - std::cout << fileName << " failed to open." << std::endl; - } - - std::string line; - while (std::getline(dataFile, line)) { - std::stringstream ss(line); - - for (int i = 0; i < k; i++) { - std::getline(ss, inputTemp, ','); - inputSet[i].push_back(std::stod(inputTemp)); - } - - std::getline(ss, outputTemp, ','); - outputSet.push_back(std::stod(outputTemp)); - } - inputSet = alg.transpose(inputSet); - dataFile.close(); -} - -void MLPPDataOld::printData(std::vector inputName, std::string outputName, std::vector> inputSet, std::vector outputSet) { - MLPPLinAlgOld alg; - inputSet = alg.transpose(inputSet); - for (uint32_t i = 0; i < inputSet.size(); i++) { - std::cout << inputName[i] << std::endl; - for (uint32_t j = 0; j < inputSet[i].size(); j++) { - std::cout << inputSet[i][j] << std::endl; - } - } - - std::cout << outputName << std::endl; - for (uint32_t i = 0; i < outputSet.size(); i++) { - std::cout << outputSet[i] << std::endl; - } -} - -// UNSUPERVISED - -void MLPPDataOld::setData(int k, std::string fileName, std::vector> &inputSet) { - MLPPLinAlgOld alg; - std::string inputTemp; - - inputSet.resize(k); - - std::ifstream dataFile(fileName); - if (!dataFile.is_open()) { - std::cout << fileName << " failed to open." << std::endl; - } - - std::string line; - while (std::getline(dataFile, line)) { - std::stringstream ss(line); - - for (int i = 0; i < k; i++) { - std::getline(ss, inputTemp, ','); - inputSet[i].push_back(std::stod(inputTemp)); - } - } - inputSet = alg.transpose(inputSet); - dataFile.close(); -} - -void MLPPDataOld::printData(std::vector inputName, std::vector> inputSet) { - MLPPLinAlgOld alg; - inputSet = alg.transpose(inputSet); - for (uint32_t i = 0; i < inputSet.size(); i++) { - std::cout << inputName[i] << std::endl; - for (uint32_t j = 0; j < inputSet[i].size(); j++) { - std::cout << inputSet[i][j] << std::endl; - } - } -} - -// SIMPLE - -void MLPPDataOld::setData(std::string fileName, std::vector &inputSet, std::vector &outputSet) { - std::string inputTemp, outputTemp; - - std::ifstream dataFile(fileName); - if (!dataFile.is_open()) { - std::cout << "The file failed to open." 
<< std::endl; - } - - std::string line; - - while (std::getline(dataFile, line)) { - std::stringstream ss(line); - - std::getline(ss, inputTemp, ','); - std::getline(ss, outputTemp, ','); - - inputSet.push_back(std::stod(inputTemp)); - outputSet.push_back(std::stod(outputTemp)); - } - - dataFile.close(); -} - -void MLPPDataOld::printData(std::string &inputName, std::string &outputName, std::vector &inputSet, std::vector &outputSet) { - std::cout << inputName << std::endl; - for (uint32_t i = 0; i < inputSet.size(); i++) { - std::cout << inputSet[i] << std::endl; - } - - std::cout << outputName << std::endl; - for (uint32_t i = 0; i < inputSet.size(); i++) { - std::cout << outputSet[i] << std::endl; - } -} - -// Images -std::vector> MLPPDataOld::rgb2gray(std::vector>> input) { - std::vector> grayScale; - grayScale.resize(input[0].size()); - for (uint32_t i = 0; i < grayScale.size(); i++) { - grayScale[i].resize(input[0][i].size()); - } - for (uint32_t i = 0; i < grayScale.size(); i++) { - for (uint32_t j = 0; j < grayScale[i].size(); j++) { - grayScale[i][j] = 0.299 * input[0][i][j] + 0.587 * input[1][i][j] + 0.114 * input[2][i][j]; - } - } - return grayScale; -} - -std::vector>> MLPPDataOld::rgb2ycbcr(std::vector>> input) { - MLPPLinAlgOld alg; - std::vector>> YCbCr; - YCbCr = alg.resize(YCbCr, input); - for (uint32_t i = 0; i < YCbCr[0].size(); i++) { - for (uint32_t j = 0; j < YCbCr[0][i].size(); j++) { - YCbCr[0][i][j] = 0.299 * input[0][i][j] + 0.587 * input[1][i][j] + 0.114 * input[2][i][j]; - YCbCr[1][i][j] = -0.169 * input[0][i][j] - 0.331 * input[1][i][j] + 0.500 * input[2][i][j]; - YCbCr[2][i][j] = 0.500 * input[0][i][j] - 0.419 * input[1][i][j] - 0.081 * input[2][i][j]; - } - } - return YCbCr; -} - -// Conversion formulas available here: -// https://www.rapidtables.com/convert/color/rgb-to-hsv.html -std::vector>> MLPPDataOld::rgb2hsv(std::vector>> input) { - MLPPLinAlgOld alg; - std::vector>> HSV; - HSV = alg.resize(HSV, input); - for (uint32_t i = 0; i < HSV[0].size(); i++) { - for (uint32_t j = 0; j < HSV[0][i].size(); j++) { - real_t rPrime = input[0][i][j] / 255; - real_t gPrime = input[1][i][j] / 255; - real_t bPrime = input[2][i][j] / 255; - - real_t cMax = alg.max({ rPrime, gPrime, bPrime }); - real_t cMin = alg.min({ rPrime, gPrime, bPrime }); - real_t delta = cMax - cMin; - - // H calculation. - if (delta == 0) { - HSV[0][i][j] = 0; - } else { - if (cMax == rPrime) { - HSV[0][i][j] = 60 * fmod(((gPrime - bPrime) / delta), 6); - } else if (cMax == gPrime) { - HSV[0][i][j] = 60 * ((bPrime - rPrime) / delta + 2); - } else { // cMax == bPrime - HSV[0][i][j] = 60 * ((rPrime - gPrime) / delta + 6); - } - } - - // S calculation. - if (cMax == 0) { - HSV[1][i][j] = 0; - } else { - HSV[1][i][j] = delta / cMax; - } - - // V calculation. 
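	// V is simply cMax, the largest of the three normalised channels; e.g. pure red
	// (255, 0, 0) gives H = 0, S = 1, V = 1.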
- HSV[2][i][j] = cMax; - } - } - return HSV; -} - -// http://machinethatsees.blogspot.com/2013/07/how-to-convert-rgb-to-xyz-or-vice-versa.html -std::vector>> MLPPDataOld::rgb2xyz(std::vector>> input) { - MLPPLinAlgOld alg; - std::vector>> XYZ; - XYZ = alg.resize(XYZ, input); - std::vector> RGB2XYZ = { { 0.4124564, 0.3575761, 0.1804375 }, { 0.2126726, 0.7151522, 0.0721750 }, { 0.0193339, 0.1191920, 0.9503041 } }; - return alg.vector_wise_tensor_product(input, RGB2XYZ); -} - -std::vector>> MLPPDataOld::xyz2rgb(std::vector>> input) { - MLPPLinAlgOld alg; - std::vector>> XYZ; - XYZ = alg.resize(XYZ, input); - std::vector> RGB2XYZ = alg.inverse({ { 0.4124564, 0.3575761, 0.1804375 }, { 0.2126726, 0.7151522, 0.0721750 }, { 0.0193339, 0.1191920, 0.9503041 } }); - return alg.vector_wise_tensor_product(input, RGB2XYZ); -} - -// TEXT-BASED & NLP -std::string MLPPDataOld::toLower(std::string text) { - for (uint32_t i = 0; i < text.size(); i++) { - text[i] = tolower(text[i]); - } - return text; -} - -std::vector MLPPDataOld::split(std::string text) { - std::vector split_data; - for (uint32_t i = 0; i < text.size(); i++) { - split_data.push_back(text[i]); - } - return split_data; -} - -std::vector MLPPDataOld::splitSentences(std::string data) { - std::vector sentences; - std::string currentStr = ""; - - for (uint32_t i = 0; i < data.length(); i++) { - currentStr.push_back(data[i]); - if (data[i] == '.' && data[i + 1] != '.') { - sentences.push_back(currentStr); - currentStr = ""; - i++; - } - } - return sentences; -} - -std::vector MLPPDataOld::removeSpaces(std::vector data) { - for (uint32_t i = 0; i < data.size(); i++) { - auto it = data[i].begin(); - for (uint32_t j = 0; j < data[i].length(); j++) { - if (data[i][j] == ' ') { - data[i].erase(it); - } - it++; - } - } - return data; -} - -std::vector MLPPDataOld::removeNullByte(std::vector data) { - for (uint32_t i = 0; i < data.size(); i++) { - if (data[i] == "\0") { - data.erase(data.begin() + i); - } - } - return data; -} - -std::vector MLPPDataOld::segment(std::string text) { - std::vector segmented_data; - int prev_delim = 0; - for (uint32_t i = 0; i < text.length(); i++) { - if (text[i] == ' ') { - segmented_data.push_back(text.substr(prev_delim, i - prev_delim)); - prev_delim = i + 1; - } else if (text[i] == ',' || text[i] == '!' || text[i] == '.' 
|| text[i] == '-') { - segmented_data.push_back(text.substr(prev_delim, i - prev_delim)); - std::string punc; - punc.push_back(text[i]); - segmented_data.push_back(punc); - prev_delim = i + 2; - i++; - } else if (i == text.length() - 1) { - segmented_data.push_back(text.substr(prev_delim, text.length() - prev_delim)); // hehe oops- forgot this - } - } - - return segmented_data; -} - -std::vector MLPPDataOld::tokenize(std::string text) { - int max_num = 0; - bool new_num = true; - std::vector segmented_data = segment(text); - std::vector tokenized_data; - tokenized_data.resize(segmented_data.size()); - for (uint32_t i = 0; i < segmented_data.size(); i++) { - for (int j = i - 1; j >= 0; j--) { - if (segmented_data[i] == segmented_data[j]) { - tokenized_data[i] = tokenized_data[j]; - new_num = false; - } - } - if (!new_num) { - new_num = true; - } else { - max_num++; - tokenized_data[i] = max_num; - } - } - return tokenized_data; -} - -std::vector MLPPDataOld::removeStopWords(std::string text) { - std::vector stopWords = { "i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", "yours", "yourself", "yourselves", "he", "him", "his", "himself", "she", "her", "hers", "herself", "it", "its", "itself", "they", "them", "their", "theirs", "themselves", "what", "which", "who", "whom", "this", "that", "these", "those", "am", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had", "having", "do", "does", "did", "doing", "a", "an", "the", "and", "but", "if", "or", "because", "as", "until", "while", "of", "at", "by", "for", "with", "about", "against", "between", "into", "through", "during", "before", "after", "above", "below", "to", "from", "up", "down", "in", "out", "on", "off", "over", "under", "again", "further", "then", "once", "here", "there", "when", "where", "why", "how", "all", "any", "both", "each", "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so", "than", "too", "very", "s", "t", "can", "will", "just", "don", "should", "now" }; - std::vector segmented_data = removeSpaces(segment(toLower(text))); - - for (uint32_t i = 0; i < stopWords.size(); i++) { - for (uint32_t j = 0; j < segmented_data.size(); j++) { - if (segmented_data[j] == stopWords[i]) { - segmented_data.erase(segmented_data.begin() + j); - } - } - } - return segmented_data; -} - -std::vector MLPPDataOld::removeStopWords(std::vector segmented_data) { - std::vector stopWords = { "i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", "yours", "yourself", "yourselves", "he", "him", "his", "himself", "she", "her", "hers", "herself", "it", "its", "itself", "they", "them", "their", "theirs", "themselves", "what", "which", "who", "whom", "this", "that", "these", "those", "am", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had", "having", "do", "does", "did", "doing", "a", "an", "the", "and", "but", "if", "or", "because", "as", "until", "while", "of", "at", "by", "for", "with", "about", "against", "between", "into", "through", "during", "before", "after", "above", "below", "to", "from", "up", "down", "in", "out", "on", "off", "over", "under", "again", "further", "then", "once", "here", "there", "when", "where", "why", "how", "all", "any", "both", "each", "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so", "than", "too", "very", "s", "t", "can", "will", "just", "don", "should", "now" }; - for (uint32_t i = 0; i < segmented_data.size(); i++) { - for (uint32_t j = 0; j < 
stopWords.size(); j++) { - if (segmented_data[i] == stopWords[j]) { - segmented_data.erase(segmented_data.begin() + i); - } - } - } - return segmented_data; -} - -std::string MLPPDataOld::stemming(std::string text) { - // Our list of suffixes which we use to compare against - std::vector suffixes = { "eer", "er", "ion", "ity", "ment", "ness", "or", "sion", "ship", "th", "able", "ible", "al", "ant", "ary", "ful", "ic", "ious", "ous", "ive", "less", "y", "ed", "en", "ing", "ize", "ise", "ly", "ward", "wise" }; - int padding_size = 4; - char padding = ' '; // our padding - - for (int i = 0; i < padding_size; i++) { - text[text.length() + i] = padding; // ' ' will be our padding value - } - - for (uint32_t i = 0; i < text.size(); i++) { - for (uint32_t j = 0; j < suffixes.size(); j++) { - if (text.substr(i, suffixes[j].length()) == suffixes[j] && (text[i + suffixes[j].length()] == ' ' || text[i + suffixes[j].length()] == ',' || text[i + suffixes[j].length()] == '-' || text[i + suffixes[j].length()] == '.' || text[i + suffixes[j].length()] == '!')) { - text.erase(i, suffixes[j].length()); - } - } - } - - return text; -} - -std::vector> MLPPDataOld::BOW(std::vector sentences, std::string type) { - /* - STEPS OF BOW: - 1) To lowercase (done by removeStopWords function by def) - 2) Removing stop words - 3) Obtain a list of the used words - 4) Create a one hot encoded vector of the words and sentences - 5) Sentence.size() x list.size() matrix - */ - - std::vector wordList = removeNullByte(removeStopWords(createWordList(sentences))); - - std::vector> segmented_sentences; - segmented_sentences.resize(sentences.size()); - - for (uint32_t i = 0; i < sentences.size(); i++) { - segmented_sentences[i] = removeStopWords(sentences[i]); - } - - std::vector> bow; - - bow.resize(sentences.size()); - for (uint32_t i = 0; i < bow.size(); i++) { - bow[i].resize(wordList.size()); - } - - for (uint32_t i = 0; i < segmented_sentences.size(); i++) { - for (uint32_t j = 0; j < segmented_sentences[i].size(); j++) { - for (uint32_t k = 0; k < wordList.size(); k++) { - if (segmented_sentences[i][j] == wordList[k]) { - if (type == "Binary") { - bow[i][k] = 1; - } else { - bow[i][k]++; - } - } - } - } - } - return bow; -} - -std::vector> MLPPDataOld::TFIDF(std::vector sentences) { - MLPPLinAlgOld alg; - std::vector wordList = removeNullByte(removeStopWords(createWordList(sentences))); - - std::vector> segmented_sentences; - segmented_sentences.resize(sentences.size()); - - for (uint32_t i = 0; i < sentences.size(); i++) { - segmented_sentences[i] = removeStopWords(sentences[i]); - } - - std::vector> TF; - std::vector frequency; - frequency.resize(wordList.size()); - TF.resize(segmented_sentences.size()); - for (uint32_t i = 0; i < TF.size(); i++) { - TF[i].resize(wordList.size()); - } - for (uint32_t i = 0; i < segmented_sentences.size(); i++) { - std::vector present(wordList.size(), false); - for (uint32_t j = 0; j < segmented_sentences[i].size(); j++) { - for (uint32_t k = 0; k < wordList.size(); k++) { - if (segmented_sentences[i][j] == wordList[k]) { - TF[i][k]++; - if (!present[k]) { - frequency[k]++; - present[k] = true; - } - } - } - } - TF[i] = alg.scalarMultiply(real_t(1) / real_t(segmented_sentences[i].size()), TF[i]); - } - - std::vector IDF; - IDF.resize(frequency.size()); - - for (uint32_t i = 0; i < IDF.size(); i++) { - IDF[i] = std::log((real_t)segmented_sentences.size() / (real_t)frequency[i]); - } - - std::vector> TFIDF; - TFIDF.resize(segmented_sentences.size()); - for (uint32_t i = 0; i < TFIDF.size(); 
i++) { - TFIDF[i].resize(wordList.size()); - } - - for (uint32_t i = 0; i < TFIDF.size(); i++) { - for (uint32_t j = 0; j < TFIDF[i].size(); j++) { - TFIDF[i][j] = TF[i][j] * IDF[j]; - } - } - - return TFIDF; -} - -std::tuple>, std::vector> MLPPDataOld::word2Vec(std::vector sentences, std::string type, int windowSize, int dimension, real_t learning_rate, int max_epoch) { - std::vector wordList = removeNullByte(removeStopWords(createWordList(sentences))); - - std::vector> segmented_sentences; - segmented_sentences.resize(sentences.size()); - - for (uint32_t i = 0; i < sentences.size(); i++) { - segmented_sentences[i] = removeStopWords(sentences[i]); - } - - std::vector inputStrings; - std::vector outputStrings; - - for (uint32_t i = 0; i < segmented_sentences.size(); i++) { - for (uint32_t j = 0; j < segmented_sentences[i].size(); j++) { - for (int k = windowSize; k > 0; k--) { - int jmk = (int)j - k; - - if (jmk >= 0) { - inputStrings.push_back(segmented_sentences[i][j]); - - outputStrings.push_back(segmented_sentences[i][jmk]); - } - if (j + k <= segmented_sentences[i].size() - 1) { - inputStrings.push_back(segmented_sentences[i][j]); - outputStrings.push_back(segmented_sentences[i][j + k]); - } - } - } - } - - uint32_t inputSize = inputStrings.size(); - - inputStrings.insert(inputStrings.end(), outputStrings.begin(), outputStrings.end()); - - std::vector> BOW = MLPPDataOld::BOW(inputStrings, "Binary"); - - std::vector> inputSet; - std::vector> outputSet; - - for (uint32_t i = 0; i < inputSize; i++) { - inputSet.push_back(BOW[i]); - } - - for (uint32_t i = inputSize; i < BOW.size(); i++) { - outputSet.push_back(BOW[i]); - } - - MLPPSoftmaxNetOld *model; - - if (type == "Skipgram") { - model = new MLPPSoftmaxNetOld(outputSet, inputSet, dimension); - } else { // else = CBOW. We maintain it is a default. 
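	// Both branches train the same softmax network; the only difference is whether the
	// centre-word or the context-word bag-of-words vectors are fed in as the network inputs
	// (the constructor arguments are simply swapped).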
- model = new MLPPSoftmaxNetOld(inputSet, outputSet, dimension); - } - - model->gradientDescent(learning_rate, max_epoch, false); - - std::vector> wordEmbeddings = model->getEmbeddings(); - delete model; - return { wordEmbeddings, wordList }; -} - -struct WordsToVecResult { - std::vector> word_embeddings; - std::vector word_list; -}; - -MLPPDataOld::WordsToVecResult MLPPDataOld::word_to_vec(std::vector sentences, std::string type, int windowSize, int dimension, real_t learning_rate, int max_epoch) { - WordsToVecResult res; - - res.word_list = removeNullByte(removeStopWords(createWordList(sentences))); - - std::vector> segmented_sentences; - segmented_sentences.resize(sentences.size()); - - for (uint32_t i = 0; i < sentences.size(); i++) { - segmented_sentences[i] = removeStopWords(sentences[i]); - } - - std::vector inputStrings; - std::vector outputStrings; - - for (uint32_t i = 0; i < segmented_sentences.size(); i++) { - for (uint32_t j = 0; j < segmented_sentences[i].size(); j++) { - for (int k = windowSize; k > 0; k--) { - if (j - k >= 0) { - inputStrings.push_back(segmented_sentences[i][j]); - - outputStrings.push_back(segmented_sentences[i][j - k]); - } - if (j + k <= segmented_sentences[i].size() - 1) { - inputStrings.push_back(segmented_sentences[i][j]); - outputStrings.push_back(segmented_sentences[i][j + k]); - } - } - } - } - - uint32_t inputSize = inputStrings.size(); - - inputStrings.insert(inputStrings.end(), outputStrings.begin(), outputStrings.end()); - - std::vector> BOW = MLPPDataOld::BOW(inputStrings, "Binary"); - - std::vector> inputSet; - std::vector> outputSet; - - for (uint32_t i = 0; i < inputSize; i++) { - inputSet.push_back(BOW[i]); - } - - for (uint32_t i = inputSize; i < BOW.size(); i++) { - outputSet.push_back(BOW[i]); - } - - MLPPSoftmaxNetOld *model; - - if (type == "Skipgram") { - model = new MLPPSoftmaxNetOld(outputSet, inputSet, dimension); - } else { // else = CBOW. We maintain it is a default. - model = new MLPPSoftmaxNetOld(inputSet, outputSet, dimension); - } - - model->gradientDescent(learning_rate, max_epoch, false); - - res.word_embeddings = model->getEmbeddings(); - delete model; - - return res; -} - -std::vector> MLPPDataOld::LSA(std::vector sentences, int dim) { - MLPPLinAlgOld alg; - std::vector> docWordData = BOW(sentences, "Binary"); - - MLPPLinAlgOld::SVDResultOld svr_res = alg.SVD(docWordData); - std::vector> S_trunc = alg.zeromat(dim, dim); - std::vector> Vt_trunc; - for (int i = 0; i < dim; i++) { - S_trunc[i][i] = svr_res.S[i][i]; - Vt_trunc.push_back(svr_res.Vt[i]); - } - - std::vector> embeddings = alg.matmult(S_trunc, Vt_trunc); - return embeddings; -} - -std::vector MLPPDataOld::createWordList(std::vector sentences) { - std::string combinedText = ""; - for (uint32_t i = 0; i < sentences.size(); i++) { - if (i != 0) { - combinedText += " "; - } - combinedText += sentences[i]; - } - - return removeSpaces(vecToSet(removeStopWords(combinedText))); -} - -// EXTRA -void MLPPDataOld::setInputNames(std::string fileName, std::vector &inputNames) { - std::string inputNameTemp; - std::ifstream dataFile(fileName); - if (!dataFile.is_open()) { - std::cout << fileName << " failed to open." 
<< std::endl; - } - - while (std::getline(dataFile, inputNameTemp)) { - inputNames.push_back(inputNameTemp); - } - - dataFile.close(); -} - -std::vector> MLPPDataOld::featureScaling(std::vector> X) { - MLPPLinAlgOld alg; - X = alg.transpose(X); - std::vector max_elements, min_elements; - max_elements.resize(X.size()); - min_elements.resize(X.size()); - - for (uint32_t i = 0; i < X.size(); i++) { - max_elements[i] = alg.max(X[i]); - min_elements[i] = alg.min(X[i]); - } - - for (uint32_t i = 0; i < X.size(); i++) { - for (uint32_t j = 0; j < X[i].size(); j++) { - X[i][j] = (X[i][j] - min_elements[i]) / (max_elements[i] - min_elements[i]); - } - } - return alg.transpose(X); -} - -std::vector> MLPPDataOld::meanNormalization(std::vector> X) { - MLPPLinAlgOld alg; - MLPPStatOld stat; - // (X_j - mu_j) / std_j, for every j - - X = meanCentering(X); - for (uint32_t i = 0; i < X.size(); i++) { - X[i] = alg.scalarMultiply(1 / stat.standardDeviation(X[i]), X[i]); - } - return X; -} - -std::vector> MLPPDataOld::meanCentering(std::vector> X) { - MLPPStatOld stat; - for (uint32_t i = 0; i < X.size(); i++) { - real_t mean_i = stat.mean(X[i]); - for (uint32_t j = 0; j < X[i].size(); j++) { - X[i][j] -= mean_i; - } - } - return X; -} - -std::vector> MLPPDataOld::oneHotRep(std::vector tempOutputSet, int n_class) { - std::vector> outputSet; - outputSet.resize(tempOutputSet.size()); - for (uint32_t i = 0; i < tempOutputSet.size(); i++) { - for (int j = 0; j <= n_class - 1; j++) { - if (tempOutputSet[i] == j) { - outputSet[i].push_back(1); - } else { - outputSet[i].push_back(0); - } - } - } - return outputSet; -} - -std::vector MLPPDataOld::reverseOneHot(std::vector> tempOutputSet) { - std::vector outputSet; - //uint32_t n_class = tempOutputSet[0].size(); - for (uint32_t i = 0; i < tempOutputSet.size(); i++) { - int current_class = 1; - for (uint32_t j = 0; j < tempOutputSet[i].size(); j++) { - if (tempOutputSet[i][j] == 1) { - break; - } else { - current_class++; - } - } - outputSet.push_back(current_class); - } - - return outputSet; -} diff --git a/mlpp/data/data_old.h b/mlpp/data/data_old.h deleted file mode 100644 index 4d59601..0000000 --- a/mlpp/data/data_old.h +++ /dev/null @@ -1,110 +0,0 @@ - -#ifndef MLPP_DATA_OLD_H -#define MLPP_DATA_OLD_H - -// -// Data.hpp -// MLP -// -// Created by Marc Melikyan on 11/4/20. 
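// For reference, a minimal, self-contained sketch of the TF-IDF weighting that the
// removed MLPPDataOld::TFIDF computed above: TF is the within-sentence term frequency
// (count divided by sentence length), IDF is log(N / df) with df the number of
// sentences containing the word, and the score is TF * IDF. Everything below is
// illustrative (plain whitespace tokenization, double instead of real_t, a map keyed
// by word instead of the word-list matrix); it is not part of the module.
#include <cmath>
#include <map>
#include <sstream>
#include <string>
#include <vector>

std::vector<std::map<std::string, double>> tfidf_sketch(const std::vector<std::string> &sentences) {
	std::vector<std::vector<std::string>> tokens(sentences.size());
	std::map<std::string, int> df; // number of sentences each word appears in
	for (size_t i = 0; i < sentences.size(); ++i) {
		std::istringstream ss(sentences[i]);
		std::string w;
		std::map<std::string, bool> seen;
		while (ss >> w) {
			tokens[i].push_back(w);
			if (!seen[w]) {
				seen[w] = true;
				++df[w];
			}
		}
	}
	std::vector<std::map<std::string, double>> scores(sentences.size());
	for (size_t i = 0; i < tokens.size(); ++i) {
		if (tokens[i].empty()) {
			continue;
		}
		std::map<std::string, int> tf;
		for (const std::string &w : tokens[i]) {
			++tf[w];
		}
		for (const auto &p : tf) {
			double term_freq = double(p.second) / double(tokens[i].size());
			double idf = std::log(double(sentences.size()) / double(df[p.first]));
			scores[i][p.first] = term_freq * idf;
		}
	}
	return scores;
}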
-// - -#include "core/math/math_defs.h" -#include "core/int_types.h" - -#include -#include -#include - -class MLPPDataOld { -public: - // Load Datasets - std::tuple>, std::vector> loadBreastCancer(); - std::tuple>, std::vector> loadBreastCancerSVC(); - std::tuple>, std::vector>> loadIris(); - std::tuple>, std::vector>> loadWine(); - std::tuple>, std::vector>> loadMnistTrain(); - std::tuple>, std::vector>> loadMnistTest(); - std::tuple>, std::vector> loadCaliforniaHousing(); - std::tuple, std::vector> loadFiresAndCrime(); - - std::tuple>, std::vector>, std::vector>, std::vector>> trainTestSplit(std::vector> inputSet, std::vector> outputSet, real_t testSize); - - // Supervised - void setData(int k, std::string fileName, std::vector> &inputSet, std::vector &outputSet); - void printData(std::vector inputName, std::string outputName, std::vector> inputSet, std::vector outputSet); - - // Unsupervised - void setData(int k, std::string fileName, std::vector> &inputSet); - void printData(std::vector inputName, std::vector> inputSet); - - // Simple - void setData(std::string fileName, std::vector &inputSet, std::vector &outputSet); - void printData(std::string &inputName, std::string &outputName, std::vector &inputSet, std::vector &outputSet); - - // Images - std::vector> rgb2gray(std::vector>> input); - std::vector>> rgb2ycbcr(std::vector>> input); - std::vector>> rgb2hsv(std::vector>> input); - std::vector>> rgb2xyz(std::vector>> input); - std::vector>> xyz2rgb(std::vector>> input); - - // Text-Based & NLP - std::string toLower(std::string text); - std::vector split(std::string text); - std::vector splitSentences(std::string data); - std::vector removeSpaces(std::vector data); - std::vector removeNullByte(std::vector data); - std::vector segment(std::string text); - std::vector tokenize(std::string text); - std::vector removeStopWords(std::string text); - std::vector removeStopWords(std::vector segmented_data); - - std::string stemming(std::string text); - - std::vector> BOW(std::vector sentences, std::string = "Default"); - std::vector> TFIDF(std::vector sentences); - - std::tuple>, std::vector> word2Vec(std::vector sentences, std::string type, int windowSize, int dimension, real_t learning_rate, int max_epoch); - - struct WordsToVecResult { - std::vector> word_embeddings; - std::vector word_list; - }; - - WordsToVecResult word_to_vec(std::vector sentences, std::string type, int windowSize, int dimension, real_t learning_rate, int max_epoch); - - std::vector> LSA(std::vector sentences, int dim); - - std::vector createWordList(std::vector sentences); - - // Extra - void setInputNames(std::string fileName, std::vector &inputNames); - std::vector> featureScaling(std::vector> X); - std::vector> meanNormalization(std::vector> X); - std::vector> meanCentering(std::vector> X); - std::vector> oneHotRep(std::vector tempOutputSet, int n_class); - std::vector reverseOneHot(std::vector> tempOutputSet); - - template - std::vector vecToSet(std::vector inputSet) { - std::vector setInputSet; - for (uint32_t i = 0; i < inputSet.size(); i++) { - bool new_element = true; - for (uint32_t j = 0; j < setInputSet.size(); j++) { - if (setInputSet[j] == inputSet[i]) { - new_element = false; - } - } - if (new_element) { - setInputSet.push_back(inputSet[i]); - } - } - return setInputSet; - } - -protected: - static void _bind_methods(); -}; - -#endif /* Data_hpp */ diff --git a/mlpp/dual_svc/dual_svc_old.cpp b/mlpp/dual_svc/dual_svc_old.cpp deleted file mode 100644 index 35de9cd..0000000 --- a/mlpp/dual_svc/dual_svc_old.cpp 
+++ /dev/null @@ -1,244 +0,0 @@ -// -// DualSVC.cpp -// -// Created by Marc Melikyan on 10/2/20. -// - -#include "dual_svc_old.h" -#include "../activation/activation_old.h" -#include "../cost/cost_old.h" -#include "../lin_alg/lin_alg_old.h" -#include "../regularization/reg_old.h" -#include "../utilities/utilities.h" - -#include -#include - -MLPPDualSVCOld::MLPPDualSVCOld(std::vector> p_inputSet, std::vector p_outputSet, real_t p_C, std::string p_kernel) { - inputSet = p_inputSet; - outputSet = p_outputSet; - n = p_inputSet.size(); - k = p_inputSet[0].size(); - C = p_C; - kernel = p_kernel; - - y_hat.resize(n); - bias = MLPPUtilities::biasInitialization(); - alpha = MLPPUtilities::weightInitialization(n); // One alpha for all training examples, as per the lagrangian multipliers. - K = kernelFunction(inputSet, inputSet, kernel); // For now this is unused. When non-linear kernels are added, the K will be manipulated. -} - -std::vector MLPPDualSVCOld::modelSetTest(std::vector> X) { - return Evaluate(X); -} - -real_t MLPPDualSVCOld::modelTest(std::vector x) { - return Evaluate(x); -} - -void MLPPDualSVCOld::gradientDescent(real_t learning_rate, int max_epoch, bool UI) { - class MLPPCostOld cost; - MLPPLinAlgOld alg; - real_t cost_prev = 0; - int epoch = 1; - forwardPass(); - - while (true) { - cost_prev = Cost(alpha, inputSet, outputSet); - - alpha = alg.subtraction(alpha, alg.scalarMultiply(learning_rate, cost.dualFormSVMDeriv(alpha, inputSet, outputSet))); - - alphaProjection(); - - // Calculating the bias - real_t biasGradient = 0; - for (uint32_t i = 0; i < alpha.size(); i++) { - real_t sum = 0; - if (alpha[i] < C && alpha[i] > 0) { - for (uint32_t j = 0; j < alpha.size(); j++) { - if (alpha[j] > 0) { - sum += alpha[j] * outputSet[j] * alg.dot(inputSet[j], inputSet[i]); // TO DO: DON'T forget to add non-linear kernelizations. - } - } - } - biasGradient = (1 - outputSet[i] * sum) / outputSet[i]; - break; - } - bias -= biasGradient * learning_rate; - - forwardPass(); - - // UI PORTION - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(alpha, inputSet, outputSet)); - MLPPUtilities::UI(alpha, bias); - std::cout << score() << std::endl; // TO DO: DELETE THIS. 
- } - epoch++; - - if (epoch > max_epoch) { - break; - } - } -} - -// void MLPPDualSVCOld::SGD(real_t learning_rate, int max_epoch, bool UI){ -// class MLPPCostOld cost; -// MLPPActivationOld avn; -// MLPPLinAlgOld alg; -// MLPPRegOld regularization; - -// real_t cost_prev = 0; -// int epoch = 1; - -// while(true){ -// std::random_device rd; -// std::default_random_engine generator(rd()); -// std::uniform_int_distribution distribution(0, int(n - 1)); -// int outputIndex = distribution(generator); - -// cost_prev = Cost(alpha, inputSet[outputIndex], outputSet[outputIndex]); - -// // Bias updation -// bias -= learning_rate * costDeriv; - -// y_hat = Evaluate({inputSet[outputIndex]}); - -// if(UI) { -// MLPPUtilities::CostInfo(epoch, cost_prev, Cost(alpha)); -// MLPPUtilities::UI(weights, bias); -// } -// epoch++; - -// if(epoch > max_epoch) { break; } -// } -// forwardPass(); -// } - -// void MLPPDualSVCOld::MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI){ -// class MLPPCostOld cost; -// MLPPActivationOld avn; -// MLPPLinAlgOld alg; -// MLPPRegOld regularization; -// real_t cost_prev = 0; -// int epoch = 1; - -// // Creating the mini-batches -// int n_mini_batch = n/mini_batch_size; -// auto [inputMiniBatches, outputMiniBatches] = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch); - -// while(true){ -// for(int i = 0; i < n_mini_batch; i++){ -// std::vector y_hat = Evaluate(inputMiniBatches[i]); -// std::vector z = propagate(inputMiniBatches[i]); -// cost_prev = Cost(z, outputMiniBatches[i], weights, C); - -// // Calculating the weight gradients -// weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate/n, alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), cost.HingeLossDeriv(z, outputMiniBatches[i], C)))); -// weights = regularization.regWeights(weights, learning_rate/n, 0, "Ridge"); - -// // Calculating the bias gradients -// bias -= learning_rate * alg.sum_elements(cost.HingeLossDeriv(y_hat, outputMiniBatches[i], C)) / n; - -// forwardPass(); - -// y_hat = Evaluate(inputMiniBatches[i]); - -// if(UI) { -// MLPPUtilities::CostInfo(epoch, cost_prev, Cost(z, outputMiniBatches[i], weights, C)); -// MLPPUtilities::UI(weights, bias); -// } -// } -// epoch++; -// if(epoch > max_epoch) { break; } -// } -// forwardPass(); -// } - -real_t MLPPDualSVCOld::score() { - MLPPUtilities util; - return util.performance(y_hat, outputSet); -} - -void MLPPDualSVCOld::save(std::string fileName) { - MLPPUtilities util; - util.saveParameters(fileName, alpha, bias); -} - -real_t MLPPDualSVCOld::Cost(std::vector alpha, std::vector> X, std::vector y) { - class MLPPCostOld cost; - return cost.dualFormSVM(alpha, X, y); -} - -std::vector MLPPDualSVCOld::Evaluate(std::vector> X) { - MLPPActivationOld avn; - return avn.sign(propagate(X)); -} - -std::vector MLPPDualSVCOld::propagate(std::vector> X) { - MLPPLinAlgOld alg; - std::vector z; - for (uint32_t i = 0; i < X.size(); i++) { - real_t sum = 0; - for (uint32_t j = 0; j < alpha.size(); j++) { - if (alpha[j] != 0) { - sum += alpha[j] * outputSet[j] * alg.dot(inputSet[j], X[i]); // TO DO: DON'T forget to add non-linear kernelizations. 
- } - } - sum += bias; - z.push_back(sum); - } - return z; -} - -real_t MLPPDualSVCOld::Evaluate(std::vector x) { - MLPPActivationOld avn; - return avn.sign(propagate(x)); -} - -real_t MLPPDualSVCOld::propagate(std::vector x) { - MLPPLinAlgOld alg; - real_t z = 0; - for (uint32_t j = 0; j < alpha.size(); j++) { - if (alpha[j] != 0) { - z += alpha[j] * outputSet[j] * alg.dot(inputSet[j], x); // TO DO: DON'T forget to add non-linear kernelizations. - } - } - z += bias; - return z; -} - -void MLPPDualSVCOld::forwardPass() { - MLPPActivationOld avn; - - z = propagate(inputSet); - y_hat = avn.sign(z); -} - -void MLPPDualSVCOld::alphaProjection() { - for (uint32_t i = 0; i < alpha.size(); i++) { - if (alpha[i] > C) { - alpha[i] = C; - } else if (alpha[i] < 0) { - alpha[i] = 0; - } - } -} - -real_t MLPPDualSVCOld::kernelFunction(std::vector u, std::vector v, std::string kernel) { - MLPPLinAlgOld alg; - if (kernel == "Linear") { - return alg.dot(u, v); - } - - return 0; -} - -std::vector> MLPPDualSVCOld::kernelFunction(std::vector> A, std::vector> B, std::string kernel) { - MLPPLinAlgOld alg; - if (kernel == "Linear") { - return alg.matmult(inputSet, alg.transpose(inputSet)); - } - - return std::vector>(); -} diff --git a/mlpp/dual_svc/dual_svc_old.h b/mlpp/dual_svc/dual_svc_old.h deleted file mode 100644 index a04e650..0000000 --- a/mlpp/dual_svc/dual_svc_old.h +++ /dev/null @@ -1,69 +0,0 @@ - -#ifndef MLPP_DUAL_SVC_OLD_H -#define MLPP_DUAL_SVC_OLD_H - -// -// DualSVC.hpp -// -// Created by Marc Melikyan on 10/2/20. -// -// http://disp.ee.ntu.edu.tw/~pujols/Support%20Vector%20Machine.pdf -// http://ciml.info/dl/v0_99/ciml-v0_99-ch11.pdf -// Were excellent for the practical intution behind the dual formulation. - -#include "core/math/math_defs.h" - -#include -#include - -class MLPPDualSVCOld { -public: - MLPPDualSVCOld(std::vector> inputSet, std::vector outputSet, real_t C, std::string kernel = "Linear"); - MLPPDualSVCOld(std::vector> inputSet, std::vector outputSet, real_t C, std::string kernel, real_t p, real_t c); - - std::vector modelSetTest(std::vector> X); - real_t modelTest(std::vector x); - void gradientDescent(real_t learning_rate, int max_epoch, bool UI = false); - void SGD(real_t learning_rate, int max_epoch, bool UI = false); - void MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI = false); - real_t score(); - void save(std::string fileName); - -private: - void init(); - - real_t Cost(std::vector alpha, std::vector> X, std::vector y); - - std::vector Evaluate(std::vector> X); - std::vector propagate(std::vector> X); - real_t Evaluate(std::vector x); - real_t propagate(std::vector x); - void forwardPass(); - - void alphaProjection(); - - real_t kernelFunction(std::vector v, std::vector u, std::string kernel); - std::vector> kernelFunction(std::vector> U, std::vector> V, std::string kernel); - - std::vector> inputSet; - std::vector outputSet; - std::vector z; - std::vector y_hat; - real_t bias; - - std::vector alpha; - std::vector> K; - - real_t C; - int n; - int k; - - std::string kernel; - real_t p; // Poly - real_t c; // Poly - - // UI Portion - void UI(int epoch, real_t cost_prev); -}; - -#endif /* DualSVC_hpp */ diff --git a/mlpp/exp_reg/exp_reg_old.cpp b/mlpp/exp_reg/exp_reg_old.cpp deleted file mode 100644 index 1c51155..0000000 --- a/mlpp/exp_reg/exp_reg_old.cpp +++ /dev/null @@ -1,247 +0,0 @@ -// -// ExpReg.cpp -// -// Created by Marc Melikyan on 10/2/20. 
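// For reference, a minimal sketch of the two pieces the removed MLPPDualSVCOld relies
// on above: projecting the Lagrange multipliers back into the box [0, C] after each
// gradient step, and the dual decision function
//     f(x) = sign( sum_j alpha_j * y_j * K(x_j, x) + b )
// with the linear kernel K(u, v) = u . v, the only kernel the class implemented.
// Function names and the use of double instead of real_t are illustrative only.
#include <cstddef>
#include <vector>

static double dot_sketch(const std::vector<double> &u, const std::vector<double> &v) {
	double s = 0;
	for (size_t i = 0; i < u.size(); ++i) {
		s += u[i] * v[i];
	}
	return s;
}

// Clamp every multiplier to the dual feasible region [0, C].
void project_alphas(std::vector<double> &alpha, double C) {
	for (double &a : alpha) {
		if (a > C) {
			a = C;
		} else if (a < 0) {
			a = 0;
		}
	}
}

// Dual decision value for a new point x; y holds the +1/-1 labels of the training set X.
double dual_svm_decision(const std::vector<std::vector<double>> &X, const std::vector<double> &y,
		const std::vector<double> &alpha, double bias, const std::vector<double> &x) {
	double z = bias;
	for (size_t j = 0; j < alpha.size(); ++j) {
		if (alpha[j] != 0) {
			z += alpha[j] * y[j] * dot_sketch(X[j], x); // linear kernel
		}
	}
	return z >= 0 ? 1.0 : -1.0;
}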
-// - -#include "exp_reg_old.h" - -#include "../cost/cost_old.h" -#include "../lin_alg/lin_alg_old.h" -#include "../regularization/reg_old.h" -#include "../stat/stat_old.h" -#include "../utilities/utilities.h" - -#include -#include - -MLPPExpRegOld::MLPPExpRegOld(std::vector> p_inputSet, std::vector p_outputSet, std::string p_reg, real_t p_lambda, real_t p_alpha) { - inputSet = p_inputSet; - outputSet = p_outputSet; - n = p_inputSet.size(); - k = p_inputSet[0].size(); - reg = p_reg; - lambda = p_lambda; - alpha = p_alpha; - - y_hat.resize(n); - weights = MLPPUtilities::weightInitialization(k); - initial = MLPPUtilities::weightInitialization(k); - bias = MLPPUtilities::biasInitialization(); -} - -std::vector MLPPExpRegOld::modelSetTest(std::vector> X) { - return Evaluate(X); -} - -real_t MLPPExpRegOld::modelTest(std::vector x) { - return Evaluate(x); -} - -void MLPPExpRegOld::gradientDescent(real_t learning_rate, int max_epoch, bool UI) { - MLPPLinAlgOld alg; - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - forwardPass(); - - while (true) { - cost_prev = Cost(y_hat, outputSet); - - std::vector error = alg.subtraction(y_hat, outputSet); - - for (int i = 0; i < k; i++) { - // Calculating the weight gradient - real_t sum = 0; - for (int j = 0; j < n; j++) { - sum += error[j] * inputSet[j][i] * std::pow(weights[i], inputSet[j][i] - 1); - } - real_t w_gradient = sum / n; - - // Calculating the initial gradient - real_t sum2 = 0; - for (int j = 0; j < n; j++) { - sum2 += error[j] * std::pow(weights[i], inputSet[j][i]); - } - - real_t i_gradient = sum2 / n; - - // Weight/initial updation - weights[i] -= learning_rate * w_gradient; - initial[i] -= learning_rate * i_gradient; - } - weights = regularization.regWeights(weights, lambda, alpha, reg); - - // Calculating the bias gradient - real_t sum = 0; - for (int j = 0; j < n; j++) { - sum += (y_hat[j] - outputSet[j]); - } - real_t b_gradient = sum / n; - - // bias updation - bias -= learning_rate * b_gradient; - forwardPass(); - - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); - MLPPUtilities::UI(weights, bias); - } - epoch++; - - if (epoch > max_epoch) { - break; - } - } -} - -void MLPPExpRegOld::SGD(real_t learning_rate, int max_epoch, bool UI) { - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - - while (true) { - std::random_device rd; - std::default_random_engine generator(rd()); - std::uniform_int_distribution distribution(0, int(n - 1)); - int outputIndex = distribution(generator); - - real_t y_hat = Evaluate(inputSet[outputIndex]); - cost_prev = Cost({ y_hat }, { outputSet[outputIndex] }); - - for (int i = 0; i < k; i++) { - // Calculating the weight gradients - - real_t w_gradient = (y_hat - outputSet[outputIndex]) * inputSet[outputIndex][i] * std::pow(weights[i], inputSet[outputIndex][i] - 1); - real_t i_gradient = (y_hat - outputSet[outputIndex]) * std::pow(weights[i], inputSet[outputIndex][i]); - - // Weight/initial updation - weights[i] -= learning_rate * w_gradient; - initial[i] -= learning_rate * i_gradient; - } - weights = regularization.regWeights(weights, lambda, alpha, reg); - - // Calculating the bias gradients - real_t b_gradient = (y_hat - outputSet[outputIndex]); - - // Bias updation - bias -= learning_rate * b_gradient; - y_hat = Evaluate({ inputSet[outputIndex] }); - - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost({ y_hat }, { outputSet[outputIndex] })); - MLPPUtilities::UI(weights, bias); - } - epoch++; - - if (epoch > max_epoch) { - 
break; - } - } - forwardPass(); -} - -void MLPPExpRegOld::MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI) { - MLPPLinAlgOld alg; - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - - // Creating the mini-batches - int n_mini_batch = n / mini_batch_size; - auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch); - auto inputMiniBatches = std::get<0>(batches); - auto outputMiniBatches = std::get<1>(batches); - - while (true) { - for (int i = 0; i < n_mini_batch; i++) { - std::vector y_hat = Evaluate(inputMiniBatches[i]); - cost_prev = Cost(y_hat, outputMiniBatches[i]); - std::vector error = alg.subtraction(y_hat, outputMiniBatches[i]); - - for (int j = 0; j < k; j++) { - // Calculating the weight gradient - real_t sum = 0; - for (uint32_t k = 0; k < outputMiniBatches[i].size(); k++) { - sum += error[k] * inputMiniBatches[i][k][j] * std::pow(weights[j], inputMiniBatches[i][k][j] - 1); - } - real_t w_gradient = sum / outputMiniBatches[i].size(); - - // Calculating the initial gradient - real_t sum2 = 0; - for (uint32_t k = 0; k < outputMiniBatches[i].size(); k++) { - sum2 += error[k] * std::pow(weights[j], inputMiniBatches[i][k][j]); - } - - real_t i_gradient = sum2 / outputMiniBatches[i].size(); - - // Weight/initial updation - weights[j] -= learning_rate * w_gradient; - initial[j] -= learning_rate * i_gradient; - } - weights = regularization.regWeights(weights, lambda, alpha, reg); - - // Calculating the bias gradient - //real_t sum = 0; - //for (uint32_t j = 0; j < outputMiniBatches[i].size(); j++) { - // sum += (y_hat[j] - outputMiniBatches[i][j]); - //} - - //real_t b_gradient = sum / outputMiniBatches[i].size(); - y_hat = Evaluate(inputMiniBatches[i]); - - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i])); - MLPPUtilities::UI(weights, bias); - } - } - epoch++; - if (epoch > max_epoch) { - break; - } - } - forwardPass(); -} - -real_t MLPPExpRegOld::score() { - MLPPUtilities util; - return util.performance(y_hat, outputSet); -} - -void MLPPExpRegOld::save(std::string fileName) { - MLPPUtilities util; - util.saveParameters(fileName, weights, initial, bias); -} - -real_t MLPPExpRegOld::Cost(std::vector y_hat, std::vector y) { - MLPPRegOld regularization; - class MLPPCostOld cost; - return cost.MSE(y_hat, y) + regularization.regTerm(weights, lambda, alpha, reg); -} - -std::vector MLPPExpRegOld::Evaluate(std::vector> X) { - std::vector y_hat; - y_hat.resize(X.size()); - for (uint32_t i = 0; i < X.size(); i++) { - y_hat[i] = 0; - for (uint32_t j = 0; j < X[i].size(); j++) { - y_hat[i] += initial[j] * std::pow(weights[j], X[i][j]); - } - y_hat[i] += bias; - } - return y_hat; -} - -real_t MLPPExpRegOld::Evaluate(std::vector x) { - real_t y_hat = 0; - for (uint32_t i = 0; i < x.size(); i++) { - y_hat += initial[i] * std::pow(weights[i], x[i]); - } - - return y_hat + bias; -} - -// a * w^x + b -void MLPPExpRegOld::forwardPass() { - y_hat = Evaluate(inputSet); -} diff --git a/mlpp/exp_reg/exp_reg_old.h b/mlpp/exp_reg/exp_reg_old.h deleted file mode 100644 index 06f9227..0000000 --- a/mlpp/exp_reg/exp_reg_old.h +++ /dev/null @@ -1,50 +0,0 @@ - -#ifndef MLPP_EXP_REG_OLD_H -#define MLPP_EXP_REG_OLD_H - -// -// ExpReg.hpp -// -// Created by Marc Melikyan on 10/2/20. 
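// For reference, the model the removed MLPPExpRegOld fits above is
//     y_hat = sum_i a_i * w_i^{x_i} + b      ("a * w^x + b" in the original comment),
// whose analytic partials are d(y_hat)/dw_i = a_i * x_i * w_i^{x_i - 1},
// d(y_hat)/da_i = w_i^{x_i} and d(y_hat)/db = 1, each scaled by the MSE residual
// (y_hat - y) during gradient descent. A standalone prediction sketch (double stands
// in for real_t; the function name is illustrative):
#include <cmath>
#include <cstddef>
#include <vector>

double exp_reg_predict(const std::vector<double> &x, const std::vector<double> &weights,
		const std::vector<double> &initial, double bias) {
	double y_hat = bias;
	for (size_t i = 0; i < x.size(); ++i) {
		y_hat += initial[i] * std::pow(weights[i], x[i]); // a_i * w_i^{x_i}
	}
	return y_hat;
}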
-// - -#include "core/math/math_defs.h" - -#include -#include - -class MLPPExpRegOld { -public: - MLPPExpRegOld(std::vector> inputSet, std::vector outputSet, std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5); - std::vector modelSetTest(std::vector> X); - real_t modelTest(std::vector x); - void gradientDescent(real_t learning_rate, int max_epoch, bool UI = 1); - void SGD(real_t learning_rate, int max_epoch, bool UI = 1); - void MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI = 1); - real_t score(); - void save(std::string fileName); - -private: - real_t Cost(std::vector y_hat, std::vector y); - - std::vector Evaluate(std::vector> X); - real_t Evaluate(std::vector x); - void forwardPass(); - - std::vector> inputSet; - std::vector outputSet; - std::vector y_hat; - std::vector weights; - std::vector initial; - real_t bias; - - int n; - int k; - - // Regularization Params - std::string reg; - real_t lambda; - real_t alpha; /* This is the controlling param for Elastic Net*/ -}; - -#endif /* ExpReg_hpp */ diff --git a/mlpp/gan/gan_old.cpp b/mlpp/gan/gan_old.cpp deleted file mode 100644 index 519e0ef..0000000 --- a/mlpp/gan/gan_old.cpp +++ /dev/null @@ -1,287 +0,0 @@ -// -// GAN.cpp -// -// Created by Marc Melikyan on 11/4/20. -// - -#include "gan_old.h" -#include "../activation/activation_old.h" -#include "../cost/cost_old.h" -#include "../lin_alg/lin_alg_old.h" -#include "../regularization/reg_old.h" -#include "../utilities/utilities.h" - -#include -#include - -MLPPGANOld::MLPPGANOld(real_t k, std::vector> outputSet) : - outputSet(outputSet), n(outputSet.size()), k(k) { -} - -MLPPGANOld::~MLPPGANOld() { - delete outputLayer; -} - -std::vector> MLPPGANOld::generateExample(int n) { - MLPPLinAlgOld alg; - return modelSetTestGenerator(alg.gaussianNoise(n, k)); -} - -void MLPPGANOld::gradientDescent(real_t learning_rate, int max_epoch, bool UI) { - class MLPPCost cost; - MLPPLinAlgOld alg; - real_t cost_prev = 0; - int epoch = 1; - forwardPass(); - - while (true) { - cost_prev = Cost(y_hat, alg.onevec(n)); - - // Training of the discriminator. - - std::vector> generatorInputSet = alg.gaussianNoise(n, k); - std::vector> discriminatorInputSet = modelSetTestGenerator(generatorInputSet); - discriminatorInputSet.insert(discriminatorInputSet.end(), outputSet.begin(), outputSet.end()); // Fake + real inputs. - - std::vector y_hat = modelSetTestDiscriminator(discriminatorInputSet); - std::vector outputSet = alg.zerovec(n); - std::vector outputSetReal = alg.onevec(n); - outputSet.insert(outputSet.end(), outputSetReal.begin(), outputSetReal.end()); // Fake + real output scores. - - auto dgrads = computeDiscriminatorGradients(y_hat, outputSet); - auto cumulativeDiscriminatorHiddenLayerWGrad = std::get<0>(dgrads); - auto outputDiscriminatorWGrad = std::get<1>(dgrads); - - cumulativeDiscriminatorHiddenLayerWGrad = alg.scalarMultiply(learning_rate / n, cumulativeDiscriminatorHiddenLayerWGrad); - outputDiscriminatorWGrad = alg.scalarMultiply(learning_rate / n, outputDiscriminatorWGrad); - updateDiscriminatorParameters(cumulativeDiscriminatorHiddenLayerWGrad, outputDiscriminatorWGrad, learning_rate); - - // Training of the generator. 
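// Note on the generator step below: the fake samples are pushed through the
// discriminator and scored against labels of 1 (alg.onevec(n)), i.e. they are
// deliberately labelled "real". Minimizing the resulting log-loss therefore moves
// the generator toward outputs the current discriminator accepts as real, which is
// the alternating GAN update this loop implements.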
- generatorInputSet = alg.gaussianNoise(n, k); - discriminatorInputSet = modelSetTestGenerator(generatorInputSet); - y_hat = modelSetTestDiscriminator(discriminatorInputSet); - outputSet = alg.onevec(n); - - std::vector>> cumulativeGeneratorHiddenLayerWGrad = computeGeneratorGradients(y_hat, outputSet); - cumulativeGeneratorHiddenLayerWGrad = alg.scalarMultiply(learning_rate / n, cumulativeGeneratorHiddenLayerWGrad); - updateGeneratorParameters(cumulativeGeneratorHiddenLayerWGrad, learning_rate); - - forwardPass(); - if (UI) { - MLPPGANOld::UI(epoch, cost_prev, MLPPGANOld::y_hat, alg.onevec(n)); - } - - epoch++; - if (epoch > max_epoch) { - break; - } - } -} - -real_t MLPPGANOld::score() { - MLPPLinAlgOld alg; - MLPPUtilities util; - forwardPass(); - return util.performance(y_hat, alg.onevec(n)); -} - -void MLPPGANOld::save(std::string fileName) { - MLPPUtilities util; - if (!network.empty()) { - util.saveParameters(fileName, network[0].weights, network[0].bias, false, 1); - for (uint32_t i = 1; i < network.size(); i++) { - util.saveParameters(fileName, network[i].weights, network[i].bias, true, i + 1); - } - util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, true, network.size() + 1); - } else { - util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, false, network.size() + 1); - } -} - -void MLPPGANOld::addLayer(int n_hidden, std::string activation, std::string weightInit, std::string reg, real_t lambda, real_t alpha) { - MLPPLinAlgOld alg; - if (network.empty()) { - network.push_back(MLPPOldHiddenLayer(n_hidden, activation, alg.gaussianNoise(n, k), weightInit, reg, lambda, alpha)); - network[0].forwardPass(); - } else { - network.push_back(MLPPOldHiddenLayer(n_hidden, activation, network[network.size() - 1].a, weightInit, reg, lambda, alpha)); - network[network.size() - 1].forwardPass(); - } -} - -void MLPPGANOld::addOutputLayer(std::string weightInit, std::string reg, real_t lambda, real_t alpha) { - MLPPLinAlgOld alg; - if (!network.empty()) { - outputLayer = new MLPPOldOutputLayer(network[network.size() - 1].n_hidden, "Sigmoid", "LogLoss", network[network.size() - 1].a, weightInit, reg, lambda, alpha); - } else { - outputLayer = new MLPPOldOutputLayer(k, "Sigmoid", "LogLoss", alg.gaussianNoise(n, k), weightInit, reg, lambda, alpha); - } -} - -std::vector> MLPPGANOld::modelSetTestGenerator(std::vector> X) { - if (!network.empty()) { - network[0].input = X; - network[0].forwardPass(); - - for (uint32_t i = 1; i <= network.size() / 2; i++) { - network[i].input = network[i - 1].a; - network[i].forwardPass(); - } - } - return network[network.size() / 2].a; -} - -std::vector MLPPGANOld::modelSetTestDiscriminator(std::vector> X) { - if (!network.empty()) { - for (uint32_t i = network.size() / 2 + 1; i < network.size(); i++) { - if (i == network.size() / 2 + 1) { - network[i].input = X; - } else { - network[i].input = network[i - 1].a; - } - network[i].forwardPass(); - } - outputLayer->input = network[network.size() - 1].a; - } - outputLayer->forwardPass(); - return outputLayer->a; -} - -real_t MLPPGANOld::Cost(std::vector y_hat, std::vector y) { - MLPPRegOld regularization; - class MLPPCostOld cost; - real_t totalRegTerm = 0; - - auto cost_function = outputLayer->cost_map[outputLayer->cost]; - if (!network.empty()) { - for (uint32_t i = 0; i < network.size() - 1; i++) { - totalRegTerm += regularization.regTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg); - } - } - return (cost.*cost_function)(y_hat, y) + totalRegTerm + 
regularization.regTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg); -} - -void MLPPGANOld::forwardPass() { - MLPPLinAlgOld alg; - if (!network.empty()) { - network[0].input = alg.gaussianNoise(n, k); - network[0].forwardPass(); - - for (uint32_t i = 1; i < network.size(); i++) { - network[i].input = network[i - 1].a; - network[i].forwardPass(); - } - outputLayer->input = network[network.size() - 1].a; - } else { // Should never happen, though. - outputLayer->input = alg.gaussianNoise(n, k); - } - outputLayer->forwardPass(); - y_hat = outputLayer->a; -} - -void MLPPGANOld::updateDiscriminatorParameters(std::vector>> hiddenLayerUpdations, std::vector outputLayerUpdation, real_t learning_rate) { - MLPPLinAlgOld alg; - - outputLayer->weights = alg.subtraction(outputLayer->weights, outputLayerUpdation); - outputLayer->bias -= learning_rate * alg.sum_elements(outputLayer->delta) / n; - - if (!network.empty()) { - network[network.size() - 1].weights = alg.subtraction(network[network.size() - 1].weights, hiddenLayerUpdations[0]); - network[network.size() - 1].bias = alg.subtractMatrixRows(network[network.size() - 1].bias, alg.scalarMultiply(learning_rate / n, network[network.size() - 1].delta)); - - for (int i = static_cast(network.size()) - 2; i > static_cast(network.size()) / 2; i--) { - network[i].weights = alg.subtraction(network[i].weights, hiddenLayerUpdations[(network.size() - 2) - i + 1]); - network[i].bias = alg.subtractMatrixRows(network[i].bias, alg.scalarMultiply(learning_rate / n, network[i].delta)); - } - } -} - -void MLPPGANOld::updateGeneratorParameters(std::vector>> hiddenLayerUpdations, real_t learning_rate) { - MLPPLinAlgOld alg; - - if (!network.empty()) { - for (int i = network.size() / 2; i >= 0; i--) { - //std::cout << network[i].weights.size() << "x" << network[i].weights[0].size() << std::endl; - //std::cout << hiddenLayerUpdations[(network.size() - 2) - i + 1].size() << "x" << hiddenLayerUpdations[(network.size() - 2) - i + 1][0].size() << std::endl; - network[i].weights = alg.subtraction(network[i].weights, hiddenLayerUpdations[(network.size() - 2) - i + 1]); - network[i].bias = alg.subtractMatrixRows(network[i].bias, alg.scalarMultiply(learning_rate / n, network[i].delta)); - } - } -} - -std::tuple>>, std::vector> MLPPGANOld::computeDiscriminatorGradients(std::vector y_hat, std::vector outputSet) { - class MLPPCostOld cost; - MLPPActivationOld avn; - MLPPLinAlgOld alg; - MLPPRegOld regularization; - - std::vector>> cumulativeHiddenLayerWGrad; // Tensor containing ALL hidden grads. 
- - auto costDeriv = outputLayer->costDeriv_map[outputLayer->cost]; - auto outputAvn = outputLayer->activation_map[outputLayer->activation]; - outputLayer->delta = alg.hadamard_product((cost.*costDeriv)(y_hat, outputSet), (avn.*outputAvn)(outputLayer->z, 1)); - std::vector outputWGrad = alg.mat_vec_mult(alg.transpose(outputLayer->input), outputLayer->delta); - outputWGrad = alg.addition(outputWGrad, regularization.regDerivTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg)); - - if (!network.empty()) { - auto hiddenLayerAvn = network[network.size() - 1].activation_map[network[network.size() - 1].activation]; - - network[network.size() - 1].delta = alg.hadamard_product(alg.outerProduct(outputLayer->delta, outputLayer->weights), (avn.*hiddenLayerAvn)(network[network.size() - 1].z, 1)); - std::vector> hiddenLayerWGrad = alg.matmult(alg.transpose(network[network.size() - 1].input), network[network.size() - 1].delta); - - cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[network.size() - 1].weights, network[network.size() - 1].lambda, network[network.size() - 1].alpha, network[network.size() - 1].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well. - - //std::cout << "HIDDENLAYER FIRST:" << hiddenLayerWGrad.size() << "x" << hiddenLayerWGrad[0].size() << std::endl; - //std::cout << "WEIGHTS SECOND:" << network[network.size() - 1].weights.size() << "x" << network[network.size() - 1].weights[0].size() << std::endl; - - for (int i = static_cast(network.size()) - 2; i > static_cast(network.size()) / 2; i--) { - hiddenLayerAvn = network[i].activation_map[network[i].activation]; - network[i].delta = alg.hadamard_product(alg.matmult(network[i + 1].delta, alg.transpose(network[i + 1].weights)), (avn.*hiddenLayerAvn)(network[i].z, 1)); - hiddenLayerWGrad = alg.matmult(alg.transpose(network[i].input), network[i].delta); - - cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well. - } - } - return { cumulativeHiddenLayerWGrad, outputWGrad }; -} - -std::vector>> MLPPGANOld::computeGeneratorGradients(std::vector y_hat, std::vector outputSet) { - class MLPPCostOld cost; - MLPPActivationOld avn; - MLPPLinAlgOld alg; - MLPPRegOld regularization; - - std::vector>> cumulativeHiddenLayerWGrad; // Tensor containing ALL hidden grads. 
- - auto costDeriv = outputLayer->costDeriv_map[outputLayer->cost]; - auto outputAvn = outputLayer->activation_map[outputLayer->activation]; - outputLayer->delta = alg.hadamard_product((cost.*costDeriv)(y_hat, outputSet), (avn.*outputAvn)(outputLayer->z, 1)); - std::vector outputWGrad = alg.mat_vec_mult(alg.transpose(outputLayer->input), outputLayer->delta); - outputWGrad = alg.addition(outputWGrad, regularization.regDerivTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg)); - if (!network.empty()) { - auto hiddenLayerAvn = network[network.size() - 1].activation_map[network[network.size() - 1].activation]; - network[network.size() - 1].delta = alg.hadamard_product(alg.outerProduct(outputLayer->delta, outputLayer->weights), (avn.*hiddenLayerAvn)(network[network.size() - 1].z, 1)); - std::vector> hiddenLayerWGrad = alg.matmult(alg.transpose(network[network.size() - 1].input), network[network.size() - 1].delta); - cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[network.size() - 1].weights, network[network.size() - 1].lambda, network[network.size() - 1].alpha, network[network.size() - 1].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well. - - for (int i = network.size() - 2; i >= 0; i--) { - hiddenLayerAvn = network[i].activation_map[network[i].activation]; - network[i].delta = alg.hadamard_product(alg.matmult(network[i + 1].delta, alg.transpose(network[i + 1].weights)), (avn.*hiddenLayerAvn)(network[i].z, 1)); - hiddenLayerWGrad = alg.matmult(alg.transpose(network[i].input), network[i].delta); - cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well. - } - } - return cumulativeHiddenLayerWGrad; -} - -void MLPPGANOld::UI(int epoch, real_t cost_prev, std::vector y_hat, std::vector outputSet) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); - std::cout << "Layer " << network.size() + 1 << ": " << std::endl; - MLPPUtilities::UI(outputLayer->weights, outputLayer->bias); - if (!network.empty()) { - for (int i = network.size() - 1; i >= 0; i--) { - std::cout << "Layer " << i + 1 << ": " << std::endl; - MLPPUtilities::UI(network[i].weights, network[i].bias); - } - } -} diff --git a/mlpp/gan/gan_old.h b/mlpp/gan/gan_old.h deleted file mode 100644 index a88d593..0000000 --- a/mlpp/gan/gan_old.h +++ /dev/null @@ -1,59 +0,0 @@ - -#ifndef MLPP_GAN_OLD_hpp -#define MLPP_GAN_OLD_hpp - -// -// GAN.hpp -// -// Created by Marc Melikyan on 11/4/20. 
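// For reference, the objective the training loop above alternates on: the output
// layer is a sigmoid with log-loss, so a discriminator step minimizes binary
// cross-entropy over fake samples labelled 0 plus real samples labelled 1, and a
// generator step minimizes the same loss over fake samples labelled 1. A minimal
// standalone helper (double in place of real_t; assumes y_hat holds sigmoid
// outputs strictly inside (0, 1)):
#include <cmath>
#include <cstddef>
#include <vector>

double log_loss_sketch(const std::vector<double> &y_hat, const std::vector<double> &y) {
	double sum = 0;
	for (size_t i = 0; i < y_hat.size(); ++i) {
		sum += y[i] * std::log(y_hat[i]) + (1 - y[i]) * std::log(1 - y_hat[i]);
	}
	return -sum / double(y_hat.size());
}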
-// - -#include "core/math/math_defs.h" - -#include "../hidden_layer/hidden_layer.h" -#include "../output_layer/output_layer.h" - -#include "../hidden_layer/hidden_layer_old.h" -#include "../output_layer/output_layer_old.h" - -#include -#include -#include - -class MLPPGANOld { -public: - MLPPGANOld(real_t k, std::vector> outputSet); - ~MLPPGANOld(); - std::vector> generateExample(int n); - void gradientDescent(real_t learning_rate, int max_epoch, bool UI = false); - real_t score(); - void save(std::string fileName); - - void addLayer(int n_hidden, std::string activation, std::string weightInit = "Default", std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5); - void addOutputLayer(std::string weightInit = "Default", std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5); - -private: - std::vector> modelSetTestGenerator(std::vector> X); // Evaluator for the generator of the gan. - std::vector modelSetTestDiscriminator(std::vector> X); // Evaluator for the discriminator of the gan. - - real_t Cost(std::vector y_hat, std::vector y); - - void forwardPass(); - void updateDiscriminatorParameters(std::vector>> hiddenLayerUpdations, std::vector outputLayerUpdation, real_t learning_rate); - void updateGeneratorParameters(std::vector>> hiddenLayerUpdations, real_t learning_rate); - std::tuple>>, std::vector> computeDiscriminatorGradients(std::vector y_hat, std::vector outputSet); - std::vector>> computeGeneratorGradients(std::vector y_hat, std::vector outputSet); - - void UI(int epoch, real_t cost_prev, std::vector y_hat, std::vector outputSet); - - std::vector> outputSet; - std::vector y_hat; - - std::vector network; - MLPPOldOutputLayer *outputLayer; - - int n; - int k; -}; - -#endif /* GAN_hpp */ \ No newline at end of file diff --git a/mlpp/gauss_markov_checker/gauss_markov_checker_old.cpp b/mlpp/gauss_markov_checker/gauss_markov_checker_old.cpp deleted file mode 100644 index 52252b9..0000000 --- a/mlpp/gauss_markov_checker/gauss_markov_checker_old.cpp +++ /dev/null @@ -1,61 +0,0 @@ -// -// GaussMarkovChecker.cpp -// -// Created by Marc Melikyan on 11/13/20. -// - -#include "gauss_markov_checker_old.h" -#include "../stat/stat_old.h" -#include "core/int_types.h" -#include - -void MLPPGaussMarkovCheckerOld::checkGMConditions(std::vector eps) { - bool condition1 = arithmeticMean(eps); - bool condition2 = homoscedasticity(eps); - bool condition3 = exogeneity(eps); - - if (condition1 && condition2 && condition3) { - std::cout << "Gauss-Markov conditions were not violated. You may use OLS to obtain a BLUE estimator" << std::endl; - } else { - std::cout << "A test of the expected value of 0 of the error terms returned " << std::boolalpha << condition1 << ", a test of homoscedasticity has returned " << std::boolalpha << condition2 << ", and a test of exogenity has returned " << std::boolalpha << "." 
<< std::endl; - } -} - -bool MLPPGaussMarkovCheckerOld::arithmeticMean(std::vector eps) { - MLPPStatOld stat; - if (stat.mean(eps) == 0) { - return true; - } else { - return false; - } -} - -bool MLPPGaussMarkovCheckerOld::homoscedasticity(std::vector eps) { - MLPPStatOld stat; - real_t currentVar = (eps[0] - stat.mean(eps)) * (eps[0] - stat.mean(eps)) / eps.size(); - for (uint32_t i = 0; i < eps.size(); i++) { - if (currentVar != (eps[i] - stat.mean(eps)) * (eps[i] - stat.mean(eps)) / eps.size()) { - return false; - } - } - - return true; -} - -bool MLPPGaussMarkovCheckerOld::exogeneity(std::vector eps) { - MLPPStatOld stat; - for (uint32_t i = 0; i < eps.size(); i++) { - for (uint32_t j = 0; j < eps.size(); j++) { - if (i != j) { - if ((eps[i] - stat.mean(eps)) * (eps[j] - stat.mean(eps)) / eps.size() != 0) { - return false; - } - } - } - } - - return true; -} - -void MLPPGaussMarkovCheckerOld::_bind_methods() { -} diff --git a/mlpp/gauss_markov_checker/gauss_markov_checker_old.h b/mlpp/gauss_markov_checker/gauss_markov_checker_old.h deleted file mode 100644 index 9ef2e19..0000000 --- a/mlpp/gauss_markov_checker/gauss_markov_checker_old.h +++ /dev/null @@ -1,29 +0,0 @@ - -#ifndef MLPP_GAUSS_MARKOV_CHECKER_OLD_H -#define MLPP_GAUSS_MARKOV_CHECKER_OLD_H - -// -// GaussMarkovChecker.hpp -// -// Created by Marc Melikyan on 11/13/20. -// - -#include "core/math/math_defs.h" - -#include -#include - -class MLPPGaussMarkovCheckerOld { -public: - void checkGMConditions(std::vector eps); - - // Independent, 3 Gauss-Markov Conditions - bool arithmeticMean(std::vector eps); // 1) Arithmetic Mean of 0. - bool homoscedasticity(std::vector eps); // 2) Homoscedasticity - bool exogeneity(std::vector eps); // 3) Cov of any 2 non-equal eps values = 0. - -protected: - static void _bind_methods(); -}; - -#endif /* GaussMarkovChecker_hpp */ diff --git a/mlpp/gaussian_nb/gaussian_nb_old.cpp b/mlpp/gaussian_nb/gaussian_nb_old.cpp deleted file mode 100644 index b55b8bd..0000000 --- a/mlpp/gaussian_nb/gaussian_nb_old.cpp +++ /dev/null @@ -1,93 +0,0 @@ -// -// GaussianNB.cpp -// -// Created by Marc Melikyan on 1/17/21. 
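// For reference, the checker above tests the three Gauss-Markov conditions on the
// residuals: zero mean, an equal variance contribution from every residual, and zero
// covariance between distinct residuals. A tolerance-based variant is sketched below,
// since exact == comparisons on floating-point residuals will rarely pass on real
// data; the epsilon and the function name are illustrative choices only.
#include <cmath>
#include <cstddef>
#include <vector>

bool gauss_markov_ok(const std::vector<double> &eps, double tol = 1e-9) {
	double mean = 0;
	for (double e : eps) {
		mean += e;
	}
	mean /= double(eps.size());
	if (std::fabs(mean) > tol) {
		return false; // condition 1: E[eps] = 0
	}
	double first = (eps[0] - mean) * (eps[0] - mean);
	for (size_t i = 0; i < eps.size(); ++i) {
		if (std::fabs((eps[i] - mean) * (eps[i] - mean) - first) > tol) {
			return false; // condition 2: homoscedasticity
		}
		for (size_t j = i + 1; j < eps.size(); ++j) {
			if (std::fabs((eps[i] - mean) * (eps[j] - mean)) > tol) {
				return false; // condition 3: no correlation between errors
			}
		}
	}
	return true;
}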
-// - -#include "gaussian_nb_old.h" - -#include "../lin_alg/lin_alg_old.h" -#include "../stat/stat_old.h" -#include "../utilities/utilities.h" - -#include -#include -#include - -#ifndef M_PI -#define M_PI 3.141592653 -#endif - -MLPPGaussianNBOld::MLPPGaussianNBOld(std::vector> p_inputSet, std::vector p_outputSet, int p_class_num) { - inputSet = p_inputSet; - outputSet = p_outputSet; - class_num = p_class_num; - - y_hat.resize(outputSet.size()); - Evaluate(); -} - -std::vector MLPPGaussianNBOld::modelSetTest(std::vector> X) { - std::vector y_hat; - for (uint32_t i = 0; i < X.size(); i++) { - y_hat.push_back(modelTest(X[i])); - } - return y_hat; -} - -real_t MLPPGaussianNBOld::modelTest(std::vector x) { - real_t score[class_num]; - real_t y_hat_i = 1; - for (int i = class_num - 1; i >= 0; i--) { - y_hat_i += std::log(priors[i] * (1 / sqrt(2 * M_PI * sigma[i] * sigma[i])) * exp(-(x[i] * mu[i]) * (x[i] * mu[i]) / (2 * sigma[i] * sigma[i]))); - score[i] = exp(y_hat_i); - } - return std::distance(score, std::max_element(score, score + sizeof(score) / sizeof(real_t))); -} - -real_t MLPPGaussianNBOld::score() { - MLPPUtilities util; - return util.performance(y_hat, outputSet); -} - -void MLPPGaussianNBOld::Evaluate() { - MLPPStatOld stat; - MLPPLinAlgOld alg; - - // Computing mu_k_y and sigma_k_y - mu.resize(class_num); - sigma.resize(class_num); - for (int i = class_num - 1; i >= 0; i--) { - std::vector set; - for (uint32_t j = 0; j < inputSet.size(); j++) { - for (uint32_t k = 0; k < inputSet[j].size(); k++) { - if (outputSet[j] == i) { - set.push_back(inputSet[j][k]); - } - } - } - mu[i] = stat.mean(set); - sigma[i] = stat.standardDeviation(set); - } - - // Priors - priors.resize(class_num); - for (uint32_t i = 0; i < outputSet.size(); i++) { - priors[int(outputSet[i])]++; - } - priors = alg.scalarMultiply(real_t(1) / real_t(outputSet.size()), priors); - - for (uint32_t i = 0; i < outputSet.size(); i++) { - real_t score[class_num]; - real_t y_hat_i = 1; - for (int j = class_num - 1; j >= 0; j--) { - for (uint32_t k = 0; k < inputSet[i].size(); k++) { - y_hat_i += std::log(priors[j] * (1 / sqrt(2 * M_PI * sigma[j] * sigma[j])) * exp(-(inputSet[i][k] * mu[j]) * (inputSet[i][k] * mu[j]) / (2 * sigma[j] * sigma[j]))); - } - score[j] = exp(y_hat_i); - std::cout << score[j] << std::endl; - } - y_hat[i] = std::distance(score, std::max_element(score, score + sizeof(score) / sizeof(real_t))); - std::cout << std::distance(score, std::max_element(score, score + sizeof(score) / sizeof(real_t))) << std::endl; - } -} diff --git a/mlpp/gaussian_nb/gaussian_nb_old.h b/mlpp/gaussian_nb/gaussian_nb_old.h deleted file mode 100644 index 6a3c04d..0000000 --- a/mlpp/gaussian_nb/gaussian_nb_old.h +++ /dev/null @@ -1,37 +0,0 @@ - -#ifndef MLPP_GAUSSIAN_NB_OLD_H -#define MLPP_GAUSSIAN_NB_OLD_H - -// -// GaussianNB.hpp -// -// Created by Marc Melikyan on 1/17/21. 
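// For reference, the textbook Gaussian Naive Bayes rule that the class above
// approximates: score(c) = log P(c) + sum_k log N(x_k; mu_{c,k}, sigma_{c,k}), with
// the prediction being the argmax over classes. The sketch below keeps a separate
// mean and standard deviation per class and per feature; that layout, the function
// name and the use of double are illustrative, not the member layout of the removed class.
#include <cmath>
#include <cstddef>
#include <vector>

int gaussian_nb_predict(const std::vector<double> &x,
		const std::vector<double> &priors, // P(c)
		const std::vector<std::vector<double>> &mu, // mu[c][k]
		const std::vector<std::vector<double>> &sigma) { // sigma[c][k]
	const double pi = 3.14159265358979323846;
	int best = 0;
	double best_score = -1e308;
	for (size_t c = 0; c < priors.size(); ++c) {
		double score = std::log(priors[c]);
		for (size_t k = 0; k < x.size(); ++k) {
			double var = sigma[c][k] * sigma[c][k];
			score += -0.5 * std::log(2 * pi * var) - (x[k] - mu[c][k]) * (x[k] - mu[c][k]) / (2 * var);
		}
		if (score > best_score) {
			best_score = score;
			best = int(c);
		}
	}
	return best;
}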
-// - -#include "core/math/math_defs.h" - -#include - -class MLPPGaussianNBOld { -public: - MLPPGaussianNBOld(std::vector> inputSet, std::vector outputSet, int class_num); - std::vector modelSetTest(std::vector> X); - real_t modelTest(std::vector x); - real_t score(); - -private: - void Evaluate(); - - int class_num; - - std::vector priors; - std::vector mu; - std::vector sigma; - - std::vector> inputSet; - std::vector outputSet; - - std::vector y_hat; -}; - -#endif /* GaussianNB_hpp */ diff --git a/mlpp/hidden_layer/hidden_layer_old.cpp b/mlpp/hidden_layer/hidden_layer_old.cpp deleted file mode 100644 index 3f74b7f..0000000 --- a/mlpp/hidden_layer/hidden_layer_old.cpp +++ /dev/null @@ -1,118 +0,0 @@ -// -// HiddenLayer.cpp -// -// Created by Marc Melikyan on 11/4/20. -// - -#include "hidden_layer_old.h" -#include "../activation/activation.h" -#include "../lin_alg/lin_alg_old.h" - -#include -#include - -MLPPOldHiddenLayer::MLPPOldHiddenLayer(int p_n_hidden, std::string p_activation, std::vector> p_input, std::string p_weightInit, std::string p_reg, real_t p_lambda, real_t p_alpha) { - n_hidden = p_n_hidden; - activation = p_activation; - input = p_input; - weightInit = p_weightInit; - reg = p_reg; - lambda = p_lambda; - alpha = p_alpha; - - weights = MLPPUtilities::weightInitialization(input[0].size(), n_hidden, weightInit); - bias = MLPPUtilities::biasInitialization(n_hidden); - - activation_map["Linear"] = &MLPPActivationOld::linear; - activationTest_map["Linear"] = &MLPPActivationOld::linear; - - activation_map["Sigmoid"] = &MLPPActivationOld::sigmoid; - activationTest_map["Sigmoid"] = &MLPPActivationOld::sigmoid; - - activation_map["Swish"] = &MLPPActivationOld::swish; - activationTest_map["Swish"] = &MLPPActivationOld::swish; - - activation_map["Mish"] = &MLPPActivationOld::mish; - activationTest_map["Mish"] = &MLPPActivationOld::mish; - - activation_map["SinC"] = &MLPPActivationOld::sinc; - activationTest_map["SinC"] = &MLPPActivationOld::sinc; - - activation_map["Softplus"] = &MLPPActivationOld::softplus; - activationTest_map["Softplus"] = &MLPPActivationOld::softplus; - - activation_map["Softsign"] = &MLPPActivationOld::softsign; - activationTest_map["Softsign"] = &MLPPActivationOld::softsign; - - activation_map["CLogLog"] = &MLPPActivationOld::cloglog; - activationTest_map["CLogLog"] = &MLPPActivationOld::cloglog; - - activation_map["Logit"] = &MLPPActivationOld::logit; - activationTest_map["Logit"] = &MLPPActivationOld::logit; - - activation_map["GaussianCDF"] = &MLPPActivationOld::gaussianCDF; - activationTest_map["GaussianCDF"] = &MLPPActivationOld::gaussianCDF; - - activation_map["RELU"] = &MLPPActivationOld::RELU; - activationTest_map["RELU"] = &MLPPActivationOld::RELU; - - activation_map["GELU"] = &MLPPActivationOld::GELU; - activationTest_map["GELU"] = &MLPPActivationOld::GELU; - - activation_map["Sign"] = &MLPPActivationOld::sign; - activationTest_map["Sign"] = &MLPPActivationOld::sign; - - activation_map["UnitStep"] = &MLPPActivationOld::unitStep; - activationTest_map["UnitStep"] = &MLPPActivationOld::unitStep; - - activation_map["Sinh"] = &MLPPActivationOld::sinh; - activationTest_map["Sinh"] = &MLPPActivationOld::sinh; - - activation_map["Cosh"] = &MLPPActivationOld::cosh; - activationTest_map["Cosh"] = &MLPPActivationOld::cosh; - - activation_map["Tanh"] = &MLPPActivationOld::tanh; - activationTest_map["Tanh"] = &MLPPActivationOld::tanh; - - activation_map["Csch"] = &MLPPActivationOld::csch; - activationTest_map["Csch"] = &MLPPActivationOld::csch; - - 
activation_map["Sech"] = &MLPPActivationOld::sech; - activationTest_map["Sech"] = &MLPPActivationOld::sech; - - activation_map["Coth"] = &MLPPActivationOld::coth; - activationTest_map["Coth"] = &MLPPActivationOld::coth; - - activation_map["Arsinh"] = &MLPPActivationOld::arsinh; - activationTest_map["Arsinh"] = &MLPPActivationOld::arsinh; - - activation_map["Arcosh"] = &MLPPActivationOld::arcosh; - activationTest_map["Arcosh"] = &MLPPActivationOld::arcosh; - - activation_map["Artanh"] = &MLPPActivationOld::artanh; - activationTest_map["Artanh"] = &MLPPActivationOld::artanh; - - activation_map["Arcsch"] = &MLPPActivationOld::arcsch; - activationTest_map["Arcsch"] = &MLPPActivationOld::arcsch; - - activation_map["Arsech"] = &MLPPActivationOld::arsech; - activationTest_map["Arsech"] = &MLPPActivationOld::arsech; - - activation_map["Arcoth"] = &MLPPActivationOld::arcoth; - activationTest_map["Arcoth"] = &MLPPActivationOld::arcoth; -} - -void MLPPOldHiddenLayer::forwardPass() { - MLPPLinAlgOld alg; - MLPPActivationOld avn; - - z = alg.mat_vec_add(alg.matmult(input, weights), bias); - a = (avn.*activation_map[activation])(z, false); -} - -void MLPPOldHiddenLayer::Test(std::vector x) { - MLPPLinAlgOld alg; - MLPPActivationOld avn; - z_test = alg.addition(alg.mat_vec_mult(alg.transpose(weights), x), bias); - a_test = (avn.*activationTest_map[activation])(z_test, false); -} diff --git a/mlpp/hidden_layer/hidden_layer_old.h b/mlpp/hidden_layer/hidden_layer_old.h deleted file mode 100644 index 23ca450..0000000 --- a/mlpp/hidden_layer/hidden_layer_old.h +++ /dev/null @@ -1,61 +0,0 @@ - -#ifndef MLPP_HIDDEN_LAYER_OLD_H -#define MLPP_HIDDEN_LAYER_OLD_H - -// -// HiddenLayer.hpp -// -// Created by Marc Melikyan on 11/4/20. -// - -#include "core/math/math_defs.h" -#include "core/string/ustring.h" - -#include "core/object/reference.h" - -#include "../activation/activation_old.h" -#include "../regularization/reg.h" -#include "../utilities/utilities.h" - -#include "../lin_alg/mlpp_matrix.h" -#include "../lin_alg/mlpp_vector.h" - -#include -#include -#include - -class MLPPOldHiddenLayer { -public: - MLPPOldHiddenLayer(int n_hidden, std::string activation, std::vector> input, std::string weightInit, std::string reg, real_t lambda, real_t alpha); - - int n_hidden; - std::string activation; - - std::vector> input; - - std::vector> weights; - std::vector bias; - - std::vector> z; - std::vector> a; - - std::map> (MLPPActivationOld::*)(std::vector>, bool)> activation_map; - std::map (MLPPActivationOld::*)(std::vector, bool)> activationTest_map; - - std::vector z_test; - std::vector a_test; - - std::vector> delta; - - // Regularization Params - std::string reg; - real_t lambda; /* Regularization Parameter */ - real_t alpha; /* This is the controlling param for Elastic Net*/ - - std::string weightInit; - - void forwardPass(); - void Test(std::vector x); -}; - -#endif /* HiddenLayer_hpp */ \ No newline at end of file diff --git a/mlpp/hypothesis_testing/hypothesis_testing_old.cpp b/mlpp/hypothesis_testing/hypothesis_testing_old.cpp deleted file mode 100644 index c5d1896..0000000 --- a/mlpp/hypothesis_testing/hypothesis_testing_old.cpp +++ /dev/null @@ -1,20 +0,0 @@ -// -// HypothesisTesting.cpp -// -// Created by Marc Melikyan on 3/10/21. 
-// - -#include "hypothesis_testing_old.h" - -std::tuple MLPPHypothesisTestingOld::chiSquareTest(std::vector observed, std::vector expected) { - //real_t df = observed.size() - 1; // These are our degrees of freedom - //real_t sum = 0; - //for (uint32_t i = 0; i < observed.size(); i++) { - // sum += (observed[i] - expected[i]) * (observed[i] - expected[i]) / expected[i]; - //} - - return std::tuple(); -} - -void MLPPHypothesisTestingOld::_bind_methods() { -} diff --git a/mlpp/hypothesis_testing/hypothesis_testing_old.h b/mlpp/hypothesis_testing/hypothesis_testing_old.h deleted file mode 100644 index 8510bc8..0000000 --- a/mlpp/hypothesis_testing/hypothesis_testing_old.h +++ /dev/null @@ -1,25 +0,0 @@ - -#ifndef MLPP_HYPOTHESIS_TESTING_OLD_H -#define MLPP_HYPOTHESIS_TESTING_OLD_H - -// -// HypothesisTesting.hpp -// -// Created by Marc Melikyan on 3/10/21. -// - -#include "core/math/math_defs.h" -#include "core/int_types.h" - -#include -#include - -class MLPPHypothesisTestingOld { -public: - std::tuple chiSquareTest(std::vector observed, std::vector expected); - -protected: - static void _bind_methods(); -}; - -#endif /* HypothesisTesting_hpp */ diff --git a/mlpp/lin_alg/lin_alg_old.cpp b/mlpp/lin_alg/lin_alg_old.cpp deleted file mode 100644 index b38ed62..0000000 --- a/mlpp/lin_alg/lin_alg_old.cpp +++ /dev/null @@ -1,1410 +0,0 @@ -// -// LinAlg.cpp -// -// Created by Marc Melikyan on 1/8/21. -// - -#include "lin_alg_old.h" - -#include "core/math/math_funcs.h" - -#include "../stat/stat_old.h" - -#include -#include -#include -#include - -#ifndef M_PI -#define M_PI 3.141592653 -#endif - -std::vector> MLPPLinAlgOld::gramMatrix(std::vector> A) { - return matmult(transpose(A), A); // AtA -} - -bool MLPPLinAlgOld::linearIndependenceChecker(std::vector> A) { - if (det(gramMatrix(A), A.size()) == 0) { - return false; - } - return true; -} - -std::vector> MLPPLinAlgOld::gaussianNoise(int n, int m) { - std::random_device rd; - std::default_random_engine generator(rd()); - - std::vector> A; - A.resize(n); - for (int i = 0; i < n; i++) { - A[i].resize(m); - for (int j = 0; j < m; j++) { - std::normal_distribution distribution(0, 1); // Standard normal distribution. Mean of 0, std of 1. 
- A[i][j] = distribution(generator); - } - } - return A; -} - -std::vector> MLPPLinAlgOld::addition(std::vector> A, std::vector> B) { - std::vector> C; - C.resize(A.size()); - for (uint32_t i = 0; i < C.size(); i++) { - C[i].resize(A[0].size()); - } - - for (uint32_t i = 0; i < A.size(); i++) { - for (uint32_t j = 0; j < A[0].size(); j++) { - C[i][j] = A[i][j] + B[i][j]; - } - } - return C; -} - -std::vector> MLPPLinAlgOld::subtraction(std::vector> A, std::vector> B) { - std::vector> C; - C.resize(A.size()); - for (uint32_t i = 0; i < C.size(); i++) { - C[i].resize(A[0].size()); - } - - for (uint32_t i = 0; i < A.size(); i++) { - for (uint32_t j = 0; j < A[0].size(); j++) { - C[i][j] = A[i][j] - B[i][j]; - } - } - return C; -} - -std::vector> MLPPLinAlgOld::matmult(std::vector> A, std::vector> B) { - std::vector> C; - C.resize(A.size()); - for (uint32_t i = 0; i < C.size(); i++) { - C[i].resize(B[0].size()); - } - - for (uint32_t i = 0; i < A.size(); i++) { - for (uint32_t k = 0; k < B.size(); k++) { - for (uint32_t j = 0; j < B[0].size(); j++) { - C[i][j] += A[i][k] * B[k][j]; - } - } - } - return C; -} - -std::vector> MLPPLinAlgOld::hadamard_product(std::vector> A, std::vector> B) { - std::vector> C; - C.resize(A.size()); - for (uint32_t i = 0; i < C.size(); i++) { - C[i].resize(A[0].size()); - } - - for (uint32_t i = 0; i < A.size(); i++) { - for (uint32_t j = 0; j < A[0].size(); j++) { - C[i][j] = A[i][j] * B[i][j]; - } - } - return C; -} - -std::vector> MLPPLinAlgOld::kronecker_product(std::vector> A, std::vector> B) { - std::vector> C; - - // [1,1,1,1] [1,2,3,4,5] - // [1,1,1,1] [1,2,3,4,5] - // [1,2,3,4,5] - - // [1,2,3,4,5] [1,2,3,4,5] [1,2,3,4,5] [1,2,3,4,5] - // [1,2,3,4,5] [1,2,3,4,5] [1,2,3,4,5] [1,2,3,4,5] - // [1,2,3,4,5] [1,2,3,4,5] [1,2,3,4,5] [1,2,3,4,5] - // [1,2,3,4,5] [1,2,3,4,5] [1,2,3,4,5] [1,2,3,4,5] - // [1,2,3,4,5] [1,2,3,4,5] [1,2,3,4,5] [1,2,3,4,5] - // [1,2,3,4,5] [1,2,3,4,5] [1,2,3,4,5] [1,2,3,4,5] - - // Resulting matrix: A.size() * B.size() - // A[0].size() * B[0].size() - - for (uint32_t i = 0; i < A.size(); i++) { - for (uint32_t j = 0; j < B.size(); j++) { - std::vector> row; - for (uint32_t k = 0; k < A[0].size(); k++) { - row.push_back(scalarMultiply(A[i][k], B[j])); - } - C.push_back(flatten(row)); - } - } - return C; -} - -std::vector> MLPPLinAlgOld::elementWiseDivision(std::vector> A, std::vector> B) { - std::vector> C; - C.resize(A.size()); - for (uint32_t i = 0; i < C.size(); i++) { - C[i].resize(A[0].size()); - } - for (uint32_t i = 0; i < A.size(); i++) { - for (uint32_t j = 0; j < A[i].size(); j++) { - C[i][j] = A[i][j] / B[i][j]; - } - } - return C; -} - -std::vector> MLPPLinAlgOld::transpose(std::vector> A) { - std::vector> AT; - AT.resize(A[0].size()); - for (uint32_t i = 0; i < AT.size(); i++) { - AT[i].resize(A.size()); - } - - for (uint32_t i = 0; i < A[0].size(); i++) { - for (uint32_t j = 0; j < A.size(); j++) { - AT[i][j] = A[j][i]; - } - } - return AT; -} - -std::vector> MLPPLinAlgOld::scalarMultiply(real_t scalar, std::vector> A) { - for (uint32_t i = 0; i < A.size(); i++) { - for (uint32_t j = 0; j < A[i].size(); j++) { - A[i][j] *= scalar; - } - } - return A; -} - -std::vector> MLPPLinAlgOld::scalarAdd(real_t scalar, std::vector> A) { - for (uint32_t i = 0; i < A.size(); i++) { - for (uint32_t j = 0; j < A[i].size(); j++) { - A[i][j] += scalar; - } - } - return A; -} - -std::vector> MLPPLinAlgOld::log(std::vector> A) { - std::vector> B; - B.resize(A.size()); - for (uint32_t i = 0; i < B.size(); i++) { - B[i].resize(A[0].size()); - } 
- for (uint32_t i = 0; i < A.size(); i++) { - for (uint32_t j = 0; j < A[i].size(); j++) { - B[i][j] = std::log(A[i][j]); - } - } - return B; -} - -std::vector> MLPPLinAlgOld::log10(std::vector> A) { - std::vector> B; - B.resize(A.size()); - for (uint32_t i = 0; i < B.size(); i++) { - B[i].resize(A[0].size()); - } - for (uint32_t i = 0; i < A.size(); i++) { - for (uint32_t j = 0; j < A[i].size(); j++) { - B[i][j] = std::log10(A[i][j]); - } - } - return B; -} - -std::vector> MLPPLinAlgOld::exp(std::vector> A) { - std::vector> B; - B.resize(A.size()); - for (uint32_t i = 0; i < B.size(); i++) { - B[i].resize(A[0].size()); - } - for (uint32_t i = 0; i < A.size(); i++) { - for (uint32_t j = 0; j < A[i].size(); j++) { - B[i][j] = std::exp(A[i][j]); - } - } - return B; -} - -std::vector> MLPPLinAlgOld::erf(std::vector> A) { - std::vector> B; - B.resize(A.size()); - for (uint32_t i = 0; i < B.size(); i++) { - B[i].resize(A[0].size()); - } - for (uint32_t i = 0; i < A.size(); i++) { - for (uint32_t j = 0; j < A[i].size(); j++) { - B[i][j] = std::erf(A[i][j]); - } - } - return B; -} - -std::vector> MLPPLinAlgOld::exponentiate(std::vector> A, real_t p) { - for (uint32_t i = 0; i < A.size(); i++) { - for (uint32_t j = 0; j < A[i].size(); j++) { - A[i][j] = std::pow(A[i][j], p); - } - } - return A; -} - -std::vector> MLPPLinAlgOld::sqrt(std::vector> A) { - return exponentiate(A, 0.5); -} - -std::vector> MLPPLinAlgOld::cbrt(std::vector> A) { - return exponentiate(A, real_t(1) / real_t(3)); -} - -std::vector> MLPPLinAlgOld::matrixPower(std::vector> A, int n) { - std::vector> B = identity(A.size()); - if (n == 0) { - return identity(A.size()); - } else if (n < 0) { - A = inverse(A); - } - for (int i = 0; i < std::abs(n); i++) { - B = matmult(B, A); - } - return B; -} - -std::vector> MLPPLinAlgOld::abs(std::vector> A) { - std::vector> B; - B.resize(A.size()); - for (uint32_t i = 0; i < B.size(); i++) { - B[i].resize(A[0].size()); - } - for (uint32_t i = 0; i < B.size(); i++) { - for (uint32_t j = 0; j < B[i].size(); j++) { - B[i][j] = std::abs(A[i][j]); - } - } - return B; -} - -real_t MLPPLinAlgOld::det(std::vector> A, int d) { - real_t deter = 0; - std::vector> B; - B.resize(d); - for (int i = 0; i < d; i++) { - B[i].resize(d); - } - - /* This is the base case in which the input is a 2x2 square matrix. - Recursion is performed unless and until we reach this base case, - such that we recieve a scalar as the result. 
*/ - if (d == 2) { - return A[0][0] * A[1][1] - A[0][1] * A[1][0]; - } - - else { - for (int i = 0; i < d; i++) { - int sub_i = 0; - for (int j = 1; j < d; j++) { - int sub_j = 0; - for (int k = 0; k < d; k++) { - if (k == i) { - continue; - } - B[sub_i][sub_j] = A[j][k]; - sub_j++; - } - sub_i++; - } - deter += std::pow(-1, i) * A[0][i] * det(B, d - 1); - } - } - return deter; -} - -real_t MLPPLinAlgOld::trace(std::vector> A) { - real_t trace = 0; - for (uint32_t i = 0; i < A.size(); i++) { - trace += A[i][i]; - } - return trace; -} - -std::vector> MLPPLinAlgOld::cofactor(std::vector> A, int n, int i, int j) { - std::vector> cof; - cof.resize(A.size()); - for (uint32_t ii = 0; ii < cof.size(); ii++) { - cof[ii].resize(A.size()); - } - int sub_i = 0, sub_j = 0; - - for (int row = 0; row < n; row++) { - for (int col = 0; col < n; col++) { - if (row != i && col != j) { - cof[sub_i][sub_j++] = A[row][col]; - - if (sub_j == n - 1) { - sub_j = 0; - sub_i++; - } - } - } - } - return cof; -} - -std::vector> MLPPLinAlgOld::adjoint(std::vector> A) { - //Resizing the initial adjoint matrix - std::vector> adj; - adj.resize(A.size()); - for (uint32_t i = 0; i < adj.size(); i++) { - adj[i].resize(A.size()); - } - - // Checking for the case where the given N x N matrix is a scalar - if (A.size() == 1) { - adj[0][0] = 1; - return adj; - } - - if (A.size() == 2) { - adj[0][0] = A[1][1]; - adj[1][1] = A[0][0]; - - adj[0][1] = -A[0][1]; - adj[1][0] = -A[1][0]; - return adj; - } - - for (uint32_t i = 0; i < A.size(); i++) { - for (uint32_t j = 0; j < A.size(); j++) { - std::vector> cof = cofactor(A, int(A.size()), i, j); - // 1 if even, -1 if odd - int sign = (i + j) % 2 == 0 ? 1 : -1; - adj[j][i] = sign * det(cof, int(A.size()) - 1); - } - } - return adj; -} - -// The inverse can be computed as (1 / determinant(A)) * adjoint(A) -std::vector> MLPPLinAlgOld::inverse(std::vector> A) { - return scalarMultiply(1 / det(A, int(A.size())), adjoint(A)); -} - -// This is simply the Moore-Penrose least squares approximation of the inverse. 
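For a square matrix with nonzero determinant, the cofactor/adjoint/inverse helpers above amount to A^{-1} = adj(A)/det(A), and the pseudoinverse defined next is the least-squares form (A^T A)^{-1} A^T, which presumes A^T A is invertible (full column rank). A minimal standalone sketch of the 2x2 adjugate inverse, assuming the module's real_t behaves like double:

    #include <array>
    #include <cstdio>

    // 2x2 inverse via the adjugate: A^{-1} = adj(A) / det(A).
    // Illustrative only; the determinant must be nonzero.
    using Mat2 = std::array<std::array<double, 2>, 2>;

    Mat2 inverse2x2(const Mat2 &A) {
        double det = A[0][0] * A[1][1] - A[0][1] * A[1][0];
        return { { { A[1][1] / det, -A[0][1] / det },
                   { -A[1][0] / det, A[0][0] / det } } };
    }

    int main() {
        Mat2 A = { { { 4.0, 7.0 }, { 2.0, 6.0 } } };
        Mat2 Ainv = inverse2x2(A);
        // det(A) = 10, so the expected result is [[0.6, -0.7], [-0.2, 0.4]].
        std::printf("%f %f\n%f %f\n", Ainv[0][0], Ainv[0][1], Ainv[1][0], Ainv[1][1]);
    }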
-std::vector> MLPPLinAlgOld::pinverse(std::vector> A) { - return matmult(inverse(matmult(transpose(A), A)), transpose(A)); -} - -std::vector> MLPPLinAlgOld::zeromat(int n, int m) { - std::vector> zeromat; - zeromat.resize(n); - for (uint32_t i = 0; i < zeromat.size(); i++) { - zeromat[i].resize(m); - } - return zeromat; -} - -std::vector> MLPPLinAlgOld::onemat(int n, int m) { - return full(n, m, 1); -} - -std::vector> MLPPLinAlgOld::full(int n, int m, int k) { - std::vector> full; - full.resize(n); - for (uint32_t i = 0; i < full.size(); i++) { - full[i].resize(m); - } - for (uint32_t i = 0; i < full.size(); i++) { - for (uint32_t j = 0; j < full[i].size(); j++) { - full[i][j] = k; - } - } - return full; -} - -std::vector> MLPPLinAlgOld::sin(std::vector> A) { - std::vector> B; - B.resize(A.size()); - for (uint32_t i = 0; i < B.size(); i++) { - B[i].resize(A[0].size()); - } - for (uint32_t i = 0; i < A.size(); i++) { - for (uint32_t j = 0; j < A[i].size(); j++) { - B[i][j] = std::sin(A[i][j]); - } - } - return B; -} - -std::vector> MLPPLinAlgOld::cos(std::vector> A) { - std::vector> B; - B.resize(A.size()); - for (uint32_t i = 0; i < B.size(); i++) { - B[i].resize(A[0].size()); - } - for (uint32_t i = 0; i < A.size(); i++) { - for (uint32_t j = 0; j < A[i].size(); j++) { - B[i][j] = std::cos(A[i][j]); - } - } - return B; -} - -std::vector MLPPLinAlgOld::max(std::vector a, std::vector b) { - std::vector c; - c.resize(a.size()); - for (uint32_t i = 0; i < c.size(); i++) { - if (a[i] >= b[i]) { - c[i] = a[i]; - } else { - c[i] = b[i]; - } - } - return c; -} - -real_t MLPPLinAlgOld::max(std::vector> A) { - return max(flatten(A)); -} - -real_t MLPPLinAlgOld::min(std::vector> A) { - return min(flatten(A)); -} - -std::vector> MLPPLinAlgOld::round(std::vector> A) { - std::vector> B; - B.resize(A.size()); - for (uint32_t i = 0; i < B.size(); i++) { - B[i].resize(A[0].size()); - } - for (uint32_t i = 0; i < A.size(); i++) { - for (uint32_t j = 0; j < A[i].size(); j++) { - B[i][j] = std::round(A[i][j]); - } - } - return B; -} - -real_t MLPPLinAlgOld::norm_2(std::vector> A) { - real_t sum = 0; - for (uint32_t i = 0; i < A.size(); i++) { - for (uint32_t j = 0; j < A[i].size(); j++) { - sum += A[i][j] * A[i][j]; - } - } - return std::sqrt(sum); -} - -std::vector> MLPPLinAlgOld::identity(real_t d) { - std::vector> identityMat; - identityMat.resize(d); - for (uint32_t i = 0; i < identityMat.size(); i++) { - identityMat[i].resize(d); - } - for (uint32_t i = 0; i < identityMat.size(); i++) { - for (uint32_t j = 0; j < identityMat.size(); j++) { - if (i == j) { - identityMat[i][j] = 1; - } else { - identityMat[i][j] = 0; - } - } - } - return identityMat; -} - -std::vector> MLPPLinAlgOld::cov(std::vector> A) { - MLPPStatOld stat; - std::vector> covMat; - covMat.resize(A.size()); - for (uint32_t i = 0; i < covMat.size(); i++) { - covMat[i].resize(A.size()); - } - for (uint32_t i = 0; i < A.size(); i++) { - for (uint32_t j = 0; j < A.size(); j++) { - covMat[i][j] = stat.covariance(A[i], A[j]); - } - } - return covMat; -} - -std::tuple>, std::vector>> MLPPLinAlgOld::eig(std::vector> A) { - /* - A (the entered parameter) in most use cases will be X'X, XX', etc. and must be symmetric. - That simply means that 1) X' = X and 2) X is a square matrix. This function that computes the - eigenvalues of a matrix is utilizing Jacobi's method. - */ - - real_t diagonal = true; // Perform the iterative Jacobi algorithm unless and until we reach a diagonal matrix which yields us the eigenvals. 
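The Jacobi iteration that follows repeatedly targets the largest off-diagonal entry a_ij with a plane rotation P, using theta = 0.5 * atan(2 * a_ij / (a_ii - a_jj)) (pi/4 when the two diagonal entries are equal) and updating A <- P^T A P while accumulating the eigenvectors; since P is orthogonal, its transpose and its inverse coincide. On a symmetric 2x2 matrix a single rotation already diagonalizes, as in this sketch (double assumed for real_t):

    #include <cmath>
    #include <cstdio>

    // One Jacobi rotation on a symmetric 2x2 matrix [[a00, a01], [a01, a11]].
    // The rotation angle is chosen so the off-diagonal entry of P^T A P vanishes.
    int main() {
        double a00 = 2.0, a01 = 1.0, a11 = 2.0; // test matrix [[2,1],[1,2]]
        double theta = (a00 == a11) ? std::atan(1.0) /* pi/4 */
                                    : 0.5 * std::atan(2.0 * a01 / (a00 - a11));
        double c = std::cos(theta), s = std::sin(theta);
        // Diagonal entries of D = P^T A P; the off-diagonal is (numerically) zero.
        double d00 = c * c * a00 + 2.0 * s * c * a01 + s * s * a11;
        double d11 = s * s * a00 - 2.0 * s * c * a01 + c * c * a11;
        std::printf("eigenvalues ~ %f and %f\n", d00, d11); // expect 3 and 1
    }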
- - std::map val_to_vec; - std::vector> a_new; - std::vector> eigenvectors = identity(A.size()); - do { - real_t a_ij = A[0][1]; - real_t sub_i = 0; - real_t sub_j = 1; - for (uint32_t i = 0; i < A.size(); i++) { - for (uint32_t j = 0; j < A[i].size(); j++) { - if (i != j && std::abs(A[i][j]) > a_ij) { - a_ij = A[i][j]; - sub_i = i; - sub_j = j; - } else if (i != j && std::abs(A[i][j]) == a_ij) { - if (i < sub_i) { - a_ij = A[i][j]; - sub_i = i; - sub_j = j; - } - } - } - } - - real_t a_ii = A[sub_i][sub_i]; - real_t a_jj = A[sub_j][sub_j]; - //real_t a_ji = A[sub_j][sub_i]; - real_t theta; - - if (a_ii == a_jj) { - theta = M_PI / 4; - } else { - theta = 0.5 * atan(2 * a_ij / (a_ii - a_jj)); - } - - std::vector> P = identity(A.size()); - P[sub_i][sub_j] = -std::sin(theta); - P[sub_i][sub_i] = std::cos(theta); - P[sub_j][sub_j] = std::cos(theta); - P[sub_j][sub_i] = std::sin(theta); - - a_new = matmult(matmult(inverse(P), A), P); - - for (uint32_t i = 0; i < a_new.size(); i++) { - for (uint32_t j = 0; j < a_new[i].size(); j++) { - if (i != j && std::round(a_new[i][j]) == 0) { - a_new[i][j] = 0; - } - } - } - - bool non_zero = false; - for (uint32_t i = 0; i < a_new.size(); i++) { - for (uint32_t j = 0; j < a_new[i].size(); j++) { - if (i != j && std::round(a_new[i][j]) != 0) { - non_zero = true; - } - } - } - - if (non_zero) { - diagonal = false; - } else { - diagonal = true; - } - - if (a_new == A) { - diagonal = true; - for (uint32_t i = 0; i < a_new.size(); i++) { - for (uint32_t j = 0; j < a_new[i].size(); j++) { - if (i != j) { - a_new[i][j] = 0; - } - } - } - } - - eigenvectors = matmult(eigenvectors, P); - A = a_new; - - } while (!diagonal); - - std::vector> a_new_prior = a_new; - - // Bubble Sort. Should change this later. - for (uint32_t i = 0; i < a_new.size() - 1; i++) { - for (uint32_t j = 0; j < a_new.size() - 1 - i; j++) { - if (a_new[j][j] < a_new[j + 1][j + 1]) { - real_t temp = a_new[j + 1][j + 1]; - a_new[j + 1][j + 1] = a_new[j][j]; - a_new[j][j] = temp; - } - } - } - - for (uint32_t i = 0; i < a_new.size(); i++) { - for (uint32_t j = 0; j < a_new.size(); j++) { - if (a_new[i][i] == a_new_prior[j][j]) { - val_to_vec[i] = j; - } - } - } - - std::vector> eigen_temp = eigenvectors; - for (uint32_t i = 0; i < eigenvectors.size(); i++) { - for (uint32_t j = 0; j < eigenvectors[i].size(); j++) { - eigenvectors[i][j] = eigen_temp[i][val_to_vec[j]]; - } - } - return { eigenvectors, a_new }; -} - -MLPPLinAlgOld::EigenResultOld MLPPLinAlgOld::eigen_old(std::vector> A) { - /* - A (the entered parameter) in most use cases will be X'X, XX', etc. and must be symmetric. - That simply means that 1) X' = X and 2) X is a square matrix. This function that computes the - eigenvalues of a matrix is utilizing Jacobi's method. - */ - - real_t diagonal = true; // Perform the iterative Jacobi algorithm unless and until we reach a diagonal matrix which yields us the eigenvals. 
- - std::map val_to_vec; - std::vector> a_new; - std::vector> eigenvectors = identity(A.size()); - do { - real_t a_ij = A[0][1]; - real_t sub_i = 0; - real_t sub_j = 1; - for (uint32_t i = 0; i < A.size(); i++) { - for (uint32_t j = 0; j < A[i].size(); j++) { - if (i != j && std::abs(A[i][j]) > a_ij) { - a_ij = A[i][j]; - sub_i = i; - sub_j = j; - } else if (i != j && std::abs(A[i][j]) == a_ij) { - if (i < sub_i) { - a_ij = A[i][j]; - sub_i = i; - sub_j = j; - } - } - } - } - - real_t a_ii = A[sub_i][sub_i]; - real_t a_jj = A[sub_j][sub_j]; - //real_t a_ji = A[sub_j][sub_i]; - real_t theta; - - if (a_ii == a_jj) { - theta = M_PI / 4; - } else { - theta = 0.5 * atan(2 * a_ij / (a_ii - a_jj)); - } - - std::vector> P = identity(A.size()); - P[sub_i][sub_j] = -std::sin(theta); - P[sub_i][sub_i] = std::cos(theta); - P[sub_j][sub_j] = std::cos(theta); - P[sub_j][sub_i] = std::sin(theta); - - a_new = matmult(matmult(inverse(P), A), P); - - for (uint32_t i = 0; i < a_new.size(); i++) { - for (uint32_t j = 0; j < a_new[i].size(); j++) { - if (i != j && std::round(a_new[i][j]) == 0) { - a_new[i][j] = 0; - } - } - } - - bool non_zero = false; - for (uint32_t i = 0; i < a_new.size(); i++) { - for (uint32_t j = 0; j < a_new[i].size(); j++) { - if (i != j && std::round(a_new[i][j]) != 0) { - non_zero = true; - } - } - } - - if (non_zero) { - diagonal = false; - } else { - diagonal = true; - } - - if (a_new == A) { - diagonal = true; - for (uint32_t i = 0; i < a_new.size(); i++) { - for (uint32_t j = 0; j < a_new[i].size(); j++) { - if (i != j) { - a_new[i][j] = 0; - } - } - } - } - - eigenvectors = matmult(eigenvectors, P); - A = a_new; - - } while (!diagonal); - - std::vector> a_new_prior = a_new; - - // Bubble Sort. Should change this later. - for (uint32_t i = 0; i < a_new.size() - 1; i++) { - for (uint32_t j = 0; j < a_new.size() - 1 - i; j++) { - if (a_new[j][j] < a_new[j + 1][j + 1]) { - real_t temp = a_new[j + 1][j + 1]; - a_new[j + 1][j + 1] = a_new[j][j]; - a_new[j][j] = temp; - } - } - } - - for (uint32_t i = 0; i < a_new.size(); i++) { - for (uint32_t j = 0; j < a_new.size(); j++) { - if (a_new[i][i] == a_new_prior[j][j]) { - val_to_vec[i] = j; - } - } - } - - std::vector> eigen_temp = eigenvectors; - for (uint32_t i = 0; i < eigenvectors.size(); i++) { - for (uint32_t j = 0; j < eigenvectors[i].size(); j++) { - eigenvectors[i][j] = eigen_temp[i][val_to_vec[j]]; - } - } - - EigenResultOld res; - res.eigen_vectors = eigenvectors; - res.eigen_values = a_new; - - return res; -} - -MLPPLinAlgOld::SVDResultOld MLPPLinAlgOld::SVD(std::vector> A) { - EigenResultOld left_eigen = eigen_old(matmult(A, transpose(A))); - EigenResultOld right_eigen = eigen_old(matmult(transpose(A), A)); - - std::vector> singularvals = sqrt(left_eigen.eigen_values); - std::vector> sigma = zeromat(A.size(), A[0].size()); - for (uint32_t i = 0; i < singularvals.size(); i++) { - for (uint32_t j = 0; j < singularvals[i].size(); j++) { - sigma[i][j] = singularvals[i][j]; - } - } - - SVDResultOld res; - res.U = left_eigen.eigen_vectors; - res.S = sigma; - res.Vt = right_eigen.eigen_vectors; - - return res; -} - -std::vector MLPPLinAlgOld::vectorProjection(std::vector a, std::vector b) { - real_t product = dot(a, b) / dot(a, a); - return scalarMultiply(product, a); // Projection of vector a onto b. Denotated as proj_a(b). -} - -std::vector> MLPPLinAlgOld::gramSchmidtProcess(std::vector> A) { - A = transpose(A); // C++ vectors lack a mechanism to directly index columns. 
So, we transpose *a copy* of A for this purpose for ease of use. - std::vector> B; - B.resize(A.size()); - for (uint32_t i = 0; i < B.size(); i++) { - B[i].resize(A[0].size()); - } - - B[0] = A[0]; // We set a_1 = b_1 as an initial condition. - B[0] = scalarMultiply(1 / norm_2(B[0]), B[0]); - for (uint32_t i = 1; i < B.size(); i++) { - B[i] = A[i]; - for (int j = i - 1; j >= 0; j--) { - B[i] = subtraction(B[i], vectorProjection(B[j], A[i])); - } - B[i] = scalarMultiply(1 / norm_2(B[i]), B[i]); // Very simply multiply all elements of vec B[i] by 1/||B[i]||_2 - } - return transpose(B); // We re-transpose the marix. -} - -std::tuple>, std::vector>> MLPPLinAlgOld::QRD(std::vector> A) { - std::vector> Q = gramSchmidtProcess(A); - std::vector> R = matmult(transpose(Q), A); - return { Q, R }; -} - -MLPPLinAlgOld::QRDResult MLPPLinAlgOld::qrd(std::vector> A) { - QRDResult res; - - res.Q = gramSchmidtProcess(A); - res.R = matmult(transpose(res.Q), A); - - return res; -} - -std::tuple>, std::vector>> MLPPLinAlgOld::chol(std::vector> A) { - std::vector> L = zeromat(A.size(), A[0].size()); - for (uint32_t j = 0; j < L.size(); j++) { // Matrices entered must be square. No problem here. - for (uint32_t i = j; i < L.size(); i++) { - if (i == j) { - real_t sum = 0; - for (uint32_t k = 0; k < j; k++) { - sum += L[i][k] * L[i][k]; - } - L[i][j] = std::sqrt(A[i][j] - sum); - } else { // That is, i!=j - real_t sum = 0; - for (uint32_t k = 0; k < j; k++) { - sum += L[i][k] * L[j][k]; - } - L[i][j] = (A[i][j] - sum) / L[j][j]; - } - } - } - return { L, transpose(L) }; // Indeed, L.T is our upper triangular matrix. -} - -MLPPLinAlgOld::CholeskyResult MLPPLinAlgOld::cholesky(std::vector> A) { - std::vector> L = zeromat(A.size(), A[0].size()); - for (uint32_t j = 0; j < L.size(); j++) { // Matrices entered must be square. No problem here. - for (uint32_t i = j; i < L.size(); i++) { - if (i == j) { - real_t sum = 0; - for (uint32_t k = 0; k < j; k++) { - sum += L[i][k] * L[i][k]; - } - L[i][j] = std::sqrt(A[i][j] - sum); - } else { // That is, i!=j - real_t sum = 0; - for (uint32_t k = 0; k < j; k++) { - sum += L[i][k] * L[j][k]; - } - L[i][j] = (A[i][j] - sum) / L[j][j]; - } - } - } - - CholeskyResult res; - res.L = L; - res.Lt = transpose(L); // Indeed, L.T is our upper triangular matrix. - - return res; -} - -real_t MLPPLinAlgOld::sum_elements(std::vector> A) { - real_t sum = 0; - for (uint32_t i = 0; i < A.size(); i++) { - for (uint32_t j = 0; j < A[i].size(); j++) { - sum += A[i][j]; - } - } - return sum; -} - -std::vector MLPPLinAlgOld::flatten(std::vector> A) { - std::vector a; - for (uint32_t i = 0; i < A.size(); i++) { - for (uint32_t j = 0; j < A[i].size(); j++) { - a.push_back(A[i][j]); - } - } - return a; -} - -std::vector MLPPLinAlgOld::solve(std::vector> A, std::vector b) { - return mat_vec_mult(inverse(A), b); -} - -bool MLPPLinAlgOld::positiveDefiniteChecker(std::vector> A) { - auto eig_result = eig(A); - auto eigenvectors = std::get<0>(eig_result); - auto eigenvals = std::get<1>(eig_result); - - std::vector eigenvals_vec; - for (uint32_t i = 0; i < eigenvals.size(); i++) { - eigenvals_vec.push_back(eigenvals[i][i]); - } - for (uint32_t i = 0; i < eigenvals_vec.size(); i++) { - if (eigenvals_vec[i] <= 0) { // Simply check to ensure all eigenvalues are positive. 
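The chol/cholesky helpers above build the lower-triangular factor column by column so that A = L * L^T, assuming a square symmetric positive-definite input. A compact standalone version of the same recurrence, shown on a classic 3x3 example and assuming double for real_t:

    #include <cmath>
    #include <cstdio>
    #include <vector>

    // Cholesky factor L of a symmetric positive-definite A, so that A = L * L^T.
    std::vector<std::vector<double>> cholesky_lower(const std::vector<std::vector<double>> &A) {
        size_t n = A.size();
        std::vector<std::vector<double>> L(n, std::vector<double>(n, 0.0));
        for (size_t j = 0; j < n; j++) {
            for (size_t i = j; i < n; i++) {
                double sum = 0.0;
                for (size_t k = 0; k < j; k++) {
                    sum += L[i][k] * L[j][k];
                }
                L[i][j] = (i == j) ? std::sqrt(A[j][j] - sum) : (A[i][j] - sum) / L[j][j];
            }
        }
        return L;
    }

    int main() {
        std::vector<std::vector<double>> A = { { 4, 12, -16 }, { 12, 37, -43 }, { -16, -43, 98 } };
        std::vector<std::vector<double>> L = cholesky_lower(A);
        // Expected rows: {2, 0, 0}, {6, 1, 0}, {-8, 5, 3}.
        for (const auto &row : L) {
            for (double v : row) std::printf("%g ", v);
            std::printf("\n");
        }
    }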
- return false; - } - } - return true; -} - -bool MLPPLinAlgOld::negativeDefiniteChecker(std::vector> A) { - auto eig_result = eig(A); - auto eigenvectors = std::get<0>(eig_result); - auto eigenvals = std::get<1>(eig_result); - - std::vector eigenvals_vec; - for (uint32_t i = 0; i < eigenvals.size(); i++) { - eigenvals_vec.push_back(eigenvals[i][i]); - } - for (uint32_t i = 0; i < eigenvals_vec.size(); i++) { - if (eigenvals_vec[i] >= 0) { // Simply check to ensure all eigenvalues are negative. - return false; - } - } - return true; -} - -bool MLPPLinAlgOld::zeroEigenvalue(std::vector> A) { - auto eig_result = eig(A); - auto eigenvectors = std::get<0>(eig_result); - auto eigenvals = std::get<1>(eig_result); - - std::vector eigenvals_vec; - for (uint32_t i = 0; i < eigenvals.size(); i++) { - eigenvals_vec.push_back(eigenvals[i][i]); - } - for (uint32_t i = 0; i < eigenvals_vec.size(); i++) { - if (eigenvals_vec[i] == 0) { - return true; - } - } - return false; -} - -void MLPPLinAlgOld::printMatrix(std::vector> A) { - for (uint32_t i = 0; i < A.size(); i++) { - for (uint32_t j = 0; j < A[i].size(); j++) { - std::cout << A[i][j] << " "; - } - std::cout << std::endl; - } -} - -std::vector> MLPPLinAlgOld::outerProduct(std::vector a, std::vector b) { - std::vector> C; - C.resize(a.size()); - for (uint32_t i = 0; i < C.size(); i++) { - C[i] = scalarMultiply(a[i], b); - } - return C; -} - -std::vector MLPPLinAlgOld::hadamard_product(std::vector a, std::vector b) { - std::vector c; - c.resize(a.size()); - - for (uint32_t i = 0; i < a.size(); i++) { - c[i] = a[i] * b[i]; - } - - return c; -} - -std::vector MLPPLinAlgOld::elementWiseDivision(std::vector a, std::vector b) { - std::vector c; - c.resize(a.size()); - - for (uint32_t i = 0; i < a.size(); i++) { - c[i] = a[i] / b[i]; - } - return c; -} - -std::vector MLPPLinAlgOld::scalarMultiply(real_t scalar, std::vector a) { - for (uint32_t i = 0; i < a.size(); i++) { - a[i] *= scalar; - } - return a; -} - -std::vector MLPPLinAlgOld::scalarAdd(real_t scalar, std::vector a) { - for (uint32_t i = 0; i < a.size(); i++) { - a[i] += scalar; - } - return a; -} - -std::vector MLPPLinAlgOld::addition(std::vector a, std::vector b) { - std::vector c; - c.resize(a.size()); - for (uint32_t i = 0; i < a.size(); i++) { - c[i] = a[i] + b[i]; - } - return c; -} - -std::vector MLPPLinAlgOld::subtraction(std::vector a, std::vector b) { - std::vector c; - c.resize(a.size()); - for (uint32_t i = 0; i < a.size(); i++) { - c[i] = a[i] - b[i]; - } - return c; -} - -std::vector MLPPLinAlgOld::subtractMatrixRows(std::vector a, std::vector> B) { - for (uint32_t i = 0; i < B.size(); i++) { - a = subtraction(a, B[i]); - } - return a; -} - -std::vector MLPPLinAlgOld::log(std::vector a) { - std::vector b; - b.resize(a.size()); - for (uint32_t i = 0; i < a.size(); i++) { - b[i] = std::log(a[i]); - } - return b; -} - -std::vector MLPPLinAlgOld::log10(std::vector a) { - std::vector b; - b.resize(a.size()); - for (uint32_t i = 0; i < a.size(); i++) { - b[i] = std::log10(a[i]); - } - return b; -} - -std::vector MLPPLinAlgOld::exp(std::vector a) { - std::vector b; - b.resize(a.size()); - for (uint32_t i = 0; i < a.size(); i++) { - b[i] = std::exp(a[i]); - } - return b; -} - -std::vector MLPPLinAlgOld::erf(std::vector a) { - std::vector b; - b.resize(a.size()); - for (uint32_t i = 0; i < a.size(); i++) { - b[i] = std::erf(a[i]); - } - return b; -} - -std::vector MLPPLinAlgOld::exponentiate(std::vector a, real_t p) { - std::vector b; - b.resize(a.size()); - for (uint32_t i = 0; i < 
b.size(); i++) { - b[i] = std::pow(a[i], p); - } - return b; -} - -std::vector MLPPLinAlgOld::sqrt(std::vector a) { - return exponentiate(a, 0.5); -} - -std::vector MLPPLinAlgOld::cbrt(std::vector a) { - return exponentiate(a, real_t(1) / real_t(3)); -} - -real_t MLPPLinAlgOld::dot(std::vector a, std::vector b) { - real_t c = 0; - for (uint32_t i = 0; i < a.size(); i++) { - c += a[i] * b[i]; - } - return c; -} - -std::vector MLPPLinAlgOld::cross(std::vector a, std::vector b) { - // Cross products exist in R^7 also. Though, I will limit it to R^3 as Wolfram does this. - std::vector> mat = { onevec(3), a, b }; - - real_t det1 = det({ { a[1], a[2] }, { b[1], b[2] } }, 2); - real_t det2 = -det({ { a[0], a[2] }, { b[0], b[2] } }, 2); - real_t det3 = det({ { a[0], a[1] }, { b[0], b[1] } }, 2); - - return { det1, det2, det3 }; -} - -std::vector MLPPLinAlgOld::abs(std::vector a) { - std::vector b; - b.resize(a.size()); - for (uint32_t i = 0; i < b.size(); i++) { - b[i] = std::abs(a[i]); - } - return b; -} - -std::vector MLPPLinAlgOld::zerovec(int n) { - std::vector zerovec; - zerovec.resize(n); - return zerovec; -} - -std::vector MLPPLinAlgOld::onevec(int n) { - return full(n, 1); -} - -std::vector> MLPPLinAlgOld::diag(std::vector a) { - std::vector> B = zeromat(a.size(), a.size()); - for (uint32_t i = 0; i < B.size(); i++) { - B[i][i] = a[i]; - } - return B; -} - -std::vector MLPPLinAlgOld::full(int n, int k) { - std::vector full; - full.resize(n); - for (uint32_t i = 0; i < full.size(); i++) { - full[i] = k; - } - return full; -} - -std::vector MLPPLinAlgOld::sin(std::vector a) { - std::vector b; - b.resize(a.size()); - for (uint32_t i = 0; i < a.size(); i++) { - b[i] = std::sin(a[i]); - } - return b; -} - -std::vector MLPPLinAlgOld::cos(std::vector a) { - std::vector b; - b.resize(a.size()); - for (uint32_t i = 0; i < a.size(); i++) { - b[i] = std::cos(a[i]); - } - return b; -} - -std::vector> MLPPLinAlgOld::rotate(std::vector> A, real_t theta, int axis) { - std::vector> rotationMatrix = { { std::cos(theta), -std::sin(theta) }, { std::sin(theta), std::cos(theta) } }; - if (axis == 0) { - rotationMatrix = { { 1, 0, 0 }, { 0, std::cos(theta), -std::sin(theta) }, { 0, std::sin(theta), std::cos(theta) } }; - } else if (axis == 1) { - rotationMatrix = { { std::cos(theta), 0, std::sin(theta) }, { 0, 1, 0 }, { -std::sin(theta), 0, std::cos(theta) } }; - } else if (axis == 2) { - rotationMatrix = { { std::cos(theta), -std::sin(theta), 0 }, { std::sin(theta), std::cos(theta), 0 }, { 1, 0, 0 } }; - } - - return matmult(A, rotationMatrix); -} - -std::vector> MLPPLinAlgOld::max(std::vector> A, std::vector> B) { - std::vector> C; - C.resize(A.size()); - for (uint32_t i = 0; i < C.size(); i++) { - C[i].resize(A[0].size()); - } - for (uint32_t i = 0; i < A.size(); i++) { - C[i] = max(A[i], B[i]); - } - return C; -} - -real_t MLPPLinAlgOld::max(std::vector a) { - int max = a[0]; - for (uint32_t i = 0; i < a.size(); i++) { - if (a[i] > max) { - max = a[i]; - } - } - return max; -} - -real_t MLPPLinAlgOld::min(std::vector a) { - int min = a[0]; - for (uint32_t i = 0; i < a.size(); i++) { - if (a[i] < min) { - min = a[i]; - } - } - return min; -} - -std::vector MLPPLinAlgOld::round(std::vector a) { - std::vector b; - b.resize(a.size()); - for (uint32_t i = 0; i < a.size(); i++) { - b[i] = std::round(a[i]); - } - return b; -} - -// Multidimensional Euclidean Distance -real_t MLPPLinAlgOld::euclideanDistance(std::vector a, std::vector b) { - real_t dist = 0; - for (uint32_t i = 0; i < a.size(); i++) { - dist += 
(a[i] - b[i]) * (a[i] - b[i]); - } - return std::sqrt(dist); -} - -real_t MLPPLinAlgOld::norm_2(std::vector a) { - return std::sqrt(norm_sq(a)); -} - -real_t MLPPLinAlgOld::norm_sq(std::vector a) { - real_t n_sq = 0; - for (uint32_t i = 0; i < a.size(); i++) { - n_sq += a[i] * a[i]; - } - return n_sq; -} - -real_t MLPPLinAlgOld::sum_elements(std::vector a) { - real_t sum = 0; - for (uint32_t i = 0; i < a.size(); i++) { - sum += a[i]; - } - return sum; -} - -real_t MLPPLinAlgOld::cosineSimilarity(std::vector a, std::vector b) { - return dot(a, b) / (norm_2(a) * norm_2(b)); -} - -void MLPPLinAlgOld::printVector(std::vector a) { - for (uint32_t i = 0; i < a.size(); i++) { - std::cout << a[i] << " "; - } - std::cout << std::endl; -} - -std::vector> MLPPLinAlgOld::mat_vec_add(std::vector> A, std::vector b) { - for (uint32_t i = 0; i < A.size(); i++) { - for (uint32_t j = 0; j < A[i].size(); j++) { - A[i][j] += b[j]; - } - } - return A; -} - -std::vector MLPPLinAlgOld::mat_vec_mult(std::vector> A, std::vector b) { - std::vector c; - c.resize(A.size()); - - for (uint32_t i = 0; i < A.size(); i++) { - for (uint32_t k = 0; k < b.size(); k++) { - c[i] += A[i][k] * b[k]; - } - } - return c; -} - -std::vector>> MLPPLinAlgOld::addition(std::vector>> A, std::vector>> B) { - for (uint32_t i = 0; i < A.size(); i++) { - A[i] = addition(A[i], B[i]); - } - return A; -} - -std::vector>> MLPPLinAlgOld::elementWiseDivision(std::vector>> A, std::vector>> B) { - for (uint32_t i = 0; i < A.size(); i++) { - A[i] = elementWiseDivision(A[i], B[i]); - } - return A; -} - -std::vector>> MLPPLinAlgOld::sqrt(std::vector>> A) { - for (uint32_t i = 0; i < A.size(); i++) { - A[i] = sqrt(A[i]); - } - return A; -} - -std::vector>> MLPPLinAlgOld::exponentiate(std::vector>> A, real_t p) { - for (uint32_t i = 0; i < A.size(); i++) { - A[i] = exponentiate(A[i], p); - } - return A; -} - -std::vector> MLPPLinAlgOld::tensor_vec_mult(std::vector>> A, std::vector b) { - std::vector> C; - C.resize(A.size()); - for (uint32_t i = 0; i < C.size(); i++) { - C[i].resize(A[0].size()); - } - for (uint32_t i = 0; i < C.size(); i++) { - for (uint32_t j = 0; j < C[i].size(); j++) { - C[i][j] = dot(A[i][j], b); - } - } - return C; -} - -std::vector MLPPLinAlgOld::flatten(std::vector>> A) { - std::vector c; - for (uint32_t i = 0; i < A.size(); i++) { - std::vector flattenedVec = flatten(A[i]); - c.insert(c.end(), flattenedVec.begin(), flattenedVec.end()); - } - return c; -} - -void MLPPLinAlgOld::printTensor(std::vector>> A) { - for (uint32_t i = 0; i < A.size(); i++) { - printMatrix(A[i]); - if (i != A.size() - 1) { - std::cout << std::endl; - } - } -} - -std::vector>> MLPPLinAlgOld::scalarMultiply(real_t scalar, std::vector>> A) { - for (uint32_t i = 0; i < A.size(); i++) { - A[i] = scalarMultiply(scalar, A[i]); - } - return A; -} - -std::vector>> MLPPLinAlgOld::scalarAdd(real_t scalar, std::vector>> A) { - for (uint32_t i = 0; i < A.size(); i++) { - A[i] = scalarAdd(scalar, A[i]); - } - return A; -} - -std::vector>> MLPPLinAlgOld::resize(std::vector>> A, std::vector>> B) { - A.resize(B.size()); - for (uint32_t i = 0; i < B.size(); i++) { - A[i].resize(B[i].size()); - for (uint32_t j = 0; j < B[i].size(); j++) { - A[i][j].resize(B[i][j].size()); - } - } - return A; -} - -std::vector>> MLPPLinAlgOld::max(std::vector>> A, std::vector>> B) { - for (uint32_t i = 0; i < A.size(); i++) { - A[i] = max(A[i], B[i]); - } - return A; -} - -std::vector>> MLPPLinAlgOld::abs(std::vector>> A) { - for (uint32_t i = 0; i < A.size(); i++) { - A[i] = abs(A[i]); - } 
- return A; -} - -real_t MLPPLinAlgOld::norm_2(std::vector>> A) { - real_t sum = 0; - for (uint32_t i = 0; i < A.size(); i++) { - for (uint32_t j = 0; j < A[i].size(); j++) { - for (uint32_t k = 0; k < A[i][j].size(); k++) { - sum += A[i][j][k] * A[i][j][k]; - } - } - } - return std::sqrt(sum); -} - -// Bad implementation. Change this later. -std::vector>> MLPPLinAlgOld::vector_wise_tensor_product(std::vector>> A, std::vector> B) { - std::vector>> C; - C = resize(C, A); - for (uint32_t i = 0; i < A[0].size(); i++) { - for (uint32_t j = 0; j < A[0][i].size(); j++) { - std::vector currentVector; - currentVector.resize(A.size()); - - for (uint32_t k = 0; k < C.size(); k++) { - currentVector[k] = A[k][i][j]; - } - - currentVector = mat_vec_mult(B, currentVector); - - for (uint32_t k = 0; k < C.size(); k++) { - C[k][i][j] = currentVector[k]; - } - } - } - return C; -} diff --git a/mlpp/lin_alg/lin_alg_old.h b/mlpp/lin_alg/lin_alg_old.h deleted file mode 100644 index 4ec91b8..0000000 --- a/mlpp/lin_alg/lin_alg_old.h +++ /dev/null @@ -1,230 +0,0 @@ - -#ifndef MLPP_LIN_ALG_OLD_H -#define MLPP_LIN_ALG_OLD_H - -// -// LinAlg.hpp -// -// Created by Marc Melikyan on 1/8/21. -// - -//TODO Methods here should probably use error macros in a way where they get disabled in non-tools(?) (maybe release?) builds - -#include "core/math/math_defs.h" - -#include -#include - -class MLPPLinAlgOld { -public: - // MATRIX FUNCTIONS - - std::vector> gramMatrix(std::vector> A); - - bool linearIndependenceChecker(std::vector> A); - - std::vector> gaussianNoise(int n, int m); - - std::vector> addition(std::vector> A, std::vector> B); - std::vector> subtraction(std::vector> A, std::vector> B); - std::vector> matmult(std::vector> A, std::vector> B); - - std::vector> hadamard_product(std::vector> A, std::vector> B); - std::vector> kronecker_product(std::vector> A, std::vector> B); - std::vector> elementWiseDivision(std::vector> A, std::vector> B); - - std::vector> transpose(std::vector> A); - std::vector> scalarMultiply(real_t scalar, std::vector> A); - std::vector> scalarAdd(real_t scalar, std::vector> A); - - std::vector> log(std::vector> A); - std::vector> log10(std::vector> A); - std::vector> exp(std::vector> A); - std::vector> erf(std::vector> A); - std::vector> exponentiate(std::vector> A, real_t p); - std::vector> sqrt(std::vector> A); - std::vector> cbrt(std::vector> A); - - std::vector> matrixPower(std::vector> A, int n); - - std::vector> abs(std::vector> A); - - real_t det(std::vector> A, int d); - - real_t trace(std::vector> A); - - std::vector> cofactor(std::vector> A, int n, int i, int j); - std::vector> adjoint(std::vector> A); - std::vector> inverse(std::vector> A); - std::vector> pinverse(std::vector> A); - - std::vector> zeromat(int n, int m); - std::vector> onemat(int n, int m); - std::vector> full(int n, int m, int k); - - std::vector> sin(std::vector> A); - std::vector> cos(std::vector> A); - - std::vector> rotate(std::vector> A, real_t theta, int axis = -1); - - std::vector> max(std::vector> A, std::vector> B); - real_t max(std::vector> A); - real_t min(std::vector> A); - - std::vector> round(std::vector> A); - - real_t norm_2(std::vector> A); - - std::vector> identity(real_t d); - - std::vector> cov(std::vector> A); - - std::tuple>, std::vector>> eig(std::vector> A); - - struct EigenResultOld { - std::vector> eigen_vectors; - std::vector> eigen_values; - }; - - EigenResultOld eigen_old(std::vector> A); - - struct SVDResultOld { - std::vector> U; - std::vector> S; - std::vector> Vt; - }; - - 
SVDResultOld SVD(std::vector> A); - - std::vector vectorProjection(std::vector a, std::vector b); - - std::vector> gramSchmidtProcess(std::vector> A); - - std::tuple>, std::vector>> QRD(std::vector> A); - - struct QRDResult { - std::vector> Q; - std::vector> R; - }; - - QRDResult qrd(std::vector> A); - - std::tuple>, std::vector>> chol(std::vector> A); - - struct CholeskyResult { - std::vector> L; - std::vector> Lt; - }; - - CholeskyResult cholesky(std::vector> A); - - real_t sum_elements(std::vector> A); - - std::vector flatten(std::vector> A); - - std::vector solve(std::vector> A, std::vector b); - - bool positiveDefiniteChecker(std::vector> A); - - bool negativeDefiniteChecker(std::vector> A); - - bool zeroEigenvalue(std::vector> A); - - void printMatrix(std::vector> A); - - // VECTOR FUNCTIONS - - std::vector> outerProduct(std::vector a, std::vector b); // This multiplies a, bT - std::vector hadamard_product(std::vector a, std::vector b); - - std::vector elementWiseDivision(std::vector a, std::vector b); - - std::vector scalarMultiply(real_t scalar, std::vector a); - - std::vector scalarAdd(real_t scalar, std::vector a); - - std::vector addition(std::vector a, std::vector b); - - std::vector subtraction(std::vector a, std::vector b); - - std::vector subtractMatrixRows(std::vector a, std::vector> B); - - std::vector log(std::vector a); - std::vector log10(std::vector a); - std::vector exp(std::vector a); - std::vector erf(std::vector a); - std::vector exponentiate(std::vector a, real_t p); - std::vector sqrt(std::vector a); - std::vector cbrt(std::vector a); - - real_t dot(std::vector a, std::vector b); - - std::vector cross(std::vector a, std::vector b); - - std::vector abs(std::vector a); - - std::vector zerovec(int n); - std::vector onevec(int n); - std::vector full(int n, int k); - - std::vector> diag(std::vector a); - - std::vector sin(std::vector a); - std::vector cos(std::vector a); - - std::vector max(std::vector a, std::vector b); - - real_t max(std::vector a); - - real_t min(std::vector a); - - std::vector round(std::vector a); - - real_t euclideanDistance(std::vector a, std::vector b); - - real_t norm_2(std::vector a); - - real_t norm_sq(std::vector a); - - real_t sum_elements(std::vector a); - - real_t cosineSimilarity(std::vector a, std::vector b); - - void printVector(std::vector a); - - // MATRIX-VECTOR FUNCTIONS - std::vector> mat_vec_add(std::vector> A, std::vector b); - std::vector mat_vec_mult(std::vector> A, std::vector b); - - // TENSOR FUNCTIONS - std::vector>> addition(std::vector>> A, std::vector>> B); - - std::vector>> elementWiseDivision(std::vector>> A, std::vector>> B); - - std::vector>> sqrt(std::vector>> A); - - std::vector>> exponentiate(std::vector>> A, real_t p); - - std::vector> tensor_vec_mult(std::vector>> A, std::vector b); - - std::vector flatten(std::vector>> A); - - void printTensor(std::vector>> A); - - std::vector>> scalarMultiply(real_t scalar, std::vector>> A); - std::vector>> scalarAdd(real_t scalar, std::vector>> A); - - std::vector>> resize(std::vector>> A, std::vector>> B); - - std::vector>> hadamard_product(std::vector>> A, std::vector>> B); - - std::vector>> max(std::vector>> A, std::vector>> B); - - std::vector>> abs(std::vector>> A); - - real_t norm_2(std::vector>> A); - - std::vector>> vector_wise_tensor_product(std::vector>> A, std::vector> B); -}; - -#endif /* LinAlg_hpp */ \ No newline at end of file diff --git a/mlpp/lin_reg/lin_reg_old.cpp b/mlpp/lin_reg/lin_reg_old.cpp deleted file mode 100644 index 59ac17b..0000000 --- 
a/mlpp/lin_reg/lin_reg_old.cpp +++ /dev/null @@ -1,598 +0,0 @@ -// -// LinReg.cpp -// -// Created by Marc Melikyan on 10/2/20. -// - -#include "lin_reg_old.h" - -#include "../cost/cost_old.h" -#include "../lin_alg/lin_alg_old.h" -#include "../regularization/reg_old.h" -#include "../stat/stat_old.h" -#include "../utilities/utilities.h" - -#include -#include -#include - -MLPPLinRegOld::MLPPLinRegOld(std::vector> p_inputSet, std::vector p_outputSet, std::string p_reg, real_t p_lambda, real_t p_alpha) { - inputSet = p_inputSet; - outputSet = p_outputSet; - n = p_inputSet.size(); - k = p_inputSet[0].size(); - reg = p_reg; - lambda = p_lambda; - alpha = p_alpha; - - y_hat.resize(n); - - weights = MLPPUtilities::weightInitialization(k); - bias = MLPPUtilities::biasInitialization(); -} - -std::vector MLPPLinRegOld::modelSetTest(std::vector> X) { - return Evaluate(X); -} - -real_t MLPPLinRegOld::modelTest(std::vector x) { - return Evaluate(x); -} - -void MLPPLinRegOld::NewtonRaphson(real_t learning_rate, int max_epoch, bool UI) { - MLPPLinAlgOld alg; - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - forwardPass(); - while (true) { - cost_prev = Cost(y_hat, outputSet); - - std::vector error = alg.subtraction(y_hat, outputSet); - - // Calculating the weight gradients (2nd derivative) - std::vector first_derivative = alg.mat_vec_mult(alg.transpose(inputSet), error); - std::vector> second_derivative = alg.matmult(alg.transpose(inputSet), inputSet); - weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate / n, alg.mat_vec_mult(alg.transpose(alg.inverse(second_derivative)), first_derivative))); - weights = regularization.regWeights(weights, lambda, alpha, reg); - - // Calculating the bias gradients (2nd derivative) - bias -= learning_rate * alg.sum_elements(error) / n; // We keep this the same. The 2nd derivative is just [1]. 
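Written compactly, with design matrix X, targets y, predictions y_hat, error e = y_hat - y, and learning rate eta, the damped Newton step above is

    w \leftarrow w - \frac{\eta}{n} (X^\top X)^{-1} X^\top e, \qquad b \leftarrow b - \frac{\eta}{n} \sum_i e_i,

since X^T X is the second derivative of the squared-error cost with respect to the weights, while the corresponding second derivative for the bias is a constant.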
- forwardPass(); - - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); - MLPPUtilities::UI(weights, bias); - } - epoch++; - if (epoch > max_epoch) { - break; - } - } -} - -void MLPPLinRegOld::gradientDescent(real_t learning_rate, int max_epoch, bool UI) { - MLPPLinAlgOld alg; - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - forwardPass(); - - while (true) { - cost_prev = Cost(y_hat, outputSet); - - std::vector error = alg.subtraction(y_hat, outputSet); - - // Calculating the weight gradients - weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate / n, alg.mat_vec_mult(alg.transpose(inputSet), error))); - weights = regularization.regWeights(weights, lambda, alpha, reg); - - // Calculating the bias gradients - bias -= learning_rate * alg.sum_elements(error) / n; - forwardPass(); - - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); - MLPPUtilities::UI(weights, bias); - } - epoch++; - if (epoch > max_epoch) { - break; - } - } -} - -void MLPPLinRegOld::SGD(real_t learning_rate, int max_epoch, bool UI) { - MLPPLinAlgOld alg; - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - - while (true) { - std::random_device rd; - std::default_random_engine generator(rd()); - std::uniform_int_distribution distribution(0, int(n - 1)); - int outputIndex = distribution(generator); - - real_t y_hat = Evaluate(inputSet[outputIndex]); - cost_prev = Cost({ y_hat }, { outputSet[outputIndex] }); - - real_t error = y_hat - outputSet[outputIndex]; - - // Weight updation - weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate * error, inputSet[outputIndex])); - weights = regularization.regWeights(weights, lambda, alpha, reg); - - // Bias updation - bias -= learning_rate * error; - - y_hat = Evaluate({ inputSet[outputIndex] }); - - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost({ y_hat }, { outputSet[outputIndex] })); - MLPPUtilities::UI(weights, bias); - } - epoch++; - - if (epoch > max_epoch) { - break; - } - } - forwardPass(); -} - -void MLPPLinRegOld::MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI) { - MLPPLinAlgOld alg; - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - - // Creating the mini-batches - int n_mini_batch = n / mini_batch_size; - auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch); - auto inputMiniBatches = std::get<0>(batches); - auto outputMiniBatches = std::get<1>(batches); - - while (true) { - for (int i = 0; i < n_mini_batch; i++) { - std::vector y_hat = Evaluate(inputMiniBatches[i]); - cost_prev = Cost(y_hat, outputMiniBatches[i]); - - std::vector error = alg.subtraction(y_hat, outputMiniBatches[i]); - - // Calculating the weight gradients - weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate / outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error))); - weights = regularization.regWeights(weights, lambda, alpha, reg); - - // Calculating the bias gradients - bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); - y_hat = Evaluate(inputMiniBatches[i]); - - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i])); - MLPPUtilities::UI(weights, bias); - } - } - epoch++; - if (epoch > max_epoch) { - break; - } - } - forwardPass(); -} - -void MLPPLinRegOld::Momentum(real_t learning_rate, int max_epoch, int mini_batch_size, real_t gamma, bool UI) { - MLPPLinAlgOld alg; - MLPPRegOld 
regularization; - real_t cost_prev = 0; - int epoch = 1; - - // Creating the mini-batches - int n_mini_batch = n / mini_batch_size; - auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch); - auto inputMiniBatches = std::get<0>(batches); - auto outputMiniBatches = std::get<1>(batches); - - // Initializing necessary components for Momentum. - std::vector v = alg.zerovec(weights.size()); - while (true) { - for (int i = 0; i < n_mini_batch; i++) { - std::vector y_hat = Evaluate(inputMiniBatches[i]); - cost_prev = Cost(y_hat, outputMiniBatches[i]); - - std::vector error = alg.subtraction(y_hat, outputMiniBatches[i]); - - // Calculating the weight gradients - std::vector gradient = alg.scalarMultiply(1 / outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error)); - std::vector RegDerivTerm = regularization.regDerivTerm(weights, lambda, alpha, reg); - std::vector weight_grad = alg.addition(gradient, RegDerivTerm); // Weight_grad_final - - v = alg.addition(alg.scalarMultiply(gamma, v), alg.scalarMultiply(learning_rate, weight_grad)); - - weights = alg.subtraction(weights, v); - - // Calculating the bias gradients - bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); // As normal - y_hat = Evaluate(inputMiniBatches[i]); - - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i])); - MLPPUtilities::UI(weights, bias); - } - } - epoch++; - if (epoch > max_epoch) { - break; - } - } - forwardPass(); -} - -void MLPPLinRegOld::NAG(real_t learning_rate, int max_epoch, int mini_batch_size, real_t gamma, bool UI) { - MLPPLinAlgOld alg; - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - - // Creating the mini-batches - int n_mini_batch = n / mini_batch_size; - auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch); - auto inputMiniBatches = std::get<0>(batches); - auto outputMiniBatches = std::get<1>(batches); - - // Initializing necessary components for Momentum. 
- std::vector v = alg.zerovec(weights.size()); - while (true) { - for (int i = 0; i < n_mini_batch; i++) { - weights = alg.subtraction(weights, alg.scalarMultiply(gamma, v)); // "Aposterori" calculation - - std::vector y_hat = Evaluate(inputMiniBatches[i]); - cost_prev = Cost(y_hat, outputMiniBatches[i]); - - std::vector error = alg.subtraction(y_hat, outputMiniBatches[i]); - - // Calculating the weight gradients - std::vector gradient = alg.scalarMultiply(1 / outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error)); - std::vector RegDerivTerm = regularization.regDerivTerm(weights, lambda, alpha, reg); - std::vector weight_grad = alg.addition(gradient, RegDerivTerm); // Weight_grad_final - - v = alg.addition(alg.scalarMultiply(gamma, v), alg.scalarMultiply(learning_rate, weight_grad)); - - weights = alg.subtraction(weights, v); - - // Calculating the bias gradients - bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); // As normal - y_hat = Evaluate(inputMiniBatches[i]); - - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i])); - MLPPUtilities::UI(weights, bias); - } - } - epoch++; - if (epoch > max_epoch) { - break; - } - } - forwardPass(); -} - -void MLPPLinRegOld::Adagrad(real_t learning_rate, int max_epoch, int mini_batch_size, real_t e, bool UI) { - MLPPLinAlgOld alg; - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - - // Creating the mini-batches - int n_mini_batch = n / mini_batch_size; - auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch); - auto inputMiniBatches = std::get<0>(batches); - auto outputMiniBatches = std::get<1>(batches); - - // Initializing necessary components for Adagrad. - std::vector v = alg.zerovec(weights.size()); - while (true) { - for (int i = 0; i < n_mini_batch; i++) { - std::vector y_hat = Evaluate(inputMiniBatches[i]); - cost_prev = Cost(y_hat, outputMiniBatches[i]); - - std::vector error = alg.subtraction(y_hat, outputMiniBatches[i]); - - // Calculating the weight gradients - std::vector gradient = alg.scalarMultiply(1 / outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error)); - std::vector RegDerivTerm = regularization.regDerivTerm(weights, lambda, alpha, reg); - std::vector weight_grad = alg.addition(gradient, RegDerivTerm); // Weight_grad_final - - v = alg.hadamard_product(weight_grad, weight_grad); - - weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, alg.elementWiseDivision(weight_grad, alg.sqrt(alg.scalarAdd(e, v))))); - - // Calculating the bias gradients - bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); // As normal - y_hat = Evaluate(inputMiniBatches[i]); - - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i])); - MLPPUtilities::UI(weights, bias); - } - } - epoch++; - if (epoch > max_epoch) { - break; - } - } - forwardPass(); -} - -void MLPPLinRegOld::Adadelta(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t e, bool UI) { - // Adagrad upgrade. Momentum is applied. 
- MLPPLinAlgOld alg; - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - - // Creating the mini-batches - int n_mini_batch = n / mini_batch_size; - auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch); - auto inputMiniBatches = std::get<0>(batches); - auto outputMiniBatches = std::get<1>(batches); - - // Initializing necessary components for Adagrad. - std::vector v = alg.zerovec(weights.size()); - while (true) { - for (int i = 0; i < n_mini_batch; i++) { - std::vector y_hat = Evaluate(inputMiniBatches[i]); - cost_prev = Cost(y_hat, outputMiniBatches[i]); - - std::vector error = alg.subtraction(y_hat, outputMiniBatches[i]); - - // Calculating the weight gradients - std::vector gradient = alg.scalarMultiply(1 / outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error)); - std::vector RegDerivTerm = regularization.regDerivTerm(weights, lambda, alpha, reg); - std::vector weight_grad = alg.addition(gradient, RegDerivTerm); // Weight_grad_final - - v = alg.addition(alg.scalarMultiply(b1, v), alg.scalarMultiply(1 - b1, alg.hadamard_product(weight_grad, weight_grad))); - - weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, alg.elementWiseDivision(weight_grad, alg.sqrt(alg.scalarAdd(e, v))))); - - // Calculating the bias gradients - bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); // As normal - y_hat = Evaluate(inputMiniBatches[i]); - - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i])); - MLPPUtilities::UI(weights, bias); - } - } - epoch++; - if (epoch > max_epoch) { - break; - } - } - forwardPass(); -} - -void MLPPLinRegOld::Adam(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI) { - MLPPLinAlgOld alg; - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - - // Creating the mini-batches - int n_mini_batch = n / mini_batch_size; - auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch); - auto inputMiniBatches = std::get<0>(batches); - auto outputMiniBatches = std::get<1>(batches); - - // Initializing necessary components for Adam. 
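For reference, the quantities maintained in the mini-batch loop that follows are the standard Adam recurrences, where g is the regularized mini-batch weight gradient, b1/b2 are the decay rates, e is the stabilizer, and t is the epoch counter used for bias correction:

    m \leftarrow \beta_1 m + (1-\beta_1) g, \quad v \leftarrow \beta_2 v + (1-\beta_2) g^{2}, \quad \hat m = \frac{m}{1-\beta_1^{t}}, \quad \hat v = \frac{v}{1-\beta_2^{t}}, \quad w \leftarrow w - \eta \, \frac{\hat m}{\sqrt{\hat v} + \epsilon}.

The bias term is still updated with the plain averaged error, as in the other optimizers in this file.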
- std::vector m = alg.zerovec(weights.size()); - - std::vector v = alg.zerovec(weights.size()); - while (true) { - for (int i = 0; i < n_mini_batch; i++) { - std::vector y_hat = Evaluate(inputMiniBatches[i]); - cost_prev = Cost(y_hat, outputMiniBatches[i]); - - std::vector error = alg.subtraction(y_hat, outputMiniBatches[i]); - - // Calculating the weight gradients - std::vector gradient = alg.scalarMultiply(1 / outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error)); - std::vector RegDerivTerm = regularization.regDerivTerm(weights, lambda, alpha, reg); - std::vector weight_grad = alg.addition(gradient, RegDerivTerm); // Weight_grad_final - - m = alg.addition(alg.scalarMultiply(b1, m), alg.scalarMultiply(1 - b1, weight_grad)); - v = alg.addition(alg.scalarMultiply(b2, v), alg.scalarMultiply(1 - b2, alg.exponentiate(weight_grad, 2))); - - std::vector m_hat = alg.scalarMultiply(1 / (1 - pow(b1, epoch)), m); - std::vector v_hat = alg.scalarMultiply(1 / (1 - pow(b2, epoch)), v); - - weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, alg.elementWiseDivision(m_hat, alg.scalarAdd(e, alg.sqrt(v_hat))))); - - // Calculating the bias gradients - bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); // As normal - y_hat = Evaluate(inputMiniBatches[i]); - - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i])); - MLPPUtilities::UI(weights, bias); - } - } - epoch++; - if (epoch > max_epoch) { - break; - } - } - forwardPass(); -} - -void MLPPLinRegOld::Adamax(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI) { - MLPPLinAlgOld alg; - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - - // Creating the mini-batches - int n_mini_batch = n / mini_batch_size; - auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch); - auto inputMiniBatches = std::get<0>(batches); - auto outputMiniBatches = std::get<1>(batches); - - std::vector m = alg.zerovec(weights.size()); - - std::vector u = alg.zerovec(weights.size()); - while (true) { - for (int i = 0; i < n_mini_batch; i++) { - std::vector y_hat = Evaluate(inputMiniBatches[i]); - cost_prev = Cost(y_hat, outputMiniBatches[i]); - - std::vector error = alg.subtraction(y_hat, outputMiniBatches[i]); - - // Calculating the weight gradients - std::vector gradient = alg.scalarMultiply(1 / outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error)); - std::vector RegDerivTerm = regularization.regDerivTerm(weights, lambda, alpha, reg); - std::vector weight_grad = alg.addition(gradient, RegDerivTerm); // Weight_grad_final - - m = alg.addition(alg.scalarMultiply(b1, m), alg.scalarMultiply(1 - b1, weight_grad)); - u = alg.max(alg.scalarMultiply(b2, u), alg.abs(weight_grad)); - - std::vector m_hat = alg.scalarMultiply(1 / (1 - pow(b1, epoch)), m); - - weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, alg.elementWiseDivision(m_hat, u))); - - // Calculating the bias gradients - bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); // As normal - y_hat = Evaluate(inputMiniBatches[i]); - - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i])); - MLPPUtilities::UI(weights, bias); - } - } - epoch++; - if (epoch > max_epoch) { - break; - } - } - forwardPass(); -} - -void MLPPLinRegOld::Nadam(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t 
e, bool UI) { - MLPPLinAlgOld alg; - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - - // Creating the mini-batches - int n_mini_batch = n / mini_batch_size; - auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch); - auto inputMiniBatches = std::get<0>(batches); - auto outputMiniBatches = std::get<1>(batches); - - // Initializing necessary components for Adam. - std::vector m = alg.zerovec(weights.size()); - std::vector v = alg.zerovec(weights.size()); - std::vector m_final = alg.zerovec(weights.size()); - while (true) { - for (int i = 0; i < n_mini_batch; i++) { - std::vector y_hat = Evaluate(inputMiniBatches[i]); - cost_prev = Cost(y_hat, outputMiniBatches[i]); - - std::vector error = alg.subtraction(y_hat, outputMiniBatches[i]); - - // Calculating the weight gradients - std::vector gradient = alg.scalarMultiply(1 / outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error)); - std::vector RegDerivTerm = regularization.regDerivTerm(weights, lambda, alpha, reg); - std::vector weight_grad = alg.addition(gradient, RegDerivTerm); // Weight_grad_final - - m = alg.addition(alg.scalarMultiply(b1, m), alg.scalarMultiply(1 - b1, weight_grad)); - v = alg.addition(alg.scalarMultiply(b2, v), alg.scalarMultiply(1 - b2, alg.exponentiate(weight_grad, 2))); - m_final = alg.addition(alg.scalarMultiply(b1, m), alg.scalarMultiply((1 - b1) / (1 - pow(b1, epoch)), weight_grad)); - - std::vector m_hat = alg.scalarMultiply(1 / (1 - pow(b1, epoch)), m); - std::vector v_hat = alg.scalarMultiply(1 / (1 - pow(b2, epoch)), v); - - weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, alg.elementWiseDivision(m_final, alg.scalarAdd(e, alg.sqrt(v_hat))))); - - // Calculating the bias gradients - bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); // As normal - y_hat = Evaluate(inputMiniBatches[i]); - - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i])); - MLPPUtilities::UI(weights, bias); - } - } - epoch++; - if (epoch > max_epoch) { - break; - } - } - forwardPass(); -} - -void MLPPLinRegOld::normalEquation() { - MLPPLinAlgOld alg; - MLPPStatOld stat; - std::vector x_means; - std::vector> inputSetT = alg.transpose(inputSet); - - x_means.resize(inputSetT.size()); - for (uint32_t i = 0; i < inputSetT.size(); i++) { - x_means[i] = (stat.mean(inputSetT[i])); - } - - //try { - std::vector temp; - temp.resize(k); - temp = alg.mat_vec_mult(alg.inverse(alg.matmult(alg.transpose(inputSet), inputSet)), alg.mat_vec_mult(alg.transpose(inputSet), outputSet)); - if (std::isnan(temp[0])) { - //throw 99; - //TODO ERR_FAIL_COND - std::cout << "ERR: Resulting matrix was noninvertible/degenerate, and so the normal equation could not be performed. Try utilizing gradient descent." << std::endl; - return; - } else { - if (reg == "Ridge") { - weights = alg.mat_vec_mult(alg.inverse(alg.addition(alg.matmult(alg.transpose(inputSet), inputSet), alg.scalarMultiply(lambda, alg.identity(k)))), alg.mat_vec_mult(alg.transpose(inputSet), outputSet)); - } else { - weights = alg.mat_vec_mult(alg.inverse(alg.matmult(alg.transpose(inputSet), inputSet)), alg.mat_vec_mult(alg.transpose(inputSet), outputSet)); - } - - bias = stat.mean(outputSet) - alg.dot(weights, x_means); - - forwardPass(); - } - //} catch (int err_num) { - // std::cout << "ERR " << err_num << ": Resulting matrix was noninvertible/degenerate, and so the normal equation could not be performed. Try utilizing gradient descent." 
<< std::endl; - //} -} - -real_t MLPPLinRegOld::score() { - MLPPUtilities util; - return util.performance(y_hat, outputSet); -} - -void MLPPLinRegOld::save(std::string fileName) { - MLPPUtilities util; - util.saveParameters(fileName, weights, bias); -} - -real_t MLPPLinRegOld::Cost(std::vector y_hat, std::vector y) { - MLPPRegOld regularization; - class MLPPCostOld cost; - return cost.MSE(y_hat, y) + regularization.regTerm(weights, lambda, alpha, reg); -} - -std::vector MLPPLinRegOld::Evaluate(std::vector> X) { - MLPPLinAlgOld alg; - return alg.scalarAdd(bias, alg.mat_vec_mult(X, weights)); -} - -real_t MLPPLinRegOld::Evaluate(std::vector x) { - MLPPLinAlgOld alg; - return alg.dot(weights, x) + bias; -} - -// wTx + b -void MLPPLinRegOld::forwardPass() { - y_hat = Evaluate(inputSet); -} diff --git a/mlpp/lin_reg/lin_reg_old.h b/mlpp/lin_reg/lin_reg_old.h deleted file mode 100644 index babee8b..0000000 --- a/mlpp/lin_reg/lin_reg_old.h +++ /dev/null @@ -1,60 +0,0 @@ - -#ifndef MLPP_LIN_REG_OLD_H -#define MLPP_LIN_REG_OLD_H - -// -// LinReg.hpp -// -// Created by Marc Melikyan on 10/2/20. -// - -#include "core/math/math_defs.h" - -#include -#include - -class MLPPLinRegOld { -public: - MLPPLinRegOld(std::vector> inputSet, std::vector outputSet, std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5); - std::vector modelSetTest(std::vector> X); - real_t modelTest(std::vector x); - void NewtonRaphson(real_t learning_rate, int max_epoch, bool UI); - void gradientDescent(real_t learning_rate, int max_epoch, bool UI = false); - void SGD(real_t learning_rate, int max_epoch, bool UI = false); - - void Momentum(real_t learning_rate, int max_epoch, int mini_batch_size, real_t gamma, bool UI = false); - void NAG(real_t learning_rate, int max_epoch, int mini_batch_size, real_t gamma, bool UI = false); - void Adagrad(real_t learning_rate, int max_epoch, int mini_batch_size, real_t e, bool UI = false); - void Adadelta(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t e, bool UI = false); - void Adam(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI = false); - void Adamax(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI = false); - void Nadam(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI = false); - - void MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI = false); - void normalEquation(); - real_t score(); - void save(std::string fileName); - -private: - real_t Cost(std::vector y_hat, std::vector y); - - std::vector Evaluate(std::vector> X); - real_t Evaluate(std::vector x); - void forwardPass(); - - std::vector> inputSet; - std::vector outputSet; - std::vector y_hat; - std::vector weights; - real_t bias; - - int n; - int k; - - // Regularization Params - std::string reg; - int lambda; - int alpha; /* This is the controlling param for Elastic Net*/ -}; - -#endif /* LinReg_hpp */ diff --git a/mlpp/log_reg/log_reg_old.cpp b/mlpp/log_reg/log_reg_old.cpp deleted file mode 100644 index 4116bec..0000000 --- a/mlpp/log_reg/log_reg_old.cpp +++ /dev/null @@ -1,213 +0,0 @@ -// -// LogReg.cpp -// -// Created by Marc Melikyan on 10/2/20. 
-// - -#include "log_reg_old.h" - -#include "../activation/activation_old.h" -#include "../cost/cost_old.h" -#include "../lin_alg/lin_alg_old.h" -#include "../regularization/reg_old.h" -#include "../utilities/utilities.h" - -#include -#include - -MLPPLogRegOld::MLPPLogRegOld(std::vector> pinputSet, std::vector poutputSet, std::string preg, real_t plambda, real_t palpha) { - inputSet = pinputSet; - outputSet = poutputSet; - n = pinputSet.size(); - k = pinputSet[0].size(); - reg = preg; - lambda = plambda; - alpha = palpha; - - y_hat.resize(n); - weights = MLPPUtilities::weightInitialization(k); - bias = MLPPUtilities::biasInitialization(); -} - -std::vector MLPPLogRegOld::modelSetTest(std::vector> X) { - return Evaluate(X); -} - -real_t MLPPLogRegOld::modelTest(std::vector x) { - return Evaluate(x); -} - -void MLPPLogRegOld::gradientDescent(real_t learning_rate, int max_epoch, bool UI) { - MLPPLinAlgOld alg; - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - forwardPass(); - - while (true) { - cost_prev = Cost(y_hat, outputSet); - - std::vector error = alg.subtraction(y_hat, outputSet); - - // Calculating the weight gradients - weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate / n, alg.mat_vec_mult(alg.transpose(inputSet), error))); - weights = regularization.regWeights(weights, lambda, alpha, reg); - - // Calculating the bias gradients - bias -= learning_rate * alg.sum_elements(error) / n; - forwardPass(); - - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); - MLPPUtilities::UI(weights, bias); - } - epoch++; - - if (epoch > max_epoch) { - break; - } - } -} - -void MLPPLogRegOld::MLE(real_t learning_rate, int max_epoch, bool UI) { - MLPPLinAlgOld alg; - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - forwardPass(); - - while (true) { - cost_prev = Cost(y_hat, outputSet); - - std::vector error = alg.subtraction(outputSet, y_hat); - - // Calculating the weight gradients - weights = alg.addition(weights, alg.scalarMultiply(learning_rate / n, alg.mat_vec_mult(alg.transpose(inputSet), error))); - weights = regularization.regWeights(weights, lambda, alpha, reg); - - // Calculating the bias gradients - bias += learning_rate * alg.sum_elements(error) / n; - forwardPass(); - - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); - MLPPUtilities::UI(weights, bias); - } - epoch++; - if (epoch > max_epoch) { - break; - } - } -} - -void MLPPLogRegOld::SGD(real_t learning_rate, int max_epoch, bool UI) { - MLPPLinAlgOld alg; - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - - while (true) { - std::random_device rd; - std::default_random_engine generator(rd()); - std::uniform_int_distribution distribution(0, int(n - 1)); - int outputIndex = distribution(generator); - - real_t y_hat = Evaluate(inputSet[outputIndex]); - cost_prev = Cost({ y_hat }, { outputSet[outputIndex] }); - - real_t error = y_hat - outputSet[outputIndex]; - - // Weight updation - weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate * error, inputSet[outputIndex])); - weights = regularization.regWeights(weights, lambda, alpha, reg); - - // Bias updation - bias -= learning_rate * error; - - y_hat = Evaluate({ inputSet[outputIndex] }); - - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost({ y_hat }, { outputSet[outputIndex] })); - MLPPUtilities::UI(weights, bias); - } - epoch++; - - if (epoch > max_epoch) { - break; - } - } - forwardPass(); -} - -void MLPPLogRegOld::MBGD(real_t 
learning_rate, int max_epoch, int mini_batch_size, bool UI) { - MLPPLinAlgOld alg; - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - - // Creating the mini-batches - int n_mini_batch = n / mini_batch_size; - auto bacthes = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch); - auto inputMiniBatches = std::get<0>(bacthes); - auto outputMiniBatches = std::get<1>(bacthes); - - while (true) { - for (int i = 0; i < n_mini_batch; i++) { - std::vector y_hat = Evaluate(inputMiniBatches[i]); - cost_prev = Cost(y_hat, outputMiniBatches[i]); - - std::vector error = alg.subtraction(y_hat, outputMiniBatches[i]); - - // Calculating the weight gradients - weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate / outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error))); - weights = regularization.regWeights(weights, lambda, alpha, reg); - - // Calculating the bias gradients - bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); - y_hat = Evaluate(inputMiniBatches[i]); - - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i])); - MLPPUtilities::UI(weights, bias); - } - } - epoch++; - if (epoch > max_epoch) { - break; - } - } - forwardPass(); -} - -real_t MLPPLogRegOld::score() { - MLPPUtilities util; - return util.performance(y_hat, outputSet); -} - -void MLPPLogRegOld::save(std::string fileName) { - MLPPUtilities util; - util.saveParameters(fileName, weights, bias); -} - -real_t MLPPLogRegOld::Cost(std::vector y_hat, std::vector y) { - MLPPRegOld regularization; - class MLPPCostOld cost; - return cost.LogLoss(y_hat, y) + regularization.regTerm(weights, lambda, alpha, reg); -} - -std::vector MLPPLogRegOld::Evaluate(std::vector> X) { - MLPPLinAlgOld alg; - MLPPActivationOld avn; - return avn.sigmoid(alg.scalarAdd(bias, alg.mat_vec_mult(X, weights))); -} - -real_t MLPPLogRegOld::Evaluate(std::vector x) { - MLPPLinAlgOld alg; - MLPPActivationOld avn; - return avn.sigmoid(alg.dot(weights, x) + bias); -} - -// sigmoid ( wTx + b ) -void MLPPLogRegOld::forwardPass() { - y_hat = Evaluate(inputSet); -} diff --git a/mlpp/log_reg/log_reg_old.h b/mlpp/log_reg/log_reg_old.h deleted file mode 100644 index 7aa57d4..0000000 --- a/mlpp/log_reg/log_reg_old.h +++ /dev/null @@ -1,51 +0,0 @@ - -#ifndef MLPP_LOG_REG_OLD_H -#define MLPP_LOG_REG_OLD_H - -// -// LogReg.hpp -// -// Created by Marc Melikyan on 10/2/20. 
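The logistic regression class removed above evaluates sigmoid(w.x + b) and steps the parameters along the negative log-loss gradient, (1/n) * X^T (y_hat - y). A minimal full-batch sketch of that update, with made-up data and hypothetical names, assuming a single feature:

#include <cmath>
#include <cstdio>
#include <vector>

static double sigmoid(double z) { return 1.0 / (1.0 + std::exp(-z)); }

int main() {
	// Tiny 1-feature dataset: y = 1 when x > 0.
	std::vector<double> x = { -2.0, -1.0, 1.0, 2.0 };
	std::vector<double> y = { 0.0, 0.0, 1.0, 1.0 };
	double w = 0.0, b = 0.0, lr = 0.5;
	int n = (int)x.size();

	for (int epoch = 0; epoch < 1000; ++epoch) {
		double gw = 0.0, gb = 0.0;
		for (int i = 0; i < n; ++i) {
			double err = sigmoid(w * x[i] + b) - y[i]; // dLogLoss/dz for one sample
			gw += err * x[i];
			gb += err;
		}
		w -= lr * gw / n; // same (1/n) * X^T (y_hat - y) form as the removed class
		b -= lr * gb / n;
	}
	std::printf("w=%f b=%f p(y=1|x=1.5)=%f\n", w, b, sigmoid(w * 1.5 + b));
	return 0;
}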
-// - -#include "core/math/math_defs.h" - -#include -#include - -class MLPPLogRegOld { -public: - MLPPLogRegOld(std::vector> inputSet, std::vector outputSet, std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5); - std::vector modelSetTest(std::vector> X); - real_t modelTest(std::vector x); - void gradientDescent(real_t learning_rate, int max_epoch, bool UI = false); - void MLE(real_t learning_rate, int max_epoch, bool UI = false); - void SGD(real_t learning_rate, int max_epoch, bool UI = false); - void MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI = false); - real_t score(); - void save(std::string fileName); - -private: - real_t Cost(std::vector y_hat, std::vector y); - - std::vector Evaluate(std::vector> X); - real_t Evaluate(std::vector x); - void forwardPass(); - - std::vector> inputSet; - std::vector outputSet; - std::vector y_hat; - std::vector weights; - real_t bias; - - int n; - int k; - //real_t learning_rate; - - // Regularization Params - std::string reg; - real_t lambda; /* Regularization Parameter */ - real_t alpha; /* This is the controlling param for Elastic Net*/ -}; - -#endif /* LogReg_hpp */ diff --git a/mlpp/mann/mann_old.cpp b/mlpp/mann/mann_old.cpp deleted file mode 100644 index 2cd087b..0000000 --- a/mlpp/mann/mann_old.cpp +++ /dev/null @@ -1,189 +0,0 @@ -// -// MANN.cpp -// -// Created by Marc Melikyan on 11/4/20. -// - -#include "mann_old.h" - -#include "../activation/activation_old.h" -#include "../cost/cost_old.h" -#include "../lin_alg/lin_alg_old.h" -#include "../regularization/reg_old.h" -#include "../utilities/utilities.h" - -#include - -MLPPMANNOld::MLPPMANNOld(std::vector> inputSet, std::vector> outputSet) : - inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), n_output(outputSet[0].size()) { -} - -MLPPMANNOld::~MLPPMANNOld() { - delete outputLayer; -} - -std::vector> MLPPMANNOld::modelSetTest(std::vector> X) { - if (!network.empty()) { - network[0].input = X; - network[0].forwardPass(); - - for (uint32_t i = 1; i < network.size(); i++) { - network[i].input = network[i - 1].a; - network[i].forwardPass(); - } - outputLayer->input = network[network.size() - 1].a; - } else { - outputLayer->input = X; - } - outputLayer->forwardPass(); - return outputLayer->a; -} - -std::vector MLPPMANNOld::modelTest(std::vector x) { - if (!network.empty()) { - network[0].Test(x); - for (uint32_t i = 1; i < network.size(); i++) { - network[i].Test(network[i - 1].a_test); - } - outputLayer->Test(network[network.size() - 1].a_test); - } else { - outputLayer->Test(x); - } - return outputLayer->a_test; -} - -void MLPPMANNOld::gradientDescent(real_t learning_rate, int max_epoch, bool UI) { - class MLPPCostOld cost; - MLPPActivationOld avn; - MLPPLinAlgOld alg; - MLPPRegOld regularization; - - real_t cost_prev = 0; - int epoch = 1; - forwardPass(); - - while (true) { - cost_prev = Cost(y_hat, outputSet); - - if (outputLayer->activation == "Softmax") { - outputLayer->delta = alg.subtraction(y_hat, outputSet); - } else { - auto costDeriv = outputLayer->costDeriv_map[outputLayer->cost]; - auto outputAvn = outputLayer->activation_map[outputLayer->activation]; - outputLayer->delta = alg.hadamard_product((cost.*costDeriv)(y_hat, outputSet), (avn.*outputAvn)(outputLayer->z, 1)); - } - - std::vector> outputWGrad = alg.matmult(alg.transpose(outputLayer->input), outputLayer->delta); - - outputLayer->weights = alg.subtraction(outputLayer->weights, alg.scalarMultiply(learning_rate / n, outputWGrad)); - outputLayer->weights = 
regularization.regWeights(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg); - outputLayer->bias = alg.subtractMatrixRows(outputLayer->bias, alg.scalarMultiply(learning_rate / n, outputLayer->delta)); - - if (!network.empty()) { - auto hiddenLayerAvn = network[network.size() - 1].activation_map[network[network.size() - 1].activation]; - network[network.size() - 1].delta = alg.hadamard_product(alg.matmult(outputLayer->delta, alg.transpose(outputLayer->weights)), (avn.*hiddenLayerAvn)(network[network.size() - 1].z, 1)); - std::vector> hiddenLayerWGrad = alg.matmult(alg.transpose(network[network.size() - 1].input), network[network.size() - 1].delta); - - network[network.size() - 1].weights = alg.subtraction(network[network.size() - 1].weights, alg.scalarMultiply(learning_rate / n, hiddenLayerWGrad)); - network[network.size() - 1].weights = regularization.regWeights(network[network.size() - 1].weights, network[network.size() - 1].lambda, network[network.size() - 1].alpha, network[network.size() - 1].reg); - network[network.size() - 1].bias = alg.subtractMatrixRows(network[network.size() - 1].bias, alg.scalarMultiply(learning_rate / n, network[network.size() - 1].delta)); - - for (int i = network.size() - 2; i >= 0; i--) { - hiddenLayerAvn = network[i].activation_map[network[i].activation]; - network[i].delta = alg.hadamard_product(alg.matmult(network[i + 1].delta, network[i + 1].weights), (avn.*hiddenLayerAvn)(network[i].z, 1)); - hiddenLayerWGrad = alg.matmult(alg.transpose(network[i].input), network[i].delta); - network[i].weights = alg.subtraction(network[i].weights, alg.scalarMultiply(learning_rate / n, hiddenLayerWGrad)); - network[i].weights = regularization.regWeights(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg); - network[i].bias = alg.subtractMatrixRows(network[i].bias, alg.scalarMultiply(learning_rate / n, network[i].delta)); - } - } - - forwardPass(); - - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); - std::cout << "Layer " << network.size() + 1 << ": " << std::endl; - MLPPUtilities::UI(outputLayer->weights, outputLayer->bias); - if (!network.empty()) { - std::cout << "Layer " << network.size() << ": " << std::endl; - for (int i = network.size() - 1; i >= 0; i--) { - std::cout << "Layer " << i + 1 << ": " << std::endl; - MLPPUtilities::UI(network[i].weights, network[i].bias); - } - } - } - - epoch++; - if (epoch > max_epoch) { - break; - } - } -} - -real_t MLPPMANNOld::score() { - MLPPUtilities util; - forwardPass(); - return util.performance(y_hat, outputSet); -} - -void MLPPMANNOld::save(std::string fileName) { - MLPPUtilities util; - if (!network.empty()) { - util.saveParameters(fileName, network[0].weights, network[0].bias, 0, 1); - for (uint32_t i = 1; i < network.size(); i++) { - util.saveParameters(fileName, network[i].weights, network[i].bias, 1, i + 1); - } - util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, 1, network.size() + 1); - } else { - util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, 0, network.size() + 1); - } -} - -void MLPPMANNOld::addLayer(int n_hidden, std::string activation, std::string weightInit, std::string reg, real_t lambda, real_t alpha) { - if (network.empty()) { - network.push_back(MLPPOldHiddenLayer(n_hidden, activation, inputSet, weightInit, reg, lambda, alpha)); - network[0].forwardPass(); - } else { - network.push_back(MLPPOldHiddenLayer(n_hidden, activation, network[network.size() - 1].a, weightInit, reg, lambda, 
alpha)); - network[network.size() - 1].forwardPass(); - } -} - -void MLPPMANNOld::addOutputLayer(std::string activation, std::string loss, std::string weightInit, std::string reg, real_t lambda, real_t alpha) { - if (!network.empty()) { - outputLayer = new MLPPOldMultiOutputLayer(n_output, network[0].n_hidden, activation, loss, network[network.size() - 1].a, weightInit, reg, lambda, alpha); - } else { - outputLayer = new MLPPOldMultiOutputLayer(n_output, k, activation, loss, inputSet, weightInit, reg, lambda, alpha); - } -} - -real_t MLPPMANNOld::Cost(std::vector> y_hat, std::vector> y) { - MLPPRegOld regularization; - class MLPPCostOld cost; - real_t totalRegTerm = 0; - - auto cost_function = outputLayer->cost_map[outputLayer->cost]; - if (!network.empty()) { - for (uint32_t i = 0; i < network.size() - 1; i++) { - totalRegTerm += regularization.regTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg); - } - } - return (cost.*cost_function)(y_hat, y) + totalRegTerm + regularization.regTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg); -} - -void MLPPMANNOld::forwardPass() { - if (!network.empty()) { - network[0].input = inputSet; - network[0].forwardPass(); - - for (uint32_t i = 1; i < network.size(); i++) { - network[i].input = network[i - 1].a; - network[i].forwardPass(); - } - outputLayer->input = network[network.size() - 1].a; - } else { - outputLayer->input = inputSet; - } - outputLayer->forwardPass(); - y_hat = outputLayer->a; -} diff --git a/mlpp/mann/mann_old.h b/mlpp/mann/mann_old.h deleted file mode 100644 index d5be616..0000000 --- a/mlpp/mann/mann_old.h +++ /dev/null @@ -1,51 +0,0 @@ - -#ifndef MLPP_MANN_OLD_H -#define MLPP_MANN_OLD_H - -// -// MANN.hpp -// -// Created by Marc Melikyan on 11/4/20. -// - -#include "core/math/math_defs.h" - -#include "../hidden_layer/hidden_layer.h" -#include "../multi_output_layer/multi_output_layer.h" - -#include "../hidden_layer/hidden_layer_old.h" -#include "../multi_output_layer/multi_output_layer_old.h" - -#include -#include - -class MLPPMANNOld { -public: - MLPPMANNOld(std::vector> inputSet, std::vector> outputSet); - ~MLPPMANNOld(); - std::vector> modelSetTest(std::vector> X); - std::vector modelTest(std::vector x); - void gradientDescent(real_t learning_rate, int max_epoch, bool UI = false); - real_t score(); - void save(std::string fileName); - - void addLayer(int n_hidden, std::string activation, std::string weightInit = "Default", std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5); - void addOutputLayer(std::string activation, std::string loss, std::string weightInit = "Default", std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5); - -private: - real_t Cost(std::vector> y_hat, std::vector> y); - void forwardPass(); - - std::vector> inputSet; - std::vector> outputSet; - std::vector> y_hat; - - std::vector network; - MLPPOldMultiOutputLayer *outputLayer; - - int n; - int k; - int n_output; -}; - -#endif /* MANN_hpp */ \ No newline at end of file diff --git a/mlpp/mlp/mlp_old.cpp b/mlpp/mlp/mlp_old.cpp deleted file mode 100644 index c5b8a30..0000000 --- a/mlpp/mlp/mlp_old.cpp +++ /dev/null @@ -1,287 +0,0 @@ -// -// MLP.cpp -// -// Created by Marc Melikyan on 11/4/20. 
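In the MANN gradient descent deleted above, a "Softmax" output layer gets its delta set directly to y_hat - y instead of going through the generic cost-derivative times activation-derivative product; that is the standard simplification for softmax paired with cross-entropy. A small numerical sketch of that identity for one sample (all names and values are illustrative):

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

// Softmax of a logit vector, shifted by the max for numerical stability.
std::vector<double> softmax(const std::vector<double> &z) {
	double mx = *std::max_element(z.begin(), z.end());
	std::vector<double> p(z.size());
	double sum = 0.0;
	for (size_t i = 0; i < z.size(); ++i) {
		p[i] = std::exp(z[i] - mx);
		sum += p[i];
	}
	for (double &v : p) v /= sum;
	return p;
}

int main() {
	std::vector<double> z = { 2.0, -1.0, 0.5 }; // logits
	std::vector<double> y = { 0.0, 1.0, 0.0 };  // one-hot target
	std::vector<double> p = softmax(z);

	// For cross-entropy over a softmax, dL/dz_i reduces to p_i - y_i.
	for (size_t i = 0; i < z.size(); ++i)
		std::printf("delta[%zu] = %f\n", i, p[i] - y[i]);
	return 0;
}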
-// - -#include "mlp_old.h" - -#include "core/log/logger.h" - -#include "../activation/activation_old.h" -#include "../cost/cost_old.h" -#include "../lin_alg/lin_alg_old.h" -#include "../regularization/reg_old.h" -#include "../utilities/utilities.h" - -#include -#include - -MLPPMLPOld::MLPPMLPOld(std::vector> p_inputSet, std::vector p_outputSet, int p_n_hidden, std::string p_reg, real_t p_lambda, real_t p_alpha) { - inputSet = p_inputSet; - outputSet = p_outputSet; - n_hidden = p_n_hidden; - n = p_inputSet.size(); - k = p_inputSet[0].size(); - reg = p_reg; - lambda = p_lambda; - alpha = p_alpha; - - y_hat.resize(n); - - weights1 = MLPPUtilities::weightInitialization(k, n_hidden); - weights2 = MLPPUtilities::weightInitialization(n_hidden); - bias1 = MLPPUtilities::biasInitialization(n_hidden); - bias2 = MLPPUtilities::biasInitialization(); -} - -std::vector MLPPMLPOld::modelSetTest(std::vector> X) { - return Evaluate(X); -} - -real_t MLPPMLPOld::modelTest(std::vector x) { - return Evaluate(x); -} - -void MLPPMLPOld::gradientDescent(real_t learning_rate, int max_epoch, bool UI) { - MLPPActivationOld avn; - MLPPLinAlgOld alg; - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - forwardPass(); - - while (true) { - cost_prev = Cost(y_hat, outputSet); - - // Calculating the errors - std::vector error = alg.subtraction(y_hat, outputSet); - - // Calculating the weight/bias gradients for layer 2 - - std::vector D2_1 = alg.mat_vec_mult(alg.transpose(a2), error); - - // weights and bias updation for layer 2 - weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate / n, D2_1)); - weights2 = regularization.regWeights(weights2, lambda, alpha, reg); - - bias2 -= learning_rate * alg.sum_elements(error) / n; - - // Calculating the weight/bias for layer 1 - - std::vector> D1_1; - D1_1.resize(n); - - D1_1 = alg.outerProduct(error, weights2); - - std::vector> D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, true)); - - std::vector> D1_3 = alg.matmult(alg.transpose(inputSet), D1_2); - - // weight an bias updation for layer 1 - weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate / n, D1_3)); - weights1 = regularization.regWeights(weights1, lambda, alpha, reg); - - bias1 = alg.subtractMatrixRows(bias1, alg.scalarMultiply(learning_rate / n, D1_2)); - - forwardPass(); - - // UI PORTION - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); - std::cout << "Layer 1:" << std::endl; - MLPPUtilities::UI(weights1, bias1); - std::cout << "Layer 2:" << std::endl; - MLPPUtilities::UI(weights2, bias2); - } - epoch++; - - if (epoch > max_epoch) { - break; - } - } -} - -void MLPPMLPOld::SGD(real_t learning_rate, int max_epoch, bool UI) { - MLPPActivationOld avn; - MLPPLinAlgOld alg; - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - - while (true) { - std::random_device rd; - std::default_random_engine generator(rd()); - std::uniform_int_distribution distribution(0, int(n - 1)); - int outputIndex = distribution(generator); - - real_t y_hat = Evaluate(inputSet[outputIndex]); - auto propagate_result = propagate(inputSet[outputIndex]); - auto z2 = std::get<0>(propagate_result); - auto a2 = std::get<1>(propagate_result); - cost_prev = Cost({ y_hat }, { outputSet[outputIndex] }); - real_t error = y_hat - outputSet[outputIndex]; - - // Weight updation for layer 2 - std::vector D2_1 = alg.scalarMultiply(error, a2); - weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate, D2_1)); - weights2 = 
regularization.regWeights(weights2, lambda, alpha, reg); - - // Bias updation for layer 2 - bias2 -= learning_rate * error; - - // Weight updation for layer 1 - std::vector D1_1 = alg.scalarMultiply(error, weights2); - std::vector D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, true)); - std::vector> D1_3 = alg.outerProduct(inputSet[outputIndex], D1_2); - - weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate, D1_3)); - weights1 = regularization.regWeights(weights1, lambda, alpha, reg); - // Bias updation for layer 1 - - bias1 = alg.subtraction(bias1, alg.scalarMultiply(learning_rate, D1_2)); - - y_hat = Evaluate(inputSet[outputIndex]); - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost({ y_hat }, { outputSet[outputIndex] })); - std::cout << "Layer 1:" << std::endl; - MLPPUtilities::UI(weights1, bias1); - std::cout << "Layer 2:" << std::endl; - MLPPUtilities::UI(weights2, bias2); - } - epoch++; - - if (epoch > max_epoch) { - break; - } - } - forwardPass(); -} - -void MLPPMLPOld::MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI) { - MLPPActivationOld avn; - MLPPLinAlgOld alg; - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - - // Creating the mini-batches - int n_mini_batch = n / mini_batch_size; - auto minibatches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch); - auto inputMiniBatches = std::get<0>(minibatches); - auto outputMiniBatches = std::get<1>(minibatches); - - while (true) { - for (int i = 0; i < n_mini_batch; i++) { - std::vector y_hat = Evaluate(inputMiniBatches[i]); - auto propagate_result = propagate(inputMiniBatches[i]); - auto z2 = std::get<0>(propagate_result); - auto a2 = std::get<1>(propagate_result); - - cost_prev = Cost(y_hat, outputMiniBatches[i]); - - // Calculating the errors - std::vector error = alg.subtraction(y_hat, outputMiniBatches[i]); - - // Calculating the weight/bias gradients for layer 2 - - std::vector D2_1 = alg.mat_vec_mult(alg.transpose(a2), error); - - // weights and bias updation for layser 2 - weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate / outputMiniBatches[i].size(), D2_1)); - weights2 = regularization.regWeights(weights2, lambda, alpha, reg); - - // Calculating the bias gradients for layer 2 - //real_t b_gradient = alg.sum_elements(error); - - // Bias Updation for layer 2 - bias2 -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); - - //Calculating the weight/bias for layer 1 - - std::vector> D1_1 = alg.outerProduct(error, weights2); - - std::vector> D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, true)); - - std::vector> D1_3 = alg.matmult(alg.transpose(inputMiniBatches[i]), D1_2); - - // weight an bias updation for layer 1 - weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate / outputMiniBatches[i].size(), D1_3)); - weights1 = regularization.regWeights(weights1, lambda, alpha, reg); - - bias1 = alg.subtractMatrixRows(bias1, alg.scalarMultiply(learning_rate / outputMiniBatches[i].size(), D1_2)); - - y_hat = Evaluate(inputMiniBatches[i]); - - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i])); - std::cout << "Layer 1:" << std::endl; - MLPPUtilities::UI(weights1, bias1); - std::cout << "Layer 2:" << std::endl; - MLPPUtilities::UI(weights2, bias2); - } - } - epoch++; - if (epoch > max_epoch) { - break; - } - } - forwardPass(); -} - -real_t MLPPMLPOld::score() { - MLPPUtilities util; - return util.performance(y_hat, outputSet); -} - -void 
MLPPMLPOld::save(std::string fileName) { - MLPPUtilities util; - util.saveParameters(fileName, weights1, bias1, false, 1); - util.saveParameters(fileName, weights2, bias2, true, 2); -} - -real_t MLPPMLPOld::Cost(std::vector y_hat, std::vector y) { - MLPPRegOld regularization; - class MLPPCostOld cost; - return cost.LogLoss(y_hat, y) + regularization.regTerm(weights2, lambda, alpha, reg) + regularization.regTerm(weights1, lambda, alpha, reg); -} - -std::vector MLPPMLPOld::Evaluate(std::vector> X) { - MLPPLinAlgOld alg; - MLPPActivationOld avn; - std::vector> z2 = alg.mat_vec_add(alg.matmult(X, weights1), bias1); - std::vector> a2 = avn.sigmoid(z2); - return avn.sigmoid(alg.scalarAdd(bias2, alg.mat_vec_mult(a2, weights2))); -} - -std::tuple>, std::vector>> MLPPMLPOld::propagate(std::vector> X) { - MLPPLinAlgOld alg; - MLPPActivationOld avn; - std::vector> z2 = alg.mat_vec_add(alg.matmult(X, weights1), bias1); - std::vector> a2 = avn.sigmoid(z2); - return { z2, a2 }; -} - -real_t MLPPMLPOld::Evaluate(std::vector x) { - MLPPLinAlgOld alg; - MLPPActivationOld avn; - std::vector z2 = alg.addition(alg.mat_vec_mult(alg.transpose(weights1), x), bias1); - std::vector a2 = avn.sigmoid(z2); - return avn.sigmoid(alg.dot(weights2, a2) + bias2); -} - -std::tuple, std::vector> MLPPMLPOld::propagate(std::vector x) { - MLPPLinAlgOld alg; - MLPPActivationOld avn; - std::vector z2 = alg.addition(alg.mat_vec_mult(alg.transpose(weights1), x), bias1); - std::vector a2 = avn.sigmoid(z2); - return { z2, a2 }; -} - -void MLPPMLPOld::forwardPass() { - MLPPLinAlgOld alg; - MLPPActivationOld avn; - z2 = alg.mat_vec_add(alg.matmult(inputSet, weights1), bias1); - a2 = avn.sigmoid(z2); - y_hat = avn.sigmoid(alg.scalarAdd(bias2, alg.mat_vec_mult(a2, weights2))); -} diff --git a/mlpp/mlp/mlp_old.h b/mlpp/mlp/mlp_old.h deleted file mode 100644 index 93290ad..0000000 --- a/mlpp/mlp/mlp_old.h +++ /dev/null @@ -1,70 +0,0 @@ - -#ifndef MLPP_MLP_OLD_H -#define MLPP_MLP_OLD_H - -// -// MLP.hpp -// -// Created by Marc Melikyan on 11/4/20. 
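The Evaluate/forwardPass pair removed above computes a two-layer prediction, sigmoid(w2 . sigmoid(W1^T x + b1) + b2). A minimal single-sample forward pass with hardcoded toy weights follows; the dimensions and all values are assumptions for illustration only.

#include <cmath>
#include <cstdio>
#include <vector>

static double sigmoid(double z) { return 1.0 / (1.0 + std::exp(-z)); }

int main() {
	// 2 inputs -> 3 hidden units -> 1 output, sigmoid everywhere.
	std::vector<double> x = { 0.5, -1.0 };
	std::vector<std::vector<double>> W1 = { // W1[i][j]: input i -> hidden j
		{ 0.1, -0.2, 0.3 },
		{ 0.4, 0.5, -0.6 }
	};
	std::vector<double> b1 = { 0.0, 0.1, -0.1 };
	std::vector<double> W2 = { 0.7, -0.8, 0.9 }; // hidden -> output
	double b2 = 0.05;

	// Hidden activations: a2_j = sigmoid(sum_i x_i * W1[i][j] + b1_j).
	std::vector<double> a2(3);
	for (int j = 0; j < 3; ++j) {
		double z = b1[j];
		for (int i = 0; i < 2; ++i) {
			z += x[i] * W1[i][j];
		}
		a2[j] = sigmoid(z);
	}

	// Output: y_hat = sigmoid(W2 . a2 + b2).
	double z3 = b2;
	for (int j = 0; j < 3; ++j) {
		z3 += W2[j] * a2[j];
	}
	std::printf("y_hat = %f\n", sigmoid(z3));
	return 0;
}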
-// - -#include "core/containers/vector.h" -#include "core/math/math_defs.h" -#include "core/string/ustring.h" -#include "core/variant/variant.h" - -#include "core/object/reference.h" - -#include "../regularization/reg.h" - -#include "../lin_alg/mlpp_matrix.h" -#include "../lin_alg/mlpp_vector.h" - -#include -#include -#include - -class MLPPMLPOld { -public: - MLPPMLPOld(std::vector> inputSet, std::vector outputSet, int n_hidden, std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5); - std::vector modelSetTest(std::vector> X); - real_t modelTest(std::vector x); - void gradientDescent(real_t learning_rate, int max_epoch, bool UI = false); - void SGD(real_t learning_rate, int max_epoch, bool UI = false); - void MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI = false); - real_t score(); - void save(std::string fileName); - -private: - real_t Cost(std::vector y_hat, std::vector y); - - std::vector Evaluate(std::vector> X); - std::tuple>, std::vector>> propagate(std::vector> X); - real_t Evaluate(std::vector x); - std::tuple, std::vector> propagate(std::vector x); - void forwardPass(); - - std::vector> inputSet; - std::vector outputSet; - std::vector y_hat; - - std::vector> weights1; - std::vector weights2; - - std::vector bias1; - real_t bias2; - - std::vector> z2; - std::vector> a2; - - int n; - int k; - int n_hidden; - - // Regularization Params - std::string reg; - real_t lambda; /* Regularization Parameter */ - real_t alpha; /* This is the controlling param for Elastic Net*/ -}; - -#endif /* MLP_hpp */ diff --git a/mlpp/multi_output_layer/multi_output_layer_old.cpp b/mlpp/multi_output_layer/multi_output_layer_old.cpp deleted file mode 100644 index b87894e..0000000 --- a/mlpp/multi_output_layer/multi_output_layer_old.cpp +++ /dev/null @@ -1,139 +0,0 @@ -// -// MultiOutputLayer.cpp -// -// Created by Marc Melikyan on 11/4/20. 
-// - -#include "multi_output_layer_old.h" -#include "../lin_alg/lin_alg_old.h" -#include "../utilities/utilities.h" - -#include -#include - -MLPPOldMultiOutputLayer::MLPPOldMultiOutputLayer(int p_n_output, int p_n_hidden, std::string p_activation, std::string p_cost, std::vector> p_input, std::string p_weightInit, std::string p_reg, real_t p_lambda, real_t p_alpha) { - n_output = p_n_output; - n_hidden = p_n_hidden; - activation = p_activation; - cost = p_cost; - input = p_input; - weightInit = p_weightInit; - reg = p_reg; - lambda = p_lambda; - alpha = p_alpha; - - weights = MLPPUtilities::weightInitialization(n_hidden, n_output, weightInit); - bias = MLPPUtilities::biasInitialization(n_output); - - activation_map["Linear"] = &MLPPActivationOld::linear; - activationTest_map["Linear"] = &MLPPActivationOld::linear; - - activation_map["Sigmoid"] = &MLPPActivationOld::sigmoid; - activationTest_map["Sigmoid"] = &MLPPActivationOld::sigmoid; - - activation_map["Softmax"] = &MLPPActivationOld::softmax; - activationTest_map["Softmax"] = &MLPPActivationOld::softmax; - - activation_map["Swish"] = &MLPPActivationOld::swish; - activationTest_map["Swish"] = &MLPPActivationOld::swish; - - activation_map["Mish"] = &MLPPActivationOld::mish; - activationTest_map["Mish"] = &MLPPActivationOld::mish; - - activation_map["SinC"] = &MLPPActivationOld::sinc; - activationTest_map["SinC"] = &MLPPActivationOld::sinc; - - activation_map["Softplus"] = &MLPPActivationOld::softplus; - activationTest_map["Softplus"] = &MLPPActivationOld::softplus; - - activation_map["Softsign"] = &MLPPActivationOld::softsign; - activationTest_map["Softsign"] = &MLPPActivationOld::softsign; - - activation_map["CLogLog"] = &MLPPActivationOld::cloglog; - activationTest_map["CLogLog"] = &MLPPActivationOld::cloglog; - - activation_map["Logit"] = &MLPPActivationOld::logit; - activationTest_map["Logit"] = &MLPPActivationOld::logit; - - activation_map["GaussianCDF"] = &MLPPActivationOld::gaussianCDF; - activationTest_map["GaussianCDF"] = &MLPPActivationOld::gaussianCDF; - - activation_map["RELU"] = &MLPPActivationOld::RELU; - activationTest_map["RELU"] = &MLPPActivationOld::RELU; - - activation_map["GELU"] = &MLPPActivationOld::GELU; - activationTest_map["GELU"] = &MLPPActivationOld::GELU; - - activation_map["Sign"] = &MLPPActivationOld::sign; - activationTest_map["Sign"] = &MLPPActivationOld::sign; - - activation_map["UnitStep"] = &MLPPActivationOld::unitStep; - activationTest_map["UnitStep"] = &MLPPActivationOld::unitStep; - - activation_map["Sinh"] = &MLPPActivationOld::sinh; - activationTest_map["Sinh"] = &MLPPActivationOld::sinh; - - activation_map["Cosh"] = &MLPPActivationOld::cosh; - activationTest_map["Cosh"] = &MLPPActivationOld::cosh; - - activation_map["Tanh"] = &MLPPActivationOld::tanh; - activationTest_map["Tanh"] = &MLPPActivationOld::tanh; - - activation_map["Csch"] = &MLPPActivationOld::csch; - activationTest_map["Csch"] = &MLPPActivationOld::csch; - - activation_map["Sech"] = &MLPPActivationOld::sech; - activationTest_map["Sech"] = &MLPPActivationOld::sech; - - activation_map["Coth"] = &MLPPActivationOld::coth; - activationTest_map["Coth"] = &MLPPActivationOld::coth; - - activation_map["Arsinh"] = &MLPPActivationOld::arsinh; - activationTest_map["Arsinh"] = &MLPPActivationOld::arsinh; - - activation_map["Arcosh"] = &MLPPActivationOld::arcosh; - activationTest_map["Arcosh"] = &MLPPActivationOld::arcosh; - - activation_map["Artanh"] = &MLPPActivationOld::artanh; - activationTest_map["Artanh"] = &MLPPActivationOld::artanh; - - 
activation_map["Arcsch"] = &MLPPActivationOld::arcsch; - activationTest_map["Arcsch"] = &MLPPActivationOld::arcsch; - - activation_map["Arsech"] = &MLPPActivationOld::arsech; - activationTest_map["Arsech"] = &MLPPActivationOld::arsech; - - activation_map["Arcoth"] = &MLPPActivationOld::arcoth; - activationTest_map["Arcoth"] = &MLPPActivationOld::arcoth; - - costDeriv_map["MSE"] = &MLPPCostOld::MSEDeriv; - cost_map["MSE"] = &MLPPCostOld::MSE; - costDeriv_map["RMSE"] = &MLPPCostOld::RMSEDeriv; - cost_map["RMSE"] = &MLPPCostOld::RMSE; - costDeriv_map["MAE"] = &MLPPCostOld::MAEDeriv; - cost_map["MAE"] = &MLPPCostOld::MAE; - costDeriv_map["MBE"] = &MLPPCostOld::MBEDeriv; - cost_map["MBE"] = &MLPPCostOld::MBE; - costDeriv_map["LogLoss"] = &MLPPCostOld::LogLossDeriv; - cost_map["LogLoss"] = &MLPPCostOld::LogLoss; - costDeriv_map["CrossEntropy"] = &MLPPCostOld::CrossEntropyDeriv; - cost_map["CrossEntropy"] = &MLPPCostOld::CrossEntropy; - costDeriv_map["HingeLoss"] = &MLPPCostOld::HingeLossDeriv; - cost_map["HingeLoss"] = &MLPPCostOld::HingeLoss; - costDeriv_map["WassersteinLoss"] = &MLPPCostOld::HingeLossDeriv; - cost_map["WassersteinLoss"] = &MLPPCostOld::HingeLoss; -} - -void MLPPOldMultiOutputLayer::forwardPass() { - MLPPLinAlgOld alg; - MLPPActivationOld avn; - z = alg.mat_vec_add(alg.matmult(input, weights), bias); - a = (avn.*activation_map[activation])(z, false); -} - -void MLPPOldMultiOutputLayer::Test(std::vector x) { - MLPPLinAlgOld alg; - MLPPActivationOld avn; - z_test = alg.addition(alg.mat_vec_mult(alg.transpose(weights), x), bias); - a_test = (avn.*activationTest_map[activation])(z_test, false); -} diff --git a/mlpp/multi_output_layer/multi_output_layer_old.h b/mlpp/multi_output_layer/multi_output_layer_old.h deleted file mode 100644 index 974ce05..0000000 --- a/mlpp/multi_output_layer/multi_output_layer_old.h +++ /dev/null @@ -1,66 +0,0 @@ - -#ifndef MLPP_MULTI_OUTPUT_LAYER_OLD_H -#define MLPP_MULTI_OUTPUT_LAYER_OLD_H - -// -// MultiOutputLayer.hpp -// -// Created by Marc Melikyan on 11/4/20. 
-// - -#include "core/math/math_defs.h" -#include "core/string/ustring.h" - -#include "core/object/reference.h" - -#include "../activation/activation_old.h" -#include "../cost/cost_old.h" -#include "../regularization/reg.h" -#include "../utilities/utilities.h" - -#include "../lin_alg/mlpp_matrix.h" -#include "../lin_alg/mlpp_vector.h" - -#include -#include -#include - -class MLPPOldMultiOutputLayer { -public: - MLPPOldMultiOutputLayer(int n_output, int n_hidden, std::string activation, std::string cost, std::vector> input, std::string weightInit, std::string reg, real_t lambda, real_t alpha); - - int n_output; - int n_hidden; - std::string activation; - std::string cost; - - std::vector> input; - - std::vector> weights; - std::vector bias; - - std::vector> z; - std::vector> a; - - std::map> (MLPPActivationOld::*)(std::vector>, bool)> activation_map; - std::map (MLPPActivationOld::*)(std::vector, bool)> activationTest_map; - std::map>, std::vector>)> cost_map; - std::map> (MLPPCostOld::*)(std::vector>, std::vector>)> costDeriv_map; - - std::vector z_test; - std::vector a_test; - - std::vector> delta; - - // Regularization Params - std::string reg; - real_t lambda; /* Regularization Parameter */ - real_t alpha; /* This is the controlling param for Elastic Net*/ - - std::string weightInit; - - void forwardPass(); - void Test(std::vector x); -}; - -#endif /* MultiOutputLayer_hpp */ diff --git a/mlpp/multinomial_nb/multinomial_nb_old.cpp b/mlpp/multinomial_nb/multinomial_nb_old.cpp deleted file mode 100644 index 2894152..0000000 --- a/mlpp/multinomial_nb/multinomial_nb_old.cpp +++ /dev/null @@ -1,121 +0,0 @@ -// -// MultinomialNB.cpp -// -// Created by Marc Melikyan on 1/17/21. -// - -#include "multinomial_nb_old.h" - -#include "../lin_alg/lin_alg_old.h" -#include "../utilities/utilities.h" - -#include -#include -#include - -MLPPMultinomialNBOld::MLPPMultinomialNBOld(std::vector> pinputSet, std::vector poutputSet, int pclass_num) { - inputSet = pinputSet; - outputSet = poutputSet; - class_num = pclass_num; - - y_hat.resize(outputSet.size()); - Evaluate(); -} - -std::vector MLPPMultinomialNBOld::modelSetTest(std::vector> X) { - std::vector y_hat; - for (uint32_t i = 0; i < X.size(); i++) { - y_hat.push_back(modelTest(X[i])); - } - return y_hat; -} - -real_t MLPPMultinomialNBOld::modelTest(std::vector x) { - real_t score[class_num]; - computeTheta(); - - for (uint32_t j = 0; j < x.size(); j++) { - for (uint32_t k = 0; k < vocab.size(); k++) { - if (x[j] == vocab[k]) { - for (int p = class_num - 1; p >= 0; p--) { - score[p] += std::log(theta[p][vocab[k]]); - } - } - } - } - - for (uint32_t i = 0; i < priors.size(); i++) { - score[i] += std::log(priors[i]); - } - - return std::distance(score, std::max_element(score, score + sizeof(score) / sizeof(real_t))); -} - -real_t MLPPMultinomialNBOld::score() { - MLPPUtilities util; - return util.performance(y_hat, outputSet); -} - -void MLPPMultinomialNBOld::computeTheta() { - // Resizing theta for the sake of ease & proper access of the elements. - theta.resize(class_num); - - // Setting all values in the hasmap by default to 0. 
- for (int i = class_num - 1; i >= 0; i--) { - for (uint32_t j = 0; j < vocab.size(); j++) { - theta[i][vocab[j]] = 0; - } - } - - for (uint32_t i = 0; i < inputSet.size(); i++) { - for (uint32_t j = 0; j < inputSet[0].size(); j++) { - theta[outputSet[i]][inputSet[i][j]]++; - } - } - - for (uint32_t i = 0; i < theta.size(); i++) { - for (uint32_t j = 0; j < theta[i].size(); j++) { - theta[i][j] /= priors[i] * y_hat.size(); - } - } -} - -void MLPPMultinomialNBOld::Evaluate() { - MLPPLinAlgOld alg; - for (uint32_t i = 0; i < outputSet.size(); i++) { - // Pr(B | A) * Pr(A) - real_t score[class_num]; - - // Easy computation of priors, i.e. Pr(C_k) - priors.resize(class_num); - for (uint32_t ii = 0; ii < outputSet.size(); ii++) { - priors[int(outputSet[ii])]++; - } - priors = alg.scalarMultiply(real_t(1) / real_t(outputSet.size()), priors); - - // Evaluating Theta... - computeTheta(); - - for (uint32_t j = 0; j < inputSet.size(); j++) { - for (uint32_t k = 0; k < vocab.size(); k++) { - if (inputSet[i][j] == vocab[k]) { - for (int p = class_num - 1; p >= 0; p--) { - score[p] += std::log(theta[i][vocab[k]]); - } - } - } - } - - for (uint32_t ii = 0; ii < priors.size(); ii++) { - score[ii] += std::log(priors[ii]); - score[ii] = exp(score[ii]); - } - - for (int ii = 0; ii < 2; ii++) { - std::cout << score[ii] << std::endl; - } - - // Assigning the traning example's y_hat to a class - y_hat[i] = std::distance(score, std::max_element(score, score + sizeof(score) / sizeof(real_t))); - } -} diff --git a/mlpp/multinomial_nb/multinomial_nb_old.h b/mlpp/multinomial_nb/multinomial_nb_old.h deleted file mode 100644 index 9935a57..0000000 --- a/mlpp/multinomial_nb/multinomial_nb_old.h +++ /dev/null @@ -1,40 +0,0 @@ - -#ifndef MLPP_MULTINOMIAL_NB_OLD_H -#define MLPP_MULTINOMIAL_NB_OLD_H - -// -// MultinomialNB.hpp -// -// Created by Marc Melikyan on 1/17/21. -// - -#include "core/math/math_defs.h" - -#include -#include - -class MLPPMultinomialNBOld { -public: - MLPPMultinomialNBOld(std::vector> inputSet, std::vector outputSet, int class_num); - std::vector modelSetTest(std::vector> X); - real_t modelTest(std::vector x); - real_t score(); - -private: - void computeTheta(); - void Evaluate(); - - // Model Params - std::vector priors; - - std::vector> theta; - std::vector vocab; - int class_num; - - // Datasets - std::vector> inputSet; - std::vector outputSet; - std::vector y_hat; -}; - -#endif /* MultinomialNB_hpp */ diff --git a/mlpp/numerical_analysis/numerical_analysis_old.cpp b/mlpp/numerical_analysis/numerical_analysis_old.cpp deleted file mode 100644 index d03124d..0000000 --- a/mlpp/numerical_analysis/numerical_analysis_old.cpp +++ /dev/null @@ -1,300 +0,0 @@ -// -// NumericalAnalysis.cpp -// -// Created by Marc Melikyan on 11/13/20. -// - -#include "numerical_analysis_old.h" -#include "../lin_alg/lin_alg_old.h" - -#include -#include -#include -#include - -real_t MLPPNumericalAnalysisOld::numDiff(real_t (*function)(real_t), real_t x) { - real_t eps = 1e-10; - return (function(x + eps) - function(x)) / eps; // This is just the formal def. of the derivative. 
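numDiff above applies the forward-difference quotient (f(x + eps) - f(x)) / eps, and the second-order variant stacks the same idea. A brief sketch checking both against a function with a known derivative; the test function, step sizes, and helper names are illustrative assumptions.

#include <cmath>
#include <cstdio>

static double f(double x) { return std::sin(x); }

// Forward-difference approximations of f' and f''.
static double num_diff(double (*fn)(double), double x, double eps) {
	return (fn(x + eps) - fn(x)) / eps;
}
static double num_diff_2(double (*fn)(double), double x, double eps) {
	return (fn(x + 2 * eps) - 2 * fn(x + eps) + fn(x)) / (eps * eps);
}

int main() {
	double x = 1.0;
	std::printf("f'(1)  ~ %f (exact %f)\n", num_diff(f, x, 1e-6), std::cos(x));
	std::printf("f''(1) ~ %f (exact %f)\n", num_diff_2(f, x, 1e-4), -std::sin(x));
	return 0;
}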
-} - -real_t MLPPNumericalAnalysisOld::numDiff_2(real_t (*function)(real_t), real_t x) { - real_t eps = 1e-5; - return (function(x + 2 * eps) - 2 * function(x + eps) + function(x)) / (eps * eps); -} - -real_t MLPPNumericalAnalysisOld::numDiff_3(real_t (*function)(real_t), real_t x) { - real_t eps = 1e-5; - real_t t1 = function(x + 3 * eps) - 2 * function(x + 2 * eps) + function(x + eps); - real_t t2 = function(x + 2 * eps) - 2 * function(x + eps) + function(x); - return (t1 - t2) / (eps * eps * eps); -} - -real_t MLPPNumericalAnalysisOld::constantApproximation(real_t (*function)(real_t), real_t c) { - return function(c); -} - -real_t MLPPNumericalAnalysisOld::linearApproximation(real_t (*function)(real_t), real_t c, real_t x) { - return constantApproximation(function, c) + numDiff(function, c) * (x - c); -} - -real_t MLPPNumericalAnalysisOld::quadraticApproximation(real_t (*function)(real_t), real_t c, real_t x) { - return linearApproximation(function, c, x) + 0.5 * numDiff_2(function, c) * (x - c) * (x - c); -} - -real_t MLPPNumericalAnalysisOld::cubicApproximation(real_t (*function)(real_t), real_t c, real_t x) { - return quadraticApproximation(function, c, x) + (1 / 6) * numDiff_3(function, c) * (x - c) * (x - c) * (x - c); -} - -real_t MLPPNumericalAnalysisOld::numDiff(real_t (*function)(std::vector), std::vector x, int axis) { - // For multivariable function analysis. - // This will be used for calculating Jacobian vectors. - // Diffrentiate with respect to indicated axis. (0, 1, 2 ...) - real_t eps = 1e-10; - std::vector x_eps = x; - x_eps[axis] += eps; - - return (function(x_eps) - function(x)) / eps; -} - -real_t MLPPNumericalAnalysisOld::numDiff_2(real_t (*function)(std::vector), std::vector x, int axis1, int axis2) { - //For Hessians. - real_t eps = 1e-5; - - std::vector x_pp = x; - x_pp[axis1] += eps; - x_pp[axis2] += eps; - - std::vector x_np = x; - x_np[axis2] += eps; - - std::vector x_pn = x; - x_pn[axis1] += eps; - - return (function(x_pp) - function(x_np) - function(x_pn) + function(x)) / (eps * eps); -} - -real_t MLPPNumericalAnalysisOld::numDiff_3(real_t (*function)(std::vector), std::vector x, int axis1, int axis2, int axis3) { - // For third order derivative tensors. - // NOTE: Approximations do not appear to be accurate for sinusodial functions... - // Should revisit this later. 
- real_t eps = 1e-5; - - std::vector x_ppp = x; - x_ppp[axis1] += eps; - x_ppp[axis2] += eps; - x_ppp[axis3] += eps; - - std::vector x_npp = x; - x_npp[axis2] += eps; - x_npp[axis3] += eps; - - std::vector x_pnp = x; - x_pnp[axis1] += eps; - x_pnp[axis3] += eps; - - std::vector x_nnp = x; - x_nnp[axis3] += eps; - - std::vector x_ppn = x; - x_ppn[axis1] += eps; - x_ppn[axis2] += eps; - - std::vector x_npn = x; - x_npn[axis2] += eps; - - std::vector x_pnn = x; - x_pnn[axis1] += eps; - - real_t thirdAxis = function(x_ppp) - function(x_npp) - function(x_pnp) + function(x_nnp); - real_t noThirdAxis = function(x_ppn) - function(x_npn) - function(x_pnn) + function(x); - return (thirdAxis - noThirdAxis) / (eps * eps * eps); -} - -real_t MLPPNumericalAnalysisOld::newtonRaphsonMethod(real_t (*function)(real_t), real_t x_0, real_t epoch_num) { - real_t x = x_0; - for (int i = 0; i < epoch_num; i++) { - x -= function(x) / numDiff(function, x); - } - return x; -} - -real_t MLPPNumericalAnalysisOld::halleyMethod(real_t (*function)(real_t), real_t x_0, real_t epoch_num) { - real_t x = x_0; - for (int i = 0; i < epoch_num; i++) { - x -= ((2 * function(x) * numDiff(function, x)) / (2 * numDiff(function, x) * numDiff(function, x) - function(x) * numDiff_2(function, x))); - } - return x; -} - -real_t MLPPNumericalAnalysisOld::invQuadraticInterpolation(real_t (*function)(real_t), std::vector x_0, int epoch_num) { - real_t x = 0; - std::vector currentThree = x_0; - for (int i = 0; i < epoch_num; i++) { - real_t t1 = ((function(currentThree[1]) * function(currentThree[2])) / ((function(currentThree[0]) - function(currentThree[1])) * (function(currentThree[0]) - function(currentThree[2])))) * currentThree[0]; - real_t t2 = ((function(currentThree[0]) * function(currentThree[2])) / ((function(currentThree[1]) - function(currentThree[0])) * (function(currentThree[1]) - function(currentThree[2])))) * currentThree[1]; - real_t t3 = ((function(currentThree[0]) * function(currentThree[1])) / ((function(currentThree[2]) - function(currentThree[0])) * (function(currentThree[2]) - function(currentThree[1])))) * currentThree[2]; - x = t1 + t2 + t3; - - currentThree.erase(currentThree.begin()); - currentThree.push_back(x); - } - return x; -} - -real_t MLPPNumericalAnalysisOld::eulerianMethod(real_t (*derivative)(real_t), std::vector q_0, real_t p, real_t h) { - int max_epoch = static_cast((p - q_0[0]) / h); - real_t x = q_0[0]; - real_t y = q_0[1]; - for (int i = 0; i < max_epoch; i++) { - y = y + h * derivative(x); - x += h; - } - return y; -} - -real_t MLPPNumericalAnalysisOld::eulerianMethod(real_t (*derivative)(std::vector), std::vector q_0, real_t p, real_t h) { - int max_epoch = static_cast((p - q_0[0]) / h); - real_t x = q_0[0]; - real_t y = q_0[1]; - for (int i = 0; i < max_epoch; i++) { - y = y + h * derivative({ x, y }); - x += h; - } - return y; -} - -real_t MLPPNumericalAnalysisOld::growthMethod(real_t C, real_t k, real_t t) { - /* - dP/dt = kP - dP/P = kdt - integral(1/P)dP = integral(k) dt - ln|P| = kt + C_initial - |P| = e^(kt + C_initial) - |P| = e^(C_initial) * e^(kt) - P = +/- e^(C_initial) * e^(kt) - P = C * e^(kt) - */ - - // auto growthFunction = [&C, &k](real_t t) { return C * exp(k * t); }; - return C * std::exp(k * t); -} - -std::vector MLPPNumericalAnalysisOld::jacobian(real_t (*function)(std::vector), std::vector x) { - std::vector jacobian; - jacobian.resize(x.size()); - for (uint32_t i = 0; i < jacobian.size(); i++) { - jacobian[i] = numDiff(function, x, i); // Derivative w.r.t axis i evaluated 
at x. For all x_i. - } - return jacobian; -} -std::vector> MLPPNumericalAnalysisOld::hessian(real_t (*function)(std::vector), std::vector x) { - std::vector> hessian; - hessian.resize(x.size()); - - for (uint32_t i = 0; i < hessian.size(); i++) { - hessian[i].resize(x.size()); - } - - for (uint32_t i = 0; i < hessian.size(); i++) { - for (uint32_t j = 0; j < hessian[i].size(); j++) { - hessian[i][j] = numDiff_2(function, x, i, j); - } - } - - return hessian; -} - -std::vector>> MLPPNumericalAnalysisOld::thirdOrderTensor(real_t (*function)(std::vector), std::vector x) { - std::vector>> tensor; - tensor.resize(x.size()); - - for (uint32_t i = 0; i < tensor.size(); i++) { - tensor[i].resize(x.size()); - for (uint32_t j = 0; j < tensor[i].size(); j++) { - tensor[i][j].resize(x.size()); - } - } - - for (uint32_t i = 0; i < tensor.size(); i++) { // O(n^3) time complexity :( - for (uint32_t j = 0; j < tensor[i].size(); j++) { - for (uint32_t k = 0; k < tensor[i][j].size(); k++) - tensor[i][j][k] = numDiff_3(function, x, i, j, k); - } - } - - return tensor; -} - -real_t MLPPNumericalAnalysisOld::constantApproximation(real_t (*function)(std::vector), std::vector c) { - return function(c); -} - -real_t MLPPNumericalAnalysisOld::linearApproximation(real_t (*function)(std::vector), std::vector c, std::vector x) { - MLPPLinAlgOld alg; - return constantApproximation(function, c) + alg.matmult(alg.transpose({ jacobian(function, c) }), { alg.subtraction(x, c) })[0][0]; -} - -real_t MLPPNumericalAnalysisOld::quadraticApproximation(real_t (*function)(std::vector), std::vector c, std::vector x) { - MLPPLinAlgOld alg; - return linearApproximation(function, c, x) + 0.5 * alg.matmult({ (alg.subtraction(x, c)) }, alg.matmult(hessian(function, c), alg.transpose({ alg.subtraction(x, c) })))[0][0]; -} - -real_t MLPPNumericalAnalysisOld::cubicApproximation(real_t (*function)(std::vector), std::vector c, std::vector x) { - /* - Not completely sure as the literature seldom discusses the third order taylor approximation, - in particular for multivariate cases, but ostensibly, the matrix/tensor/vector multiplies - should look something like this: - - (N x N x N) (N x 1) [tensor vector mult] => (N x N x 1) => (N x N) - Perform remaining multiplies as done for the 2nd order approximation. - Result is a scalar. - */ - MLPPLinAlgOld alg; - std::vector> resultMat = alg.tensor_vec_mult(thirdOrderTensor(function, c), alg.subtraction(x, c)); - real_t resultScalar = alg.matmult({ (alg.subtraction(x, c)) }, alg.matmult(resultMat, alg.transpose({ alg.subtraction(x, c) })))[0][0]; - - return quadraticApproximation(function, c, x) + (1 / 6) * resultScalar; -} - -real_t MLPPNumericalAnalysisOld::laplacian(real_t (*function)(std::vector), std::vector x) { - std::vector> hessian_matrix = hessian(function, x); - real_t laplacian = 0; - - for (uint32_t i = 0; i < hessian_matrix.size(); i++) { - laplacian += hessian_matrix[i][i]; // homogenous 2nd derivs w.r.t i, then i - } - - return laplacian; -} - -std::string MLPPNumericalAnalysisOld::secondPartialDerivativeTest(real_t (*function)(std::vector), std::vector x) { - MLPPLinAlgOld alg; - std::vector> hessianMatrix = hessian(function, x); - /* - The reason we do this is because the 2nd partial derivative test is less conclusive for functions of variables greater than - 2, and the calculations specific to the bivariate case are less computationally intensive. 
- */ - if (x.size() == 2) { - real_t det = alg.det(hessianMatrix, hessianMatrix.size()); - real_t secondDerivative = numDiff_2(function, x, 0, 0); - if (secondDerivative > 0 && det > 0) { - return "min"; - } else if (secondDerivative < 0 && det > 0) { - return "max"; - } else if (det < 0) { - return "saddle"; - } else { - return "test was inconclusive"; - } - } else { - if (alg.positiveDefiniteChecker(hessianMatrix)) { - return "min"; - } else if (alg.negativeDefiniteChecker(hessianMatrix)) { - return "max"; - } else if (!alg.zeroEigenvalue(hessianMatrix)) { - return "saddle"; - } else { - return "test was inconclusive"; - } - } -} diff --git a/mlpp/numerical_analysis/numerical_analysis_old.h b/mlpp/numerical_analysis/numerical_analysis_old.h deleted file mode 100644 index bcc2390..0000000 --- a/mlpp/numerical_analysis/numerical_analysis_old.h +++ /dev/null @@ -1,59 +0,0 @@ - -#ifndef MLPP_NUMERICAL_ANALYSIS_OLD_H -#define MLPP_NUMERICAL_ANALYSIS_OLD_H - -// -// NumericalAnalysis.hpp -// -// - -#include "core/math/math_defs.h" - -#include "core/object/reference.h" - -#include -#include - -class MLPPNumericalAnalysisOld { -public: - /* A numerical method for derivatives is used. This may be subject to change, - as an analytical method for calculating derivatives will most likely be used in - the future. - */ - real_t numDiff(real_t (*function)(real_t), real_t x); - real_t numDiff_2(real_t (*function)(real_t), real_t x); - real_t numDiff_3(real_t (*function)(real_t), real_t x); - - real_t constantApproximation(real_t (*function)(real_t), real_t c); - real_t linearApproximation(real_t (*function)(real_t), real_t c, real_t x); - real_t quadraticApproximation(real_t (*function)(real_t), real_t c, real_t x); - real_t cubicApproximation(real_t (*function)(real_t), real_t c, real_t x); - - real_t numDiff(real_t (*function)(std::vector), std::vector x, int axis); - real_t numDiff_2(real_t (*function)(std::vector), std::vector x, int axis1, int axis2); - real_t numDiff_3(real_t (*function)(std::vector), std::vector x, int axis1, int axis2, int axis3); - - real_t newtonRaphsonMethod(real_t (*function)(real_t), real_t x_0, real_t epoch_num); - real_t halleyMethod(real_t (*function)(real_t), real_t x_0, real_t epoch_num); - real_t invQuadraticInterpolation(real_t (*function)(real_t), std::vector x_0, int epoch_num); - - real_t eulerianMethod(real_t (*derivative)(real_t), std::vector q_0, real_t p, real_t h); // Euler's method for solving diffrential equations. - real_t eulerianMethod(real_t (*derivative)(std::vector), std::vector q_0, real_t p, real_t h); // Euler's method for solving diffrential equations. - - real_t growthMethod(real_t C, real_t k, real_t t); // General growth-based diffrential equations can be solved by seperation of variables. - - std::vector jacobian(real_t (*function)(std::vector), std::vector x); // Indeed, for functions with scalar outputs the Jacobians will be vectors. 
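newtonRaphsonMethod above iterates x <- x - f(x) / f'(x), with the derivative supplied by the numerical differentiator. A compact, self-contained version of the same iteration; the target function g and the step count are illustrative only.

#include <cmath>
#include <cstdio>

static double g(double x) { return x * x - 2.0; } // root at sqrt(2)

static double num_diff(double (*fn)(double), double x) {
	const double eps = 1e-7;
	return (fn(x + eps) - fn(x)) / eps;
}

// Newton-Raphson: repeatedly follow the tangent line to its zero crossing.
static double newton_raphson(double (*fn)(double), double x0, int iters) {
	double x = x0;
	for (int i = 0; i < iters; ++i) {
		x -= fn(x) / num_diff(fn, x);
	}
	return x;
}

int main() {
	std::printf("root ~ %f (exact %f)\n", newton_raphson(g, 1.0, 8), std::sqrt(2.0));
	return 0;
}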
- std::vector> hessian(real_t (*function)(std::vector), std::vector x); - std::vector>> thirdOrderTensor(real_t (*function)(std::vector), std::vector x); - - real_t constantApproximation(real_t (*function)(std::vector), std::vector c); - real_t linearApproximation(real_t (*function)(std::vector), std::vector c, std::vector x); - real_t quadraticApproximation(real_t (*function)(std::vector), std::vector c, std::vector x); - real_t cubicApproximation(real_t (*function)(std::vector), std::vector c, std::vector x); - - real_t laplacian(real_t (*function)(std::vector), std::vector x); // laplacian - - std::string secondPartialDerivativeTest(real_t (*function)(std::vector), std::vector x); -}; - -#endif /* NumericalAnalysis_hpp */ diff --git a/mlpp/outlier_finder/outlier_finder_old.cpp b/mlpp/outlier_finder/outlier_finder_old.cpp deleted file mode 100644 index 6b83a54..0000000 --- a/mlpp/outlier_finder/outlier_finder_old.cpp +++ /dev/null @@ -1,42 +0,0 @@ -// -// OutlierFinder.cpp -// -// Created by Marc Melikyan on 11/13/20. -// - -#include "outlier_finder_old.h" - -#include "../stat/stat_old.h" -#include - - -MLPPOutlierFinderOld::MLPPOutlierFinderOld(int threshold) : - threshold(threshold) { -} - -std::vector> MLPPOutlierFinderOld::modelSetTest(std::vector> inputSet) { - MLPPStatOld stat; - std::vector> outliers; - outliers.resize(inputSet.size()); - for (uint32_t i = 0; i < inputSet.size(); i++) { - for (uint32_t j = 0; j < inputSet[i].size(); j++) { - real_t z = (inputSet[i][j] - stat.mean(inputSet[i])) / stat.standardDeviation(inputSet[i]); - if (abs(z) > threshold) { - outliers[i].push_back(inputSet[i][j]); - } - } - } - return outliers; -} - -std::vector MLPPOutlierFinderOld::modelTest(std::vector inputSet) { - MLPPStatOld stat; - std::vector outliers; - for (uint32_t i = 0; i < inputSet.size(); i++) { - real_t z = (inputSet[i] - stat.mean(inputSet)) / stat.standardDeviation(inputSet); - if (abs(z) > threshold) { - outliers.push_back(inputSet[i]); - } - } - return outliers; -} diff --git a/mlpp/outlier_finder/outlier_finder_old.h b/mlpp/outlier_finder/outlier_finder_old.h deleted file mode 100644 index 97619d5..0000000 --- a/mlpp/outlier_finder/outlier_finder_old.h +++ /dev/null @@ -1,30 +0,0 @@ - -#ifndef MLPP_OUTLIER_FINDER_OLD_H -#define MLPP_OUTLIER_FINDER_OLD_H - -// -// OutlierFinder.hpp -// -// Created by Marc Melikyan on 11/13/20. -// - -#include "core/math/math_defs.h" -#include "core/int_types.h" - -#include - - -class MLPPOutlierFinderOld { -public: - // Cnstr - MLPPOutlierFinderOld(int threshold); - - std::vector> modelSetTest(std::vector> inputSet); - std::vector modelTest(std::vector inputSet); - - // Variables required - int threshold; -}; - - -#endif /* OutlierFinder_hpp */ diff --git a/mlpp/output_layer/output_layer_old.cpp b/mlpp/output_layer/output_layer_old.cpp deleted file mode 100644 index 857c038..0000000 --- a/mlpp/output_layer/output_layer_old.cpp +++ /dev/null @@ -1,135 +0,0 @@ -// -// OutputLayer.cpp -// -// Created by Marc Melikyan on 11/4/20. 
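The outlier finder removed above flags any value whose z-score, (x - mean) / stddev, exceeds a threshold in magnitude. A minimal sketch over one feature vector; the data and the threshold of 2 are made up for illustration.

#include <cmath>
#include <cstdio>
#include <vector>

int main() {
	std::vector<double> v = { 9.8, 10.1, 10.0, 9.9, 30.0, 10.2 };
	double threshold = 2.0;

	// Mean and (population) standard deviation.
	double mean = 0.0;
	for (double x : v) mean += x;
	mean /= v.size();
	double var = 0.0;
	for (double x : v) var += (x - mean) * (x - mean);
	double sd = std::sqrt(var / v.size());

	// Flag values whose z-score magnitude exceeds the threshold.
	for (double x : v) {
		double z = (x - mean) / sd;
		if (std::fabs(z) > threshold) {
			std::printf("outlier: %f (z = %f)\n", x, z);
		}
	}
	return 0;
}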
-// - -#include "output_layer_old.h" -#include "../lin_alg/lin_alg_old.h" -#include "../utilities/utilities.h" - -#include -#include - -MLPPOldOutputLayer::MLPPOldOutputLayer(int p_n_hidden, std::string p_activation, std::string p_cost, std::vector> p_input, std::string p_weightInit, std::string p_reg, real_t p_lambda, real_t p_alpha) { - n_hidden = p_n_hidden; - activation = p_activation; - cost = p_cost; - input = p_input; - weightInit = p_weightInit; - reg = p_reg; - lambda = p_lambda; - alpha = p_alpha; - - weights = MLPPUtilities::weightInitialization(n_hidden, weightInit); - bias = MLPPUtilities::biasInitialization(); - - activation_map["Linear"] = &MLPPActivationOld::linear; - activationTest_map["Linear"] = &MLPPActivationOld::linear; - - activation_map["Sigmoid"] = &MLPPActivationOld::sigmoid; - activationTest_map["Sigmoid"] = &MLPPActivationOld::sigmoid; - - activation_map["Swish"] = &MLPPActivationOld::swish; - activationTest_map["Swish"] = &MLPPActivationOld::swish; - - activation_map["Mish"] = &MLPPActivationOld::mish; - activationTest_map["Mish"] = &MLPPActivationOld::mish; - - activation_map["SinC"] = &MLPPActivationOld::sinc; - activationTest_map["SinC"] = &MLPPActivationOld::sinc; - - activation_map["Softplus"] = &MLPPActivationOld::softplus; - activationTest_map["Softplus"] = &MLPPActivationOld::softplus; - - activation_map["Softsign"] = &MLPPActivationOld::softsign; - activationTest_map["Softsign"] = &MLPPActivationOld::softsign; - - activation_map["CLogLog"] = &MLPPActivationOld::cloglog; - activationTest_map["CLogLog"] = &MLPPActivationOld::cloglog; - - activation_map["Logit"] = &MLPPActivationOld::logit; - activationTest_map["Logit"] = &MLPPActivationOld::logit; - - activation_map["GaussianCDF"] = &MLPPActivationOld::gaussianCDF; - activationTest_map["GaussianCDF"] = &MLPPActivationOld::gaussianCDF; - - activation_map["RELU"] = &MLPPActivationOld::RELU; - activationTest_map["RELU"] = &MLPPActivationOld::RELU; - - activation_map["GELU"] = &MLPPActivationOld::GELU; - activationTest_map["GELU"] = &MLPPActivationOld::GELU; - - activation_map["Sign"] = &MLPPActivationOld::sign; - activationTest_map["Sign"] = &MLPPActivationOld::sign; - - activation_map["UnitStep"] = &MLPPActivationOld::unitStep; - activationTest_map["UnitStep"] = &MLPPActivationOld::unitStep; - - activation_map["Sinh"] = &MLPPActivationOld::sinh; - activationTest_map["Sinh"] = &MLPPActivationOld::sinh; - - activation_map["Cosh"] = &MLPPActivationOld::cosh; - activationTest_map["Cosh"] = &MLPPActivationOld::cosh; - - activation_map["Tanh"] = &MLPPActivationOld::tanh; - activationTest_map["Tanh"] = &MLPPActivationOld::tanh; - - activation_map["Csch"] = &MLPPActivationOld::csch; - activationTest_map["Csch"] = &MLPPActivationOld::csch; - - activation_map["Sech"] = &MLPPActivationOld::sech; - activationTest_map["Sech"] = &MLPPActivationOld::sech; - - activation_map["Coth"] = &MLPPActivationOld::coth; - activationTest_map["Coth"] = &MLPPActivationOld::coth; - - activation_map["Arsinh"] = &MLPPActivationOld::arsinh; - activationTest_map["Arsinh"] = &MLPPActivationOld::arsinh; - - activation_map["Arcosh"] = &MLPPActivationOld::arcosh; - activationTest_map["Arcosh"] = &MLPPActivationOld::arcosh; - - activation_map["Artanh"] = &MLPPActivationOld::artanh; - activationTest_map["Artanh"] = &MLPPActivationOld::artanh; - - activation_map["Arcsch"] = &MLPPActivationOld::arcsch; - activationTest_map["Arcsch"] = &MLPPActivationOld::arcsch; - - activation_map["Arsech"] = &MLPPActivationOld::arsech; - 
activationTest_map["Arsech"] = &MLPPActivationOld::arsech; - - activation_map["Arcoth"] = &MLPPActivationOld::arcoth; - activationTest_map["Arcoth"] = &MLPPActivationOld::arcoth; - - costDeriv_map["MSE"] = &MLPPCostOld::MSEDeriv; - cost_map["MSE"] = &MLPPCostOld::MSE; - costDeriv_map["RMSE"] = &MLPPCostOld::RMSEDeriv; - cost_map["RMSE"] = &MLPPCostOld::RMSE; - costDeriv_map["MAE"] = &MLPPCostOld::MAEDeriv; - cost_map["MAE"] = &MLPPCostOld::MAE; - costDeriv_map["MBE"] = &MLPPCostOld::MBEDeriv; - cost_map["MBE"] = &MLPPCostOld::MBE; - costDeriv_map["LogLoss"] = &MLPPCostOld::LogLossDeriv; - cost_map["LogLoss"] = &MLPPCostOld::LogLoss; - costDeriv_map["CrossEntropy"] = &MLPPCostOld::CrossEntropyDeriv; - cost_map["CrossEntropy"] = &MLPPCostOld::CrossEntropy; - costDeriv_map["HingeLoss"] = &MLPPCostOld::HingeLossDeriv; - cost_map["HingeLoss"] = &MLPPCostOld::HingeLoss; - costDeriv_map["WassersteinLoss"] = &MLPPCostOld::HingeLossDeriv; - cost_map["WassersteinLoss"] = &MLPPCostOld::HingeLoss; -} - -void MLPPOldOutputLayer::forwardPass() { - MLPPLinAlgOld alg; - MLPPActivationOld avn; - z = alg.scalarAdd(bias, alg.mat_vec_mult(input, weights)); - a = (avn.*activation_map[activation])(z, false); -} - -void MLPPOldOutputLayer::Test(std::vector x) { - MLPPLinAlgOld alg; - MLPPActivationOld avn; - z_test = alg.dot(weights, x) + bias; - a_test = (avn.*activationTest_map[activation])(z_test, false); -} diff --git a/mlpp/output_layer/output_layer_old.h b/mlpp/output_layer/output_layer_old.h deleted file mode 100644 index f471ecd..0000000 --- a/mlpp/output_layer/output_layer_old.h +++ /dev/null @@ -1,65 +0,0 @@ - -#ifndef MLPP_OUTPUT_LAYER_OLD_H -#define MLPP_OUTPUT_LAYER_OLD_H - -// -// OutputLayer.hpp -// -// Created by Marc Melikyan on 11/4/20. -// - -#include "core/math/math_defs.h" -#include "core/string/ustring.h" - -#include "core/object/reference.h" - -#include "../activation/activation_old.h" -#include "../cost/cost_old.h" -#include "../regularization/reg.h" -#include "../utilities/utilities.h" - -#include "../lin_alg/mlpp_matrix.h" -#include "../lin_alg/mlpp_vector.h" - -#include -#include -#include - -class MLPPOldOutputLayer { -public: - MLPPOldOutputLayer(int n_hidden, std::string activation, std::string cost, std::vector> input, std::string weightInit, std::string reg, real_t lambda, real_t alpha); - - int n_hidden; - std::string activation; - std::string cost; - - std::vector> input; - - std::vector weights; - real_t bias; - - std::vector z; - std::vector a; - - std::map (MLPPActivationOld::*)(std::vector, bool)> activation_map; - std::map activationTest_map; - std::map, std::vector)> cost_map; - std::map (MLPPCostOld::*)(std::vector, std::vector)> costDeriv_map; - - real_t z_test; - real_t a_test; - - std::vector delta; - - // Regularization Params - std::string reg; - real_t lambda; /* Regularization Parameter */ - real_t alpha; /* This is the controlling param for Elastic Net*/ - - std::string weightInit; - - void forwardPass(); - void Test(std::vector x); -}; - -#endif /* OutputLayer_hpp */ diff --git a/mlpp/pca/pca_old.cpp b/mlpp/pca/pca_old.cpp deleted file mode 100644 index 1cd3b1b..0000000 --- a/mlpp/pca/pca_old.cpp +++ /dev/null @@ -1,59 +0,0 @@ -// -// PCA.cpp -// -// Created by Marc Melikyan on 10/2/20. 
-// - -#include "pca_old.h" -#include "../data/data_old.h" -#include "../lin_alg/lin_alg_old.h" - -#include -#include - - - -MLPPPCAOld::MLPPPCAOld(std::vector> inputSet, int k) : - inputSet(inputSet), k(k) { -} - -std::vector> MLPPPCAOld::principalComponents() { - MLPPLinAlgOld alg; - MLPPDataOld data; - - MLPPLinAlgOld::SVDResultOld svr_res = alg.SVD(alg.cov(inputSet)); - X_normalized = data.meanCentering(inputSet); - U_reduce.resize(svr_res.U.size()); - for (int i = 0; i < k; i++) { - for (uint32_t j = 0; j < svr_res.U.size(); j++) { - U_reduce[j].push_back(svr_res.U[j][i]); - } - } - Z = alg.matmult(alg.transpose(U_reduce), X_normalized); - return Z; -} - -// Simply tells us the percentage of variance maintained. -real_t MLPPPCAOld::score() { - MLPPLinAlgOld alg; - std::vector> X_approx = alg.matmult(U_reduce, Z); - real_t num = 0; - real_t den = 0; - - for (uint32_t i = 0; i < X_normalized.size(); i++) { - num += alg.norm_sq(alg.subtraction(X_normalized[i], X_approx[i])); - } - - num /= X_normalized.size(); - for (uint32_t i = 0; i < X_normalized.size(); i++) { - den += alg.norm_sq(X_normalized[i]); - } - - den /= X_normalized.size(); - if (den == 0) { - den += 1e-10; // For numerical sanity as to not recieve a domain error - } - - return 1 - num / den; -} - diff --git a/mlpp/pca/pca_old.h b/mlpp/pca/pca_old.h deleted file mode 100644 index 03ac4c1..0000000 --- a/mlpp/pca/pca_old.h +++ /dev/null @@ -1,31 +0,0 @@ - -#ifndef MLPP_PCA_OLD_H -#define MLPP_PCA_OLD_H - -// -// PCA.hpp -// -// Created by Marc Melikyan on 10/2/20. -// - -#include "core/math/math_defs.h" - -#include - - -class MLPPPCAOld { -public: - MLPPPCAOld(std::vector> inputSet, int k); - std::vector> principalComponents(); - real_t score(); - -private: - std::vector> inputSet; - std::vector> X_normalized; - std::vector> U_reduce; - std::vector> Z; - int k; -}; - - -#endif /* PCA_hpp */ diff --git a/mlpp/probit_reg/probit_reg_old.cpp b/mlpp/probit_reg/probit_reg_old.cpp deleted file mode 100644 index 64122bb..0000000 --- a/mlpp/probit_reg/probit_reg_old.cpp +++ /dev/null @@ -1,248 +0,0 @@ -// -// ProbitReg.cpp -// -// Created by Marc Melikyan on 10/2/20. 
-// - -#include "probit_reg_old.h" -#include "../activation/activation_old.h" -#include "../cost/cost_old.h" -#include "../lin_alg/lin_alg_old.h" -#include "../regularization/reg_old.h" -#include "../utilities/utilities.h" - -#include -#include - -#ifndef M_PI -#define M_PI 3.141592653 -#endif - -MLPPProbitRegOld::MLPPProbitRegOld(std::vector> inputSet, std::vector outputSet, std::string reg, real_t lambda, real_t alpha) : - inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha) { - y_hat.resize(n); - weights = MLPPUtilities::weightInitialization(k); - bias = MLPPUtilities::biasInitialization(); -} - -std::vector MLPPProbitRegOld::modelSetTest(std::vector> X) { - return Evaluate(X); -} - -real_t MLPPProbitRegOld::modelTest(std::vector x) { - return Evaluate(x); -} - -void MLPPProbitRegOld::gradientDescent(real_t learning_rate, int max_epoch, bool UI) { - MLPPActivationOld avn; - MLPPLinAlgOld alg; - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - forwardPass(); - - while (true) { - cost_prev = Cost(y_hat, outputSet); - - std::vector error = alg.subtraction(y_hat, outputSet); - - // Calculating the weight gradients - weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate / n, alg.mat_vec_mult(alg.transpose(inputSet), alg.hadamard_product(error, avn.gaussianCDF(z, 1))))); - weights = regularization.regWeights(weights, lambda, alpha, reg); - - // Calculating the bias gradients - bias -= learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.gaussianCDF(z, 1))) / n; - forwardPass(); - - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); - MLPPUtilities::UI(weights, bias); - } - epoch++; - - if (epoch > max_epoch) { - break; - } - } -} - -void MLPPProbitRegOld::MLE(real_t learning_rate, int max_epoch, bool UI) { - MLPPActivationOld avn; - MLPPLinAlgOld alg; - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - forwardPass(); - - while (true) { - cost_prev = Cost(y_hat, outputSet); - - std::vector error = alg.subtraction(outputSet, y_hat); - - // Calculating the weight gradients - weights = alg.addition(weights, alg.scalarMultiply(learning_rate / n, alg.mat_vec_mult(alg.transpose(inputSet), alg.hadamard_product(error, avn.gaussianCDF(z, 1))))); - weights = regularization.regWeights(weights, lambda, alpha, reg); - - // Calculating the bias gradients - bias += learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.gaussianCDF(z, 1))) / n; - forwardPass(); - - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); - MLPPUtilities::UI(weights, bias); - } - epoch++; - - if (epoch > max_epoch) { - break; - } - } -} - -void MLPPProbitRegOld::SGD(real_t learning_rate, int max_epoch, bool UI) { - // NOTE: ∂y_hat/∂z is sparse - MLPPLinAlgOld alg; - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - - while (true) { - std::random_device rd; - std::default_random_engine generator(rd()); - std::uniform_int_distribution distribution(0, int(n - 1)); - int outputIndex = distribution(generator); - - real_t y_hat = Evaluate(inputSet[outputIndex]); - real_t z = propagate(inputSet[outputIndex]); - cost_prev = Cost({ y_hat }, { outputSet[outputIndex] }); - - real_t error = y_hat - outputSet[outputIndex]; - - // Weight Updation - weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate * error * ((1 / sqrt(2 * M_PI)) * exp(-z * z / 2)), inputSet[outputIndex])); - weights = 
regularization.regWeights(weights, lambda, alpha, reg); - - // Bias updation - bias -= learning_rate * error * ((1 / sqrt(2 * M_PI)) * exp(-z * z / 2)); - - y_hat = Evaluate({ inputSet[outputIndex] }); - - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost({ y_hat }, { outputSet[outputIndex] })); - MLPPUtilities::UI(weights, bias); - } - epoch++; - - if (epoch > max_epoch) { - break; - } - } - forwardPass(); -} - -void MLPPProbitRegOld::MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI) { - MLPPActivationOld avn; - MLPPLinAlgOld alg; - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - - // Creating the mini-batches - int n_mini_batch = n / mini_batch_size; - auto createMiniBatchesResult = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch); - auto inputMiniBatches = std::get<0>(createMiniBatchesResult); - auto outputMiniBatches = std::get<1>(createMiniBatchesResult); - - // Creating the mini-batches - for (int i = 0; i < n_mini_batch; i++) { - std::vector> currentInputSet; - std::vector currentOutputSet; - for (int j = 0; j < n / n_mini_batch; j++) { - currentInputSet.push_back(inputSet[n / n_mini_batch * i + j]); - currentOutputSet.push_back(outputSet[n / n_mini_batch * i + j]); - } - inputMiniBatches.push_back(currentInputSet); - outputMiniBatches.push_back(currentOutputSet); - } - - if (real_t(n) / real_t(n_mini_batch) - int(n / n_mini_batch) != 0) { - for (int i = 0; i < n - n / n_mini_batch * n_mini_batch; i++) { - inputMiniBatches[n_mini_batch - 1].push_back(inputSet[n / n_mini_batch * n_mini_batch + i]); - outputMiniBatches[n_mini_batch - 1].push_back(outputSet[n / n_mini_batch * n_mini_batch + i]); - } - } - - while (true) { - for (int i = 0; i < n_mini_batch; i++) { - std::vector y_hat = Evaluate(inputMiniBatches[i]); - std::vector z = propagate(inputMiniBatches[i]); - cost_prev = Cost(y_hat, outputMiniBatches[i]); - - std::vector error = alg.subtraction(y_hat, outputMiniBatches[i]); - - // Calculating the weight gradients - weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate / outputMiniBatches.size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), alg.hadamard_product(error, avn.gaussianCDF(z, 1))))); - weights = regularization.regWeights(weights, lambda, alpha, reg); - - // Calculating the bias gradients - bias -= learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.gaussianCDF(z, 1))) / outputMiniBatches.size(); - y_hat = Evaluate(inputMiniBatches[i]); - - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i])); - MLPPUtilities::UI(weights, bias); - } - } - epoch++; - if (epoch > max_epoch) { - break; - } - } - forwardPass(); -} - -real_t MLPPProbitRegOld::score() { - MLPPUtilities util; - return util.performance(y_hat, outputSet); -} - -void MLPPProbitRegOld::save(std::string fileName) { - MLPPUtilities util; - util.saveParameters(fileName, weights, bias); -} - -real_t MLPPProbitRegOld::Cost(std::vector y_hat, std::vector y) { - MLPPRegOld regularization; - class MLPPCostOld cost; - return cost.MSE(y_hat, y) + regularization.regTerm(weights, lambda, alpha, reg); -} - -std::vector MLPPProbitRegOld::Evaluate(std::vector> X) { - MLPPLinAlgOld alg; - MLPPActivationOld avn; - return avn.gaussianCDF(alg.scalarAdd(bias, alg.mat_vec_mult(X, weights))); -} - -std::vector MLPPProbitRegOld::propagate(std::vector> X) { - MLPPLinAlgOld alg; - return alg.scalarAdd(bias, alg.mat_vec_mult(X, weights)); -} - -real_t MLPPProbitRegOld::Evaluate(std::vector x) { - 
MLPPLinAlgOld alg; - MLPPActivationOld avn; - return avn.gaussianCDF(alg.dot(weights, x) + bias); -} - -real_t MLPPProbitRegOld::propagate(std::vector x) { - MLPPLinAlgOld alg; - return alg.dot(weights, x) + bias; -} - -// gaussianCDF ( wTx + b ) -void MLPPProbitRegOld::forwardPass() { - MLPPActivationOld avn; - - z = propagate(inputSet); - y_hat = avn.gaussianCDF(z); -} diff --git a/mlpp/probit_reg/probit_reg_old.h b/mlpp/probit_reg/probit_reg_old.h deleted file mode 100644 index 275a1c0..0000000 --- a/mlpp/probit_reg/probit_reg_old.h +++ /dev/null @@ -1,53 +0,0 @@ - -#ifndef MLPP_PROBIT_REG_OLD_H -#define MLPP_PROBIT_REG_OLD_H - -// -// ProbitReg.hpp -// -// Created by Marc Melikyan on 10/2/20. -// - -#include "core/math/math_defs.h" - -#include -#include - -class MLPPProbitRegOld { -public: - MLPPProbitRegOld(std::vector> inputSet, std::vector outputSet, std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5); - std::vector modelSetTest(std::vector> X); - real_t modelTest(std::vector x); - void gradientDescent(real_t learning_rate, int max_epoch = 0, bool UI = false); - void MLE(real_t learning_rate, int max_epoch = 0, bool UI = false); - void SGD(real_t learning_rate, int max_epoch = 0, bool UI = false); - void MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI = false); - real_t score(); - void save(std::string fileName); - -private: - real_t Cost(std::vector y_hat, std::vector y); - - std::vector Evaluate(std::vector> X); - std::vector propagate(std::vector> X); - real_t Evaluate(std::vector x); - real_t propagate(std::vector x); - void forwardPass(); - - std::vector> inputSet; - std::vector outputSet; - std::vector z; - std::vector y_hat; - std::vector weights; - real_t bias; - - int n; - int k; - - // Regularization Params - std::string reg; - real_t lambda; - real_t alpha; /* This is the controlling param for Elastic Net*/ -}; - -#endif /* ProbitReg_hpp */ diff --git a/mlpp/regularization/reg_old.cpp b/mlpp/regularization/reg_old.cpp deleted file mode 100644 index 49aea1d..0000000 --- a/mlpp/regularization/reg_old.cpp +++ /dev/null @@ -1,166 +0,0 @@ -// -// Reg.cpp -// -// Created by Marc Melikyan on 1/16/21. 
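The removed probit regression predicts ŷ = Φ(wᵀx + b), where Φ is the standard normal CDF, so the per-sample gradient of the squared error carries the normal PDF φ(z) = (1/√(2π))·exp(−z²/2) — the same factor that appears in the SGD weight update above. One stochastic gradient step, sketched standalone (illustrative, double in place of real_t):

#include <cmath>
#include <vector>

#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

// One SGD step for probit regression with squared-error loss.
void probit_sgd_step(std::vector<double> &w, double &b,
		const std::vector<double> &x, double y, double learning_rate) {
	double z = b;
	for (size_t j = 0; j < w.size(); j++) {
		z += w[j] * x[j];
	}
	double y_hat = 0.5 * std::erfc(-z / std::sqrt(2.0)); // Phi(z), the Gaussian CDF
	double pdf = std::exp(-0.5 * z * z) / std::sqrt(2.0 * M_PI); // phi(z)
	double error = y_hat - y;

	for (size_t j = 0; j < w.size(); j++) {
		w[j] -= learning_rate * error * pdf * x[j];
	}
	b -= learning_rate * error * pdf;
}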
-// - -#include "reg_old.h" - -#include "core/math/math_defs.h" - -#include "../activation/activation_old.h" -#include "../lin_alg/lin_alg_old.h" - -#include -#include - -real_t MLPPRegOld::regTerm(std::vector weights, real_t lambda, real_t alpha, std::string p_reg) { - if (p_reg == "Ridge") { - real_t reg = 0; - for (uint32_t i = 0; i < weights.size(); i++) { - reg += weights[i] * weights[i]; - } - return reg * lambda / 2; - } else if (p_reg == "Lasso") { - real_t reg = 0; - for (uint32_t i = 0; i < weights.size(); i++) { - reg += abs(weights[i]); - } - return reg * lambda; - } else if (p_reg == "ElasticNet") { - real_t reg = 0; - for (uint32_t i = 0; i < weights.size(); i++) { - reg += alpha * abs(weights[i]); // Lasso Reg - reg += ((1 - alpha) / 2) * weights[i] * weights[i]; // Ridge Reg - } - return reg * lambda; - } - return 0; -} - -real_t MLPPRegOld::regTerm(std::vector> weights, real_t lambda, real_t alpha, std::string p_reg) { - if (p_reg == "Ridge") { - real_t reg = 0; - for (uint32_t i = 0; i < weights.size(); i++) { - for (uint32_t j = 0; j < weights[i].size(); j++) { - reg += weights[i][j] * weights[i][j]; - } - } - return reg * lambda / 2; - } else if (p_reg == "Lasso") { - real_t reg = 0; - for (uint32_t i = 0; i < weights.size(); i++) { - for (uint32_t j = 0; j < weights[i].size(); j++) { - reg += abs(weights[i][j]); - } - } - return reg * lambda; - } else if (p_reg == "ElasticNet") { - real_t reg = 0; - for (uint32_t i = 0; i < weights.size(); i++) { - for (uint32_t j = 0; j < weights[i].size(); j++) { - reg += alpha * abs(weights[i][j]); // Lasso Reg - reg += ((1 - alpha) / 2) * weights[i][j] * weights[i][j]; // Ridge Reg - } - } - return reg * lambda; - } - return 0; -} - -std::vector MLPPRegOld::regWeights(std::vector weights, real_t lambda, real_t alpha, std::string reg) { - MLPPLinAlgOld alg; - if (reg == "WeightClipping") { - return regDerivTerm(weights, lambda, alpha, reg); - } - return alg.subtraction(weights, regDerivTerm(weights, lambda, alpha, reg)); - // for(int i = 0; i < weights.size(); i++){ - // weights[i] -= regDerivTerm(weights, lambda, alpha, reg, i); - // } - // return weights; -} - -std::vector> MLPPRegOld::regWeights(std::vector> weights, real_t lambda, real_t alpha, std::string reg) { - MLPPLinAlgOld alg; - if (reg == "WeightClipping") { - return regDerivTerm(weights, lambda, alpha, reg); - } - return alg.subtraction(weights, regDerivTerm(weights, lambda, alpha, reg)); - // for(int i = 0; i < weights.size(); i++){ - // for(int j = 0; j < weights[i].size(); j++){ - // weights[i][j] -= regDerivTerm(weights, lambda, alpha, reg, i, j); - // } - // } - // return weights; -} - -std::vector MLPPRegOld::regDerivTerm(std::vector weights, real_t lambda, real_t alpha, std::string reg) { - std::vector regDeriv; - regDeriv.resize(weights.size()); - - for (uint32_t i = 0; i < regDeriv.size(); i++) { - regDeriv[i] = regDerivTerm(weights, lambda, alpha, reg, i); - } - return regDeriv; -} - -std::vector> MLPPRegOld::regDerivTerm(std::vector> weights, real_t lambda, real_t alpha, std::string reg) { - std::vector> regDeriv; - regDeriv.resize(weights.size()); - for (uint32_t i = 0; i < regDeriv.size(); i++) { - regDeriv[i].resize(weights[0].size()); - } - - for (uint32_t i = 0; i < regDeriv.size(); i++) { - for (uint32_t j = 0; j < regDeriv[i].size(); j++) { - regDeriv[i][j] = regDerivTerm(weights, lambda, alpha, reg, i, j); - } - } - return regDeriv; -} - -real_t MLPPRegOld::regDerivTerm(std::vector weights, real_t lambda, real_t alpha, std::string reg, int j) { - 
MLPPActivationOld act; - if (reg == "Ridge") { - return lambda * weights[j]; - } else if (reg == "Lasso") { - return lambda * act.sign(weights[j]); - } else if (reg == "ElasticNet") { - return alpha * lambda * act.sign(weights[j]) + (1 - alpha) * lambda * weights[j]; - } else if (reg == "WeightClipping") { // Preparation for Wasserstein GANs. - // We assume lambda is the lower clipping threshold, while alpha is the higher clipping threshold. - // alpha > lambda. - if (weights[j] > alpha) { - return alpha; - } else if (weights[j] < lambda) { - return lambda; - } else { - return weights[j]; - } - } else { - return 0; - } -} - -real_t MLPPRegOld::regDerivTerm(std::vector> weights, real_t lambda, real_t alpha, std::string reg, int i, int j) { - MLPPActivationOld act; - if (reg == "Ridge") { - return lambda * weights[i][j]; - } else if (reg == "Lasso") { - return lambda * act.sign(weights[i][j]); - } else if (reg == "ElasticNet") { - return alpha * lambda * act.sign(weights[i][j]) + (1 - alpha) * lambda * weights[i][j]; - } else if (reg == "WeightClipping") { // Preparation for Wasserstein GANs. - // We assume lambda is the lower clipping threshold, while alpha is the higher clipping threshold. - // alpha > lambda. - if (weights[i][j] > alpha) { - return alpha; - } else if (weights[i][j] < lambda) { - return lambda; - } else { - return weights[i][j]; - } - } else { - return 0; - } -} diff --git a/mlpp/regularization/reg_old.h b/mlpp/regularization/reg_old.h deleted file mode 100644 index 8f88cec..0000000 --- a/mlpp/regularization/reg_old.h +++ /dev/null @@ -1,33 +0,0 @@ - - -#ifndef MLPP_REG_OLD_H -#define MLPP_REG_OLD_H - -// -// Reg.hpp -// -// Created by Marc Melikyan on 1/16/21. -// - -#include "core/math/math_defs.h" - -#include -#include - -class MLPPRegOld { -public: - real_t regTerm(std::vector weights, real_t lambda, real_t alpha, std::string reg); - real_t regTerm(std::vector> weights, real_t lambda, real_t alpha, std::string reg); - - std::vector regWeights(std::vector weights, real_t lambda, real_t alpha, std::string reg); - std::vector> regWeights(std::vector> weights, real_t lambda, real_t alpha, std::string reg); - - std::vector regDerivTerm(std::vector weights, real_t lambda, real_t alpha, std::string reg); - std::vector> regDerivTerm(std::vector>, real_t lambda, real_t alpha, std::string reg); - -private: - real_t regDerivTerm(std::vector weights, real_t lambda, real_t alpha, std::string reg, int j); - real_t regDerivTerm(std::vector> weights, real_t lambda, real_t alpha, std::string reg, int i, int j); -}; - -#endif /* Reg_hpp */ diff --git a/mlpp/softmax_net/softmax_net_old.cpp b/mlpp/softmax_net/softmax_net_old.cpp deleted file mode 100644 index ce8dfa3..0000000 --- a/mlpp/softmax_net/softmax_net_old.cpp +++ /dev/null @@ -1,309 +0,0 @@ -// -// SoftmaxNet.cpp -// -// Created by Marc Melikyan on 10/2/20. 
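The removed MLPPRegOld dispatches on a string to compute the Ridge, Lasso and Elastic Net penalties and their (sub)gradients. The underlying formulas, as a small standalone sketch (double in place of real_t): Ridge adds λ/2·Σw², Lasso adds λ·Σ|w|, and Elastic Net blends the two with the mixing parameter α.

#include <cmath>
#include <vector>

// Elastic Net penalty: lambda * sum(alpha*|w_i| + (1-alpha)/2 * w_i^2).
double elastic_net_term(const std::vector<double> &w, double lambda, double alpha) {
	double reg = 0.0;
	for (double wi : w) {
		reg += alpha * std::fabs(wi) + 0.5 * (1.0 - alpha) * wi * wi;
	}
	return lambda * reg;
}

// Per-weight derivative (a subgradient is used for the |w| part).
double elastic_net_deriv(double wi, double lambda, double alpha) {
	double sign = (wi > 0.0) - (wi < 0.0);
	return lambda * (alpha * sign + (1.0 - alpha) * wi);
}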
-// - -#include "softmax_net_old.h" - -#include "../activation/activation_old.h" -#include "../cost/cost_old.h" -#include "../data/data.h" -#include "../lin_alg/lin_alg_old.h" -#include "../regularization/reg_old.h" -#include "../utilities/utilities.h" - -#include -#include - -MLPPSoftmaxNetOld::MLPPSoftmaxNetOld(std::vector> pinputSet, std::vector> poutputSet, int pn_hidden, std::string preg, real_t plambda, real_t palpha) { - inputSet = pinputSet; - outputSet = poutputSet; - n = pinputSet.size(); - k = pinputSet[0].size(); - n_hidden = pn_hidden; - n_class = poutputSet[0].size(); - reg = preg; - lambda = plambda; - alpha = palpha; - - y_hat.resize(n); - - weights1 = MLPPUtilities::weightInitialization(k, n_hidden); - weights2 = MLPPUtilities::weightInitialization(n_hidden, n_class); - bias1 = MLPPUtilities::biasInitialization(n_hidden); - bias2 = MLPPUtilities::biasInitialization(n_class); -} - -std::vector MLPPSoftmaxNetOld::modelTest(std::vector x) { - return Evaluate(x); -} - -std::vector> MLPPSoftmaxNetOld::modelSetTest(std::vector> X) { - return Evaluate(X); -} - -void MLPPSoftmaxNetOld::gradientDescent(real_t learning_rate, int max_epoch, bool UI) { - MLPPActivationOld avn; - MLPPLinAlgOld alg; - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - forwardPass(); - - while (true) { - cost_prev = Cost(y_hat, outputSet); - - // Calculating the errors - std::vector> error = alg.subtraction(y_hat, outputSet); - - // Calculating the weight/bias gradients for layer 2 - - std::vector> D2_1 = alg.matmult(alg.transpose(a2), error); - - // weights and bias updation for layer 2 - weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate, D2_1)); - weights2 = regularization.regWeights(weights2, lambda, alpha, reg); - - bias2 = alg.subtractMatrixRows(bias2, alg.scalarMultiply(learning_rate, error)); - - //Calculating the weight/bias for layer 1 - - std::vector> D1_1 = alg.matmult(error, alg.transpose(weights2)); - - std::vector> D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, 1)); - - std::vector> D1_3 = alg.matmult(alg.transpose(inputSet), D1_2); - - // weight an bias updation for layer 1 - weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate, D1_3)); - weights1 = regularization.regWeights(weights1, lambda, alpha, reg); - - bias1 = alg.subtractMatrixRows(bias1, alg.scalarMultiply(learning_rate, D1_2)); - - forwardPass(); - - // UI PORTION - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); - std::cout << "Layer 1:" << std::endl; - MLPPUtilities::UI(weights1, bias1); - std::cout << "Layer 2:" << std::endl; - MLPPUtilities::UI(weights2, bias2); - } - epoch++; - - if (epoch > max_epoch) { - break; - } - } -} - -void MLPPSoftmaxNetOld::SGD(real_t learning_rate, int max_epoch, bool UI) { - MLPPActivationOld avn; - MLPPLinAlgOld alg; - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - - while (true) { - std::random_device rd; - std::default_random_engine generator(rd()); - std::uniform_int_distribution distribution(0, int(n - 1)); - int outputIndex = distribution(generator); - - std::vector y_hat = Evaluate(inputSet[outputIndex]); - - auto prop_res = propagate(inputSet[outputIndex]); - auto z2 = std::get<0>(prop_res); - auto a2 = std::get<1>(prop_res); - - cost_prev = Cost({ y_hat }, { outputSet[outputIndex] }); - std::vector error = alg.subtraction(y_hat, outputSet[outputIndex]); - - // Weight updation for layer 2 - std::vector> D2_1 = alg.outerProduct(error, a2); - weights2 = alg.subtraction(weights2, 
alg.scalarMultiply(learning_rate, alg.transpose(D2_1))); - weights2 = regularization.regWeights(weights2, lambda, alpha, reg); - - // Bias updation for layer 2 - bias2 = alg.subtraction(bias2, alg.scalarMultiply(learning_rate, error)); - - // Weight updation for layer 1 - std::vector D1_1 = alg.mat_vec_mult(weights2, error); - std::vector D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, true)); - std::vector> D1_3 = alg.outerProduct(inputSet[outputIndex], D1_2); - - weights1 = alg.subtraction(weights1, alg.scalarMultiply(learning_rate, D1_3)); - weights1 = regularization.regWeights(weights1, lambda, alpha, reg); - // Bias updation for layer 1 - - bias1 = alg.subtraction(bias1, alg.scalarMultiply(learning_rate, D1_2)); - - y_hat = Evaluate(inputSet[outputIndex]); - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost({ y_hat }, { outputSet[outputIndex] })); - std::cout << "Layer 1:" << std::endl; - MLPPUtilities::UI(weights1, bias1); - std::cout << "Layer 2:" << std::endl; - MLPPUtilities::UI(weights2, bias2); - } - epoch++; - - if (epoch > max_epoch) { - break; - } - } - forwardPass(); -} - -void MLPPSoftmaxNetOld::MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI) { - MLPPActivationOld avn; - MLPPLinAlgOld alg; - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - - // Creating the mini-batches - int n_mini_batch = n / mini_batch_size; - - auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch); - auto inputMiniBatches = std::get<0>(batches); - auto outputMiniBatches = std::get<1>(batches); - - // Creating the mini-batches - for (int i = 0; i < n_mini_batch; i++) { - std::vector> currentInputSet; - std::vector> currentOutputSet; - for (int j = 0; j < n / n_mini_batch; j++) { - currentInputSet.push_back(inputSet[n / n_mini_batch * i + j]); - currentOutputSet.push_back(outputSet[n / n_mini_batch * i + j]); - } - inputMiniBatches.push_back(currentInputSet); - outputMiniBatches.push_back(currentOutputSet); - } - - if (real_t(n) / real_t(n_mini_batch) - int(n / n_mini_batch) != 0) { - for (int i = 0; i < n - n / n_mini_batch * n_mini_batch; i++) { - inputMiniBatches[n_mini_batch - 1].push_back(inputSet[n / n_mini_batch * n_mini_batch + i]); - outputMiniBatches[n_mini_batch - 1].push_back(outputSet[n / n_mini_batch * n_mini_batch + i]); - } - } - - while (true) { - for (int i = 0; i < n_mini_batch; i++) { - std::vector> y_hat = Evaluate(inputMiniBatches[i]); - - auto propagate_res = propagate(inputMiniBatches[i]); - auto z2 = std::get<0>(propagate_res); - auto a2 = std::get<1>(propagate_res); - - cost_prev = Cost(y_hat, outputMiniBatches[i]); - - // Calculating the errors - std::vector> error = alg.subtraction(y_hat, outputMiniBatches[i]); - - // Calculating the weight/bias gradients for layer 2 - - std::vector> D2_1 = alg.matmult(alg.transpose(a2), error); - - // weights and bias updation for layser 2 - weights2 = alg.subtraction(weights2, alg.scalarMultiply(learning_rate, D2_1)); - weights2 = regularization.regWeights(weights2, lambda, alpha, reg); - - // Bias Updation for layer 2 - bias2 = alg.subtractMatrixRows(bias2, alg.scalarMultiply(learning_rate, error)); - - //Calculating the weight/bias for layer 1 - - std::vector> D1_1 = alg.matmult(error, alg.transpose(weights2)); - - std::vector> D1_2 = alg.hadamard_product(D1_1, avn.sigmoid(z2, 1)); - - std::vector> D1_3 = alg.matmult(alg.transpose(inputMiniBatches[i]), D1_2); - - // weight an bias updation for layer 1 - weights1 = alg.subtraction(weights1, 
alg.scalarMultiply(learning_rate, D1_3)); - weights1 = regularization.regWeights(weights1, lambda, alpha, reg); - - bias1 = alg.subtractMatrixRows(bias1, alg.scalarMultiply(learning_rate, D1_2)); - - y_hat = Evaluate(inputMiniBatches[i]); - - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i])); - std::cout << "Layer 1:" << std::endl; - MLPPUtilities::UI(weights1, bias1); - std::cout << "Layer 2:" << std::endl; - MLPPUtilities::UI(weights2, bias2); - } - } - epoch++; - if (epoch > max_epoch) { - break; - } - } - forwardPass(); -} - -real_t MLPPSoftmaxNetOld::score() { - MLPPUtilities util; - return util.performance(y_hat, outputSet); -} - -void MLPPSoftmaxNetOld::save(std::string fileName) { - MLPPUtilities util; - util.saveParameters(fileName, weights1, bias1, 0, 1); - util.saveParameters(fileName, weights2, bias2, 1, 2); -} - -std::vector> MLPPSoftmaxNetOld::getEmbeddings() { - return weights1; -} - -real_t MLPPSoftmaxNetOld::Cost(std::vector> y_hat, std::vector> y) { - MLPPRegOld regularization; - MLPPData data; - class MLPPCostOld cost; - return cost.CrossEntropy(y_hat, y) + regularization.regTerm(weights1, lambda, alpha, reg) + regularization.regTerm(weights2, lambda, alpha, reg); -} - -std::vector> MLPPSoftmaxNetOld::Evaluate(std::vector> X) { - MLPPLinAlgOld alg; - MLPPActivationOld avn; - std::vector> z2 = alg.mat_vec_add(alg.matmult(X, weights1), bias1); - std::vector> a2 = avn.sigmoid(z2); - return avn.adjSoftmax(alg.mat_vec_add(alg.matmult(a2, weights2), bias2)); -} - -std::tuple>, std::vector>> MLPPSoftmaxNetOld::propagate(std::vector> X) { - MLPPLinAlgOld alg; - MLPPActivationOld avn; - std::vector> z2 = alg.mat_vec_add(alg.matmult(X, weights1), bias1); - std::vector> a2 = avn.sigmoid(z2); - return { z2, a2 }; -} - -std::vector MLPPSoftmaxNetOld::Evaluate(std::vector x) { - MLPPLinAlgOld alg; - MLPPActivationOld avn; - std::vector z2 = alg.addition(alg.mat_vec_mult(alg.transpose(weights1), x), bias1); - std::vector a2 = avn.sigmoid(z2); - return avn.adjSoftmax(alg.addition(alg.mat_vec_mult(alg.transpose(weights2), a2), bias2)); -} - -std::tuple, std::vector> MLPPSoftmaxNetOld::propagate(std::vector x) { - MLPPLinAlgOld alg; - MLPPActivationOld avn; - std::vector z2 = alg.addition(alg.mat_vec_mult(alg.transpose(weights1), x), bias1); - std::vector a2 = avn.sigmoid(z2); - return { z2, a2 }; -} - -void MLPPSoftmaxNetOld::forwardPass() { - MLPPLinAlgOld alg; - MLPPActivationOld avn; - z2 = alg.mat_vec_add(alg.matmult(inputSet, weights1), bias1); - a2 = avn.sigmoid(z2); - y_hat = avn.adjSoftmax(alg.mat_vec_add(alg.matmult(a2, weights2), bias2)); -} diff --git a/mlpp/softmax_net/softmax_net_old.h b/mlpp/softmax_net/softmax_net_old.h deleted file mode 100644 index f6312f1..0000000 --- a/mlpp/softmax_net/softmax_net_old.h +++ /dev/null @@ -1,60 +0,0 @@ -#ifndef MLPP_SOFTMAX_NET_OLD_H -#define MLPP_SOFTMAX_NET_OLD_H - -// -// SoftmaxNet.hpp -// -// Created by Marc Melikyan on 10/2/20. 
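The removed MLPPSoftmaxNetOld is a single-hidden-layer network: z2 = X·W1 + b1, a2 = sigmoid(z2), ŷ = softmax(a2·W2 + b2), trained with cross-entropy; W1 doubles as the embedding table returned by getEmbeddings(). A per-sample forward pass, sketched standalone (illustrative, double in place of real_t, W1 of shape k×n_hidden and W2 of shape n_hidden×n_class):

#include <cmath>
#include <vector>

// Forward pass for one sample through a sigmoid hidden layer and a softmax output.
std::vector<double> softmax_net_forward(const std::vector<double> &x,
		const std::vector<std::vector<double>> &W1, const std::vector<double> &b1,
		const std::vector<std::vector<double>> &W2, const std::vector<double> &b2) {
	// Hidden layer: a2 = sigmoid(x * W1 + b1).
	std::vector<double> a2(b1.size());
	for (size_t h = 0; h < a2.size(); h++) {
		double z = b1[h];
		for (size_t j = 0; j < x.size(); j++) {
			z += x[j] * W1[j][h];
		}
		a2[h] = 1.0 / (1.0 + std::exp(-z));
	}

	// Output layer: softmax(a2 * W2 + b2).
	std::vector<double> out(b2.size());
	double sum = 0.0;
	for (size_t c = 0; c < out.size(); c++) {
		double z = b2[c];
		for (size_t h = 0; h < a2.size(); h++) {
			z += a2[h] * W2[h][c];
		}
		out[c] = std::exp(z);
		sum += out[c];
	}
	for (double &v : out) {
		v /= sum;
	}
	return out;
}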
-// - -#include "core/math/math_defs.h" - -#include -#include - -class MLPPSoftmaxNetOld { -public: - MLPPSoftmaxNetOld(std::vector> inputSet, std::vector> outputSet, int n_hidden, std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5); - std::vector modelTest(std::vector x); - std::vector> modelSetTest(std::vector> X); - void gradientDescent(real_t learning_rate, int max_epoch, bool UI = false); - void SGD(real_t learning_rate, int max_epoch, bool UI = false); - void MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI = false); - real_t score(); - void save(std::string fileName); - - std::vector> getEmbeddings(); // This class is used (mostly) for word2Vec. This function returns our embeddings. -private: - real_t Cost(std::vector> y_hat, std::vector> y); - - std::vector> Evaluate(std::vector> X); - std::tuple>, std::vector>> propagate(std::vector> X); - std::vector Evaluate(std::vector x); - std::tuple, std::vector> propagate(std::vector x); - void forwardPass(); - - std::vector> inputSet; - std::vector> outputSet; - std::vector> y_hat; - - std::vector> weights1; - std::vector> weights2; - - std::vector bias1; - std::vector bias2; - - std::vector> z2; - std::vector> a2; - - int n; - int k; - int n_class; - int n_hidden; - - // Regularization Params - std::string reg; - real_t lambda; - real_t alpha; /* This is the controlling param for Elastic Net*/ -}; - -#endif /* SoftmaxNet_hpp */ diff --git a/mlpp/softmax_reg/softmax_reg_old.cpp b/mlpp/softmax_reg/softmax_reg_old.cpp deleted file mode 100644 index 76e3c53..0000000 --- a/mlpp/softmax_reg/softmax_reg_old.cpp +++ /dev/null @@ -1,193 +0,0 @@ -// -// SoftmaxReg.cpp -// -// Created by Marc Melikyan on 10/2/20. -// - -#include "softmax_reg_old.h" -#include "../activation/activation_old.h" -#include "../cost/cost_old.h" -#include "../lin_alg/lin_alg_old.h" -#include "../regularization/reg_old.h" -#include "../utilities/utilities.h" - -#include -#include - -MLPPSoftmaxRegOld::MLPPSoftmaxRegOld(std::vector> inputSet, std::vector> outputSet, std::string reg, real_t lambda, real_t alpha) : - inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), n_class(outputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha) { - y_hat.resize(n); - weights = MLPPUtilities::weightInitialization(k, n_class); - bias = MLPPUtilities::biasInitialization(n_class); -} - -std::vector MLPPSoftmaxRegOld::modelTest(std::vector x) { - return Evaluate(x); -} - -std::vector> MLPPSoftmaxRegOld::modelSetTest(std::vector> X) { - return Evaluate(X); -} - -void MLPPSoftmaxRegOld::gradientDescent(real_t learning_rate, int max_epoch, bool UI) { - MLPPLinAlgOld alg; - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - forwardPass(); - - while (true) { - cost_prev = Cost(y_hat, outputSet); - std::vector> error = alg.subtraction(y_hat, outputSet); - - //Calculating the weight gradients - std::vector> w_gradient = alg.matmult(alg.transpose(inputSet), error); - - //Weight updation - weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, w_gradient)); - weights = regularization.regWeights(weights, lambda, alpha, reg); - - // Calculating the bias gradients - //real_t b_gradient = alg.sum_elements(error); - - // Bias Updation - bias = alg.subtractMatrixRows(bias, alg.scalarMultiply(learning_rate, error)); - - forwardPass(); - - // UI PORTION - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); - MLPPUtilities::UI(weights, bias); - } - epoch++; - - if (epoch > 
max_epoch) { - break; - } - } -} - -void MLPPSoftmaxRegOld::SGD(real_t learning_rate, int max_epoch, bool UI) { - MLPPLinAlgOld alg; - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - - while (true) { - std::random_device rd; - std::default_random_engine generator(rd()); - std::uniform_int_distribution distribution(0, int(n - 1)); - real_t outputIndex = distribution(generator); - - std::vector y_hat = Evaluate(inputSet[outputIndex]); - cost_prev = Cost({ y_hat }, { outputSet[outputIndex] }); - - // Calculating the weight gradients - std::vector> w_gradient = alg.outerProduct(inputSet[outputIndex], alg.subtraction(y_hat, outputSet[outputIndex])); - - // Weight Updation - weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, w_gradient)); - weights = regularization.regWeights(weights, lambda, alpha, reg); - - // Calculating the bias gradients - std::vector b_gradient = alg.subtraction(y_hat, outputSet[outputIndex]); - - // Bias updation - bias = alg.subtraction(bias, alg.scalarMultiply(learning_rate, b_gradient)); - - //y_hat = Evaluate({ inputSet[outputIndex] }); - y_hat = Evaluate(inputSet[outputIndex]); - - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost({ y_hat }, { outputSet[outputIndex] })); - MLPPUtilities::UI(weights, bias); - } - epoch++; - - if (epoch > max_epoch) { - break; - } - } - forwardPass(); -} - -void MLPPSoftmaxRegOld::MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI) { - MLPPLinAlgOld alg; - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - - // Creating the mini-batches - int n_mini_batch = n / mini_batch_size; - auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch); - auto inputMiniBatches = std::get<0>(batches); - auto outputMiniBatches = std::get<1>(batches); - - while (true) { - for (int i = 0; i < n_mini_batch; i++) { - std::vector> y_hat = Evaluate(inputMiniBatches[i]); - cost_prev = Cost(y_hat, outputMiniBatches[i]); - - std::vector> error = alg.subtraction(y_hat, outputMiniBatches[i]); - - // Calculating the weight gradients - std::vector> w_gradient = alg.matmult(alg.transpose(inputMiniBatches[i]), error); - - //Weight updation - weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, w_gradient)); - weights = regularization.regWeights(weights, lambda, alpha, reg); - - // Calculating the bias gradients - bias = alg.subtractMatrixRows(bias, alg.scalarMultiply(learning_rate, error)); - y_hat = Evaluate(inputMiniBatches[i]); - - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i])); - MLPPUtilities::UI(weights, bias); - } - } - epoch++; - if (epoch > max_epoch) { - break; - } - } - forwardPass(); -} - -real_t MLPPSoftmaxRegOld::score() { - MLPPUtilities util; - return util.performance(y_hat, outputSet); -} - -void MLPPSoftmaxRegOld::save(std::string fileName) { - MLPPUtilities util; - util.saveParameters(fileName, weights, bias); -} - -real_t MLPPSoftmaxRegOld::Cost(std::vector> y_hat, std::vector> y) { - MLPPRegOld regularization; - class MLPPCostOld cost; - return cost.CrossEntropy(y_hat, y) + regularization.regTerm(weights, lambda, alpha, reg); -} - -std::vector MLPPSoftmaxRegOld::Evaluate(std::vector x) { - MLPPLinAlgOld alg; - MLPPActivationOld avn; - return avn.softmax(alg.addition(bias, alg.mat_vec_mult(alg.transpose(weights), x))); -} - -std::vector> MLPPSoftmaxRegOld::Evaluate(std::vector> X) { - MLPPLinAlgOld alg; - MLPPActivationOld avn; - - return avn.softmax(alg.mat_vec_add(alg.matmult(X, 
weights), bias)); -} - -// softmax ( wTx + b ) -void MLPPSoftmaxRegOld::forwardPass() { - MLPPLinAlgOld alg; - MLPPActivationOld avn; - - y_hat = avn.softmax(alg.mat_vec_add(alg.matmult(inputSet, weights), bias)); -} diff --git a/mlpp/softmax_reg/softmax_reg_old.h b/mlpp/softmax_reg/softmax_reg_old.h deleted file mode 100644 index 864e0ee..0000000 --- a/mlpp/softmax_reg/softmax_reg_old.h +++ /dev/null @@ -1,50 +0,0 @@ - -#ifndef MLPP_SOFTMAX_REG_OLD_H -#define MLPP_SOFTMAX_REG_OLD_H - -// -// SoftmaxReg.hpp -// -// Created by Marc Melikyan on 10/2/20. -// - -#include "core/math/math_defs.h" - -#include -#include - -class MLPPSoftmaxRegOld { -public: - MLPPSoftmaxRegOld(std::vector> inputSet, std::vector> outputSet, std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5); - std::vector modelTest(std::vector x); - std::vector> modelSetTest(std::vector> X); - void gradientDescent(real_t learning_rate, int max_epoch, bool UI = false); - void SGD(real_t learning_rate, int max_epoch, bool UI = false); - void MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI = false); - real_t score(); - void save(std::string fileName); - -private: - real_t Cost(std::vector> y_hat, std::vector> y); - - std::vector> Evaluate(std::vector> X); - std::vector Evaluate(std::vector x); - void forwardPass(); - - std::vector> inputSet; - std::vector> outputSet; - std::vector> y_hat; - std::vector> weights; - std::vector bias; - - int n; - int k; - int n_class; - - // Regularization Params - std::string reg; - real_t lambda; - real_t alpha; /* This is the controlling param for Elastic Net*/ -}; - -#endif /* SoftmaxReg_hpp */ diff --git a/mlpp/stat/stat_old.cpp b/mlpp/stat/stat_old.cpp deleted file mode 100644 index 8e60e20..0000000 --- a/mlpp/stat/stat_old.cpp +++ /dev/null @@ -1,215 +0,0 @@ -// -// Stat.cpp -// -// Created by Marc Melikyan on 9/29/20. 
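For the removed softmax regression, ŷ = softmax(Wᵀx + b) and the per-sample cross-entropy gradient is the outer product x·(ŷ − y)ᵀ for the weights and (ŷ − y) for the bias, exactly the quantities formed in the SGD branch above. One SGD step, sketched standalone (double in place of real_t, W of shape k×n_class, y a one-hot vector):

#include <cmath>
#include <vector>

// One SGD step of multiclass softmax regression with cross-entropy loss.
void softmax_reg_sgd_step(std::vector<std::vector<double>> &W, std::vector<double> &b,
		const std::vector<double> &x, const std::vector<double> &y, double learning_rate) {
	// Forward pass: y_hat = softmax(W^T x + b).
	std::vector<double> y_hat(b.size());
	double sum = 0.0;
	for (size_t c = 0; c < b.size(); c++) {
		double z = b[c];
		for (size_t j = 0; j < x.size(); j++) {
			z += W[j][c] * x[j];
		}
		y_hat[c] = std::exp(z);
		sum += y_hat[c];
	}
	for (double &v : y_hat) {
		v /= sum;
	}

	// Gradient step.
	for (size_t c = 0; c < b.size(); c++) {
		double err = y_hat[c] - y[c];
		for (size_t j = 0; j < x.size(); j++) {
			W[j][c] -= learning_rate * err * x[j];
		}
		b[c] -= learning_rate * err;
	}
}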
-// - -#include "stat_old.h" -#include "../activation/activation_old.h" -#include "../data/data.h" -#include "../lin_alg/lin_alg_old.h" -#include -#include -#include - -#include - -real_t MLPPStatOld::b0Estimation(const std::vector &x, const std::vector &y) { - return mean(y) - b1Estimation(x, y) * mean(x); -} - -real_t MLPPStatOld::b1Estimation(const std::vector &x, const std::vector &y) { - return covariance(x, y) / variance(x); -} - -real_t MLPPStatOld::mean(const std::vector &x) { - real_t sum = 0; - for (uint32_t i = 0; i < x.size(); i++) { - sum += x[i]; - } - return sum / x.size(); -} - -real_t MLPPStatOld::median(std::vector x) { - real_t center = real_t(x.size()) / real_t(2); - sort(x.begin(), x.end()); - if (x.size() % 2 == 0) { - return mean({ x[center - 1], x[center] }); - } else { - return x[center - 1 + 0.5]; - } -} - -std::vector MLPPStatOld::mode(const std::vector &x) { - MLPPData data; - std::vector x_set = data.vecToSet(x); - std::map element_num; - for (uint32_t i = 0; i < x_set.size(); i++) { - element_num[x[i]] = 0; - } - for (uint32_t i = 0; i < x.size(); i++) { - element_num[x[i]]++; - } - std::vector modes; - real_t max_num = element_num[x_set[0]]; - for (uint32_t i = 0; i < x_set.size(); i++) { - if (element_num[x_set[i]] > max_num) { - max_num = element_num[x_set[i]]; - modes.clear(); - modes.push_back(x_set[i]); - } else if (element_num[x_set[i]] == max_num) { - modes.push_back(x_set[i]); - } - } - return modes; -} - -real_t MLPPStatOld::range(const std::vector &x) { - MLPPLinAlgOld alg; - return alg.max(x) - alg.min(x); -} - -real_t MLPPStatOld::midrange(const std::vector &x) { - return range(x) / 2; -} - -real_t MLPPStatOld::absAvgDeviation(const std::vector &x) { - real_t sum = 0; - for (uint32_t i = 0; i < x.size(); i++) { - sum += std::abs(x[i] - mean(x)); - } - return sum / x.size(); -} - -real_t MLPPStatOld::standardDeviation(const std::vector &x) { - return std::sqrt(variance(x)); -} - -real_t MLPPStatOld::variance(const std::vector &x) { - real_t sum = 0; - for (uint32_t i = 0; i < x.size(); i++) { - sum += (x[i] - mean(x)) * (x[i] - mean(x)); - } - return sum / (x.size() - 1); -} - -real_t MLPPStatOld::covariance(const std::vector &x, const std::vector &y) { - real_t sum = 0; - for (uint32_t i = 0; i < x.size(); i++) { - sum += (x[i] - mean(x)) * (y[i] - mean(y)); - } - return sum / (x.size() - 1); -} - -real_t MLPPStatOld::correlation(const std::vector &x, const std::vector &y) { - return covariance(x, y) / (standardDeviation(x) * standardDeviation(y)); -} - -real_t MLPPStatOld::R2(const std::vector &x, const std::vector &y) { - return correlation(x, y) * correlation(x, y); -} - -real_t MLPPStatOld::chebyshevIneq(const real_t k) { - // X may or may not belong to a Gaussian Distribution - return 1 - 1 / (k * k); -} - -real_t MLPPStatOld::weightedMean(const std::vector &x, const std::vector &weights) { - real_t sum = 0; - real_t weights_sum = 0; - for (uint32_t i = 0; i < x.size(); i++) { - sum += x[i] * weights[i]; - weights_sum += weights[i]; - } - return sum / weights_sum; -} - -real_t MLPPStatOld::geometricMean(const std::vector &x) { - real_t product = 1; - for (uint32_t i = 0; i < x.size(); i++) { - product *= x[i]; - } - return std::pow(product, 1.0 / x.size()); -} - -real_t MLPPStatOld::harmonicMean(const std::vector &x) { - real_t sum = 0; - for (uint32_t i = 0; i < x.size(); i++) { - sum += 1 / x[i]; - } - return x.size() / sum; -} - -real_t MLPPStatOld::RMS(const std::vector &x) { - real_t sum = 0; - for (uint32_t i = 0; i < x.size(); i++) { - 
sum += x[i] * x[i]; - } - return sqrt(sum / x.size()); -} - -real_t MLPPStatOld::powerMean(const std::vector &x, const real_t p) { - real_t sum = 0; - for (uint32_t i = 0; i < x.size(); i++) { - sum += std::pow(x[i], p); - } - return std::pow(sum / x.size(), 1 / p); -} - -real_t MLPPStatOld::lehmerMean(const std::vector &x, const real_t p) { - real_t num = 0; - real_t den = 0; - for (uint32_t i = 0; i < x.size(); i++) { - num += std::pow(x[i], p); - den += std::pow(x[i], p - 1); - } - return num / den; -} - -real_t MLPPStatOld::weightedLehmerMean(const std::vector &x, const std::vector &weights, const real_t p) { - real_t num = 0; - real_t den = 0; - for (uint32_t i = 0; i < x.size(); i++) { - num += weights[i] * std::pow(x[i], p); - den += weights[i] * std::pow(x[i], p - 1); - } - return num / den; -} - -real_t MLPPStatOld::heronianMean(const real_t A, const real_t B) { - return (A + sqrt(A * B) + B) / 3; -} - -real_t MLPPStatOld::contraHarmonicMean(const std::vector &x) { - return lehmerMean(x, 2); -} - -real_t MLPPStatOld::heinzMean(const real_t A, const real_t B, const real_t x) { - return (std::pow(A, x) * std::pow(B, 1 - x) + std::pow(A, 1 - x) * std::pow(B, x)) / 2; -} - -real_t MLPPStatOld::neumanSandorMean(const real_t a, const real_t b) { - MLPPActivationOld avn; - return (a - b) / 2 * avn.arsinh((a - b) / (a + b)); -} - -real_t MLPPStatOld::stolarskyMean(const real_t x, const real_t y, const real_t p) { - if (x == y) { - return x; - } - return std::pow((std::pow(x, p) - std::pow(y, p)) / (p * (x - y)), 1 / (p - 1)); -} - -real_t MLPPStatOld::identricMean(const real_t x, const real_t y) { - if (x == y) { - return x; - } - return (1 / M_E) * std::pow(std::pow(x, x) / std::pow(y, y), 1 / (x - y)); -} - -real_t MLPPStatOld::logMean(const real_t x, const real_t y) { - if (x == y) { - return x; - } - return (y - x) / (log(y) - std::log(x)); -} diff --git a/mlpp/stat/stat_old.h b/mlpp/stat/stat_old.h deleted file mode 100644 index d477736..0000000 --- a/mlpp/stat/stat_old.h +++ /dev/null @@ -1,52 +0,0 @@ - -#ifndef MLPP_STAT_OLD_H -#define MLPP_STAT_OLD_H - -// -// Stat.hpp -// -// Created by Marc Melikyan on 9/29/20. -// - -#include "core/math/math_defs.h" - -#include - -class MLPPStatOld { -public: - // These functions are for univariate lin reg module- not for users. 
- real_t b0Estimation(const std::vector &x, const std::vector &y); - real_t b1Estimation(const std::vector &x, const std::vector &y); - - // Statistical Functions - real_t mean(const std::vector &x); - real_t median(std::vector x); - std::vector mode(const std::vector &x); - real_t range(const std::vector &x); - real_t midrange(const std::vector &x); - real_t absAvgDeviation(const std::vector &x); - real_t standardDeviation(const std::vector &x); - real_t variance(const std::vector &x); - real_t covariance(const std::vector &x, const std::vector &y); - real_t correlation(const std::vector &x, const std::vector &y); - real_t R2(const std::vector &x, const std::vector &y); - real_t chebyshevIneq(const real_t k); - - // Extras - real_t weightedMean(const std::vector &x, const std::vector &weights); - real_t geometricMean(const std::vector &x); - real_t harmonicMean(const std::vector &x); - real_t RMS(const std::vector &x); - real_t powerMean(const std::vector &x, const real_t p); - real_t lehmerMean(const std::vector &x, const real_t p); - real_t weightedLehmerMean(const std::vector &x, const std::vector &weights, const real_t p); - real_t contraHarmonicMean(const std::vector &x); - real_t heronianMean(const real_t A, const real_t B); - real_t heinzMean(const real_t A, const real_t B, const real_t x); - real_t neumanSandorMean(const real_t a, const real_t b); - real_t stolarskyMean(const real_t x, const real_t y, const real_t p); - real_t identricMean(const real_t x, const real_t y); - real_t logMean(const real_t x, const real_t y); -}; - -#endif /* Stat_hpp */ diff --git a/mlpp/svc/svc_old.cpp b/mlpp/svc/svc_old.cpp deleted file mode 100644 index 95233eb..0000000 --- a/mlpp/svc/svc_old.cpp +++ /dev/null @@ -1,198 +0,0 @@ -// -// SVC.cpp -// -// Created by Marc Melikyan on 10/2/20. 
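The b0/b1 estimators at the top of the removed stat class are the ordinary least-squares coefficients for univariate linear regression: b1 = cov(x, y) / var(x) and b0 = mean(y) − b1·mean(x). The building blocks, sketched standalone (double in place of real_t, sample estimators with the n − 1 denominator as in the removed code):

#include <cmath>
#include <vector>

double sample_mean(const std::vector<double> &x) {
	double s = 0.0;
	for (double v : x) {
		s += v;
	}
	return s / x.size();
}

// Sample covariance; sample_covariance(x, x) is the sample variance.
double sample_covariance(const std::vector<double> &x, const std::vector<double> &y) {
	double mx = sample_mean(x), my = sample_mean(y);
	double s = 0.0;
	for (size_t i = 0; i < x.size(); i++) {
		s += (x[i] - mx) * (y[i] - my);
	}
	return s / (x.size() - 1);
}

// Pearson correlation; squaring it gives the R^2 the removed class exposed.
double pearson_correlation(const std::vector<double> &x, const std::vector<double> &y) {
	return sample_covariance(x, y) /
			std::sqrt(sample_covariance(x, x) * sample_covariance(y, y));
}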
-// - -#include "svc_old.h" -#include "../activation/activation_old.h" -#include "../cost/cost_old.h" -#include "../lin_alg/lin_alg_old.h" -#include "../regularization/reg_old.h" -#include "../utilities/utilities.h" - -#include -#include - -std::vector MLPPSVCOld::modelSetTest(std::vector> X) { - return Evaluate(X); -} - -real_t MLPPSVCOld::modelTest(std::vector x) { - return Evaluate(x); -} - -void MLPPSVCOld::gradientDescent(real_t learning_rate, int max_epoch, bool UI) { - class MLPPCostOld cost; - MLPPLinAlgOld alg; - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - forwardPass(); - - while (true) { - cost_prev = Cost(y_hat, outputSet, weights, C); - - weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate / n, alg.mat_vec_mult(alg.transpose(inputSet), cost.HingeLossDeriv(z, outputSet, C)))); - weights = regularization.regWeights(weights, learning_rate / n, 0, "Ridge"); - - // Calculating the bias gradients - bias += learning_rate * alg.sum_elements(cost.HingeLossDeriv(y_hat, outputSet, C)) / n; - - forwardPass(); - - // UI PORTION - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet, weights, C)); - MLPPUtilities::UI(weights, bias); - } - epoch++; - - if (epoch > max_epoch) { - break; - } - } -} - -void MLPPSVCOld::SGD(real_t learning_rate, int max_epoch, bool UI) { - class MLPPCostOld cost; - MLPPLinAlgOld alg; - MLPPRegOld regularization; - - real_t cost_prev = 0; - int epoch = 1; - - while (true) { - std::random_device rd; - std::default_random_engine generator(rd()); - std::uniform_int_distribution distribution(0, int(n - 1)); - int outputIndex = distribution(generator); - - //real_t y_hat = Evaluate(inputSet[outputIndex]); - real_t z = propagate(inputSet[outputIndex]); - cost_prev = Cost({ z }, { outputSet[outputIndex] }, weights, C); - - real_t costDeriv = cost.HingeLossDeriv(std::vector({ z }), std::vector({ outputSet[outputIndex] }), C)[0]; // Explicit conversion to avoid ambiguity with overloaded function. Error occured on Ubuntu. 
- - // Weight Updation - weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate * costDeriv, inputSet[outputIndex])); - weights = regularization.regWeights(weights, learning_rate, 0, "Ridge"); - - // Bias updation - bias -= learning_rate * costDeriv; - - //y_hat = Evaluate({ inputSet[outputIndex] }); - - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost({ z }, { outputSet[outputIndex] }, weights, C)); - MLPPUtilities::UI(weights, bias); - } - - epoch++; - - if (epoch > max_epoch) { - break; - } - } - forwardPass(); -} - -void MLPPSVCOld::MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI) { - class MLPPCostOld cost; - MLPPLinAlgOld alg; - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - - // Creating the mini-batches - int n_mini_batch = n / mini_batch_size; - auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch); - auto inputMiniBatches = std::get<0>(batches); - auto outputMiniBatches = std::get<1>(batches); - - while (true) { - for (int i = 0; i < n_mini_batch; i++) { - std::vector y_hat = Evaluate(inputMiniBatches[i]); - std::vector z = propagate(inputMiniBatches[i]); - cost_prev = Cost(z, outputMiniBatches[i], weights, C); - - // Calculating the weight gradients - weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate / n, alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), cost.HingeLossDeriv(z, outputMiniBatches[i], C)))); - weights = regularization.regWeights(weights, learning_rate / n, 0, "Ridge"); - - // Calculating the bias gradients - bias -= learning_rate * alg.sum_elements(cost.HingeLossDeriv(y_hat, outputMiniBatches[i], C)) / n; - - forwardPass(); - - y_hat = Evaluate(inputMiniBatches[i]); - - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(z, outputMiniBatches[i], weights, C)); - MLPPUtilities::UI(weights, bias); - } - } - epoch++; - if (epoch > max_epoch) { - break; - } - } - forwardPass(); -} - -real_t MLPPSVCOld::score() { - MLPPUtilities util; - return util.performance(y_hat, outputSet); -} - -void MLPPSVCOld::save(std::string fileName) { - MLPPUtilities util; - util.saveParameters(fileName, weights, bias); -} - -MLPPSVCOld::MLPPSVCOld(std::vector> p_inputSet, std::vector p_outputSet, real_t p_C) { - inputSet = p_inputSet; - outputSet = p_outputSet; - n = inputSet.size(); - k = inputSet[0].size(); - C = p_C; - - y_hat.resize(n); - weights = MLPPUtilities::weightInitialization(k); - bias = MLPPUtilities::biasInitialization(); -} - -real_t MLPPSVCOld::Cost(std::vector z, std::vector y, std::vector weights, real_t C) { - class MLPPCostOld cost; - return cost.HingeLoss(z, y, weights, C); -} - -std::vector MLPPSVCOld::Evaluate(std::vector> X) { - MLPPLinAlgOld alg; - MLPPActivationOld avn; - return avn.sign(alg.scalarAdd(bias, alg.mat_vec_mult(X, weights))); -} - -std::vector MLPPSVCOld::propagate(std::vector> X) { - MLPPLinAlgOld alg; - return alg.scalarAdd(bias, alg.mat_vec_mult(X, weights)); -} - -real_t MLPPSVCOld::Evaluate(std::vector x) { - MLPPLinAlgOld alg; - MLPPActivationOld avn; - return avn.sign(alg.dot(weights, x) + bias); -} - -real_t MLPPSVCOld::propagate(std::vector x) { - MLPPLinAlgOld alg; - return alg.dot(weights, x) + bias; -} - -// sign ( wTx + b ) -void MLPPSVCOld::forwardPass() { - MLPPActivationOld avn; - - z = propagate(inputSet); - y_hat = avn.sign(z); -} diff --git a/mlpp/svc/svc_old.h b/mlpp/svc/svc_old.h deleted file mode 100644 index ffc0d33..0000000 --- a/mlpp/svc/svc_old.h +++ /dev/null @@ -1,55 +0,0 @@ - -#ifndef MLPP_SVC_OLD_H 
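The removed MLPPSVCOld trains a linear soft-margin SVM on labels in {−1, +1}: hinge loss C·max(0, 1 − y·z) with z = wᵀx + b, plus a ridge penalty on the weights. The exact penalty scaling in the removed class is tied to its learning-rate/n convention, so the sketch below is the textbook primal SGD step rather than a line-for-line translation (double in place of real_t):

#include <vector>

// One SGD step of a linear soft-margin SVM (hinge loss + L2 penalty); y is -1 or +1.
void svc_sgd_step(std::vector<double> &w, double &b,
		const std::vector<double> &x, double y, double C, double learning_rate) {
	double z = b;
	for (size_t j = 0; j < w.size(); j++) {
		z += w[j] * x[j];
	}

	// Subgradient of C * max(0, 1 - y*z): -C*y on a margin violation, 0 otherwise.
	double hinge_grad = (y * z < 1.0) ? -C * y : 0.0;

	for (size_t j = 0; j < w.size(); j++) {
		w[j] -= learning_rate * (hinge_grad * x[j] + w[j]); // + w[j] from the ridge term
	}
	b -= learning_rate * hinge_grad;
}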
-#define MLPP_SVC_OLD_H - -// -// SVC.hpp -// -// Created by Marc Melikyan on 10/2/20. -// - -// https://towardsdatascience.com/svm-implementation-from-scratch-python-2db2fc52e5c2 -// Illustratd a practical definition of the Hinge Loss function and its gradient when optimizing with SGD. - -#include "core/math/math_defs.h" - -#include -#include - -class MLPPSVCOld { -public: - std::vector modelSetTest(std::vector> X); - real_t modelTest(std::vector x); - void gradientDescent(real_t learning_rate, int max_epoch, bool UI = false); - void SGD(real_t learning_rate, int max_epoch, bool UI = false); - void MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI = false); - real_t score(); - void save(std::string fileName); - - MLPPSVCOld(std::vector> inputSet, std::vector outputSet, real_t C); - -private: - real_t Cost(std::vector y_hat, std::vector y, std::vector weights, real_t C); - - std::vector Evaluate(std::vector> X); - std::vector propagate(std::vector> X); - real_t Evaluate(std::vector x); - real_t propagate(std::vector x); - void forwardPass(); - - std::vector> inputSet; - std::vector outputSet; - std::vector z; - std::vector y_hat; - std::vector weights; - real_t bias; - - real_t C; - int n; - int k; - - // UI Portion - void UI(int epoch, real_t cost_prev); -}; - -#endif /* SVC_hpp */ diff --git a/mlpp/tanh_reg/tanh_reg_old.cpp b/mlpp/tanh_reg/tanh_reg_old.cpp deleted file mode 100644 index 55a0261..0000000 --- a/mlpp/tanh_reg/tanh_reg_old.cpp +++ /dev/null @@ -1,196 +0,0 @@ -// -// TanhReg.cpp -// -// Created by Marc Melikyan on 10/2/20. -// - -#include "tanh_reg_old.h" - -#include "../activation/activation_old.h" -#include "../cost/cost_old.h" -#include "../lin_alg/lin_alg_old.h" -#include "../regularization/reg_old.h" -#include "../utilities/utilities.h" - -#include -#include - -MLPPTanhRegOld::MLPPTanhRegOld(std::vector> inputSet, std::vector outputSet, std::string reg, real_t lambda, real_t alpha) : - inputSet(inputSet), outputSet(outputSet), n(inputSet.size()), k(inputSet[0].size()), reg(reg), lambda(lambda), alpha(alpha) { - y_hat.resize(n); - weights = MLPPUtilities::weightInitialization(k); - bias = MLPPUtilities::biasInitialization(); -} - -std::vector MLPPTanhRegOld::modelSetTest(std::vector> X) { - return Evaluate(X); -} - -real_t MLPPTanhRegOld::modelTest(std::vector x) { - return Evaluate(x); -} - -void MLPPTanhRegOld::gradientDescent(real_t learning_rate, int max_epoch, bool UI) { - MLPPActivationOld avn; - MLPPLinAlgOld alg; - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - forwardPass(); - - while (true) { - cost_prev = Cost(y_hat, outputSet); - - std::vector error = alg.subtraction(y_hat, outputSet); - - weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate / n, alg.mat_vec_mult(alg.transpose(inputSet), alg.hadamard_product(error, avn.tanh(z, 1))))); - weights = regularization.regWeights(weights, lambda, alpha, reg); - - // Calculating the bias gradients - bias -= learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.tanh(z, 1))) / n; - - forwardPass(); - - // UI PORTION - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); - MLPPUtilities::UI(weights, bias); - } - epoch++; - - if (epoch > max_epoch) { - break; - } - } -} - -void MLPPTanhRegOld::SGD(real_t learning_rate, int max_epoch, bool UI) { - MLPPLinAlgOld alg; - MLPPRegOld regularization; - real_t cost_prev = 0; - int epoch = 1; - - while (true) { - std::random_device rd; - std::default_random_engine generator(rd()); - 
std::uniform_int_distribution distribution(0, int(n - 1)); - int outputIndex = distribution(generator); - - real_t y_hat = Evaluate(inputSet[outputIndex]); - cost_prev = Cost({ y_hat }, { outputSet[outputIndex] }); - - real_t error = y_hat - outputSet[outputIndex]; - - // Weight Updation - weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate * error * (1 - y_hat * y_hat), inputSet[outputIndex])); - weights = regularization.regWeights(weights, lambda, alpha, reg); - - // Bias updation - bias -= learning_rate * error * (1 - y_hat * y_hat); - - y_hat = Evaluate({ inputSet[outputIndex] }); - - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost({ y_hat }, { outputSet[outputIndex] })); - MLPPUtilities::UI(weights, bias); - } - epoch++; - - if (epoch > max_epoch) { - break; - } - } - forwardPass(); -} - -void MLPPTanhRegOld::MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI) { - MLPPActivationOld avn; - MLPPLinAlgOld alg; - MLPPRegOld regularization; - - real_t cost_prev = 0; - int epoch = 1; - - // Creating the mini-batches - int n_mini_batch = n / mini_batch_size; - auto batches = MLPPUtilities::createMiniBatches(inputSet, outputSet, n_mini_batch); - auto inputMiniBatches = std::get<0>(batches); - auto outputMiniBatches = std::get<1>(batches); - - while (true) { - for (int i = 0; i < n_mini_batch; i++) { - std::vector y_hat = Evaluate(inputMiniBatches[i]); - std::vector z = propagate(inputMiniBatches[i]); - cost_prev = Cost(y_hat, outputMiniBatches[i]); - - std::vector error = alg.subtraction(y_hat, outputMiniBatches[i]); - - // Calculating the weight gradients - weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate / n, alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), alg.hadamard_product(error, avn.tanh(z, 1))))); - weights = regularization.regWeights(weights, lambda, alpha, reg); - - // Calculating the bias gradients - bias -= learning_rate * alg.sum_elements(alg.hadamard_product(error, avn.tanh(z, 1))) / n; - - forwardPass(); - - y_hat = Evaluate(inputMiniBatches[i]); - - if (UI) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i])); - MLPPUtilities::UI(weights, bias); - } - } - epoch++; - if (epoch > max_epoch) { - break; - } - } - forwardPass(); -} - -real_t MLPPTanhRegOld::score() { - MLPPUtilities util; - return util.performance(y_hat, outputSet); -} - -void MLPPTanhRegOld::save(std::string fileName) { - MLPPUtilities util; - util.saveParameters(fileName, weights, bias); -} - -real_t MLPPTanhRegOld::Cost(std::vector y_hat, std::vector y) { - MLPPRegOld regularization; - class MLPPCostOld cost; - return cost.MSE(y_hat, y) + regularization.regTerm(weights, lambda, alpha, reg); -} - -std::vector MLPPTanhRegOld::Evaluate(std::vector> X) { - MLPPLinAlgOld alg; - MLPPActivationOld avn; - return avn.tanh(alg.scalarAdd(bias, alg.mat_vec_mult(X, weights))); -} - -std::vector MLPPTanhRegOld::propagate(std::vector> X) { - MLPPLinAlgOld alg; - return alg.scalarAdd(bias, alg.mat_vec_mult(X, weights)); -} - -real_t MLPPTanhRegOld::Evaluate(std::vector x) { - MLPPLinAlgOld alg; - MLPPActivationOld avn; - return avn.tanh(alg.dot(weights, x) + bias); -} - -real_t MLPPTanhRegOld::propagate(std::vector x) { - MLPPLinAlgOld alg; - return alg.dot(weights, x) + bias; -} - -// Tanh ( wTx + b ) -void MLPPTanhRegOld::forwardPass() { - MLPPActivationOld avn; - - z = propagate(inputSet); - y_hat = avn.tanh(z); -} diff --git a/mlpp/tanh_reg/tanh_reg_old.h b/mlpp/tanh_reg/tanh_reg_old.h deleted file mode 100644 index 
130977c..0000000 --- a/mlpp/tanh_reg/tanh_reg_old.h +++ /dev/null @@ -1,55 +0,0 @@ - -#ifndef MLPP_TANH_REG_OLD_H -#define MLPP_TANH_REG_OLD_H - -// -// TanhReg.hpp -// -// Created by Marc Melikyan on 10/2/20. -// - -#include "core/math/math_defs.h" - -#include -#include - -class MLPPTanhRegOld { -public: - MLPPTanhRegOld(std::vector> inputSet, std::vector outputSet, std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5); - std::vector modelSetTest(std::vector> X); - real_t modelTest(std::vector x); - void gradientDescent(real_t learning_rate, int max_epoch, bool UI = false); - void SGD(real_t learning_rate, int max_epoch, bool UI = false); - void MBGD(real_t learning_rate, int max_epoch, int mini_batch_size, bool UI = false); - real_t score(); - void save(std::string fileName); - -private: - real_t Cost(std::vector y_hat, std::vector y); - - std::vector Evaluate(std::vector> X); - std::vector propagate(std::vector> X); - real_t Evaluate(std::vector x); - real_t propagate(std::vector x); - void forwardPass(); - - std::vector> inputSet; - std::vector outputSet; - std::vector z; - std::vector y_hat; - std::vector weights; - real_t bias; - - int n; - int k; - - // UI Portion - void UI(int epoch, real_t cost_prev); - - // Regularization Params - std::string reg; - real_t lambda; - real_t alpha; /* This is the controlling param for Elastic Net*/ -}; - -#endif /* TanhReg_hpp */ diff --git a/mlpp/transforms/transforms_old.cpp b/mlpp/transforms/transforms_old.cpp deleted file mode 100644 index 6cff13a..0000000 --- a/mlpp/transforms/transforms_old.cpp +++ /dev/null @@ -1,58 +0,0 @@ -// -// Transforms.cpp -// -// Created by Marc Melikyan on 11/13/20. -// - -#include "transforms_old.h" -#include "../lin_alg/lin_alg_old.h" -#include "core/int_types.h" -#include -#include -#include - -#ifndef M_PI -#define M_PI 3.141592653 -#endif - -// DCT ii. -// https://www.mathworks.com/help/images/discrete-cosine-transform.html -std::vector> MLPPTransformsOld::discreteCosineTransform(std::vector> A) { - MLPPLinAlgOld alg; - A = alg.scalarAdd(-128, A); // Center around 0. 
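// The nested loops below evaluate the 2-D DCT-II directly from its definition:
//   B[i][j] = alpha(i) * alpha(j) *
//             sum_{k,f} A[k][f] * cos(pi * i * (2k + 1) / (2M)) * cos(pi * j * (2f + 1) / (2M))
// where alpha(0) = 1 / sqrt(M) and alpha(p) = sqrt(2 / M) for p > 0. The -128
// shift above centers 8-bit pixel values around zero before transforming. Being
// a direct evaluation, this costs O(M^4) for an M x M block, so it is only
// suited to small blocks.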
- - std::vector> B; - B.resize(A.size()); - for (uint32_t i = 0; i < B.size(); i++) { - B[i].resize(A[i].size()); - } - - int M = A.size(); - - for (uint32_t i = 0; i < B.size(); i++) { - for (uint32_t j = 0; j < B[i].size(); j++) { - real_t sum = 0; - real_t alphaI; - if (i == 0) { - alphaI = 1 / std::sqrt(M); - } else { - alphaI = std::sqrt(real_t(2) / real_t(M)); - } - real_t alphaJ; - if (j == 0) { - alphaJ = 1 / std::sqrt(M); - } else { - alphaJ = std::sqrt(real_t(2) / real_t(M)); - } - - for (uint32_t k = 0; k < B.size(); k++) { - for (uint32_t f = 0; f < B[k].size(); f++) { - sum += A[k][f] * std::cos((M_PI * i * (2 * k + 1)) / (2 * M)) * std::cos((M_PI * j * (2 * f + 1)) / (2 * M)); - } - } - B[i][j] = sum; - B[i][j] *= alphaI * alphaJ; - } - } - return B; -} diff --git a/mlpp/transforms/transforms_old.h b/mlpp/transforms/transforms_old.h deleted file mode 100644 index fc9d972..0000000 --- a/mlpp/transforms/transforms_old.h +++ /dev/null @@ -1,20 +0,0 @@ - -#ifndef MLPP_TRANSFORMS_OLD_H -#define MLPP_TRANSFORMS_OLD_H - -// -// Transforms.hpp -// -// - -#include "core/math/math_defs.h" - -#include -#include - -class MLPPTransformsOld { -public: - std::vector> discreteCosineTransform(std::vector> A); -}; - -#endif /* Transforms_hpp */ diff --git a/mlpp/uni_lin_reg/uni_lin_reg_old.cpp b/mlpp/uni_lin_reg/uni_lin_reg_old.cpp deleted file mode 100644 index afa16c9..0000000 --- a/mlpp/uni_lin_reg/uni_lin_reg_old.cpp +++ /dev/null @@ -1,34 +0,0 @@ -// -// UniLinReg.cpp -// -// Created by Marc Melikyan on 9/29/20. -// - -#include "uni_lin_reg_old.h" - -#include "../lin_alg/lin_alg_old.h" -#include "../stat/stat_old.h" - -#include - -// General Multivariate Linear Regression Model -// ŷ = b0 + b1x1 + b2x2 + ... + bkxk - -// Univariate Linear Regression Model -// ŷ = b0 + b1x1 - -MLPPUniLinRegOld::MLPPUniLinRegOld(std::vector x, std::vector y) : - inputSet(x), outputSet(y) { - MLPPStatOld estimator; - b1 = estimator.b1Estimation(inputSet, outputSet); - b0 = estimator.b0Estimation(inputSet, outputSet); -} - -std::vector MLPPUniLinRegOld::modelSetTest(std::vector x) { - MLPPLinAlgOld alg; - return alg.scalarAdd(b0, alg.scalarMultiply(b1, x)); -} - -real_t MLPPUniLinRegOld::modelTest(real_t input) { - return b0 + b1 * input; -} diff --git a/mlpp/uni_lin_reg/uni_lin_reg_old.h b/mlpp/uni_lin_reg/uni_lin_reg_old.h deleted file mode 100644 index 670e87f..0000000 --- a/mlpp/uni_lin_reg/uni_lin_reg_old.h +++ /dev/null @@ -1,29 +0,0 @@ - -#ifndef MLPP_UNI_LIN_REG_OLD_H -#define MLPP_UNI_LIN_REG_OLD_H - -// -// UniLinReg.hpp -// -// Created by Marc Melikyan on 9/29/20. -// - -#include "core/math/math_defs.h" - -#include - -class MLPPUniLinRegOld { -public: - MLPPUniLinRegOld(std::vector x, std::vector y); - std::vector modelSetTest(std::vector x); - real_t modelTest(real_t x); - -private: - std::vector inputSet; - std::vector outputSet; - - real_t b0; - real_t b1; -}; - -#endif /* UniLinReg_hpp */ diff --git a/mlpp/utilities/utilities_old.cpp b/mlpp/utilities/utilities_old.cpp deleted file mode 100644 index 25f8a3d..0000000 --- a/mlpp/utilities/utilities_old.cpp +++ /dev/null @@ -1,399 +0,0 @@ -// -// Reg.cpp -// -// Created by Marc Melikyan on 1/16/21. 
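// For reference, the initializer branches below correspond to the standard
// fan-based schemes (n is the fan-in; m, in the matrix overload, the fan-out,
// with the vector overload treating the fan-out as 1):
//   Xavier/Glorot normal:  stddev = sqrt(2 / (fan_in + fan_out))
//   Xavier/Glorot uniform: limit  = sqrt(6 / (fan_in + fan_out))
//   He normal:             stddev = sqrt(2 / fan_in)
//   He uniform:            limit  = sqrt(6 / fan_in)
//   LeCun normal:          stddev = sqrt(1 / fan_in)
//   LeCun uniform:         limit  = sqrt(3 / fan_in)
//   Uniform:               limit  = 1 / sqrt(fan_in)
// falling back to U(0, 1) for any other type string. Several of the expressions
// in the code divide two ints (e.g. 2 / (n + 1)), which truncates; the lines
// above give the intended real-valued formulas. A minimal sketch of the
// He-normal case with the variance computed in floating point (the helper name
// and use of double are illustrative, not part of this codebase):

#include <cmath>
#include <random>
#include <vector>

std::vector<double> he_normal_init(int fan_in) {
	std::random_device rd;
	std::default_random_engine gen(rd());
	// Draw each weight from N(0, 2 / fan_in), computed in floating point.
	std::normal_distribution<double> dist(0.0, std::sqrt(2.0 / fan_in));

	std::vector<double> w(fan_in);
	for (int i = 0; i < fan_in; i++) {
		w[i] = dist(gen);
	}
	return w;
}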
-// - -#include "utilities_old.h" - -#include -#include -#include -#include - -std::vector MLPPUtilitiesOld::weightInitialization(int n, std::string type) { - std::random_device rd; - std::default_random_engine generator(rd()); - - std::vector weights; - for (int i = 0; i < n; i++) { - if (type == "XavierNormal") { - std::normal_distribution distribution(0, sqrt(2 / (n + 1))); - weights.push_back(distribution(generator)); - } else if (type == "XavierUniform") { - std::uniform_real_distribution distribution(-sqrt(6 / (n + 1)), sqrt(6 / (n + 1))); - weights.push_back(distribution(generator)); - } else if (type == "HeNormal") { - std::normal_distribution distribution(0, sqrt(2 / n)); - weights.push_back(distribution(generator)); - } else if (type == "HeUniform") { - std::uniform_real_distribution distribution(-sqrt(6 / n), sqrt(6 / n)); - weights.push_back(distribution(generator)); - } else if (type == "LeCunNormal") { - std::normal_distribution distribution(0, sqrt(1 / n)); - weights.push_back(distribution(generator)); - } else if (type == "LeCunUniform") { - std::uniform_real_distribution distribution(-sqrt(3 / n), sqrt(3 / n)); - weights.push_back(distribution(generator)); - } else if (type == "Uniform") { - std::uniform_real_distribution distribution(-1 / sqrt(n), 1 / sqrt(n)); - weights.push_back(distribution(generator)); - } else { - std::uniform_real_distribution distribution(0, 1); - weights.push_back(distribution(generator)); - } - } - return weights; -} - -real_t MLPPUtilitiesOld::biasInitialization() { - std::random_device rd; - std::default_random_engine generator(rd()); - std::uniform_real_distribution distribution(0, 1); - - return distribution(generator); -} - -std::vector> MLPPUtilitiesOld::weightInitialization(int n, int m, std::string type) { - std::random_device rd; - std::default_random_engine generator(rd()); - - std::vector> weights; - weights.resize(n); - - for (int i = 0; i < n; i++) { - for (int j = 0; j < m; j++) { - if (type == "XavierNormal") { - std::normal_distribution distribution(0, sqrt(2 / (n + m))); - weights[i].push_back(distribution(generator)); - } else if (type == "XavierUniform") { - std::uniform_real_distribution distribution(-sqrt(6 / (n + m)), sqrt(6 / (n + m))); - weights[i].push_back(distribution(generator)); - } else if (type == "HeNormal") { - std::normal_distribution distribution(0, sqrt(2 / n)); - weights[i].push_back(distribution(generator)); - } else if (type == "HeUniform") { - std::uniform_real_distribution distribution(-sqrt(6 / n), sqrt(6 / n)); - weights[i].push_back(distribution(generator)); - } else if (type == "LeCunNormal") { - std::normal_distribution distribution(0, sqrt(1 / n)); - weights[i].push_back(distribution(generator)); - } else if (type == "LeCunUniform") { - std::uniform_real_distribution distribution(-sqrt(3 / n), sqrt(3 / n)); - weights[i].push_back(distribution(generator)); - } else if (type == "Uniform") { - std::uniform_real_distribution distribution(-1 / sqrt(n), 1 / sqrt(n)); - weights[i].push_back(distribution(generator)); - } else { - std::uniform_real_distribution distribution(0, 1); - weights[i].push_back(distribution(generator)); - } - } - } - return weights; -} - -std::vector MLPPUtilitiesOld::biasInitialization(int n) { - std::vector bias; - std::random_device rd; - std::default_random_engine generator(rd()); - std::uniform_real_distribution distribution(0, 1); - - for (int i = 0; i < n; i++) { - bias.push_back(distribution(generator)); - } - return bias; -} - -real_t 
MLPPUtilitiesOld::performance(std::vector y_hat, std::vector outputSet) { - real_t correct = 0; - for (uint32_t i = 0; i < y_hat.size(); i++) { - if (std::round(y_hat[i]) == outputSet[i]) { - correct++; - } - } - return correct / y_hat.size(); -} - -real_t MLPPUtilitiesOld::performance(std::vector> y_hat, std::vector> y) { - real_t correct = 0; - for (uint32_t i = 0; i < y_hat.size(); i++) { - uint32_t sub_correct = 0; - for (uint32_t j = 0; j < y_hat[i].size(); j++) { - if (std::round(y_hat[i][j]) == y[i][j]) { - sub_correct++; - } - if (sub_correct == y_hat[0].size()) { - correct++; - } - } - } - return correct / y_hat.size(); -} - -void MLPPUtilitiesOld::saveParameters(std::string fileName, std::vector weights, real_t bias, bool app, int layer) { - std::string layer_info = ""; - std::ofstream saveFile; - - if (layer > -1) { - layer_info = " for layer " + std::to_string(layer); - } - - if (app) { - saveFile.open(fileName.c_str(), std::ios_base::app); - } else { - saveFile.open(fileName.c_str()); - } - - if (!saveFile.is_open()) { - std::cout << fileName << " failed to open." << std::endl; - } - - saveFile << "Weight(s)" << layer_info << std::endl; - for (uint32_t i = 0; i < weights.size(); i++) { - saveFile << weights[i] << std::endl; - } - saveFile << "Bias" << layer_info << std::endl; - saveFile << bias << std::endl; - - saveFile.close(); -} - -void MLPPUtilitiesOld::saveParameters(std::string fileName, std::vector weights, std::vector initial, real_t bias, bool app, int layer) { - std::string layer_info = ""; - std::ofstream saveFile; - - if (layer > -1) { - layer_info = " for layer " + std::to_string(layer); - } - - if (app) { - saveFile.open(fileName.c_str(), std::ios_base::app); - } else { - saveFile.open(fileName.c_str()); - } - - if (!saveFile.is_open()) { - std::cout << fileName << " failed to open." << std::endl; - } - - saveFile << "Weight(s)" << layer_info << std::endl; - for (uint32_t i = 0; i < weights.size(); i++) { - saveFile << weights[i] << std::endl; - } - - saveFile << "Initial(s)" << layer_info << std::endl; - for (uint32_t i = 0; i < initial.size(); i++) { - saveFile << initial[i] << std::endl; - } - - saveFile << "Bias" << layer_info << std::endl; - saveFile << bias << std::endl; - - saveFile.close(); -} - -void MLPPUtilitiesOld::saveParameters(std::string fileName, std::vector> weights, std::vector bias, bool app, int layer) { - std::string layer_info = ""; - std::ofstream saveFile; - - if (layer > -1) { - layer_info = " for layer " + std::to_string(layer); - } - - if (app) { - saveFile.open(fileName.c_str(), std::ios_base::app); - } else { - saveFile.open(fileName.c_str()); - } - - if (!saveFile.is_open()) { - std::cout << fileName << " failed to open." 
<< std::endl; - } - - saveFile << "Weight(s)" << layer_info << std::endl; - for (uint32_t i = 0; i < weights.size(); i++) { - for (uint32_t j = 0; j < weights[i].size(); j++) { - saveFile << weights[i][j] << std::endl; - } - } - saveFile << "Bias(es)" << layer_info << std::endl; - for (uint32_t i = 0; i < bias.size(); i++) { - saveFile << bias[i] << std::endl; - } - - saveFile.close(); -} - -void MLPPUtilitiesOld::UI(std::vector weights, real_t bias) { - std::cout << "Values of the weight(s):" << std::endl; - for (uint32_t i = 0; i < weights.size(); i++) { - std::cout << weights[i] << std::endl; - } - std::cout << "Value of the bias:" << std::endl; - std::cout << bias << std::endl; -} - -void MLPPUtilitiesOld::UI(std::vector> weights, std::vector bias) { - std::cout << "Values of the weight(s):" << std::endl; - for (uint32_t i = 0; i < weights.size(); i++) { - for (uint32_t j = 0; j < weights[i].size(); j++) { - std::cout << weights[i][j] << std::endl; - } - } - std::cout << "Value of the biases:" << std::endl; - for (uint32_t i = 0; i < bias.size(); i++) { - std::cout << bias[i] << std::endl; - } -} - -void MLPPUtilitiesOld::UI(std::vector weights, std::vector initial, real_t bias) { - std::cout << "Values of the weight(s):" << std::endl; - for (uint32_t i = 0; i < weights.size(); i++) { - std::cout << weights[i] << std::endl; - } - std::cout << "Values of the initial(s):" << std::endl; - for (uint32_t i = 0; i < initial.size(); i++) { - std::cout << initial[i] << std::endl; - } - std::cout << "Value of the bias:" << std::endl; - std::cout << bias << std::endl; -} - -void MLPPUtilitiesOld::CostInfo(int epoch, real_t cost_prev, real_t Cost) { - std::cout << "-----------------------------------" << std::endl; - std::cout << "This is epoch: " << epoch << std::endl; - std::cout << "The cost function has been minimized by " << cost_prev - Cost << std::endl; - std::cout << "Current Cost:" << std::endl; - std::cout << Cost << std::endl; -} - -std::vector>> MLPPUtilitiesOld::createMiniBatches(std::vector> inputSet, int n_mini_batch) { - int n = inputSet.size(); - - std::vector>> inputMiniBatches; - - // Creating the mini-batches - for (int i = 0; i < n_mini_batch; i++) { - std::vector> currentInputSet; - for (int j = 0; j < n / n_mini_batch; j++) { - currentInputSet.push_back(inputSet[n / n_mini_batch * i + j]); - } - inputMiniBatches.push_back(currentInputSet); - } - - if (real_t(n) / real_t(n_mini_batch) - int(n / n_mini_batch) != 0) { - for (int i = 0; i < n - n / n_mini_batch * n_mini_batch; i++) { - inputMiniBatches[n_mini_batch - 1].push_back(inputSet[n / n_mini_batch * n_mini_batch + i]); - } - } - return inputMiniBatches; -} - -std::tuple>>, std::vector>> MLPPUtilitiesOld::createMiniBatches(std::vector> inputSet, std::vector outputSet, int n_mini_batch) { - int n = inputSet.size(); - - std::vector>> inputMiniBatches; - std::vector> outputMiniBatches; - - for (int i = 0; i < n_mini_batch; i++) { - std::vector> currentInputSet; - std::vector currentOutputSet; - for (int j = 0; j < n / n_mini_batch; j++) { - currentInputSet.push_back(inputSet[n / n_mini_batch * i + j]); - currentOutputSet.push_back(outputSet[n / n_mini_batch * i + j]); - } - inputMiniBatches.push_back(currentInputSet); - outputMiniBatches.push_back(currentOutputSet); - } - - if (real_t(n) / real_t(n_mini_batch) - int(n / n_mini_batch) != 0) { - for (int i = 0; i < n - n / n_mini_batch * n_mini_batch; i++) { - inputMiniBatches[n_mini_batch - 1].push_back(inputSet[n / n_mini_batch * n_mini_batch + i]); - 
outputMiniBatches[n_mini_batch - 1].push_back(outputSet[n / n_mini_batch * n_mini_batch + i]); - } - } - return { inputMiniBatches, outputMiniBatches }; -} - -std::tuple>>, std::vector>>> MLPPUtilitiesOld::createMiniBatches(std::vector> inputSet, std::vector> outputSet, int n_mini_batch) { - int n = inputSet.size(); - - std::vector>> inputMiniBatches; - std::vector>> outputMiniBatches; - - for (int i = 0; i < n_mini_batch; i++) { - std::vector> currentInputSet; - std::vector> currentOutputSet; - for (int j = 0; j < n / n_mini_batch; j++) { - currentInputSet.push_back(inputSet[n / n_mini_batch * i + j]); - currentOutputSet.push_back(outputSet[n / n_mini_batch * i + j]); - } - inputMiniBatches.push_back(currentInputSet); - outputMiniBatches.push_back(currentOutputSet); - } - - if (real_t(n) / real_t(n_mini_batch) - int(n / n_mini_batch) != 0) { - for (int i = 0; i < n - n / n_mini_batch * n_mini_batch; i++) { - inputMiniBatches[n_mini_batch - 1].push_back(inputSet[n / n_mini_batch * n_mini_batch + i]); - outputMiniBatches[n_mini_batch - 1].push_back(outputSet[n / n_mini_batch * n_mini_batch + i]); - } - } - return { inputMiniBatches, outputMiniBatches }; -} - -std::tuple MLPPUtilitiesOld::TF_PN(std::vector y_hat, std::vector y) { - real_t TP = 0; - real_t FP = 0; - real_t TN = 0; - real_t FN = 0; - for (uint32_t i = 0; i < y_hat.size(); i++) { - if (y_hat[i] == y[i]) { - if (y_hat[i] == 1) { - TP++; - } else { - TN++; - } - } else { - if (y_hat[i] == 1) { - FP++; - } else { - FN++; - } - } - } - return { TP, FP, TN, FN }; -} - -real_t MLPPUtilitiesOld::recall(std::vector y_hat, std::vector y) { - auto res = TF_PN(y_hat, y); - auto TP = std::get<0>(res); - //auto FP = std::get<1>(res); - //auto TN = std::get<2>(res); - auto FN = std::get<3>(res); - - return TP / (TP + FN); -} - -real_t MLPPUtilitiesOld::precision(std::vector y_hat, std::vector y) { - auto res = TF_PN(y_hat, y); - auto TP = std::get<0>(res); - auto FP = std::get<1>(res); - //auto TN = std::get<2>(res); - //auto FN = std::get<3>(res); - - return TP / (TP + FP); -} - -real_t MLPPUtilitiesOld::accuracy(std::vector y_hat, std::vector y) { - auto res = TF_PN(y_hat, y); - auto TP = std::get<0>(res); - auto FP = std::get<1>(res); - auto TN = std::get<2>(res); - auto FN = std::get<3>(res); - - return (TP + TN) / (TP + FP + FN + TN); -} -real_t MLPPUtilitiesOld::f1_score(std::vector y_hat, std::vector y) { - return 2 * precision(y_hat, y) * recall(y_hat, y) / (precision(y_hat, y) + recall(y_hat, y)); -} diff --git a/mlpp/utilities/utilities_old.h b/mlpp/utilities/utilities_old.h deleted file mode 100644 index 6c40195..0000000 --- a/mlpp/utilities/utilities_old.h +++ /dev/null @@ -1,54 +0,0 @@ - -#ifndef MLPP_UTILITIES_OLD_H -#define MLPP_UTILITIES_OLD_H - -// -// Utilities.hpp -// -// Created by Marc Melikyan on 1/16/21. 
-// - -#include "core/math/math_defs.h" - -#include -#include -#include - -class MLPPUtilitiesOld { -public: - // Weight Init - static std::vector weightInitialization(int n, std::string type = "Default"); - static real_t biasInitialization(); - - static std::vector> weightInitialization(int n, int m, std::string type = "Default"); - static std::vector biasInitialization(int n); - - // Cost/Performance related Functions - real_t performance(std::vector y_hat, std::vector y); - real_t performance(std::vector> y_hat, std::vector> y); - - // Parameter Saving Functions - void saveParameters(std::string fileName, std::vector weights, real_t bias, bool app = false, int layer = -1); - void saveParameters(std::string fileName, std::vector weights, std::vector initial, real_t bias, bool app = false, int layer = -1); - void saveParameters(std::string fileName, std::vector> weights, std::vector bias, bool app = false, int layer = -1); - - // Gradient Descent related - static void UI(std::vector weights, real_t bias); - static void UI(std::vector weights, std::vector initial, real_t bias); - static void UI(std::vector> weights, std::vector bias); - - static void CostInfo(int epoch, real_t cost_prev, real_t Cost); - - static std::vector>> createMiniBatches(std::vector> inputSet, int n_mini_batch); - static std::tuple>>, std::vector>> createMiniBatches(std::vector> inputSet, std::vector outputSet, int n_mini_batch); - static std::tuple>>, std::vector>>> createMiniBatches(std::vector> inputSet, std::vector> outputSet, int n_mini_batch); - - // F1 score, Precision/Recall, TP, FP, TN, FN, etc. - std::tuple TF_PN(std::vector y_hat, std::vector y); //TF_PN = "True", "False", "Positive", "Negative" - real_t recall(std::vector y_hat, std::vector y); - real_t precision(std::vector y_hat, std::vector y); - real_t accuracy(std::vector y_hat, std::vector y); - real_t f1_score(std::vector y_hat, std::vector y); -}; - -#endif /* Utilities_hpp */ diff --git a/mlpp/wgan/wgan_old.cpp b/mlpp/wgan/wgan_old.cpp deleted file mode 100644 index 819c07f..0000000 --- a/mlpp/wgan/wgan_old.cpp +++ /dev/null @@ -1,305 +0,0 @@ -// -// WGAN.cpp -// -// Created by Marc Melikyan on 11/4/20. -// - -#include "wgan_old.h" - -#include "core/log/logger.h" - -#include "../activation/activation_old.h" -#include "../cost/cost_old.h" -#include "../lin_alg/lin_alg_old.h" -#include "../regularization/reg_old.h" -#include "../utilities/utilities.h" - -#include "core/object/method_bind_ext.gen.inc" - -#include -#include - -MLPPWGANOld::MLPPWGANOld(real_t k, std::vector> outputSet) : - outputSet(outputSet), n(outputSet.size()), k(k) { -} - -MLPPWGANOld::~MLPPWGANOld() { - delete outputLayer; -} - -std::vector> MLPPWGANOld::generateExample(int n) { - MLPPLinAlgOld alg; - return modelSetTestGenerator(alg.gaussianNoise(n, k)); -} - -void MLPPWGANOld::gradientDescent(real_t learning_rate, int max_epoch, bool UI) { - MLPPLinAlgOld alg; - real_t cost_prev = 0; - int epoch = 1; - forwardPass(); - - const int CRITIC_INTERATIONS = 5; // Wasserstein GAN specific parameter. - - while (true) { - cost_prev = Cost(y_hat, alg.onevec(n)); - - std::vector> generatorInputSet; - std::vector> discriminatorInputSet; - - std::vector y_hat; - std::vector outputSet; - - // Training of the discriminator. 
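// Critic update, following the usual WGAN recipe (Arjovsky et al., 2017): the
// critic is trained CRITIC_INTERATIONS times per generator step and is pushed to
// score real samples high and generated samples low, approximating
//   max_D  E_{x ~ real}[ D(x) ] - E_z[ D(G(z)) ]
// subject to D being (approximately) 1-Lipschitz. The +1 / -1 targets built
// below play the real/fake roles for the Wasserstein loss, and the Lipschitz
// constraint is enforced here by clipping the output layer's weights to
// [-0.01, 0.01] via the "WeightClipping" regularization set in addOutputLayer.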
- for (int i = 0; i < CRITIC_INTERATIONS; i++) { - generatorInputSet = alg.gaussianNoise(n, k); - discriminatorInputSet = modelSetTestGenerator(generatorInputSet); - discriminatorInputSet.insert(discriminatorInputSet.end(), MLPPWGANOld::outputSet.begin(), MLPPWGANOld::outputSet.end()); // Fake + real inputs. - - y_hat = modelSetTestDiscriminator(discriminatorInputSet); - outputSet = alg.scalarMultiply(-1, alg.onevec(n)); // WGAN changes y_i = 1 and y_i = 0 to y_i = 1 and y_i = -1 - std::vector outputSetReal = alg.onevec(n); - outputSet.insert(outputSet.end(), outputSetReal.begin(), outputSetReal.end()); // Fake + real output scores. - - auto discriminator_gradient_results = computeDiscriminatorGradients(y_hat, outputSet); - auto cumulativeDiscriminatorHiddenLayerWGrad = std::get<0>(discriminator_gradient_results); - auto outputDiscriminatorWGrad = std::get<1>(discriminator_gradient_results); - - cumulativeDiscriminatorHiddenLayerWGrad = alg.scalarMultiply(learning_rate / n, cumulativeDiscriminatorHiddenLayerWGrad); - outputDiscriminatorWGrad = alg.scalarMultiply(learning_rate / n, outputDiscriminatorWGrad); - updateDiscriminatorParameters(cumulativeDiscriminatorHiddenLayerWGrad, outputDiscriminatorWGrad, learning_rate); - } - - // Training of the generator. - generatorInputSet = alg.gaussianNoise(n, k); - discriminatorInputSet = modelSetTestGenerator(generatorInputSet); - y_hat = modelSetTestDiscriminator(discriminatorInputSet); - outputSet = alg.onevec(n); - - std::vector>> cumulativeGeneratorHiddenLayerWGrad = computeGeneratorGradients(y_hat, outputSet); - cumulativeGeneratorHiddenLayerWGrad = alg.scalarMultiply(learning_rate / n, cumulativeGeneratorHiddenLayerWGrad); - updateGeneratorParameters(cumulativeGeneratorHiddenLayerWGrad, learning_rate); - - forwardPass(); - - if (UI) { - MLPPWGANOld::UI(epoch, cost_prev, MLPPWGANOld::y_hat, alg.onevec(n)); - } - - epoch++; - if (epoch > max_epoch) { - break; - } - } -} - -real_t MLPPWGANOld::score() { - MLPPLinAlgOld alg; - MLPPUtilities util; - forwardPass(); - return util.performance(y_hat, alg.onevec(n)); -} - -void MLPPWGANOld::save(std::string fileName) { - MLPPUtilities util; - if (!network.empty()) { - util.saveParameters(fileName, network[0].weights, network[0].bias, 0, 1); - for (uint32_t i = 1; i < network.size(); i++) { - util.saveParameters(fileName, network[i].weights, network[i].bias, 1, i + 1); - } - util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, 1, network.size() + 1); - } else { - util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, 0, network.size() + 1); - } -} - -void MLPPWGANOld::addLayer(int n_hidden, std::string activation, std::string weightInit, std::string reg, real_t lambda, real_t alpha) { - MLPPLinAlgOld alg; - if (network.empty()) { - network.push_back(MLPPOldHiddenLayer(n_hidden, activation, alg.gaussianNoise(n, k), weightInit, reg, lambda, alpha)); - network[0].forwardPass(); - } else { - network.push_back(MLPPOldHiddenLayer(n_hidden, activation, network[network.size() - 1].a, weightInit, reg, lambda, alpha)); - network[network.size() - 1].forwardPass(); - } -} - -void MLPPWGANOld::addOutputLayer(std::string weightInit, std::string reg, real_t lambda, real_t alpha) { - MLPPLinAlgOld alg; - if (!network.empty()) { - outputLayer = new MLPPOldOutputLayer(network[network.size() - 1].n_hidden, "Linear", "WassersteinLoss", network[network.size() - 1].a, weightInit, "WeightClipping", -0.01, 0.01); - } else { // Should never happen. 
- outputLayer = new MLPPOldOutputLayer(k, "Linear", "WassersteinLoss", alg.gaussianNoise(n, k), weightInit, "WeightClipping", -0.01, 0.01); - } -} - -std::vector> MLPPWGANOld::modelSetTestGenerator(std::vector> X) { - if (!network.empty()) { - network[0].input = X; - network[0].forwardPass(); - - for (uint32_t i = 1; i <= network.size() / 2; i++) { - network[i].input = network[i - 1].a; - network[i].forwardPass(); - } - } - return network[network.size() / 2].a; -} - -std::vector MLPPWGANOld::modelSetTestDiscriminator(std::vector> X) { - if (!network.empty()) { - for (uint32_t i = network.size() / 2 + 1; i < network.size(); i++) { - if (i == network.size() / 2 + 1) { - network[i].input = X; - } else { - network[i].input = network[i - 1].a; - } - network[i].forwardPass(); - } - outputLayer->input = network[network.size() - 1].a; - } - outputLayer->forwardPass(); - return outputLayer->a; -} - -real_t MLPPWGANOld::Cost(std::vector y_hat, std::vector y) { - MLPPRegOld regularization; - class MLPPCostOld cost; - real_t totalRegTerm = 0; - - auto cost_function = outputLayer->cost_map[outputLayer->cost]; - if (!network.empty()) { - for (uint32_t i = 0; i < network.size() - 1; i++) { - totalRegTerm += regularization.regTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg); - } - } - return (cost.*cost_function)(y_hat, y) + totalRegTerm + regularization.regTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg); -} - -void MLPPWGANOld::forwardPass() { - MLPPLinAlgOld alg; - if (!network.empty()) { - network[0].input = alg.gaussianNoise(n, k); - network[0].forwardPass(); - - for (uint32_t i = 1; i < network.size(); i++) { - network[i].input = network[i - 1].a; - network[i].forwardPass(); - } - outputLayer->input = network[network.size() - 1].a; - } else { // Should never happen, though. 
- outputLayer->input = alg.gaussianNoise(n, k); - } - outputLayer->forwardPass(); - y_hat = outputLayer->a; -} - -void MLPPWGANOld::updateDiscriminatorParameters(std::vector>> hiddenLayerUpdations, std::vector outputLayerUpdation, real_t learning_rate) { - MLPPLinAlgOld alg; - - outputLayer->weights = alg.subtraction(outputLayer->weights, outputLayerUpdation); - outputLayer->bias -= learning_rate * alg.sum_elements(outputLayer->delta) / n; - - if (!network.empty()) { - network[network.size() - 1].weights = alg.subtraction(network[network.size() - 1].weights, hiddenLayerUpdations[0]); - network[network.size() - 1].bias = alg.subtractMatrixRows(network[network.size() - 1].bias, alg.scalarMultiply(learning_rate / n, network[network.size() - 1].delta)); - - for (uint32_t i = network.size() - 2; i > network.size() / 2; i--) { - network[i].weights = alg.subtraction(network[i].weights, hiddenLayerUpdations[(network.size() - 2) - i + 1]); - network[i].bias = alg.subtractMatrixRows(network[i].bias, alg.scalarMultiply(learning_rate / n, network[i].delta)); - } - } -} - -void MLPPWGANOld::updateGeneratorParameters(std::vector>> hiddenLayerUpdations, real_t learning_rate) { - MLPPLinAlgOld alg; - - if (!network.empty()) { - for (int ii = network.size() / 2; ii >= 0; ii--) { - uint32_t i = static_cast(ii); - - //std::cout << network[i].weights.size() << "x" << network[i].weights[0].size() << std::endl; - //std::cout << hiddenLayerUpdations[(network.size() - 2) - i + 1].size() << "x" << hiddenLayerUpdations[(network.size() - 2) - i + 1][0].size() << std::endl; - network[i].weights = alg.subtraction(network[i].weights, hiddenLayerUpdations[(network.size() - 2) - i + 1]); - network[i].bias = alg.subtractMatrixRows(network[i].bias, alg.scalarMultiply(learning_rate / n, network[i].delta)); - } - } -} - -std::tuple>>, std::vector> MLPPWGANOld::computeDiscriminatorGradients(std::vector y_hat, std::vector outputSet) { - class MLPPCostOld cost; - MLPPActivationOld avn; - MLPPLinAlgOld alg; - MLPPRegOld regularization; - - std::vector>> cumulativeHiddenLayerWGrad; // Tensor containing ALL hidden grads. - - auto costDeriv = outputLayer->costDeriv_map[outputLayer->cost]; - auto outputAvn = outputLayer->activation_map[outputLayer->activation]; - outputLayer->delta = alg.hadamard_product((cost.*costDeriv)(y_hat, outputSet), (avn.*outputAvn)(outputLayer->z, 1)); - std::vector outputWGrad = alg.mat_vec_mult(alg.transpose(outputLayer->input), outputLayer->delta); - outputWGrad = alg.addition(outputWGrad, regularization.regDerivTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg)); - - if (!network.empty()) { - auto hiddenLayerAvn = network[network.size() - 1].activation_map[network[network.size() - 1].activation]; - network[network.size() - 1].delta = alg.hadamard_product(alg.outerProduct(outputLayer->delta, outputLayer->weights), (avn.*hiddenLayerAvn)(network[network.size() - 1].z, 1)); - std::vector> hiddenLayerWGrad = alg.matmult(alg.transpose(network[network.size() - 1].input), network[network.size() - 1].delta); - - cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[network.size() - 1].weights, network[network.size() - 1].lambda, network[network.size() - 1].alpha, network[network.size() - 1].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well. 
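// The loop below backpropagates through the remaining discriminator layers with
// the standard recursion: for hidden layer l,
//   delta_l = (delta_{l+1} * W_{l+1}^T) (Hadamard) phi'(z_l)
//   dW_l    = input_l^T * delta_l  (+ the regularization derivative term)
// where phi' is the layer's activation derivative evaluated at z_l. Only the
// discriminator half of the stacked network (indices above network.size() / 2)
// is visited here; the generator half is handled in computeGeneratorGradients.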
- - //std::cout << "HIDDENLAYER FIRST:" << hiddenLayerWGrad.size() << "x" << hiddenLayerWGrad[0].size() << std::endl; - //std::cout << "WEIGHTS SECOND:" << network[network.size() - 1].weights.size() << "x" << network[network.size() - 1].weights[0].size() << std::endl; - - for (uint32_t i = network.size() - 2; i > network.size() / 2; i--) { - auto hiddenLayerAvnl = network[i].activation_map[network[i].activation]; - network[i].delta = alg.hadamard_product(alg.matmult(network[i + 1].delta, alg.transpose(network[i + 1].weights)), (avn.*hiddenLayerAvnl)(network[i].z, 1)); - std::vector> hiddenLayerWGradl = alg.matmult(alg.transpose(network[i].input), network[i].delta); - - cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGradl, regularization.regDerivTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well. - } - } - return { cumulativeHiddenLayerWGrad, outputWGrad }; -} - -std::vector>> MLPPWGANOld::computeGeneratorGradients(std::vector y_hat, std::vector outputSet) { - class MLPPCostOld cost; - MLPPActivationOld avn; - MLPPLinAlgOld alg; - MLPPRegOld regularization; - - std::vector>> cumulativeHiddenLayerWGrad; // Tensor containing ALL hidden grads. - - auto costDeriv = outputLayer->costDeriv_map[outputLayer->cost]; - auto outputAvn = outputLayer->activation_map[outputLayer->activation]; - outputLayer->delta = alg.hadamard_product((cost.*costDeriv)(y_hat, outputSet), (avn.*outputAvn)(outputLayer->z, 1)); - std::vector outputWGrad = alg.mat_vec_mult(alg.transpose(outputLayer->input), outputLayer->delta); - outputWGrad = alg.addition(outputWGrad, regularization.regDerivTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg)); - if (!network.empty()) { - auto hiddenLayerAvn = network[network.size() - 1].activation_map[network[network.size() - 1].activation]; - network[network.size() - 1].delta = alg.hadamard_product(alg.outerProduct(outputLayer->delta, outputLayer->weights), (avn.*hiddenLayerAvn)(network[network.size() - 1].z, 1)); - std::vector> hiddenLayerWGrad = alg.matmult(alg.transpose(network[network.size() - 1].input), network[network.size() - 1].delta); - cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[network.size() - 1].weights, network[network.size() - 1].lambda, network[network.size() - 1].alpha, network[network.size() - 1].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well. - - for (int ii = network.size() - 2; ii >= 0; ii--) { - uint32_t i = static_cast(ii); - auto hiddenLayerAvnl = network[i].activation_map[network[i].activation]; - network[i].delta = alg.hadamard_product(alg.matmult(network[i + 1].delta, alg.transpose(network[i + 1].weights)), (avn.*hiddenLayerAvnl)(network[i].z, 1)); - std::vector> hiddenLayerWGradl = alg.matmult(alg.transpose(network[i].input), network[i].delta); - cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGradl, regularization.regDerivTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well. 
- } - } - return cumulativeHiddenLayerWGrad; -} - -void MLPPWGANOld::UI(int epoch, real_t cost_prev, std::vector y_hat, std::vector outputSet) { - MLPPUtilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet)); - std::cout << "Layer " << network.size() + 1 << ": " << std::endl; - MLPPUtilities::UI(outputLayer->weights, outputLayer->bias); - if (!network.empty()) { - for (int ii = network.size() - 1; ii >= 0; ii--) { - uint32_t i = static_cast(ii); - - std::cout << "Layer " << i + 1 << ": " << std::endl; - MLPPUtilities::UI(network[i].weights, network[i].bias); - } - } -} diff --git a/mlpp/wgan/wgan_old.h b/mlpp/wgan/wgan_old.h deleted file mode 100644 index 7c46610..0000000 --- a/mlpp/wgan/wgan_old.h +++ /dev/null @@ -1,68 +0,0 @@ - -#ifndef MLPP_WGAN_OLD_H -#define MLPP_WGAN_OLD_H - -// -// WGAN.hpp -// -// Created by Marc Melikyan on 11/4/20. -// - -#include "core/containers/vector.h" -#include "core/math/math_defs.h" -#include "core/string/ustring.h" - -#include "core/object/reference.h" - -#include "../lin_alg/mlpp_matrix.h" -#include "../lin_alg/mlpp_vector.h" - -#include "../hidden_layer/hidden_layer_old.h" -#include "../output_layer/output_layer_old.h" - -#include "../activation/activation.h" -#include "../cost/cost.h" -#include "../regularization/reg_old.h" -#include "../utilities/utilities.h" - -#include -#include -#include - -class MLPPWGANOld { -public: - MLPPWGANOld(real_t k, std::vector> outputSet); - ~MLPPWGANOld(); - std::vector> generateExample(int n); - void gradientDescent(real_t learning_rate, int max_epoch, bool UI = false); - real_t score(); - void save(std::string fileName); - - void addLayer(int n_hidden, std::string activation, std::string weightInit = "Default", std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5); - void addOutputLayer(std::string weightInit = "Default", std::string reg = "None", real_t lambda = 0.5, real_t alpha = 0.5); - -private: - std::vector> modelSetTestGenerator(std::vector> X); // Evaluator for the generator of the WGAN. - std::vector modelSetTestDiscriminator(std::vector> X); // Evaluator for the discriminator of the WGAN. - - real_t Cost(std::vector y_hat, std::vector y); - - void forwardPass(); - void updateDiscriminatorParameters(std::vector>> hiddenLayerUpdations, std::vector outputLayerUpdation, real_t learning_rate); - void updateGeneratorParameters(std::vector>> hiddenLayerUpdations, real_t learning_rate); - std::tuple>>, std::vector> computeDiscriminatorGradients(std::vector y_hat, std::vector outputSet); - std::vector>> computeGeneratorGradients(std::vector y_hat, std::vector outputSet); - - void UI(int epoch, real_t cost_prev, std::vector y_hat, std::vector outputSet); - - std::vector> outputSet; - std::vector y_hat; - - std::vector network; - MLPPOldOutputLayer *outputLayer; - - int n; - int k; -}; - -#endif /* WGAN_hpp */ \ No newline at end of file diff --git a/register_types.cpp b/register_types.cpp index 59e2336..406b0f0 100644 --- a/register_types.cpp +++ b/register_types.cpp @@ -72,10 +72,6 @@ SOFTWARE. 
#ifdef TESTS_ENABLED #include "test/mlpp_matrix_tests.h" #include "test/mlpp_tests.h" - -#ifdef OLD_CLASSES_ENABLED -#include "test/mlpp_tests_old.h" -#endif #endif void register_pmlpp_types(ModuleRegistrationLevel p_level) { @@ -134,10 +130,6 @@ void register_pmlpp_types(ModuleRegistrationLevel p_level) { #ifdef TESTS_ENABLED ClassDB::register_class(); ClassDB::register_class(); - -#ifdef OLD_CLASSES_ENABLED - ClassDB::register_class(); -#endif #endif } } diff --git a/test/mlpp_tests_old.cpp b/test/mlpp_tests_old.cpp index 6fec46f..f50d548 100644 --- a/test/mlpp_tests_old.cpp +++ b/test/mlpp_tests_old.cpp @@ -80,6 +80,21 @@ void MLPPTestsOld::test_univariate_linear_regression() { } void MLPPTestsOld::test_multivariate_linear_regression_gradient_descent(bool ui) { + MLPPData data; + MLPPLinAlgOld alg; + + Ref ds = data.load_california_housing(_california_housing_data_path); + + MLPPLinRegOld model_old(ds->get_input()->to_std_vector(), ds->get_output()->to_std_vector()); // Can use Lasso, Ridge, ElasticNet Reg + model_old.SGD(0.00000001, 300000, ui); + alg.printVector(model_old.modelSetTest(ds->get_input()->to_std_vector())); + + //void Momentum(real_t learning_rate, int max_epoch, int mini_batch_size, real_t gamma, bool UI = false); + //void NAG(real_t learning_rate, int max_epoch, int mini_batch_size, real_t gamma, bool UI = false); + //void Adagrad(real_t learning_rate, int max_epoch, int mini_batch_size, real_t e, bool UI = false); + //void Adadelta(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t e, bool UI = false); + //void Adamax(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI = false); + //void Nadam(real_t learning_rate, int max_epoch, int mini_batch_size, real_t b1, real_t b2, real_t e, bool UI = false); } void MLPPTestsOld::test_multivariate_linear_regression_sgd(bool ui) {