Added wasserstein GANs, weight clipping reg method, wasserstein loss

2025-04-23 21:23:22 +02:00 · 2022-02-24 22:52:40 -08:00 · 2022-02-24 22:52:40 -08:00 · 6940fc1fbc
commit 6940fc1fbc
parent 875a10945d
12 changed files with 444 additions and 17 deletions
--- a/MLPP/Cost/Cost.cpp
+++ b/MLPP/Cost/Cost.cpp
@ -344,6 +344,35 @@ namespace MLPP{
        return deriv;
    }
    // Wasserstein loss for a vector of scores: the negated mean of the
    // element-wise products y_hat[i] * y[i], averaged over y_hat.size().
    // y is read at every index of y_hat, so it must be at least as long.
    double Cost::WassersteinLoss(std::vector <double> y_hat, std::vector<double> y){
        double total = 0;
        auto label = y.begin();
        // Walk both vectors in lock-step, accumulating score * label.
        for(auto score = y_hat.begin(); score != y_hat.end(); ++score, ++label){
            total += (*score) * (*label);
        }
        return -total / y_hat.size();
    }
    // Matrix overload of the Wasserstein loss: sums y_hat[i][j] * y[i][j] over
    // every entry, negates the sum, and divides by the number of ROWS of y_hat
    // (not the total number of entries).
    double Cost::WassersteinLoss(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y){
        double total = 0;
        for(std::size_t row = 0; row < y_hat.size(); ++row){
            const std::vector<double>& scores = y_hat[row];
            const std::vector<double>& labels = y[row];
            for(std::size_t col = 0; col < scores.size(); ++col){
                total += scores[col] * labels[col];
            }
        }
        return -total / y_hat.size();
    }
    // Derivative of the Wasserstein loss with respect to y_hat (vector form).
    // The result is simply -y; y_hat is accepted for signature parity with the
    // other cost derivatives and is not read.
    std::vector<double> Cost::WassersteinLossDeriv(std::vector<double> y_hat, std::vector<double> y){
        const double negate = -1;
        return LinAlg().scalarMultiply(negate, y);
    }
    // Derivative of the Wasserstein loss with respect to y_hat (matrix form).
    // The result is -y, entry by entry; y_hat is not read.
    std::vector<std::vector<double>> Cost::WassersteinLossDeriv(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y){
        const double negate = -1;
        return LinAlg().scalarMultiply(negate, y);
    }
    double Cost::HingeLoss(std::vector <double> y_hat, std::vector<double> y, std::vector<double> weights, double C){
        LinAlg alg; 
        Reg regularization;
--- a/MLPP/Cost/Cost.hpp
+++ b/MLPP/Cost/Cost.hpp
@ -68,6 +68,12 @@ namespace MLPP{
            std::vector<double> HingeLossDeriv(std::vector <double> y_hat, std::vector<double> y, double C); 
            std::vector<std::vector<double>> HingeLossDeriv(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y, double C);
            // Wasserstein loss: negated sum of the element-wise products of y_hat and y,
            // divided by y_hat.size() (for the matrix overload that is the row count).
            double WassersteinLoss(std::vector<double> y_hat, std::vector<double> y);
            double WassersteinLoss(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y);

            // Derivative of the Wasserstein loss with respect to y_hat: returns -y (y_hat is unused).
            std::vector<double> WassersteinLossDeriv(std::vector<double> y_hat, std::vector<double> y);
            std::vector<std::vector<double>> WassersteinLossDeriv(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y);
            double dualFormSVM(std::vector<double> alpha, std::vector<std::vector<double>> X, std::vector<double> y); // TO DO: DON'T forget to add non-linear kernelizations. 
            std::vector<double> dualFormSVMDeriv(std::vector<double> alpha, std::vector<std::vector<double>> X, std::vector<double> y);
--- a/MLPP/GAN/GAN.cpp
+++ b/MLPP/GAN/GAN.cpp
@ -107,13 +107,13 @@ namespace MLPP {
        }
    }
-    void GAN::addOutputLayer(std::string activation, std::string loss, std::string weightInit, std::string reg, double lambda, double alpha){
+    void GAN::addOutputLayer(std::string weightInit, std::string reg, double lambda, double alpha){
        LinAlg alg;
        if(!network.empty()){
-            outputLayer = new OutputLayer(network[network.size() - 1].n_hidden, activation, loss, network[network.size() - 1].a, weightInit, reg, lambda, alpha);
+            outputLayer = new OutputLayer(network[network.size() - 1].n_hidden, "Sigmoid", "LogLoss", network[network.size() - 1].a, weightInit, reg, lambda, alpha);
        }
        else{
-            outputLayer = new OutputLayer(k, activation, loss, alg.gaussianNoise(n, k), weightInit, reg, lambda, alpha);
+            outputLayer = new OutputLayer(k, "Sigmoid", "LogLoss", alg.gaussianNoise(n, k), weightInit, reg, lambda, alpha);
        }
    }
--- a/MLPP/GAN/GAN.hpp
+++ b/MLPP/GAN/GAN.hpp
@ -26,7 +26,7 @@ class GAN{
        void save(std::string fileName);
        void addLayer(int n_hidden, std::string activation, std::string weightInit = "Default", std::string reg = "None", double lambda = 0.5, double alpha = 0.5); 
-        void addOutputLayer(std::string activation, std::string loss, std::string weightInit = "Default", std::string reg = "None", double lambda = 0.5, double alpha = 0.5); 
+        void addOutputLayer(std::string weightInit = "Default", std::string reg = "None", double lambda = 0.5, double alpha = 0.5); 
        private:
            std::vector<std::vector<double>> modelSetTestGenerator(std::vector<std::vector<double>> X); // Evaluator for the generator of the gan.
--- a/MLPP/MultiOutputLayer/MultiOutputLayer.cpp
+++ b/MLPP/MultiOutputLayer/MultiOutputLayer.cpp
@ -113,6 +113,8 @@ namespace MLPP {
        cost_map["CrossEntropy"] = &Cost::CrossEntropy;
        costDeriv_map["HingeLoss"] = &Cost::HingeLossDeriv;
        cost_map["HingeLoss"] = &Cost::HingeLoss;
        // "WassersteinLoss" must dispatch to the Wasserstein cost and its derivative;
        // it was previously bound to the hinge-loss functions by a copy-paste slip.
        costDeriv_map["WassersteinLoss"] = &Cost::WassersteinLossDeriv;
        cost_map["WassersteinLoss"] = &Cost::WassersteinLoss;
    }
    void MultiOutputLayer::forwardPass(){
--- a/MLPP/OutputLayer/OutputLayer.cpp
+++ b/MLPP/OutputLayer/OutputLayer.cpp
@ -110,6 +110,8 @@ namespace MLPP {
        cost_map["CrossEntropy"] = &Cost::CrossEntropy;
        costDeriv_map["HingeLoss"] = &Cost::HingeLossDeriv;
        cost_map["HingeLoss"] = &Cost::HingeLoss;
        // "WassersteinLoss" must dispatch to the Wasserstein cost and its derivative;
        // it was previously bound to the hinge-loss functions by a copy-paste slip.
        costDeriv_map["WassersteinLoss"] = &Cost::WassersteinLossDeriv;
        cost_map["WassersteinLoss"] = &Cost::WassersteinLoss;
    }
    void OutputLayer::forwardPass(){
--- a/MLPP/Regularization/Reg.cpp
+++ b/MLPP/Regularization/Reg.cpp
@ -72,6 +72,7 @@ namespace MLPP{
    std::vector<double> Reg::regWeights(std::vector<double> weights, double lambda, double alpha, std::string reg){
        LinAlg alg;
        if(reg == "WeightClipping"){ return regDerivTerm(weights, lambda, alpha, reg); }
        return alg.subtraction(weights, regDerivTerm(weights, lambda, alpha, reg));
        // for(int i = 0; i < weights.size(); i++){
        //     weights[i] -= regDerivTerm(weights, lambda, alpha, reg, i);
@ -81,6 +82,7 @@ namespace MLPP{
    std::vector<std::vector<double>> Reg::regWeights(std::vector<std::vector<double>> weights, double lambda, double alpha, std::string reg){
        LinAlg alg;
        if(reg == "WeightClipping"){ return regDerivTerm(weights, lambda, alpha, reg); }
        return alg.subtraction(weights, regDerivTerm(weights, lambda, alpha, reg));
        // for(int i = 0; i < weights.size(); i++){
        //     for(int j = 0; j < weights[i].size(); j++){
@ -126,6 +128,19 @@ namespace MLPP{
        else if(reg == "ElasticNet"){
            return alpha * lambda * act.sign(weights[j]) + (1 - alpha) * lambda * weights[j];
        }
        else if(reg == "WeightClipping"){ // Preparation for Wasserstein GANs. 
            // We assume lambda is the lower clipping threshold, while alpha is the higher clipping threshold. 
            // alpha > lambda. 
            if(weights[j] > alpha){
                return alpha;
            }
            else if(weights[j] < lambda){
                return lambda;
            }
            else{
                return weights[j];
            }
        }
        else {
            return 0;
        }
@ -142,6 +157,19 @@ namespace MLPP{
        else if(reg == "ElasticNet"){
            return alpha * lambda * act.sign(weights[i][j]) + (1 - alpha) * lambda * weights[i][j];
        }
        else if(reg == "WeightClipping"){ // Preparation for Wasserstein GANs.
            // We assume lambda is the lower clipping threshold, while alpha is the higher clipping threshold. 
            // alpha > lambda. 
            if(weights[i][j] > alpha){
                return alpha;
            }
            else if(weights[i][j] < lambda){
               return lambda;
            }
            else{
                return weights[i][j];
            }
        }
        else {
            return 0;
        }
--- a/MLPP/WGAN/WGAN.cpp
+++ b/MLPP/WGAN/WGAN.cpp
@ -0,0 +1,300 @@
 //
 //  WGAN.cpp
 //
 //  Created by Marc Melikyan on 11/4/20.
 //
 #include "WGAN.hpp"
 #include "Activation/Activation.hpp"
 #include "LinAlg/LinAlg.hpp"
 #include "Regularization/Reg.hpp"
 #include "Utilities/Utilities.hpp"
 #include "Cost/Cost.hpp"
 #include <iostream>
 #include <cmath>
 namespace MLPP {
    // Constructs a WGAN around the real samples in outputSet.
    //   k         - stored (truncated to int) and passed as the second argument of
    //               every gaussianNoise(n, k) call.
    //   outputSet - the real data; n is set to its number of rows.
    // outputLayer starts as nullptr so that destroying a WGAN on which
    // addOutputLayer() was never called does not delete an indeterminate pointer.
    WGAN::WGAN(double k, std::vector<std::vector<double>> outputSet)
    : outputSet(outputSet), outputLayer(nullptr), n(outputSet.size()), k(k)
    {

    }

    // Frees the output layer allocated by addOutputLayer(); deleting nullptr is a no-op.
    WGAN::~WGAN(){
        delete outputLayer;
    }
    // Samples gaussianNoise(n, k) and pushes it through the generator half of the
    // network, returning the generator's activations.
    // Note: the parameter n shadows the member n, so the caller chooses the first
    // gaussianNoise argument here.
    std::vector<std::vector<double>> WGAN::generateExample(int n){
        LinAlg alg;
        auto noise = alg.gaussianNoise(n, k);
        return modelSetTestGenerator(noise);
    }
    void WGAN::gradientDescent(double learning_rate, int max_epoch, bool UI){
        class Cost cost; 
        LinAlg alg;
        double cost_prev = 0;
        int epoch = 1;
        forwardPass();
        const int CRITIC_INTERATIONS = 5; // Wasserstein GAN specific parameter.
        while(true){
            cost_prev = Cost(y_hat, alg.onevec(n));
            std::vector<std::vector<double>> generatorInputSet;
            std::vector<std::vector<double>> discriminatorInputSet;
            std::vector<double> y_hat;
            std::vector<double> outputSet;
            // Training of the discriminator. 
            for(int i = 0; i < CRITIC_INTERATIONS; i++){
                generatorInputSet = alg.gaussianNoise(n, k);
                discriminatorInputSet = modelSetTestGenerator(generatorInputSet);
                discriminatorInputSet.insert(discriminatorInputSet.end(), WGAN::outputSet.begin(), WGAN::outputSet.end()); // Fake + real inputs.
                y_hat = modelSetTestDiscriminator(discriminatorInputSet);
                outputSet = alg.scalarMultiply(-1, alg.onevec(n)); // WGAN changes y_i = 1 and y_i = 0 to y_i = 1 and y_i = -1
                std::vector<double> outputSetReal = alg.onevec(n);
                outputSet.insert(outputSet.end(), outputSetReal.begin(), outputSetReal.end()); // Fake + real output scores.
                auto [cumulativeDiscriminatorHiddenLayerWGrad, outputDiscriminatorWGrad] = computeDiscriminatorGradients(y_hat, outputSet);
                cumulativeDiscriminatorHiddenLayerWGrad = alg.scalarMultiply(learning_rate/n, cumulativeDiscriminatorHiddenLayerWGrad);
                outputDiscriminatorWGrad = alg.scalarMultiply(learning_rate/n, outputDiscriminatorWGrad);
                updateDiscriminatorParameters(cumulativeDiscriminatorHiddenLayerWGrad, outputDiscriminatorWGrad, learning_rate);
            }
            // Training of the generator.
            generatorInputSet = alg.gaussianNoise(n, k);
            discriminatorInputSet = modelSetTestGenerator(generatorInputSet);
            y_hat = modelSetTestDiscriminator(discriminatorInputSet);
            outputSet = alg.onevec(n);
            std::vector<std::vector<std::vector<double>>> cumulativeGeneratorHiddenLayerWGrad = computeGeneratorGradients(y_hat, outputSet);
            cumulativeGeneratorHiddenLayerWGrad = alg.scalarMultiply(learning_rate/n, cumulativeGeneratorHiddenLayerWGrad);
            updateGeneratorParameters(cumulativeGeneratorHiddenLayerWGrad, learning_rate);
            forwardPass();
            if(UI) { WGAN::UI(epoch, cost_prev, WGAN::y_hat, alg.onevec(n)); }
            epoch++;
            if(epoch > max_epoch) { break; }
        }
    }
    // Runs a full forward pass on fresh noise and returns Utilities::performance
    // of the resulting y_hat against an all-ones target vector of length n.
    double WGAN::score(){
        LinAlg alg;
        Utilities util;
        forwardPass();
        const std::vector<double> allOnes = alg.onevec(n);
        return util.performance(y_hat, allOnes);
    }
    // Writes every layer's weights and bias to fileName via Utilities::saveParameters.
    // The first call passes 0 as the fourth argument and every later call passes 1;
    // the fifth argument is the 1-based layer number (the output layer comes last).
    void WGAN::save(std::string fileName){
        Utilities util;
        if(network.empty()){
            // No hidden layers: the output layer is the only thing to store.
            util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, 0, network.size() + 1);
            return;
        }

        util.saveParameters(fileName, network[0].weights, network[0].bias, 0, 1);
        for(int layer = 1; layer < network.size(); layer++){
            util.saveParameters(fileName, network[layer].weights, network[layer].bias, 1, layer + 1);
        }
        util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, 1, network.size() + 1);
    }
    // Appends a hidden layer and immediately runs its forward pass.
    // The first layer is fed gaussianNoise(n, k); every later layer is fed the
    // activations (a) of the layer that precedes it.
    void WGAN::addLayer(int n_hidden, std::string activation, std::string weightInit, std::string reg, double lambda, double alpha){
        LinAlg alg;
        if(!network.empty()){
            network.push_back(HiddenLayer(n_hidden, activation, network.back().a, weightInit, reg, lambda, alpha));
            network.back().forwardPass();
            return;
        }
        network.push_back(HiddenLayer(n_hidden, activation, alg.gaussianNoise(n, k), weightInit, reg, lambda, alpha));
        network.front().forwardPass();
    }
    // Allocates the critic's output layer: "Linear" activation, "WassersteinLoss"
    // cost and "WeightClipping" regularisation with bounds -0.01 / 0.01.
    // Only weightInit is honoured; reg, lambda and alpha are accepted but ignored
    // because the WGAN settings above are hard-wired.
    void WGAN::addOutputLayer(std::string weightInit, std::string reg, double lambda, double alpha){
        LinAlg alg;
        if(network.empty()){ // Should never happen.
            outputLayer = new OutputLayer(k, "Linear", "WassersteinLoss", alg.gaussianNoise(n, k), weightInit, "WeightClipping", -0.01, 0.01);
            return;
        }
        // Normal case: the output layer consumes the last hidden layer's activations.
        auto& lastHidden = network.back();
        outputLayer = new OutputLayer(lastHidden.n_hidden, "Linear", "WassersteinLoss", lastHidden.a, weightInit, "WeightClipping", -0.01, 0.01);
    }
    // Evaluates the generator half of the network on X.
    // Layers 0 .. network.size()/2 are treated as the generator; X is fed to layer 0
    // and each later generator layer consumes the previous layer's activations.
    // Returns the activations of layer network.size()/2.
    // With no hidden layers there is no generator to run, so X is returned unchanged
    // instead of indexing an empty vector (previously an out-of-bounds access).
    std::vector<std::vector<double>> WGAN::modelSetTestGenerator(std::vector<std::vector<double>> X){
        if(network.empty()){
            return X;
        }

        const int lastGeneratorLayer = static_cast<int>(network.size()/2);
        network[0].input = X;
        network[0].forwardPass();
        for(int i = 1; i <= lastGeneratorLayer; i++){
            network[i].input = network[i - 1].a;
            network[i].forwardPass();
        }
        return network[lastGeneratorLayer].a;
    }
    // Evaluates the discriminator (critic) half of the network on X.
    // Layers network.size()/2 + 1 .. network.size() - 1 form the critic: the first
    // of them receives X, the rest receive the previous layer's activations, and the
    // output layer is fed the last hidden layer's activations.
    // When the network is empty the output layer's input is left as it was.
    std::vector<double> WGAN::modelSetTestDiscriminator(std::vector<std::vector<double>> X){
        if(!network.empty()){
            const int firstCriticLayer = network.size()/2 + 1;
            for(int layer = firstCriticLayer; layer < network.size(); layer++){
                if(layer != firstCriticLayer){
                    network[layer].input = network[layer - 1].a;
                }
                else {
                    network[layer].input = X;
                }
                network[layer].forwardPass();
            }
            outputLayer->input = network.back().a;
        }
        outputLayer->forwardPass();
        return outputLayer->a;
    }
    // Total cost: the output layer's configured cost function applied to (y_hat, y),
    // plus the regularisation terms of hidden layers 0 .. network.size() - 2,
    // plus the output layer's regularisation term.
    // NOTE(review): the last hidden layer's regTerm is not included by the loop
    // bound below — confirm whether that is intentional.
    double WGAN::Cost(std::vector<double> y_hat, std::vector<double> y){
        Reg regularization;
        class Cost cost; // elaborated specifier: inside WGAN the name Cost means this member function
        auto cost_function = outputLayer->cost_map[outputLayer->cost];

        double hiddenRegTerm = 0;
        if(!network.empty()){
            const int regularizedLayers = network.size() - 1;
            for(int layer = 0; layer < regularizedLayers; layer++){
                hiddenRegTerm += regularization.regTerm(network[layer].weights, network[layer].lambda, network[layer].alpha, network[layer].reg);
            }
        }

        const double lossValue = (cost.*cost_function)(y_hat, y);
        const double outputRegTerm = regularization.regTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg);
        return lossValue + hiddenRegTerm + outputRegTerm;
    }
    // Full forward pass through generator, critic and output layer on fresh
    // gaussianNoise(n, k); the output layer's activations are stored in y_hat.
    void WGAN::forwardPass(){
        LinAlg alg;
        if(network.empty()){ // Should never happen, though.
            outputLayer->input = alg.gaussianNoise(n, k);
        }
        else{
            network[0].input = alg.gaussianNoise(n, k);
            network[0].forwardPass();
            // Chain every remaining hidden layer onto its predecessor's activations.
            for(int layer = 1; layer < network.size(); layer++){
                network[layer].input = network[layer - 1].a;
                network[layer].forwardPass();
            }
            outputLayer->input = network.back().a;
        }
        outputLayer->forwardPass();
        y_hat = outputLayer->a;
    }
    // Applies one gradient step to the critic: the output layer plus hidden layers
    // network.size() - 1 down to network.size()/2 + 1.
    //   hiddenLayerUpdations - already-scaled weight updates, ordered from the last
    //                          hidden layer backwards (index 0 = last hidden layer).
    //   outputLayerUpdation  - already-scaled weight update for the output layer.
    //   learning_rate        - used for the bias updates, which are derived from the
    //                          layers' stored delta values.
    // Layer counts are handled as int: the previous int-vs-size_t comparison let
    // i == -1 pass the loop test when only one hidden layer exists.
    void WGAN::updateDiscriminatorParameters(std::vector<std::vector<std::vector<double>>> hiddenLayerUpdations, std::vector<double> outputLayerUpdation, double learning_rate){
        LinAlg alg;
        const int layerCount = static_cast<int>(network.size());

        outputLayer->weights = alg.subtraction(outputLayer->weights, outputLayerUpdation);
        outputLayer->bias -= learning_rate * alg.sum_elements(outputLayer->delta) / n;

        if(layerCount > 0){
            auto& lastHidden = network[layerCount - 1];
            lastHidden.weights = alg.subtraction(lastHidden.weights, hiddenLayerUpdations[0]);
            lastHidden.bias = alg.subtractMatrixRows(lastHidden.bias, alg.scalarMultiply(learning_rate/n, lastHidden.delta));

            for(int i = layerCount - 2; i > layerCount/2; i--){
                // Updates are stored back-to-front, so layer i lives at index layerCount - 1 - i.
                network[i].weights = alg.subtraction(network[i].weights, hiddenLayerUpdations[layerCount - 1 - i]);
                network[i].bias = alg.subtractMatrixRows(network[i].bias, alg.scalarMultiply(learning_rate/n, network[i].delta));
            }
        }
    }
    // Applies one gradient step to the generator layers network.size()/2 down to 0.
    // hiddenLayerUpdations is ordered from the last hidden layer backwards, so layer
    // i is found at index network.size() - 1 - i. Biases are updated from each
    // layer's stored delta, scaled by learning_rate/n.
    void WGAN::updateGeneratorParameters(std::vector<std::vector<std::vector<double>>> hiddenLayerUpdations, double learning_rate){
        LinAlg alg;
        if(network.empty()){
            return;
        }
        for(int i = network.size()/2; i >= 0; i--){
            auto& layer = network[i];
            const auto& weightStep = hiddenLayerUpdations[network.size() - 1 - i];
            layer.weights = alg.subtraction(layer.weights, weightStep);
            layer.bias = alg.subtractMatrixRows(layer.bias, alg.scalarMultiply(learning_rate/n, layer.delta));
        }
    }
    // Back-propagates the cost derivative through the output layer and the critic's
    // hidden layers (network.size() - 1 down to network.size()/2 + 1), storing each
    // layer's delta as a side effect.
    // Returns {hidden-layer weight gradients ordered last layer first, output-layer
    // weight gradient}; every gradient has the layer's regDerivTerm added.
    // NOTE(review): for "WeightClipping", Reg::regDerivTerm returns the clipped
    // weights rather than a derivative, so that value is what gets added here —
    // confirm this is the intended treatment.
    // Layer counts are handled as int: the previous int-vs-size_t comparison let
    // i == -1 pass the loop test when only one hidden layer exists.
    std::tuple<std::vector<std::vector<std::vector<double>>>, std::vector<double>> WGAN::computeDiscriminatorGradients(std::vector<double> y_hat, std::vector<double> outputSet){
        class Cost cost; 
        Activation avn;
        LinAlg alg;
        Reg regularization;
        const int layerCount = static_cast<int>(network.size());

        std::vector<std::vector<std::vector<double>>> cumulativeHiddenLayerWGrad; // Tensor containing ALL hidden grads. 

        // Output layer: delta = dCost/dy_hat (hadamard) activation'(z).
        auto costDeriv = outputLayer->costDeriv_map[outputLayer->cost];
        auto outputAvn = outputLayer->activation_map[outputLayer->activation];
        outputLayer->delta = alg.hadamard_product((cost.*costDeriv)(y_hat, outputSet), (avn.*outputAvn)(outputLayer->z, 1));
        std::vector<double> outputWGrad = alg.mat_vec_mult(alg.transpose(outputLayer->input), outputLayer->delta);
        outputWGrad = alg.addition(outputWGrad, regularization.regDerivTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg));

        if(layerCount > 0){
            // Last hidden layer: its delta comes from the output layer's delta and weights.
            auto& lastHidden = network[layerCount - 1];
            auto lastHiddenAvn = lastHidden.activation_map[lastHidden.activation];
            lastHidden.delta = alg.hadamard_product(alg.outerProduct(outputLayer->delta, outputLayer->weights), (avn.*lastHiddenAvn)(lastHidden.z, 1));
            std::vector<std::vector<double>> lastHiddenWGrad = alg.matmult(alg.transpose(lastHidden.input), lastHidden.delta);
            cumulativeHiddenLayerWGrad.push_back(alg.addition(lastHiddenWGrad, regularization.regDerivTerm(lastHidden.weights, lastHidden.lambda, lastHidden.alpha, lastHidden.reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.

            // Remaining critic layers, walking backwards towards the generator boundary.
            for(int i = layerCount - 2; i > layerCount/2; i--){
                auto hiddenLayerAvn = network[i].activation_map[network[i].activation];
                network[i].delta = alg.hadamard_product(alg.matmult(network[i + 1].delta, alg.transpose(network[i + 1].weights)), (avn.*hiddenLayerAvn)(network[i].z, 1));
                std::vector<std::vector<double>> hiddenLayerWGrad = alg.matmult(alg.transpose(network[i].input), network[i].delta);
                cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
            }
        }
        return {cumulativeHiddenLayerWGrad, outputWGrad};
    }
    // Back-propagates the cost derivative through the output layer and ALL hidden
    // layers (network.size() - 1 down to 0), storing each layer's delta as a side
    // effect. Returns the hidden-layer weight gradients ordered last layer first,
    // each with the layer's regDerivTerm added.
    // The output layer's own weight gradient is not computed: it was never used or
    // returned by this function, so the dead computation has been dropped.
    std::vector<std::vector<std::vector<double>>> WGAN::computeGeneratorGradients(std::vector<double> y_hat, std::vector<double> outputSet){
        class Cost cost; 
        Activation avn;
        LinAlg alg;
        Reg regularization;

        std::vector<std::vector<std::vector<double>>> cumulativeHiddenLayerWGrad; // Tensor containing ALL hidden grads. 

        // Output layer delta is still needed as the starting point of back-propagation.
        auto costDeriv = outputLayer->costDeriv_map[outputLayer->cost];
        auto outputAvn = outputLayer->activation_map[outputLayer->activation];
        outputLayer->delta = alg.hadamard_product((cost.*costDeriv)(y_hat, outputSet), (avn.*outputAvn)(outputLayer->z, 1));

        if(!network.empty()){
            // Last hidden layer: its delta comes from the output layer's delta and weights.
            auto& lastHidden = network[network.size() - 1];
            auto lastHiddenAvn = lastHidden.activation_map[lastHidden.activation];
            lastHidden.delta = alg.hadamard_product(alg.outerProduct(outputLayer->delta, outputLayer->weights), (avn.*lastHiddenAvn)(lastHidden.z, 1));
            std::vector<std::vector<double>> lastHiddenWGrad = alg.matmult(alg.transpose(lastHidden.input), lastHidden.delta);
            cumulativeHiddenLayerWGrad.push_back(alg.addition(lastHiddenWGrad, regularization.regDerivTerm(lastHidden.weights, lastHidden.lambda, lastHidden.alpha, lastHidden.reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.

            // Every earlier layer, critic and generator alike, down to layer 0.
            for(int i = static_cast<int>(network.size()) - 2; i >= 0; i--){
                auto hiddenLayerAvn = network[i].activation_map[network[i].activation];
                network[i].delta = alg.hadamard_product(alg.matmult(network[i + 1].delta, alg.transpose(network[i + 1].weights)), (avn.*hiddenLayerAvn)(network[i].z, 1));
                std::vector<std::vector<double>> hiddenLayerWGrad = alg.matmult(alg.transpose(network[i].input), network[i].delta);
                cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
            }
        }
        return cumulativeHiddenLayerWGrad;
    }
    // Prints the cost transition for this epoch, then the weights and bias of the
    // output layer followed by every hidden layer from last to first.
    // Layers are numbered from 1; the output layer is network.size() + 1.
    void WGAN::UI(int epoch, double cost_prev, std::vector<double> y_hat, std::vector<double> outputSet){
        Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));

        std::cout << "Layer " << network.size() + 1 << ": " << std::endl;
        Utilities::UI(outputLayer->weights, outputLayer->bias); 

        // layerNumber is the 1-based label; the matching vector index is layerNumber - 1.
        for(int layerNumber = static_cast<int>(network.size()); layerNumber >= 1; layerNumber--){
            std::cout << "Layer " << layerNumber << ": " << std::endl;
            Utilities::UI(network[layerNumber - 1].weights, network[layerNumber - 1].bias); 
        }
    }
 }
--- a/MLPP/WGAN/WGAN.hpp
+++ b/MLPP/WGAN/WGAN.hpp
@ -0,0 +1,56 @@
 //
 //  WGAN.hpp
 //
 //  Created by Marc Melikyan on 11/4/20.
 //
 #ifndef WGAN_hpp
 #define WGAN_hpp
 #include "HiddenLayer/HiddenLayer.hpp"
 #include "OutputLayer/OutputLayer.hpp"
 #include <vector>
 #include <tuple>
 #include <string>
 namespace  MLPP{
 // Tabular Wasserstein GAN. Hidden layers are added with addLayer() and the
 // critic's output layer with addOutputLayer(); gradientDescent() then trains
 // the model and generateExample() samples from the generator.
 //
 // Ownership: outputLayer is a raw owning pointer released in the destructor.
 // It defaults to nullptr so destruction is safe even if addOutputLayer() was
 // never called. The class defines no copy operations of its own, so copying a
 // WGAN would leave two objects owning the same OutputLayer — avoid copies.
 class WGAN{
        public:
        // k is stored as int and passed as the second argument of gaussianNoise(n, k);
        // outputSet holds the real samples (n is its row count).
        WGAN(double k, std::vector<std::vector<double>> outputSet);
        ~WGAN();
        // Runs the generator on gaussianNoise(n, k) using the n given here.
        std::vector<std::vector<double>> generateExample(int n);
        // Trains for max_epoch epochs; UI toggles per-epoch printing.
        void gradientDescent(double learning_rate, int max_epoch, bool UI = 1);
        double score(); 
        void save(std::string fileName);

        void addLayer(int n_hidden, std::string activation, std::string weightInit = "Default", std::string reg = "None", double lambda = 0.5, double alpha = 0.5); 
        // Only weightInit is used; the implementation fixes the activation, loss and
        // regularisation of the output layer.
        void addOutputLayer(std::string weightInit = "Default", std::string reg = "None", double lambda = 0.5, double alpha = 0.5); 

        private:
            std::vector<std::vector<double>> modelSetTestGenerator(std::vector<std::vector<double>> X); // Evaluator for the generator of the WGAN.
            std::vector<double> modelSetTestDiscriminator(std::vector<std::vector<double>> X); // Evaluator for the discriminator of the WGAN.

            double Cost(std::vector<double> y_hat, std::vector<double> y);

            void forwardPass();
            void updateDiscriminatorParameters(std::vector<std::vector<std::vector<double>>> hiddenLayerUpdations, std::vector<double> outputLayerUpdation, double learning_rate);
            void updateGeneratorParameters(std::vector<std::vector<std::vector<double>>> hiddenLayerUpdations, double learning_rate);
            std::tuple<std::vector<std::vector<std::vector<double>>>, std::vector<double>> computeDiscriminatorGradients(std::vector<double> y_hat, std::vector<double> outputSet);
            std::vector<std::vector<std::vector<double>>> computeGeneratorGradients(std::vector<double> y_hat, std::vector<double> outputSet);

            void UI(int epoch, double cost_prev, std::vector<double> y_hat, std::vector<double> outputSet);

            std::vector<std::vector<double>> outputSet; // real samples
            std::vector<double> y_hat;                  // output-layer activations from the last forwardPass()

            std::vector<HiddenLayer> network;           // generator layers first, critic layers after index size()/2
            OutputLayer *outputLayer = nullptr;         // owned; allocated by addOutputLayer()

            int n; // number of rows in outputSet
            int k; // second argument of gaussianNoise(n, k)
    };
 }
 #endif /* WGAN_hpp */
--- a/README.md
+++ b/README.md
@ -119,10 +119,12 @@ The result will be the model's predictions for the entire dataset.
        - Log Loss
        - Cross Entropy
        - Hinge Loss
        - Wasserstein Loss
    4. Possible Regularization Methods
        - Lasso
        - Ridge
        - ElasticNet
        - Weight Clipping
    5. Possible Weight Initialization Methods
        - Uniform 
        - Xavier Normal
@ -142,6 +144,7 @@ The result will be the model's predictions for the entire dataset.
    3. Softmax Network
 4. ***Generative Modeling***
    1. Tabular Generative Adversarial Networks
    2. Tabular Wasserstein Generative Adversarial Networks
 5. ***Natural Language Processing***
    1. Word2Vec (Continuous Bag of Words, Skip-Gram)
    2. Stemming
--- a/buildSO.sh
+++ b/buildSO.sh
@ -1,6 +1,6 @@
-g++ -I MLPP -c -fPIC main.cpp MLPP/Stat/Stat.cpp MLPP/LinAlg/LinAlg.cpp MLPP/Regularization/Reg.cpp MLPP/Activation/Activation.cpp MLPP/Utilities/Utilities.cpp MLPP/Data/Data.cpp MLPP/Cost/Cost.cpp MLPP/ANN/ANN.cpp MLPP/HiddenLayer/HiddenLayer.cpp MLPP/OutputLayer/OutputLayer.cpp MLPP/MLP/MLP.cpp MLPP/LinReg/LinReg.cpp MLPP/LogReg/LogReg.cpp MLPP/UniLinReg/UniLinReg.cpp MLPP/CLogLogReg/CLogLogReg.cpp MLPP/ExpReg/ExpReg.cpp MLPP/ProbitReg/ProbitReg.cpp MLPP/SoftmaxReg/SoftmaxReg.cpp MLPP/TanhReg/TanhReg.cpp MLPP/SoftmaxNet/SoftmaxNet.cpp MLPP/Convolutions/Convolutions.cpp MLPP/AutoEncoder/AutoEncoder.cpp MLPP/MultinomialNB/MultinomialNB.cpp MLPP/BernoulliNB/BernoulliNB.cpp MLPP/GaussianNB/GaussianNB.cpp MLPP/KMeans/KMeans.cpp MLPP/kNN/kNN.cpp MLPP/PCA/PCA.cpp MLPP/OutlierFinder/OutlierFinder.cpp MLPP/MANN/MANN.cpp MLPP/MultiOutputLayer/MultiOutputLayer.cpp MLPP/SVC/SVC.cpp MLPP/NumericalAnalysis/NumericalAnalysis.cpp MLPP/DualSVC/DualSVC.cpp MLPP/Transforms/Transforms.cpp --std=c++17
+g++ -I MLPP -c -fPIC main.cpp MLPP/Stat/Stat.cpp MLPP/LinAlg/LinAlg.cpp MLPP/Regularization/Reg.cpp MLPP/Activation/Activation.cpp MLPP/Utilities/Utilities.cpp MLPP/Data/Data.cpp MLPP/Cost/Cost.cpp MLPP/ANN/ANN.cpp MLPP/HiddenLayer/HiddenLayer.cpp MLPP/OutputLayer/OutputLayer.cpp MLPP/MLP/MLP.cpp MLPP/LinReg/LinReg.cpp MLPP/LogReg/LogReg.cpp MLPP/UniLinReg/UniLinReg.cpp MLPP/CLogLogReg/CLogLogReg.cpp MLPP/ExpReg/ExpReg.cpp MLPP/ProbitReg/ProbitReg.cpp MLPP/SoftmaxReg/SoftmaxReg.cpp MLPP/TanhReg/TanhReg.cpp MLPP/SoftmaxNet/SoftmaxNet.cpp MLPP/Convolutions/Convolutions.cpp MLPP/AutoEncoder/AutoEncoder.cpp MLPP/MultinomialNB/MultinomialNB.cpp MLPP/BernoulliNB/BernoulliNB.cpp MLPP/GaussianNB/GaussianNB.cpp MLPP/KMeans/KMeans.cpp MLPP/kNN/kNN.cpp MLPP/PCA/PCA.cpp MLPP/OutlierFinder/OutlierFinder.cpp MLPP/MANN/MANN.cpp MLPP/MultiOutputLayer/MultiOutputLayer.cpp MLPP/SVC/SVC.cpp MLPP/NumericalAnalysis/NumericalAnalysis.cpp MLPP/DualSVC/DualSVC.cpp MLPP/Transforms/Transforms.cpp MLPP/GAN/GAN.cpp MLPP/WGAN/WGAN.cpp --std=c++17
-g++ -shared -o MLPP.so Reg.o LinAlg.o Stat.o Activation.o LinReg.o Utilities.o Cost.o LogReg.o ProbitReg.o ExpReg.o CLogLogReg.o SoftmaxReg.o TanhReg.o kNN.o KMeans.o UniLinReg.o SoftmaxNet.o MLP.o AutoEncoder.o HiddenLayer.o OutputLayer.o ANN.o BernoulliNB.o GaussianNB.o MultinomialNB.o Convolutions.o OutlierFinder.o Data.o MultiOutputLayer.o MANN.o  SVC.o NumericalAnalysis.o DualSVC.o 
+g++ -shared -o MLPP.so Reg.o LinAlg.o Stat.o Activation.o LinReg.o Utilities.o Cost.o LogReg.o ProbitReg.o ExpReg.o CLogLogReg.o SoftmaxReg.o TanhReg.o kNN.o KMeans.o UniLinReg.o SoftmaxNet.o MLP.o AutoEncoder.o HiddenLayer.o OutputLayer.o ANN.o BernoulliNB.o GaussianNB.o MultinomialNB.o Convolutions.o OutlierFinder.o Data.o MultiOutputLayer.o MANN.o  SVC.o NumericalAnalysis.o DualSVC.o GAN.o WGAN.o
 sudo mv MLPP.so /usr/local/lib
 rm *.o
--- a/main.cpp
+++ b/main.cpp
@ -48,6 +48,7 @@
 #include "MLPP/NumericalAnalysis/NumericalAnalysis.hpp"
 #include "MLPP/DualSVC/DualSVC.hpp"
 #include "MLPP/GAN/GAN.hpp"
 #include "MLPP/WGAN/WGAN.hpp"
 #include "MLPP/Transforms/Transforms.hpp"
 using namespace MLPP;
@ -364,17 +365,17 @@ int main() {
    // alg.printVector(ann.modelSetTest(alg.transpose(inputSet)));
    // std::cout << "ACCURACY: " << 100 * ann.score() << "%" << std::endl;
-    //std::vector<std::vector<double>> outputSet = {{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}, 
+    std::vector<std::vector<double>> outputSet = {{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}, 
-    //                                            {2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40}};
+                                               {2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40}};
-    //Vector outputSet = {0,1,1,0};
+
-    // GAN gan(2, alg.transpose(outputSet));
+    WGAN gan(2, alg.transpose(outputSet)); // our gan is a wasserstein gan (wgan)
-    // gan.addLayer(5, "Sigmoid");
+    gan.addLayer(5, "Sigmoid");
-    // gan.addLayer(2, "RELU");
+    gan.addLayer(2, "RELU");
-    // gan.addLayer(5, "Sigmoid");
+    gan.addLayer(5, "Sigmoid");
-    // gan.addOutputLayer("Sigmoid", "LogLoss");
+    gan.addOutputLayer(); // User can specify weight init- if necessary.
-    // gan.gradientDescent(0.1, 25000, 0);
+    gan.gradientDescent(0.1, 55000, 0);
-    // std::cout << "GENERATED INPUT: (Gaussian-sampled noise):" << std::endl;
+    std::cout << "GENERATED INPUT: (Gaussian-sampled noise):" << std::endl;
-    // alg.printMatrix(gan.generateExample(100));
+    alg.printMatrix(gan.generateExample(100));
    // typedef std::vector<std::vector<double>> Matrix;