Added wasserstein GANs, weight clipping reg method, wasserstein loss

2025-04-13 21:00:46 +02:00 · 2022-02-24 22:52:40 -08:00 · 2022-02-24 22:52:40 -08:00 · 6940fc1fbc
commit 6940fc1fbc
parent 875a10945d
12 changed files with 444 additions and 17 deletions
--- a/MLPP/Cost/Cost.cpp
+++ b/MLPP/Cost/Cost.cpp
@ -344,6 +344,35 @@ namespace MLPP{
        return deriv;
    }

+    // Wasserstein loss for vector outputs: the negated sum of y_hat[i] * y[i], divided by y_hat.size().
+    // y is indexed with y_hat's indices, so it must be at least as long as y_hat.
+    double Cost::WassersteinLoss(std::vector <double> y_hat, std::vector<double> y){
+        double weightedScoreTotal = 0;
+        int idx = 0;
+        while(idx < y_hat.size()){
+            weightedScoreTotal += y_hat[idx] * y[idx];
+            idx++;
+        }
+        return -weightedScoreTotal / y_hat.size();
+    }
+
+    // Matrix overload: accumulates y_hat[i][j] * y[i][j] over every entry, then negates the total
+    // and divides it by the number of rows (y_hat.size()), not by the number of entries.
+    double Cost::WassersteinLoss(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y){
+        double weightedScoreTotal = 0;
+        for(int row = 0; row < y_hat.size(); row++){
+            for(int col = 0; col < y_hat[row].size(); col++){
+                const double product = y_hat[row][col] * y[row][col];
+                weightedScoreTotal += product;
+            }
+        }
+        const double negatedTotal = -weightedScoreTotal;
+        return negatedTotal / y_hat.size();
+    }
+
+    // Derivative of the Wasserstein loss with respect to y_hat: y scaled by -1.
+    // y_hat is not read, and the division by y_hat.size() done in WassersteinLoss is not applied here.
+    std::vector<double> Cost::WassersteinLossDeriv(std::vector<double> y_hat, std::vector<double> y){
+        LinAlg alg;
+        std::vector<double> negatedLabels = alg.scalarMultiply(-1, y);
+        return negatedLabels;
+    }
+
+    // Matrix overload of the Wasserstein loss derivative: y scaled by -1; y_hat is not read.
+    std::vector<std::vector<double>> Cost::WassersteinLossDeriv(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y){
+        LinAlg alg;
+        std::vector<std::vector<double>> negatedLabels = alg.scalarMultiply(-1, y);
+        return negatedLabels;
+    }
+
+
    double Cost::HingeLoss(std::vector <double> y_hat, std::vector<double> y, std::vector<double> weights, double C){
        LinAlg alg; 
        Reg regularization;
--- a/MLPP/Cost/Cost.hpp
+++ b/MLPP/Cost/Cost.hpp
@ -68,6 +68,12 @@ namespace MLPP{
            std::vector<double> HingeLossDeriv(std::vector <double> y_hat, std::vector<double> y, double C); 
            std::vector<std::vector<double>> HingeLossDeriv(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y, double C);

+            double WassersteinLoss(std::vector<double> y_hat, std::vector<double> y); // Negated sum of y_hat[i] * y[i], divided by y_hat.size().
+            double WassersteinLoss(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y); // Matrix overload; the sum over all entries is divided by the number of rows.
+
+            std::vector<double> WassersteinLossDeriv(std::vector<double> y_hat, std::vector<double> y); // Returns y scaled by -1; y_hat is not read.
+            std::vector<std::vector<double>> WassersteinLossDeriv(std::vector<std::vector<double>> y_hat, std::vector<std::vector<double>> y); // Matrix overload; returns y scaled by -1.
+
            double dualFormSVM(std::vector<double> alpha, std::vector<std::vector<double>> X, std::vector<double> y); // TO DO: DON'T forget to add non-linear kernelizations. 

            std::vector<double> dualFormSVMDeriv(std::vector<double> alpha, std::vector<std::vector<double>> X, std::vector<double> y);
--- a/MLPP/GAN/GAN.cpp
+++ b/MLPP/GAN/GAN.cpp
@ -107,13 +107,13 @@ namespace MLPP {
        }
    }
    
-    void GAN::addOutputLayer(std::string activation, std::string loss, std::string weightInit, std::string reg, double lambda, double alpha){
+    void GAN::addOutputLayer(std::string weightInit, std::string reg, double lambda, double alpha){ // Activation and loss are no longer caller-supplied; they are fixed to "Sigmoid" and "LogLoss" below.
        LinAlg alg;
        if(!network.empty()){
-            outputLayer = new OutputLayer(network[network.size() - 1].n_hidden, activation, loss, network[network.size() - 1].a, weightInit, reg, lambda, alpha);
+            outputLayer = new OutputLayer(network[network.size() - 1].n_hidden, "Sigmoid", "LogLoss", network[network.size() - 1].a, weightInit, reg, lambda, alpha); // Sized from, and fed by, the last hidden layer.
        }
        else{
-            outputLayer = new OutputLayer(k, activation, loss, alg.gaussianNoise(n, k), weightInit, reg, lambda, alpha);
+            outputLayer = new OutputLayer(k, "Sigmoid", "LogLoss", alg.gaussianNoise(n, k), weightInit, reg, lambda, alpha); // No hidden layers: fed directly with alg.gaussianNoise(n, k).
        }
    }

--- a/MLPP/GAN/GAN.hpp
+++ b/MLPP/GAN/GAN.hpp
@ -26,7 +26,7 @@ class GAN{
        void save(std::string fileName);

        void addLayer(int n_hidden, std::string activation, std::string weightInit = "Default", std::string reg = "None", double lambda = 0.5, double alpha = 0.5); 
-        void addOutputLayer(std::string activation, std::string loss, std::string weightInit = "Default", std::string reg = "None", double lambda = 0.5, double alpha = 0.5); 
+        void addOutputLayer(std::string weightInit = "Default", std::string reg = "None", double lambda = 0.5, double alpha = 0.5); // Activation and loss are not parameters: GAN.cpp always builds the output layer with "Sigmoid" and "LogLoss".
        
        private:
            std::vector<std::vector<double>> modelSetTestGenerator(std::vector<std::vector<double>> X); // Evaluator for the generator of the gan.
--- a/MLPP/MultiOutputLayer/MultiOutputLayer.cpp
+++ b/MLPP/MultiOutputLayer/MultiOutputLayer.cpp
@ -113,6 +113,8 @@ namespace MLPP {
        cost_map["CrossEntropy"] = &Cost::CrossEntropy;
        costDeriv_map["HingeLoss"] = &Cost::HingeLossDeriv;
        cost_map["HingeLoss"] = &Cost::HingeLoss;
+        costDeriv_map["WassersteinLoss"] = &Cost::WassersteinLossDeriv; // Must resolve to the Wasserstein overloads, not the hinge-loss ones.
+        cost_map["WassersteinLoss"] = &Cost::WassersteinLoss;
    }
    
    void MultiOutputLayer::forwardPass(){
--- a/MLPP/OutputLayer/OutputLayer.cpp
+++ b/MLPP/OutputLayer/OutputLayer.cpp
@ -110,6 +110,8 @@ namespace MLPP {
        cost_map["CrossEntropy"] = &Cost::CrossEntropy;
        costDeriv_map["HingeLoss"] = &Cost::HingeLossDeriv;
        cost_map["HingeLoss"] = &Cost::HingeLoss;
+        costDeriv_map["WassersteinLoss"] = &Cost::WassersteinLossDeriv; // Must resolve to the Wasserstein overloads, not the hinge-loss ones.
+        cost_map["WassersteinLoss"] = &Cost::WassersteinLoss;
    }
    
    void OutputLayer::forwardPass(){
--- a/MLPP/Regularization/Reg.cpp
+++ b/MLPP/Regularization/Reg.cpp
@ -72,6 +72,7 @@ namespace MLPP{

    std::vector<double> Reg::regWeights(std::vector<double> weights, double lambda, double alpha, std::string reg){
        LinAlg alg;
+        if(reg == "WeightClipping"){ return regDerivTerm(weights, lambda, alpha, reg); }
        return alg.subtraction(weights, regDerivTerm(weights, lambda, alpha, reg));
        // for(int i = 0; i < weights.size(); i++){
        //     weights[i] -= regDerivTerm(weights, lambda, alpha, reg, i);
@ -81,6 +82,7 @@ namespace MLPP{

    std::vector<std::vector<double>> Reg::regWeights(std::vector<std::vector<double>> weights, double lambda, double alpha, std::string reg){
        LinAlg alg;
+        if(reg == "WeightClipping"){ return regDerivTerm(weights, lambda, alpha, reg); }
        return alg.subtraction(weights, regDerivTerm(weights, lambda, alpha, reg));
        // for(int i = 0; i < weights.size(); i++){
        //     for(int j = 0; j < weights[i].size(); j++){
@ -126,6 +128,19 @@ namespace MLPP{
        else if(reg == "ElasticNet"){
            return alpha * lambda * act.sign(weights[j]) + (1 - alpha) * lambda * weights[j];
        }
+        else if(reg == "WeightClipping"){ // Preparation for Wasserstein GANs. 
+            // We assume lambda is the lower clipping threshold, while alpha is the higher clipping threshold. 
+            // alpha > lambda. 
+            if(weights[j] > alpha){
+                return alpha;
+            }
+            else if(weights[j] < lambda){
+                return lambda;
+            }
+            else{
+                return weights[j];
+            }
+        }
        else {
            return 0;
        }
@ -142,6 +157,19 @@ namespace MLPP{
        else if(reg == "ElasticNet"){
            return alpha * lambda * act.sign(weights[i][j]) + (1 - alpha) * lambda * weights[i][j];
        }
+        else if(reg == "WeightClipping"){ // Preparation for Wasserstein GANs.
+            // We assume lambda is the lower clipping threshold, while alpha is the higher clipping threshold. 
+            // alpha > lambda. 
+            if(weights[i][j] > alpha){
+                return alpha;
+            }
+            else if(weights[i][j] < lambda){
+               return lambda;
+            }
+            else{
+                return weights[i][j];
+            }
+        }
        else {
            return 0;
        }
--- a/MLPP/WGAN/WGAN.cpp
+++ b/MLPP/WGAN/WGAN.cpp
@ -0,0 +1,300 @@
+//
+//  WGAN.cpp
+//
+//  Created by Marc Melikyan on 11/4/20.
+//
+
+#include "WGAN.hpp"
+#include "Activation/Activation.hpp"
+#include "LinAlg/LinAlg.hpp"
+#include "Regularization/Reg.hpp"
+#include "Utilities/Utilities.hpp"
+#include "Cost/Cost.hpp"
+
+#include <iostream>
+#include <cmath>
+
+namespace MLPP {
+    // Builds an empty WGAN. outputSet holds the real samples (one per row); n is its row count and
+    // k is the dimension passed to alg.gaussianNoise(n, k) when noise is drawn for the generator.
+    // Layers are added afterwards through addLayer / addOutputLayer.
+    WGAN::WGAN(double k, std::vector<std::vector<double>> outputSet)
+    : outputSet(outputSet), outputLayer(nullptr), n(outputSet.size()), k(k)
+    {
+        // outputLayer starts out as nullptr so that the delete in ~WGAN is well defined even
+        // when addOutputLayer was never called.
+    }
+
+    // Releases the output layer allocated by addOutputLayer; deleting nullptr is a no-op.
+    WGAN::~WGAN(){
+        delete outputLayer;
+    }
+
+    // Draws alg.gaussianNoise(n, k) and runs it through the generator part of the network.
+    // Note that the parameter n hides the member n inside this function.
+    std::vector<std::vector<double>> WGAN::generateExample(int n){
+        LinAlg alg;
+        std::vector<std::vector<double>> noise = alg.gaussianNoise(n, k);
+        return modelSetTestGenerator(noise);
+    }
+
+    // Trains the WGAN. The epoch counter is checked at the end of the loop, so at least one epoch
+    // always runs. Every epoch performs CRITIC_ITERATIONS critic (discriminator) updates on a batch
+    // made of n generated rows followed by the rows of outputSet, then one generator update on n
+    // freshly generated rows.
+    //   learning_rate: step size; gradients are scaled by learning_rate/n before being applied.
+    //   max_epoch:     number of epochs to run.
+    //   UI:            when true, prints the cost and the layer parameters after every epoch.
+    void WGAN::gradientDescent(double learning_rate, int max_epoch, bool UI){
+        LinAlg alg;
+        double cost_prev = 0;
+        int epoch = 1;
+        forwardPass();
+
+        const int CRITIC_ITERATIONS = 5; // Wasserstein GAN specific parameter.
+
+        while(true){
+            cost_prev = Cost(y_hat, alg.onevec(n));
+
+            std::vector<std::vector<double>> generatorInputSet;
+            std::vector<std::vector<double>> discriminatorInputSet;
+
+            // Local names chosen so that they do not shadow the y_hat / outputSet members.
+            std::vector<double> criticScores;
+            std::vector<double> targetLabels;
+
+            // Training of the discriminator.
+            for(int i = 0; i < CRITIC_ITERATIONS; i++){
+                generatorInputSet = alg.gaussianNoise(n, k);
+                discriminatorInputSet = modelSetTestGenerator(generatorInputSet);
+                discriminatorInputSet.insert(discriminatorInputSet.end(), outputSet.begin(), outputSet.end()); // Fake + real inputs.
+
+                criticScores = modelSetTestDiscriminator(discriminatorInputSet);
+                targetLabels = alg.scalarMultiply(-1, alg.onevec(n)); // WGAN changes y_i = 1 and y_i = 0 to y_i = 1 and y_i = -1
+                std::vector<double> realLabels = alg.onevec(n);
+                targetLabels.insert(targetLabels.end(), realLabels.begin(), realLabels.end()); // Fake + real output scores.
+
+                // NOTE(review): the critic batch holds the fake rows plus the real rows, yet the step is scaled by learning_rate/n - confirm this is intended.
+                auto [cumulativeDiscriminatorHiddenLayerWGrad, outputDiscriminatorWGrad] = computeDiscriminatorGradients(criticScores, targetLabels);
+                cumulativeDiscriminatorHiddenLayerWGrad = alg.scalarMultiply(learning_rate/n, cumulativeDiscriminatorHiddenLayerWGrad);
+                outputDiscriminatorWGrad = alg.scalarMultiply(learning_rate/n, outputDiscriminatorWGrad);
+                updateDiscriminatorParameters(cumulativeDiscriminatorHiddenLayerWGrad, outputDiscriminatorWGrad, learning_rate);
+            }
+
+            // Training of the generator.
+            generatorInputSet = alg.gaussianNoise(n, k);
+            discriminatorInputSet = modelSetTestGenerator(generatorInputSet);
+            criticScores = modelSetTestDiscriminator(discriminatorInputSet);
+            targetLabels = alg.onevec(n);
+
+            std::vector<std::vector<std::vector<double>>> cumulativeGeneratorHiddenLayerWGrad = computeGeneratorGradients(criticScores, targetLabels);
+            cumulativeGeneratorHiddenLayerWGrad = alg.scalarMultiply(learning_rate/n, cumulativeGeneratorHiddenLayerWGrad);
+            updateGeneratorParameters(cumulativeGeneratorHiddenLayerWGrad, learning_rate);
+
+            forwardPass();
+            // The bool parameter UI hides the member function, hence the qualified call.
+            if(UI) { WGAN::UI(epoch, cost_prev, y_hat, alg.onevec(n)); }
+
+            epoch++;
+            if(epoch > max_epoch) { break; }
+        }
+    }
+
+    // Runs a fresh forward pass and returns util.performance of the resulting y_hat against a vector of n ones.
+    double WGAN::score(){
+        LinAlg alg;
+        Utilities util;
+        forwardPass();
+        const std::vector<double> allOnes = alg.onevec(n);
+        return util.performance(y_hat, allOnes);
+    }
+
+    // Writes every layer's weights and bias to fileName through Utilities::saveParameters.
+    // The first call passes 0 as the fourth argument and every later call passes 1 (presumably an
+    // overwrite/append switch - confirm against Utilities); the last argument is the 1-based layer number.
+    void WGAN::save(std::string fileName){
+        Utilities util;
+        if(network.empty()){
+            util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, 0, network.size() + 1);
+            return;
+        }
+
+        util.saveParameters(fileName, network[0].weights, network[0].bias, 0, 1);
+        for(int layer = 1; layer < network.size(); layer++){
+            util.saveParameters(fileName, network[layer].weights, network[layer].bias, 1, layer + 1);
+        }
+        util.saveParameters(fileName, outputLayer->weights, outputLayer->bias, 1, network.size() + 1);
+    }
+
+    // Appends a hidden layer and immediately runs its forward pass. The first layer is fed with
+    // alg.gaussianNoise(n, k); every later layer is fed with the previous layer's activations.
+    void WGAN::addLayer(int n_hidden, std::string activation, std::string weightInit, std::string reg, double lambda, double alpha){
+        LinAlg alg;
+        std::vector<std::vector<double>> layerInput;
+        if(network.empty()){
+            layerInput = alg.gaussianNoise(n, k);
+        }
+        else{
+            layerInput = network[network.size() - 1].a;
+        }
+        network.push_back(HiddenLayer(n_hidden, activation, layerInput, weightInit, reg, lambda, alpha));
+        network[network.size() - 1].forwardPass();
+    }
+    
+    // Creates the critic's output layer: "Linear" activation, "WassersteinLoss" cost and the
+    // "WeightClipping" regularizer with thresholds -0.01 / 0.01. Only weightInit is forwarded;
+    // the reg, lambda and alpha arguments are accepted but not used.
+    // NOTE(review): a second call overwrites outputLayer without deleting the previous layer.
+    void WGAN::addOutputLayer(std::string weightInit, std::string reg, double lambda, double alpha){
+        LinAlg alg;
+        if(network.empty()){ // Should never happen.
+            outputLayer = new OutputLayer(k, "Linear", "WassersteinLoss", alg.gaussianNoise(n, k), weightInit, "WeightClipping", -0.01, 0.01);
+            return;
+        }
+        HiddenLayer& lastHidden = network[network.size() - 1];
+        outputLayer = new OutputLayer(lastHidden.n_hidden, "Linear", "WassersteinLoss", lastHidden.a, weightInit, "WeightClipping", -0.01, 0.01);
+    }
+
+    // Runs X through the generator, i.e. hidden layers 0 .. network.size()/2, and returns the
+    // activations of the last of those layers.
+    // With no hidden layers there is nothing to run, so X is returned unchanged instead of
+    // indexing into the empty network (which was undefined behaviour).
+    std::vector<std::vector<double>> WGAN::modelSetTestGenerator(std::vector<std::vector<double>> X){
+        if(network.empty()){
+            return X;
+        }
+
+        const int generatorOutputLayer = static_cast<int>(network.size())/2;
+        network[0].input = X;
+        network[0].forwardPass();
+
+        for(int i = 1; i <= generatorOutputLayer; i++){
+            network[i].input = network[i - 1].a;
+            network[i].forwardPass();
+        }
+        return network[generatorOutputLayer].a;
+    }
+
+    // Runs X through the critic, i.e. hidden layers network.size()/2 + 1 .. end followed by the
+    // output layer, and returns the output layer's activations.
+    // With no hidden layers X is fed straight to the output layer (previously X was ignored and
+    // the output layer re-used whatever input it already held).
+    // NOTE(review): with one or two hidden layers the loop below does not run and X is not
+    // consumed - confirm that such configurations are unsupported.
+    std::vector<double> WGAN::modelSetTestDiscriminator(std::vector<std::vector<double>> X){
+        if(network.empty()){
+            outputLayer->input = X;
+        }
+        else{
+            const int layerCount = static_cast<int>(network.size());
+            const int firstCriticLayer = layerCount/2 + 1;
+            for(int i = firstCriticLayer; i < layerCount; i++){
+                if(i == firstCriticLayer){
+                    network[i].input = X;
+                }
+                else { network[i].input = network[i - 1].a; }
+                network[i].forwardPass();
+            }
+            outputLayer->input = network[layerCount - 1].a;
+        }
+        outputLayer->forwardPass();
+        return outputLayer->a;
+    }
+
+    // Total cost: the output layer's cost function applied to (y_hat, y), plus the regularization
+    // terms of the hidden layers and of the output layer. The hidden-layer loop stops at
+    // network.size() - 1, so the last hidden layer's term is not part of the sum.
+    double WGAN::Cost(std::vector<double> y_hat, std::vector<double> y){
+        Reg regularization;
+        class Cost cost; // "class" is needed because WGAN::Cost hides the Cost type in this scope.
+        auto cost_function = outputLayer->cost_map[outputLayer->cost];
+
+        double hiddenRegTotal = 0;
+        if(!network.empty()){
+            for(int layer = 0; layer < network.size() - 1; layer++){
+                hiddenRegTotal += regularization.regTerm(network[layer].weights, network[layer].lambda, network[layer].alpha, network[layer].reg);
+            }
+        }
+
+        const double dataLoss = (cost.*cost_function)(y_hat, y);
+        const double outputRegTerm = regularization.regTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg);
+        return dataLoss + hiddenRegTotal + outputRegTerm;
+    }
+
+    // Full forward pass on fresh noise: alg.gaussianNoise(n, k) goes through every hidden layer
+    // in order and then through the output layer; the result is stored in the y_hat member.
+    void WGAN::forwardPass(){
+        LinAlg alg;
+        if(network.empty()){ // Should never happen, though.
+            outputLayer->input = alg.gaussianNoise(n, k);
+        }
+        else{
+            network[0].input = alg.gaussianNoise(n, k);
+            network[0].forwardPass();
+
+            for(int layer = 1; layer < network.size(); layer++){
+                network[layer].input = network[layer - 1].a;
+                network[layer].forwardPass();
+            }
+            outputLayer->input = network[network.size() - 1].a;
+        }
+        outputLayer->forwardPass();
+        y_hat = outputLayer->a;
+    }
+
+    // Applies one critic update. outputLayerUpdation and hiddenLayerUpdations are subtracted from
+    // the weights as given (the caller has already scaled them); the biases are stepped here using
+    // learning_rate/n and each layer's stored delta. hiddenLayerUpdations[0] belongs to the last
+    // hidden layer, hiddenLayerUpdations[1] to the one before it, and so on.
+    // NOTE(review): nothing in this function clips the critic weights - confirm where the
+    // "WeightClipping" regularizer is expected to be enforced (e.g. through Reg::regWeights).
+    void WGAN::updateDiscriminatorParameters(std::vector<std::vector<std::vector<double>>> hiddenLayerUpdations, std::vector<double> outputLayerUpdation, double learning_rate){
+        LinAlg alg;
+
+        outputLayer->weights = alg.subtraction(outputLayer->weights, outputLayerUpdation);
+        outputLayer->bias -= learning_rate * alg.sum_elements(outputLayer->delta) / n;
+
+        if(!network.empty()){
+            // Signed copies of the layer count: comparing an int index with network.size()
+            // converts a negative index to a huge unsigned value, which made the loop below run
+            // with i == -1 when there is a single hidden layer.
+            const int layerCount = static_cast<int>(network.size());
+            const int lastLayer = layerCount - 1;
+
+            network[lastLayer].weights = alg.subtraction(network[lastLayer].weights, hiddenLayerUpdations[0]);
+            network[lastLayer].bias = alg.subtractMatrixRows(network[lastLayer].bias, alg.scalarMultiply(learning_rate/n, network[lastLayer].delta));
+
+            for(int i = layerCount - 2; i > layerCount/2; i--){
+                network[i].weights = alg.subtraction(network[i].weights, hiddenLayerUpdations[lastLayer - i]);
+                network[i].bias = alg.subtractMatrixRows(network[i].bias, alg.scalarMultiply(learning_rate/n, network[i].delta));
+            }
+        }
+    }
+
+    // Applies one generator update to hidden layers network.size()/2 .. 0. The weight update of
+    // layer i is hiddenLayerUpdations[network.size() - 1 - i] and is subtracted as given; the bias
+    // is stepped with learning_rate/n times the layer's stored delta.
+    void WGAN::updateGeneratorParameters(std::vector<std::vector<std::vector<double>>> hiddenLayerUpdations, double learning_rate){
+        LinAlg alg;
+
+        if(network.empty()){
+            return;
+        }
+
+        int layer = network.size()/2;
+        while(layer >= 0){
+            network[layer].weights = alg.subtraction(network[layer].weights, hiddenLayerUpdations[(network.size() - 1) - layer]);
+            network[layer].bias = alg.subtractMatrixRows(network[layer].bias, alg.scalarMultiply(learning_rate/n, network[layer].delta));
+            layer--;
+        }
+    }
+    
+    // Backpropagates the cost of (y_hat, outputSet) through the output layer and the critic's
+    // hidden layers (indices network.size() - 1 down to network.size()/2 + 1), storing each
+    // layer's delta along the way.
+    // Returns {hidden-layer weight gradients, output-layer weight gradient}; the hidden gradients
+    // are ordered from the last hidden layer backwards. Each gradient has the layer's
+    // regDerivTerm added to it.
+    // NOTE(review): the output layer is created with the "WeightClipping" regularizer, for which
+    // regDerivTerm yields clipped weights rather than a derivative - confirm adding it to the
+    // gradient is intended.
+    std::tuple<std::vector<std::vector<std::vector<double>>>, std::vector<double>> WGAN::computeDiscriminatorGradients(std::vector<double> y_hat, std::vector<double> outputSet){
+        class Cost cost;
+        Activation avn;
+        LinAlg alg;
+        Reg regularization;
+
+        std::vector<std::vector<std::vector<double>>> cumulativeHiddenLayerWGrad; // Tensor containing ALL hidden grads.
+
+        auto costDeriv = outputLayer->costDeriv_map[outputLayer->cost];
+        auto outputAvn = outputLayer->activation_map[outputLayer->activation];
+        outputLayer->delta = alg.hadamard_product((cost.*costDeriv)(y_hat, outputSet), (avn.*outputAvn)(outputLayer->z, 1));
+        std::vector<double> outputWGrad = alg.mat_vec_mult(alg.transpose(outputLayer->input), outputLayer->delta);
+        outputWGrad = alg.addition(outputWGrad, regularization.regDerivTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg));
+
+        if(!network.empty()){
+            // Signed copies of the layer count: comparing an int index with network.size()
+            // converts a negative index to a huge unsigned value, which made the loop below run
+            // with i == -1 when there is a single hidden layer.
+            const int layerCount = static_cast<int>(network.size());
+            const int lastLayer = layerCount - 1;
+
+            auto lastLayerAvn = network[lastLayer].activation_map[network[lastLayer].activation];
+
+            network[lastLayer].delta = alg.hadamard_product(alg.outerProduct(outputLayer->delta, outputLayer->weights), (avn.*lastLayerAvn)(network[lastLayer].z, 1));
+            std::vector<std::vector<double>> lastLayerWGrad = alg.matmult(alg.transpose(network[lastLayer].input), network[lastLayer].delta);
+
+            cumulativeHiddenLayerWGrad.push_back(alg.addition(lastLayerWGrad, regularization.regDerivTerm(network[lastLayer].weights, network[lastLayer].lambda, network[lastLayer].alpha, network[lastLayer].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
+
+            for(int i = layerCount - 2; i > layerCount/2; i--){
+                auto hiddenLayerAvn = network[i].activation_map[network[i].activation];
+                network[i].delta = alg.hadamard_product(alg.matmult(network[i + 1].delta, alg.transpose(network[i + 1].weights)), (avn.*hiddenLayerAvn)(network[i].z, 1));
+                std::vector<std::vector<double>> hiddenLayerWGrad = alg.matmult(alg.transpose(network[i].input), network[i].delta);
+
+                cumulativeHiddenLayerWGrad.push_back(alg.addition(hiddenLayerWGrad, regularization.regDerivTerm(network[i].weights, network[i].lambda, network[i].alpha, network[i].reg))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
+            }
+        }
+        return {cumulativeHiddenLayerWGrad, outputWGrad};
+    }
+
+    // Backpropagates the cost of (y_hat, outputSet) through the output layer and through every
+    // hidden layer (network.size() - 1 down to 0), storing each layer's delta along the way.
+    // Returns the hidden-layer weight gradients, each with the layer's regDerivTerm added, ordered
+    // from the last hidden layer to the first. The output layer's own weight gradient is
+    // evaluated but is not part of the returned tensor.
+    std::vector<std::vector<std::vector<double>>> WGAN::computeGeneratorGradients(std::vector<double> y_hat, std::vector<double> outputSet){
+        class Cost cost;
+        Activation avn;
+        LinAlg alg;
+        Reg regularization;
+
+        std::vector<std::vector<std::vector<double>>> layerWeightGrads; // One matrix per hidden layer, last layer first.
+
+        auto lossGradFn = outputLayer->costDeriv_map[outputLayer->cost];
+        auto outputActivationFn = outputLayer->activation_map[outputLayer->activation];
+        outputLayer->delta = alg.hadamard_product((cost.*lossGradFn)(y_hat, outputSet), (avn.*outputActivationFn)(outputLayer->z, 1));
+        std::vector<double> outputWGrad = alg.mat_vec_mult(alg.transpose(outputLayer->input), outputLayer->delta);
+        outputWGrad = alg.addition(outputWGrad, regularization.regDerivTerm(outputLayer->weights, outputLayer->lambda, outputLayer->alpha, outputLayer->reg));
+
+        if(network.empty()){
+            return layerWeightGrads;
+        }
+
+        // Last hidden layer: its delta comes from the output layer's delta and weights.
+        const int last = network.size() - 1;
+        auto lastActivationFn = network[last].activation_map[network[last].activation];
+        network[last].delta = alg.hadamard_product(alg.outerProduct(outputLayer->delta, outputLayer->weights), (avn.*lastActivationFn)(network[last].z, 1));
+        std::vector<std::vector<double>> lastWGrad = alg.matmult(alg.transpose(network[last].input), network[last].delta);
+        layerWeightGrads.push_back(alg.addition(lastWGrad, regularization.regDerivTerm(network[last].weights, network[last].lambda, network[last].alpha, network[last].reg)));
+
+        // Remaining hidden layers: each delta comes from the following layer's delta and weights.
+        for(int layer = last - 1; layer >= 0; layer--){
+            auto activationFn = network[layer].activation_map[network[layer].activation];
+            network[layer].delta = alg.hadamard_product(alg.matmult(network[layer + 1].delta, alg.transpose(network[layer + 1].weights)), (avn.*activationFn)(network[layer].z, 1));
+            std::vector<std::vector<double>> layerWGrad = alg.matmult(alg.transpose(network[layer].input), network[layer].delta);
+            layerWeightGrads.push_back(alg.addition(layerWGrad, regularization.regDerivTerm(network[layer].weights, network[layer].lambda, network[layer].alpha, network[layer].reg)));
+        }
+        return layerWeightGrads;
+    }
+
+    // Prints the epoch's cost information, then the weights and bias of the output layer and of
+    // every hidden layer, walking from the last hidden layer to the first.
+    void WGAN::UI(int epoch, double cost_prev, std::vector<double> y_hat, std::vector<double> outputSet){
+        Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputSet));
+        std::cout << "Layer " << network.size() + 1 << ": " << std::endl;
+        Utilities::UI(outputLayer->weights, outputLayer->bias);
+
+        int layer = network.size();
+        while(layer > 0){
+            layer--;
+            std::cout << "Layer " << layer + 1 << ": " << std::endl;
+            Utilities::UI(network[layer].weights, network[layer].bias);
+        }
+    }
+}
--- a/MLPP/WGAN/WGAN.hpp
+++ b/MLPP/WGAN/WGAN.hpp
@ -0,0 +1,56 @@
+//
+//  WGAN.hpp
+//
+//  Created by Marc Melikyan on 11/4/20.
+//
+
+#ifndef WGAN_hpp
+#define WGAN_hpp
+
+#include "HiddenLayer/HiddenLayer.hpp"
+#include "OutputLayer/OutputLayer.hpp"
+
+#include <vector>
+#include <tuple>
+#include <string>
+
+namespace  MLPP{
+
+// Tabular Wasserstein GAN. The hidden layers added through addLayer are shared between the
+// generator (layers 0 .. network.size()/2) and the critic (the remaining layers plus the
+// output layer created by addOutputLayer).
+// NOTE(review): the class owns outputLayer through a raw pointer and declares no copy
+// operations, so copying a WGAN would make two objects delete the same layer.
+class WGAN{
+        public:
+        // k is the noise dimension handed to gaussianNoise (stored as an int); outputSet holds the
+        // real samples, one per row, and n is set to its row count.
+        WGAN(double k, std::vector<std::vector<double>> outputSet);
+        ~WGAN();
+        std::vector<std::vector<double>> generateExample(int n); // Generator output for n freshly drawn noise rows.
+        void gradientDescent(double learning_rate, int max_epoch, bool UI = 1);
+        double score();
+        void save(std::string fileName);
+
+        void addLayer(int n_hidden, std::string activation, std::string weightInit = "Default", std::string reg = "None", double lambda = 0.5, double alpha = 0.5);
+        // Only weightInit is used; the implementation always builds a "Linear" / "WassersteinLoss"
+        // layer with "WeightClipping" at -0.01 / 0.01 and does not read reg, lambda or alpha.
+        void addOutputLayer(std::string weightInit = "Default", std::string reg = "None", double lambda = 0.5, double alpha = 0.5);
+
+        private:
+            std::vector<std::vector<double>> modelSetTestGenerator(std::vector<std::vector<double>> X); // Evaluator for the generator of the WGAN.
+            std::vector<double> modelSetTestDiscriminator(std::vector<std::vector<double>> X); // Evaluator for the discriminator of the WGAN.
+
+            double Cost(std::vector<double> y_hat, std::vector<double> y);
+
+            void forwardPass();
+            void updateDiscriminatorParameters(std::vector<std::vector<std::vector<double>>> hiddenLayerUpdations, std::vector<double> outputLayerUpdation, double learning_rate);
+            void updateGeneratorParameters(std::vector<std::vector<std::vector<double>>> hiddenLayerUpdations, double learning_rate);
+            std::tuple<std::vector<std::vector<std::vector<double>>>, std::vector<double>> computeDiscriminatorGradients(std::vector<double> y_hat, std::vector<double> outputSet);
+            std::vector<std::vector<std::vector<double>>> computeGeneratorGradients(std::vector<double> y_hat, std::vector<double> outputSet);
+
+            void UI(int epoch, double cost_prev, std::vector<double> y_hat, std::vector<double> outputSet);
+
+            std::vector<std::vector<double>> outputSet; // Real samples, one per row.
+            std::vector<double> y_hat; // Output-layer activations of the latest forwardPass().
+
+            std::vector<HiddenLayer> network;
+            OutputLayer *outputLayer = nullptr; // Owned: created by addOutputLayer, deleted in ~WGAN. Null until then so the delete is always safe.
+
+            int n; // Number of rows of outputSet.
+            int k; // Noise dimension.
+    };
+}
+
+#endif /* WGAN_hpp */
--- a/README.md
+++ b/README.md
@ -119,10 +119,12 @@ The result will be the model's predictions for the entire dataset.
        - Log Loss
        - Cross Entropy
        - Hinge Loss
+        - Wasserstein Loss
    4. Possible Regularization Methods
        - Lasso
        - Ridge
        - ElasticNet
+        - Weight Clipping
    5. Possible Weight Initialization Methods
        - Uniform 
        - Xavier Normal
@ -142,6 +144,7 @@ The result will be the model's predictions for the entire dataset.
    3. Softmax Network
 4. ***Generative Modeling***
    1. Tabular Generative Adversarial Networks
+    2. Tabular Wasserstein Generative Adversarial Networks
 5. ***Natural Language Processing***
    1. Word2Vec (Continous Bag of Words, Skip-Gram)
    2. Stemming
--- a/buildSO.sh
+++ b/buildSO.sh
@ -1,6 +1,6 @@
-g++ -I MLPP -c -fPIC main.cpp MLPP/Stat/Stat.cpp MLPP/LinAlg/LinAlg.cpp MLPP/Regularization/Reg.cpp MLPP/Activation/Activation.cpp MLPP/Utilities/Utilities.cpp MLPP/Data/Data.cpp MLPP/Cost/Cost.cpp MLPP/ANN/ANN.cpp MLPP/HiddenLayer/HiddenLayer.cpp MLPP/OutputLayer/OutputLayer.cpp MLPP/MLP/MLP.cpp MLPP/LinReg/LinReg.cpp MLPP/LogReg/LogReg.cpp MLPP/UniLinReg/UniLinReg.cpp MLPP/CLogLogReg/CLogLogReg.cpp MLPP/ExpReg/ExpReg.cpp MLPP/ProbitReg/ProbitReg.cpp MLPP/SoftmaxReg/SoftmaxReg.cpp MLPP/TanhReg/TanhReg.cpp MLPP/SoftmaxNet/SoftmaxNet.cpp MLPP/Convolutions/Convolutions.cpp MLPP/AutoEncoder/AutoEncoder.cpp MLPP/MultinomialNB/MultinomialNB.cpp MLPP/BernoulliNB/BernoulliNB.cpp MLPP/GaussianNB/GaussianNB.cpp MLPP/KMeans/KMeans.cpp MLPP/kNN/kNN.cpp MLPP/PCA/PCA.cpp MLPP/OutlierFinder/OutlierFinder.cpp MLPP/MANN/MANN.cpp MLPP/MultiOutputLayer/MultiOutputLayer.cpp MLPP/SVC/SVC.cpp MLPP/NumericalAnalysis/NumericalAnalysis.cpp MLPP/DualSVC/DualSVC.cpp MLPP/Transforms/Transforms.cpp --std=c++17
+g++ -I MLPP -c -fPIC main.cpp MLPP/Stat/Stat.cpp MLPP/LinAlg/LinAlg.cpp MLPP/Regularization/Reg.cpp MLPP/Activation/Activation.cpp MLPP/Utilities/Utilities.cpp MLPP/Data/Data.cpp MLPP/Cost/Cost.cpp MLPP/ANN/ANN.cpp MLPP/HiddenLayer/HiddenLayer.cpp MLPP/OutputLayer/OutputLayer.cpp MLPP/MLP/MLP.cpp MLPP/LinReg/LinReg.cpp MLPP/LogReg/LogReg.cpp MLPP/UniLinReg/UniLinReg.cpp MLPP/CLogLogReg/CLogLogReg.cpp MLPP/ExpReg/ExpReg.cpp MLPP/ProbitReg/ProbitReg.cpp MLPP/SoftmaxReg/SoftmaxReg.cpp MLPP/TanhReg/TanhReg.cpp MLPP/SoftmaxNet/SoftmaxNet.cpp MLPP/Convolutions/Convolutions.cpp MLPP/AutoEncoder/AutoEncoder.cpp MLPP/MultinomialNB/MultinomialNB.cpp MLPP/BernoulliNB/BernoulliNB.cpp MLPP/GaussianNB/GaussianNB.cpp MLPP/KMeans/KMeans.cpp MLPP/kNN/kNN.cpp MLPP/PCA/PCA.cpp MLPP/OutlierFinder/OutlierFinder.cpp MLPP/MANN/MANN.cpp MLPP/MultiOutputLayer/MultiOutputLayer.cpp MLPP/SVC/SVC.cpp MLPP/NumericalAnalysis/NumericalAnalysis.cpp MLPP/DualSVC/DualSVC.cpp MLPP/Transforms/Transforms.cpp MLPP/GAN/GAN.cpp MLPP/WGAN/WGAN.cpp --std=c++17

-g++ -shared -o MLPP.so Reg.o LinAlg.o Stat.o Activation.o LinReg.o Utilities.o Cost.o LogReg.o ProbitReg.o ExpReg.o CLogLogReg.o SoftmaxReg.o TanhReg.o kNN.o KMeans.o UniLinReg.o SoftmaxNet.o MLP.o AutoEncoder.o HiddenLayer.o OutputLayer.o ANN.o BernoulliNB.o GaussianNB.o MultinomialNB.o Convolutions.o OutlierFinder.o Data.o MultiOutputLayer.o MANN.o  SVC.o NumericalAnalysis.o DualSVC.o 
+g++ -shared -o MLPP.so Reg.o LinAlg.o Stat.o Activation.o LinReg.o Utilities.o Cost.o LogReg.o ProbitReg.o ExpReg.o CLogLogReg.o SoftmaxReg.o TanhReg.o kNN.o KMeans.o UniLinReg.o SoftmaxNet.o MLP.o AutoEncoder.o HiddenLayer.o OutputLayer.o ANN.o BernoulliNB.o GaussianNB.o MultinomialNB.o Convolutions.o OutlierFinder.o Data.o MultiOutputLayer.o MANN.o  SVC.o NumericalAnalysis.o DualSVC.o GAN.o WGAN.o PCA.o Transforms.o # PCA.o and Transforms.o are compiled by the command above and must be linked into the library too.
 sudo mv MLPP.so /usr/local/lib

 rm *.o
--- a/main.cpp
+++ b/main.cpp
@ -48,6 +48,7 @@
 #include "MLPP/NumericalAnalysis/NumericalAnalysis.hpp"
 #include "MLPP/DualSVC/DualSVC.hpp"
 #include "MLPP/GAN/GAN.hpp"
+#include "MLPP/WGAN/WGAN.hpp"
 #include "MLPP/Transforms/Transforms.hpp"

 using namespace MLPP;
@ -364,17 +365,17 @@ int main() {
    // alg.printVector(ann.modelSetTest(alg.transpose(inputSet)));
    // std::cout << "ACCURACY: " << 100 * ann.score() << "%" << std::endl;

-    //std::vector<std::vector<double>> outputSet = {{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}, 
-    //                                            {2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40}};
-    //Vector outputSet = {0,1,1,0};
-    // GAN gan(2, alg.transpose(outputSet));
-    // gan.addLayer(5, "Sigmoid");
-    // gan.addLayer(2, "RELU");
-    // gan.addLayer(5, "Sigmoid");
-    // gan.addOutputLayer("Sigmoid", "LogLoss");
-    // gan.gradientDescent(0.1, 25000, 0);
-    // std::cout << "GENERATED INPUT: (Gaussian-sampled noise):" << std::endl;
-    // alg.printMatrix(gan.generateExample(100));
+    std::vector<std::vector<double>> outputSet = {{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}, 
+                                               {2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40}};
+
+    WGAN gan(2, alg.transpose(outputSet)); // our gan is a wasserstein gan (wgan)
+    gan.addLayer(5, "Sigmoid");
+    gan.addLayer(2, "RELU");
+    gan.addLayer(5, "Sigmoid");
+    gan.addOutputLayer(); // User can specify weight init- if necessary.
+    gan.gradientDescent(0.1, 55000, 0);
+    std::cout << "GENERATED INPUT: (Gaussian-sampled noise):" << std::endl;
+    alg.printMatrix(gan.generateExample(100));


    // typedef std::vector<std::vector<double>> Matrix;