diff --git a/mlpp/hidden_layer/hidden_layer.cpp b/mlpp/hidden_layer/hidden_layer.cpp
index f24a41a..e027c9b 100644
--- a/mlpp/hidden_layer/hidden_layer.cpp
+++ b/mlpp/hidden_layer/hidden_layer.cpp
@@ -7,112 +7,98 @@
 #include "hidden_layer.h"
 #include "../activation/activation.h"
 #include "../lin_alg/lin_alg.h"
-#include "../utilities/utilities.h"
 
 #include
 #include
 
-/*
-
+// Forward pass over the stored `input`: z = mat_vec_addv(matmultm(input, weights), bias),
+// then a = run_activation_norm_matrix(activation, z).
 void MLPPHiddenLayer::forward_pass() {
 	MLPPLinAlg alg;
 	MLPPActivation avn;
-	z = alg.mat_vec_add(alg.matmult(input, weights), bias);
-	a = (avn.*activation_map[activation])(z, false);
+
+	z = alg.mat_vec_addv(alg.matmultm(input, weights), bias);
+	a = avn.run_activation_norm_matrix(activation, z);
 }
 
-void MLPPHiddenLayer::test(std::vector x) {
+// Evaluates the layer for the vector x and stores the results in z_test / a_test.
+// NOTE(review): mat_vec_multv produces a vector, yet the matrix-suffixed
+// additionm / run_activation_norm_matrix are applied to it - confirm whether
+// vector variants were intended.
+void MLPPHiddenLayer::test(const Ref &x) {
 	MLPPLinAlg alg;
 	MLPPActivation avn;
-	z_test = alg.addition(alg.mat_vec_mult(alg.transpose(weights), x), bias);
-	a_test = (avn.*activationTest_map[activation])(z_test, 0);
+
+	z_test = alg.additionm(alg.mat_vec_multv(alg.transposem(weights), x), bias);
+	a_test = avn.run_activation_norm_matrix(activation, z_test);
 }
 
-MLPPHiddenLayer::MLPPHiddenLayer(int n_hidden, std::string activation, std::vector> input, std::string weightInit, std::string reg, real_t lambda, real_t alpha) :
-		n_hidden(n_hidden), activation(activation), input(input), weightInit(weightInit), reg(reg), lambda(lambda), alpha(alpha) {
-	weights = MLPPUtilities::weightInitialization(input[0].size(), n_hidden, weightInit);
-	bias = MLPPUtilities::biasInitialization(n_hidden);
+// Stores the configuration, instances every member container, then sizes and
+// randomly fills `weights` / `bias` for p_n_hidden units on top of p_input.
+MLPPHiddenLayer::MLPPHiddenLayer(int p_n_hidden, MLPPActivation::ActivationFunction p_activation, Ref p_input, MLPPUtilities::WeightDistributionType p_weight_init, String p_reg, real_t p_lambda, real_t p_alpha) {
+	n_hidden = p_n_hidden;
+	activation = p_activation;
 
-	activation_map["Linear"] = &MLPPActivation::linear;
-	activationTest_map["Linear"] = &MLPPActivation::linear;
+	input = p_input;
 
-	activation_map["Sigmoid"] = &MLPPActivation::sigmoid;
-	activationTest_map["Sigmoid"] = &MLPPActivation::sigmoid;
+	// Regularization Params
+	reg = p_reg;
+	lambda = p_lambda; /* Regularization Parameter */
+	alpha = p_alpha; /* This is the controlling param for Elastic Net*/
 
-	activation_map["Swish"] = &MLPPActivation::swish;
-	activationTest_map["Swish"] = &MLPPActivation::swish;
+	weight_init = p_weight_init;
 
-	activation_map["Mish"] = &MLPPActivation::mish;
-	activationTest_map["Mish"] = &MLPPActivation::mish;
+	z.instance();
+	a.instance();
 
-	activation_map["SinC"] = &MLPPActivation::sinc;
-	activationTest_map["SinC"] = &MLPPActivation::sinc;
+	z_test.instance();
+	a_test.instance();
 
-	activation_map["Softplus"] = &MLPPActivation::softplus;
-	activationTest_map["Softplus"] = &MLPPActivation::softplus;
+	delta.instance();
 
-	activation_map["Softsign"] = &MLPPActivation::softsign;
-	activationTest_map["Softsign"] = &MLPPActivation::softsign;
+	weights.instance();
+	bias.instance();
 
-	activation_map["CLogLog"] = &MLPPActivation::cloglog;
-	activationTest_map["CLogLog"] = &MLPPActivation::cloglog;
+	// Without an input matrix the weight shape is unknown; stop here and leave
+	// weights / bias instanced but unsized instead of dereferencing a null Ref.
+	ERR_FAIL_COND(!input.is_valid());
+
+	// mat_vec_addv() indexes bias by column (size().x) and, assuming matmultm is
+	// a standard matrix product, weights needs one row (size().y) per input
+	// column - so the n_hidden units go along x.
+	weights->resize(Size2i(n_hidden, input->size().x));
+	bias->resize(n_hidden);
 
-	activation_map["Logit"] = &MLPPActivation::logit;
-	activationTest_map["Logit"] = &MLPPActivation::logit;
-
-	activation_map["GaussianCDF"] = &MLPPActivation::gaussianCDF;
-	activationTest_map["GaussianCDF"] = &MLPPActivation::gaussianCDF;
-
-	activation_map["RELU"] = &MLPPActivation::RELU;
-	activationTest_map["RELU"] = &MLPPActivation::RELU;
-
-	activation_map["GELU"] = &MLPPActivation::GELU;
-	activationTest_map["GELU"] = &MLPPActivation::GELU;
-
-	activation_map["Sign"] = &MLPPActivation::sign;
-	activationTest_map["Sign"] = &MLPPActivation::sign;
-
-	activation_map["UnitStep"] = &MLPPActivation::unitStep;
-	activationTest_map["UnitStep"] = &MLPPActivation::unitStep;
-
-	activation_map["Sinh"] = &MLPPActivation::sinh;
-	activationTest_map["Sinh"] = &MLPPActivation::sinh;
-
-	activation_map["Cosh"] = &MLPPActivation::cosh;
-	activationTest_map["Cosh"] = &MLPPActivation::cosh;
-
-	activation_map["Tanh"] = &MLPPActivation::tanh;
-	activationTest_map["Tanh"] = &MLPPActivation::tanh;
-
-	activation_map["Csch"] = &MLPPActivation::csch;
-	activationTest_map["Csch"] = &MLPPActivation::csch;
-
-	activation_map["Sech"] = &MLPPActivation::sech;
-	activationTest_map["Sech"] = &MLPPActivation::sech;
-
-	activation_map["Coth"] = &MLPPActivation::coth;
-	activationTest_map["Coth"] = &MLPPActivation::coth;
-
-	activation_map["Arsinh"] = &MLPPActivation::arsinh;
-	activationTest_map["Arsinh"] = &MLPPActivation::arsinh;
-
-	activation_map["Arcosh"] = &MLPPActivation::arcosh;
-	activationTest_map["Arcosh"] = &MLPPActivation::arcosh;
-
-	activation_map["Artanh"] = &MLPPActivation::artanh;
-	activationTest_map["Artanh"] = &MLPPActivation::artanh;
-
-	activation_map["Arcsch"] = &MLPPActivation::arcsch;
-	activationTest_map["Arcsch"] = &MLPPActivation::arcsch;
-
-	activation_map["Arsech"] = &MLPPActivation::arsech;
-	activationTest_map["Arsech"] = &MLPPActivation::arsech;
-
-	activation_map["Arcoth"] = &MLPPActivation::arcoth;
-	activationTest_map["Arcoth"] = &MLPPActivation::arcoth;
+	MLPPUtilities::weight_initializationm(weights, weight_init);
+	MLPPUtilities::bias_initializationv(bias);
 }
 
-*/
+// Creates an empty layer: zeroed scalars, the linear activation, and freshly
+// instanced containers that are not resized here.
+MLPPHiddenLayer::MLPPHiddenLayer() {
+	n_hidden = 0;
+	activation = MLPPActivation::ACTIVATION_FUNCTION_LINEAR;
+
+	// Regularization Params
+	//reg = 0;
+	lambda = 0; /* Regularization Parameter */
+	alpha = 0; /* This is the controlling param for Elastic Net*/
+
+	weight_init = MLPPUtilities::WEIGHT_DISTRIBUTION_TYPE_DEFAULT;
+
+	z.instance();
+	a.instance();
+
+	z_test.instance();
+	a_test.instance();
+
+	delta.instance();
+
+	weights.instance();
+	bias.instance();
+}
+MLPPHiddenLayer::~MLPPHiddenLayer() {
+}
 
 MLPPOldHiddenLayer::MLPPOldHiddenLayer(int n_hidden, std::string activation, std::vector> input, std::string weightInit, std::string reg, real_t lambda, real_t alpha) :
 		n_hidden(n_hidden), activation(activation), input(input), weightInit(weightInit), reg(reg), lambda(lambda), alpha(alpha) {
@@ -202,12 +188,12 @@ void MLPPOldHiddenLayer::forwardPass() {
 	MLPPLinAlg alg;
 	MLPPActivation avn;
 	z = alg.mat_vec_add(alg.matmult(input, weights), bias);
-	a = (avn.*activation_map[activation])(z, 0);
+	a = (avn.*activation_map[activation])(z, false);
 }
 
 void MLPPOldHiddenLayer::Test(std::vector x) {
 	MLPPLinAlg alg;
 	MLPPActivation avn;
 	z_test = alg.addition(alg.mat_vec_mult(alg.transpose(weights), x), bias);
-	a_test = (avn.*activationTest_map[activation])(z_test, 0);
+	a_test = (avn.*activationTest_map[activation])(z_test, false);
 }
diff --git a/mlpp/hidden_layer/hidden_layer.h b/mlpp/hidden_layer/hidden_layer.h
index 5cbbdd2..edf5d15 100644
--- a/mlpp/hidden_layer/hidden_layer.h
+++ b/mlpp/hidden_layer/hidden_layer.h
@@ -15,6 +15,7 @@
 #include "core/object/reference.h"
 
 #include "../activation/activation.h"
+#include "../utilities/utilities.h"
 
 #include "../lin_alg/mlpp_matrix.h"
 #include "../lin_alg/mlpp_vector.h"
@@ -28,7 +29,7 @@ class MLPPHiddenLayer : public Reference {
 
 public:
 	int n_hidden;
-	int activation;
+	MLPPActivation::ActivationFunction activation;
 
 	Ref input;
 
@@ -38,9 +39,6 @@ public:
 	Ref z;
 	Ref a;
 
-	HashMap (MLPPActivation::*)(const Ref &, bool)> activation_map;
-	HashMap (MLPPActivation::*)(const Ref &, bool)> activation_test_map;
-
 	Ref z_test;
 	Ref a_test;
 
@@ -51,12 +49,12 @@ public:
 	real_t lambda; /* Regularization Parameter */
 	real_t alpha; /* This is the controlling param for Elastic Net*/
 
-	String weight_init;
+	MLPPUtilities::WeightDistributionType weight_init;
 
 	void forward_pass();
 	void test(const Ref &x);
 
-	MLPPHiddenLayer(int p_n_hidden, int p_activation, Ref p_input, String p_weight_init, String p_reg, real_t p_lambda, real_t p_alpha);
+	MLPPHiddenLayer(int p_n_hidden, MLPPActivation::ActivationFunction p_activation, Ref p_input, MLPPUtilities::WeightDistributionType p_weight_init, String p_reg, real_t p_lambda, real_t p_alpha);
 	MLPPHiddenLayer();
 	~MLPPHiddenLayer();
 
diff --git
a/mlpp/lin_alg/lin_alg.cpp b/mlpp/lin_alg/lin_alg.cpp
index 3c23a14..da635cf 100644
--- a/mlpp/lin_alg/lin_alg.cpp
+++ b/mlpp/lin_alg/lin_alg.cpp
@@ -2180,6 +2180,58 @@ std::vector MLPPLinAlg::mat_vec_mult(std::vector> A,
 	return c;
 }
 
+// Returns a new matrix shaped like A with b added to every row:
+// ret(i, j) = A(i, j) + b[j]. Reads b[j] for every column j < A->size().x.
+Ref MLPPLinAlg::mat_vec_addv(const Ref &A, const Ref &b) {
+	Ref ret;
+	ret.instance();
+	ret->resize(A->size());
+
+	Size2i a_size = A->size();
+	const real_t *a_ptr = A->ptr();
+	const real_t *b_ptr = b->ptr();
+	real_t *ret_ptr = ret->ptrw();
+
+	for (int i = 0; i < a_size.y; ++i) {
+		for (int j = 0; j < a_size.x; ++j) {
+			int mat_index = A->calculate_index(i, j);
+
+			ret_ptr[mat_index] = a_ptr[mat_index] + b_ptr[j];
+		}
+	}
+
+	return ret;
+}
+// Matrix-vector product: returns c of length A->size().y with
+// c[i] = sum over k < b->size() of A(i, k) * b[k].
+Ref MLPPLinAlg::mat_vec_multv(const Ref &A, const Ref &b) {
+	Ref c;
+	c.instance();
+
+	Size2i a_size = A->size();
+	int b_size = b->size();
+
+	c->resize(a_size.y);
+
+	const real_t *a_ptr = A->ptr();
+	const real_t *b_ptr = b->ptr();
+	real_t *c_ptr = c->ptrw();
+
+	for (int i = 0; i < a_size.y; ++i) {
+		// Accumulate the whole row; assigning each product straight into c[i]
+		// would keep only the last term.
+		real_t sum = 0;
+
+		for (int k = 0; k < b_size; ++k) {
+			sum += a_ptr[A->calculate_index(i, k)] * b_ptr[k];
+		}
+
+		c_ptr[i] = sum;
+	}
+
+	return c;
+}
+
 std::vector>> MLPPLinAlg::addition(std::vector>> A, std::vector>> B) {
 	for (int i = 0; i < A.size(); i++) {
 		A[i] = addition(A[i], B[i]);
diff --git a/mlpp/lin_alg/lin_alg.h b/mlpp/lin_alg/lin_alg.h
index 1b5da7d..1f4acf6 100644
--- a/mlpp/lin_alg/lin_alg.h
+++ b/mlpp/lin_alg/lin_alg.h
@@ -261,9 +261,11 @@ public:
 
 	// MATRIX-VECTOR FUNCTIONS
 	std::vector> mat_vec_add(std::vector> A, std::vector b);
-
 	std::vector mat_vec_mult(std::vector> A, std::vector b);
 
+	Ref mat_vec_addv(const Ref &A, const Ref &b);
+	Ref mat_vec_multv(const Ref &A, const Ref &b);
+
 	// TENSOR FUNCTIONS
 	std::vector>> addition(std::vector>> A, std::vector>> B);
 
diff --git a/mlpp/utilities/utilities.cpp b/mlpp/utilities/utilities.cpp
index 558b1e0..69c116e 100644
--- a/mlpp/utilities/utilities.cpp
+++ b/mlpp/utilities/utilities.cpp
@@ -6,8 +6,9 @@
 
 #include "utilities.h"
 
-#include "core/math/math_funcs.h"
 #include "core/log/logger.h"
+#include "core/math/math_funcs.h"
+#include "core/math/random_pcg.h"
 
 #include
 #include
@@ -108,6 +109,203 @@ std::vector MLPPUtilities::biasInitialization(int n) {
 	return bias;
 }
 
+// Fills every element of `weights` with random values drawn from the
+// distribution selected by `type`; an unrecognized type leaves it untouched.
+// The element count n is what the size-dependent cases scale by.
+void MLPPUtilities::weight_initializationv(Ref weights, WeightDistributionType type) {
+	ERR_FAIL_COND(!weights.is_valid());
+
+	int n = weights->size();
+
+	// Nothing to fill; this also keeps the x / n bounds below away from n == 0.
+	if (n == 0) {
+		return;
+	}
+
+	real_t *weights_ptr = weights->ptrw();
+
+	// NOTE(review): rnd is seeded here but never read below.
+	RandomPCG rnd;
+	rnd.randomize();
+
+	std::random_device rd;
+	std::default_random_engine generator(rd());
+
+	switch (type) {
+		case WEIGHT_DISTRIBUTION_TYPE_DEFAULT: {
+			std::uniform_real_distribution distribution(0, 1);
+
+			for (int i = 0; i < n; ++i) {
+				weights_ptr[i] = distribution(generator);
+			}
+		} break;
+		case WEIGHT_DISTRIBUTION_TYPE_XAVIER_NORMAL: {
+			std::normal_distribution distribution(0, Math::sqrt(2.0 / (n + 1.0)));
+
+			for (int i = 0; i < n; ++i) {
+				weights_ptr[i] = distribution(generator);
+			}
+		} break;
+		case WEIGHT_DISTRIBUTION_TYPE_XAVIER_UNIFORM: {
+			std::uniform_real_distribution distribution(-Math::sqrt(6.0 / (n + 1.0)), Math::sqrt(6.0 / (n + 1.0)));
+
+			for (int i = 0; i < n; ++i) {
+				weights_ptr[i] = distribution(generator);
+			}
+		} break;
+		case WEIGHT_DISTRIBUTION_TYPE_HE_NORMAL: {
+			std::normal_distribution distribution(0, Math::sqrt(2.0 / n));
+
+			for (int i = 0; i < n; ++i) {
+				weights_ptr[i] = distribution(generator);
+			}
+		} break;
+		case WEIGHT_DISTRIBUTION_TYPE_HE_UNIFORM: {
+			std::uniform_real_distribution distribution(-Math::sqrt(6.0 / n), Math::sqrt(6.0 / n));
+
+			for (int i = 0; i < n; ++i) {
+				weights_ptr[i] = distribution(generator);
+			}
+		} break;
+		case WEIGHT_DISTRIBUTION_TYPE_LE_CUN_NORMAL: {
+			std::normal_distribution distribution(0, Math::sqrt(1.0 / n));
+
+			for (int i = 0; i < n; ++i) {
+				weights_ptr[i] = distribution(generator);
+			}
+		} break;
+		case WEIGHT_DISTRIBUTION_TYPE_LE_CUN_UNIFORM: {
+			std::uniform_real_distribution distribution(-Math::sqrt(3.0 / n), Math::sqrt(3.0 / n));
+
+			for (int i = 0; i < n; ++i) {
+				weights_ptr[i] = distribution(generator);
+			}
+		} break;
+		case WEIGHT_DISTRIBUTION_TYPE_UNIFORM: {
+			std::uniform_real_distribution distribution(-1.0 / Math::sqrt(static_cast(n)), 1.0 / Math::sqrt(static_cast(n)));
+
+			for (int i = 0; i < n; ++i) {
+				weights_ptr[i] = distribution(generator);
+			}
+		} break;
+		default:
+			break;
+	}
+}
+// Matrix counterpart of weight_initializationv(): fills all data_size() elements
+// of `weights`. n and m are the matrix's size().x and size().y.
+// NOTE(review): the He / LeCun / uniform cases scale by n only - confirm that
+// size().x is the intended axis for them.
+void MLPPUtilities::weight_initializationm(Ref weights, WeightDistributionType type) {
+	ERR_FAIL_COND(!weights.is_valid());
+
+	int n = weights->size().x;
+	int m = weights->size().y;
+	int data_size = weights->data_size();
+
+	// Nothing to fill; this also keeps the x / n bounds below away from n == 0.
+	if (data_size == 0) {
+		return;
+	}
+
+	real_t *weights_ptr = weights->ptrw();
+
+	// NOTE(review): rnd is seeded here but never read below.
+	RandomPCG rnd;
+	rnd.randomize();
+
+	std::random_device rd;
+	std::default_random_engine generator(rd());
+
+	// The ratios use floating-point literals on purpose: with int n and m,
+	// 2 / (n + m), 6 / n, 1 / n ... truncate to 0 and collapse the distribution.
+	switch (type) {
+		case WEIGHT_DISTRIBUTION_TYPE_DEFAULT: {
+			std::uniform_real_distribution distribution(0, 1);
+
+			for (int i = 0; i < data_size; ++i) {
+				weights_ptr[i] = distribution(generator);
+			}
+		} break;
+		case WEIGHT_DISTRIBUTION_TYPE_XAVIER_NORMAL: {
+			std::normal_distribution distribution(0, Math::sqrt(2.0 / (n + m)));
+
+			for (int i = 0; i < data_size; ++i) {
+				weights_ptr[i] = distribution(generator);
+			}
+		} break;
+		case WEIGHT_DISTRIBUTION_TYPE_XAVIER_UNIFORM: {
+			std::uniform_real_distribution distribution(-Math::sqrt(6.0 / (n + m)), Math::sqrt(6.0 / (n + m)));
+
+			for (int i = 0; i < data_size; ++i) {
+				weights_ptr[i] = distribution(generator);
+			}
+		} break;
+		case WEIGHT_DISTRIBUTION_TYPE_HE_NORMAL: {
+			std::normal_distribution distribution(0, Math::sqrt(2.0 / n));
+
+			for (int i = 0; i < data_size; ++i) {
+				weights_ptr[i] = distribution(generator);
+			}
+		} break;
+		case WEIGHT_DISTRIBUTION_TYPE_HE_UNIFORM: {
+			std::uniform_real_distribution distribution(-Math::sqrt(6.0 / n), Math::sqrt(6.0 / n));
+
+			for (int i = 0; i < data_size; ++i) {
+				weights_ptr[i] = distribution(generator);
+			}
+		} break;
+		case WEIGHT_DISTRIBUTION_TYPE_LE_CUN_NORMAL: {
+			std::normal_distribution distribution(0, Math::sqrt(1.0 / n));
+
+			for (int i = 0; i < data_size; ++i) {
+				weights_ptr[i] = distribution(generator);
+			}
+		} break;
+		case WEIGHT_DISTRIBUTION_TYPE_LE_CUN_UNIFORM: {
+			std::uniform_real_distribution distribution(-Math::sqrt(3.0 / n), Math::sqrt(3.0 / n));
+
+			for (int i = 0; i < data_size; ++i) {
+				weights_ptr[i] = distribution(generator);
+			}
+		} break;
+		case WEIGHT_DISTRIBUTION_TYPE_UNIFORM: {
+			std::uniform_real_distribution distribution(-1 / sqrt(n), 1 / sqrt(n));
+
+			for (int i = 0; i < data_size; ++i) {
+				weights_ptr[i] = distribution(generator);
+			}
+		} break;
+		default:
+			break;
+	}
+}
+// Returns one random bias value drawn from a uniform distribution over (0, 1).
+real_t MLPPUtilities::bias_initializationr() {
+	std::random_device rd;
+	std::default_random_engine generator(rd());
+	std::uniform_real_distribution distribution(0, 1);
+
+	return distribution(generator);
+}
+// Overwrites every element of `z` with a value drawn from a uniform distribution over (0, 1).
+void MLPPUtilities::bias_initializationv(Ref z) {
+	ERR_FAIL_COND(!z.is_valid());
+
+	std::random_device rd;
+	std::default_random_engine generator(rd());
+	std::uniform_real_distribution distribution(0, 1);
+
+	int n = z->size();
+	// Write into z's own storage; collecting the samples in a temporary would
+	// leave the caller's vector unchanged.
+	real_t *z_ptr = z->ptrw();
+
+	for (int i = 0; i < n; i++) {
+		z_ptr[i] = distribution(generator);
+	}
+}
+
 real_t MLPPUtilities::performance(std::vector y_hat, std::vector outputSet) {
 	real_t correct = 0;
 	for (int i = 0; i < y_hat.size(); i++) {
diff --git a/mlpp/utilities/utilities.h b/mlpp/utilities/utilities.h
index 1b1c22b..a660744 100644
--- a/mlpp/utilities/utilities.h
+++ b/mlpp/utilities/utilities.h
@@ -8,11 +8,10 @@
 // Created by Marc Melikyan on 1/16/21.
 //
 
-
-#include "core/math/math_defs.h"
 #include "core/containers/vector.h"
-#include "core/variant/variant.h"
+#include "core/math/math_defs.h"
 #include "core/string/ustring.h"
+#include "core/variant/variant.h"
 
 #include "../lin_alg/mlpp_matrix.h"
 #include "../lin_alg/mlpp_vector.h"
@@ -21,7 +20,6 @@
 #include
 #include
 
-
 class MLPPUtilities {
 public:
 	// Weight Init
@@ -31,6 +29,24 @@ public:
 	static std::vector> weightInitialization(int n, int m, std::string type = "Default");
 	static std::vector biasInitialization(int n);
 
+	// Selects the random distribution used by weight_initializationv() / weight_initializationm().
+	enum WeightDistributionType {
+		WEIGHT_DISTRIBUTION_TYPE_DEFAULT = 0,
+		WEIGHT_DISTRIBUTION_TYPE_XAVIER_NORMAL,
+		WEIGHT_DISTRIBUTION_TYPE_XAVIER_UNIFORM,
+		WEIGHT_DISTRIBUTION_TYPE_HE_NORMAL,
+		WEIGHT_DISTRIBUTION_TYPE_HE_UNIFORM,
+		WEIGHT_DISTRIBUTION_TYPE_LE_CUN_NORMAL,
+		WEIGHT_DISTRIBUTION_TYPE_LE_CUN_UNIFORM,
+		WEIGHT_DISTRIBUTION_TYPE_UNIFORM,
+	};
+
+	// The *v / *m initializers fill the container passed in; bias_initializationr() returns one value.
+	static void weight_initializationv(Ref weights, WeightDistributionType type = WEIGHT_DISTRIBUTION_TYPE_DEFAULT);
+	static void weight_initializationm(Ref weights, WeightDistributionType type = WEIGHT_DISTRIBUTION_TYPE_DEFAULT);
+	static real_t bias_initializationr();
+	static void bias_initializationv(Ref z);
+
 	// Cost/Performance related Functions
 	real_t performance(std::vector y_hat, std::vector y);
 	real_t performance(std::vector> y_hat, std::vector> y);
@@ -65,5 +81,4 @@ public:
 private:
 };
 
-
 #endif /* Utilities_hpp */