mirror of
https://github.com/Relintai/pmlpp.git
synced 2025-01-17 14:57:19 +01:00
Api standardization.
This commit is contained in:
parent
539167fee9
commit
590d1ce5e2
@ -855,7 +855,7 @@ Ref<MLPPVector> MLPPActivation::sigmoid_normv(const Ref<MLPPVector> &z) {
|
||||
}
|
||||
Ref<MLPPMatrix> MLPPActivation::sigmoid_normm(const Ref<MLPPMatrix> &z) {
|
||||
MLPPLinAlg alg;
|
||||
return alg.element_wise_divisionm(alg.onematm(z->size().x, z->size().y), alg.additionm(alg.onematm(z->size().x, z->size().y), alg.expm(alg.scalar_multiplym(-1, z))));
|
||||
return alg.element_wise_divisionnm(alg.onematm(z->size().x, z->size().y), alg.additionnm(alg.onematm(z->size().x, z->size().y), alg.expm(alg.scalar_multiplynm(-1, z))));
|
||||
}
|
||||
|
||||
real_t MLPPActivation::sigmoid_derivr(real_t z) {
|
||||
@ -876,7 +876,7 @@ Ref<MLPPMatrix> MLPPActivation::sigmoid_derivm(const Ref<MLPPMatrix> &z) {
|
||||
|
||||
Ref<MLPPMatrix> sig_norm = sigmoid_normm(z);
|
||||
|
||||
return alg.subtractionm(sig_norm, alg.hadamard_productm(sig_norm, sig_norm));
|
||||
return alg.subtractionnm(sig_norm, alg.hadamard_productnm(sig_norm, sig_norm));
|
||||
}
|
||||
|
||||
//SOFTMAX
|
||||
@ -1250,7 +1250,7 @@ Ref<MLPPVector> MLPPActivation::softsign_normv(const Ref<MLPPVector> &z) {
|
||||
Ref<MLPPMatrix> MLPPActivation::softsign_normm(const Ref<MLPPMatrix> &z) {
|
||||
MLPPLinAlg alg;
|
||||
|
||||
return alg.element_wise_divisionm(z, alg.additionnv(alg.onematm(z->size().x, z->size().y), alg.absm(z)));
|
||||
return alg.element_wise_divisionnm(z, alg.additionnv(alg.onematm(z->size().x, z->size().y), alg.absm(z)));
|
||||
}
|
||||
|
||||
real_t MLPPActivation::softsign_derivr(real_t z) {
|
||||
@ -1264,7 +1264,7 @@ Ref<MLPPVector> MLPPActivation::softsign_derivv(const Ref<MLPPVector> &z) {
|
||||
Ref<MLPPMatrix> MLPPActivation::softsign_derivm(const Ref<MLPPMatrix> &z) {
|
||||
MLPPLinAlg alg;
|
||||
|
||||
return alg.element_wise_divisionm(alg.onematm(z->size().x, z->size().y), alg.exponentiatenv(alg.additionm(alg.onematm(z->size().x, z->size().y), alg.absm(z)), 2));
|
||||
return alg.element_wise_divisionnm(alg.onematm(z->size().x, z->size().y), alg.exponentiatenv(alg.additionnm(alg.onematm(z->size().x, z->size().y), alg.absm(z)), 2));
|
||||
}
|
||||
|
||||
//GAUSSIANCDF
|
||||
@ -1281,7 +1281,7 @@ Ref<MLPPVector> MLPPActivation::gaussian_cdf_normv(const Ref<MLPPVector> &z) {
|
||||
Ref<MLPPMatrix> MLPPActivation::gaussian_cdf_normm(const Ref<MLPPMatrix> &z) {
|
||||
MLPPLinAlg alg;
|
||||
|
||||
return alg.scalar_multiplym(0.5, alg.additionm(alg.onematm(z->size().x, z->size().y), alg.erfm(alg.scalar_multiplym(1 / sqrt(2), z))));
|
||||
return alg.scalar_multiplynm(0.5, alg.additionnm(alg.onematm(z->size().x, z->size().y), alg.erfm(alg.scalar_multiplynm(1 / sqrt(2), z))));
|
||||
}
|
||||
|
||||
real_t MLPPActivation::gaussian_cdf_derivr(real_t z) {
|
||||
@ -1296,7 +1296,7 @@ Ref<MLPPVector> MLPPActivation::gaussian_cdf_derivv(const Ref<MLPPVector> &z) {
|
||||
Ref<MLPPMatrix> MLPPActivation::gaussian_cdf_derivm(const Ref<MLPPMatrix> &z) {
|
||||
MLPPLinAlg alg;
|
||||
|
||||
return alg.scalar_multiplym(1 / Math::sqrt(2 * M_PI), alg.expm(alg.scalar_multiplym(-1 / 2.0, alg.hadamard_productm(z, z))));
|
||||
return alg.scalar_multiplynm(1 / Math::sqrt(2 * M_PI), alg.expm(alg.scalar_multiplynm(-1 / 2.0, alg.hadamard_productnm(z, z))));
|
||||
}
|
||||
|
||||
//CLOGLOG
|
||||
@ -1313,7 +1313,7 @@ Ref<MLPPVector> MLPPActivation::cloglog_normv(const Ref<MLPPVector> &z) {
|
||||
Ref<MLPPMatrix> MLPPActivation::cloglog_normm(const Ref<MLPPMatrix> &z) {
|
||||
MLPPLinAlg alg;
|
||||
|
||||
return alg.scalar_multiplym(-1, alg.scalar_addm(-1, alg.expm(alg.scalar_multiplym(-1, alg.expm(z)))));
|
||||
return alg.scalar_multiplynm(-1, alg.scalar_addnm(-1, alg.expm(alg.scalar_multiplynm(-1, alg.expm(z)))));
|
||||
}
|
||||
|
||||
real_t MLPPActivation::cloglog_derivr(real_t z) {
|
||||
@ -1328,7 +1328,7 @@ Ref<MLPPVector> MLPPActivation::cloglog_derivv(const Ref<MLPPVector> &z) {
|
||||
Ref<MLPPMatrix> MLPPActivation::cloglog_derivm(const Ref<MLPPMatrix> &z) {
|
||||
MLPPLinAlg alg;
|
||||
|
||||
return alg.expm(alg.scalar_multiplym(-1, alg.expm(z)));
|
||||
return alg.expm(alg.scalar_multiplynm(-1, alg.expm(z)));
|
||||
}
|
||||
|
||||
//LOGIT
|
||||
@ -1344,7 +1344,7 @@ Ref<MLPPVector> MLPPActivation::logit_normv(const Ref<MLPPVector> &z) {
|
||||
Ref<MLPPMatrix> MLPPActivation::logit_normm(const Ref<MLPPMatrix> &z) {
|
||||
MLPPLinAlg alg;
|
||||
|
||||
return alg.logm(alg.element_wise_divisionm(z, alg.subtractionm(alg.onematm(z->size().x, z->size().y), z)));
|
||||
return alg.logm(alg.element_wise_divisionnm(z, alg.subtractionnm(alg.onematm(z->size().x, z->size().y), z)));
|
||||
}
|
||||
|
||||
real_t MLPPActivation::logit_derivr(real_t z) {
|
||||
@ -1360,11 +1360,11 @@ Ref<MLPPVector> MLPPActivation::logit_derivv(const Ref<MLPPVector> &z) {
|
||||
Ref<MLPPMatrix> MLPPActivation::logit_derivm(const Ref<MLPPMatrix> &z) {
|
||||
MLPPLinAlg alg;
|
||||
|
||||
return alg.subtractionm(
|
||||
alg.element_wise_divisionm(
|
||||
return alg.subtractionnm(
|
||||
alg.element_wise_divisionnm(
|
||||
alg.onematm(z->size().x, z->size().y), z),
|
||||
alg.element_wise_divisionm(alg.onematm(z->size().x, z->size().y),
|
||||
alg.subtractionm(z, alg.onematm(z->size().x, z->size().y))));
|
||||
alg.element_wise_divisionnm(alg.onematm(z->size().x, z->size().y),
|
||||
alg.subtractionnm(z, alg.onematm(z->size().x, z->size().y))));
|
||||
}
|
||||
|
||||
//UNITSTEP
|
||||
@ -1452,7 +1452,7 @@ Ref<MLPPVector> MLPPActivation::swish_derivv(const Ref<MLPPVector> &z) {
|
||||
Ref<MLPPMatrix> MLPPActivation::swish_derivm(const Ref<MLPPMatrix> &z) {
|
||||
MLPPLinAlg alg;
|
||||
|
||||
return alg.additionnv(swish_normm(z), alg.subtractionnv(sigmoid_normm(z), alg.hadamard_productm(sigmoid_normm(z), swish_normm(z))));
|
||||
return alg.additionnv(swish_normm(z), alg.subtractionnv(sigmoid_normm(z), alg.hadamard_productnm(sigmoid_normm(z), swish_normm(z))));
|
||||
}
|
||||
|
||||
//MISH
|
||||
@ -1468,7 +1468,7 @@ Ref<MLPPVector> MLPPActivation::mish_normv(const Ref<MLPPVector> &z) {
|
||||
Ref<MLPPMatrix> MLPPActivation::mish_normm(const Ref<MLPPMatrix> &z) {
|
||||
MLPPLinAlg alg;
|
||||
|
||||
return alg.hadamard_productm(z, tanh_normm(softplus_normm(z)));
|
||||
return alg.hadamard_productnm(z, tanh_normm(softplus_normm(z)));
|
||||
}
|
||||
|
||||
real_t MLPPActivation::mish_derivr(real_t z) {
|
||||
@ -1490,13 +1490,13 @@ Ref<MLPPMatrix> MLPPActivation::mish_derivm(const Ref<MLPPMatrix> &z) {
|
||||
MLPPLinAlg alg;
|
||||
|
||||
return alg.additionnv(
|
||||
alg.hadamard_productm(
|
||||
alg.hadamard_productm(
|
||||
alg.hadamard_productm(
|
||||
alg.hadamard_productnm(
|
||||
alg.hadamard_productnm(
|
||||
alg.hadamard_productnm(
|
||||
sech_normm(softplus_normm(z)), sech_normm(softplus_normm(z))),
|
||||
z),
|
||||
sigmoid_normm(z)),
|
||||
alg.element_wise_divisionm(mish_normm(z), z));
|
||||
alg.element_wise_divisionnm(mish_normm(z), z));
|
||||
}
|
||||
|
||||
//SINC
|
||||
@ -1512,7 +1512,7 @@ Ref<MLPPVector> MLPPActivation::sinc_normv(const Ref<MLPPVector> &z) {
|
||||
Ref<MLPPMatrix> MLPPActivation::sinc_normm(const Ref<MLPPMatrix> &z) {
|
||||
MLPPLinAlg alg;
|
||||
|
||||
return alg.element_wise_divisionm(alg.sinm(z), z);
|
||||
return alg.element_wise_divisionnm(alg.sinm(z), z);
|
||||
}
|
||||
|
||||
real_t MLPPActivation::sinc_derivr(real_t z) {
|
||||
@ -1526,7 +1526,7 @@ Ref<MLPPVector> MLPPActivation::sinc_derivv(const Ref<MLPPVector> &z) {
|
||||
Ref<MLPPMatrix> MLPPActivation::sinc_derivm(const Ref<MLPPMatrix> &z) {
|
||||
MLPPLinAlg alg;
|
||||
|
||||
return alg.element_wise_divisionm(alg.subtractionm(alg.hadamard_productm(z, alg.cosm(z)), alg.sinm(z)), alg.hadamard_productm(z, z));
|
||||
return alg.element_wise_divisionnm(alg.subtractionnm(alg.hadamard_productnm(z, alg.cosm(z)), alg.sinm(z)), alg.hadamard_productnm(z, z));
|
||||
}
|
||||
|
||||
//RELU
|
||||
@ -2006,7 +2006,7 @@ Ref<MLPPVector> MLPPActivation::sinh_normv(const Ref<MLPPVector> &z) {
|
||||
}
|
||||
Ref<MLPPMatrix> MLPPActivation::sinh_normm(const Ref<MLPPMatrix> &z) {
|
||||
MLPPLinAlg alg;
|
||||
return alg.scalar_multiplym(0.5, alg.subtractionm(alg.expm(z), alg.expm(alg.scalar_multiplym(-1, z))));
|
||||
return alg.scalar_multiplynm(0.5, alg.subtractionnm(alg.expm(z), alg.expm(alg.scalar_multiplynm(-1, z))));
|
||||
}
|
||||
|
||||
real_t MLPPActivation::sinh_derivr(real_t z) {
|
||||
@ -2030,7 +2030,7 @@ Ref<MLPPVector> MLPPActivation::cosh_normv(const Ref<MLPPVector> &z) {
|
||||
}
|
||||
Ref<MLPPMatrix> MLPPActivation::cosh_normm(const Ref<MLPPMatrix> &z) {
|
||||
MLPPLinAlg alg;
|
||||
return alg.scalar_multiplym(0.5, alg.additionnv(alg.expm(z), alg.expm(alg.scalar_multiplym(-1, z))));
|
||||
return alg.scalar_multiplynm(0.5, alg.additionnv(alg.expm(z), alg.expm(alg.scalar_multiplynm(-1, z))));
|
||||
}
|
||||
|
||||
real_t MLPPActivation::cosh_derivr(real_t z) {
|
||||
@ -2056,7 +2056,7 @@ Ref<MLPPVector> MLPPActivation::tanh_normv(const Ref<MLPPVector> &z) {
|
||||
Ref<MLPPMatrix> MLPPActivation::tanh_normm(const Ref<MLPPMatrix> &z) {
|
||||
MLPPLinAlg alg;
|
||||
|
||||
return alg.element_wise_divisionm(alg.subtractionm(alg.expm(z), alg.expm(alg.scalar_multiplym(-1, z))), alg.additionm(alg.expm(z), alg.expm(alg.scalar_multiplym(-1, z))));
|
||||
return alg.element_wise_divisionnm(alg.subtractionnm(alg.expm(z), alg.expm(alg.scalar_multiplynm(-1, z))), alg.additionnm(alg.expm(z), alg.expm(alg.scalar_multiplynm(-1, z))));
|
||||
}
|
||||
|
||||
real_t MLPPActivation::tanh_derivr(real_t z) {
|
||||
@ -2070,7 +2070,7 @@ Ref<MLPPVector> MLPPActivation::tanh_derivv(const Ref<MLPPVector> &z) {
|
||||
Ref<MLPPMatrix> MLPPActivation::tanh_derivm(const Ref<MLPPMatrix> &z) {
|
||||
MLPPLinAlg alg;
|
||||
|
||||
return alg.scalar_multiplym(-1, alg.scalar_addm(-1, alg.hadamard_productm(tanh_normm(z), tanh_normm(z))));
|
||||
return alg.scalar_multiplynm(-1, alg.scalar_addnm(-1, alg.hadamard_productnm(tanh_normm(z), tanh_normm(z))));
|
||||
}
|
||||
|
||||
//CSCH
|
||||
@ -2087,7 +2087,7 @@ Ref<MLPPVector> MLPPActivation::csch_normv(const Ref<MLPPVector> &z) {
|
||||
Ref<MLPPMatrix> MLPPActivation::csch_normm(const Ref<MLPPMatrix> &z) {
|
||||
MLPPLinAlg alg;
|
||||
|
||||
return alg.element_wise_divisionm(alg.onematm(z->size().x, z->size().y), sinh_normm(z));
|
||||
return alg.element_wise_divisionnm(alg.onematm(z->size().x, z->size().y), sinh_normm(z));
|
||||
}
|
||||
|
||||
real_t MLPPActivation::csch_derivr(real_t z) {
|
||||
@ -2102,7 +2102,7 @@ Ref<MLPPVector> MLPPActivation::csch_derivv(const Ref<MLPPVector> &z) {
|
||||
Ref<MLPPMatrix> MLPPActivation::csch_derivm(const Ref<MLPPMatrix> &z) {
|
||||
MLPPLinAlg alg;
|
||||
|
||||
return alg.hadamard_productm(alg.scalar_multiplym(-1, csch_normm(z)), coth_normm(z));
|
||||
return alg.hadamard_productnm(alg.scalar_multiplynm(-1, csch_normm(z)), coth_normm(z));
|
||||
}
|
||||
|
||||
//SECH
|
||||
@ -2121,7 +2121,7 @@ Ref<MLPPVector> MLPPActivation::sech_normv(const Ref<MLPPVector> &z) {
|
||||
Ref<MLPPMatrix> MLPPActivation::sech_normm(const Ref<MLPPMatrix> &z) {
|
||||
MLPPLinAlg alg;
|
||||
|
||||
return alg.element_wise_divisionm(alg.onematm(z->size().x, z->size().y), cosh_normm(z));
|
||||
return alg.element_wise_divisionnm(alg.onematm(z->size().x, z->size().y), cosh_normm(z));
|
||||
|
||||
// return activation(z, deriv, static_cast<void (*)(real_t, bool)>(&sech));
|
||||
}
|
||||
@ -2138,7 +2138,7 @@ Ref<MLPPVector> MLPPActivation::sech_derivv(const Ref<MLPPVector> &z) {
|
||||
Ref<MLPPMatrix> MLPPActivation::sech_derivm(const Ref<MLPPMatrix> &z) {
|
||||
MLPPLinAlg alg;
|
||||
|
||||
return alg.hadamard_productm(alg.scalar_multiplym(-1, sech_normm(z)), tanh_normm(z));
|
||||
return alg.hadamard_productnm(alg.scalar_multiplynm(-1, sech_normm(z)), tanh_normm(z));
|
||||
}
|
||||
|
||||
//COTH
|
||||
@ -2154,7 +2154,7 @@ Ref<MLPPVector> MLPPActivation::coth_normv(const Ref<MLPPVector> &z) {
|
||||
Ref<MLPPMatrix> MLPPActivation::coth_normm(const Ref<MLPPMatrix> &z) {
|
||||
MLPPLinAlg alg;
|
||||
|
||||
return alg.element_wise_divisionm(alg.onematm(z->size().x, z->size().y), tanh_normm(z));
|
||||
return alg.element_wise_divisionnm(alg.onematm(z->size().x, z->size().y), tanh_normm(z));
|
||||
}
|
||||
|
||||
real_t MLPPActivation::coth_derivr(real_t z) {
|
||||
@ -2168,7 +2168,7 @@ Ref<MLPPVector> MLPPActivation::coth_derivv(const Ref<MLPPVector> &z) {
|
||||
Ref<MLPPMatrix> MLPPActivation::coth_derivm(const Ref<MLPPMatrix> &z) {
|
||||
MLPPLinAlg alg;
|
||||
|
||||
return alg.hadamard_productm(alg.scalar_multiplym(-1, csch_normm(z)), csch_normm(z));
|
||||
return alg.hadamard_productnm(alg.scalar_multiplynm(-1, csch_normm(z)), csch_normm(z));
|
||||
}
|
||||
|
||||
//ARSINH
|
||||
@ -2186,7 +2186,7 @@ Ref<MLPPVector> MLPPActivation::arsinh_normv(const Ref<MLPPVector> &z) {
|
||||
Ref<MLPPMatrix> MLPPActivation::arsinh_normm(const Ref<MLPPMatrix> &z) {
|
||||
MLPPLinAlg alg;
|
||||
|
||||
return alg.logm(alg.additionm(z, alg.sqrtm(alg.additionm(alg.hadamard_productm(z, z), alg.onematm(z->size().x, z->size().y)))));
|
||||
return alg.logm(alg.additionnm(z, alg.sqrtm(alg.additionnm(alg.hadamard_productnm(z, z), alg.onematm(z->size().x, z->size().y)))));
|
||||
}
|
||||
|
||||
real_t MLPPActivation::arsinh_derivr(real_t z) {
|
||||
@ -2202,7 +2202,7 @@ Ref<MLPPVector> MLPPActivation::arsinh_derivv(const Ref<MLPPVector> &z) {
|
||||
Ref<MLPPMatrix> MLPPActivation::arsinh_derivm(const Ref<MLPPMatrix> &z) {
|
||||
MLPPLinAlg alg;
|
||||
|
||||
return alg.element_wise_divisionm(alg.onematm(z->size().x, z->size().y), alg.sqrtm(alg.additionm(alg.hadamard_productm(z, z), alg.onematm(z->size().x, z->size().y))));
|
||||
return alg.element_wise_divisionnm(alg.onematm(z->size().x, z->size().y), alg.sqrtm(alg.additionnm(alg.hadamard_productnm(z, z), alg.onematm(z->size().x, z->size().y))));
|
||||
}
|
||||
|
||||
//ARCOSH
|
||||
@ -2219,7 +2219,7 @@ Ref<MLPPVector> MLPPActivation::arcosh_normv(const Ref<MLPPVector> &z) {
|
||||
Ref<MLPPMatrix> MLPPActivation::arcosh_normm(const Ref<MLPPMatrix> &z) {
|
||||
MLPPLinAlg alg;
|
||||
|
||||
return alg.logm(alg.additionm(z, alg.sqrtm(alg.subtractionm(alg.hadamard_productm(z, z), alg.onematm(z->size().x, z->size().y)))));
|
||||
return alg.logm(alg.additionnm(z, alg.sqrtm(alg.subtractionnm(alg.hadamard_productnm(z, z), alg.onematm(z->size().x, z->size().y)))));
|
||||
}
|
||||
|
||||
real_t MLPPActivation::arcosh_derivr(real_t z) {
|
||||
@ -2234,7 +2234,7 @@ Ref<MLPPVector> MLPPActivation::arcosh_derivv(const Ref<MLPPVector> &z) {
|
||||
Ref<MLPPMatrix> MLPPActivation::arcosh_derivm(const Ref<MLPPMatrix> &z) {
|
||||
MLPPLinAlg alg;
|
||||
|
||||
return alg.element_wise_divisionm(alg.onematm(z->size().x, z->size().y), alg.sqrtm(alg.subtractionm(alg.hadamard_productm(z, z), alg.onematm(z->size().x, z->size().y))));
|
||||
return alg.element_wise_divisionnm(alg.onematm(z->size().x, z->size().y), alg.sqrtm(alg.subtractionnm(alg.hadamard_productnm(z, z), alg.onematm(z->size().x, z->size().y))));
|
||||
}
|
||||
|
||||
//ARTANH
|
||||
@ -2251,7 +2251,7 @@ Ref<MLPPVector> MLPPActivation::artanh_normv(const Ref<MLPPVector> &z) {
|
||||
Ref<MLPPMatrix> MLPPActivation::artanh_normm(const Ref<MLPPMatrix> &z) {
|
||||
MLPPLinAlg alg;
|
||||
|
||||
return alg.scalar_multiplym(0.5, alg.logm(alg.element_wise_divisionm(alg.additionm(alg.onematm(z->size().x, z->size().y), z), alg.subtractionm(alg.onematm(z->size().x, z->size().y), z))));
|
||||
return alg.scalar_multiplynm(0.5, alg.logm(alg.element_wise_divisionnm(alg.additionnm(alg.onematm(z->size().x, z->size().y), z), alg.subtractionnm(alg.onematm(z->size().x, z->size().y), z))));
|
||||
}
|
||||
|
||||
real_t MLPPActivation::artanh_derivr(real_t z) {
|
||||
@ -2266,7 +2266,7 @@ Ref<MLPPVector> MLPPActivation::artanh_derivv(const Ref<MLPPVector> &z) {
|
||||
Ref<MLPPMatrix> MLPPActivation::artanh_derivm(const Ref<MLPPMatrix> &z) {
|
||||
MLPPLinAlg alg;
|
||||
|
||||
return alg.element_wise_divisionm(alg.onematm(z->size().x, z->size().y), alg.subtractionnv(alg.onematm(z->size().x, z->size().y), alg.hadamard_productm(z, z)));
|
||||
return alg.element_wise_divisionnm(alg.onematm(z->size().x, z->size().y), alg.subtractionnv(alg.onematm(z->size().x, z->size().y), alg.hadamard_productnm(z, z)));
|
||||
}
|
||||
|
||||
//ARCSCH
|
||||
@ -2289,11 +2289,11 @@ Ref<MLPPMatrix> MLPPActivation::arcsch_normm(const Ref<MLPPMatrix> &z) {
|
||||
MLPPLinAlg alg;
|
||||
|
||||
return alg.logm(
|
||||
alg.additionm(
|
||||
alg.additionnm(
|
||||
alg.sqrtm(
|
||||
alg.additionm(alg.onematm(z->size().x, z->size().y),
|
||||
alg.element_wise_divisionm(alg.onematm(z->size().x, z->size().y), alg.hadamard_productm(z, z)))),
|
||||
alg.element_wise_divisionm(alg.onematm(z->size().x, z->size().y), z)));
|
||||
alg.additionnm(alg.onematm(z->size().x, z->size().y),
|
||||
alg.element_wise_divisionnm(alg.onematm(z->size().x, z->size().y), alg.hadamard_productnm(z, z)))),
|
||||
alg.element_wise_divisionnm(alg.onematm(z->size().x, z->size().y), z)));
|
||||
}
|
||||
|
||||
real_t MLPPActivation::arcsch_derivr(real_t z) {
|
||||
@ -2304,18 +2304,18 @@ Ref<MLPPVector> MLPPActivation::arcsch_derivv(const Ref<MLPPVector> &z) {
|
||||
|
||||
return alg.element_wise_division(
|
||||
alg.fullv(z->size(), -1),
|
||||
alg.hadamard_productm(
|
||||
alg.hadamard_productnm(
|
||||
alg.hadamard_productnv(z, z),
|
||||
alg.sqrtnv(alg.additionnv(alg.onevecv(z->size()), alg.element_wise_division(alg.onevecv(z->size()), alg.hadamard_productnv(z, z))))));
|
||||
}
|
||||
Ref<MLPPMatrix> MLPPActivation::arcsch_derivm(const Ref<MLPPMatrix> &z) {
|
||||
MLPPLinAlg alg;
|
||||
|
||||
return alg.element_wise_divisionm(
|
||||
return alg.element_wise_divisionnm(
|
||||
alg.fullm(z->size().x, z->size().y, -1),
|
||||
alg.hadamard_productm(alg.hadamard_productm(z, z),
|
||||
alg.sqrtm(alg.additionm(alg.onematm(z->size().x, z->size().y),
|
||||
alg.element_wise_divisionm(alg.onematm(z->size().x, z->size().y), alg.hadamard_productm(z, z))))));
|
||||
alg.hadamard_productnm(alg.hadamard_productnm(z, z),
|
||||
alg.sqrtm(alg.additionnm(alg.onematm(z->size().x, z->size().y),
|
||||
alg.element_wise_divisionnm(alg.onematm(z->size().x, z->size().y), alg.hadamard_productnm(z, z))))));
|
||||
}
|
||||
|
||||
//ARSECH
|
||||
@ -2340,16 +2340,16 @@ Ref<MLPPMatrix> MLPPActivation::arsech_normm(const Ref<MLPPMatrix> &z) {
|
||||
MLPPLinAlg alg;
|
||||
|
||||
return alg.logm(
|
||||
alg.additionm(
|
||||
alg.element_wise_divisionm(
|
||||
alg.additionnm(
|
||||
alg.element_wise_divisionnm(
|
||||
alg.onematm(z->size().x, z->size().y), z),
|
||||
alg.hadamard_productm(
|
||||
alg.additionm(
|
||||
alg.element_wise_divisionm(
|
||||
alg.hadamard_productnm(
|
||||
alg.additionnm(
|
||||
alg.element_wise_divisionnm(
|
||||
alg.onematm(z->size().x, z->size().y), z),
|
||||
alg.onematm(z->size().x, z->size().y)),
|
||||
alg.subtractionm(
|
||||
alg.element_wise_divisionm(
|
||||
alg.subtractionnm(
|
||||
alg.element_wise_divisionnm(
|
||||
alg.onematm(z->size().x, z->size().y), z),
|
||||
alg.onematm(z->size().x, z->size().y)))));
|
||||
}
|
||||
@ -2372,11 +2372,11 @@ Ref<MLPPVector> MLPPActivation::arsech_derivv(const Ref<MLPPVector> &z) {
|
||||
Ref<MLPPMatrix> MLPPActivation::arsech_derivm(const Ref<MLPPMatrix> &z) {
|
||||
MLPPLinAlg alg;
|
||||
|
||||
return alg.element_wise_divisionm(
|
||||
return alg.element_wise_divisionnm(
|
||||
alg.fullm(z->size().x, z->size().y, -1),
|
||||
alg.hadamard_productm(
|
||||
alg.hadamard_productnm(
|
||||
z,
|
||||
alg.sqrtm(alg.subtractionm(alg.onematm(z->size().x, z->size().y), alg.hadamard_productm(z, z)))));
|
||||
alg.sqrtm(alg.subtractionnm(alg.onematm(z->size().x, z->size().y), alg.hadamard_productnm(z, z)))));
|
||||
}
|
||||
|
||||
//ARCOTH
|
||||
@ -2395,9 +2395,9 @@ Ref<MLPPVector> MLPPActivation::arcoth_normv(const Ref<MLPPVector> &z) {
|
||||
Ref<MLPPMatrix> MLPPActivation::arcoth_normm(const Ref<MLPPMatrix> &z) {
|
||||
MLPPLinAlg alg;
|
||||
|
||||
return alg.scalar_multiplym(
|
||||
return alg.scalar_multiplynm(
|
||||
0.5,
|
||||
alg.logm(alg.element_wise_divisionm(alg.additionm(alg.onematm(z->size().x, z->size().y), z), alg.subtractionm(z, alg.onematm(z->size().x, z->size().y)))));
|
||||
alg.logm(alg.element_wise_divisionnm(alg.additionnm(alg.onematm(z->size().x, z->size().y), z), alg.subtractionnm(z, alg.onematm(z->size().x, z->size().y)))));
|
||||
}
|
||||
|
||||
real_t MLPPActivation::arcoth_derivr(real_t z) {
|
||||
@ -2412,7 +2412,7 @@ Ref<MLPPVector> MLPPActivation::arcoth_derivv(const Ref<MLPPVector> &z) {
|
||||
Ref<MLPPMatrix> MLPPActivation::arcoth_derivm(const Ref<MLPPMatrix> &z) {
|
||||
MLPPLinAlg alg;
|
||||
|
||||
return alg.element_wise_divisionm(alg.onematm(z->size().x, z->size().y), alg.subtractionm(alg.onematm(z->size().x, z->size().y), alg.hadamard_productm(z, z)));
|
||||
return alg.element_wise_divisionnm(alg.onematm(z->size().x, z->size().y), alg.subtractionnm(alg.onematm(z->size().x, z->size().y), alg.hadamard_productnm(z, z)));
|
||||
}
|
||||
|
||||
void MLPPActivation::_bind_methods() {
|
||||
|
@ -607,7 +607,7 @@ void MLPPANN::nadam(real_t learning_rate, int max_epoch, int mini_batch_size, re
|
||||
Ref<MLPPVector> m_output_final = alg.additionnv(alg.scalar_multiplynv(b1, m_output_hat), alg.scalar_multiplynv((1 - b1) / (1.0 - Math::pow(b1, epoch)), grads.output_w_grad));
|
||||
|
||||
Vector<Ref<MLPPMatrix>> hidden_layer_updations = alg.scalar_multiply_vm(learning_rate / _n, alg.element_wise_division_vt(m_hidden_final, alg.scalar_add_vm(e, alg.sqrt_vt(v_hidden_hat))));
|
||||
Ref<MLPPVector> output_layer_updation = alg.scalar_multiplynv(learning_rate / _n, alg.element_wise_divisionm(m_output_final, alg.scalar_addnv(e, alg.sqrtnv(v_output_hat))));
|
||||
Ref<MLPPVector> output_layer_updation = alg.scalar_multiplynv(learning_rate / _n, alg.element_wise_divisionnm(m_output_final, alg.scalar_addnv(e, alg.sqrtnv(v_output_hat))));
|
||||
|
||||
update_parameters(hidden_layer_updations, output_layer_updation, learning_rate); // subject to change. may want bias to have this matrix too.
|
||||
|
||||
@ -844,14 +844,14 @@ void MLPPANN::update_parameters(const Vector<Ref<MLPPMatrix>> &hidden_layer_upda
|
||||
if (!_network.empty()) {
|
||||
Ref<MLPPHiddenLayer> layer = _network[_network.size() - 1];
|
||||
|
||||
layer->set_weights(alg.subtractionm(layer->get_weights(), hidden_layer_updations[0]));
|
||||
layer->set_bias(alg.subtract_matrix_rows(layer->get_bias(), alg.scalar_multiplym(learning_rate / _n, layer->get_delta())));
|
||||
layer->set_weights(alg.subtractionnm(layer->get_weights(), hidden_layer_updations[0]));
|
||||
layer->set_bias(alg.subtract_matrix_rows(layer->get_bias(), alg.scalar_multiplynm(learning_rate / _n, layer->get_delta())));
|
||||
|
||||
for (int i = _network.size() - 2; i >= 0; i--) {
|
||||
layer = _network[i];
|
||||
|
||||
layer->set_weights(alg.subtractionm(layer->get_weights(), hidden_layer_updations[(_network.size() - 2) - i + 1]));
|
||||
layer->set_bias(alg.subtract_matrix_rows(layer->get_bias(), alg.scalar_multiplym(learning_rate / _n, layer->get_delta())));
|
||||
layer->set_weights(alg.subtractionnm(layer->get_weights(), hidden_layer_updations[(_network.size() - 2) - i + 1]));
|
||||
layer->set_bias(alg.subtract_matrix_rows(layer->get_bias(), alg.scalar_multiplynm(learning_rate / _n, layer->get_delta())));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -867,25 +867,25 @@ MLPPANN::ComputeGradientsResult MLPPANN::compute_gradients(const Ref<MLPPVector>
|
||||
|
||||
_output_layer->set_delta(alg.hadamard_productnv(mlpp_cost.run_cost_deriv_vector(_output_layer->get_cost(), y_hat, _output_set), avn.run_activation_deriv_vector(_output_layer->get_activation(), _output_layer->get_z())));
|
||||
|
||||
res.output_w_grad = alg.mat_vec_multv(alg.transposem(_output_layer->get_input()), _output_layer->get_delta());
|
||||
res.output_w_grad = alg.mat_vec_multv(alg.transposenm(_output_layer->get_input()), _output_layer->get_delta());
|
||||
res.output_w_grad = alg.additionnv(res.output_w_grad, regularization.reg_deriv_termv(_output_layer->get_weights(), _output_layer->get_lambda(), _output_layer->get_alpha(), _output_layer->get_reg()));
|
||||
|
||||
if (!_network.empty()) {
|
||||
Ref<MLPPHiddenLayer> layer = _network[_network.size() - 1];
|
||||
|
||||
layer->set_delta(alg.hadamard_productm(alg.outer_product(_output_layer->get_delta(), _output_layer->get_weights()), avn.run_activation_deriv_vector(layer->get_activation(), layer->get_z())));
|
||||
layer->set_delta(alg.hadamard_productnm(alg.outer_product(_output_layer->get_delta(), _output_layer->get_weights()), avn.run_activation_deriv_vector(layer->get_activation(), layer->get_z())));
|
||||
|
||||
Ref<MLPPMatrix> hidden_layer_w_grad = alg.matmultm(alg.transposem(layer->get_input()), layer->get_delta());
|
||||
Ref<MLPPMatrix> hidden_layer_w_grad = alg.matmultnm(alg.transposenm(layer->get_input()), layer->get_delta());
|
||||
|
||||
res.cumulative_hidden_layer_w_grad.push_back(alg.additionm(hidden_layer_w_grad, regularization.reg_deriv_termm(layer->get_weights(), layer->get_lambda(), layer->get_alpha(), layer->get_reg()))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
|
||||
res.cumulative_hidden_layer_w_grad.push_back(alg.additionnm(hidden_layer_w_grad, regularization.reg_deriv_termm(layer->get_weights(), layer->get_lambda(), layer->get_alpha(), layer->get_reg()))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
|
||||
|
||||
for (int i = _network.size() - 2; i >= 0; i--) {
|
||||
layer = _network[i];
|
||||
Ref<MLPPHiddenLayer> next_layer = _network[i + 1];
|
||||
|
||||
layer->set_delta(alg.hadamard_productm(alg.matmultm(next_layer->get_delta(), alg.transposem(next_layer->get_weights())), avn.run_activation_deriv_vector(layer->get_activation(), layer->get_z())));
|
||||
hidden_layer_w_grad = alg.matmultm(alg.transposem(layer->get_input()), layer->get_delta());
|
||||
res.cumulative_hidden_layer_w_grad.push_back(alg.additionm(hidden_layer_w_grad, regularization.reg_deriv_termm(layer->get_weights(), layer->get_lambda(), layer->get_alpha(), layer->get_reg()))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
|
||||
layer->set_delta(alg.hadamard_productnm(alg.matmultnm(next_layer->get_delta(), alg.transposenm(next_layer->get_weights())), avn.run_activation_deriv_vector(layer->get_activation(), layer->get_z())));
|
||||
hidden_layer_w_grad = alg.matmultnm(alg.transposenm(layer->get_input()), layer->get_delta());
|
||||
res.cumulative_hidden_layer_w_grad.push_back(alg.additionnm(hidden_layer_w_grad, regularization.reg_deriv_termm(layer->get_weights(), layer->get_lambda(), layer->get_alpha(), layer->get_reg()))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -60,27 +60,27 @@ void MLPPAutoEncoder::gradient_descent(real_t learning_rate, int max_epoch, bool
|
||||
cost_prev = cost(_y_hat, _input_set);
|
||||
|
||||
// Calculating the errors
|
||||
Ref<MLPPMatrix> error = alg.subtractionm(_y_hat, _input_set);
|
||||
Ref<MLPPMatrix> error = alg.subtractionnm(_y_hat, _input_set);
|
||||
|
||||
// Calculating the weight/bias gradients for layer 2
|
||||
Ref<MLPPMatrix> D2_1 = alg.matmultm(alg.transposem(_a2), error);
|
||||
Ref<MLPPMatrix> D2_1 = alg.matmultnm(alg.transposenm(_a2), error);
|
||||
|
||||
// weights and bias updation for layer 2
|
||||
_weights2 = alg.subtractionm(_weights2, alg.scalar_multiplym(learning_rate / _n, D2_1));
|
||||
_weights2 = alg.subtractionnm(_weights2, alg.scalar_multiplynm(learning_rate / _n, D2_1));
|
||||
|
||||
// Calculating the bias gradients for layer 2
|
||||
_bias2 = alg.subtract_matrix_rows(_bias2, alg.scalar_multiplym(learning_rate, error));
|
||||
_bias2 = alg.subtract_matrix_rows(_bias2, alg.scalar_multiplynm(learning_rate, error));
|
||||
|
||||
//Calculating the weight/bias for layer 1
|
||||
|
||||
Ref<MLPPMatrix> D1_1 = alg.matmultm(error, alg.transposem(_weights2));
|
||||
Ref<MLPPMatrix> D1_2 = alg.hadamard_productm(D1_1, avn.sigmoid_derivm(_z2));
|
||||
Ref<MLPPMatrix> D1_3 = alg.matmultm(alg.transposem(_input_set), D1_2);
|
||||
Ref<MLPPMatrix> D1_1 = alg.matmultnm(error, alg.transposenm(_weights2));
|
||||
Ref<MLPPMatrix> D1_2 = alg.hadamard_productnm(D1_1, avn.sigmoid_derivm(_z2));
|
||||
Ref<MLPPMatrix> D1_3 = alg.matmultnm(alg.transposenm(_input_set), D1_2);
|
||||
|
||||
// weight an bias updation for layer 1
|
||||
_weights1 = alg.subtractionm(_weights1, alg.scalar_multiplym(learning_rate / _n, D1_3));
|
||||
_weights1 = alg.subtractionnm(_weights1, alg.scalar_multiplynm(learning_rate / _n, D1_3));
|
||||
|
||||
_bias1 = alg.subtract_matrix_rows(_bias1, alg.scalar_multiplym(learning_rate / _n, D1_2));
|
||||
_bias1 = alg.subtract_matrix_rows(_bias1, alg.scalar_multiplynm(learning_rate / _n, D1_2));
|
||||
|
||||
forward_pass();
|
||||
|
||||
@ -141,7 +141,7 @@ void MLPPAutoEncoder::sgd(real_t learning_rate, int max_epoch, bool ui) {
|
||||
|
||||
// Weight updation for layer 2
|
||||
Ref<MLPPMatrix> D2_1 = alg.outer_product(error, prop_res.a2);
|
||||
_weights2 = alg.subtractionm(_weights2, alg.scalar_multiplym(learning_rate, alg.transposem(D2_1)));
|
||||
_weights2 = alg.subtractionnm(_weights2, alg.scalar_multiplynm(learning_rate, alg.transposenm(D2_1)));
|
||||
|
||||
// Bias updation for layer 2
|
||||
_bias2 = alg.subtractionnv(_bias2, alg.scalar_multiplynv(learning_rate, error));
|
||||
@ -151,7 +151,7 @@ void MLPPAutoEncoder::sgd(real_t learning_rate, int max_epoch, bool ui) {
|
||||
Ref<MLPPVector> D1_2 = alg.hadamard_productnv(D1_1, avn.sigmoid_derivv(prop_res.z2));
|
||||
Ref<MLPPMatrix> D1_3 = alg.outer_product(input_set_row_tmp, D1_2);
|
||||
|
||||
_weights1 = alg.subtractionm(_weights1, alg.scalar_multiplym(learning_rate, D1_3));
|
||||
_weights1 = alg.subtractionnm(_weights1, alg.scalar_multiplynm(learning_rate, D1_3));
|
||||
// Bias updation for layer 1
|
||||
|
||||
_bias1 = alg.subtractionnv(_bias1, alg.scalar_multiplynv(learning_rate, D1_2));
|
||||
@ -200,27 +200,27 @@ void MLPPAutoEncoder::mbgd(real_t learning_rate, int max_epoch, int mini_batch_s
|
||||
cost_prev = cost(y_hat, current_batch);
|
||||
|
||||
// Calculating the errors
|
||||
Ref<MLPPMatrix> error = alg.subtractionm(y_hat, current_batch);
|
||||
Ref<MLPPMatrix> error = alg.subtractionnm(y_hat, current_batch);
|
||||
|
||||
// Calculating the weight/bias gradients for layer 2
|
||||
|
||||
Ref<MLPPMatrix> D2_1 = alg.matmultm(alg.transposem(prop_res.a2), error);
|
||||
Ref<MLPPMatrix> D2_1 = alg.matmultnm(alg.transposenm(prop_res.a2), error);
|
||||
|
||||
// weights and bias updation for layer 2
|
||||
_weights2 = alg.subtractionm(_weights2, alg.scalar_multiplym(learning_rate / current_batch->size().y, D2_1));
|
||||
_weights2 = alg.subtractionnm(_weights2, alg.scalar_multiplynm(learning_rate / current_batch->size().y, D2_1));
|
||||
|
||||
// Bias Updation for layer 2
|
||||
_bias2 = alg.subtract_matrix_rows(_bias2, alg.scalar_multiplym(learning_rate, error));
|
||||
_bias2 = alg.subtract_matrix_rows(_bias2, alg.scalar_multiplynm(learning_rate, error));
|
||||
|
||||
//Calculating the weight/bias for layer 1
|
||||
|
||||
Ref<MLPPMatrix> D1_1 = alg.matmultm(error, alg.transposem(_weights2));
|
||||
Ref<MLPPMatrix> D1_2 = alg.hadamard_productm(D1_1, avn.sigmoid_derivm(prop_res.z2));
|
||||
Ref<MLPPMatrix> D1_3 = alg.matmultm(alg.transposem(current_batch), D1_2);
|
||||
Ref<MLPPMatrix> D1_1 = alg.matmultnm(error, alg.transposenm(_weights2));
|
||||
Ref<MLPPMatrix> D1_2 = alg.hadamard_productnm(D1_1, avn.sigmoid_derivm(prop_res.z2));
|
||||
Ref<MLPPMatrix> D1_3 = alg.matmultnm(alg.transposenm(current_batch), D1_2);
|
||||
|
||||
// weight and bias updation for layer 1
|
||||
_weights1 = alg.subtractionm(_weights1, alg.scalar_multiplym(learning_rate / current_batch->size().x, D1_3));
|
||||
_bias1 = alg.subtract_matrix_rows(_bias1, alg.scalar_multiplym(learning_rate / current_batch->size().x, D1_2));
|
||||
_weights1 = alg.subtractionnm(_weights1, alg.scalar_multiplynm(learning_rate / current_batch->size().x, D1_3));
|
||||
_bias1 = alg.subtract_matrix_rows(_bias1, alg.scalar_multiplynm(learning_rate / current_batch->size().x, D1_2));
|
||||
|
||||
y_hat = evaluatem(current_batch);
|
||||
|
||||
@ -304,10 +304,10 @@ Ref<MLPPVector> MLPPAutoEncoder::evaluatev(const Ref<MLPPVector> &x) {
|
||||
MLPPLinAlg alg;
|
||||
MLPPActivation avn;
|
||||
|
||||
Ref<MLPPVector> z2 = alg.additionnv(alg.mat_vec_multv(alg.transposem(_weights1), x), _bias1);
|
||||
Ref<MLPPVector> z2 = alg.additionnv(alg.mat_vec_multv(alg.transposenm(_weights1), x), _bias1);
|
||||
Ref<MLPPVector> a2 = avn.sigmoid_normv(z2);
|
||||
|
||||
return alg.additionnv(alg.mat_vec_multv(alg.transposem(_weights2), a2), _bias2);
|
||||
return alg.additionnv(alg.mat_vec_multv(alg.transposenm(_weights2), a2), _bias2);
|
||||
}
|
||||
|
||||
MLPPAutoEncoder::PropagateVResult MLPPAutoEncoder::propagatev(const Ref<MLPPVector> &x) {
|
||||
@ -316,7 +316,7 @@ MLPPAutoEncoder::PropagateVResult MLPPAutoEncoder::propagatev(const Ref<MLPPVect
|
||||
|
||||
PropagateVResult res;
|
||||
|
||||
res.z2 = alg.additionnv(alg.mat_vec_multv(alg.transposem(_weights1), x), _bias1);
|
||||
res.z2 = alg.additionnv(alg.mat_vec_multv(alg.transposenm(_weights1), x), _bias1);
|
||||
res.a2 = avn.sigmoid_normv(res.z2);
|
||||
|
||||
return res;
|
||||
@ -326,10 +326,10 @@ Ref<MLPPMatrix> MLPPAutoEncoder::evaluatem(const Ref<MLPPMatrix> &X) {
|
||||
MLPPLinAlg alg;
|
||||
MLPPActivation avn;
|
||||
|
||||
Ref<MLPPMatrix> z2 = alg.mat_vec_addv(alg.matmultm(X, _weights1), _bias1);
|
||||
Ref<MLPPMatrix> z2 = alg.mat_vec_addv(alg.matmultnm(X, _weights1), _bias1);
|
||||
Ref<MLPPMatrix> a2 = avn.sigmoid_normm(z2);
|
||||
|
||||
return alg.mat_vec_addv(alg.matmultm(a2, _weights2), _bias2);
|
||||
return alg.mat_vec_addv(alg.matmultnm(a2, _weights2), _bias2);
|
||||
}
|
||||
|
||||
MLPPAutoEncoder::PropagateMResult MLPPAutoEncoder::propagatem(const Ref<MLPPMatrix> &X) {
|
||||
@ -338,7 +338,7 @@ MLPPAutoEncoder::PropagateMResult MLPPAutoEncoder::propagatem(const Ref<MLPPMatr
|
||||
|
||||
PropagateMResult res;
|
||||
|
||||
res.z2 = alg.mat_vec_addv(alg.matmultm(X, _weights1), _bias1);
|
||||
res.z2 = alg.mat_vec_addv(alg.matmultnm(X, _weights1), _bias1);
|
||||
res.a2 = avn.sigmoid_normm(res.z2);
|
||||
|
||||
return res;
|
||||
@ -348,9 +348,9 @@ void MLPPAutoEncoder::forward_pass() {
|
||||
MLPPLinAlg alg;
|
||||
MLPPActivation avn;
|
||||
|
||||
_z2 = alg.mat_vec_addv(alg.matmultm(_input_set, _weights1), _bias1);
|
||||
_z2 = alg.mat_vec_addv(alg.matmultnm(_input_set, _weights1), _bias1);
|
||||
_a2 = avn.sigmoid_normm(_z2);
|
||||
_y_hat = alg.mat_vec_addv(alg.matmultm(_a2, _weights2), _bias2);
|
||||
_y_hat = alg.mat_vec_addv(alg.matmultnm(_a2, _weights2), _bias2);
|
||||
}
|
||||
|
||||
void MLPPAutoEncoder::_bind_methods() {
|
||||
|
@ -210,7 +210,6 @@ MLPPAutoEncoderOld::MLPPAutoEncoderOld(std::vector<std::vector<real_t>> pinputSe
|
||||
n = inputSet.size();
|
||||
k = inputSet[0].size();
|
||||
|
||||
MLPPActivationOld avn;
|
||||
y_hat.resize(inputSet.size());
|
||||
|
||||
weights1 = MLPPUtilities::weightInitialization(k, n_hidden);
|
||||
|
@ -37,7 +37,7 @@ void MLPPCLogLogReg::gradient_descent(real_t learning_rate, int max_epoch, bool
|
||||
Ref<MLPPVector> error = alg.subtractionnv(_y_hat, _output_set);
|
||||
|
||||
// Calculating the weight gradients
|
||||
_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate / _n, alg.mat_vec_multv(alg.transposem(_input_set), alg.hadamard_productnv(error, avn.cloglog_derivv(_z)))));
|
||||
_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate / _n, alg.mat_vec_multv(alg.transposenm(_input_set), alg.hadamard_productnv(error, avn.cloglog_derivv(_z)))));
|
||||
_weights = regularization.reg_weightsv(_weights, _lambda, _alpha, _reg);
|
||||
|
||||
// Calculating the bias gradients
|
||||
@ -73,7 +73,7 @@ void MLPPCLogLogReg::mle(real_t learning_rate, int max_epoch, bool ui) {
|
||||
|
||||
Ref<MLPPVector> error = alg.subtractionnv(_y_hat, _output_set);
|
||||
|
||||
_weights = alg.additionnv(_weights, alg.scalar_multiplynv(learning_rate / _n, alg.mat_vec_multv(alg.transposem(_input_set), alg.hadamard_productnv(error, avn.cloglog_derivv(_z)))));
|
||||
_weights = alg.additionnv(_weights, alg.scalar_multiplynv(learning_rate / _n, alg.mat_vec_multv(alg.transposenm(_input_set), alg.hadamard_productnv(error, avn.cloglog_derivv(_z)))));
|
||||
_weights = regularization.reg_weightsv(_weights, _lambda, _alpha, _reg);
|
||||
|
||||
// Calculating the bias gradients
|
||||
@ -182,7 +182,7 @@ void MLPPCLogLogReg::mbgd(real_t learning_rate, int max_epoch, int mini_batch_si
|
||||
Ref<MLPPVector> error = alg.subtractionnv(y_hat, current_output_batch);
|
||||
|
||||
// Calculating the weight gradients
|
||||
_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate / _n, alg.mat_vec_multv(alg.transposem(current_input_batch), alg.hadamard_productnv(error, avn.cloglog_derivv(z)))));
|
||||
_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate / _n, alg.mat_vec_multv(alg.transposenm(current_input_batch), alg.hadamard_productnv(error, avn.cloglog_derivv(z)))));
|
||||
_weights = regularization.reg_weightsv(_weights, _lambda, _alpha, _reg);
|
||||
|
||||
// Calculating the bias gradients
|
||||
|
@ -48,7 +48,7 @@ Ref<MLPPVector> MLPPCost::mse_derivv(const Ref<MLPPVector> &y_hat, const Ref<MLP
|
||||
|
||||
Ref<MLPPMatrix> MLPPCost::mse_derivm(const Ref<MLPPMatrix> &y_hat, const Ref<MLPPMatrix> &y) {
|
||||
MLPPLinAlg alg;
|
||||
return alg.subtractionm(y_hat, y);
|
||||
return alg.subtractionnm(y_hat, y);
|
||||
}
|
||||
|
||||
real_t MLPPCost::rmsev(const Ref<MLPPVector> &y_hat, const Ref<MLPPVector> &y) {
|
||||
@ -90,7 +90,7 @@ Ref<MLPPVector> MLPPCost::rmse_derivv(const Ref<MLPPVector> &y_hat, const Ref<ML
|
||||
Ref<MLPPMatrix> MLPPCost::rmse_derivm(const Ref<MLPPMatrix> &y_hat, const Ref<MLPPMatrix> &y) {
|
||||
MLPPLinAlg alg;
|
||||
|
||||
return alg.scalar_multiplym(1 / (2.0 / Math::sqrt(msem(y_hat, y))), mse_derivm(y_hat, y));
|
||||
return alg.scalar_multiplynm(1 / (2.0 / Math::sqrt(msem(y_hat, y))), mse_derivm(y_hat, y));
|
||||
}
|
||||
|
||||
real_t MLPPCost::maev(const Ref<MLPPVector> &y_hat, const Ref<MLPPVector> &y) {
|
||||
@ -256,9 +256,9 @@ Ref<MLPPVector> MLPPCost::log_loss_derivv(const Ref<MLPPVector> &y_hat, const Re
|
||||
|
||||
Ref<MLPPMatrix> MLPPCost::log_loss_derivm(const Ref<MLPPMatrix> &y_hat, const Ref<MLPPMatrix> &y) {
|
||||
MLPPLinAlg alg;
|
||||
return alg.additionm(
|
||||
alg.scalar_multiplym(-1, alg.element_wise_divisionm(y, y_hat)),
|
||||
alg.element_wise_divisionm(alg.scalar_multiplym(-1, alg.scalar_addm(-1, y)), alg.scalar_multiplym(-1, alg.scalar_addm(-1, y_hat))));
|
||||
return alg.additionnm(
|
||||
alg.scalar_multiplynm(-1, alg.element_wise_divisionnm(y, y_hat)),
|
||||
alg.element_wise_divisionnm(alg.scalar_multiplynm(-1, alg.scalar_addnm(-1, y)), alg.scalar_multiplynm(-1, alg.scalar_addnm(-1, y_hat))));
|
||||
}
|
||||
|
||||
real_t MLPPCost::cross_entropyv(const Ref<MLPPVector> &y_hat, const Ref<MLPPVector> &y) {
|
||||
@ -298,7 +298,7 @@ Ref<MLPPVector> MLPPCost::cross_entropy_derivv(const Ref<MLPPVector> &y_hat, con
|
||||
}
|
||||
Ref<MLPPMatrix> MLPPCost::cross_entropy_derivm(const Ref<MLPPMatrix> &y_hat, const Ref<MLPPMatrix> &y) {
|
||||
MLPPLinAlg alg;
|
||||
return alg.scalar_multiplym(-1, alg.element_wise_divisionm(y, y_hat));
|
||||
return alg.scalar_multiplynm(-1, alg.element_wise_divisionnm(y, y_hat));
|
||||
}
|
||||
|
||||
real_t MLPPCost::huber_lossv(const Ref<MLPPVector> &y_hat, const Ref<MLPPVector> &y, real_t delta) {
|
||||
@ -506,7 +506,7 @@ Ref<MLPPMatrix> MLPPCost::hinge_loss_derivwm(const Ref<MLPPMatrix> &y_hat, const
|
||||
MLPPLinAlg alg;
|
||||
MLPPReg regularization;
|
||||
|
||||
return alg.scalar_multiplym(C, hinge_loss_derivm(y_hat, y));
|
||||
return alg.scalar_multiplynm(C, hinge_loss_derivm(y_hat, y));
|
||||
}
|
||||
|
||||
real_t MLPPCost::wasserstein_lossv(const Ref<MLPPVector> &y_hat, const Ref<MLPPVector> &y) {
|
||||
@ -548,22 +548,22 @@ Ref<MLPPVector> MLPPCost::wasserstein_loss_derivv(const Ref<MLPPVector> &y_hat,
|
||||
Ref<MLPPMatrix> MLPPCost::wasserstein_loss_derivm(const Ref<MLPPMatrix> &y_hat, const Ref<MLPPMatrix> &y) {
|
||||
MLPPLinAlg alg;
|
||||
|
||||
return alg.scalar_multiplym(-1, y); // Simple.
|
||||
return alg.scalar_multiplynm(-1, y); // Simple.
|
||||
}
|
||||
|
||||
real_t MLPPCost::dual_form_svm(const Ref<MLPPVector> &alpha, const Ref<MLPPMatrix> &X, const Ref<MLPPVector> &y) {
|
||||
MLPPLinAlg alg;
|
||||
|
||||
Ref<MLPPMatrix> Y = alg.diagm(y); // Y is a diagonal matrix. Y[i][j] = y[i] if i = j, else Y[i][j] = 0. Yt = Y.
|
||||
Ref<MLPPMatrix> K = alg.matmultm(X, alg.transposem(X)); // TO DO: DON'T forget to add non-linear kernelizations.
|
||||
Ref<MLPPMatrix> Q = alg.matmultm(alg.matmultm(alg.transposem(Y), K), Y);
|
||||
Ref<MLPPMatrix> K = alg.matmultnm(X, alg.transposenm(X)); // TO DO: DON'T forget to add non-linear kernelizations.
|
||||
Ref<MLPPMatrix> Q = alg.matmultnm(alg.matmultnm(alg.transposenm(Y), K), Y);
|
||||
|
||||
Ref<MLPPMatrix> alpha_m;
|
||||
alpha_m.instance();
|
||||
alpha_m->resize(Size2i(alpha->size(), 1));
|
||||
alpha_m->set_row_mlpp_vector(0, alpha);
|
||||
|
||||
Ref<MLPPMatrix> alpha_m_res = alg.matmultm(alg.matmultm(alpha_m, Q), alg.transposem(alpha_m));
|
||||
Ref<MLPPMatrix> alpha_m_res = alg.matmultnm(alg.matmultnm(alpha_m, Q), alg.transposenm(alpha_m));
|
||||
|
||||
real_t alphaQ = alpha_m_res->get_element(0, 0);
|
||||
Ref<MLPPVector> one = alg.onevecv(alpha->size());
|
||||
@ -575,12 +575,12 @@ Ref<MLPPVector> MLPPCost::dual_form_svm_deriv(const Ref<MLPPVector> &alpha, cons
|
||||
MLPPLinAlg alg;
|
||||
|
||||
Ref<MLPPMatrix> Y = alg.diagm(y); // Y is a diagonal matrix. Y[i][j] = y[i] if i = j, else Y[i][j] = 0. Yt = Y.
|
||||
Ref<MLPPMatrix> K = alg.matmultm(X, alg.transposem(X)); // TO DO: DON'T forget to add non-linear kernelizations.
|
||||
Ref<MLPPMatrix> Q = alg.matmultm(alg.matmultm(alg.transposem(Y), K), Y);
|
||||
Ref<MLPPMatrix> K = alg.matmultnm(X, alg.transposenm(X)); // TO DO: DON'T forget to add non-linear kernelizations.
|
||||
Ref<MLPPMatrix> Q = alg.matmultnm(alg.matmultnm(alg.transposenm(Y), K), Y);
|
||||
Ref<MLPPVector> alphaQDeriv = alg.mat_vec_multv(Q, alpha);
|
||||
Ref<MLPPVector> one = alg.onevecv(alpha->size());
|
||||
|
||||
return alg.subtractionm(alphaQDeriv, one);
|
||||
return alg.subtractionnm(alphaQDeriv, one);
|
||||
}
|
||||
|
||||
MLPPCost::VectorCostFunctionPointer MLPPCost::get_cost_function_ptr_normal_vector(const MLPPCost::CostTypes cost) {
|
||||
|
@ -262,7 +262,7 @@ void MLPPData::set_data_supervised(int k, const String &file_name, Ref<MLPPMatri
|
||||
output_set->set_from_vector(output_set_tmp);
|
||||
|
||||
input_set->set_from_vectors(input_set_tmp);
|
||||
input_set = alg.transposem(input_set);
|
||||
input_set = alg.transposenm(input_set);
|
||||
}
|
||||
|
||||
void MLPPData::set_data_unsupervised(int k, const String &file_name, Ref<MLPPMatrix> input_set) {
|
||||
@ -289,7 +289,7 @@ void MLPPData::set_data_unsupervised(int k, const String &file_name, Ref<MLPPMat
|
||||
memdelete(file);
|
||||
|
||||
input_set->set_from_vectors(input_set_tmp);
|
||||
input_set = alg.transposem(input_set);
|
||||
input_set = alg.transposenm(input_set);
|
||||
}
|
||||
|
||||
void MLPPData::set_data_simple(const String &file_name, Ref<MLPPVector> input_set, Ref<MLPPVector> output_set) {
|
||||
|
@ -294,7 +294,7 @@ Ref<MLPPMatrix> MLPPDualSVC::kernel_functionm(const Ref<MLPPMatrix> &U, const Re
|
||||
MLPPLinAlg alg;
|
||||
|
||||
if (kernel == KERNEL_METHOD_LINEAR) {
|
||||
return alg.matmultm(_input_set, alg.transposem(_input_set));
|
||||
return alg.matmultnm(_input_set, alg.transposenm(_input_set));
|
||||
}
|
||||
|
||||
Ref<MLPPMatrix> m;
|
||||
|
@ -38,7 +38,6 @@ real_t MLPPDualSVCOld::modelTest(std::vector<real_t> x) {
|
||||
|
||||
void MLPPDualSVCOld::gradientDescent(real_t learning_rate, int max_epoch, bool UI) {
|
||||
class MLPPCost cost;
|
||||
MLPPActivationOld avn;
|
||||
MLPPLinAlg alg;
|
||||
MLPPReg regularization;
|
||||
real_t cost_prev = 0;
|
||||
|
@ -239,14 +239,14 @@ void MLPPGAN::update_discriminator_parameters(const Vector<Ref<MLPPMatrix>> &hid
|
||||
if (!_network.empty()) {
|
||||
Ref<MLPPHiddenLayer> layer = _network[_network.size() - 1];
|
||||
|
||||
layer->set_weights(alg.subtractionm(layer->get_weights(), hidden_layer_updations[0]));
|
||||
layer->set_bias(alg.subtract_matrix_rows(layer->get_bias(), alg.scalar_multiplym(learning_rate / _n, layer->get_delta())));
|
||||
layer->set_weights(alg.subtractionnm(layer->get_weights(), hidden_layer_updations[0]));
|
||||
layer->set_bias(alg.subtract_matrix_rows(layer->get_bias(), alg.scalar_multiplynm(learning_rate / _n, layer->get_delta())));
|
||||
|
||||
for (int i = _network.size() - 2; i > _network.size() / 2; i--) {
|
||||
layer = _network[i];
|
||||
|
||||
layer->set_weights(alg.subtractionm(layer->get_weights(), hidden_layer_updations[(_network.size() - 2) - i + 1]));
|
||||
layer->set_bias(alg.subtract_matrix_rows(layer->get_bias(), alg.scalar_multiplym(learning_rate / _n, layer->get_delta())));
|
||||
layer->set_weights(alg.subtractionnm(layer->get_weights(), hidden_layer_updations[(_network.size() - 2) - i + 1]));
|
||||
layer->set_bias(alg.subtract_matrix_rows(layer->get_bias(), alg.scalar_multiplynm(learning_rate / _n, layer->get_delta())));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -260,8 +260,8 @@ void MLPPGAN::update_generator_parameters(const Vector<Ref<MLPPMatrix>> &hidden_
|
||||
|
||||
//std::cout << network[i].weights.size() << "x" << network[i].weights[0].size() << std::endl;
|
||||
//std::cout << hidden_layer_updations[(network.size() - 2) - i + 1].size() << "x" << hidden_layer_updations[(network.size() - 2) - i + 1][0].size() << std::endl;
|
||||
layer->set_weights(alg.subtractionm(layer->get_weights(), hidden_layer_updations[(_network.size() - 2) - i + 1]));
|
||||
layer->set_bias(alg.subtract_matrix_rows(layer->get_bias(), alg.scalar_multiplym(learning_rate / _n, layer->get_delta())));
|
||||
layer->set_weights(alg.subtractionnm(layer->get_weights(), hidden_layer_updations[(_network.size() - 2) - i + 1]));
|
||||
layer->set_bias(alg.subtract_matrix_rows(layer->get_bias(), alg.scalar_multiplynm(learning_rate / _n, layer->get_delta())));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -279,7 +279,7 @@ MLPPGAN::ComputeDiscriminatorGradientsResult MLPPGAN::compute_discriminator_grad
|
||||
|
||||
_output_layer->set_delta(alg.hadamard_productnv(cost_deriv, activ_deriv));
|
||||
|
||||
res.output_w_grad = alg.mat_vec_multv(alg.transposem(_output_layer->get_input()), _output_layer->get_delta());
|
||||
res.output_w_grad = alg.mat_vec_multv(alg.transposenm(_output_layer->get_input()), _output_layer->get_delta());
|
||||
res.output_w_grad = alg.additionnv(res.output_w_grad, regularization.reg_deriv_termv(_output_layer->get_weights(), _output_layer->get_lambda(), _output_layer->get_alpha(), _output_layer->get_reg()));
|
||||
|
||||
if (!_network.empty()) {
|
||||
@ -287,10 +287,10 @@ MLPPGAN::ComputeDiscriminatorGradientsResult MLPPGAN::compute_discriminator_grad
|
||||
|
||||
Ref<MLPPVector> hidden_layer_activ_deriv = avn.run_activation_deriv_vector(layer->get_activation(), layer->get_z());
|
||||
|
||||
layer->set_delta(alg.hadamard_productm(alg.outer_product(_output_layer->get_delta(), _output_layer->get_weights()), hidden_layer_activ_deriv));
|
||||
Ref<MLPPMatrix> hidden_layer_w_grad = alg.matmultm(alg.transposem(layer->get_input()), layer->get_delta());
|
||||
layer->set_delta(alg.hadamard_productnm(alg.outer_product(_output_layer->get_delta(), _output_layer->get_weights()), hidden_layer_activ_deriv));
|
||||
Ref<MLPPMatrix> hidden_layer_w_grad = alg.matmultnm(alg.transposenm(layer->get_input()), layer->get_delta());
|
||||
|
||||
res.cumulative_hidden_layer_w_grad.push_back(alg.additionm(hidden_layer_w_grad, regularization.reg_deriv_termm(layer->get_weights(), layer->get_lambda(), layer->get_alpha(), layer->get_reg()))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
|
||||
res.cumulative_hidden_layer_w_grad.push_back(alg.additionnm(hidden_layer_w_grad, regularization.reg_deriv_termm(layer->get_weights(), layer->get_lambda(), layer->get_alpha(), layer->get_reg()))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
|
||||
|
||||
for (int i = static_cast<int>(_network.size()) - 2; i > static_cast<int>(_network.size()) / 2; i--) {
|
||||
layer = _network[i];
|
||||
@ -298,10 +298,10 @@ MLPPGAN::ComputeDiscriminatorGradientsResult MLPPGAN::compute_discriminator_grad
|
||||
|
||||
hidden_layer_activ_deriv = avn.run_activation_deriv_vector(layer->get_activation(), layer->get_z());
|
||||
|
||||
layer->set_delta(alg.hadamard_productm(alg.matmultm(next_layer->get_delta(), alg.transposem(next_layer->get_weights())), hidden_layer_activ_deriv));
|
||||
hidden_layer_w_grad = alg.matmultm(alg.transposem(layer->get_input()), layer->get_delta());
|
||||
layer->set_delta(alg.hadamard_productnm(alg.matmultnm(next_layer->get_delta(), alg.transposenm(next_layer->get_weights())), hidden_layer_activ_deriv));
|
||||
hidden_layer_w_grad = alg.matmultnm(alg.transposenm(layer->get_input()), layer->get_delta());
|
||||
|
||||
res.cumulative_hidden_layer_w_grad.push_back(alg.additionm(hidden_layer_w_grad, regularization.reg_deriv_termm(layer->get_weights(), layer->get_lambda(), layer->get_alpha(), layer->get_reg()))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
|
||||
res.cumulative_hidden_layer_w_grad.push_back(alg.additionnm(hidden_layer_w_grad, regularization.reg_deriv_termm(layer->get_weights(), layer->get_lambda(), layer->get_alpha(), layer->get_reg()))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
|
||||
}
|
||||
}
|
||||
|
||||
@ -321,7 +321,7 @@ Vector<Ref<MLPPMatrix>> MLPPGAN::compute_generator_gradients(const Ref<MLPPVecto
|
||||
|
||||
_output_layer->set_delta(alg.hadamard_productnv(cost_deriv, activ_deriv));
|
||||
|
||||
Ref<MLPPVector> output_w_grad = alg.mat_vec_multv(alg.transposem(_output_layer->get_input()), _output_layer->get_delta());
|
||||
Ref<MLPPVector> output_w_grad = alg.mat_vec_multv(alg.transposenm(_output_layer->get_input()), _output_layer->get_delta());
|
||||
output_w_grad = alg.additionnv(output_w_grad, regularization.reg_deriv_termv(_output_layer->get_weights(), _output_layer->get_lambda(), _output_layer->get_alpha(), _output_layer->get_reg()));
|
||||
|
||||
if (!_network.empty()) {
|
||||
@ -331,8 +331,8 @@ Vector<Ref<MLPPMatrix>> MLPPGAN::compute_generator_gradients(const Ref<MLPPVecto
|
||||
|
||||
layer->set_delta(alg.hadamard_productnv(alg.outer_product(_output_layer->get_delta(), _output_layer->get_weights()), hidden_layer_activ_deriv));
|
||||
|
||||
Ref<MLPPMatrix> hidden_layer_w_grad = alg.matmultm(alg.transposem(layer->get_input()), layer->get_delta());
|
||||
cumulative_hidden_layer_w_grad.push_back(alg.additionm(hidden_layer_w_grad, regularization.reg_deriv_termm(layer->get_weights(), layer->get_lambda(), layer->get_alpha(), layer->get_reg()))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
|
||||
Ref<MLPPMatrix> hidden_layer_w_grad = alg.matmultnm(alg.transposenm(layer->get_input()), layer->get_delta());
|
||||
cumulative_hidden_layer_w_grad.push_back(alg.additionnm(hidden_layer_w_grad, regularization.reg_deriv_termm(layer->get_weights(), layer->get_lambda(), layer->get_alpha(), layer->get_reg()))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
|
||||
|
||||
for (int i = _network.size() - 2; i >= 0; i--) {
|
||||
layer = _network[i];
|
||||
@ -340,10 +340,10 @@ Vector<Ref<MLPPMatrix>> MLPPGAN::compute_generator_gradients(const Ref<MLPPVecto
|
||||
|
||||
hidden_layer_activ_deriv = avn.run_activation_deriv_vector(layer->get_activation(), layer->get_z());
|
||||
|
||||
layer->set_delta(alg.hadamard_productm(alg.matmultm(next_layer->get_delta(), alg.transposem(next_layer->get_weights())), hidden_layer_activ_deriv));
|
||||
layer->set_delta(alg.hadamard_productnm(alg.matmultnm(next_layer->get_delta(), alg.transposenm(next_layer->get_weights())), hidden_layer_activ_deriv));
|
||||
|
||||
hidden_layer_w_grad = alg.matmultm(alg.transposem(layer->get_input()), layer->get_delta());
|
||||
cumulative_hidden_layer_w_grad.push_back(alg.additionm(hidden_layer_w_grad, regularization.reg_deriv_termm(layer->get_weights(), layer->get_lambda(), layer->get_alpha(), layer->get_reg()))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
|
||||
hidden_layer_w_grad = alg.matmultnm(alg.transposenm(layer->get_input()), layer->get_delta());
|
||||
cumulative_hidden_layer_w_grad.push_back(alg.additionnm(hidden_layer_w_grad, regularization.reg_deriv_termm(layer->get_weights(), layer->get_lambda(), layer->get_alpha(), layer->get_reg()))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -150,7 +150,7 @@ void MLPPHiddenLayer::forward_pass() {
|
||||
MLPPLinAlg alg;
|
||||
MLPPActivation avn;
|
||||
|
||||
_z = alg.mat_vec_addv(alg.matmultm(_input, _weights), _bias);
|
||||
_z = alg.mat_vec_addv(alg.matmultnm(_input, _weights), _bias);
|
||||
_a = avn.run_activation_norm_matrix(_activation, _z);
|
||||
}
|
||||
|
||||
@ -162,7 +162,7 @@ void MLPPHiddenLayer::test(const Ref<MLPPVector> &x) {
|
||||
MLPPLinAlg alg;
|
||||
MLPPActivation avn;
|
||||
|
||||
_z_test = alg.additionm(alg.mat_vec_multv(alg.transposem(_weights), x), _bias);
|
||||
_z_test = alg.additionnm(alg.mat_vec_multv(alg.transposenm(_weights), x), _bias);
|
||||
_a_test = avn.run_activation_norm_matrix(_activation, _z_test);
|
||||
}
|
||||
|
||||
|
@ -107,7 +107,7 @@ std::vector<std::vector<real_t>> MLPPLinAlg::matmult(std::vector<std::vector<rea
|
||||
return C;
|
||||
}
|
||||
|
||||
Ref<MLPPMatrix> MLPPLinAlg::additionm(const Ref<MLPPMatrix> &A, const Ref<MLPPMatrix> &B) {
|
||||
Ref<MLPPMatrix> MLPPLinAlg::additionnm(const Ref<MLPPMatrix> &A, const Ref<MLPPMatrix> &B) {
|
||||
ERR_FAIL_COND_V(!A.is_valid() || !B.is_valid(), Ref<MLPPMatrix>());
|
||||
Size2i a_size = A->size();
|
||||
ERR_FAIL_COND_V(a_size != B->size(), Ref<MLPPMatrix>());
|
||||
@ -128,7 +128,7 @@ Ref<MLPPMatrix> MLPPLinAlg::additionm(const Ref<MLPPMatrix> &A, const Ref<MLPPMa
|
||||
|
||||
return C;
|
||||
}
|
||||
Ref<MLPPMatrix> MLPPLinAlg::subtractionm(const Ref<MLPPMatrix> &A, const Ref<MLPPMatrix> &B) {
|
||||
Ref<MLPPMatrix> MLPPLinAlg::subtractionnm(const Ref<MLPPMatrix> &A, const Ref<MLPPMatrix> &B) {
|
||||
ERR_FAIL_COND_V(!A.is_valid() || !B.is_valid(), Ref<MLPPMatrix>());
|
||||
Size2i a_size = A->size();
|
||||
ERR_FAIL_COND_V(a_size != B->size(), Ref<MLPPMatrix>());
|
||||
@ -149,7 +149,7 @@ Ref<MLPPMatrix> MLPPLinAlg::subtractionm(const Ref<MLPPMatrix> &A, const Ref<MLP
|
||||
|
||||
return C;
|
||||
}
|
||||
Ref<MLPPMatrix> MLPPLinAlg::matmultm(const Ref<MLPPMatrix> &A, const Ref<MLPPMatrix> &B) {
|
||||
Ref<MLPPMatrix> MLPPLinAlg::matmultnm(const Ref<MLPPMatrix> &A, const Ref<MLPPMatrix> &B) {
|
||||
ERR_FAIL_COND_V(!A.is_valid() || !B.is_valid(), Ref<MLPPMatrix>());
|
||||
|
||||
Size2i a_size = A->size();
|
||||
@ -242,7 +242,7 @@ std::vector<std::vector<real_t>> MLPPLinAlg::elementWiseDivision(std::vector<std
|
||||
return C;
|
||||
}
|
||||
|
||||
Ref<MLPPMatrix> MLPPLinAlg::hadamard_productm(const Ref<MLPPMatrix> &A, const Ref<MLPPMatrix> &B) {
|
||||
Ref<MLPPMatrix> MLPPLinAlg::hadamard_productnm(const Ref<MLPPMatrix> &A, const Ref<MLPPMatrix> &B) {
|
||||
ERR_FAIL_COND_V(!A.is_valid() || !B.is_valid(), Ref<MLPPMatrix>());
|
||||
Size2i a_size = A->size();
|
||||
ERR_FAIL_COND_V(a_size != B->size(), Ref<MLPPMatrix>());
|
||||
@ -264,7 +264,7 @@ Ref<MLPPMatrix> MLPPLinAlg::hadamard_productm(const Ref<MLPPMatrix> &A, const Re
|
||||
|
||||
return C;
|
||||
}
|
||||
Ref<MLPPMatrix> MLPPLinAlg::kronecker_productm(const Ref<MLPPMatrix> &A, const Ref<MLPPMatrix> &B) {
|
||||
Ref<MLPPMatrix> MLPPLinAlg::kronecker_productnm(const Ref<MLPPMatrix> &A, const Ref<MLPPMatrix> &B) {
|
||||
// [1,1,1,1] [1,2,3,4,5]
|
||||
// [1,1,1,1] [1,2,3,4,5]
|
||||
// [1,2,3,4,5]
|
||||
@ -310,7 +310,7 @@ Ref<MLPPMatrix> MLPPLinAlg::kronecker_productm(const Ref<MLPPMatrix> &A, const R
|
||||
|
||||
return C;
|
||||
}
|
||||
Ref<MLPPMatrix> MLPPLinAlg::element_wise_divisionm(const Ref<MLPPMatrix> &A, const Ref<MLPPMatrix> &B) {
|
||||
Ref<MLPPMatrix> MLPPLinAlg::element_wise_divisionnm(const Ref<MLPPMatrix> &A, const Ref<MLPPMatrix> &B) {
|
||||
ERR_FAIL_COND_V(!A.is_valid() || !B.is_valid(), Ref<MLPPMatrix>());
|
||||
Size2i a_size = A->size();
|
||||
ERR_FAIL_COND_V(a_size != B->size(), Ref<MLPPMatrix>());
|
||||
@ -366,7 +366,7 @@ std::vector<std::vector<real_t>> MLPPLinAlg::scalarAdd(real_t scalar, std::vecto
|
||||
return A;
|
||||
}
|
||||
|
||||
Ref<MLPPMatrix> MLPPLinAlg::transposem(const Ref<MLPPMatrix> &A) {
|
||||
Ref<MLPPMatrix> MLPPLinAlg::transposenm(const Ref<MLPPMatrix> &A) {
|
||||
Size2i a_size = A->size();
|
||||
|
||||
Ref<MLPPMatrix> AT;
|
||||
@ -384,7 +384,7 @@ Ref<MLPPMatrix> MLPPLinAlg::transposem(const Ref<MLPPMatrix> &A) {
|
||||
|
||||
return AT;
|
||||
}
|
||||
Ref<MLPPMatrix> MLPPLinAlg::scalar_multiplym(real_t scalar, const Ref<MLPPMatrix> &A) {
|
||||
Ref<MLPPMatrix> MLPPLinAlg::scalar_multiplynm(real_t scalar, const Ref<MLPPMatrix> &A) {
|
||||
Ref<MLPPMatrix> AN = A->duplicate();
|
||||
Size2i a_size = AN->size();
|
||||
real_t *an_ptr = AN->ptrw();
|
||||
@ -398,7 +398,7 @@ Ref<MLPPMatrix> MLPPLinAlg::scalar_multiplym(real_t scalar, const Ref<MLPPMatrix
|
||||
return AN;
|
||||
}
|
||||
|
||||
Ref<MLPPMatrix> MLPPLinAlg::scalar_addm(real_t scalar, const Ref<MLPPMatrix> &A) {
|
||||
Ref<MLPPMatrix> MLPPLinAlg::scalar_addnm(real_t scalar, const Ref<MLPPMatrix> &A) {
|
||||
Ref<MLPPMatrix> AN = A->duplicate();
|
||||
Size2i a_size = AN->size();
|
||||
real_t *an_ptr = AN->ptrw();
|
||||
@ -854,10 +854,10 @@ Ref<MLPPMatrix> MLPPLinAlg::adjointm(const Ref<MLPPMatrix> &A) {
|
||||
return adj;
|
||||
}
|
||||
Ref<MLPPMatrix> MLPPLinAlg::inversem(const Ref<MLPPMatrix> &A) {
|
||||
return scalar_multiplym(1 / detm(A, int(A->size().y)), adjointm(A));
|
||||
return scalar_multiplynm(1 / detm(A, int(A->size().y)), adjointm(A));
|
||||
}
|
||||
Ref<MLPPMatrix> MLPPLinAlg::pinversem(const Ref<MLPPMatrix> &A) {
|
||||
return matmultm(inversem(matmultm(transposem(A), A)), transposem(A));
|
||||
return matmultnm(inversem(matmultnm(transposenm(A), A)), transposenm(A));
|
||||
}
|
||||
|
||||
std::vector<std::vector<real_t>> MLPPLinAlg::zeromat(int n, int m) {
|
||||
@ -1437,7 +1437,7 @@ MLPPLinAlg::EigenResult MLPPLinAlg::eigen(Ref<MLPPMatrix> A) {
|
||||
P->set_element(sub_j, sub_j, Math::cos(theta));
|
||||
P->set_element(sub_j, sub_i, Math::sin(theta));
|
||||
|
||||
a_new = matmultm(matmultm(inversem(P), A), P);
|
||||
a_new = matmultnm(matmultnm(inversem(P), A), P);
|
||||
|
||||
Size2i a_new_size = a_new->size();
|
||||
|
||||
@ -1475,7 +1475,7 @@ MLPPLinAlg::EigenResult MLPPLinAlg::eigen(Ref<MLPPMatrix> A) {
|
||||
}
|
||||
}
|
||||
|
||||
eigenvectors = matmultm(eigenvectors, P);
|
||||
eigenvectors = matmultnm(eigenvectors, P);
|
||||
A = a_new;
|
||||
|
||||
} while (!diagonal);
|
||||
@ -1546,8 +1546,8 @@ MLPPLinAlg::SVDResult MLPPLinAlg::svd(const Ref<MLPPMatrix> &A) {
|
||||
|
||||
Size2i a_size = A->size();
|
||||
|
||||
EigenResult left_eigen = eigen(matmultm(A, transposem(A)));
|
||||
EigenResult right_eigen = eigen(matmultm(transposem(A), A));
|
||||
EigenResult left_eigen = eigen(matmultnm(A, transposenm(A)));
|
||||
EigenResult right_eigen = eigen(matmultnm(transposenm(A), A));
|
||||
|
||||
Ref<MLPPMatrix> singularvals = sqrtm(left_eigen.eigen_values);
|
||||
Ref<MLPPMatrix> sigma = zeromatm(a_size.y, a_size.x);
|
||||
@ -2719,7 +2719,7 @@ Vector<Ref<MLPPMatrix>> MLPPLinAlg::addition_vt(const Vector<Ref<MLPPMatrix>> &A
|
||||
res.resize(A.size());
|
||||
|
||||
for (int i = 0; i < res.size(); i++) {
|
||||
res.write[i] = additionm(A[i], B[i]);
|
||||
res.write[i] = additionnm(A[i], B[i]);
|
||||
}
|
||||
|
||||
return res;
|
||||
@ -2737,7 +2737,7 @@ Vector<Ref<MLPPMatrix>> MLPPLinAlg::element_wise_division_vt(const Vector<Ref<ML
|
||||
res.resize(A.size());
|
||||
|
||||
for (int i = 0; i < A.size(); i++) {
|
||||
res.write[i] = element_wise_divisionm(A[i], B[i]);
|
||||
res.write[i] = element_wise_divisionnm(A[i], B[i]);
|
||||
}
|
||||
|
||||
return res;
|
||||
@ -2827,13 +2827,13 @@ std::vector<std::vector<std::vector<real_t>>> MLPPLinAlg::scalarAdd(real_t scala
|
||||
|
||||
Vector<Ref<MLPPMatrix>> MLPPLinAlg::scalar_multiply_vm(real_t scalar, Vector<Ref<MLPPMatrix>> A) {
|
||||
for (int i = 0; i < A.size(); i++) {
|
||||
A.write[i] = scalar_multiplym(scalar, A[i]);
|
||||
A.write[i] = scalar_multiplynm(scalar, A[i]);
|
||||
}
|
||||
return A;
|
||||
}
|
||||
Vector<Ref<MLPPMatrix>> MLPPLinAlg::scalar_add_vm(real_t scalar, Vector<Ref<MLPPMatrix>> A) {
|
||||
for (int i = 0; i < A.size(); i++) {
|
||||
A.write[i] = scalar_addm(scalar, A[i]);
|
||||
A.write[i] = scalar_addnm(scalar, A[i]);
|
||||
}
|
||||
return A;
|
||||
}
|
||||
|
@ -37,25 +37,25 @@ public:
|
||||
std::vector<std::vector<real_t>> subtraction(std::vector<std::vector<real_t>> A, std::vector<std::vector<real_t>> B);
|
||||
std::vector<std::vector<real_t>> matmult(std::vector<std::vector<real_t>> A, std::vector<std::vector<real_t>> B);
|
||||
|
||||
Ref<MLPPMatrix> additionm(const Ref<MLPPMatrix> &A, const Ref<MLPPMatrix> &B);
|
||||
Ref<MLPPMatrix> subtractionm(const Ref<MLPPMatrix> &A, const Ref<MLPPMatrix> &B);
|
||||
Ref<MLPPMatrix> matmultm(const Ref<MLPPMatrix> &A, const Ref<MLPPMatrix> &B);
|
||||
Ref<MLPPMatrix> additionnm(const Ref<MLPPMatrix> &A, const Ref<MLPPMatrix> &B);
|
||||
Ref<MLPPMatrix> subtractionnm(const Ref<MLPPMatrix> &A, const Ref<MLPPMatrix> &B);
|
||||
Ref<MLPPMatrix> matmultnm(const Ref<MLPPMatrix> &A, const Ref<MLPPMatrix> &B);
|
||||
|
||||
std::vector<std::vector<real_t>> hadamard_product(std::vector<std::vector<real_t>> A, std::vector<std::vector<real_t>> B);
|
||||
std::vector<std::vector<real_t>> kronecker_product(std::vector<std::vector<real_t>> A, std::vector<std::vector<real_t>> B);
|
||||
std::vector<std::vector<real_t>> elementWiseDivision(std::vector<std::vector<real_t>> A, std::vector<std::vector<real_t>> B);
|
||||
|
||||
Ref<MLPPMatrix> hadamard_productm(const Ref<MLPPMatrix> &A, const Ref<MLPPMatrix> &B);
|
||||
Ref<MLPPMatrix> kronecker_productm(const Ref<MLPPMatrix> &A, const Ref<MLPPMatrix> &B);
|
||||
Ref<MLPPMatrix> element_wise_divisionm(const Ref<MLPPMatrix> &A, const Ref<MLPPMatrix> &B);
|
||||
Ref<MLPPMatrix> hadamard_productnm(const Ref<MLPPMatrix> &A, const Ref<MLPPMatrix> &B);
|
||||
Ref<MLPPMatrix> kronecker_productnm(const Ref<MLPPMatrix> &A, const Ref<MLPPMatrix> &B);
|
||||
Ref<MLPPMatrix> element_wise_divisionnm(const Ref<MLPPMatrix> &A, const Ref<MLPPMatrix> &B);
|
||||
|
||||
std::vector<std::vector<real_t>> transpose(std::vector<std::vector<real_t>> A);
|
||||
std::vector<std::vector<real_t>> scalarMultiply(real_t scalar, std::vector<std::vector<real_t>> A);
|
||||
std::vector<std::vector<real_t>> scalarAdd(real_t scalar, std::vector<std::vector<real_t>> A);
|
||||
|
||||
Ref<MLPPMatrix> transposem(const Ref<MLPPMatrix> &A);
|
||||
Ref<MLPPMatrix> scalar_multiplym(real_t scalar, const Ref<MLPPMatrix> &A);
|
||||
Ref<MLPPMatrix> scalar_addm(real_t scalar, const Ref<MLPPMatrix> &A);
|
||||
Ref<MLPPMatrix> transposenm(const Ref<MLPPMatrix> &A);
|
||||
Ref<MLPPMatrix> scalar_multiplynm(real_t scalar, const Ref<MLPPMatrix> &A);
|
||||
Ref<MLPPMatrix> scalar_addnm(real_t scalar, const Ref<MLPPMatrix> &A);
|
||||
|
||||
std::vector<std::vector<real_t>> log(std::vector<std::vector<real_t>> A);
|
||||
std::vector<std::vector<real_t>> log10(std::vector<std::vector<real_t>> A);
|
||||
|
@ -92,9 +92,9 @@ void MLPPLinReg::newton_raphson(real_t learning_rate, int max_epoch, bool ui) {
|
||||
Ref<MLPPVector> error = alg.subtractionnv(_y_hat, _output_set);
|
||||
|
||||
// Calculating the weight gradients (2nd derivative)
|
||||
Ref<MLPPVector> first_derivative = alg.mat_vec_multv(alg.transposem(_input_set), error);
|
||||
Ref<MLPPMatrix> second_derivative = alg.matmultm(alg.transposem(_input_set), _input_set);
|
||||
_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate / _n, alg.mat_vec_multv(alg.transposem(alg.inversem(second_derivative)), first_derivative)));
|
||||
Ref<MLPPVector> first_derivative = alg.mat_vec_multv(alg.transposenm(_input_set), error);
|
||||
Ref<MLPPMatrix> second_derivative = alg.matmultnm(alg.transposenm(_input_set), _input_set);
|
||||
_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate / _n, alg.mat_vec_multv(alg.transposenm(alg.inversem(second_derivative)), first_derivative)));
|
||||
_weights = regularization.reg_weightsv(_weights, _lambda, _alpha, _reg);
|
||||
|
||||
// Calculating the bias gradients (2nd derivative)
|
||||
@ -132,7 +132,7 @@ void MLPPLinReg::gradient_descent(real_t learning_rate, int max_epoch, bool ui)
|
||||
Ref<MLPPVector> error = alg.subtractionnv(_y_hat, _output_set);
|
||||
|
||||
// Calculating the weight gradients
|
||||
_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate / _n, alg.mat_vec_multv(alg.transposem(_input_set), error)));
|
||||
_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate / _n, alg.mat_vec_multv(alg.transposenm(_input_set), error)));
|
||||
_weights = regularization.reg_weightsv(_weights, _lambda, _alpha, _reg);
|
||||
|
||||
// Calculating the bias gradients
|
||||
@ -240,7 +240,7 @@ void MLPPLinReg::mbgd(real_t learning_rate, int max_epoch, int mini_batch_size,
|
||||
Ref<MLPPVector> error = alg.subtractionnv(y_hat, current_output_mini_batch);
|
||||
|
||||
// Calculating the weight gradients
|
||||
_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate / current_output_mini_batch->size(), alg.mat_vec_multv(alg.transposem(current_input_mini_batch), error)));
|
||||
_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate / current_output_mini_batch->size(), alg.mat_vec_multv(alg.transposenm(current_input_mini_batch), error)));
|
||||
_weights = regularization.reg_weightsv(_weights, _lambda, _alpha, _reg);
|
||||
|
||||
// Calculating the bias gradients
|
||||
@ -289,7 +289,7 @@ void MLPPLinReg::momentum(real_t learning_rate, int max_epoch, int mini_batch_si
|
||||
Ref<MLPPVector> error = alg.subtractionnv(y_hat, current_output_mini_batch);
|
||||
|
||||
// Calculating the weight gradients
|
||||
Ref<MLPPVector> gradient = alg.scalar_multiplynv(1 / current_output_mini_batch->size(), alg.mat_vec_multv(alg.transposem(current_input_mini_batch), error));
|
||||
Ref<MLPPVector> gradient = alg.scalar_multiplynv(1 / current_output_mini_batch->size(), alg.mat_vec_multv(alg.transposenm(current_input_mini_batch), error));
|
||||
Ref<MLPPVector> reg_deriv_term = regularization.reg_deriv_termv(_weights, _lambda, _alpha, _reg);
|
||||
Ref<MLPPVector> weight_grad = alg.additionnv(gradient, reg_deriv_term); // Weight_grad_final
|
||||
|
||||
@ -345,7 +345,7 @@ void MLPPLinReg::nag(real_t learning_rate, int max_epoch, int mini_batch_size, r
|
||||
Ref<MLPPVector> error = alg.subtractionnv(y_hat, current_output_mini_batch);
|
||||
|
||||
// Calculating the weight gradients
|
||||
Ref<MLPPVector> gradient = alg.scalar_multiplynv(1 / current_output_mini_batch->size(), alg.mat_vec_multv(alg.transposem(current_input_mini_batch), error));
|
||||
Ref<MLPPVector> gradient = alg.scalar_multiplynv(1 / current_output_mini_batch->size(), alg.mat_vec_multv(alg.transposenm(current_input_mini_batch), error));
|
||||
Ref<MLPPVector> reg_deriv_term = regularization.reg_deriv_termv(_weights, _lambda, _alpha, _reg);
|
||||
Ref<MLPPVector> weight_grad = alg.additionnv(gradient, reg_deriv_term); // Weight_grad_final
|
||||
|
||||
@ -399,7 +399,7 @@ void MLPPLinReg::adagrad(real_t learning_rate, int max_epoch, int mini_batch_siz
|
||||
Ref<MLPPVector> error = alg.subtractionnv(y_hat, current_output_mini_batch);
|
||||
|
||||
// Calculating the weight gradients
|
||||
Ref<MLPPVector> gradient = alg.scalar_multiplynv(1 / current_output_mini_batch->size(), alg.mat_vec_multv(alg.transposem(current_input_mini_batch), error));
|
||||
Ref<MLPPVector> gradient = alg.scalar_multiplynv(1 / current_output_mini_batch->size(), alg.mat_vec_multv(alg.transposenm(current_input_mini_batch), error));
|
||||
Ref<MLPPVector> reg_deriv_term = regularization.reg_deriv_termv(_weights, _lambda, _alpha, _reg);
|
||||
Ref<MLPPVector> weight_grad = alg.additionnv(gradient, reg_deriv_term); // Weight_grad_final
|
||||
|
||||
@ -454,7 +454,7 @@ void MLPPLinReg::adadelta(real_t learning_rate, int max_epoch, int mini_batch_si
|
||||
Ref<MLPPVector> error = alg.subtractionnv(y_hat, current_output_mini_batch);
|
||||
|
||||
// Calculating the weight gradients
|
||||
Ref<MLPPVector> gradient = alg.scalar_multiplynv(1 / current_output_mini_batch->size(), alg.mat_vec_multv(alg.transposem(current_input_mini_batch), error));
|
||||
Ref<MLPPVector> gradient = alg.scalar_multiplynv(1 / current_output_mini_batch->size(), alg.mat_vec_multv(alg.transposenm(current_input_mini_batch), error));
|
||||
Ref<MLPPVector> reg_deriv_term = regularization.reg_deriv_termv(_weights, _lambda, _alpha, _reg);
|
||||
Ref<MLPPVector> weight_grad = alg.additionnv(gradient, reg_deriv_term); // Weight_grad_final
|
||||
|
||||
@ -509,7 +509,7 @@ void MLPPLinReg::adam(real_t learning_rate, int max_epoch, int mini_batch_size,
|
||||
Ref<MLPPVector> error = alg.subtractionnv(y_hat, current_output_mini_batch);
|
||||
|
||||
// Calculating the weight gradients
|
||||
Ref<MLPPVector> gradient = alg.scalar_multiplynv(1 / current_output_mini_batch->size(), alg.mat_vec_multv(alg.transposem(current_input_mini_batch), error));
|
||||
Ref<MLPPVector> gradient = alg.scalar_multiplynv(1 / current_output_mini_batch->size(), alg.mat_vec_multv(alg.transposenm(current_input_mini_batch), error));
|
||||
Ref<MLPPVector> reg_deriv_term = regularization.reg_deriv_termv(_weights, _lambda, _alpha, _reg);
|
||||
Ref<MLPPVector> weight_grad = alg.additionnv(gradient, reg_deriv_term); // Weight_grad_final
|
||||
|
||||
@ -519,7 +519,7 @@ void MLPPLinReg::adam(real_t learning_rate, int max_epoch, int mini_batch_size,
|
||||
Ref<MLPPVector> m_hat = alg.scalar_multiplynv(1 / (1 - Math::pow(b1, epoch)), m);
|
||||
Ref<MLPPVector> v_hat = alg.scalar_multiplynv(1 / (1 - Math::pow(b2, epoch)), v);
|
||||
|
||||
_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate, alg.element_wise_divisionm(m_hat, alg.scalar_addnv(e, alg.sqrtnv(v_hat)))));
|
||||
// NOTE(review): m_hat is a Ref<MLPPVector>, while element_wise_divisionnm is declared for Ref<MLPPMatrix> operands — confirm whether a vector variant of the element-wise division is intended here.
_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate, alg.element_wise_divisionnm(m_hat, alg.scalar_addnv(e, alg.sqrtnv(v_hat)))));
|
||||
|
||||
// Calculating the bias gradients
|
||||
_bias -= learning_rate * alg.sum_elementsv(error) / current_output_mini_batch->size(); // As normal
|
||||
@ -567,7 +567,7 @@ void MLPPLinReg::adamax(real_t learning_rate, int max_epoch, int mini_batch_size
|
||||
Ref<MLPPVector> error = alg.subtractionnv(y_hat, current_output_mini_batch);
|
||||
|
||||
// Calculating the weight gradients
|
||||
Ref<MLPPVector> gradient = alg.scalar_multiplynv(1 / current_output_mini_batch->size(), alg.mat_vec_multv(alg.transposem(current_input_mini_batch), error));
|
||||
Ref<MLPPVector> gradient = alg.scalar_multiplynv(1 / current_output_mini_batch->size(), alg.mat_vec_multv(alg.transposenm(current_input_mini_batch), error));
|
||||
Ref<MLPPVector> reg_deriv_term = regularization.reg_deriv_termv(_weights, _lambda, _alpha, _reg);
|
||||
Ref<MLPPVector> weight_grad = alg.additionnv(gradient, reg_deriv_term); // Weight_grad_final
|
||||
|
||||
@ -626,7 +626,7 @@ void MLPPLinReg::nadam(real_t learning_rate, int max_epoch, int mini_batch_size,
|
||||
Ref<MLPPVector> error = alg.subtractionnv(y_hat, current_output_mini_batch);
|
||||
|
||||
// Calculating the weight gradients
|
||||
Ref<MLPPVector> gradient = alg.scalar_multiplynv(1 / current_output_mini_batch->size(), alg.mat_vec_multv(alg.transposem(current_input_mini_batch), error));
|
||||
Ref<MLPPVector> gradient = alg.scalar_multiplynv(1 / current_output_mini_batch->size(), alg.mat_vec_multv(alg.transposenm(current_input_mini_batch), error));
|
||||
Ref<MLPPVector> reg_deriv_term = regularization.reg_deriv_termv(_weights, _lambda, _alpha, _reg);
|
||||
Ref<MLPPVector> weight_grad = alg.additionnv(gradient, reg_deriv_term); // Weight_grad_final
|
||||
|
||||
@ -665,7 +665,7 @@ void MLPPLinReg::normal_equation() {
|
||||
MLPPLinAlg alg;
|
||||
MLPPStat stat;
|
||||
|
||||
Ref<MLPPMatrix> input_set_t = alg.transposem(_input_set);
|
||||
Ref<MLPPMatrix> input_set_t = alg.transposenm(_input_set);
|
||||
|
||||
Ref<MLPPVector> input_set_t_row_tmp;
|
||||
input_set_t_row_tmp.instance();
|
||||
@ -683,14 +683,14 @@ void MLPPLinReg::normal_equation() {
|
||||
|
||||
Ref<MLPPVector> temp;
|
||||
//temp.resize(_k);
|
||||
temp = alg.mat_vec_multv(alg.inversem(alg.matmultm(alg.transposem(_input_set), _input_set)), alg.mat_vec_multv(alg.transposem(_input_set), _output_set));
|
||||
temp = alg.mat_vec_multv(alg.inversem(alg.matmultnm(alg.transposenm(_input_set), _input_set)), alg.mat_vec_multv(alg.transposenm(_input_set), _output_set));
|
||||
|
||||
ERR_FAIL_COND_MSG(Math::is_nan(temp->get_element(0)), "ERR: Resulting matrix was noninvertible/degenerate, and so the normal equation could not be performed. Try utilizing gradient descent.");
|
||||
|
||||
if (_reg == MLPPReg::REGULARIZATION_TYPE_RIDGE) {
|
||||
_weights = alg.mat_vec_multv(alg.inversem(alg.additionm(alg.matmultm(alg.transposem(_input_set), _input_set), alg.scalar_multiplym(_lambda, alg.identitym(_k)))), alg.mat_vec_multv(alg.transposem(_input_set), _output_set));
|
||||
_weights = alg.mat_vec_multv(alg.inversem(alg.additionnm(alg.matmultnm(alg.transposenm(_input_set), _input_set), alg.scalar_multiplynm(_lambda, alg.identitym(_k)))), alg.mat_vec_multv(alg.transposenm(_input_set), _output_set));
|
||||
} else {
|
||||
_weights = alg.mat_vec_multv(alg.inversem(alg.matmultm(alg.transposem(_input_set), _input_set)), alg.mat_vec_multv(alg.transposem(_input_set), _output_set));
|
||||
_weights = alg.mat_vec_multv(alg.inversem(alg.matmultnm(alg.transposenm(_input_set), _input_set)), alg.mat_vec_multv(alg.transposenm(_input_set), _output_set));
|
||||
}
|
||||
|
||||
_bias = stat.meanv(_output_set) - alg.dotv(_weights, x_means);
|
||||
|
@ -91,7 +91,7 @@ void MLPPLogReg::gradient_descent(real_t learning_rate, int max_epoch, bool ui)
|
||||
Ref<MLPPVector> error = alg.subtractionnv(_y_hat, _output_set);
|
||||
|
||||
// Calculating the weight gradients
|
||||
_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate / _n, alg.mat_vec_multv(alg.transposem(_input_set), error)));
|
||||
_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate / _n, alg.mat_vec_multv(alg.transposenm(_input_set), error)));
|
||||
_weights = regularization.reg_weightsv(_weights, _lambda, _alpha, _reg);
|
||||
|
||||
// Calculating the bias gradients
|
||||
@ -129,7 +129,7 @@ void MLPPLogReg::mle(real_t learning_rate, int max_epoch, bool ui) {
|
||||
Ref<MLPPVector> error = alg.subtractionnv(_output_set, _y_hat);
|
||||
|
||||
// Calculating the weight gradients
|
||||
_weights = alg.additionnv(_weights, alg.scalar_multiplynv(learning_rate / _n, alg.mat_vec_multv(alg.transposem(_input_set), error)));
|
||||
_weights = alg.additionnv(_weights, alg.scalar_multiplynv(learning_rate / _n, alg.mat_vec_multv(alg.transposenm(_input_set), error)));
|
||||
_weights = regularization.reg_weightsv(_weights, _lambda, _alpha, _reg);
|
||||
|
||||
// Calculating the bias gradients
|
||||
@ -235,7 +235,7 @@ void MLPPLogReg::mbgd(real_t learning_rate, int max_epoch, int mini_batch_size,
|
||||
Ref<MLPPVector> error = alg.subtractionnv(y_hat, current_mini_batch_output_entry);
|
||||
|
||||
// Calculating the weight gradients
|
||||
_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate / current_mini_batch_output_entry->size(), alg.mat_vec_multv(alg.transposem(current_mini_batch_input_entry), error)));
|
||||
_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate / current_mini_batch_output_entry->size(), alg.mat_vec_multv(alg.transposenm(current_mini_batch_input_entry), error)));
|
||||
_weights = regularization.reg_weightsv(_weights, _lambda, _alpha, _reg);
|
||||
|
||||
// Calculating the bias gradients
|
||||
|
@ -101,39 +101,39 @@ void MLPPMANN::gradient_descent(real_t learning_rate, int max_epoch, bool ui) {
|
||||
cost_prev = cost(_y_hat, _output_set);
|
||||
|
||||
if (_output_layer->get_activation() == MLPPActivation::ACTIVATION_FUNCTION_SOFTMAX) {
|
||||
_output_layer->set_delta(alg.subtractionm(_y_hat, _output_set));
|
||||
_output_layer->set_delta(alg.subtractionnm(_y_hat, _output_set));
|
||||
} else {
|
||||
_output_layer->set_delta(alg.hadamard_productm(mlpp_cost.run_cost_deriv_matrix(_output_layer->get_cost(), _y_hat, _output_set), avn.run_activation_deriv_matrix(_output_layer->get_activation(), _output_layer->get_z())));
|
||||
_output_layer->set_delta(alg.hadamard_productnm(mlpp_cost.run_cost_deriv_matrix(_output_layer->get_cost(), _y_hat, _output_set), avn.run_activation_deriv_matrix(_output_layer->get_activation(), _output_layer->get_z())));
|
||||
}
|
||||
|
||||
Ref<MLPPMatrix> output_w_grad = alg.matmultm(alg.transposem(_output_layer->get_input()), _output_layer->get_delta());
|
||||
Ref<MLPPMatrix> output_w_grad = alg.matmultnm(alg.transposenm(_output_layer->get_input()), _output_layer->get_delta());
|
||||
|
||||
_output_layer->set_weights(alg.subtractionm(_output_layer->get_weights(), alg.scalar_multiplym(learning_rate / _n, output_w_grad)));
|
||||
_output_layer->set_weights(alg.subtractionnm(_output_layer->get_weights(), alg.scalar_multiplynm(learning_rate / _n, output_w_grad)));
|
||||
_output_layer->set_weights(regularization.reg_weightsm(_output_layer->get_weights(), _output_layer->get_lambda(), _output_layer->get_alpha(), _output_layer->get_reg()));
|
||||
_output_layer->set_bias(alg.subtract_matrix_rows(_output_layer->get_bias(), alg.scalar_multiplym(learning_rate / _n, _output_layer->get_delta())));
|
||||
_output_layer->set_bias(alg.subtract_matrix_rows(_output_layer->get_bias(), alg.scalar_multiplynm(learning_rate / _n, _output_layer->get_delta())));
|
||||
|
||||
if (!_network.empty()) {
|
||||
Ref<MLPPHiddenLayer> layer = _network[_network.size() - 1];
|
||||
|
||||
//auto hiddenLayerAvn = layer.activation_map[layer.activation];
|
||||
|
||||
layer->set_delta(alg.hadamard_productm(alg.matmultm(_output_layer->get_delta(), alg.transposem(_output_layer->get_weights())), avn.run_activation_deriv_matrix(layer->get_activation(), layer->get_z())));
|
||||
Ref<MLPPMatrix> hidden_layer_w_grad = alg.matmultm(alg.transposem(layer->get_input()), layer->get_delta());
|
||||
layer->set_delta(alg.hadamard_productnm(alg.matmultnm(_output_layer->get_delta(), alg.transposenm(_output_layer->get_weights())), avn.run_activation_deriv_matrix(layer->get_activation(), layer->get_z())));
|
||||
Ref<MLPPMatrix> hidden_layer_w_grad = alg.matmultnm(alg.transposenm(layer->get_input()), layer->get_delta());
|
||||
|
||||
layer->set_weights(alg.subtractionm(layer->get_weights(), alg.scalar_multiplym(learning_rate / _n, hidden_layer_w_grad)));
|
||||
layer->set_weights(alg.subtractionnm(layer->get_weights(), alg.scalar_multiplynm(learning_rate / _n, hidden_layer_w_grad)));
|
||||
layer->set_weights(regularization.reg_weightsm(layer->get_weights(), layer->get_lambda(), layer->get_alpha(), layer->get_reg()));
|
||||
layer->set_bias(alg.subtract_matrix_rows(layer->get_bias(), alg.scalar_multiplym(learning_rate / _n, layer->get_delta())));
|
||||
layer->set_bias(alg.subtract_matrix_rows(layer->get_bias(), alg.scalar_multiplynm(learning_rate / _n, layer->get_delta())));
|
||||
|
||||
for (int i = _network.size() - 2; i >= 0; i--) {
|
||||
layer = _network[i];
|
||||
Ref<MLPPHiddenLayer> next_layer = _network[i + 1];
|
||||
|
||||
//hiddenLayerAvn = layer.activation_map[layer.activation];
|
||||
layer->set_delta(alg.hadamard_productm(alg.matmultm(next_layer->get_delta(), next_layer->get_weights()), avn.run_activation_deriv_matrix(layer->get_activation(), layer->get_z())));
|
||||
hidden_layer_w_grad = alg.matmultm(alg.transposem(layer->get_input()), layer->get_delta());
|
||||
layer->set_weights(alg.subtractionm(layer->get_weights(), alg.scalar_multiplym(learning_rate / _n, hidden_layer_w_grad)));
|
||||
layer->set_delta(alg.hadamard_productnm(alg.matmultnm(next_layer->get_delta(), next_layer->get_weights()), avn.run_activation_deriv_matrix(layer->get_activation(), layer->get_z())));
|
||||
hidden_layer_w_grad = alg.matmultnm(alg.transposenm(layer->get_input()), layer->get_delta());
|
||||
layer->set_weights(alg.subtractionnm(layer->get_weights(), alg.scalar_multiplynm(learning_rate / _n, hidden_layer_w_grad)));
|
||||
layer->set_weights(regularization.reg_weightsm(layer->get_weights(), layer->get_lambda(), layer->get_alpha(), layer->get_reg()));
|
||||
layer->set_bias(alg.subtract_matrix_rows(layer->get_bias(), alg.scalar_multiplym(learning_rate / _n, layer->get_delta())));
|
||||
layer->set_bias(alg.subtract_matrix_rows(layer->get_bias(), alg.scalar_multiplynm(learning_rate / _n, layer->get_delta())));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -100,7 +100,7 @@ void MLPPMLP::gradient_descent(real_t learning_rate, int max_epoch, bool UI) {
|
||||
|
||||
// Calculating the weight/bias gradients for layer 2
|
||||
|
||||
Ref<MLPPVector> D2_1 = alg.mat_vec_multv(alg.transposem(_a2), error);
|
||||
Ref<MLPPVector> D2_1 = alg.mat_vec_multv(alg.transposenm(_a2), error);
|
||||
|
||||
// weights and bias updation for layer 2
|
||||
_weights2->set_from_mlpp_vector(alg.subtractionnv(_weights2, alg.scalar_multiplynv(learning_rate / static_cast<real_t>(_n), D2_1)));
|
||||
@ -111,14 +111,14 @@ void MLPPMLP::gradient_descent(real_t learning_rate, int max_epoch, bool UI) {
|
||||
// Calculating the weight/bias for layer 1
|
||||
|
||||
Ref<MLPPMatrix> D1_1 = alg.outer_product(error, _weights2);
|
||||
Ref<MLPPMatrix> D1_2 = alg.hadamard_productm(alg.transposem(D1_1), avn.sigmoid_derivm(_z2));
|
||||
Ref<MLPPMatrix> D1_3 = alg.matmultm(alg.transposem(_input_set), D1_2);
|
||||
Ref<MLPPMatrix> D1_2 = alg.hadamard_productnm(alg.transposenm(D1_1), avn.sigmoid_derivm(_z2));
|
||||
Ref<MLPPMatrix> D1_3 = alg.matmultnm(alg.transposenm(_input_set), D1_2);
|
||||
|
||||
// weight and bias update for layer 1
|
||||
_weights1->set_from_mlpp_matrix(alg.subtractionm(_weights1, alg.scalar_multiplym(learning_rate / _n, D1_3)));
|
||||
_weights1->set_from_mlpp_matrix(alg.subtractionnm(_weights1, alg.scalar_multiplynm(learning_rate / _n, D1_3)));
|
||||
_weights1->set_from_mlpp_matrix(regularization.reg_weightsm(_weights1, _lambda, _alpha, _reg));
|
||||
|
||||
_bias1->set_from_mlpp_vector(alg.subtract_matrix_rows(_bias1, alg.scalar_multiplym(learning_rate / _n, D1_2)));
|
||||
_bias1->set_from_mlpp_vector(alg.subtract_matrix_rows(_bias1, alg.scalar_multiplynm(learning_rate / _n, D1_2)));
|
||||
|
||||
forward_pass();
|
||||
|
||||
@ -196,7 +196,7 @@ void MLPPMLP::sgd(real_t learning_rate, int max_epoch, bool UI) {
|
||||
Ref<MLPPVector> D1_2 = alg.hadamard_productnv(D1_1, avn.sigmoid_derivv(lz2));
|
||||
Ref<MLPPMatrix> D1_3 = alg.outer_product(input_set_row_tmp, D1_2);
|
||||
|
||||
_weights1->set_from_mlpp_matrix(alg.subtractionm(_weights1, alg.scalar_multiplym(learning_rate, D1_3)));
|
||||
_weights1->set_from_mlpp_matrix(alg.subtractionnm(_weights1, alg.scalar_multiplynm(learning_rate, D1_3)));
|
||||
_weights1->set_from_mlpp_matrix(regularization.reg_weightsm(_weights1, _lambda, _alpha, _reg));
|
||||
// Bias updation for layer 1
|
||||
|
||||
@ -254,7 +254,7 @@ void MLPPMLP::mbgd(real_t learning_rate, int max_epoch, int mini_batch_size, boo
|
||||
Ref<MLPPVector> error = alg.subtractionnv(ly_hat, current_output);
|
||||
|
||||
// Calculating the weight/bias gradients for layer 2
|
||||
Ref<MLPPVector> D2_1 = alg.mat_vec_multv(alg.transposem(la2), error);
|
||||
Ref<MLPPVector> D2_1 = alg.mat_vec_multv(alg.transposenm(la2), error);
|
||||
|
||||
real_t lr_d_cos = learning_rate / static_cast<real_t>(current_output->size());
|
||||
|
||||
@ -270,14 +270,14 @@ void MLPPMLP::mbgd(real_t learning_rate, int max_epoch, int mini_batch_size, boo
|
||||
|
||||
//Calculating the weight/bias for layer 1
|
||||
Ref<MLPPMatrix> D1_1 = alg.outer_product(error, _weights2);
|
||||
Ref<MLPPMatrix> D1_2 = alg.hadamard_productm(D1_1, avn.sigmoid_derivm(lz2));
|
||||
Ref<MLPPMatrix> D1_3 = alg.matmultm(alg.transposem(current_input), D1_2);
|
||||
Ref<MLPPMatrix> D1_2 = alg.hadamard_productnm(D1_1, avn.sigmoid_derivm(lz2));
|
||||
Ref<MLPPMatrix> D1_3 = alg.matmultnm(alg.transposenm(current_input), D1_2);
|
||||
|
||||
// weight and bias update for layer 1
|
||||
_weights1->set_from_mlpp_matrix(alg.subtractionm(_weights1, alg.scalar_multiplym(lr_d_cos, D1_3)));
|
||||
_weights1->set_from_mlpp_matrix(alg.subtractionnm(_weights1, alg.scalar_multiplynm(lr_d_cos, D1_3)));
|
||||
_weights1->set_from_mlpp_matrix(regularization.reg_weightsm(_weights1, _lambda, _alpha, _reg));
|
||||
|
||||
_bias1->set_from_mlpp_vector(alg.subtract_matrix_rows(_bias1, alg.scalar_multiplym(lr_d_cos, D1_2)));
|
||||
_bias1->set_from_mlpp_vector(alg.subtract_matrix_rows(_bias1, alg.scalar_multiplynm(lr_d_cos, D1_2)));
|
||||
|
||||
_y_hat = evaluatem(current_input);
|
||||
|
||||
@ -359,7 +359,7 @@ Ref<MLPPVector> MLPPMLP::evaluatem(const Ref<MLPPMatrix> &X) {
|
||||
MLPPLinAlg alg;
|
||||
MLPPActivation avn;
|
||||
|
||||
Ref<MLPPMatrix> pz2 = alg.mat_vec_addv(alg.matmultm(X, _weights1), _bias1);
|
||||
Ref<MLPPMatrix> pz2 = alg.mat_vec_addv(alg.matmultnm(X, _weights1), _bias1);
|
||||
Ref<MLPPMatrix> pa2 = avn.sigmoid_normm(pz2);
|
||||
|
||||
return avn.sigmoid_normv(alg.scalar_addnv(_bias2, alg.mat_vec_multv(pa2, _weights2)));
|
||||
@ -369,7 +369,7 @@ void MLPPMLP::propagatem(const Ref<MLPPMatrix> &X, Ref<MLPPMatrix> z2_out, Ref<M
|
||||
MLPPLinAlg alg;
|
||||
MLPPActivation avn;
|
||||
|
||||
z2_out->set_from_mlpp_matrix(alg.mat_vec_addv(alg.matmultm(X, _weights1), _bias1));
|
||||
z2_out->set_from_mlpp_matrix(alg.mat_vec_addv(alg.matmultnm(X, _weights1), _bias1));
|
||||
a2_out->set_from_mlpp_matrix(avn.sigmoid_normm(z2_out));
|
||||
}
|
||||
|
||||
@ -377,7 +377,7 @@ real_t MLPPMLP::evaluatev(const Ref<MLPPVector> &x) {
|
||||
MLPPLinAlg alg;
|
||||
MLPPActivation avn;
|
||||
|
||||
Ref<MLPPVector> pz2 = alg.additionnv(alg.mat_vec_multv(alg.transposem(_weights1), x), _bias1);
|
||||
Ref<MLPPVector> pz2 = alg.additionnv(alg.mat_vec_multv(alg.transposenm(_weights1), x), _bias1);
|
||||
Ref<MLPPVector> pa2 = avn.sigmoid_normv(pz2);
|
||||
|
||||
return avn.sigmoid_normr(alg.dotv(_weights2, pa2) + _bias2);
|
||||
@ -387,7 +387,7 @@ void MLPPMLP::propagatev(const Ref<MLPPVector> &x, Ref<MLPPVector> z2_out, Ref<M
|
||||
MLPPLinAlg alg;
|
||||
MLPPActivation avn;
|
||||
|
||||
z2_out->set_from_mlpp_vector(alg.additionnv(alg.mat_vec_multv(alg.transposem(_weights1), x), _bias1));
|
||||
z2_out->set_from_mlpp_vector(alg.additionnv(alg.mat_vec_multv(alg.transposenm(_weights1), x), _bias1));
|
||||
a2_out->set_from_mlpp_vector(avn.sigmoid_normv(z2_out));
|
||||
}
|
||||
|
||||
@ -395,7 +395,7 @@ void MLPPMLP::forward_pass() {
|
||||
MLPPLinAlg alg;
|
||||
MLPPActivation avn;
|
||||
|
||||
_z2->set_from_mlpp_matrix(alg.mat_vec_addv(alg.matmultm(_input_set, _weights1), _bias1));
|
||||
_z2->set_from_mlpp_matrix(alg.mat_vec_addv(alg.matmultnm(_input_set, _weights1), _bias1));
|
||||
_a2->set_from_mlpp_matrix(avn.sigmoid_normm(_z2));
|
||||
|
||||
_y_hat->set_from_mlpp_vector(avn.sigmoid_normv(alg.scalar_addnv(_bias2, alg.mat_vec_multv(_a2, _weights2))));
|
||||
|
@ -27,7 +27,6 @@ MLPPMLPOld::MLPPMLPOld(std::vector<std::vector<real_t>> p_inputSet, std::vector<
|
||||
lambda = p_lambda;
|
||||
alpha = p_alpha;
|
||||
|
||||
MLPPActivationOld avn;
|
||||
y_hat.resize(n);
|
||||
|
||||
weights1 = MLPPUtilities::weightInitialization(k, n_hidden);
|
||||
|
@ -124,7 +124,7 @@ void MLPPMultiOutputLayer::forward_pass() {
|
||||
MLPPLinAlg alg;
|
||||
MLPPActivation avn;
|
||||
|
||||
_z = alg.mat_vec_addv(alg.matmultm(_input, _weights), _bias);
|
||||
_z = alg.mat_vec_addv(alg.matmultnm(_input, _weights), _bias);
|
||||
_a = avn.run_activation_norm_matrix(_activation, _z);
|
||||
}
|
||||
|
||||
@ -132,7 +132,7 @@ void MLPPMultiOutputLayer::test(const Ref<MLPPVector> &x) {
|
||||
MLPPLinAlg alg;
|
||||
MLPPActivation avn;
|
||||
|
||||
_z_test = alg.additionm(alg.mat_vec_multv(alg.transposem(_weights), x), _bias);
|
||||
// mat_vec_multv produces a vector, so the sum with _bias must use the vector addition (additionnv); additionnm is the matrix routine.
_z_test = alg.additionnv(alg.mat_vec_multv(alg.transposenm(_weights), x), _bias);
|
||||
_a_test = avn.run_activation_norm_vector(_activation, _z_test);
|
||||
}
|
||||
|
||||
|
@ -41,7 +41,7 @@ Ref<MLPPMatrix> MLPPPCA::principal_components() {
|
||||
}
|
||||
}
|
||||
|
||||
_z = alg.matmultm(alg.transposem(_u_reduce), _x_normalized);
|
||||
_z = alg.matmultnm(alg.transposenm(_u_reduce), _x_normalized);
|
||||
|
||||
return _z;
|
||||
}
|
||||
@ -52,7 +52,7 @@ real_t MLPPPCA::score() {
|
||||
|
||||
MLPPLinAlg alg;
|
||||
|
||||
Ref<MLPPMatrix> x_approx = alg.matmultm(_u_reduce, _z);
|
||||
Ref<MLPPMatrix> x_approx = alg.matmultnm(_u_reduce, _z);
|
||||
real_t num = 0;
|
||||
real_t den = 0;
|
||||
|
||||
|
@ -84,7 +84,7 @@ void MLPPProbitReg::gradient_descent(real_t learning_rate, int max_epoch, bool u
|
||||
Ref<MLPPVector> error = alg.subtractionnv(_y_hat, _output_set);
|
||||
|
||||
// Calculating the weight gradients
|
||||
_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate / _n, alg.mat_vec_multv(alg.transposem(_input_set), alg.hadamard_productnv(error, avn.gaussian_cdf_derivv(_z)))));
|
||||
_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate / _n, alg.mat_vec_multv(alg.transposenm(_input_set), alg.hadamard_productnv(error, avn.gaussian_cdf_derivv(_z)))));
|
||||
_weights = regularization.reg_weightsv(_weights, _lambda, _alpha, _reg);
|
||||
|
||||
// Calculating the bias gradients
|
||||
@ -122,7 +122,7 @@ void MLPPProbitReg::mle(real_t learning_rate, int max_epoch, bool ui) {
|
||||
Ref<MLPPVector> error = alg.subtractionnv(_output_set, _y_hat);
|
||||
|
||||
// Calculating the weight gradients
|
||||
_weights = alg.additionnv(_weights, alg.scalar_multiplynv(learning_rate / _n, alg.mat_vec_multv(alg.transposem(_input_set), alg.hadamard_productnv(error, avn.gaussian_cdf_derivv(_z)))));
|
||||
_weights = alg.additionnv(_weights, alg.scalar_multiplynv(learning_rate / _n, alg.mat_vec_multv(alg.transposenm(_input_set), alg.hadamard_productnv(error, avn.gaussian_cdf_derivv(_z)))));
|
||||
_weights = regularization.reg_weightsv(_weights, _lambda, _alpha, _reg);
|
||||
|
||||
// Calculating the bias gradients
|
||||
@ -242,7 +242,7 @@ void MLPPProbitReg::mbgd(real_t learning_rate, int max_epoch, int mini_batch_siz
|
||||
Ref<MLPPVector> error = alg.subtractionnv(y_hat, current_output);
|
||||
|
||||
// Calculating the weight gradients
|
||||
_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate / batches.input_sets.size(), alg.mat_vec_multv(alg.transposem(current_input), alg.hadamard_productnv(error, avn.gaussian_cdf_derivv(z_tmp)))));
|
||||
_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate / batches.input_sets.size(), alg.mat_vec_multv(alg.transposenm(current_input), alg.hadamard_productnv(error, avn.gaussian_cdf_derivv(z_tmp)))));
|
||||
_weights = regularization.reg_weightsv(_weights, _lambda, _alpha, _reg);
|
||||
|
||||
// Calculating the bias gradients
|
||||
|
@ -97,7 +97,6 @@ void MLPPProbitRegOld::MLE(real_t learning_rate, int max_epoch, bool UI) {
|
||||
|
||||
void MLPPProbitRegOld::SGD(real_t learning_rate, int max_epoch, bool UI) {
|
||||
// NOTE: ∂y_hat/∂z is sparse
|
||||
MLPPActivationOld avn;
|
||||
MLPPLinAlg alg;
|
||||
MLPPReg regularization;
|
||||
real_t cost_prev = 0;
|
||||
|
@ -94,7 +94,7 @@ Ref<MLPPMatrix> MLPPReg::reg_weightsm(const Ref<MLPPMatrix> &weights, real_t lam
|
||||
return reg_deriv_termm(weights, lambda, alpha, reg);
|
||||
}
|
||||
|
||||
return alg.subtractionm(weights, reg_deriv_termm(weights, lambda, alpha, reg));
|
||||
return alg.subtractionnm(weights, reg_deriv_termm(weights, lambda, alpha, reg));
|
||||
|
||||
// for(int i = 0; i < weights.size(); i++){
|
||||
// for(int j = 0; j < weights[i].size(); j++){
|
||||
|
@ -86,31 +86,31 @@ void MLPPSoftmaxNet::gradient_descent(real_t learning_rate, int max_epoch, bool
|
||||
cost_prev = cost(_y_hat, _output_set);
|
||||
|
||||
// Calculating the errors
|
||||
Ref<MLPPMatrix> error = alg.subtractionm(_y_hat, _output_set);
|
||||
Ref<MLPPMatrix> error = alg.subtractionnm(_y_hat, _output_set);
|
||||
|
||||
// Calculating the weight/bias gradients for layer 2
|
||||
|
||||
Ref<MLPPMatrix> D2_1 = alg.matmultm(alg.transposem(_a2), error);
|
||||
Ref<MLPPMatrix> D2_1 = alg.matmultnm(alg.transposenm(_a2), error);
|
||||
|
||||
// weights and bias updation for layer 2
|
||||
_weights2 = alg.subtractionm(_weights2, alg.scalar_multiplym(learning_rate, D2_1));
|
||||
_weights2 = alg.subtractionnm(_weights2, alg.scalar_multiplynm(learning_rate, D2_1));
|
||||
_weights2 = regularization.reg_weightsm(_weights2, _lambda, _alpha, _reg);
|
||||
|
||||
_bias2 = alg.subtract_matrix_rows(_bias2, alg.scalar_multiplym(learning_rate, error));
|
||||
_bias2 = alg.subtract_matrix_rows(_bias2, alg.scalar_multiplynm(learning_rate, error));
|
||||
|
||||
//Calculating the weight/bias for layer 1
|
||||
|
||||
Ref<MLPPMatrix> D1_1 = alg.matmultm(error, alg.transposem(_weights2));
|
||||
Ref<MLPPMatrix> D1_1 = alg.matmultnm(error, alg.transposenm(_weights2));
|
||||
|
||||
Ref<MLPPMatrix> D1_2 = alg.hadamard_productm(D1_1, avn.sigmoid_derivm(_z2));
|
||||
Ref<MLPPMatrix> D1_2 = alg.hadamard_productnm(D1_1, avn.sigmoid_derivm(_z2));
|
||||
|
||||
Ref<MLPPMatrix> D1_3 = alg.matmultm(alg.transposem(_input_set), D1_2);
|
||||
Ref<MLPPMatrix> D1_3 = alg.matmultnm(alg.transposenm(_input_set), D1_2);
|
||||
|
||||
// weight and bias update for layer 1
|
||||
_weights1 = alg.subtractionm(_weights1, alg.scalar_multiplym(learning_rate, D1_3));
|
||||
_weights1 = alg.subtractionnm(_weights1, alg.scalar_multiplynm(learning_rate, D1_3));
|
||||
_weights1 = regularization.reg_weightsm(_weights1, _lambda, _alpha, _reg);
|
||||
|
||||
_bias1 = alg.subtract_matrix_rows(_bias1, alg.scalar_multiplym(learning_rate, D1_2));
|
||||
_bias1 = alg.subtract_matrix_rows(_bias1, alg.scalar_multiplynm(learning_rate, D1_2));
|
||||
|
||||
forward_pass();
|
||||
|
||||
@ -176,7 +176,7 @@ void MLPPSoftmaxNet::sgd(real_t learning_rate, int max_epoch, bool ui) {
|
||||
|
||||
// Weight updation for layer 2
|
||||
Ref<MLPPMatrix> D2_1 = alg.outer_product(error, prop_res.a2);
|
||||
_weights2 = alg.subtractionm(_weights2, alg.scalar_multiplym(learning_rate, alg.transposem(D2_1)));
|
||||
_weights2 = alg.subtractionnm(_weights2, alg.scalar_multiplynm(learning_rate, alg.transposenm(D2_1)));
|
||||
_weights2 = regularization.reg_weightsm(_weights2, _lambda, _alpha, _reg);
|
||||
|
||||
// Bias updation for layer 2
|
||||
@ -184,10 +184,10 @@ void MLPPSoftmaxNet::sgd(real_t learning_rate, int max_epoch, bool ui) {
|
||||
|
||||
// Weight updation for layer 1
|
||||
Ref<MLPPVector> D1_1 = alg.mat_vec_multv(_weights2, error);
|
||||
Ref<MLPPVector> D1_2 = alg.hadamard_productm(D1_1, avn.sigmoid_derivv(prop_res.z2));
|
||||
Ref<MLPPVector> D1_2 = alg.hadamard_productnm(D1_1, avn.sigmoid_derivv(prop_res.z2));
|
||||
Ref<MLPPMatrix> D1_3 = alg.outer_product(input_set_row_tmp, D1_2);
|
||||
|
||||
_weights1 = alg.subtractionm(_weights1, alg.scalar_multiplym(learning_rate, D1_3));
|
||||
_weights1 = alg.subtractionnm(_weights1, alg.scalar_multiplynm(learning_rate, D1_3));
|
||||
_weights1 = regularization.reg_weightsm(_weights1, _lambda, _alpha, _reg);
|
||||
// Bias updation for layer 1
|
||||
|
||||
@ -237,30 +237,30 @@ void MLPPSoftmaxNet::mbgd(real_t learning_rate, int max_epoch, int mini_batch_si
|
||||
cost_prev = cost(y_hat, current_output_mini_batch);
|
||||
|
||||
// Calculating the errors
|
||||
Ref<MLPPMatrix> error = alg.subtractionm(y_hat, current_output_mini_batch);
|
||||
Ref<MLPPMatrix> error = alg.subtractionnm(y_hat, current_output_mini_batch);
|
||||
|
||||
// Calculating the weight/bias gradients for layer 2
|
||||
|
||||
Ref<MLPPMatrix> D2_1 = alg.matmultm(alg.transposem(prop_res.a2), error);
|
||||
Ref<MLPPMatrix> D2_1 = alg.matmultnm(alg.transposenm(prop_res.a2), error);
|
||||
|
||||
// weights and bias updation for layser 2
|
||||
_weights2 = alg.subtractionm(_weights2, alg.scalar_multiplym(learning_rate, D2_1));
|
||||
_weights2 = alg.subtractionnm(_weights2, alg.scalar_multiplynm(learning_rate, D2_1));
|
||||
_weights2 = regularization.reg_weightsm(_weights2, _lambda, _alpha, _reg);
|
||||
|
||||
// Bias Updation for layer 2
|
||||
_bias2 = alg.subtract_matrix_rows(_bias2, alg.scalar_multiplym(learning_rate, error));
|
||||
_bias2 = alg.subtract_matrix_rows(_bias2, alg.scalar_multiplynm(learning_rate, error));
|
||||
|
||||
//Calculating the weight/bias for layer 1
|
||||
|
||||
Ref<MLPPMatrix> D1_1 = alg.matmultm(error, alg.transposem(_weights2));
|
||||
Ref<MLPPMatrix> D1_2 = alg.hadamard_productm(D1_1, avn.sigmoid_derivm(prop_res.z2));
|
||||
Ref<MLPPMatrix> D1_3 = alg.matmultm(alg.transposem(current_input_mini_batch), D1_2);
|
||||
Ref<MLPPMatrix> D1_1 = alg.matmultnm(error, alg.transposenm(_weights2));
|
||||
Ref<MLPPMatrix> D1_2 = alg.hadamard_productnm(D1_1, avn.sigmoid_derivm(prop_res.z2));
|
||||
Ref<MLPPMatrix> D1_3 = alg.matmultnm(alg.transposenm(current_input_mini_batch), D1_2);
|
||||
|
||||
// weight an bias updation for layer 1
|
||||
_weights1 = alg.subtractionm(_weights1, alg.scalar_multiplym(learning_rate, D1_3));
|
||||
_weights1 = alg.subtractionnm(_weights1, alg.scalar_multiplynm(learning_rate, D1_3));
|
||||
_weights1 = regularization.reg_weightsm(_weights1, _lambda, _alpha, _reg);
|
||||
|
||||
_bias1 = alg.subtract_matrix_rows(_bias1, alg.scalar_multiplym(learning_rate, D1_2));
|
||||
_bias1 = alg.subtract_matrix_rows(_bias1, alg.scalar_multiplynm(learning_rate, D1_2));
|
||||
|
||||
y_hat = evaluatem(current_input_mini_batch);
|
||||
|
||||
@ -366,10 +366,10 @@ Ref<MLPPVector> MLPPSoftmaxNet::evaluatev(const Ref<MLPPVector> &x) {
|
||||
MLPPLinAlg alg;
|
||||
MLPPActivation avn;
|
||||
|
||||
Ref<MLPPVector> z2 = alg.additionnv(alg.mat_vec_multv(alg.transposem(_weights1), x), _bias1);
|
||||
Ref<MLPPVector> z2 = alg.additionnv(alg.mat_vec_multv(alg.transposenm(_weights1), x), _bias1);
|
||||
Ref<MLPPVector> a2 = avn.sigmoid_normv(z2);
|
||||
|
||||
return avn.adj_softmax_normv(alg.additionnv(alg.mat_vec_multv(alg.transposem(_weights2), a2), _bias2));
|
||||
return avn.adj_softmax_normv(alg.additionnv(alg.mat_vec_multv(alg.transposenm(_weights2), a2), _bias2));
|
||||
}
|
||||
|
||||
MLPPSoftmaxNet::PropagateVResult MLPPSoftmaxNet::propagatev(const Ref<MLPPVector> &x) {
|
||||
@ -378,7 +378,7 @@ MLPPSoftmaxNet::PropagateVResult MLPPSoftmaxNet::propagatev(const Ref<MLPPVector
|
||||
|
||||
PropagateVResult res;
|
||||
|
||||
res.z2 = alg.additionnv(alg.mat_vec_multv(alg.transposem(_weights1), x), _bias1);
|
||||
res.z2 = alg.additionnv(alg.mat_vec_multv(alg.transposenm(_weights1), x), _bias1);
|
||||
res.a2 = avn.sigmoid_normv(res.z2);
|
||||
|
||||
return res;
|
||||
@ -388,10 +388,10 @@ Ref<MLPPMatrix> MLPPSoftmaxNet::evaluatem(const Ref<MLPPMatrix> &X) {
|
||||
MLPPLinAlg alg;
|
||||
MLPPActivation avn;
|
||||
|
||||
Ref<MLPPMatrix> z2 = alg.mat_vec_addv(alg.matmultm(X, _weights1), _bias1);
|
||||
Ref<MLPPMatrix> z2 = alg.mat_vec_addv(alg.matmultnm(X, _weights1), _bias1);
|
||||
Ref<MLPPMatrix> a2 = avn.sigmoid_normm(z2);
|
||||
|
||||
return avn.adj_softmax_normm(alg.mat_vec_addv(alg.matmultm(a2, _weights2), _bias2));
|
||||
return avn.adj_softmax_normm(alg.mat_vec_addv(alg.matmultnm(a2, _weights2), _bias2));
|
||||
}
|
||||
|
||||
MLPPSoftmaxNet::PropagateMResult MLPPSoftmaxNet::propagatem(const Ref<MLPPMatrix> &X) {
|
||||
@ -400,7 +400,7 @@ MLPPSoftmaxNet::PropagateMResult MLPPSoftmaxNet::propagatem(const Ref<MLPPMatrix
|
||||
|
||||
MLPPSoftmaxNet::PropagateMResult res;
|
||||
|
||||
res.z2 = alg.mat_vec_addv(alg.matmultm(X, _weights1), _bias1);
|
||||
res.z2 = alg.mat_vec_addv(alg.matmultnm(X, _weights1), _bias1);
|
||||
res.a2 = avn.sigmoid_normm(res.z2);
|
||||
|
||||
return res;
|
||||
@ -410,9 +410,9 @@ void MLPPSoftmaxNet::forward_pass() {
|
||||
MLPPLinAlg alg;
|
||||
MLPPActivation avn;
|
||||
|
||||
_z2 = alg.mat_vec_addv(alg.matmultm(_input_set, _weights1), _bias1);
|
||||
_z2 = alg.mat_vec_addv(alg.matmultnm(_input_set, _weights1), _bias1);
|
||||
_a2 = avn.sigmoid_normm(_z2);
|
||||
_y_hat = avn.adj_softmax_normm(alg.mat_vec_addv(alg.matmultm(_a2, _weights2), _bias2));
|
||||
_y_hat = avn.adj_softmax_normm(alg.mat_vec_addv(alg.matmultnm(_a2, _weights2), _bias2));
|
||||
}
|
||||
|
||||
void MLPPSoftmaxNet::_bind_methods() {
|
||||
|
@ -84,20 +84,20 @@ void MLPPSoftmaxReg::gradient_descent(real_t learning_rate, int max_epoch, bool
|
||||
while (true) {
|
||||
cost_prev = cost(_y_hat, _output_set);
|
||||
|
||||
Ref<MLPPMatrix> error = alg.subtractionm(_y_hat, _output_set);
|
||||
Ref<MLPPMatrix> error = alg.subtractionnm(_y_hat, _output_set);
|
||||
|
||||
//Calculating the weight gradients
|
||||
Ref<MLPPMatrix> w_gradient = alg.matmultm(alg.transposem(_input_set), error);
|
||||
Ref<MLPPMatrix> w_gradient = alg.matmultnm(alg.transposenm(_input_set), error);
|
||||
|
||||
//Weight updation
|
||||
_weights = alg.subtractionm(_weights, alg.scalar_multiplym(learning_rate, w_gradient));
|
||||
_weights = alg.subtractionnm(_weights, alg.scalar_multiplynm(learning_rate, w_gradient));
|
||||
_weights = regularization.reg_weightsm(_weights, _lambda, _alpha, _reg);
|
||||
|
||||
// Calculating the bias gradients
|
||||
//real_t b_gradient = alg.sum_elements(error);
|
||||
|
||||
// Bias Updation
|
||||
_bias = alg.subtract_matrix_rows(_bias, alg.scalar_multiplym(learning_rate, error));
|
||||
_bias = alg.subtract_matrix_rows(_bias, alg.scalar_multiplynm(learning_rate, error));
|
||||
|
||||
forward_pass();
|
||||
|
||||
@ -162,7 +162,7 @@ void MLPPSoftmaxReg::sgd(real_t learning_rate, int max_epoch, bool ui) {
|
||||
Ref<MLPPMatrix> w_gradient = alg.outer_product(input_set_row_tmp, alg.subtractionnv(y_hat, output_set_row_tmp));
|
||||
|
||||
// Weight Updation
|
||||
_weights = alg.subtractionm(_weights, alg.scalar_multiplym(learning_rate, w_gradient));
|
||||
_weights = alg.subtractionnm(_weights, alg.scalar_multiplynm(learning_rate, w_gradient));
|
||||
_weights = regularization.reg_weightsm(_weights, _lambda, _alpha, _reg);
|
||||
|
||||
// Calculating the bias gradients
|
||||
@ -208,17 +208,17 @@ void MLPPSoftmaxReg::mbgd(real_t learning_rate, int max_epoch, int mini_batch_si
|
||||
Ref<MLPPMatrix> y_hat = evaluatem(current_inputs);
|
||||
cost_prev = cost(y_hat, current_outputs);
|
||||
|
||||
Ref<MLPPMatrix> error = alg.subtractionm(y_hat, current_outputs);
|
||||
Ref<MLPPMatrix> error = alg.subtractionnm(y_hat, current_outputs);
|
||||
|
||||
// Calculating the weight gradients
|
||||
Ref<MLPPMatrix> w_gradient = alg.matmultm(alg.transposem(current_inputs), error);
|
||||
Ref<MLPPMatrix> w_gradient = alg.matmultnm(alg.transposenm(current_inputs), error);
|
||||
|
||||
//Weight updation
|
||||
_weights = alg.subtractionm(_weights, alg.scalar_multiplym(learning_rate, w_gradient));
|
||||
_weights = alg.subtractionnm(_weights, alg.scalar_multiplynm(learning_rate, w_gradient));
|
||||
_weights = regularization.reg_weightsm(_weights, _lambda, _alpha, _reg);
|
||||
|
||||
// Calculating the bias gradients
|
||||
_bias = alg.subtract_matrix_rows(_bias, alg.scalar_multiplym(learning_rate, error));
|
||||
_bias = alg.subtract_matrix_rows(_bias, alg.scalar_multiplynm(learning_rate, error));
|
||||
y_hat = evaluatem(current_inputs);
|
||||
|
||||
if (ui) {
|
||||
@ -345,14 +345,14 @@ Ref<MLPPVector> MLPPSoftmaxReg::evaluatev(const Ref<MLPPVector> &x) {
|
||||
MLPPLinAlg alg;
|
||||
MLPPActivation avn;
|
||||
|
||||
return avn.softmax_normv(alg.additionnv(_bias, alg.mat_vec_multv(alg.transposem(_weights), x)));
|
||||
return avn.softmax_normv(alg.additionnv(_bias, alg.mat_vec_multv(alg.transposenm(_weights), x)));
|
||||
}
|
||||
|
||||
Ref<MLPPMatrix> MLPPSoftmaxReg::evaluatem(const Ref<MLPPMatrix> &X) {
|
||||
MLPPLinAlg alg;
|
||||
MLPPActivation avn;
|
||||
|
||||
return avn.softmax_normm(alg.mat_vec_addv(alg.matmultm(X, _weights), _bias));
|
||||
return avn.softmax_normm(alg.mat_vec_addv(alg.matmultnm(X, _weights), _bias));
|
||||
}
|
||||
|
||||
// softmax ( wTx + b )
|
||||
@ -360,7 +360,7 @@ void MLPPSoftmaxReg::forward_pass() {
|
||||
MLPPLinAlg alg;
|
||||
MLPPActivation avn;
|
||||
|
||||
_y_hat = avn.softmax_normm(alg.mat_vec_addv(alg.matmultm(_input_set, _weights), _bias));
|
||||
_y_hat = avn.softmax_normm(alg.mat_vec_addv(alg.matmultnm(_input_set, _weights), _bias));
|
||||
}
|
||||
|
||||
void MLPPSoftmaxReg::_bind_methods() {
|
||||
|
@ -69,7 +69,7 @@ void MLPPSVC::gradient_descent(real_t learning_rate, int max_epoch, bool ui) {
|
||||
while (true) {
|
||||
cost_prev = cost(_y_hat, _output_set, _weights, _c);
|
||||
|
||||
_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate / _n, alg.mat_vec_multv(alg.transposem(_input_set), mlpp_cost.hinge_loss_derivwv(_z, _output_set, _c))));
|
||||
_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate / _n, alg.mat_vec_multv(alg.transposenm(_input_set), mlpp_cost.hinge_loss_derivwv(_z, _output_set, _c))));
|
||||
_weights = regularization.reg_weightsv(_weights, learning_rate / _n, 0, MLPPReg::REGULARIZATION_TYPE_RIDGE);
|
||||
|
||||
// Calculating the bias gradients
|
||||
@ -190,7 +190,7 @@ void MLPPSVC::mbgd(real_t learning_rate, int max_epoch, int mini_batch_size, boo
|
||||
cost_prev = cost(z, current_output_batch_entry, _weights, _c);
|
||||
|
||||
// Calculating the weight gradients
|
||||
_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate / _n, alg.mat_vec_multv(alg.transposem(current_input_batch_entry), mlpp_cost.hinge_loss_derivwv(z, current_output_batch_entry, _c))));
|
||||
_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate / _n, alg.mat_vec_multv(alg.transposenm(current_input_batch_entry), mlpp_cost.hinge_loss_derivwv(z, current_output_batch_entry, _c))));
|
||||
_weights = regularization.reg_weightsv(_weights, learning_rate / _n, 0, MLPPReg::REGULARIZATION_TYPE_RIDGE);
|
||||
|
||||
// Calculating the bias gradients
|
||||
|
@ -87,7 +87,7 @@ void MLPPTanhReg::gradient_descent(real_t learning_rate, int max_epoch, bool ui)
|
||||
|
||||
Ref<MLPPVector> error = alg.subtractionnv(_y_hat, _output_set);
|
||||
|
||||
_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate / _n, alg.mat_vec_multv(alg.transposem(_input_set), alg.hadamard_productnv(error, avn.tanh_derivv(_z)))));
|
||||
_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate / _n, alg.mat_vec_multv(alg.transposenm(_input_set), alg.hadamard_productnv(error, avn.tanh_derivv(_z)))));
|
||||
_weights = regularization.reg_weightsv(_weights, _lambda, _alpha, _reg);
|
||||
|
||||
// Calculating the bias gradients
|
||||
@ -194,7 +194,7 @@ void MLPPTanhReg::mbgd(real_t learning_rate, int max_epoch, int mini_batch_size,
|
||||
Ref<MLPPVector> error = alg.subtractionnv(y_hat, current_output_batch_entry);
|
||||
|
||||
// Calculating the weight gradients
|
||||
_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate / _n, alg.mat_vec_multv(alg.transposem(current_input_batch_entry), alg.hadamard_productnv(error, avn.tanh_derivv(z)))));
|
||||
_weights = alg.subtractionnv(_weights, alg.scalar_multiplynv(learning_rate / _n, alg.mat_vec_multv(alg.transposenm(current_input_batch_entry), alg.hadamard_productnv(error, avn.tanh_derivv(z)))));
|
||||
_weights = regularization.reg_weightsv(_weights, _lambda, _alpha, _reg);
|
||||
|
||||
// Calculating the bias gradients
|
||||
|
@ -270,14 +270,14 @@ void MLPPWGAN::update_discriminator_parameters(Vector<Ref<MLPPMatrix>> hidden_la
|
||||
if (!_network.empty()) {
|
||||
Ref<MLPPHiddenLayer> layer = _network[_network.size() - 1];
|
||||
|
||||
layer->set_weights(alg.subtractionm(layer->get_weights(), hidden_layer_updations[0]));
|
||||
layer->set_bias(alg.subtract_matrix_rows(layer->get_bias(), alg.scalar_multiplym(learning_rate / _n, layer->get_delta())));
|
||||
layer->set_weights(alg.subtractionnm(layer->get_weights(), hidden_layer_updations[0]));
|
||||
layer->set_bias(alg.subtract_matrix_rows(layer->get_bias(), alg.scalar_multiplynm(learning_rate / _n, layer->get_delta())));
|
||||
|
||||
for (int i = _network.size() - 2; i > _network.size() / 2; i--) {
|
||||
layer = _network[i];
|
||||
|
||||
layer->set_weights(alg.subtractionm(layer->get_weights(), hidden_layer_updations[(_network.size() - 2) - i + 1]));
|
||||
layer->set_bias(alg.subtract_matrix_rows(layer->get_bias(), alg.scalar_multiplym(learning_rate / _n, layer->get_delta())));
|
||||
layer->set_weights(alg.subtractionnm(layer->get_weights(), hidden_layer_updations[(_network.size() - 2) - i + 1]));
|
||||
layer->set_bias(alg.subtract_matrix_rows(layer->get_bias(), alg.scalar_multiplynm(learning_rate / _n, layer->get_delta())));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -291,8 +291,8 @@ void MLPPWGAN::update_generator_parameters(Vector<Ref<MLPPMatrix>> hidden_layer_
|
||||
|
||||
//std::cout << network[i].weights.size() << "x" << network[i].weights[0].size() << std::endl;
|
||||
//std::cout << hiddenLayerUpdations[(network.size() - 2) - i + 1].size() << "x" << hiddenLayerUpdations[(network.size() - 2) - i + 1][0].size() << std::endl;
|
||||
layer->set_weights(alg.subtractionm(layer->get_weights(), hidden_layer_updations[(_network.size() - 2) - i + 1]));
|
||||
layer->set_bias(alg.subtract_matrix_rows(layer->get_bias(), alg.scalar_multiplym(learning_rate / _n, layer->get_delta())));
|
||||
layer->set_weights(alg.subtractionnm(layer->get_weights(), hidden_layer_updations[(_network.size() - 2) - i + 1]));
|
||||
layer->set_bias(alg.subtract_matrix_rows(layer->get_bias(), alg.scalar_multiplynm(learning_rate / _n, layer->get_delta())));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -307,17 +307,17 @@ MLPPWGAN::DiscriminatorGradientResult MLPPWGAN::compute_discriminator_gradients(
|
||||
|
||||
_output_layer->set_delta(alg.hadamard_productnv(mlpp_cost.run_cost_deriv_vector(_output_layer->get_cost(), y_hat, output_set), avn.run_activation_deriv_vector(_output_layer->get_activation(), _output_layer->get_z())));
|
||||
|
||||
data.output_w_grad = alg.mat_vec_multv(alg.transposem(_output_layer->get_input()), _output_layer->get_delta());
|
||||
data.output_w_grad = alg.mat_vec_multv(alg.transposenm(_output_layer->get_input()), _output_layer->get_delta());
|
||||
data.output_w_grad = alg.additionnv(data.output_w_grad, regularization.reg_deriv_termv(_output_layer->get_weights(), _output_layer->get_lambda(), _output_layer->get_alpha(), _output_layer->get_reg()));
|
||||
|
||||
if (!_network.empty()) {
|
||||
Ref<MLPPHiddenLayer> layer = _network[_network.size() - 1];
|
||||
|
||||
layer->set_delta(alg.hadamard_productm(alg.outer_product(_output_layer->get_delta(), _output_layer->get_weights()), avn.run_activation_deriv_matrix(layer->get_activation(), layer->get_z())));
|
||||
layer->set_delta(alg.hadamard_productnm(alg.outer_product(_output_layer->get_delta(), _output_layer->get_weights()), avn.run_activation_deriv_matrix(layer->get_activation(), layer->get_z())));
|
||||
|
||||
Ref<MLPPMatrix> hidden_layer_w_grad = alg.matmultm(alg.transposem(layer->get_input()), layer->get_delta());
|
||||
Ref<MLPPMatrix> hidden_layer_w_grad = alg.matmultnm(alg.transposenm(layer->get_input()), layer->get_delta());
|
||||
|
||||
data.cumulative_hidden_layer_w_grad.push_back(alg.additionm(hidden_layer_w_grad, regularization.reg_deriv_termm(layer->get_weights(), layer->get_lambda(), layer->get_alpha(), layer->get_reg()))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
|
||||
data.cumulative_hidden_layer_w_grad.push_back(alg.additionnm(hidden_layer_w_grad, regularization.reg_deriv_termm(layer->get_weights(), layer->get_lambda(), layer->get_alpha(), layer->get_reg()))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
|
||||
|
||||
//std::cout << "HIDDENLAYER FIRST:" << hiddenLayerWGrad.size() << "x" << hiddenLayerWGrad[0].size() << std::endl;
|
||||
//std::cout << "WEIGHTS SECOND:" << layer.weights.size() << "x" << layer.weights[0].size() << std::endl;
|
||||
@ -326,11 +326,11 @@ MLPPWGAN::DiscriminatorGradientResult MLPPWGAN::compute_discriminator_gradients(
|
||||
layer = _network[i];
|
||||
Ref<MLPPHiddenLayer> next_layer = _network[i + 1];
|
||||
|
||||
layer->set_delta(alg.hadamard_productm(alg.matmultm(next_layer->get_delta(), alg.transposem(next_layer->get_weights())), avn.run_activation_deriv_matrix(layer->get_activation(), layer->get_z())));
|
||||
layer->set_delta(alg.hadamard_productnm(alg.matmultnm(next_layer->get_delta(), alg.transposenm(next_layer->get_weights())), avn.run_activation_deriv_matrix(layer->get_activation(), layer->get_z())));
|
||||
|
||||
hidden_layer_w_grad = alg.matmultm(alg.transposem(layer->get_input()), layer->get_delta());
|
||||
hidden_layer_w_grad = alg.matmultnm(alg.transposenm(layer->get_input()), layer->get_delta());
|
||||
|
||||
data.cumulative_hidden_layer_w_grad.push_back(alg.additionm(hidden_layer_w_grad, regularization.reg_deriv_termm(layer->get_weights(), layer->get_lambda(), layer->get_alpha(), layer->get_reg()))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
|
||||
data.cumulative_hidden_layer_w_grad.push_back(alg.additionnm(hidden_layer_w_grad, regularization.reg_deriv_termm(layer->get_weights(), layer->get_lambda(), layer->get_alpha(), layer->get_reg()))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
|
||||
}
|
||||
}
|
||||
|
||||
@ -350,17 +350,17 @@ Vector<Ref<MLPPMatrix>> MLPPWGAN::compute_generator_gradients(const Ref<MLPPVect
|
||||
|
||||
_output_layer->set_delta(alg.hadamard_productnv(cost_deriv_vector, activation_deriv_vector));
|
||||
|
||||
Ref<MLPPVector> output_w_grad = alg.mat_vec_multv(alg.transposem(_output_layer->get_input()), _output_layer->get_delta());
|
||||
Ref<MLPPVector> output_w_grad = alg.mat_vec_multv(alg.transposenm(_output_layer->get_input()), _output_layer->get_delta());
|
||||
output_w_grad = alg.additionnv(output_w_grad, regularization.reg_deriv_termv(_output_layer->get_weights(), _output_layer->get_lambda(), _output_layer->get_alpha(), _output_layer->get_reg()));
|
||||
|
||||
if (!_network.empty()) {
|
||||
Ref<MLPPHiddenLayer> layer = _network[_network.size() - 1];
|
||||
|
||||
Ref<MLPPMatrix> activation_deriv_matrix = avn.run_activation_deriv_matrix(layer->get_activation(), layer->get_z());
|
||||
layer->set_delta(alg.hadamard_productm(alg.outer_product(_output_layer->get_delta(), _output_layer->get_weights()), activation_deriv_matrix));
|
||||
layer->set_delta(alg.hadamard_productnm(alg.outer_product(_output_layer->get_delta(), _output_layer->get_weights()), activation_deriv_matrix));
|
||||
|
||||
Ref<MLPPMatrix> hidden_layer_w_grad = alg.matmultm(alg.transposem(layer->get_input()), layer->get_delta());
|
||||
cumulative_hidden_layer_w_grad.push_back(alg.additionm(hidden_layer_w_grad, regularization.reg_deriv_termm(layer->get_weights(), layer->get_lambda(), layer->get_alpha(), layer->get_reg()))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
|
||||
Ref<MLPPMatrix> hidden_layer_w_grad = alg.matmultnm(alg.transposenm(layer->get_input()), layer->get_delta());
|
||||
cumulative_hidden_layer_w_grad.push_back(alg.additionnm(hidden_layer_w_grad, regularization.reg_deriv_termm(layer->get_weights(), layer->get_lambda(), layer->get_alpha(), layer->get_reg()))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
|
||||
|
||||
for (int i = _network.size() - 2; i >= 0; i--) {
|
||||
layer = _network[i];
|
||||
@ -368,9 +368,9 @@ Vector<Ref<MLPPMatrix>> MLPPWGAN::compute_generator_gradients(const Ref<MLPPVect
|
||||
|
||||
activation_deriv_matrix = avn.run_activation_deriv_matrix(layer->get_activation(), layer->get_z());
|
||||
|
||||
layer->set_delta(alg.hadamard_productm(alg.matmultm(next_layer->get_delta(), alg.transposem(next_layer->get_weights())), activation_deriv_matrix));
|
||||
hidden_layer_w_grad = alg.matmultm(alg.transposem(layer->get_input()), layer->get_delta());
|
||||
cumulative_hidden_layer_w_grad.push_back(alg.additionm(hidden_layer_w_grad, regularization.reg_deriv_termm(layer->get_weights(), layer->get_lambda(), layer->get_alpha(), layer->get_reg()))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
|
||||
layer->set_delta(alg.hadamard_productnm(alg.matmultnm(next_layer->get_delta(), alg.transposenm(next_layer->get_weights())), activation_deriv_matrix));
|
||||
hidden_layer_w_grad = alg.matmultnm(alg.transposenm(layer->get_input()), layer->get_delta());
|
||||
cumulative_hidden_layer_w_grad.push_back(alg.additionnm(hidden_layer_w_grad, regularization.reg_deriv_termm(layer->get_weights(), layer->get_lambda(), layer->get_alpha(), layer->get_reg()))); // Adding to our cumulative hidden layer grads. Maintain reg terms as well.
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -310,7 +310,7 @@ void MLPPTests::test_multivariate_linear_regression_adam() {
|
||||
alg.printVector(adamModelOld.modelSetTest(ds->get_input()->to_std_vector()));
|
||||
std::cout << "ACCURACY: " << 100 * adamModelOld.score() << "%" << std::endl;
|
||||
|
||||
MLPPLinReg adam_model(alg.transposem(ds->get_input()), ds->get_output());
|
||||
MLPPLinReg adam_model(alg.transposenm(ds->get_input()), ds->get_output());
|
||||
PLOG_MSG(adam_model.model_set_test(ds->get_input())->to_string());
|
||||
PLOG_MSG("ACCURACY: " + String::num(100 * adam_model.score()) + "%");
|
||||
}
|
||||
@ -330,7 +330,7 @@ void MLPPTests::test_multivariate_linear_regression_score_sgd_adam(bool ui) {
|
||||
modelf_old.MBGD(0.001, 5, 1, ui);
|
||||
scoreSGD += modelf_old.score();
|
||||
|
||||
MLPPLinReg modelf(alg.transposem(ds->get_input()), ds->get_output());
|
||||
MLPPLinReg modelf(alg.transposenm(ds->get_input()), ds->get_output());
|
||||
modelf.mbgd(0.001, 5, 1, ui);
|
||||
scoreSGD += modelf.score();
|
||||
|
||||
@ -338,7 +338,7 @@ void MLPPTests::test_multivariate_linear_regression_score_sgd_adam(bool ui) {
|
||||
adamModelf_old.Adam(0.1, 5, 1, 0.9, 0.999, 1e-8, ui); // Change batch size = sgd, bgd
|
||||
scoreADAM += adamModelf_old.score();
|
||||
|
||||
MLPPLinReg adamModelf(alg.transposem(ds->get_input()), ds->get_output());
|
||||
MLPPLinReg adamModelf(alg.transposenm(ds->get_input()), ds->get_output());
|
||||
adamModelf.adam(0.1, 5, 1, 0.9, 0.999, 1e-8, ui); // Change batch size = sgd, bgd
|
||||
scoreADAM += adamModelf.score();
|
||||
}
|
||||
@ -361,7 +361,7 @@ void MLPPTests::test_multivariate_linear_regression_epochs_gradient_descent(bool
|
||||
model3_old.gradientDescent(0.001, 300, ui);
|
||||
alg.printVector(model3_old.modelSetTest(ds->get_input()->to_std_vector()));
|
||||
|
||||
MLPPLinReg model3(alg.transposem(ds->get_input()), ds->get_output()); // Can use Lasso, Ridge, ElasticNet Reg
|
||||
MLPPLinReg model3(alg.transposenm(ds->get_input()), ds->get_output()); // Can use Lasso, Ridge, ElasticNet Reg
|
||||
model3.gradient_descent(0.001, 300, ui);
|
||||
PLOG_MSG(model3.model_set_test(ds->get_input())->to_string());
|
||||
}
|
||||
@ -380,7 +380,7 @@ void MLPPTests::test_multivariate_linear_regression_newton_raphson(bool ui) {
|
||||
model2_old.NewtonRaphson(1.5, 300, ui);
|
||||
alg.printVector(model2_old.modelSetTest(ds->get_input()->to_std_vector()));
|
||||
|
||||
MLPPLinReg model2(alg.transposem(ds->get_input()), ds->get_output());
|
||||
MLPPLinReg model2(alg.transposenm(ds->get_input()), ds->get_output());
|
||||
model2.newton_raphson(1.5, 300, ui);
|
||||
PLOG_MSG(model2.model_set_test(ds->get_input())->to_string());
|
||||
}
|
||||
@ -440,9 +440,9 @@ void MLPPTests::test_c_log_log_regression(bool ui) {
|
||||
output_set.instance();
|
||||
output_set->set_from_std_vector(outputSet);
|
||||
|
||||
MLPPCLogLogReg model(alg.transposem(input_set), output_set);
|
||||
MLPPCLogLogReg model(alg.transposenm(input_set), output_set);
|
||||
model.sgd(0.1, 10000, ui);
|
||||
PLOG_MSG(model.model_set_test(alg.transposem(input_set))->to_string());
|
||||
PLOG_MSG(model.model_set_test(alg.transposenm(input_set))->to_string());
|
||||
PLOG_MSG("ACCURACY: " + String::num(100 * model.score()) + "%");
|
||||
}
|
||||
void MLPPTests::test_exp_reg_regression(bool ui) {
|
||||
@ -465,9 +465,9 @@ void MLPPTests::test_exp_reg_regression(bool ui) {
|
||||
output_set.instance();
|
||||
output_set->set_from_std_vector(outputSet);
|
||||
|
||||
MLPPExpReg model(alg.transposem(input_set), output_set);
|
||||
MLPPExpReg model(alg.transposenm(input_set), output_set);
|
||||
model.sgd(0.001, 10000, ui);
|
||||
PLOG_MSG(model.model_set_test(alg.transposem(input_set))->to_string());
|
||||
PLOG_MSG(model.model_set_test(alg.transposenm(input_set))->to_string());
|
||||
PLOG_MSG("ACCURACY: " + String::num(100 * model.score()) + "%");
|
||||
}
|
||||
void MLPPTests::test_tanh_regression(bool ui) {
|
||||
@ -600,9 +600,9 @@ void MLPPTests::test_autoencoder(bool ui) {
|
||||
input_set.instance();
|
||||
input_set->set_from_std_vectors(inputSet);
|
||||
|
||||
MLPPAutoEncoder model(alg.transposem(input_set), 5);
|
||||
MLPPAutoEncoder model(alg.transposenm(input_set), 5);
|
||||
model.sgd(0.001, 300000, ui);
|
||||
PLOG_MSG(model.model_set_test(alg.transposem(input_set))->to_string());
|
||||
PLOG_MSG(model.model_set_test(alg.transposenm(input_set))->to_string());
|
||||
PLOG_MSG("ACCURACY: " + String::num(100 * model.score()) + "%");
|
||||
}
|
||||
void MLPPTests::test_dynamically_sized_ann(bool ui) {
|
||||
@ -636,7 +636,7 @@ void MLPPTests::test_dynamically_sized_ann(bool ui) {
|
||||
output_set.instance();
|
||||
output_set->set_from_std_vector(outputSet);
|
||||
|
||||
MLPPANN ann(alg.transposem(input_set), output_set);
|
||||
MLPPANN ann(alg.transposenm(input_set), output_set);
|
||||
ann.add_layer(2, MLPPActivation::ACTIVATION_FUNCTION_COSH);
|
||||
ann.add_output_layer(MLPPActivation::ACTIVATION_FUNCTION_SIGMOID, MLPPCost::COST_TYPE_LOGISTIC_LOSS);
|
||||
|
||||
@ -646,7 +646,7 @@ void MLPPTests::test_dynamically_sized_ann(bool ui) {
|
||||
|
||||
ann.set_learning_rate_scheduler_drop(MLPPANN::SCHEDULER_TYPE_STEP, 0.5, 1000);
|
||||
ann.gradient_descent(0.01, 30000);
|
||||
PLOG_MSG(ann.model_set_test(alg.transposem(input_set))->to_string());
|
||||
PLOG_MSG(ann.model_set_test(alg.transposenm(input_set))->to_string());
|
||||
PLOG_MSG("ACCURACY: " + String::num(100 * ann.score()) + "%");
|
||||
}
|
||||
void MLPPTests::test_wgan_old(bool ui) {
|
||||
@ -780,8 +780,8 @@ void MLPPTests::test_train_test_split_mann(bool ui) {
|
||||
Ref<MLPPDataComplex> d;
|
||||
d.instance();
|
||||
|
||||
d->set_input(alg.transposem(input_set_1));
|
||||
d->set_output(alg.transposem(output_set_1));
|
||||
d->set_input(alg.transposenm(input_set_1));
|
||||
d->set_output(alg.transposenm(output_set_1));
|
||||
|
||||
MLPPData::SplitComplexData split_data = data.train_test_split(d, 0.2);
|
||||
|
||||
@ -829,14 +829,14 @@ void MLPPTests::test_naive_bayes() {
|
||||
MLPPBernoulliNBOld BNBOld(alg.transpose(inputSet), outputSet);
|
||||
alg.printVector(BNBOld.modelSetTest(alg.transpose(inputSet)));
|
||||
|
||||
MLPPBernoulliNB BNB(alg.transposem(input_set), output_set);
|
||||
PLOG_MSG(BNB.model_set_test(alg.transposem(input_set))->to_string());
|
||||
MLPPBernoulliNB BNB(alg.transposenm(input_set), output_set);
|
||||
PLOG_MSG(BNB.model_set_test(alg.transposenm(input_set))->to_string());
|
||||
|
||||
MLPPGaussianNBOld GNBOld(alg.transpose(inputSet), outputSet, 2);
|
||||
alg.printVector(GNBOld.modelSetTest(alg.transpose(inputSet)));
|
||||
|
||||
MLPPGaussianNB GNB(alg.transposem(input_set), output_set, 2);
|
||||
PLOG_MSG(GNB.model_set_test(alg.transposem(input_set))->to_string());
|
||||
MLPPGaussianNB GNB(alg.transposenm(input_set), output_set, 2);
|
||||
PLOG_MSG(GNB.model_set_test(alg.transposenm(input_set))->to_string());
|
||||
}
|
||||
void MLPPTests::test_k_means(bool ui) {
|
||||
// KMeans
|
||||
|
Loading…
Reference in New Issue
Block a user