From 77127594ed152d34b079fbf12e4bde0b2c8d827a Mon Sep 17 00:00:00 2001 From: Relintai Date: Tue, 26 Dec 2023 23:16:57 +0100 Subject: [PATCH] Cleaned up and bound everything in MLPPStat. --- mlpp/stat/stat.cpp | 277 +++++++++++++++++++++++++++++++++------------ mlpp/stat/stat.h | 48 ++++---- 2 files changed, 224 insertions(+), 101 deletions(-) diff --git a/mlpp/stat/stat.cpp b/mlpp/stat/stat.cpp index ddbdbb4..1335174 100644 --- a/mlpp/stat/stat.cpp +++ b/mlpp/stat/stat.cpp @@ -8,6 +8,8 @@ #include "../activation/activation.h" #include "../data/data.h" #include "../lin_alg/lin_alg.h" +#include "core/containers/hash_map.h" + #include #include #include @@ -15,79 +17,120 @@ #include real_t MLPPStat::b0_estimation(const Ref &x, const Ref &y) { + ERR_FAIL_COND_V(!x.is_valid() || !y.is_valid(), 0); + return meanv(y) - b1_estimation(x, y) * meanv(x); } real_t MLPPStat::b1_estimation(const Ref &x, const Ref &y) { + ERR_FAIL_COND_V(!x.is_valid() || !y.is_valid(), 0); + return covariancev(x, y) / variancev(x); } -/* -real_t MLPPStat::median(std::vector x) { - real_t center = real_t(x.size()) / real_t(2); - sort(x.begin(), x.end()); - if (x.size() % 2 == 0) { - return mean({ x[center - 1], x[center] }); +real_t MLPPStat::median(const Ref &p_x) { + ERR_FAIL_COND_V(!p_x.is_valid() || p_x->size() == 0, 0); /* an empty vector has no median; without this the branches below would index -1 / 0 */ + + Ref x = p_x->duplicate_fast(); + + int center = x->size() / 2; + x->sort(); + + if (x->size() % 2 == 0) { + return (x->element_get(center - 1) + x->element_get(center)) / 2.0; } else { - return x[center - 1 + 0.5]; + return x->element_get(center); /* odd size: integer size / 2 is already the middle index of the sorted copy (center - 1 was off by one, and -1 for a single element) */ } } -std::vector MLPPStat::mode(const std::vector &x) { +Ref MLPPStat::mode(const Ref &p_x) { + ERR_FAIL_COND_V(!p_x.is_valid(), 0); + MLPPData data; - std::vector x_set = data.vecToSet(x); - std::map element_num; - for (uint32_t i = 0; i < x_set.size(); i++) { + Ref x_set = data.vec_to_setnv(p_x); + const real_t *x_set_ptr = x_set->ptr(); + int x_set_size = x_set->size(); + + int x_size = p_x->size(); + + const MLPPVector &x = *(p_x.ptr()); +
HashMap element_num; + + for (int i = 0; i < x_set_size; ++i) { element_num[x[i]] = 0; } - for (uint32_t i = 0; i < x.size(); i++) { + + for (int i = 0; i < x_size; ++i) { element_num[x[i]]++; } - std::vector modes; - real_t max_num = element_num[x_set[0]]; - for (uint32_t i = 0; i < x_set.size(); i++) { - if (element_num[x_set[i]] > max_num) { - max_num = element_num[x_set[i]]; + + Ref rmodes; + rmodes.instance(); + MLPPVector &modes = *(rmodes.ptr()); + + real_t max_num = element_num[x_set_ptr[0]]; + + for (int i = 0; i < x_set_size; ++i) { + if (element_num[x_set_ptr[i]] > max_num) { + max_num = element_num[x_set_ptr[i]]; modes.clear(); - modes.push_back(x_set[i]); - } else if (element_num[x_set[i]] == max_num) { - modes.push_back(x_set[i]); + modes.push_back(x_set_ptr[i]); + } else if (element_num[x_set_ptr[i]] == max_num) { + modes.push_back(x_set_ptr[i]); } } - return modes; + + return rmodes; } -real_t MLPPStat::range(const std::vector &x) { +real_t MLPPStat::range(const Ref &x) { + ERR_FAIL_COND_V(!x.is_valid(), 0); + MLPPLinAlg alg; - return alg.max(x) - alg.min(x); + return alg.maxvr(x) - alg.minvr(x); /* range = max - min, as in the removed line; previously minvr - minvr, which is always 0. NOTE(review): maxvr is assumed to be the max counterpart of minvr in MLPPLinAlg - confirm */ } -real_t MLPPStat::midrange(const std::vector &x) { +real_t MLPPStat::midrange(const Ref &x) { + ERR_FAIL_COND_V(!x.is_valid(), 0); + return range(x) / 2; } -real_t MLPPStat::absAvgDeviation(const std::vector &x) { +real_t MLPPStat::abs_avg_deviation(const Ref &p_x) { + ERR_FAIL_COND_V(!p_x.is_valid(), 0); + + real_t x_mean = meanv(p_x); + int x_size = p_x->size(); + const real_t *x_ptr = p_x->ptr(); + real_t sum = 0; - for (uint32_t i = 0; i < x.size(); i++) { - sum += std::abs(x[i] - mean(x)); + for (int i = 0; i < x_size; ++i) { + real_t s = x_ptr[i] - x_mean; + sum += ABS(s); } - return sum / x.size(); + + return sum / x_size; } -real_t MLPPStat::correlation(const std::vector &x, const std::vector &y) { - return covariance(x, y) / (standardDeviation(x) * standardDeviation(y)); +real_t MLPPStat::correlation(const Ref &x, const Ref &y) { +
ERR_FAIL_COND_V(!x.is_valid() || !y.is_valid(), 0); + + return covariancev(x, y) / (standard_deviationv(x) * standard_deviationv(y)); } -real_t MLPPStat::R2(const std::vector &x, const std::vector &y) { +real_t MLPPStat::r2(const Ref &x, const Ref &y) { + ERR_FAIL_COND_V(!x.is_valid() || !y.is_valid(), 0); + return correlation(x, y) * correlation(x, y); } -real_t MLPPStat::chebyshevIneq(const real_t k) { +real_t MLPPStat::chebyshev_ineq(const real_t k) { // X may or may not belong to a Gaussian Distribution return 1 - 1 / (k * k); } -*/ real_t MLPPStat::meanv(const Ref &x) { + ERR_FAIL_COND_V(!x.is_valid(), 0); + int x_size = x->size(); const real_t *x_ptr = x->ptr(); @@ -100,10 +143,14 @@ real_t MLPPStat::meanv(const Ref &x) { } real_t MLPPStat::standard_deviationv(const Ref &x) { + ERR_FAIL_COND_V(!x.is_valid(), 0); + return Math::sqrt(variancev(x)); } real_t MLPPStat::variancev(const Ref &x) { + ERR_FAIL_COND_V(!x.is_valid(), 0); + real_t x_mean = meanv(x); int x_size = x->size(); @@ -119,6 +166,7 @@ real_t MLPPStat::variancev(const Ref &x) { } real_t MLPPStat::covariancev(const Ref &x, const Ref &y) { + ERR_FAIL_COND_V(!x.is_valid() || !y.is_valid(), 0); ERR_FAIL_COND_V(x->size() != y->size(), 0); real_t x_mean = meanv(x); @@ -137,107 +185,186 @@ real_t MLPPStat::covariancev(const Ref &x, const Ref &y) return sum / (x_size - 1); } -/* -real_t MLPPStat::weightedMean(const std::vector &x, const std::vector &weights) { +real_t MLPPStat::weighted_mean(const Ref &x, const Ref &weights) { + ERR_FAIL_COND_V(!x.is_valid() || !weights.is_valid(), 0); + ERR_FAIL_COND_V(x->size() != weights->size(), 0); + + int x_size = x->size(); + const real_t *x_ptr = x->ptr(); + const real_t *weights_ptr = weights->ptr(); + real_t sum = 0; real_t weights_sum = 0; - for (uint32_t i = 0; i < x.size(); i++) { - sum += x[i] * weights[i]; - weights_sum += weights[i]; + for (int i = 0; i < x_size; ++i) { + sum += x_ptr[i] * weights_ptr[i]; + weights_sum += weights_ptr[i]; } return sum / 
weights_sum; } -real_t MLPPStat::geometricMean(const std::vector &x) { +real_t MLPPStat::geometric_mean(const Ref &x) { + ERR_FAIL_COND_V(!x.is_valid(), 0); + + int x_size = x->size(); + const real_t *x_ptr = x->ptr(); + real_t product = 1; - for (uint32_t i = 0; i < x.size(); i++) { - product *= x[i]; + for (int i = 0; i < x_size; ++i) { + product *= x_ptr[i]; } - return std::pow(product, 1.0 / x.size()); + + return Math::pow(product, (real_t)(1.0 / x_size)); } -real_t MLPPStat::harmonicMean(const std::vector &x) { +real_t MLPPStat::harmonic_mean(const Ref &x) { + ERR_FAIL_COND_V(!x.is_valid(), 0); + + int x_size = x->size(); + const real_t *x_ptr = x->ptr(); + real_t sum = 0; - for (uint32_t i = 0; i < x.size(); i++) { - sum += 1 / x[i]; + for (int i = 0; i < x_size; ++i) { + sum += 1 / x_ptr[i]; } - return x.size() / sum; + return x_size / sum; } -real_t MLPPStat::RMS(const std::vector &x) { +real_t MLPPStat::rms(const Ref &x) { + ERR_FAIL_COND_V(!x.is_valid(), 0); + + int x_size = x->size(); + const real_t *x_ptr = x->ptr(); + real_t sum = 0; - for (uint32_t i = 0; i < x.size(); i++) { - sum += x[i] * x[i]; + for (int i = 0; i < x_size; ++i) { + real_t x_i = x_ptr[i]; + + sum += x_i * x_i; } - return sqrt(sum / x.size()); + + return Math::sqrt(sum / x_size); } -real_t MLPPStat::powerMean(const std::vector &x, const real_t p) { +real_t MLPPStat::power_mean(const Ref &x, const real_t p) { + ERR_FAIL_COND_V(!x.is_valid(), 0); + + int x_size = x->size(); + const real_t *x_ptr = x->ptr(); + real_t sum = 0; - for (uint32_t i = 0; i < x.size(); i++) { - sum += std::pow(x[i], p); + for (int i = 0; i < x_size; ++i) { + sum += Math::pow(x_ptr[i], p); } - return std::pow(sum / x.size(), 1 / p); + return Math::pow(sum / x_size, 1 / p); } -real_t MLPPStat::lehmerMean(const std::vector &x, const real_t p) { +real_t MLPPStat::lehmer_mean(const Ref &x, const real_t p) { + ERR_FAIL_COND_V(!x.is_valid(), 0); + + int x_size = x->size(); + const real_t *x_ptr = x->ptr(); + real_t 
num = 0; real_t den = 0; - for (uint32_t i = 0; i < x.size(); i++) { - num += std::pow(x[i], p); - den += std::pow(x[i], p - 1); + for (int i = 0; i < x_size; ++i) { + num += Math::pow(x_ptr[i], p); + den += Math::pow(x_ptr[i], p - 1); } return num / den; } -real_t MLPPStat::weightedLehmerMean(const std::vector &x, const std::vector &weights, const real_t p) { +real_t MLPPStat::weighted_lehmer_mean(const Ref &x, const Ref &weights, const real_t p) { + ERR_FAIL_COND_V(!x.is_valid() || !weights.is_valid(), 0); + ERR_FAIL_COND_V(x->size() != weights->size(), 0); + + int x_size = x->size(); + const real_t *x_ptr = x->ptr(); + const real_t *weights_ptr = weights->ptr(); + real_t num = 0; real_t den = 0; - for (uint32_t i = 0; i < x.size(); i++) { - num += weights[i] * std::pow(x[i], p); - den += weights[i] * std::pow(x[i], p - 1); + for (int i = 0; i < x_size; ++i) { + num += weights_ptr[i] * Math::pow(x_ptr[i], p); + den += weights_ptr[i] * Math::pow(x_ptr[i], p - 1); } return num / den; } -real_t MLPPStat::heronianMean(const real_t A, const real_t B) { +real_t MLPPStat::heronian_mean(const real_t A, const real_t B) { return (A + sqrt(A * B) + B) / 3; } -real_t MLPPStat::contraHarmonicMean(const std::vector &x) { - return lehmerMean(x, 2); +real_t MLPPStat::contraharmonic_mean(const Ref &x) { + ERR_FAIL_COND_V(!x.is_valid(), 0); + + return lehmer_mean(x, 2); } -real_t MLPPStat::heinzMean(const real_t A, const real_t B, const real_t x) { - return (std::pow(A, x) * std::pow(B, 1 - x) + std::pow(A, 1 - x) * std::pow(B, x)) / 2; +real_t MLPPStat::heinz_mean(const real_t A, const real_t B, const real_t x) { + return (Math::pow(A, x) * Math::pow(B, 1 - x) + Math::pow(A, 1 - x) * Math::pow(B, x)) / 2; } -real_t MLPPStat::neumanSandorMean(const real_t a, const real_t b) { +real_t MLPPStat::neuman_sandor_mean(const real_t a, const real_t b) { MLPPActivation avn; return (a - b) / 2 * avn.arsinh_normr((a - b) / (a + b)); } -real_t MLPPStat::stolarskyMean(const real_t x, const 
real_t y, const real_t p) { +real_t MLPPStat::stolarsky_mean(const real_t x, const real_t y, const real_t p) { if (x == y) { return x; } - return std::pow((std::pow(x, p) - std::pow(y, p)) / (p * (x - y)), 1 / (p - 1)); + return Math::pow((Math::pow(x, p) - Math::pow(y, p)) / (p * (x - y)), 1 / (p - 1)); } -real_t MLPPStat::identricMean(const real_t x, const real_t y) { +real_t MLPPStat::identric_mean(const real_t x, const real_t y) { if (x == y) { return x; } - return (1 / M_E) * std::pow(std::pow(x, x) / std::pow(y, y), 1 / (x - y)); + return (1 / M_E) * Math::pow(Math::pow(x, x) / Math::pow(y, y), 1 / (x - y)); } -real_t MLPPStat::logMean(const real_t x, const real_t y) { +real_t MLPPStat::log_mean(const real_t x, const real_t y) { if (x == y) { return x; } - return (y - x) / (log(y) - std::log(x)); + return (y - x) / (log(y) - Math::log(x)); } -*/ void MLPPStat::_bind_methods() { + ClassDB::bind_method(D_METHOD("b0_estimation", "x", "y"), &MLPPStat::b0_estimation); + ClassDB::bind_method(D_METHOD("b1_estimation", "x", "y"), &MLPPStat::b1_estimation); + + ClassDB::bind_method(D_METHOD("median", "x"), &MLPPStat::median); + ClassDB::bind_method(D_METHOD("mode", "x"), &MLPPStat::mode); + ClassDB::bind_method(D_METHOD("range", "x"), &MLPPStat::range); + ClassDB::bind_method(D_METHOD("midrange", "x"), &MLPPStat::midrange); + ClassDB::bind_method(D_METHOD("abs_avg_deviation", "x"), &MLPPStat::abs_avg_deviation); + ClassDB::bind_method(D_METHOD("correlation", "x", "y"), &MLPPStat::correlation); + ClassDB::bind_method(D_METHOD("r2", "x", "y"), &MLPPStat::r2); + ClassDB::bind_method(D_METHOD("chebyshev_ineq", "k"), &MLPPStat::chebyshev_ineq); + + ClassDB::bind_method(D_METHOD("meanv", "x"), &MLPPStat::meanv); + ClassDB::bind_method(D_METHOD("standard_deviationv", "x"), &MLPPStat::standard_deviationv); + ClassDB::bind_method(D_METHOD("variancev", "x"), &MLPPStat::variancev); + + ClassDB::bind_method(D_METHOD("covariancev", "x", "y"), &MLPPStat::covariancev); + + 
ClassDB::bind_method(D_METHOD("weighted_mean", "x", "weights"), &MLPPStat::weighted_mean); + ClassDB::bind_method(D_METHOD("geometric_mean", "x"), &MLPPStat::geometric_mean); + ClassDB::bind_method(D_METHOD("harmonic_mean", "x"), &MLPPStat::harmonic_mean); + ClassDB::bind_method(D_METHOD("rms", "x"), &MLPPStat::rms); + + ClassDB::bind_method(D_METHOD("power_mean", "x", "p"), &MLPPStat::power_mean); + ClassDB::bind_method(D_METHOD("lehmer_mean", "x", "p"), &MLPPStat::lehmer_mean); + + ClassDB::bind_method(D_METHOD("weighted_lehmer_mean", "x", "weights", "p"), &MLPPStat::weighted_lehmer_mean); + + ClassDB::bind_method(D_METHOD("contraharmonic_mean", "x"), &MLPPStat::contraharmonic_mean); + + ClassDB::bind_method(D_METHOD("heronian_mean", "A", "B"), &MLPPStat::heronian_mean); + ClassDB::bind_method(D_METHOD("heinz_mean", "A", "B", "x"), &MLPPStat::heinz_mean); + ClassDB::bind_method(D_METHOD("neuman_sandor_mean", "a", "b"), &MLPPStat::neuman_sandor_mean); + ClassDB::bind_method(D_METHOD("stolarsky_mean", "x", "y", "p"), &MLPPStat::stolarsky_mean); + ClassDB::bind_method(D_METHOD("identric_mean", "x", "y"), &MLPPStat::identric_mean); + ClassDB::bind_method(D_METHOD("log_mean", "x", "y"), &MLPPStat::log_mean); } diff --git a/mlpp/stat/stat.h b/mlpp/stat/stat.h index 10eb4aa..b9547ef 100644 --- a/mlpp/stat/stat.h +++ b/mlpp/stat/stat.h @@ -26,16 +26,14 @@ public: real_t b1_estimation(const Ref &x, const Ref &y); // Statistical Functions - /* - real_t median(std::vector x); - std::vector mode(const std::vector &x); - real_t range(const std::vector &x); - real_t midrange(const std::vector &x); - real_t absAvgDeviation(const std::vector &x); - real_t correlation(const std::vector &x, const std::vector &y); - real_t R2(const std::vector &x, const std::vector &y); - real_t chebyshevIneq(const real_t k); - */ + real_t median(const Ref &x); + Ref mode(const Ref &x); + real_t range(const Ref &x); + real_t midrange(const Ref &x); + real_t abs_avg_deviation(const Ref &x); + real_t 
correlation(const Ref &x, const Ref &y); + real_t r2(const Ref &x, const Ref &y); + real_t chebyshev_ineq(const real_t k); real_t meanv(const Ref &x); real_t standard_deviationv(const Ref &x); @@ -43,22 +41,20 @@ public: real_t covariancev(const Ref &x, const Ref &y); // Extras - /* - real_t weightedMean(const std::vector &x, const std::vector &weights); - real_t geometricMean(const std::vector &x); - real_t harmonicMean(const std::vector &x); - real_t RMS(const std::vector &x); - real_t powerMean(const std::vector &x, const real_t p); - real_t lehmerMean(const std::vector &x, const real_t p); - real_t weightedLehmerMean(const std::vector &x, const std::vector &weights, const real_t p); - real_t contraHarmonicMean(const std::vector &x); - real_t heronianMean(const real_t A, const real_t B); - real_t heinzMean(const real_t A, const real_t B, const real_t x); - real_t neumanSandorMean(const real_t a, const real_t b); - real_t stolarskyMean(const real_t x, const real_t y, const real_t p); - real_t identricMean(const real_t x, const real_t y); - real_t logMean(const real_t x, const real_t y); - */ + real_t weighted_mean(const Ref &x, const Ref &weights); + real_t geometric_mean(const Ref &x); + real_t harmonic_mean(const Ref &x); + real_t rms(const Ref &x); + real_t power_mean(const Ref &x, const real_t p); + real_t lehmer_mean(const Ref &x, const real_t p); + real_t weighted_lehmer_mean(const Ref &x, const Ref &weights, const real_t p); + real_t contraharmonic_mean(const Ref &x); + real_t heronian_mean(const real_t A, const real_t B); + real_t heinz_mean(const real_t A, const real_t B, const real_t x); + real_t neuman_sandor_mean(const real_t a, const real_t b); + real_t stolarsky_mean(const real_t x, const real_t y, const real_t p); + real_t identric_mean(const real_t x, const real_t y); + real_t log_mean(const real_t x, const real_t y); protected: static void _bind_methods();