From f292f58f00578b0391cbff9890e6a6ed70670c77 Mon Sep 17 00:00:00 2001
From: Relintai
Date: Mon, 13 Feb 2023 17:25:10 +0100
Subject: [PATCH] Added MLPPStatOld.

---
 SCsub                  |   1 +
 mlpp/stat/stat_old.cpp | 275 +++++++++++++++++++++++++++++++++++++++++
 mlpp/stat/stat_old.h   |  70 +++++++++++
 3 files changed, 346 insertions(+)
 create mode 100644 mlpp/stat/stat_old.cpp
 create mode 100644 mlpp/stat/stat_old.h

diff --git a/SCsub b/SCsub
index 1dcbe50..205710a 100644
--- a/SCsub
+++ b/SCsub
@@ -80,6 +80,7 @@ sources = [
     "mlpp/gauss_markov_checker/gauss_markov_checker_old.cpp",
     "mlpp/utilities/utilities_old.cpp",
     "mlpp/transforms/transforms_old.cpp",
+    "mlpp/stat/stat_old.cpp",
     "test/mlpp_tests.cpp",
 ]
 
diff --git a/mlpp/stat/stat_old.cpp b/mlpp/stat/stat_old.cpp
new file mode 100644
index 0000000..a493819
--- /dev/null
+++ b/mlpp/stat/stat_old.cpp
@@ -0,0 +1,275 @@
+//
+// Stat.cpp
+//
+// Created by Marc Melikyan on 9/29/20.
+//
+
+#include "stat_old.h"
+#include "../activation/activation.h"
+#include "../data/data.h"
+#include "../lin_alg/lin_alg.h"
+#include <algorithm>
+#include <cmath>
+#include <map>
+
+#include <iostream>
+
+// Intercept term for univariate linear regression:
+// mean(y) - b1Estimation(x, y) * mean(x).
+real_t MLPPStatOld::b0Estimation(const std::vector<real_t> &x, const std::vector<real_t> &y) {
+	return mean(y) - b1Estimation(x, y) * mean(x);
+}
+
+// Slope term for univariate linear regression: covariance(x, y) / variance(x).
+real_t MLPPStatOld::b1Estimation(const std::vector<real_t> &x, const std::vector<real_t> &y) {
+	return covariance(x, y) / variance(x);
+}
+
+// Ref-based counterpart of b0Estimation(), built on meanv() / b1_estimation().
+real_t MLPPStatOld::b0_estimation(const Ref<MLPPVector> &x, const Ref<MLPPVector> &y) {
+	return meanv(y) - b1_estimation(x, y) * meanv(x);
+}
+
+// Ref-based counterpart of b1Estimation(): covariancev(x, y) / variancev(x).
+real_t MLPPStatOld::b1_estimation(const Ref<MLPPVector> &x, const Ref<MLPPVector> &y) {
+	return covariancev(x, y) / variancev(x);
+}
+
+// Arithmetic mean: the sum of all elements divided by the element count.
+// x is expected to be non-empty; an empty input divides zero by zero.
+real_t MLPPStatOld::mean(const std::vector<real_t> &x) {
+	real_t sum = 0;
+	for (uint32_t i = 0; i < x.size(); i++) {
+		sum += x[i];
+	}
+	return sum / x.size();
+}
+
+// Median of x. The vector is taken by value because it is sorted in place.
+// For an even element count the two middle values are averaged.
+// Returns 0 (and reports an error) when x is empty.
+real_t MLPPStatOld::median(std::vector<real_t> x) {
+	ERR_FAIL_COND_V(x.empty(), 0);
+
+	std::sort(x.begin(), x.end());
+
+	// Integer index of the upper middle element; avoids indexing the vector
+	// with floating point expressions.
+	const size_t mid = x.size() / 2;
+
+	if (x.size() % 2 == 0) {
+		return (x[mid - 1] + x[mid]) / 2;
+	}
+
+	return x[mid];
+}
+
+// Returns every value of x that occurs the maximum number of times.
+// The result follows the element order produced by MLPPData::vecToSet().
+// Returns an empty vector (and reports an error) when x is empty.
+std::vector<real_t> MLPPStatOld::mode(const std::vector<real_t> &x) {
+	ERR_FAIL_COND_V(x.empty(), std::vector<real_t>());
+
+	MLPPData data;
+	std::vector<real_t> x_set = data.vecToSet(x);
+
+	// operator[] value-initializes missing counters to 0, so no separate
+	// zeroing pass is needed.
+	std::map<real_t, int> element_num;
+	for (uint32_t i = 0; i < x.size(); i++) {
+		element_num[x[i]]++;
+	}
+
+	std::vector<real_t> modes;
+	int max_num = element_num[x_set[0]];
+	for (uint32_t i = 0; i < x_set.size(); i++) {
+		const int count = element_num[x_set[i]];
+
+		if (count > max_num) {
+			// New best count: everything collected so far is outdated.
+			max_num = count;
+			modes.clear();
+			modes.push_back(x_set[i]);
+		} else if (count == max_num) {
+			modes.push_back(x_set[i]);
+		}
+	}
+	return modes;
+}
+
+// Difference between the largest and the smallest element of x.
+real_t MLPPStatOld::range(const std::vector<real_t> &x) {
+	MLPPLinAlg alg;
+	return alg.max(x) - alg.min(x);
+}
+
+// Mid-range: the average of the largest and the smallest element of x.
+// (Half of range(x) would be the half-range, not the mid-range.)
+real_t MLPPStatOld::midrange(const std::vector<real_t> &x) {
+	MLPPLinAlg alg;
+	return (alg.max(x) + alg.min(x)) / 2;
+}
+
+// Mean of the absolute deviations of the elements from mean(x).
+real_t MLPPStatOld::absAvgDeviation(const std::vector<real_t> &x) {
+	// The mean does not change inside the loop, compute it once.
+	const real_t x_mean = mean(x);
+
+	real_t sum = 0;
+	for (uint32_t i = 0; i < x.size(); i++) {
+		sum += std::abs(x[i] - x_mean);
+	}
+	return sum / x.size();
+}
+
+// Square root of variance(x).
+real_t MLPPStatOld::standardDeviation(const std::vector<real_t> &x) {
+	return std::sqrt(variance(x));
+}
+
+// Sample variance: squared deviations from the mean, summed and divided by
+// (size - 1). Meaningful only for inputs with at least two elements.
+real_t MLPPStatOld::variance(const std::vector<real_t> &x) {
+	const real_t x_mean = mean(x);
+
+	real_t sum = 0;
+	for (uint32_t i = 0; i < x.size(); i++) {
+		sum += (x[i] - x_mean) * (x[i] - x_mean);
+	}
+	return sum / (x.size() - 1);
+}
+
+// Sample covariance of x and y, divided by (size - 1).
+// Returns 0 (and reports an error) when the sizes differ.
+real_t MLPPStatOld::covariance(const std::vector<real_t> &x, const std::vector<real_t> &y) {
+	ERR_FAIL_COND_V(x.size() != y.size(), 0);
+
+	const real_t x_mean = mean(x);
+	const real_t y_mean = mean(y);
+
+	real_t sum = 0;
+	for (uint32_t i = 0; i < x.size(); i++) {
+		sum += (x[i] - x_mean) * (y[i] - y_mean);
+	}
+	return sum / (x.size() - 1);
+}
+
+// covariance(x, y) divided by the product of both standard deviations.
+real_t MLPPStatOld::correlation(const std::vector<real_t> &x, const std::vector<real_t> &y) {
+	return covariance(x, y) / (standardDeviation(x) * standardDeviation(y));
+}
+
+// Square of correlation(x, y).
+real_t MLPPStatOld::R2(const std::vector<real_t> &x, const std::vector<real_t> &y) {
+	const real_t r = correlation(x, y);
+	return r * r;
+}
+
+// Evaluates 1 - 1 / k^2.
+real_t MLPPStatOld::chebyshevIneq(const real_t k) {
+	// X may or may not belong to a Gaussian Distribution
+	return 1 - 1 / (k * k);
+}
+
+// Arithmetic mean of the elements of x.
+real_t MLPPStatOld::meanv(const Ref<MLPPVector> &x) {
+	int x_size = x->size();
+	const real_t *x_ptr = x->ptr();
+
+	real_t sum = 0;
+	for (int i = 0; i < x_size; ++i) {
+		sum += x_ptr[i];
+	}
+
+	return sum / x_size;
+}
+
+// Square root of variancev(x).
+real_t MLPPStatOld::standard_deviationv(const Ref<MLPPVector> &x) {
+	return Math::sqrt(variancev(x));
+}
+
+// Sample variance of the elements of x, divided by (size - 1).
+real_t MLPPStatOld::variancev(const Ref<MLPPVector> &x) {
+	real_t x_mean = meanv(x);
+
+	int x_size = x->size();
+	const real_t *x_ptr = x->ptr();
+
+	real_t sum = 0;
+	for (int i = 0; i < x_size; ++i) {
+		real_t xi = x_ptr[i];
+
+		sum += (xi - x_mean) * (xi - x_mean);
+	}
+	return sum / (x_size - 1);
+}
+
+// Sample covariance of x and y, divided by (size - 1).
+// Returns 0 (and reports an error) when the sizes differ.
+real_t MLPPStatOld::covariancev(const Ref<MLPPVector> &x, const Ref<MLPPVector> &y) {
+	ERR_FAIL_COND_V(x->size() != y->size(), 0);
+
+	real_t x_mean = meanv(x);
+	real_t y_mean = meanv(y);
+
+	int x_size = x->size();
+	const real_t *x_ptr = x->ptr();
+	const real_t *y_ptr = y->ptr();
+
+	real_t sum = 0;
+
+	for (int i = 0; i < x_size; ++i) {
+		sum += (x_ptr[i] - x_mean) * (y_ptr[i] - y_mean);
+	}
+
+	return sum / (x_size - 1);
+}
+
+// Weighted mean: sum(x_i * w_i) / sum(w_i).
+// Returns 0 (and reports an error) when x and weights differ in size.
+real_t MLPPStatOld::weightedMean(const std::vector<real_t> &x, const std::vector<real_t> &weights) {
+	ERR_FAIL_COND_V(x.size() != weights.size(), 0);
+
+	real_t sum = 0;
+	real_t weights_sum = 0;
+	for (uint32_t i = 0; i < x.size(); i++) {
+		sum += x[i] * weights[i];
+		weights_sum += weights[i];
+	}
+	return sum / weights_sum;
+}
+
+// Geometric mean: the size-th root of the product of all elements.
+real_t MLPPStatOld::geometricMean(const std::vector<real_t> &x) {
+	real_t product = 1;
+	for (uint32_t i = 0; i < x.size(); i++) {
+		product *= x[i];
+	}
+	return std::pow(product, 1.0 / x.size());
+}
+
+// Harmonic mean: the element count divided by the sum of reciprocals.
+real_t MLPPStatOld::harmonicMean(const std::vector<real_t> &x) {
+	real_t sum = 0;
+	for (uint32_t i = 0; i < x.size(); i++) {
+		sum += 1 / x[i];
+	}
+	return x.size() / sum;
+}
+
+// Root mean square: the square root of the mean of the squared elements.
+real_t MLPPStatOld::RMS(const std::vector<real_t> &x) {
+	real_t sum = 0;
+	for (uint32_t i = 0; i < x.size(); i++) {
+		sum += x[i] * x[i];
+	}
+	return std::sqrt(sum / x.size());
+}
+
+// Power mean with exponent p: (sum(x_i^p) / size)^(1 / p).
+real_t MLPPStatOld::powerMean(const std::vector<real_t> &x, const real_t p) {
+	real_t sum = 0;
+	for (uint32_t i = 0; i < x.size(); i++) {
+		sum += std::pow(x[i], p);
+	}
+	return std::pow(sum / x.size(), 1 / p);
+}
+
+// Lehmer mean with exponent p: sum(x_i^p) / sum(x_i^(p - 1)).
+real_t MLPPStatOld::lehmerMean(const std::vector<real_t> &x, const real_t p) {
+	real_t num = 0;
+	real_t den = 0;
+	for (uint32_t i = 0; i < x.size(); i++) {
+		num += std::pow(x[i], p);
+		den += std::pow(x[i], p - 1);
+	}
+	return num / den;
+}
+
+// Weighted Lehmer mean: sum(w_i * x_i^p) / sum(w_i * x_i^(p - 1)).
+// Returns 0 (and reports an error) when x and weights differ in size.
+real_t MLPPStatOld::weightedLehmerMean(const std::vector<real_t> &x, const std::vector<real_t> &weights, const real_t p) {
+	ERR_FAIL_COND_V(x.size() != weights.size(), 0);
+
+	real_t num = 0;
+	real_t den = 0;
+	for (uint32_t i = 0; i < x.size(); i++) {
+		num += weights[i] * std::pow(x[i], p);
+		den += weights[i] * std::pow(x[i], p - 1);
+	}
+	return num / den;
+}
+
+// Heronian mean: (A + sqrt(A * B) + B) / 3.
+real_t MLPPStatOld::heronianMean(const real_t A, const real_t B) {
+	return (A + std::sqrt(A * B) + B) / 3;
+}
+
+// Contraharmonic mean, i.e. the Lehmer mean with p = 2.
+real_t MLPPStatOld::contraHarmonicMean(const std::vector<real_t> &x) {
+	return lehmerMean(x, 2);
+}
+
+// Heinz mean: (A^x * B^(1 - x) + A^(1 - x) * B^x) / 2.
+real_t MLPPStatOld::heinzMean(const real_t A, const real_t B, const real_t x) {
+	return (std::pow(A, x) * std::pow(B, 1 - x) + std::pow(A, 1 - x) * std::pow(B, x)) / 2;
+}
+
+// Neuman-Sandor mean: (a - b) / (2 * arsinh((a - b) / (a + b))).
+// For a == b the expression is 0 / 0; its limit, a, is returned instead.
+real_t MLPPStatOld::neumanSandorMean(const real_t a, const real_t b) {
+	if (a == b) {
+		return a;
+	}
+
+	MLPPActivation avn;
+	// The whole "2 * arsinh(...)" term is the divisor.
+	return (a - b) / (2 * avn.arsinh((a - b) / (a + b)));
+}
+
+// Stolarsky mean: ((x^p - y^p) / (p * (x - y)))^(1 / (p - 1)).
+// Returns x when x == y, where the general expression would divide by zero.
+real_t MLPPStatOld::stolarskyMean(const real_t x, const real_t y, const real_t p) {
+	if (x == y) {
+		return x;
+	}
+	return std::pow((std::pow(x, p) - std::pow(y, p)) / (p * (x - y)), 1 / (p - 1));
+}
+
+// Identric mean: (1 / e) * (x^x / y^y)^(1 / (x - y)).
+// Returns x when x == y, where the general expression would divide by zero.
+real_t MLPPStatOld::identricMean(const real_t x, const real_t y) {
+	if (x == y) {
+		return x;
+	}
+	return (1 / M_E) * std::pow(std::pow(x, x) / std::pow(y, y), 1 / (x - y));
+}
+
+// Logarithmic mean: (y - x) / (log(y) - log(x)).
+// Returns x when x == y, where the general expression would divide by zero.
+real_t MLPPStatOld::logMean(const real_t x, const real_t y) {
+	if (x == y) {
+		return x;
+	}
+	return (y - x) / (std::log(y) - std::log(x));
+}
+
+// No methods are bound.
+void MLPPStatOld::_bind_methods() {
+}
diff --git a/mlpp/stat/stat_old.h b/mlpp/stat/stat_old.h
new file mode 100644
index 0000000..14a795a
--- /dev/null
+++ b/mlpp/stat/stat_old.h
@@ -0,0 +1,70 @@
+
+#ifndef MLPP_STAT_OLD_H
+#define MLPP_STAT_OLD_H
+
+//
+// Stat.hpp
+//
+// Created by Marc Melikyan on 9/29/20.
+//
+
+#include "core/math/math_defs.h"
+
+#include "core/object/reference.h"
+
+#include "../lin_alg/mlpp_matrix.h"
+#include "../lin_alg/mlpp_vector.h"
+
+#include <vector>
+
+// Collection of statistics helpers. Functions taking std::vector<real_t> use
+// camelCase names; the snake_case variants take Ref<MLPPVector> instead.
+class MLPPStatOld : public Reference {
+	GDCLASS(MLPPStatOld, Reference);
+
+public:
+	// These functions are for univariate lin reg module- not for users.
+	real_t b0Estimation(const std::vector<real_t> &x, const std::vector<real_t> &y);
+	real_t b1Estimation(const std::vector<real_t> &x, const std::vector<real_t> &y);
+
+	real_t b0_estimation(const Ref<MLPPVector> &x, const Ref<MLPPVector> &y);
+	real_t b1_estimation(const Ref<MLPPVector> &x, const Ref<MLPPVector> &y);
+
+	// Statistical Functions
+	real_t mean(const std::vector<real_t> &x);
+	real_t median(std::vector<real_t> x);
+	std::vector<real_t> mode(const std::vector<real_t> &x);
+	real_t range(const std::vector<real_t> &x);
+	real_t midrange(const std::vector<real_t> &x);
+	real_t absAvgDeviation(const std::vector<real_t> &x);
+	real_t standardDeviation(const std::vector<real_t> &x);
+	real_t variance(const std::vector<real_t> &x);
+	real_t covariance(const std::vector<real_t> &x, const std::vector<real_t> &y);
+	real_t correlation(const std::vector<real_t> &x, const std::vector<real_t> &y);
+	real_t R2(const std::vector<real_t> &x, const std::vector<real_t> &y);
+	real_t chebyshevIneq(const real_t k);
+
+	// Ref<MLPPVector> variants of mean / standardDeviation / variance / covariance.
+	real_t meanv(const Ref<MLPPVector> &x);
+	real_t standard_deviationv(const Ref<MLPPVector> &x);
+	real_t variancev(const Ref<MLPPVector> &x);
+	real_t covariancev(const Ref<MLPPVector> &x, const Ref<MLPPVector> &y);
+
+	// Extras
+	real_t weightedMean(const std::vector<real_t> &x, const std::vector<real_t> &weights);
+	real_t geometricMean(const std::vector<real_t> &x);
+	real_t harmonicMean(const std::vector<real_t> &x);
+	real_t RMS(const std::vector<real_t> &x);
+	real_t powerMean(const std::vector<real_t> &x, const real_t p);
+	real_t lehmerMean(const std::vector<real_t> &x, const real_t p);
+	real_t weightedLehmerMean(const std::vector<real_t> &x, const std::vector<real_t> &weights, const real_t p);
+	real_t contraHarmonicMean(const std::vector<real_t> &x);
+	real_t heronianMean(const real_t A, const real_t B);
+	real_t heinzMean(const real_t A, const real_t B, const real_t x);
+	real_t neumanSandorMean(const real_t a, const real_t b);
+	real_t stolarskyMean(const real_t x, const real_t y, const real_t p);
+	real_t identricMean(const real_t x, const real_t y);
+	real_t logMean(const real_t x, const real_t y);
+
+protected:
+	static void _bind_methods();
+};
+
+#endif // MLPP_STAT_OLD_H