/*************************************************************************/
/* stat.cpp */
/*************************************************************************/
/* This file is part of: */
/* PMLPP Machine Learning Library */
/* https://github.com/Relintai/pmlpp */
/*************************************************************************/
/* Copyright (c) 2023-present Péter Magyar. */
/* Copyright (c) 2022-2023 Marc Melikyan */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/*************************************************************************/
#include "stat.h"
#include "../activation/activation.h"
#include "../data/data.h"
#include "../lin_alg/lin_alg.h"
#include "core/containers/hash_map.h"
#include <algorithm>
#include <cmath>
#include <map>
#include <iostream>
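// Ordinary least squares intercept for simple linear regression: b0 = mean(y) - b1 * mean(x).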
real_t MLPPStat::b0_estimation(const Ref<MLPPVector> &x, const Ref<MLPPVector> &y) {
	ERR_FAIL_COND_V(!x.is_valid() || !y.is_valid(), 0);

	return meanv(y) - b1_estimation(x, y) * meanv(x);
}

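// Ordinary least squares slope for simple linear regression: b1 = cov(x, y) / var(x).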
real_t MLPPStat::b1_estimation(const Ref<MLPPVector> &x, const Ref<MLPPVector> &y) {
	ERR_FAIL_COND_V(!x.is_valid() || !y.is_valid(), 0);

	return covariancev(x, y) / variancev(x);
}

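// Middle value of the sorted data; the mean of the two middle values when the count is even.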
real_t MLPPStat::median(const Ref<MLPPVector> &p_x) {
	ERR_FAIL_COND_V(!p_x.is_valid(), 0);
	ERR_FAIL_COND_V(p_x->size() == 0, 0);

	Ref<MLPPVector> x = p_x->duplicate_fast();

	int center = x->size() / 2;
	x->sort();

	if (x->size() % 2 == 0) {
		// Even count: average the two middle elements.
		return (x->element_get(center - 1) + x->element_get(center)) / 2.0;
	} else {
		// Odd count: the middle element sits at index size / 2.
		return x->element_get(center);
	}
}

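// Most frequent value(s). Every value tied for the highest count is returned.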
Ref<MLPPVector> MLPPStat::mode(const Ref<MLPPVector> &p_x) {
	ERR_FAIL_COND_V(!p_x.is_valid(), Ref<MLPPVector>());
	ERR_FAIL_COND_V(p_x->size() == 0, Ref<MLPPVector>());

	MLPPData data;

	Ref<MLPPVector> x_set = data.vec_to_setnv(p_x);
	const real_t *x_set_ptr = x_set->ptr();
	int x_set_size = x_set->size();

	int x_size = p_x->size();
	const MLPPVector &x = *(p_x.ptr());

	HashMap<real_t, int> element_num;

	// Initialize a count for every unique element.
	for (int i = 0; i < x_set_size; ++i) {
		element_num[x_set_ptr[i]] = 0;
	}

	// Count occurrences of each element.
	for (int i = 0; i < x_size; ++i) {
		element_num[x[i]]++;
	}

	Ref<MLPPVector> rmodes;
	rmodes.instance();
	MLPPVector &modes = *(rmodes.ptr());

	real_t max_num = element_num[x_set_ptr[0]];

	for (int i = 0; i < x_set_size; ++i) {
		if (element_num[x_set_ptr[i]] > max_num) {
			max_num = element_num[x_set_ptr[i]];

			modes.clear();
			modes.push_back(x_set_ptr[i]);
		} else if (element_num[x_set_ptr[i]] == max_num) {
			modes.push_back(x_set_ptr[i]);
		}
	}

	return rmodes;
}

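// Spread of the data: max(x) - min(x).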
real_t MLPPStat::range(const Ref<MLPPVector> &x) {
	ERR_FAIL_COND_V(!x.is_valid(), 0);

	MLPPLinAlg alg;

	return alg.maxvr(x) - alg.minvr(x);
}

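// Midpoint of the data range: (max(x) + min(x)) / 2.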
real_t MLPPStat::midrange(const Ref<MLPPVector> &x) {
	ERR_FAIL_COND_V(!x.is_valid(), 0);

	MLPPLinAlg alg;

	return (alg.maxvr(x) + alg.minvr(x)) / 2;
}

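// Mean absolute deviation: sum(|x_i - mean(x)|) / n.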
real_t MLPPStat::abs_avg_deviation(const Ref<MLPPVector> &p_x) {
	ERR_FAIL_COND_V(!p_x.is_valid(), 0);

	real_t x_mean = meanv(p_x);

	int x_size = p_x->size();
	const real_t *x_ptr = p_x->ptr();

	real_t sum = 0;

	for (int i = 0; i < x_size; ++i) {
		real_t s = x_ptr[i] - x_mean;
		sum += ABS(s);
	}

	return sum / x_size;
}

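// Pearson correlation coefficient: cov(x, y) / (stddev(x) * stddev(y)).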
real_t MLPPStat::correlation(const Ref<MLPPVector> &x, const Ref<MLPPVector> &y) {
	ERR_FAIL_COND_V(!x.is_valid() || !y.is_valid(), 0);

	return covariancev(x, y) / (standard_deviationv(x) * standard_deviationv(y));
}

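// Coefficient of determination: the squared Pearson correlation of x and y.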
real_t MLPPStat::r2(const Ref<MLPPVector> &x, const Ref<MLPPVector> &y) {
	ERR_FAIL_COND_V(!x.is_valid() || !y.is_valid(), 0);

	real_t r = correlation(x, y);

	return r * r;
}

real_t MLPPStat::chebyshev_ineq(const real_t k) {
	// Chebyshev's inequality holds for any distribution with finite variance,
	// Gaussian or not: P(|X - mean| < k * stddev) >= 1 - 1/k^2.
	return 1 - 1 / (k * k);
}

real_t MLPPStat::meanv(const Ref<MLPPVector> &x) {
	ERR_FAIL_COND_V(!x.is_valid(), 0);

	int x_size = x->size();
	const real_t *x_ptr = x->ptr();

	real_t sum = 0;

	for (int i = 0; i < x_size; ++i) {
		sum += x_ptr[i];
	}

	return sum / x_size;
}

real_t MLPPStat::standard_deviationv(const Ref<MLPPVector> &x) {
	ERR_FAIL_COND_V(!x.is_valid(), 0);

	return Math::sqrt(variancev(x));
}

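// Sample variance with Bessel's correction: sum((x_i - mean(x))^2) / (n - 1).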
real_t MLPPStat::variancev(const Ref<MLPPVector> &x) {
	ERR_FAIL_COND_V(!x.is_valid(), 0);

	real_t x_mean = meanv(x);

	int x_size = x->size();
	const real_t *x_ptr = x->ptr();

	real_t sum = 0;

	for (int i = 0; i < x_size; ++i) {
		real_t xi = x_ptr[i];

		sum += (xi - x_mean) * (xi - x_mean);
	}

	return sum / (x_size - 1);
}

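// Sample covariance: sum((x_i - mean(x)) * (y_i - mean(y))) / (n - 1).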
real_t MLPPStat::covariancev(const Ref<MLPPVector> &x, const Ref<MLPPVector> &y) {
	ERR_FAIL_COND_V(!x.is_valid() || !y.is_valid(), 0);
	ERR_FAIL_COND_V(x->size() != y->size(), 0);

	real_t x_mean = meanv(x);
	real_t y_mean = meanv(y);

	int x_size = x->size();
	const real_t *x_ptr = x->ptr();
	const real_t *y_ptr = y->ptr();

	real_t sum = 0;

	for (int i = 0; i < x_size; ++i) {
		sum += (x_ptr[i] - x_mean) * (y_ptr[i] - y_mean);
	}

	return sum / (x_size - 1);
}

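// Weighted arithmetic mean: sum(w_i * x_i) / sum(w_i).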
real_t MLPPStat::weighted_mean(const Ref<MLPPVector> &x, const Ref<MLPPVector> &weights) {
	ERR_FAIL_COND_V(!x.is_valid() || !weights.is_valid(), 0);
	ERR_FAIL_COND_V(x->size() != weights->size(), 0);

	int x_size = x->size();
	const real_t *x_ptr = x->ptr();
	const real_t *weights_ptr = weights->ptr();

	real_t sum = 0;
	real_t weights_sum = 0;

	for (int i = 0; i < x_size; ++i) {
		sum += x_ptr[i] * weights_ptr[i];
		weights_sum += weights_ptr[i];
	}

	return sum / weights_sum;
}

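// Geometric mean: the n-th root of the product of all elements (positive inputs assumed).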
real_t MLPPStat::geometric_mean(const Ref<MLPPVector> &x) {
	ERR_FAIL_COND_V(!x.is_valid(), 0);

	int x_size = x->size();
	const real_t *x_ptr = x->ptr();

	real_t product = 1;

	for (int i = 0; i < x_size; ++i) {
		product *= x_ptr[i];
	}

	return Math::pow(product, (real_t)(1.0 / x_size));
}

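// Harmonic mean: n / sum(1 / x_i) (nonzero inputs assumed).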
real_t MLPPStat::harmonic_mean(const Ref<MLPPVector> &x) {
	ERR_FAIL_COND_V(!x.is_valid(), 0);

	int x_size = x->size();
	const real_t *x_ptr = x->ptr();

	real_t sum = 0;

	for (int i = 0; i < x_size; ++i) {
		sum += 1 / x_ptr[i];
	}

	return x_size / sum;
}

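// Root mean square: sqrt(sum(x_i^2) / n).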
real_t MLPPStat::rms(const Ref<MLPPVector> &x) {
	ERR_FAIL_COND_V(!x.is_valid(), 0);

	int x_size = x->size();
	const real_t *x_ptr = x->ptr();

	real_t sum = 0;

	for (int i = 0; i < x_size; ++i) {
		real_t x_i = x_ptr[i];

		sum += x_i * x_i;
	}

	return Math::sqrt(sum / x_size);
}

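// Power (generalized) mean: (sum(x_i^p) / n)^(1/p).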
real_t MLPPStat::power_mean(const Ref<MLPPVector> &x, const real_t p) {
	ERR_FAIL_COND_V(!x.is_valid(), 0);

	int x_size = x->size();
	const real_t *x_ptr = x->ptr();

	real_t sum = 0;

	for (int i = 0; i < x_size; ++i) {
		sum += Math::pow(x_ptr[i], p);
	}

	return Math::pow(sum / x_size, 1 / p);
}

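// Lehmer mean: sum(x_i^p) / sum(x_i^(p - 1)).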
real_t MLPPStat::lehmer_mean(const Ref<MLPPVector> &x, const real_t p) {
	ERR_FAIL_COND_V(!x.is_valid(), 0);

	int x_size = x->size();
	const real_t *x_ptr = x->ptr();

	real_t num = 0;
	real_t den = 0;

	for (int i = 0; i < x_size; ++i) {
		num += Math::pow(x_ptr[i], p);
		den += Math::pow(x_ptr[i], p - 1);
	}

	return num / den;
}

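// Weighted Lehmer mean: sum(w_i * x_i^p) / sum(w_i * x_i^(p - 1)).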
real_t MLPPStat::weighted_lehmer_mean(const Ref<MLPPVector> &x, const Ref<MLPPVector> &weights, const real_t p) {
	ERR_FAIL_COND_V(!x.is_valid() || !weights.is_valid(), 0);
	ERR_FAIL_COND_V(x->size() != weights->size(), 0);

	int x_size = x->size();
	const real_t *x_ptr = x->ptr();
	const real_t *weights_ptr = weights->ptr();

	real_t num = 0;
	real_t den = 0;

	for (int i = 0; i < x_size; ++i) {
		num += weights_ptr[i] * Math::pow(x_ptr[i], p);
		den += weights_ptr[i] * Math::pow(x_ptr[i], p - 1);
	}

	return num / den;
}

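// Heronian mean of two non-negative values: (A + sqrt(A * B) + B) / 3.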
real_t MLPPStat::heronian_mean(const real_t A, const real_t B) {
	return (A + Math::sqrt(A * B) + B) / 3;
}

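// Contraharmonic mean: the Lehmer mean with p = 2.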
real_t MLPPStat::contra_harmonic_mean(const Ref<MLPPVector> &x) {
	ERR_FAIL_COND_V(!x.is_valid(), 0);

	return lehmer_mean(x, 2);
}

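// Heinz mean: (A^x * B^(1 - x) + A^(1 - x) * B^x) / 2.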
real_t MLPPStat::heinz_mean(const real_t A, const real_t B, const real_t x) {
	return (Math::pow(A, x) * Math::pow(B, 1 - x) + Math::pow(A, 1 - x) * Math::pow(B, x)) / 2;
}

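// Neuman-Sandor mean: (a - b) / (2 * arsinh((a - b) / (a + b))) for a != b.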
real_t MLPPStat::neuman_sandor_mean(const real_t a, const real_t b) {
	MLPPActivation avn;

	return (a - b) / (2 * avn.arsinh_normr((a - b) / (a + b)));
}

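// Stolarsky mean: ((x^p - y^p) / (p * (x - y)))^(1 / (p - 1)), with limit x when x == y.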
real_t MLPPStat::stolarsky_mean(const real_t x, const real_t y, const real_t p) {
	if (x == y) {
		return x;
	}

	return Math::pow((Math::pow(x, p) - Math::pow(y, p)) / (p * (x - y)), 1 / (p - 1));
}

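// Identric mean: (1 / e) * (x^x / y^y)^(1 / (x - y)), with limit x when x == y.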
real_t MLPPStat::identric_mean(const real_t x, const real_t y) {
	if (x == y) {
		return x;
	}

	return (1 / M_E) * Math::pow(Math::pow(x, x) / Math::pow(y, y), 1 / (x - y));
}

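// Logarithmic mean: (y - x) / (ln(y) - ln(x)), with limit x when x == y.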
real_t MLPPStat::log_mean(const real_t x, const real_t y) {
	if (x == y) {
		return x;
	}

	return (y - x) / (Math::log(y) - Math::log(x));
}

void MLPPStat::_bind_methods() {
	ClassDB::bind_method(D_METHOD("b0_estimation", "x", "y"), &MLPPStat::b0_estimation);
	ClassDB::bind_method(D_METHOD("b1_estimation", "x", "y"), &MLPPStat::b1_estimation);
	ClassDB::bind_method(D_METHOD("median", "x"), &MLPPStat::median);
	ClassDB::bind_method(D_METHOD("mode", "x"), &MLPPStat::mode);
	ClassDB::bind_method(D_METHOD("range", "x"), &MLPPStat::range);
	ClassDB::bind_method(D_METHOD("midrange", "x"), &MLPPStat::midrange);
	ClassDB::bind_method(D_METHOD("abs_avg_deviation", "x"), &MLPPStat::abs_avg_deviation);
	ClassDB::bind_method(D_METHOD("correlation", "x", "y"), &MLPPStat::correlation);
	ClassDB::bind_method(D_METHOD("r2", "x", "y"), &MLPPStat::r2);
	ClassDB::bind_method(D_METHOD("chebyshev_ineq", "k"), &MLPPStat::chebyshev_ineq);
	ClassDB::bind_method(D_METHOD("meanv", "x"), &MLPPStat::meanv);
	ClassDB::bind_method(D_METHOD("standard_deviationv", "x"), &MLPPStat::standard_deviationv);
	ClassDB::bind_method(D_METHOD("variancev", "x"), &MLPPStat::variancev);
	ClassDB::bind_method(D_METHOD("covariancev", "x", "y"), &MLPPStat::covariancev);
	ClassDB::bind_method(D_METHOD("weighted_mean", "x", "weights"), &MLPPStat::weighted_mean);
	ClassDB::bind_method(D_METHOD("geometric_mean", "x"), &MLPPStat::geometric_mean);
	ClassDB::bind_method(D_METHOD("harmonic_mean", "x"), &MLPPStat::harmonic_mean);
	ClassDB::bind_method(D_METHOD("rms", "x"), &MLPPStat::rms);
	ClassDB::bind_method(D_METHOD("power_mean", "x", "p"), &MLPPStat::power_mean);
	ClassDB::bind_method(D_METHOD("lehmer_mean", "x", "p"), &MLPPStat::lehmer_mean);
	ClassDB::bind_method(D_METHOD("weighted_lehmer_mean", "x", "weights", "p"), &MLPPStat::weighted_lehmer_mean);
	ClassDB::bind_method(D_METHOD("contra_harmonic_mean", "x"), &MLPPStat::contra_harmonic_mean);
	ClassDB::bind_method(D_METHOD("heronian_mean", "A", "B"), &MLPPStat::heronian_mean);
	ClassDB::bind_method(D_METHOD("heinz_mean", "A", "B", "x"), &MLPPStat::heinz_mean);
	ClassDB::bind_method(D_METHOD("neuman_sandor_mean", "a", "b"), &MLPPStat::neuman_sandor_mean);
	ClassDB::bind_method(D_METHOD("stolarsky_mean", "x", "y", "p"), &MLPPStat::stolarsky_mean);
	ClassDB::bind_method(D_METHOD("identric_mean", "x", "y"), &MLPPStat::identric_mean);
	ClassDB::bind_method(D_METHOD("log_mean", "x", "y"), &MLPPStat::log_mean);
}