2023-01-23 21:13:26 +01:00
|
|
|
//
|
|
|
|
// Stat.cpp
|
|
|
|
//
|
|
|
|
// Created by Marc Melikyan on 9/29/20.
|
|
|
|
//
|
|
|
|
|
2023-01-24 19:14:38 +01:00
|
|
|
#include "stat.h"
|
2023-01-24 18:12:23 +01:00
|
|
|
#include "../activation/activation.h"
|
|
|
|
#include "../data/data.h"
|
2023-01-24 19:00:54 +01:00
|
|
|
#include "../lin_alg/lin_alg.h"
|
2023-01-23 21:13:26 +01:00
|
|
|
#include <algorithm>
|
|
|
|
#include <cmath>
|
2023-01-24 19:00:54 +01:00
|
|
|
#include <map>
|
2023-01-23 21:13:26 +01:00
|
|
|
|
|
|
|
#include <iostream>
|
|
|
|
|
2023-01-27 13:01:16 +01:00
|
|
|
// Intercept estimate for the pair (x, y): the mean of y minus the slope
// returned by b1Estimation(x, y) multiplied by the mean of x.
real_t MLPPStat::b0Estimation(const std::vector<real_t> &x, const std::vector<real_t> &y) {
	const real_t y_bar = mean(y);
	const real_t slope = b1Estimation(x, y);
	const real_t x_bar = mean(x);

	return y_bar - slope * x_bar;
}
|
|
|
|
|
2023-01-27 13:01:16 +01:00
|
|
|
// Slope estimate for the pair (x, y): covariance(x, y) divided by variance(x).
real_t MLPPStat::b1Estimation(const std::vector<real_t> &x, const std::vector<real_t> &y) {
	const real_t cov_xy = covariance(x, y);
	const real_t var_x = variance(x);

	return cov_xy / var_x;
}
|
|
|
|
|
2023-02-09 02:27:04 +01:00
|
|
|
// MLPPVector counterpart of b0Estimation: meanv(y) minus the slope from
// b1_estimation(x, y) multiplied by meanv(x).
real_t MLPPStat::b0_estimation(const Ref<MLPPVector> &x, const Ref<MLPPVector> &y) {
	const real_t y_bar = meanv(y);
	const real_t slope = b1_estimation(x, y);
	const real_t x_bar = meanv(x);

	return y_bar - slope * x_bar;
}
|
|
|
|
// MLPPVector counterpart of b1Estimation: covariancev(x, y) divided by
// variancev(x).
real_t MLPPStat::b1_estimation(const Ref<MLPPVector> &x, const Ref<MLPPVector> &y) {
	const real_t cov_xy = covariancev(x, y);
	const real_t var_x = variancev(x);

	return cov_xy / var_x;
}
|
|
|
|
|
2023-01-27 13:01:16 +01:00
|
|
|
// Arithmetic mean: the elements of x are accumulated front to back and the
// total is divided by x.size(). No special handling exists for an empty x,
// so the division is then 0 / 0.
real_t MLPPStat::mean(const std::vector<real_t> &x) {
	real_t total = 0;

	for (const real_t value : x) {
		total += value;
	}

	return total / x.size();
}
|
|
|
|
|
2023-01-27 13:01:16 +01:00
|
|
|
// Median of x. The argument is taken by value so it can be sorted locally
// without touching the caller's data.
//
// Returns the middle element for an odd element count, the mean of the two
// middle elements for an even count, and 0 for an empty vector (there is no
// element to return in that case).
real_t MLPPStat::median(std::vector<real_t> x) {
	if (x.empty()) {
		return 0;
	}

	std::sort(x.begin(), x.end());

	// Integer index arithmetic: a floating-point index loses precision for
	// large vectors when real_t is a 32-bit float.
	const std::size_t mid = x.size() / 2;

	if (x.size() % 2 == 0) {
		return mean({ x[mid - 1], x[mid] });
	}

	return x[mid];
}
|
|
|
|
|
2023-01-27 13:01:16 +01:00
|
|
|
std::vector<real_t> MLPPStat::mode(const std::vector<real_t> &x) {
|
2023-01-25 00:21:31 +01:00
|
|
|
MLPPData data;
|
2023-01-27 13:01:16 +01:00
|
|
|
std::vector<real_t> x_set = data.vecToSet(x);
|
|
|
|
std::map<real_t, int> element_num;
|
2023-01-24 19:00:54 +01:00
|
|
|
for (int i = 0; i < x_set.size(); i++) {
|
|
|
|
element_num[x[i]] = 0;
|
|
|
|
}
|
|
|
|
for (int i = 0; i < x.size(); i++) {
|
|
|
|
element_num[x[i]]++;
|
|
|
|
}
|
2023-01-27 13:01:16 +01:00
|
|
|
std::vector<real_t> modes;
|
|
|
|
real_t max_num = element_num[x_set[0]];
|
2023-01-24 19:00:54 +01:00
|
|
|
for (int i = 0; i < x_set.size(); i++) {
|
|
|
|
if (element_num[x_set[i]] > max_num) {
|
|
|
|
max_num = element_num[x_set[i]];
|
|
|
|
modes.clear();
|
|
|
|
modes.push_back(x_set[i]);
|
|
|
|
} else if (element_num[x_set[i]] == max_num) {
|
|
|
|
modes.push_back(x_set[i]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return modes;
|
|
|
|
}
|
|
|
|
|
2023-01-27 13:01:16 +01:00
|
|
|
// Spread of x: the value reported by MLPPLinAlg::max(x) minus the value
// reported by MLPPLinAlg::min(x).
real_t MLPPStat::range(const std::vector<real_t> &x) {
	MLPPLinAlg alg;

	const auto highest = alg.max(x);
	const auto lowest = alg.min(x);

	return highest - lowest;
}
|
|
|
|
|
2023-01-27 13:01:16 +01:00
|
|
|
// Midrange of x: the arithmetic mean of its smallest and largest element,
// (min + max) / 2. Half of the range, (max - min) / 2, is a measure of
// spread and is not the midrange.
// Returns 0 for an empty vector.
real_t MLPPStat::midrange(const std::vector<real_t> &x) {
	if (x.empty()) {
		return 0;
	}

	// Single pass that yields iterators to both extremes.
	const auto extremes = std::minmax_element(x.begin(), x.end());

	return (*extremes.first + *extremes.second) / 2;
}
|
|
|
|
|
2023-01-27 13:01:16 +01:00
|
|
|
// Average absolute deviation: the mean of |x[i] - mean(x)| over all
// elements of x. No special handling exists for an empty x, so the final
// division is then 0 / 0.
real_t MLPPStat::absAvgDeviation(const std::vector<real_t> &x) {
	// The mean does not change during the loop; computing it once keeps the
	// function linear in x.size() instead of quadratic.
	const real_t x_mean = mean(x);

	real_t sum = 0;
	for (const real_t value : x) {
		sum += std::abs(value - x_mean);
	}

	return sum / x.size();
}
|
|
|
|
|
2023-01-27 13:01:16 +01:00
|
|
|
// Standard deviation of x: the square root of variance(x).
real_t MLPPStat::standardDeviation(const std::vector<real_t> &x) {
	const real_t var_x = variance(x);

	return std::sqrt(var_x);
}
|
|
|
|
|
2023-01-27 13:01:16 +01:00
|
|
|
// Variance of x with an (n - 1) denominator: the sum of squared deviations
// from mean(x) divided by x.size() - 1.
// No guard exists for fewer than two elements; the denominator is computed
// in unsigned arithmetic exactly as written.
real_t MLPPStat::variance(const std::vector<real_t> &x) {
	// The mean does not change during the loop; computing it once keeps the
	// function linear in x.size() instead of quadratic.
	const real_t x_mean = mean(x);

	real_t sum = 0;
	for (const real_t value : x) {
		const real_t deviation = value - x_mean;
		sum += deviation * deviation;
	}

	return sum / (x.size() - 1);
}
|
|
|
|
|
2023-01-27 13:01:16 +01:00
|
|
|
// Covariance of x and y with an (n - 1) denominator: the sum of
// (x[i] - mean(x)) * (y[i] - mean(y)) divided by x.size() - 1.
// Returns 0 when the two vectors differ in length, the same value
// covariancev() returns for mismatched sizes.
real_t MLPPStat::covariance(const std::vector<real_t> &x, const std::vector<real_t> &y) {
	// Pairing elements by index is only meaningful (and only in bounds) when
	// both vectors have the same length.
	if (x.size() != y.size()) {
		return 0;
	}

	// Both means are loop invariants; computing them once keeps the function
	// linear in x.size() instead of quadratic.
	const real_t x_mean = mean(x);
	const real_t y_mean = mean(y);

	real_t sum = 0;
	for (std::size_t i = 0; i < x.size(); ++i) {
		sum += (x[i] - x_mean) * (y[i] - y_mean);
	}

	return sum / (x.size() - 1);
}
|
|
|
|
|
2023-01-27 13:01:16 +01:00
|
|
|
// Correlation of x and y: covariance(x, y) divided by the product of the
// two standard deviations.
real_t MLPPStat::correlation(const std::vector<real_t> &x, const std::vector<real_t> &y) {
	const real_t cov_xy = covariance(x, y);
	const real_t sd_x = standardDeviation(x);
	const real_t sd_y = standardDeviation(y);

	return cov_xy / (sd_x * sd_y);
}
|
|
|
|
|
2023-01-27 13:01:16 +01:00
|
|
|
// Square of correlation(x, y).
real_t MLPPStat::R2(const std::vector<real_t> &x, const std::vector<real_t> &y) {
	// Evaluate the correlation once; calling it for each factor would repeat
	// the whole covariance / standard deviation computation.
	const real_t r = correlation(x, y);

	return r * r;
}
|
|
|
|
|
2023-01-27 13:01:16 +01:00
|
|
|
// Evaluates 1 - 1 / k^2 for the given k.
real_t MLPPStat::chebyshevIneq(const real_t k) {
	// The underlying variable X is not required to follow a Gaussian
	// distribution.
	const real_t k_squared = k * k;

	return 1 - 1 / k_squared;
}
|
|
|
|
|
2023-02-08 01:26:37 +01:00
|
|
|
// Arithmetic mean of an MLPPVector: sums the x->size() values reachable
// through x->ptr() in index order and divides the total by that size.
real_t MLPPStat::meanv(const Ref<MLPPVector> &x) {
	const int element_count = x->size();
	const real_t *values = x->ptr();

	real_t total = 0;

	int idx = 0;
	while (idx < element_count) {
		total += values[idx];
		++idx;
	}

	return total / element_count;
}
|
|
|
|
|
2023-02-09 15:30:33 +01:00
|
|
|
// Standard deviation of an MLPPVector: Math::sqrt applied to variancev(x).
real_t MLPPStat::standard_deviationv(const Ref<MLPPVector> &x) {
	const real_t var_x = variancev(x);

	return Math::sqrt(var_x);
}
|
|
|
|
|
2023-02-09 02:27:04 +01:00
|
|
|
// Variance of an MLPPVector with an (n - 1) denominator: the sum of squared
// deviations from meanv(x) divided by x->size() - 1.
real_t MLPPStat::variancev(const Ref<MLPPVector> &x) {
	const real_t centre = meanv(x);

	const int element_count = x->size();
	const real_t *values = x->ptr();

	real_t squared_total = 0;

	int idx = 0;
	while (idx < element_count) {
		const real_t deviation = values[idx] - centre;
		squared_total += deviation * deviation;
		++idx;
	}

	return squared_total / (element_count - 1);
}
|
|
|
|
|
2023-02-08 01:26:37 +01:00
|
|
|
// Covariance of two MLPPVectors with an (n - 1) denominator: the sum of
// (x[i] - meanv(x)) * (y[i] - meanv(y)) divided by x->size() - 1.
// Fails through ERR_FAIL_COND_V and returns 0 when the sizes differ.
real_t MLPPStat::covariancev(const Ref<MLPPVector> &x, const Ref<MLPPVector> &y) {
	ERR_FAIL_COND_V(x->size() != y->size(), 0);

	const real_t x_centre = meanv(x);
	const real_t y_centre = meanv(y);

	const int element_count = x->size();
	const real_t *x_values = x->ptr();
	const real_t *y_values = y->ptr();

	real_t product_total = 0;

	int idx = 0;
	while (idx < element_count) {
		const real_t x_dev = x_values[idx] - x_centre;
		const real_t y_dev = y_values[idx] - y_centre;
		product_total += x_dev * y_dev;
		++idx;
	}

	return product_total / (element_count - 1);
}
|
|
|
|
|
2023-01-27 13:01:16 +01:00
|
|
|
// Weighted mean: the sum of x[i] * weights[i] divided by the sum of the
// weights that were used. The loop runs over x.size() entries and reads
// weights at the same indices.
real_t MLPPStat::weightedMean(const std::vector<real_t> &x, const std::vector<real_t> &weights) {
	real_t weighted_total = 0;
	real_t weight_total = 0;

	for (std::size_t idx = 0; idx < x.size(); ++idx) {
		const real_t w = weights[idx];
		weighted_total += x[idx] * w;
		weight_total += w;
	}

	return weighted_total / weight_total;
}
|
|
|
|
|
2023-01-27 13:01:16 +01:00
|
|
|
// Geometric mean: the product of all elements of x raised to the power
// 1 / x.size().
real_t MLPPStat::geometricMean(const std::vector<real_t> &x) {
	real_t running_product = 1;

	for (const real_t value : x) {
		running_product *= value;
	}

	const double root_exponent = 1.0 / x.size();

	return std::pow(running_product, root_exponent);
}
|
|
|
|
|
2023-01-27 13:01:16 +01:00
|
|
|
// Harmonic mean: x.size() divided by the sum of the reciprocals of the
// elements of x.
real_t MLPPStat::harmonicMean(const std::vector<real_t> &x) {
	real_t reciprocal_total = 0;

	for (const real_t value : x) {
		reciprocal_total += 1 / value;
	}

	return x.size() / reciprocal_total;
}
|
|
|
|
|
2023-01-27 13:01:16 +01:00
|
|
|
// Root mean square: the square root of the mean of the squared elements
// of x.
real_t MLPPStat::RMS(const std::vector<real_t> &x) {
	real_t squared_total = 0;

	for (const real_t value : x) {
		squared_total += value * value;
	}

	return sqrt(squared_total / x.size());
}
|
|
|
|
|
2023-01-27 13:01:16 +01:00
|
|
|
// Power mean of order p: the mean of x[i]^p raised to the power 1 / p.
real_t MLPPStat::powerMean(const std::vector<real_t> &x, const real_t p) {
	real_t powered_total = 0;

	for (const real_t value : x) {
		powered_total += std::pow(value, p);
	}

	const real_t powered_mean = powered_total / x.size();
	const real_t inverse_order = 1 / p;

	return std::pow(powered_mean, inverse_order);
}
|
|
|
|
|
2023-01-27 13:01:16 +01:00
|
|
|
// Lehmer mean of order p: the sum of x[i]^p divided by the sum of
// x[i]^(p - 1).
real_t MLPPStat::lehmerMean(const std::vector<real_t> &x, const real_t p) {
	real_t numerator = 0;
	real_t denominator = 0;

	for (const real_t value : x) {
		numerator += std::pow(value, p);
		denominator += std::pow(value, p - 1);
	}

	return numerator / denominator;
}
|
|
|
|
|
2023-01-27 13:01:16 +01:00
|
|
|
// Weighted Lehmer mean of order p: the sum of weights[i] * x[i]^p divided
// by the sum of weights[i] * x[i]^(p - 1). The loop runs over x.size()
// entries and reads weights at the same indices.
real_t MLPPStat::weightedLehmerMean(const std::vector<real_t> &x, const std::vector<real_t> &weights, const real_t p) {
	real_t numerator = 0;
	real_t denominator = 0;

	for (std::size_t idx = 0; idx < x.size(); ++idx) {
		const real_t w = weights[idx];
		const real_t value = x[idx];

		numerator += w * std::pow(value, p);
		denominator += w * std::pow(value, p - 1);
	}

	return numerator / denominator;
}
|
|
|
|
|
2023-01-27 13:01:16 +01:00
|
|
|
// Heronian mean of A and B: (A + sqrt(A * B) + B) / 3.
real_t MLPPStat::heronianMean(const real_t A, const real_t B) {
	// `auto` keeps whatever type the unqualified sqrt call yields.
	const auto root_of_product = sqrt(A * B);

	return (A + root_of_product + B) / 3;
}
|
|
|
|
|
2023-01-27 13:01:16 +01:00
|
|
|
// Contraharmonic mean: lehmerMean(x, p) evaluated at order p = 2.
real_t MLPPStat::contraHarmonicMean(const std::vector<real_t> &x) {
	const real_t order = 2;

	return lehmerMean(x, order);
}
|
|
|
|
|
2023-01-27 13:01:16 +01:00
|
|
|
// Heinz mean of A and B with parameter x:
// (A^x * B^(1 - x) + A^(1 - x) * B^x) / 2.
real_t MLPPStat::heinzMean(const real_t A, const real_t B, const real_t x) {
	const real_t first_term = std::pow(A, x) * std::pow(B, 1 - x);
	const real_t second_term = std::pow(A, 1 - x) * std::pow(B, x);

	return (first_term + second_term) / 2;
}
|
|
|
|
|
2023-01-27 13:01:16 +01:00
|
|
|
// Neuman-Sandor mean of a and b:
//   (a - b) / (2 * arsinh((a - b) / (a + b)))
// The whole product 2 * arsinh(...) is the denominator; without the
// parentheses the expression would multiply by the arsinh term instead of
// dividing by it.
// For a == b the formula degenerates to 0 / 0, so a is returned directly,
// matching the equal-argument guard used by the other two-argument means
// in this file.
real_t MLPPStat::neumanSandorMean(const real_t a, const real_t b) {
	if (a == b) {
		return a;
	}

	MLPPActivation avn;

	return (a - b) / (2 * avn.arsinh((a - b) / (a + b)));
}
|
|
|
|
|
2023-01-27 13:01:16 +01:00
|
|
|
// Stolarsky mean of x and y with parameter p:
// ((x^p - y^p) / (p * (x - y)))^(1 / (p - 1)).
// Equal arguments short-circuit to x.
real_t MLPPStat::stolarskyMean(const real_t x, const real_t y, const real_t p) {
	if (x != y) {
		const real_t quotient = (std::pow(x, p) - std::pow(y, p)) / (p * (x - y));
		const real_t exponent = 1 / (p - 1);

		return std::pow(quotient, exponent);
	}

	return x;
}
|
|
|
|
|
2023-01-27 13:01:16 +01:00
|
|
|
// Identric mean of x and y: (1 / e) * (x^x / y^y)^(1 / (x - y)).
// Equal arguments short-circuit to x.
real_t MLPPStat::identricMean(const real_t x, const real_t y) {
	if (x != y) {
		const real_t power_ratio = std::pow(x, x) / std::pow(y, y);
		const real_t exponent = 1 / (x - y);

		return (1 / M_E) * std::pow(power_ratio, exponent);
	}

	return x;
}
|
|
|
|
|
2023-01-27 13:01:16 +01:00
|
|
|
// Logarithmic mean of x and y: (y - x) / (log(y) - log(x)).
// Equal arguments short-circuit to x.
real_t MLPPStat::logMean(const real_t x, const real_t y) {
	if (x != y) {
		const auto log_difference = log(y) - std::log(x);

		return (y - x) / log_difference;
	}

	return x;
}
|