2023-01-23 21:13:26 +01:00
|
|
|
//
|
|
|
|
// GaussianNB.cpp
|
|
|
|
//
|
|
|
|
// Created by Marc Melikyan on 1/17/21.
|
|
|
|
//
|
|
|
|
|
2023-01-24 18:12:23 +01:00
|
|
|
#include "gaussian_nb.h"
|
|
|
|
#include "../lin_alg/lin_alg.h"
|
2023-01-24 19:00:54 +01:00
|
|
|
#include "../stat/stat.h"
|
2023-01-24 18:12:23 +01:00
|
|
|
#include "../utilities/utilities.h"
|
2023-01-23 21:13:26 +01:00
|
|
|
|
|
|
|
#include <algorithm>
|
2023-01-24 19:00:54 +01:00
|
|
|
#include <iostream>
|
2023-01-23 21:13:26 +01:00
|
|
|
#include <random>
|
|
|
|
|
2023-01-24 19:20:18 +01:00
|
|
|
|
2023-01-27 13:01:16 +01:00
|
|
|
// Builds a Gaussian naive Bayes classifier and fits it right away.
//
// inputSet  - training samples, one inner vector per sample.
// outputSet - label of each sample, stored as real_t.
// class_num - number of classes; Evaluate() sizes mu, sigma and priors with it.
//
// The arguments are copied into the members of the same name, y_hat is sized to
// one entry per label, and Evaluate() is run to fit the model.
MLPPGaussianNB::MLPPGaussianNB(std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet, int class_num) :
		inputSet(inputSet), outputSet(outputSet), class_num(class_num) {
	// Inside this body `outputSet` names the parameter; it still holds the same
	// data as the member that was copied from it.
	y_hat.resize(outputSet.size());
	Evaluate();
}
|
|
|
|
|
2023-01-27 13:01:16 +01:00
|
|
|
std::vector<real_t> MLPPGaussianNB::modelSetTest(std::vector<std::vector<real_t>> X) {
|
|
|
|
std::vector<real_t> y_hat;
|
2023-01-24 19:00:54 +01:00
|
|
|
for (int i = 0; i < X.size(); i++) {
|
|
|
|
y_hat.push_back(modelTest(X[i]));
|
|
|
|
}
|
|
|
|
return y_hat;
|
|
|
|
}
|
2023-01-23 21:13:26 +01:00
|
|
|
|
2023-01-27 13:01:16 +01:00
|
|
|
// Predicts the class of a single sample.
//
// For every class c the unnormalised log-posterior
//     log(priors[c]) + sum over k of log N(x[k]; mu[c], sigma[c]^2)
// is evaluated, where mu[c] and sigma[c] are the per-class scalars filled in by
// Evaluate(). Scores are kept in log space: exponentiating them would not change
// which one is largest, but would underflow to 0 for samples far from the mean.
//
// x - feature values of the sample.
//
// Returns the index of the highest-scoring class (the lowest index wins ties),
// converted to real_t. Returns 0 when class_num is not positive.
real_t MLPPGaussianNB::modelTest(std::vector<real_t> x) {
	std::vector<real_t> log_score(class_num > 0 ? class_num : 0);

	for (int c = 0; c < class_num; c++) {
		// NOTE(review): sigma[c] == 0 makes this score non-finite; confirm whether
		// the training data can produce a zero-spread class.
		const real_t variance = sigma[c] * sigma[c];
		const real_t log_norm = -0.5 * std::log(2 * M_PI * variance);

		// The prior enters once per sample; every feature adds one density term.
		real_t total = std::log(priors[c]);
		for (const real_t feature : x) {
			const real_t diff = feature - mu[c];
			total += log_norm - (diff * diff) / (2 * variance);
		}
		log_score[c] = total;
	}

	return std::distance(log_score.begin(), std::max_element(log_score.begin(), log_score.end()));
}
|
2023-01-23 21:13:26 +01:00
|
|
|
|
2023-01-27 13:01:16 +01:00
|
|
|
// Scores the stored predictions against the stored labels.
//
// Returns whatever MLPPUtilities::performance() yields for (y_hat, outputSet).
// NOTE(review): presumably a fraction of matching entries; confirm against
// MLPPUtilities.
real_t MLPPGaussianNB::score() {
	return MLPPUtilities().performance(y_hat, outputSet);
}
|
2023-01-23 21:13:26 +01:00
|
|
|
|
2023-01-25 00:21:31 +01:00
|
|
|
void MLPPGaussianNB::Evaluate() {
|
2023-01-25 01:09:37 +01:00
|
|
|
MLPPStat stat;
|
2023-01-25 00:29:02 +01:00
|
|
|
MLPPLinAlg alg;
|
2023-01-23 21:13:26 +01:00
|
|
|
|
2023-01-24 19:00:54 +01:00
|
|
|
// Computing mu_k_y and sigma_k_y
|
|
|
|
mu.resize(class_num);
|
|
|
|
sigma.resize(class_num);
|
|
|
|
for (int i = class_num - 1; i >= 0; i--) {
|
2023-01-27 13:01:16 +01:00
|
|
|
std::vector<real_t> set;
|
2023-01-24 19:00:54 +01:00
|
|
|
for (int j = 0; j < inputSet.size(); j++) {
|
|
|
|
for (int k = 0; k < inputSet[j].size(); k++) {
|
|
|
|
if (outputSet[j] == i) {
|
|
|
|
set.push_back(inputSet[j][k]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
mu[i] = stat.mean(set);
|
|
|
|
sigma[i] = stat.standardDeviation(set);
|
|
|
|
}
|
2023-01-23 21:13:26 +01:00
|
|
|
|
2023-01-24 19:00:54 +01:00
|
|
|
// Priors
|
|
|
|
priors.resize(class_num);
|
|
|
|
for (int i = 0; i < outputSet.size(); i++) {
|
|
|
|
priors[int(outputSet[i])]++;
|
|
|
|
}
|
2023-01-27 13:01:16 +01:00
|
|
|
priors = alg.scalarMultiply(real_t(1) / real_t(outputSet.size()), priors);
|
2023-01-23 21:13:26 +01:00
|
|
|
|
2023-01-24 19:00:54 +01:00
|
|
|
for (int i = 0; i < outputSet.size(); i++) {
|
2023-01-27 13:01:16 +01:00
|
|
|
real_t score[class_num];
|
|
|
|
real_t y_hat_i = 1;
|
2023-01-24 19:00:54 +01:00
|
|
|
for (int j = class_num - 1; j >= 0; j--) {
|
|
|
|
for (int k = 0; k < inputSet[i].size(); k++) {
|
|
|
|
y_hat_i += std::log(priors[j] * (1 / sqrt(2 * M_PI * sigma[j] * sigma[j])) * exp(-(inputSet[i][k] * mu[j]) * (inputSet[i][k] * mu[j]) / (2 * sigma[j] * sigma[j])));
|
|
|
|
}
|
|
|
|
score[j] = exp(y_hat_i);
|
|
|
|
std::cout << score[j] << std::endl;
|
|
|
|
}
|
2023-01-27 13:01:16 +01:00
|
|
|
y_hat[i] = std::distance(score, std::max_element(score, score + sizeof(score) / sizeof(real_t)));
|
|
|
|
std::cout << std::distance(score, std::max_element(score, score + sizeof(score) / sizeof(real_t))) << std::endl;
|
2023-01-24 19:00:54 +01:00
|
|
|
}
|
|
|
|
}
|