Now MLPPBernoulliNB uses engine classes.

This commit is contained in:
Relintai 2023-02-16 21:07:31 +01:00
parent 741475a4ab
commit 5ad25ad918
6 changed files with 146 additions and 72 deletions

View File

@ -12,41 +12,51 @@
#include <iostream> #include <iostream>
#include <random> #include <random>
std::vector<real_t> MLPPBernoulliNB::model_set_test(std::vector<std::vector<real_t>> X) { Ref<MLPPVector> MLPPBernoulliNB::model_set_test(const Ref<MLPPMatrix> &X) {
std::vector<real_t> y_hat; Ref<MLPPVector> y_hat;
for (uint32_t i = 0; i < X.size(); i++) { y_hat.instance();
y_hat.push_back(model_test(X[i])); y_hat->resize(X->size().y);
Ref<MLPPVector> x_row_tmp;
x_row_tmp.instance();
x_row_tmp->resize(X->size().x);
for (int i = 0; i < X->size().y; i++) {
X->get_row_into_mlpp_vector(i, x_row_tmp);
y_hat->set_element(i, model_test(x_row_tmp));
} }
return y_hat; return y_hat;
} }
real_t MLPPBernoulliNB::model_test(std::vector<real_t> x) { real_t MLPPBernoulliNB::model_test(const Ref<MLPPVector> &x) {
real_t score_0 = 1; real_t score_0 = 1;
real_t score_1 = 1; real_t score_1 = 1;
std::vector<int> foundIndices; Vector<int> found_indices;
for (uint32_t j = 0; j < x.size(); j++) { for (int j = 0; j < x->size(); j++) {
for (uint32_t k = 0; k < _vocab.size(); k++) { for (int k = 0; k < _vocab->size(); k++) {
if (x[j] == _vocab[k]) { if (x->get_element(j) == _vocab->get_element(k)) {
score_0 *= _theta[0][_vocab[k]]; score_0 *= _theta[0][_vocab->get_element(k)];
score_1 *= _theta[1][_vocab[k]]; score_1 *= _theta[1][_vocab->get_element(k)];
foundIndices.push_back(k); found_indices.push_back(k);
} }
} }
} }
for (uint32_t i = 0; i < _vocab.size(); i++) { for (int i = 0; i < _vocab->size(); i++) {
bool found = false; bool found = false;
for (uint32_t j = 0; j < foundIndices.size(); j++) { for (int j = 0; j < found_indices.size(); j++) {
if (_vocab[i] == _vocab[foundIndices[j]]) { if (_vocab->get_element(i) == _vocab->get_element(found_indices[j])) {
found = true; found = true;
} }
} }
if (!found) { if (!found) {
score_0 *= 1 - _theta[0][_vocab[i]]; score_0 *= 1 - _theta[0][_vocab->get_element(i)];
score_1 *= 1 - _theta[1][_vocab[i]]; score_1 *= 1 - _theta[1][_vocab->get_element(i)];
} }
} }
@ -64,10 +74,11 @@ real_t MLPPBernoulliNB::model_test(std::vector<real_t> x) {
real_t MLPPBernoulliNB::score() { real_t MLPPBernoulliNB::score() {
MLPPUtilities util; MLPPUtilities util;
return util.performance(_y_hat, _output_set);
return util.performance_vec(_y_hat, _output_set);
} }
MLPPBernoulliNB::MLPPBernoulliNB(std::vector<std::vector<real_t>> p_input_set, std::vector<real_t> p_output_set) { MLPPBernoulliNB::MLPPBernoulliNB(const Ref<MLPPMatrix> &p_input_set, const Ref<MLPPVector> &p_output_set) {
_input_set = p_input_set; _input_set = p_input_set;
_output_set = p_output_set; _output_set = p_output_set;
_class_num = 2; _class_num = 2;
@ -75,7 +86,9 @@ MLPPBernoulliNB::MLPPBernoulliNB(std::vector<std::vector<real_t>> p_input_set, s
_prior_1 = 0; _prior_1 = 0;
_prior_0 = 0; _prior_0 = 0;
_y_hat.resize(_output_set.size()); _y_hat.instance();
_y_hat->resize(_output_set->size());
evaluate(); evaluate();
} }
@ -89,7 +102,8 @@ MLPPBernoulliNB::~MLPPBernoulliNB() {
void MLPPBernoulliNB::compute_vocab() { void MLPPBernoulliNB::compute_vocab() {
MLPPLinAlg alg; MLPPLinAlg alg;
MLPPData data; MLPPData data;
_vocab = data.vecToSet<real_t>(alg.flatten(_input_set));
_vocab = data.vec_to_setnv(alg.flattenv(_input_set));
} }
void MLPPBernoulliNB::compute_theta() { void MLPPBernoulliNB::compute_theta() {
@ -98,43 +112,43 @@ void MLPPBernoulliNB::compute_theta() {
// Setting all values in the hasmap by default to 0. // Setting all values in the hasmap by default to 0.
for (int i = _class_num - 1; i >= 0; i--) { for (int i = _class_num - 1; i >= 0; i--) {
for (uint32_t j = 0; j < _vocab.size(); j++) { for (int j = 0; j < _vocab->size(); j++) {
_theta[i][_vocab[j]] = 0; _theta.write[i][_vocab->get_element(j)] = 0;
} }
} }
for (uint32_t i = 0; i < _input_set.size(); i++) { for (int i = 0; i < _input_set->size().y; i++) {
for (uint32_t j = 0; j < _input_set[0].size(); j++) { for (int j = 0; j < _input_set->size().x; j++) {
_theta[_output_set[i]][_input_set[i][j]]++; _theta.write[_output_set->get_element(i)][_input_set->get_element(i, j)]++;
} }
} }
for (uint32_t i = 0; i < _theta.size(); i++) { for (int i = 0; i < _theta.size(); i++) {
for (uint32_t j = 0; j < _theta[i].size(); j++) { for (uint32_t j = 0; j < _theta[i].size(); j++) {
if (i == 0) { if (i == 0) {
_theta[i][j] /= _prior_0 * _y_hat.size(); _theta.write[i][j] /= _prior_0 * _y_hat->size();
} else { } else {
_theta[i][j] /= _prior_1 * _y_hat.size(); _theta.write[i][j] /= _prior_1 * _y_hat->size();
} }
} }
} }
} }
void MLPPBernoulliNB::evaluate() { void MLPPBernoulliNB::evaluate() {
for (uint32_t i = 0; i < _output_set.size(); i++) { for (int i = 0; i < _output_set->size(); i++) {
// Pr(B | A) * Pr(A) // Pr(B | A) * Pr(A)
real_t score_0 = 1; real_t score_0 = 1;
real_t score_1 = 1; real_t score_1 = 1;
real_t sum = 0; real_t sum = 0;
for (uint32_t ii = 0; ii < _output_set.size(); ii++) { for (int ii = 0; ii < _output_set->size(); ii++) {
if (_output_set[ii] == 1) { if (_output_set->get_element(ii) == 1) {
sum += _output_set[ii]; sum += 1;
} }
} }
// Easy computation of priors, i.e. Pr(C_k) // Easy computation of priors, i.e. Pr(C_k)
_prior_1 = sum / _y_hat.size(); _prior_1 = sum / _y_hat->size();
_prior_0 = 1 - _prior_1; _prior_0 = 1 - _prior_1;
// Evaluating Theta... // Evaluating Theta...
@ -143,47 +157,44 @@ void MLPPBernoulliNB::evaluate() {
// Evaluating the vocab set... // Evaluating the vocab set...
compute_vocab(); compute_vocab();
std::vector<int> foundIndices; Vector<int> found_indices;
for (uint32_t j = 0; j < _input_set.size(); j++) { for (int j = 0; j < _input_set->size().y; j++) {
for (uint32_t k = 0; k < _vocab.size(); k++) { for (int k = 0; k < _vocab->size(); k++) {
if (_input_set[i][j] == _vocab[k]) { if (_input_set->get_element(i, j) == _vocab->get_element(k)) {
score_0 += std::log(_theta[0][_vocab[k]]); score_0 += Math::log(static_cast<real_t>(_theta[0][_vocab->get_element(k)]));
score_1 += std::log(_theta[1][_vocab[k]]); score_1 += Math::log(static_cast<real_t>(_theta[1][_vocab->get_element(k)]));
foundIndices.push_back(k); found_indices.push_back(k);
} }
} }
} }
for (uint32_t ii = 0; ii < _vocab.size(); ii++) { for (int ii = 0; ii < _vocab->size(); ii++) {
bool found = false; bool found = false;
for (uint32_t j = 0; j < foundIndices.size(); j++) { for (int j = 0; j < found_indices.size(); j++) {
if (_vocab[ii] == _vocab[foundIndices[j]]) { if (_vocab->get_element(ii) == _vocab->get_element(found_indices[j])) {
found = true; found = true;
} }
} }
if (!found) { if (!found) {
score_0 += std::log(1 - _theta[0][_vocab[ii]]); score_0 += Math::log(1.0 - _theta[0][_vocab->get_element(ii)]);
score_1 += std::log(1 - _theta[1][_vocab[ii]]); score_1 += Math::log(1.0 - _theta[1][_vocab->get_element(ii)]);
} }
} }
score_0 += std::log(_prior_0); score_0 += Math::log(_prior_0);
score_1 += std::log(_prior_1); score_1 += Math::log(_prior_1);
score_0 = exp(score_0); score_0 = Math::exp(score_0);
score_1 = exp(score_1); score_1 = Math::exp(score_1);
std::cout << score_0 << std::endl;
std::cout << score_1 << std::endl;
// Assigning the traning example to a class // Assigning the traning example to a class
if (score_0 > score_1) { if (score_0 > score_1) {
_y_hat[i] = 0; _y_hat->set_element(i, 0);
} else { } else {
_y_hat[i] = 1; _y_hat->set_element(i, 1);
} }
} }
} }

View File

@ -8,23 +8,25 @@
// Created by Marc Melikyan on 1/17/21. // Created by Marc Melikyan on 1/17/21.
// //
#include "core/containers/hash_map.h"
#include "core/containers/vector.h"
#include "core/math/math_defs.h" #include "core/math/math_defs.h"
#include "core/object/reference.h" #include "core/object/reference.h"
#include <map> #include "../lin_alg/mlpp_matrix.h"
#include <vector> #include "../lin_alg/mlpp_vector.h"
class MLPPBernoulliNB : public Reference { class MLPPBernoulliNB : public Reference {
GDCLASS(MLPPBernoulliNB, Reference); GDCLASS(MLPPBernoulliNB, Reference);
public: public:
std::vector<real_t> model_set_test(std::vector<std::vector<real_t>> X); Ref<MLPPVector> model_set_test(const Ref<MLPPMatrix> &X);
real_t model_test(std::vector<real_t> x); real_t model_test(const Ref<MLPPVector> &x);
real_t score(); real_t score();
MLPPBernoulliNB(std::vector<std::vector<real_t>> p_input_set, std::vector<real_t> p_output_set); MLPPBernoulliNB(const Ref<MLPPMatrix> &p_input_set, const Ref<MLPPVector> &p_output_set);
MLPPBernoulliNB(); MLPPBernoulliNB();
~MLPPBernoulliNB(); ~MLPPBernoulliNB();
@ -40,14 +42,14 @@ protected:
real_t _prior_1; real_t _prior_1;
real_t _prior_0; real_t _prior_0;
std::vector<std::map<real_t, int>> _theta; Vector<HashMap<real_t, int>> _theta;
std::vector<real_t> _vocab; Ref<MLPPVector> _vocab;
int _class_num; int _class_num;
// Datasets // Datasets
std::vector<std::vector<real_t>> _input_set; Ref<MLPPMatrix> _input_set;
std::vector<real_t> _output_set; Ref<MLPPVector> _output_set;
std::vector<real_t> _y_hat; Ref<MLPPVector> _y_hat;
}; };
#endif /* BernoulliNB_hpp */ #endif /* BernoulliNB_hpp */

View File

@ -194,6 +194,51 @@ public:
return setInputSet; return setInputSet;
} }
// Returns the distinct elements of input_set, kept in order of first appearance.
//
// Elements are compared with operator==, so T must provide it. Each input
// element is checked against the elements collected so far (a linear scan),
// and the scan stops as soon as a match is found.
//
// An empty input yields an empty result.
template <class T>
Vector<T> vec_to_set(Vector<T> input_set) {
	Vector<T> set_input_set;

	for (int i = 0; i < input_set.size(); i++) {
		const T &candidate = input_set[i];

		bool new_element = true;
		for (int j = 0; j < set_input_set.size(); j++) {
			if (set_input_set[j] == candidate) {
				new_element = false;
				// One match is enough; no need to look at the rest.
				break;
			}
		}

		if (new_element) {
			set_input_set.push_back(candidate);
		}
	}

	return set_input_set;
}
// Builds a new MLPPVector holding the distinct values of input_set, kept in
// order of first appearance.
//
// Values are compared with exact == on real_t (no tolerance).
//
// Returns a null Ref when input_set is null; otherwise a freshly instanced
// vector (empty when input_set has no elements).
Ref<MLPPVector> vec_to_setnv(const Ref<MLPPVector> &input_set) {
	// Guard against dereferencing a null reference below.
	if (!input_set.is_valid()) {
		return Ref<MLPPVector>();
	}

	Vector<real_t> set_input_set;

	const int input_size = input_set->size();
	for (int i = 0; i < input_size; i++) {
		// Read the element once instead of on every inner iteration.
		const real_t value = input_set->get_element(i);

		bool new_element = true;
		for (int j = 0; j < set_input_set.size(); j++) {
			if (set_input_set[j] == value) {
				new_element = false;
				// One match is enough; no need to look at the rest.
				break;
			}
		}

		if (new_element) {
			set_input_set.push_back(value);
		}
	}

	Ref<MLPPVector> ret;
	ret.instance();
	ret->set_from_vector(set_input_set);

	return ret;
}
protected: protected:
static void _bind_methods(); static void _bind_methods();
}; };

View File

@ -302,7 +302,7 @@ Ref<MLPPMatrix> MLPPLinAlg::kronecker_productm(const Ref<MLPPMatrix> &A, const R
row.push_back(scalar_multiplynv(a_ptr[A->calculate_index(i, k)], row_tmp)); row.push_back(scalar_multiplynv(a_ptr[A->calculate_index(i, k)], row_tmp));
} }
Ref<MLPPVector> flattened_row = flattenv(row); Ref<MLPPVector> flattened_row = flattenvv(row);
C->set_row_mlpp_vector(i * b_size.y + j, flattened_row); C->set_row_mlpp_vector(i * b_size.y + j, flattened_row);
} }
@ -1009,8 +1009,6 @@ Ref<MLPPVector> MLPPLinAlg::maxnvv(const Ref<MLPPVector> &a, const Ref<MLPPVecto
const real_t *ba = b->ptr(); const real_t *ba = b->ptr();
real_t *ret_ptr = ret->ptrw(); real_t *ret_ptr = ret->ptrw();
real_t dist = 0;
for (int i = 0; i < a_size; i++) { for (int i = 0; i < a_size; i++) {
real_t aa_i = aa[i]; real_t aa_i = aa[i];
real_t bb_i = ba[i]; real_t bb_i = ba[i];
@ -1678,7 +1676,7 @@ std::vector<real_t> MLPPLinAlg::flatten(std::vector<std::vector<real_t>> A) {
return a; return a;
} }
Ref<MLPPVector> MLPPLinAlg::flattenv(const Vector<Ref<MLPPVector>> &A) { Ref<MLPPVector> MLPPLinAlg::flattenvv(const Vector<Ref<MLPPVector>> &A) {
Ref<MLPPVector> a; Ref<MLPPVector> a;
a.instance(); a.instance();
@ -1707,6 +1705,23 @@ Ref<MLPPVector> MLPPLinAlg::flattenv(const Vector<Ref<MLPPVector>> &A) {
return a; return a;
} }
// Copies every element of matrix A into a new vector of length
// A->data_size(), in the same order as A's underlying buffer.
//
// Returns a null Ref when A is null; otherwise a freshly instanced vector
// (empty when A holds no data).
Ref<MLPPVector> MLPPLinAlg::flattenv(const Ref<MLPPMatrix> &A) {
	// Guard against dereferencing a null reference below.
	if (!A.is_valid()) {
		return Ref<MLPPVector>();
	}

	const int data_size = A->data_size();

	Ref<MLPPVector> res;
	res.instance();
	res->resize(data_size);

	real_t *res_ptr = res->ptrw();
	const real_t *a_ptr = A->ptr();

	// Straight element-wise copy of the matrix storage.
	for (int i = 0; i < data_size; ++i) {
		res_ptr[i] = a_ptr[i];
	}

	return res;
}
std::vector<real_t> MLPPLinAlg::solve(std::vector<std::vector<real_t>> A, std::vector<real_t> b) { std::vector<real_t> MLPPLinAlg::solve(std::vector<std::vector<real_t>> A, std::vector<real_t> b) {
return mat_vec_mult(inverse(A), b); return mat_vec_mult(inverse(A), b);
} }

View File

@ -181,7 +181,8 @@ public:
real_t sum_elements(std::vector<std::vector<real_t>> A); real_t sum_elements(std::vector<std::vector<real_t>> A);
std::vector<real_t> flatten(std::vector<std::vector<real_t>> A); std::vector<real_t> flatten(std::vector<std::vector<real_t>> A);
Ref<MLPPVector> flattenv(const Vector<Ref<MLPPVector>> &A); Ref<MLPPVector> flattenvv(const Vector<Ref<MLPPVector>> &A);
Ref<MLPPVector> flattenv(const Ref<MLPPMatrix> &A);
std::vector<real_t> solve(std::vector<std::vector<real_t>> A, std::vector<real_t> b); std::vector<real_t> solve(std::vector<std::vector<real_t>> A, std::vector<real_t> b);

View File

@ -799,8 +799,8 @@ void MLPPTests::test_naive_bayes() {
MLPPBernoulliNBOld BNBOld(alg.transpose(inputSet), outputSet); MLPPBernoulliNBOld BNBOld(alg.transpose(inputSet), outputSet);
alg.printVector(BNBOld.modelSetTest(alg.transpose(inputSet))); alg.printVector(BNBOld.modelSetTest(alg.transpose(inputSet)));
MLPPBernoulliNB BNB(alg.transpose(inputSet), outputSet); MLPPBernoulliNB BNB(alg.transposem(input_set), output_set);
alg.printVector(BNB.model_set_test(alg.transpose(inputSet))); PLOG_MSG(BNB.model_set_test(alg.transposem(input_set))->to_string());
MLPPGaussianNBOld GNBOld(alg.transpose(inputSet), outputSet, 2); MLPPGaussianNBOld GNBOld(alg.transpose(inputSet), outputSet, 2);
alg.printVector(GNBOld.modelSetTest(alg.transpose(inputSet))); alg.printVector(GNBOld.modelSetTest(alg.transpose(inputSet)));