mirror of
https://github.com/Relintai/pmlpp.git
synced 2024-11-08 13:12:09 +01:00
Now MLPPBernoulliNB uses engine classes.
This commit is contained in:
parent
741475a4ab
commit
5ad25ad918
@ -12,41 +12,51 @@
|
||||
#include <iostream>
|
||||
#include <random>
|
||||
|
||||
// Classifies every sample of X by calling model_test() once per row and
// collecting the per-row results into y_hat, which is returned.
//
// NOTE(review): this span is a rendered diff, so two variants are interleaved.
// The std::vector signature with the push_back loop appears to be the removed
// side; the Ref<MLPPMatrix> signature, which copies each row into x_row_tmp and
// stores the result with set_element(), appears to be the added side. Confirm
// against the repository before treating either side as the live code.
// NOTE(review): X is dereferenced without a validity check in the Ref variant.
std::vector<real_t> MLPPBernoulliNB::model_set_test(std::vector<std::vector<real_t>> X) {
|
||||
std::vector<real_t> y_hat;
|
||||
for (uint32_t i = 0; i < X.size(); i++) {
|
||||
y_hat.push_back(model_test(X[i]));
|
||||
Ref<MLPPVector> MLPPBernoulliNB::model_set_test(const Ref<MLPPMatrix> &X) {
|
||||
Ref<MLPPVector> y_hat;
|
||||
y_hat.instance();
|
||||
y_hat->resize(X->size().y);
|
||||
|
||||
Ref<MLPPVector> x_row_tmp;
|
||||
x_row_tmp.instance();
|
||||
x_row_tmp->resize(X->size().x);
|
||||
|
||||
for (int i = 0; i < X->size().y; i++) {
|
||||
X->get_row_into_mlpp_vector(i, x_row_tmp);
|
||||
|
||||
y_hat->set_element(i, model_test(x_row_tmp));
|
||||
}
|
||||
|
||||
return y_hat;
|
||||
}
|
||||
|
||||
real_t MLPPBernoulliNB::model_test(std::vector<real_t> x) {
|
||||
real_t MLPPBernoulliNB::model_test(const Ref<MLPPVector> &x) {
|
||||
real_t score_0 = 1;
|
||||
real_t score_1 = 1;
|
||||
|
||||
std::vector<int> foundIndices;
|
||||
Vector<int> found_indices;
|
||||
|
||||
for (uint32_t j = 0; j < x.size(); j++) {
|
||||
for (uint32_t k = 0; k < _vocab.size(); k++) {
|
||||
if (x[j] == _vocab[k]) {
|
||||
score_0 *= _theta[0][_vocab[k]];
|
||||
score_1 *= _theta[1][_vocab[k]];
|
||||
for (int j = 0; j < x->size(); j++) {
|
||||
for (int k = 0; k < _vocab->size(); k++) {
|
||||
if (x->get_element(j) == _vocab->get_element(k)) {
|
||||
score_0 *= _theta[0][_vocab->get_element(k)];
|
||||
score_1 *= _theta[1][_vocab->get_element(k)];
|
||||
|
||||
foundIndices.push_back(k);
|
||||
found_indices.push_back(k);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < _vocab.size(); i++) {
|
||||
for (int i = 0; i < _vocab->size(); i++) {
|
||||
bool found = false;
|
||||
for (uint32_t j = 0; j < foundIndices.size(); j++) {
|
||||
if (_vocab[i] == _vocab[foundIndices[j]]) {
|
||||
for (int j = 0; j < found_indices.size(); j++) {
|
||||
if (_vocab->get_element(i) == _vocab->get_element(found_indices[j])) {
|
||||
found = true;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
score_0 *= 1 - _theta[0][_vocab[i]];
|
||||
score_1 *= 1 - _theta[1][_vocab[i]];
|
||||
score_0 *= 1 - _theta[0][_vocab->get_element(i)];
|
||||
score_1 *= 1 - _theta[1][_vocab->get_element(i)];
|
||||
}
|
||||
}
|
||||
|
||||
@ -64,10 +74,11 @@ real_t MLPPBernoulliNB::model_test(std::vector<real_t> x) {
|
||||
|
||||
// Returns the value MLPPUtilities computes for the stored predictions (_y_hat)
// against the stored labels (_output_set).
//
// NOTE(review): this span is a rendered diff. The `performance(...)` return
// appears to be the removed statement and `performance_vec(...)` its
// replacement; only one of them belongs in the compiled file. Confirm against
// the repository.
real_t MLPPBernoulliNB::score() {
|
||||
MLPPUtilities util;
|
||||
return util.performance(_y_hat, _output_set);
|
||||
|
||||
return util.performance_vec(_y_hat, _output_set);
|
||||
}
|
||||
|
||||
MLPPBernoulliNB::MLPPBernoulliNB(std::vector<std::vector<real_t>> p_input_set, std::vector<real_t> p_output_set) {
|
||||
MLPPBernoulliNB::MLPPBernoulliNB(const Ref<MLPPMatrix> &p_input_set, const Ref<MLPPVector> &p_output_set) {
|
||||
_input_set = p_input_set;
|
||||
_output_set = p_output_set;
|
||||
_class_num = 2;
|
||||
@ -75,7 +86,9 @@ MLPPBernoulliNB::MLPPBernoulliNB(std::vector<std::vector<real_t>> p_input_set, s
|
||||
_prior_1 = 0;
|
||||
_prior_0 = 0;
|
||||
|
||||
_y_hat.resize(_output_set.size());
|
||||
_y_hat.instance();
|
||||
_y_hat->resize(_output_set->size());
|
||||
|
||||
evaluate();
|
||||
}
|
||||
|
||||
@ -89,7 +102,8 @@ MLPPBernoulliNB::~MLPPBernoulliNB() {
|
||||
// Rebuilds _vocab as the set of distinct values found in _input_set: the input
// is flattened into a single vector, which is then reduced to its unique
// elements.
//
// NOTE(review): this span is a rendered diff. The `vecToSet<real_t>(alg.flatten(...))`
// assignment appears to be the removed statement and
// `vec_to_setnv(alg.flattenv(...))` its replacement. Confirm against the
// repository.
void MLPPBernoulliNB::compute_vocab() {
|
||||
MLPPLinAlg alg;
|
||||
MLPPData data;
|
||||
_vocab = data.vecToSet<real_t>(alg.flatten(_input_set));
|
||||
|
||||
_vocab = data.vec_to_setnv(alg.flattenv(_input_set));
|
||||
}
|
||||
|
||||
void MLPPBernoulliNB::compute_theta() {
|
||||
@ -98,43 +112,43 @@ void MLPPBernoulliNB::compute_theta() {
|
||||
|
||||
// Setting all values in the hashmap by default to 0.
|
||||
for (int i = _class_num - 1; i >= 0; i--) {
|
||||
for (uint32_t j = 0; j < _vocab.size(); j++) {
|
||||
_theta[i][_vocab[j]] = 0;
|
||||
for (int j = 0; j < _vocab->size(); j++) {
|
||||
_theta.write[i][_vocab->get_element(j)] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < _input_set.size(); i++) {
|
||||
for (uint32_t j = 0; j < _input_set[0].size(); j++) {
|
||||
_theta[_output_set[i]][_input_set[i][j]]++;
|
||||
for (int i = 0; i < _input_set->size().y; i++) {
|
||||
for (int j = 0; j < _input_set->size().x; j++) {
|
||||
_theta.write[_output_set->get_element(i)][_input_set->get_element(i, j)]++;
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < _theta.size(); i++) {
|
||||
for (int i = 0; i < _theta.size(); i++) {
|
||||
for (uint32_t j = 0; j < _theta[i].size(); j++) {
|
||||
if (i == 0) {
|
||||
_theta[i][j] /= _prior_0 * _y_hat.size();
|
||||
_theta.write[i][j] /= _prior_0 * _y_hat->size();
|
||||
} else {
|
||||
_theta[i][j] /= _prior_1 * _y_hat.size();
|
||||
_theta.write[i][j] /= _prior_1 * _y_hat->size();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void MLPPBernoulliNB::evaluate() {
|
||||
for (uint32_t i = 0; i < _output_set.size(); i++) {
|
||||
for (int i = 0; i < _output_set->size(); i++) {
|
||||
// Pr(B | A) * Pr(A)
|
||||
real_t score_0 = 1;
|
||||
real_t score_1 = 1;
|
||||
|
||||
real_t sum = 0;
|
||||
for (uint32_t ii = 0; ii < _output_set.size(); ii++) {
|
||||
if (_output_set[ii] == 1) {
|
||||
sum += _output_set[ii];
|
||||
for (int ii = 0; ii < _output_set->size(); ii++) {
|
||||
if (_output_set->get_element(ii) == 1) {
|
||||
sum += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Easy computation of priors, i.e. Pr(C_k)
|
||||
_prior_1 = sum / _y_hat.size();
|
||||
_prior_1 = sum / _y_hat->size();
|
||||
_prior_0 = 1 - _prior_1;
|
||||
|
||||
// Evaluating Theta...
|
||||
@ -143,47 +157,44 @@ void MLPPBernoulliNB::evaluate() {
|
||||
// Evaluating the vocab set...
|
||||
compute_vocab();
|
||||
|
||||
std::vector<int> foundIndices;
|
||||
Vector<int> found_indices;
|
||||
|
||||
for (uint32_t j = 0; j < _input_set.size(); j++) {
|
||||
for (uint32_t k = 0; k < _vocab.size(); k++) {
|
||||
if (_input_set[i][j] == _vocab[k]) {
|
||||
score_0 += std::log(_theta[0][_vocab[k]]);
|
||||
score_1 += std::log(_theta[1][_vocab[k]]);
|
||||
for (int j = 0; j < _input_set->size().y; j++) {
|
||||
for (int k = 0; k < _vocab->size(); k++) {
|
||||
if (_input_set->get_element(i, j) == _vocab->get_element(k)) {
|
||||
score_0 += Math::log(static_cast<real_t>(_theta[0][_vocab->get_element(k)]));
|
||||
score_1 += Math::log(static_cast<real_t>(_theta[1][_vocab->get_element(k)]));
|
||||
|
||||
foundIndices.push_back(k);
|
||||
found_indices.push_back(k);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t ii = 0; ii < _vocab.size(); ii++) {
|
||||
for (int ii = 0; ii < _vocab->size(); ii++) {
|
||||
bool found = false;
|
||||
for (uint32_t j = 0; j < foundIndices.size(); j++) {
|
||||
if (_vocab[ii] == _vocab[foundIndices[j]]) {
|
||||
for (int j = 0; j < found_indices.size(); j++) {
|
||||
if (_vocab->get_element(ii) == _vocab->get_element(found_indices[j])) {
|
||||
found = true;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
score_0 += std::log(1 - _theta[0][_vocab[ii]]);
|
||||
score_1 += std::log(1 - _theta[1][_vocab[ii]]);
|
||||
score_0 += Math::log(1.0 - _theta[0][_vocab->get_element(ii)]);
|
||||
score_1 += Math::log(1.0 - _theta[1][_vocab->get_element(ii)]);
|
||||
}
|
||||
}
|
||||
|
||||
score_0 += std::log(_prior_0);
|
||||
score_1 += std::log(_prior_1);
|
||||
score_0 += Math::log(_prior_0);
|
||||
score_1 += Math::log(_prior_1);
|
||||
|
||||
score_0 = exp(score_0);
|
||||
score_1 = exp(score_1);
|
||||
|
||||
std::cout << score_0 << std::endl;
|
||||
std::cout << score_1 << std::endl;
|
||||
score_0 = Math::exp(score_0);
|
||||
score_1 = Math::exp(score_1);
|
||||
|
||||
// Assigning the training example to a class
|
||||
|
||||
if (score_0 > score_1) {
|
||||
_y_hat[i] = 0;
|
||||
_y_hat->set_element(i, 0);
|
||||
} else {
|
||||
_y_hat[i] = 1;
|
||||
_y_hat->set_element(i, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -8,23 +8,25 @@
|
||||
// Created by Marc Melikyan on 1/17/21.
|
||||
//
|
||||
|
||||
#include "core/containers/hash_map.h"
|
||||
#include "core/containers/vector.h"
|
||||
#include "core/math/math_defs.h"
|
||||
|
||||
#include "core/object/reference.h"
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include "../lin_alg/mlpp_matrix.h"
|
||||
#include "../lin_alg/mlpp_vector.h"
|
||||
|
||||
class MLPPBernoulliNB : public Reference {
|
||||
GDCLASS(MLPPBernoulliNB, Reference);
|
||||
|
||||
public:
|
||||
std::vector<real_t> model_set_test(std::vector<std::vector<real_t>> X);
|
||||
real_t model_test(std::vector<real_t> x);
|
||||
Ref<MLPPVector> model_set_test(const Ref<MLPPMatrix> &X);
|
||||
real_t model_test(const Ref<MLPPVector> &x);
|
||||
|
||||
real_t score();
|
||||
|
||||
MLPPBernoulliNB(std::vector<std::vector<real_t>> p_input_set, std::vector<real_t> p_output_set);
|
||||
MLPPBernoulliNB(const Ref<MLPPMatrix> &p_input_set, const Ref<MLPPVector> &p_output_set);
|
||||
|
||||
MLPPBernoulliNB();
|
||||
~MLPPBernoulliNB();
|
||||
@ -40,14 +42,14 @@ protected:
|
||||
real_t _prior_1;
|
||||
real_t _prior_0;
|
||||
|
||||
std::vector<std::map<real_t, int>> _theta;
|
||||
std::vector<real_t> _vocab;
|
||||
Vector<HashMap<real_t, int>> _theta;
|
||||
Ref<MLPPVector> _vocab;
|
||||
int _class_num;
|
||||
|
||||
// Datasets
|
||||
std::vector<std::vector<real_t>> _input_set;
|
||||
std::vector<real_t> _output_set;
|
||||
std::vector<real_t> _y_hat;
|
||||
Ref<MLPPMatrix> _input_set;
|
||||
Ref<MLPPVector> _output_set;
|
||||
Ref<MLPPVector> _y_hat;
|
||||
};
|
||||
|
||||
#endif /* BernoulliNB_hpp */
|
@ -194,6 +194,51 @@ public:
|
||||
return setInputSet;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
Vector<T> vec_to_set(Vector<T> input_set) {
|
||||
Vector<T> set_input_set;
|
||||
|
||||
for (int i = 0; i < input_set.size(); i++) {
|
||||
bool new_element = true;
|
||||
|
||||
for (int j = 0; j < set_input_set.size(); j++) {
|
||||
if (set_input_set[j] == input_set[i]) {
|
||||
new_element = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (new_element) {
|
||||
set_input_set.push_back(input_set[i]);
|
||||
}
|
||||
}
|
||||
|
||||
return set_input_set;
|
||||
}
|
||||
|
||||
// Returns a new MLPPVector holding the distinct values of input_set, in order
// of first occurrence.
//
// Values are compared with exact equality (==), so only bit-for-bit equal
// numbers are merged.
// NOTE(review): input_set is dereferenced without a validity check, as in the
// original; callers must pass a valid reference.
Ref<MLPPVector> vec_to_setnv(const Ref<MLPPVector> &input_set) {
	Vector<real_t> set_input_set;

	const int input_size = input_set->size();

	for (int i = 0; i < input_size; i++) {
		// Read the element once instead of once per comparison.
		const real_t candidate = input_set->get_element(i);
		bool new_element = true;

		for (int j = 0; j < set_input_set.size(); j++) {
			if (set_input_set[j] == candidate) {
				new_element = false;
				// A single match is conclusive; skip the rest of the scan.
				break;
			}
		}

		if (new_element) {
			set_input_set.push_back(candidate);
		}
	}

	Ref<MLPPVector> ret;
	ret.instance();
	ret->set_from_vector(set_input_set);

	return ret;
}
|
||||
|
||||
protected:
|
||||
static void _bind_methods();
|
||||
};
|
||||
|
@ -302,7 +302,7 @@ Ref<MLPPMatrix> MLPPLinAlg::kronecker_productm(const Ref<MLPPMatrix> &A, const R
|
||||
row.push_back(scalar_multiplynv(a_ptr[A->calculate_index(i, k)], row_tmp));
|
||||
}
|
||||
|
||||
Ref<MLPPVector> flattened_row = flattenv(row);
|
||||
Ref<MLPPVector> flattened_row = flattenvv(row);
|
||||
|
||||
C->set_row_mlpp_vector(i * b_size.y + j, flattened_row);
|
||||
}
|
||||
@ -1009,8 +1009,6 @@ Ref<MLPPVector> MLPPLinAlg::maxnvv(const Ref<MLPPVector> &a, const Ref<MLPPVecto
|
||||
const real_t *ba = b->ptr();
|
||||
real_t *ret_ptr = ret->ptrw();
|
||||
|
||||
real_t dist = 0;
|
||||
|
||||
for (int i = 0; i < a_size; i++) {
|
||||
real_t aa_i = aa[i];
|
||||
real_t bb_i = ba[i];
|
||||
@ -1678,7 +1676,7 @@ std::vector<real_t> MLPPLinAlg::flatten(std::vector<std::vector<real_t>> A) {
|
||||
return a;
|
||||
}
|
||||
|
||||
Ref<MLPPVector> MLPPLinAlg::flattenv(const Vector<Ref<MLPPVector>> &A) {
|
||||
Ref<MLPPVector> MLPPLinAlg::flattenvv(const Vector<Ref<MLPPVector>> &A) {
|
||||
Ref<MLPPVector> a;
|
||||
a.instance();
|
||||
|
||||
@ -1707,6 +1705,23 @@ Ref<MLPPVector> MLPPLinAlg::flattenv(const Vector<Ref<MLPPVector>> &A) {
|
||||
return a;
|
||||
}
|
||||
|
||||
// Copies all A->data_size() elements of A's backing storage, in storage order,
// into a newly created vector and returns it.
Ref<MLPPVector> MLPPLinAlg::flattenv(const Ref<MLPPMatrix> &A) {
	Ref<MLPPVector> flat;
	flat.instance();

	int element_count = A->data_size();
	flat->resize(element_count);

	const real_t *src = A->ptr();
	real_t *dst = flat->ptrw();

	int idx = 0;
	while (idx < element_count) {
		dst[idx] = src[idx];
		++idx;
	}

	return flat;
}
|
||||
|
||||
std::vector<real_t> MLPPLinAlg::solve(std::vector<std::vector<real_t>> A, std::vector<real_t> b) {
|
||||
return mat_vec_mult(inverse(A), b);
|
||||
}
|
||||
|
@ -181,7 +181,8 @@ public:
|
||||
real_t sum_elements(std::vector<std::vector<real_t>> A);
|
||||
|
||||
std::vector<real_t> flatten(std::vector<std::vector<real_t>> A);
|
||||
Ref<MLPPVector> flattenv(const Vector<Ref<MLPPVector>> &A);
|
||||
Ref<MLPPVector> flattenvv(const Vector<Ref<MLPPVector>> &A);
|
||||
Ref<MLPPVector> flattenv(const Ref<MLPPMatrix> &A);
|
||||
|
||||
std::vector<real_t> solve(std::vector<std::vector<real_t>> A, std::vector<real_t> b);
|
||||
|
||||
|
@ -799,8 +799,8 @@ void MLPPTests::test_naive_bayes() {
|
||||
MLPPBernoulliNBOld BNBOld(alg.transpose(inputSet), outputSet);
|
||||
alg.printVector(BNBOld.modelSetTest(alg.transpose(inputSet)));
|
||||
|
||||
MLPPBernoulliNB BNB(alg.transpose(inputSet), outputSet);
|
||||
alg.printVector(BNB.model_set_test(alg.transpose(inputSet)));
|
||||
MLPPBernoulliNB BNB(alg.transposem(input_set), output_set);
|
||||
PLOG_MSG(BNB.model_set_test(alg.transposem(input_set))->to_string());
|
||||
|
||||
MLPPGaussianNBOld GNBOld(alg.transpose(inputSet), outputSet, 2);
|
||||
alg.printVector(GNBOld.modelSetTest(alg.transpose(inputSet)));
|
||||
|
Loading…
Reference in New Issue
Block a user