/*************************************************************************/
/*  multinomial_nb.cpp                                                   */
/*************************************************************************/
/*                         This file is part of:                         */
/*                    PMLPP Machine Learning Library                     */
/*                   https://github.com/Relintai/pmlpp                   */
/*************************************************************************/
/* Copyright (c) 2023-present Péter Magyar.                              */
/* Copyright (c) 2022-2023 Marc Melikyan                                 */
/*                                                                       */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the       */
/* "Software"), to deal in the Software without restriction, including   */
/* without limitation the rights to use, copy, modify, merge, publish,   */
/* distribute, sublicense, and/or sell copies of the Software, and to    */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions:                                             */
/*                                                                       */
/* The above copyright notice and this permission notice shall be        */
/* included in all copies or substantial portions of the Software.       */
/*                                                                       */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
/*************************************************************************/

#include "multinomial_nb.h"

#include "core/containers/local_vector.h"

#include "../utilities/utilities.h"

// NOTE(review): a fourth, angle-bracket #include followed here, but its header
// name was missing from the reviewed text (everything of the form <name> had
// been stripped). Nothing in this file visibly uses a standard-library
// facility; restore the original include from version control if the build
// needs it.

/*
Ref<MLPPMatrix> MLPPMultinomialNB::get_input_set() {
	return _input_set;
}
void MLPPMultinomialNB::set_input_set(const Ref<MLPPMatrix> &val) {
	_input_set = val;

	_initialized = false;
}

Ref<MLPPVector> MLPPMultinomialNB::get_output_set() {
	return _output_set;
}
void MLPPMultinomialNB::set_output_set(const Ref<MLPPVector> &val) {
	_output_set = val;

	_initialized = false;
}

real_t MLPPMultinomialNB::get_class_num() {
	return _class_num;
}
void MLPPMultinomialNB::set_class_num(const real_t val) {
	_class_num = val;

	_initialized = false;
}
*/

// Classifies every row of X and returns the predicted class indices.
//
// X      Matrix whose rows are the samples to classify.
// return Vector with one entry per row of X, each the value returned by
//        model_test() for that row. A null Ref is returned when the model is
//        not initialized or X is null.
Ref<MLPPVector> MLPPMultinomialNB::model_set_test(const Ref<MLPPMatrix> &X) {
	ERR_FAIL_COND_V(!_initialized, Ref<MLPPVector>());
	ERR_FAIL_COND_V(!X.is_valid(), Ref<MLPPVector>());

	// size().x is used as the row length and size().y as the row count,
	// matching how compute_theta() walks _input_set.
	const Size2i x_size = X->size();

	// Scratch vector reused for every row to avoid one allocation per sample.
	Ref<MLPPVector> x_row_tmp;
	x_row_tmp.instance();
	x_row_tmp->resize(x_size.x);

	Ref<MLPPVector> y_hat;
	y_hat.instance();
	y_hat->resize(x_size.y);

	for (int i = 0; i < x_size.y; i++) {
		X->row_get_into_mlpp_vector(i, x_row_tmp);

		y_hat->element_set(i, model_test(x_row_tmp));
	}

	return y_hat;
}

// Classifies a single sample.
//
// For every element of x that approximately equals a vocabulary entry, the log
// of that entry's per-class theta is added to the class score; the log prior
// of each class is then added and the index of the highest score is returned.
//
// x      Sample to classify.
// return Index of the best-scoring class (as real_t), or 0 when the model is
//        not initialized, x is null, or the class count is not positive.
real_t MLPPMultinomialNB::model_test(const Ref<MLPPVector> &x) {
	ERR_FAIL_COND_V(!_initialized, 0);
	ERR_FAIL_COND_V(!x.is_valid(), 0);
	// score[0] is read unconditionally below, so at least one class is needed.
	ERR_FAIL_COND_V(_class_num <= 0, 0);

	const int x_size = x->size();

	LocalVector<real_t> score;
	score.resize(_class_num);

	// The scores are accumulated with +=, so they must start at zero; resize()
	// is not relied upon to zero-fill the new elements.
	for (uint32_t c = 0; c < score.size(); ++c) {
		score[c] = 0;
	}

	// NOTE(review): theta is rebuilt from the training set on every call, which
	// model_set_test() repeats once per row. Kept as in the original because
	// this is the only visible place that refreshes theta before prediction.
	compute_theta();

	const int vocab_size = _vocab->size();

	for (int j = 0; j < x_size; j++) {
		const real_t x_j = x->element_get(j);

		for (int k = 0; k < vocab_size; k++) {
			const real_t vocab_k = _vocab->element_get(k);

			if (Math::is_equal_approx(x_j, vocab_k)) {
				for (int p = _class_num - 1; p >= 0; p--) {
					const real_t theta_p_k = _theta[p][vocab_k];

					score[p] += Math::log(theta_p_k);
				}
			}
		}
	}

	const int priors_size = _priors->size();
	for (int i = 0; i < priors_size; i++) {
		score[i] += Math::log(_priors->element_get(i));
	}

	// Arg-max over the class scores; ties keep the lowest index.
	int max_index = 0;
	real_t max_element = score[0];
	for (uint32_t i = 1; i < score.size(); ++i) {
		const real_t si = score[i];

		if (si > max_element) {
			max_index = i;
			max_element = si;
		}
	}

	return max_index;
}

// Returns MLPPUtilities::performance_vec() of the stored training predictions
// (_y_hat) against the training labels (_output_set), or 0 when the model is
// not initialized.
real_t MLPPMultinomialNB::score() {
	ERR_FAIL_COND_V(!_initialized, 0);

	MLPPUtilities util;

	return util.performance_vec(_y_hat, _output_set);
}

// Reports whether the model has been marked as initialized.
bool MLPPMultinomialNB::is_initialized() {
	return _initialized;
}

// Marks the model as initialized; does nothing if it already is.
//
// NOTE(review): the validity check on the data sets is commented out and no
// fitting happens here, so an object created with the default constructor
// becomes "initialized" without priors, vocabulary or predictions.
void MLPPMultinomialNB::initialize() {
	if (_initialized) {
		return;
	}

	//ERR_FAIL_COND(!_input_set.is_valid() || !_output_set.is_valid());

	_initialized = true;
}

// Stores the training data, allocates the internal vectors and fits the model
// by calling evaluate().
//
// p_input_set   Training samples, one per row.
// p_output_set  Class label of each training sample.
// pclass_num    Number of classes.
//
// If either data set is null an error is reported and the object is left
// uninitialized instead of dereferencing a null Ref.
MLPPMultinomialNB::MLPPMultinomialNB(const Ref<MLPPMatrix> &p_input_set, const Ref<MLPPVector> &p_output_set, int pclass_num) {
	_input_set = p_input_set;
	_output_set = p_output_set;
	_class_num = pclass_num;

	_priors.instance();
	_vocab.instance();
	_y_hat.instance();

	_initialized = false;

	ERR_FAIL_COND(!_input_set.is_valid() || !_output_set.is_valid());

	_y_hat->resize(_output_set->size());

	// evaluate() is reached only through here in this file; the flag is set
	// first, exactly as before.
	_initialized = true;

	evaluate();
}

// Creates an empty, uninitialized model.
MLPPMultinomialNB::MLPPMultinomialNB() {
	_initialized = false;
}

MLPPMultinomialNB::~MLPPMultinomialNB() {
}

// Rebuilds _theta: one map per class, keyed by feature value.
//
// Every vocabulary key is reset to 0 for every class, each training feature
// value is counted under its sample's class label, and the vocabulary entries
// are then divided by (class prior * number of training predictions).
//
// NOTE(review): the mapped type of _theta is declared in the header; if it is
// an integer type the division below truncates — confirm.
// NOTE(review): counting uses the exact feature value as key while prediction
// matches features against the vocabulary with Math::is_equal_approx().
void MLPPMultinomialNB::compute_theta() {
	// One map per class so that _theta[class][value] is always addressable.
	_theta.resize(_class_num);

	const int vocab_size = _vocab->size();

	// Reset every vocabulary entry of every class to 0.
	for (int i = _class_num - 1; i >= 0; i--) {
		for (int j = 0; j < vocab_size; j++) {
			_theta.write[i][_vocab->element_get(j)] = 0;
		}
	}

	// Count each feature value under the class of the sample it appears in.
	const Size2i input_set_size = _input_set->size();

	for (int i = 0; i < input_set_size.y; i++) {
		// Labels are stored as real_t; make the index conversion explicit.
		const int class_index = static_cast<int>(_output_set->element_get(i));

		for (int j = 0; j < input_set_size.x; j++) {
			_theta.write[class_index][_input_set->element_get(i, j)]++;
		}
	}

	// Normalise the counts. The maps are keyed by feature value, so the stored
	// vocabulary keys are used here; the previous code used the ordinals
	// 0..size-1 as keys, which only works when the feature values happen to be
	// exactly those ordinals.
	// Assumes _vocab holds each key once — a duplicate would be divided twice.
	const int theta_size = _theta.size();
	for (int i = 0; i < theta_size; i++) {
		const real_t class_sample_count = _priors->element_get(i) * _y_hat->size();

		// A class without samples has only zero counts; skip it rather than
		// divide by zero.
		if (class_sample_count <= 0) {
			continue;
		}

		for (int j = 0; j < vocab_size; j++) {
			_theta.write[i][_vocab->element_get(j)] /= class_sample_count;
		}
	}
}

// Fits the model on the stored training set and fills _y_hat with the
// predicted class of every training sample.
//
// Priors and theta depend only on the whole training set, so they are computed
// once up front. Previously they were recomputed for every sample, and each
// pass added the label counts on top of the already scaled (and never zeroed)
// priors.
void MLPPMultinomialNB::evaluate() {
	ERR_FAIL_COND(!_input_set.is_valid() || !_output_set.is_valid());
	ERR_FAIL_COND(!_priors.is_valid() || !_vocab.is_valid() || !_y_hat.is_valid());
	// score[0] is read unconditionally below, so at least one class is needed.
	ERR_FAIL_COND(_class_num <= 0);

	const int output_set_size = _output_set->size();
	const Size2i input_set_size = _input_set->size();

	// Nothing to fit; also avoids dividing by zero when scaling the priors.
	if (output_set_size == 0) {
		return;
	}

	// Easy computation of priors, i.e. Pr(C_k): label frequency / sample count.
	_priors->resize(_class_num);
	for (int c = 0; c < _class_num; c++) {
		_priors->element_set(c, 0);
	}

	for (int ii = 0; ii < output_set_size; ii++) {
		const int osii = static_cast<int>(_output_set->element_get(ii));

		_priors->element_set(osii, _priors->element_get(osii) + 1);
	}

	_priors->scalar_multiply(real_t(1) / real_t(output_set_size));

	// Evaluating Theta...
	compute_theta();

	const int vocab_size = _vocab->size();
	const int priors_size = _priors->size();

	// Pr(B | A) * Pr(A), accumulated in log space; reused for every sample.
	LocalVector<real_t> score;
	score.resize(_class_num);

	for (int i = 0; i < output_set_size; i++) {
		for (uint32_t c = 0; c < score.size(); ++c) {
			score[c] = 0;
		}

		// Walk the features (columns) of sample i. The bound is size().x, as in
		// compute_theta(); the previous code used size().y (the row count).
		for (int j = 0; j < input_set_size.x; j++) {
			const real_t input_set_i_j = _input_set->element_get(i, j);

			for (int k = 0; k < vocab_size; k++) {
				const real_t vocab_k = _vocab->element_get(k);

				if (Math::is_equal_approx(input_set_i_j, vocab_k)) {
					// Each class is scored with its own theta, as model_test()
					// does; the previous code indexed _theta with the sample
					// index i and added the same value to every class.
					for (int p = _class_num - 1; p >= 0; p--) {
						const real_t theta_p_k = _theta[p][vocab_k];

						score[p] += Math::log(theta_p_k);
					}
				}
			}
		}

		// The scores stay in log space: exp() is monotonic, so the arg-max is
		// unchanged, and skipping it avoids every score underflowing to 0.
		for (int ii = 0; ii < priors_size; ii++) {
			score[ii] += Math::log(_priors->element_get(ii));
		}

		// Assigning the training example's y_hat to a class.
		int max_index = 0;
		real_t max_element = score[0];
		for (uint32_t ii = 1; ii < score.size(); ++ii) {
			const real_t si = score[ii];

			if (si > max_element) {
				max_index = ii;
				max_element = si;
			}
		}

		_y_hat->element_set(i, max_index);
	}
}

// Script bindings. Everything is currently disabled.
//
// NOTE(review): the disabled block names get_c/set_c, gradient_descent, sgd,
// mbgd and save, none of which is defined in this file; it needs to be adapted
// before it can be re-enabled.
void MLPPMultinomialNB::_bind_methods() {
	/*
	ClassDB::bind_method(D_METHOD("get_input_set"), &MLPPMultinomialNB::get_input_set);
	ClassDB::bind_method(D_METHOD("set_input_set", "val"), &MLPPMultinomialNB::set_input_set);
	ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "input_set", PROPERTY_HINT_RESOURCE_TYPE, "MLPPMatrix"), "set_input_set", "get_input_set");

	ClassDB::bind_method(D_METHOD("get_output_set"), &MLPPMultinomialNB::get_output_set);
	ClassDB::bind_method(D_METHOD("set_output_set", "val"), &MLPPMultinomialNB::set_output_set);
	ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "output_set", PROPERTY_HINT_RESOURCE_TYPE, "MLPPVector"), "set_output_set", "get_output_set");

	ClassDB::bind_method(D_METHOD("get_c"), &MLPPMultinomialNB::get_c);
	ClassDB::bind_method(D_METHOD("set_c", "val"), &MLPPMultinomialNB::set_c);
	ADD_PROPERTY(PropertyInfo(Variant::REAL, "c"), "set_c", "get_c");

	ClassDB::bind_method(D_METHOD("model_set_test", "X"), &MLPPMultinomialNB::model_set_test);
	ClassDB::bind_method(D_METHOD("model_test", "x"), &MLPPMultinomialNB::model_test);

	ClassDB::bind_method(D_METHOD("gradient_descent", "learning_rate", "max_epoch", "ui"), &MLPPMultinomialNB::gradient_descent, false);
	ClassDB::bind_method(D_METHOD("sgd", "learning_rate", "max_epoch", "ui"), &MLPPMultinomialNB::sgd, false);
	ClassDB::bind_method(D_METHOD("mbgd", "learning_rate", "max_epoch", "mini_batch_size", "ui"), &MLPPMultinomialNB::mbgd, false);

	ClassDB::bind_method(D_METHOD("score"), &MLPPMultinomialNB::score);

	ClassDB::bind_method(D_METHOD("save", "file_name"), &MLPPMultinomialNB::save);

	ClassDB::bind_method(D_METHOD("is_initialized"), &MLPPMultinomialNB::is_initialized);
	ClassDB::bind_method(D_METHOD("initialize"), &MLPPMultinomialNB::initialize);
	*/
}