diff --git a/mlpp/knn/knn.cpp b/mlpp/knn/knn.cpp index 009d06a..6139ef4 100644 --- a/mlpp/knn/knn.cpp +++ b/mlpp/knn/knn.cpp @@ -8,77 +8,172 @@ #include "../lin_alg/lin_alg.h" #include "../utilities/utilities.h" -#include -#include -#include +#include "core/containers/hash_map.h" +#include "core/containers/vector.h" - -MLPPKNN::MLPPKNN(std::vector> inputSet, std::vector outputSet, int k) : - inputSet(inputSet), outputSet(outputSet), k(k) { +Ref MLPPKNN::get_input_set() { + return _input_set; +} +void MLPPKNN::set_input_set(const Ref &val) { + _input_set = val; } -std::vector MLPPKNN::modelSetTest(std::vector> X) { - std::vector y_hat; - for (int i = 0; i < X.size(); i++) { - y_hat.push_back(modelTest(X[i])); +Ref MLPPKNN::get_output_set() { + return _output_set; +} +void MLPPKNN::set_output_set(const Ref &val) { + _output_set = val; +} + +int MLPPKNN::get_k() { + return _k; +} +void MLPPKNN::set_k(const int val) { + _k = val; +} + +PoolIntArray MLPPKNN::model_set_test(const Ref &X) { + ERR_FAIL_COND_V(!X.is_valid(), PoolIntArray()); + + Ref v; + v.instance(); + + int y_size = X->size().y; + + PoolIntArray y_hat; + y_hat.resize(y_size); + + for (int i = 0; i < y_size; i++) { + X->get_row_into_mlpp_vector(i, v); + + y_hat.set(i, model_test(v)); } + return y_hat; } -int MLPPKNN::modelTest(std::vector x) { - return determineClass(nearestNeighbors(x)); +int MLPPKNN::model_test(const Ref &x) { + return determine_class(nearest_neighbors(x)); } real_t MLPPKNN::score() { - MLPPUtilities util; - return util.performance(modelSetTest(inputSet), outputSet); + MLPPUtilities util; + return util.performance_pool_int_array_vec(model_set_test(_input_set), _output_set); } -int MLPPKNN::determineClass(std::vector knn) { - std::map class_nums; - for (int i = 0; i < outputSet.size(); i++) { - class_nums[outputSet[i]] = 0; - } - for (int i = 0; i < knn.size(); i++) { - for (int j = 0; j < outputSet.size(); j++) { - if (knn[i] == outputSet[j]) { - class_nums[outputSet[j]]++; - } - } - } - 
int max = class_nums[outputSet[0]]; - int final_class = outputSet[0]; - - for (int i = 0; i < outputSet.size(); i++) { - if (class_nums[outputSet[i]] > max) { - max = class_nums[outputSet[i]]; - } - } - for (auto [c, v] : class_nums) { - if (v == max) { - final_class = c; - } - } - return final_class; +MLPPKNN::MLPPKNN(std::vector> inputSet, std::vector outputSet, int k) { + _k = k; } -std::vector MLPPKNN::nearestNeighbors(std::vector x) { +MLPPKNN::MLPPKNN() { + _k = 0; +} + +MLPPKNN::~MLPPKNN() { +} + +// Private Model Functions +PoolIntArray MLPPKNN::nearest_neighbors(const Ref &x) { + ERR_FAIL_COND_V(!_input_set.is_valid(), PoolIntArray()); + MLPPLinAlg alg; // The nearest neighbors - std::vector knn; + PoolIntArray knn; + + HashMap skip_map; + + Ref tmpv1; + tmpv1.instance(); + Ref tmpv2; + tmpv2.instance(); + + int iuss = _input_set->size().y; - std::vector> inputUseSet = inputSet; //Perfom this loop unless and until all k nearest neighbors are found, appended, and returned - for (int i = 0; i < k; i++) { + for (int i = 0; i < _k; ++i) { int neighbor = 0; - for (int j = 0; j < inputUseSet.size(); j++) { - bool isNeighborNearer = alg.euclideanDistance(x, inputUseSet[j]) < alg.euclideanDistance(x, inputUseSet[neighbor]); - if (isNeighborNearer) { + + for (int j = 0; j < iuss; j++) { + if (skip_map.has(j)) { + continue; + } + + _input_set->get_row_into_mlpp_vector(j, tmpv1); + _input_set->get_row_into_mlpp_vector(neighbor, tmpv2); + + bool is_neighbor_nearer = skip_map.has(neighbor) || alg.euclidean_distance(x, tmpv1) < alg.euclidean_distance(x, tmpv2); + + if (is_neighbor_nearer) { neighbor = j; } } - knn.push_back(neighbor); - inputUseSet.erase(inputUseSet.begin() + neighbor); // This is why we maintain an extra input"Use"Set + + if (!skip_map.has(neighbor)) { + knn.push_back(neighbor); + skip_map.set(neighbor, true); + } } + return knn; } + +int MLPPKNN::determine_class(const PoolIntArray &knn) { + ERR_FAIL_COND_V(!_output_set.is_valid(), 0); + + int output_set_size = 
_output_set->size(); + + ERR_FAIL_COND_V(output_set_size == 0, 0); + + const real_t *os_ptr = _output_set->ptr(); + + HashMap class_nums; + + for (int i = 0; i < output_set_size; ++i) { + class_nums[static_cast(os_ptr[i])] = 0; + } + + PoolIntArray::Read knn_r = knn.read(); + const int *knn_ptr = knn_r.ptr(); + int knn_size = knn.size(); + + for (int i = 0; i < knn_size; ++i) { + for (int j = 0; j < output_set_size; j++) { + int opj = static_cast(os_ptr[j]); + if (knn_ptr[i] == opj) { + class_nums[opj]++; + } + } + } + + int final_class = static_cast(os_ptr[0]); + int max = class_nums[final_class]; + + for (int i = 0; i < output_set_size; ++i) { + int opi = static_cast(os_ptr[i]); + + if (class_nums[opi] > max) { + max = class_nums[opi]; + final_class = opi; + } + } + + return final_class; +} + +void MLPPKNN::_bind_methods() { + ClassDB::bind_method(D_METHOD("get_input_set"), &MLPPKNN::get_input_set); + ClassDB::bind_method(D_METHOD("set_input_set", "value"), &MLPPKNN::set_input_set); + ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "input_set", PROPERTY_HINT_RESOURCE_TYPE, "MLPPMatrix"), "set_input_set", "get_input_set"); + + ClassDB::bind_method(D_METHOD("get_output_set"), &MLPPKNN::get_output_set); + ClassDB::bind_method(D_METHOD("set_output_set", "value"), &MLPPKNN::set_output_set); + ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "output_set", PROPERTY_HINT_RESOURCE_TYPE, "MLPPVector"), "set_output_set", "get_output_set"); + + ClassDB::bind_method(D_METHOD("get_k"), &MLPPKNN::get_k); + ClassDB::bind_method(D_METHOD("set_k", "value"), &MLPPKNN::set_k); + ADD_PROPERTY(PropertyInfo(Variant::INT, "k"), "set_k", "get_k"); + + ClassDB::bind_method(D_METHOD("model_set_test", "X"), &MLPPKNN::model_set_test); + ClassDB::bind_method(D_METHOD("model_test", "x"), &MLPPKNN::model_test); + ClassDB::bind_method(D_METHOD("score"), &MLPPKNN::score); +} diff --git a/mlpp/knn/knn.h b/mlpp/knn/knn.h index a269c40..534c750 100644 --- a/mlpp/knn/knn.h +++ b/mlpp/knn/knn.h @@ -10,26 
+10,44 @@ #include "core/math/math_defs.h" -#include +#include "core/object/reference.h" +#include "../lin_alg/mlpp_matrix.h" +#include "../lin_alg/mlpp_vector.h" + +class MLPPKNN : public Reference { + GDCLASS(MLPPKNN, Reference); -class MLPPKNN { public: - MLPPKNN(std::vector> inputSet, std::vector outputSet, int k); - std::vector modelSetTest(std::vector> X); - int modelTest(std::vector x); + Ref get_input_set(); + void set_input_set(const Ref &val); + + Ref get_output_set(); + void set_output_set(const Ref &val); + + int get_k(); + void set_k(const int val); + + PoolIntArray model_set_test(const Ref &X); + int model_test(const Ref &x); real_t score(); -private: + MLPPKNN(std::vector> inputSet, std::vector outputSet, int k); + + MLPPKNN(); + ~MLPPKNN(); + +protected: // Private Model Functions - std::vector nearestNeighbors(std::vector x); - int determineClass(std::vector knn); + PoolIntArray nearest_neighbors(const Ref &x); + int determine_class(const PoolIntArray &knn); + + static void _bind_methods(); // Model Inputs and Parameters - std::vector> inputSet; - std::vector outputSet; - int k; + Ref _input_set; + Ref _output_set; + int _k; }; - #endif /* kNN_hpp */ diff --git a/mlpp/lin_alg/lin_alg.cpp b/mlpp/lin_alg/lin_alg.cpp index 4500b18..13c0e5c 100644 --- a/mlpp/lin_alg/lin_alg.cpp +++ b/mlpp/lin_alg/lin_alg.cpp @@ -5,6 +5,9 @@ // #include "lin_alg.h" + +#include "core/math/math_funcs.h" + #include "../stat/stat.h" #include #include @@ -1220,6 +1223,43 @@ real_t MLPPLinAlg::euclideanDistance(std::vector a, std::vector return std::sqrt(dist); } +real_t MLPPLinAlg::euclidean_distance(const Ref &a, const Ref &b) { + ERR_FAIL_COND_V(!a.is_valid() || !b.is_valid(), 0); + + int a_size = a->size(); + + ERR_FAIL_COND_V(a_size != b->size(), 0); + + const real_t *aa = a->ptr(); + const real_t *ba = b->ptr(); + + real_t dist = 0; + + for (int i = 0; i < a_size; i++) { + dist += (aa[i] - ba[i]) * (aa[i] - ba[i]); + } + + return Math::sqrt(dist); +} +real_t 
MLPPLinAlg::euclidean_distance_squared(const Ref &a, const Ref &b) { + ERR_FAIL_COND_V(!a.is_valid() || !b.is_valid(), 0); + + int a_size = a->size(); + + ERR_FAIL_COND_V(a_size != b->size(), 0); + + const real_t *aa = a->ptr(); + const real_t *ba = b->ptr(); + + real_t dist = 0; + + for (int i = 0; i < a_size; i++) { + dist += (aa[i] - ba[i]) * (aa[i] - ba[i]); + } + + return dist; +} + real_t MLPPLinAlg::norm_2(std::vector a) { return std::sqrt(norm_sq(a)); } diff --git a/mlpp/lin_alg/lin_alg.h b/mlpp/lin_alg/lin_alg.h index 494bd4c..dd8adfd 100644 --- a/mlpp/lin_alg/lin_alg.h +++ b/mlpp/lin_alg/lin_alg.h @@ -10,6 +10,9 @@ #include "core/math/math_defs.h" +#include "../lin_alg/mlpp_matrix.h" +#include "../lin_alg/mlpp_vector.h" + #include #include @@ -211,6 +214,8 @@ public: std::vector round(std::vector a); real_t euclideanDistance(std::vector a, std::vector b); + real_t euclidean_distance(const Ref &a, const Ref &b); + real_t euclidean_distance_squared(const Ref &a, const Ref &b); real_t norm_2(std::vector a); diff --git a/mlpp/lin_alg/mlpp_matrix.cpp b/mlpp/lin_alg/mlpp_matrix.cpp index c8f4be9..7a91076 100644 --- a/mlpp/lin_alg/mlpp_matrix.cpp +++ b/mlpp/lin_alg/mlpp_matrix.cpp @@ -25,7 +25,7 @@ String MLPPMatrix::to_string() { void MLPPMatrix::_bind_methods() { ClassDB::bind_method(D_METHOD("add_row", "row"), &MLPPMatrix::add_row_pool_vector); ClassDB::bind_method(D_METHOD("remove_row", "index"), &MLPPMatrix::remove_row); - ClassDB::bind_method(D_METHOD("remove_unordered", "index"), &MLPPMatrix::remove_unordered); + ClassDB::bind_method(D_METHOD("remove_row_unordered", "index"), &MLPPMatrix::remove_row_unordered); ClassDB::bind_method(D_METHOD("swap_row", "index_1", "index_2"), &MLPPMatrix::swap_row); ClassDB::bind_method(D_METHOD("clear"), &MLPPMatrix::clear); @@ -40,7 +40,12 @@ void MLPPMatrix::_bind_methods() { ClassDB::bind_method(D_METHOD("get_element", "index_x", "index_y"), &MLPPMatrix::get_element_bind); ClassDB::bind_method(D_METHOD("set_element", 
"index_x", "index_y", "val"), &MLPPMatrix::set_element_bind); + ClassDB::bind_method(D_METHOD("get_row_pool_vector", "index_y"), &MLPPMatrix::get_row_pool_vector); + ClassDB::bind_method(D_METHOD("get_row_mlpp_vector", "index_y"), &MLPPMatrix::get_row_mlpp_vector); + ClassDB::bind_method(D_METHOD("get_row_into_mlpp_vector", "index_y", "target"), &MLPPMatrix::get_row_into_mlpp_vector); + ClassDB::bind_method(D_METHOD("set_row_pool_vector", "index_y", "row"), &MLPPMatrix::set_row_pool_vector); + ClassDB::bind_method(D_METHOD("set_row_mlpp_vector", "index_y", "row"), &MLPPMatrix::set_row_mlpp_vector); ClassDB::bind_method(D_METHOD("fill", "val"), &MLPPMatrix::fill); diff --git a/mlpp/lin_alg/mlpp_matrix.h b/mlpp/lin_alg/mlpp_matrix.h index e154a14..698fe5a 100644 --- a/mlpp/lin_alg/mlpp_matrix.h +++ b/mlpp/lin_alg/mlpp_matrix.h @@ -21,7 +21,7 @@ class MLPPMatrix : public Reference { GDCLASS(MLPPMatrix, Reference); public: - real_t *ptr() { + real_t *ptrw() { return _data; } @@ -95,7 +95,7 @@ public: // Removes the item copying the last value into the position of the one to // remove. It's generally faster than `remove`. 
- void remove_unordered(int p_index) { + void remove_row_unordered(int p_index) { ERR_FAIL_INDEX(p_index, _size.y); --_size.y; @@ -203,6 +203,88 @@ public: _data[p_index_x * p_index_y] = p_val; } + _FORCE_INLINE_ Vector get_row_vector(int p_index_y) { + ERR_FAIL_INDEX_V(p_index_y, _size.y, Vector()); + + Vector ret; + + if (unlikely(_size.x == 0)) { + return ret; + } + + ret.resize(_size.x); + + int ind_start = p_index_y * _size.x; + + real_t *row_ptr = ret.ptrw(); + + for (int i = 0; i < _size.x; ++i) { + row_ptr[i] = _data[ind_start + i]; + } + return ret; + } + + _FORCE_INLINE_ PoolRealArray get_row_pool_vector(int p_index_y) { + ERR_FAIL_INDEX_V(p_index_y, _size.y, PoolRealArray()); + + PoolRealArray ret; + + if (unlikely(_size.x == 0)) { + return ret; + } + + ret.resize(_size.x); + + int ind_start = p_index_y * _size.x; + + PoolRealArray::Write w = ret.write(); + real_t *row_ptr = w.ptr(); + + for (int i = 0; i < _size.x; ++i) { + row_ptr[i] = _data[ind_start + i]; + } + return ret; + } + + _FORCE_INLINE_ Ref get_row_mlpp_vector(int p_index_y) { + ERR_FAIL_INDEX_V(p_index_y, _size.y, Ref()); + + Ref ret; + ret.instance(); + + if (unlikely(_size.x == 0)) { + return ret; + } + + ret->resize(_size.x); + + int ind_start = p_index_y * _size.x; + + real_t *row_ptr = ret->ptrw(); + + for (int i = 0; i < _size.x; ++i) { + row_ptr[i] = _data[ind_start + i]; + } + return ret; + } + + _FORCE_INLINE_ void get_row_into_mlpp_vector(int p_index_y, Ref target) const { + ERR_FAIL_COND(!target.is_valid()); + ERR_FAIL_INDEX(p_index_y, _size.y); + + if (unlikely(target->size() != _size.x)) { + target->resize(_size.x); + } + + int ind_start = p_index_y * _size.x; + + real_t *row_ptr = target->ptrw(); + + for (int i = 0; i < _size.x; ++i) { + row_ptr[i] = _data[ind_start + i]; + } + } + _FORCE_INLINE_ void set_row_vector(int p_index_y, const Vector &p_row) { ERR_FAIL_COND(p_row.size() != _size.x); ERR_FAIL_INDEX(p_index_y, _size.y); @@ -230,6 +312,20 @@ public: } } + _FORCE_INLINE_ void set_row_mlpp_vector(int p_index_y, 
const Ref &p_row) { + ERR_FAIL_COND(!p_row.is_valid()); + ERR_FAIL_COND(p_row->size() != _size.x); + ERR_FAIL_INDEX(p_index_y, _size.y); + + int ind_start = p_index_y * _size.x; + + const real_t *row_ptr = p_row->ptr(); + + for (int i = 0; i < _size.x; ++i) { + _data[ind_start + i] = row_ptr[i]; + } + } + void fill(real_t p_val) { int ds = data_size(); for (int i = 0; i < ds; i++) { diff --git a/mlpp/lin_alg/mlpp_vector.h b/mlpp/lin_alg/mlpp_vector.h index d42f3f1..f919c25 100644 --- a/mlpp/lin_alg/mlpp_vector.h +++ b/mlpp/lin_alg/mlpp_vector.h @@ -18,7 +18,7 @@ class MLPPVector : public Reference { GDCLASS(MLPPVector, Reference); public: - real_t *ptr() { + real_t *ptrw() { return _data; } diff --git a/mlpp/utilities/utilities.cpp b/mlpp/utilities/utilities.cpp index cfba97f..a3b80a5 100644 --- a/mlpp/utilities/utilities.cpp +++ b/mlpp/utilities/utilities.cpp @@ -5,13 +5,14 @@ // #include "utilities.h" + +#include "core/math/math_funcs.h" + #include #include #include #include - - std::vector MLPPUtilities::weightInitialization(int n, std::string type) { std::random_device rd; std::default_random_engine generator(rd()); @@ -132,6 +133,50 @@ real_t MLPPUtilities::performance(std::vector> y_hat, std::v return correct / y_hat.size(); } +real_t MLPPUtilities::performance_vec(const Ref &y_hat, const Ref &output_set) { + ERR_FAIL_COND_V(!y_hat.is_valid(), 0); + ERR_FAIL_COND_V(!output_set.is_valid(), 0); + + real_t correct = 0; + for (int i = 0; i < y_hat->size(); i++) { + if (Math::is_equal_approx(y_hat->get_element(i), output_set->get_element(i))) { + correct++; + } + } + return correct / y_hat->size(); +} +real_t MLPPUtilities::performance_mat(const Ref &y_hat, const Ref &y) { + ERR_FAIL_COND_V(!y_hat.is_valid(), 0); + ERR_FAIL_COND_V(!y.is_valid(), 0); + + real_t correct = 0; + for (int i = 0; i < y_hat->size().y; i++) { + int sub_correct = 0; + + for (int j = 0; j < y_hat->size().x; j++) { + if (Math::round(y_hat->get_element(i, j)) == y->get_element(i, j)) { + 
sub_correct++; + } + + if (sub_correct == y_hat->size().x) { + correct++; + } + } + } + return correct / y_hat->size().y; +} +real_t MLPPUtilities::performance_pool_int_array_vec(PoolIntArray y_hat, const Ref &output_set) { + ERR_FAIL_COND_V(!output_set.is_valid(), 0); + + real_t correct = 0; + for (int i = 0; i < y_hat.size(); i++) { + if (y_hat[i] == Math::round(output_set->get_element(i))) { + correct++; + } + } + return correct / y_hat.size(); +} + void MLPPUtilities::saveParameters(std::string fileName, std::vector weights, real_t bias, bool app, int layer) { std::string layer_info = ""; std::ofstream saveFile; diff --git a/mlpp/utilities/utilities.h b/mlpp/utilities/utilities.h index 2a9f431..816b1dc 100644 --- a/mlpp/utilities/utilities.h +++ b/mlpp/utilities/utilities.h @@ -10,6 +10,11 @@ #include "core/math/math_defs.h" +#include "core/containers/vector.h" +#include "core/variant/variant.h" + +#include "../lin_alg/mlpp_matrix.h" +#include "../lin_alg/mlpp_vector.h" #include #include @@ -29,6 +34,10 @@ public: real_t performance(std::vector y_hat, std::vector y); real_t performance(std::vector> y_hat, std::vector> y); + real_t performance_vec(const Ref &y_hat, const Ref &output_set); + real_t performance_mat(const Ref &y_hat, const Ref &y); + real_t performance_pool_int_array_vec(PoolIntArray y_hat, const Ref &output_set); + // Parameter Saving Functions void saveParameters(std::string fileName, std::vector weights, real_t bias, bool app = 0, int layer = -1); void saveParameters(std::string fileName, std::vector weights, std::vector initial, real_t bias, bool app = 0, int layer = -1); diff --git a/register_types.cpp b/register_types.cpp index 5dcd88c..67379ce 100644 --- a/register_types.cpp +++ b/register_types.cpp @@ -24,8 +24,10 @@ SOFTWARE. 
#include "register_types.h" #include "mlpp/data/data.h" -#include "mlpp/lin_alg/mlpp_vector.h" #include "mlpp/lin_alg/mlpp_matrix.h" +#include "mlpp/lin_alg/mlpp_vector.h" + +#include "mlpp/knn/knn.h" #include "test/mlpp_tests.h" @@ -34,6 +36,8 @@ void register_pmlpp_types(ModuleRegistrationLevel p_level) { ClassDB::register_class(); ClassDB::register_class(); + ClassDB::register_class(); + ClassDB::register_class(); ClassDB::register_class(); ClassDB::register_class(); diff --git a/test/mlpp_tests.cpp b/test/mlpp_tests.cpp index 44e7e7e..74eafa7 100644 --- a/test/mlpp_tests.cpp +++ b/test/mlpp_tests.cpp @@ -553,11 +553,39 @@ void MLPPTests::test_knn(bool ui) { MLPPLinAlg alg; // kNN - std::vector> inputSet = { { 1, 2, 3, 4, 5, 6, 7, 8 }, { 0, 0, 0, 0, 1, 1, 1, 1 } }; + std::vector> inputSet = { + { 1, 2, 3, 4, 5, 6, 7, 8 }, + { 0, 0, 0, 0, 1, 1, 1, 1 } + }; std::vector outputSet = { 0, 0, 0, 0, 1, 1, 1, 1 }; - MLPPKNN knn(alg.transpose(inputSet), outputSet, 8); - alg.printVector(knn.modelSetTest(alg.transpose(inputSet))); - std::cout << "ACCURACY: " << 100 * knn.score() << "%" << std::endl; + + Ref ism; + ism.instance(); + ism->set_from_std_vectors(alg.transpose(inputSet)); + + //ERR_PRINT(ism->to_string()); + + Ref osm; + osm.instance(); + osm->set_from_std_vector(outputSet); + + //ERR_PRINT(osm->to_string()); + + Ref knn; + knn.instance(); + + knn->set_k(7); + knn->set_input_set(ism); + knn->set_output_set(osm); + + PoolIntArray res = knn->model_set_test(ism); + + ERR_PRINT(String(Variant(res))); + ERR_PRINT("ACCURACY: " + itos(100 * knn->score()) + "%"); + + //(alg.transpose(inputSet), outputSet, 8); + //alg.printVector(knn.modelSetTest(alg.transpose(inputSet))); + //std::cout << "ACCURACY: " << 100 * knn.score() << "%" << std::endl; } void MLPPTests::test_convolution_tensors_etc() {