From 27d187c67ae98391f285fabbb6f28a7cb7894240 Mon Sep 17 00:00:00 2001 From: Relintai Date: Sun, 29 Jan 2023 15:46:55 +0100 Subject: [PATCH] Ported MLPPKMeans logic. --- mlpp/kmeans/kmeans.cpp | 448 ++++++++++++++++++++++++++++++++++- mlpp/kmeans/kmeans.h | 6 +- mlpp/lin_alg/lin_alg.cpp | 135 +++++++++++ mlpp/lin_alg/lin_alg.h | 8 + mlpp/lin_alg/mlpp_vector.cpp | 2 + mlpp/lin_alg/mlpp_vector.h | 43 +++- mlpp/utilities/utilities.cpp | 9 + mlpp/utilities/utilities.h | 2 + test/mlpp_tests.cpp | 27 ++- 9 files changed, 651 insertions(+), 29 deletions(-) diff --git a/mlpp/kmeans/kmeans.cpp b/mlpp/kmeans/kmeans.cpp index d0db130..597da47 100644 --- a/mlpp/kmeans/kmeans.cpp +++ b/mlpp/kmeans/kmeans.cpp @@ -8,6 +8,8 @@ #include "../lin_alg/lin_alg.h" #include "../utilities/utilities.h" +#include "core/math/random_pcg.h" + #include #include #include @@ -37,29 +39,259 @@ void MLPPKMeans::set_mean_type(const MLPPKMeans::MeanType val) { } void MLPPKMeans::initialize() { + ERR_FAIL_COND(!_input_set.is_valid()); + if (_mean_type == MEAN_TYPE_KMEANSPP) { - _kmeanspp_initialization(_k); + _kmeanspp_initialization(); } else { - _centroid_initialization(_k); + _centroid_initialization(); } + + _initialized = true; } Ref MLPPKMeans::model_set_test(const Ref &X) { - return Ref(); + ERR_FAIL_COND_V(!X.is_valid(), Ref()); + ERR_FAIL_COND_V(!_initialized, Ref()); + + MLPPLinAlg alg; + + int input_set_size_y = _input_set->size().y; + + Ref closest_centroids; + closest_centroids.instance(); + closest_centroids->resize(Size2i(_mu->size().x, input_set_size_y)); + + Ref closest_centroid; + closest_centroid.instance(); + closest_centroid->resize(_mu->size().x); + + Ref tmp_xiv; + tmp_xiv.instance(); + tmp_xiv->resize(X->size().x); + + Ref tmp_mujv; + tmp_mujv.instance(); + tmp_mujv->resize(_mu->size().x); + + int r0_size = _r->size().x; + + for (int i = 0; i < input_set_size_y; ++i) { + _mu->get_row_into_mlpp_vector(0, closest_centroid); + X->get_row_into_mlpp_vector(i, tmp_xiv); + + for (int j = 0; j < r0_size; ++j) { + _mu->get_row_into_mlpp_vector(j, tmp_mujv); + + bool is_centroid_closer = alg.euclidean_distance(tmp_xiv, tmp_mujv) < alg.euclidean_distance(tmp_xiv, closest_centroid); + + if (is_centroid_closer) { + closest_centroid->set_from_mlpp_vector(tmp_mujv); + } + } + + closest_centroids->set_row_mlpp_vector(i, closest_centroid); + } + + return closest_centroids; } Ref MLPPKMeans::model_test(const Ref &x) { - return Ref(); + ERR_FAIL_COND_V(!x.is_valid(), Ref()); + ERR_FAIL_COND_V(!_initialized, Ref()); + + MLPPLinAlg alg; + + Ref closest_centroid; + closest_centroid.instance(); + closest_centroid->resize(_mu->size().x); + + _mu->get_row_into_mlpp_vector(0, closest_centroid); + + int mu_size_y = _mu->size().y; + + Ref tmp_mujv; + tmp_mujv.instance(); + tmp_mujv->resize(_mu->size().x); + + for (int j = 0; j < mu_size_y; ++j) { + _mu->get_row_into_mlpp_vector(j, tmp_mujv); + + if (alg.euclidean_distance(x, tmp_mujv) < alg.euclidean_distance(x, closest_centroid)) { + closest_centroid->set_from_mlpp_vector(tmp_mujv); + } + } + + return closest_centroid; } void MLPPKMeans::train(int epoch_num, bool UI) { + ERR_FAIL_COND(!_input_set.is_valid()); + + if (!_initialized) { + initialize(); + } + + real_t cost_prev = 0; + int epoch = 1; + + _evaluate(); + + while (true) { + // STEPS OF THE ALGORITHM + // 1. DETERMINE r_nk + // 2. DETERMINE J + // 3. DETERMINE mu_k + + // STOP IF CONVERGED, ELSE REPEAT + + cost_prev = _cost(); + + _compute_mu(); + _evaluate(); + + // UI PORTION + if (UI) { + MLPPUtilities::cost_info(epoch, cost_prev, _cost()); + } + + epoch++; + + if (epoch > epoch_num) { + break; + } + } } + real_t MLPPKMeans::score() { - return 0; + return _cost(); } + Ref MLPPKMeans::silhouette_scores() { - return Ref(); + ERR_FAIL_COND_V(!_initialized, Ref()); + + MLPPLinAlg alg; + + Ref closest_centroids = model_set_test(_input_set); + + ERR_FAIL_COND_V(!closest_centroids.is_valid(), Ref()); + + int input_set_size_y = _input_set->size().y; + int input_set_size_x = _input_set->size().x; + + int mu_size_y = _mu->size().y; + + int closest_centroids_size_y = closest_centroids->size().y; + + Ref silhouette_scores; + silhouette_scores.instance(); + silhouette_scores->resize(input_set_size_y); + + Ref input_set_i_tempv; + input_set_i_tempv.instance(); + input_set_i_tempv->resize(input_set_size_x); + + Ref input_set_j_tempv; + input_set_j_tempv.instance(); + input_set_j_tempv->resize(input_set_size_x); + + Ref input_set_k_tempv; + input_set_k_tempv.instance(); + input_set_k_tempv->resize(input_set_size_x); + + Ref r_i_tempv; + r_i_tempv.instance(); + r_i_tempv->resize(_r->size().x); + + Ref r_j_tempv; + r_j_tempv.instance(); + r_j_tempv->resize(_r->size().x); + + Ref closest_centroids_i_tempv; + closest_centroids_i_tempv.instance(); + closest_centroids_i_tempv->resize(closest_centroids->size().x); + + Ref closest_centroids_k_tempv; + closest_centroids_k_tempv.instance(); + closest_centroids_k_tempv->resize(closest_centroids->size().x); + + Ref mu_j_tempv; + mu_j_tempv.instance(); + mu_j_tempv->resize(_mu->size().x); + + for (int i = 0; i < input_set_size_y; ++i) { + _r->get_row_into_mlpp_vector(i, r_i_tempv); + _input_set->get_row_into_mlpp_vector(i, input_set_i_tempv); + + // COMPUTING a[i] + real_t a = 0; + for (int j = 0; j < input_set_size_y; ++j) { + if (i == j) { + continue; + } + + _r->get_row_into_mlpp_vector(j, r_j_tempv); + + if (r_i_tempv->is_equal_approx(r_j_tempv)) { + _input_set->get_row_into_mlpp_vector(j, input_set_j_tempv); + + a += alg.euclidean_distance(input_set_i_tempv, input_set_j_tempv); + } + } + + // NORMALIZE a[i] + a /= closest_centroids->size().x - 1; + + closest_centroids->get_row_into_mlpp_vector(i, closest_centroids_i_tempv); + + // COMPUTING b[i] + real_t b = INT_MAX; + for (int j = 0; j < mu_size_y; ++j) { + _mu->get_row_into_mlpp_vector(j, mu_j_tempv); + + if (!closest_centroids_i_tempv->is_equal_approx(mu_j_tempv)) { + real_t sum = 0; + for (int k = 0; k < input_set_size_y; ++k) { + _input_set->get_row_into_mlpp_vector(k, input_set_k_tempv); + + sum += alg.euclidean_distance(input_set_i_tempv, input_set_k_tempv); + } + + // NORMALIZE b[i] + real_t k_cluster_size = 0; + for (int k = 0; k < closest_centroids_size_y; ++k) { + _input_set->get_row_into_mlpp_vector(k, closest_centroids_k_tempv); + + if (closest_centroids_k_tempv->is_equal_approx(mu_j_tempv)) { + ++k_cluster_size; + } + } + + if (sum / k_cluster_size < b) { + b = sum / k_cluster_size; + } + } + } + + silhouette_scores->set_element(i, (b - a) / fmax(a, b)); + + // Or the expanded version: + // if(a < b) { + // silhouette_scores->set_element(i, 1 - a/b); + // } + // else if(a == b){ + // silhouette_scores->set_element(i, 0); + // } + // else{ + // silhouette_scores->set_element(i, b/a - 1); + // } + } + + return silhouette_scores; } MLPPKMeans::MLPPKMeans() { + _mu.instance(); + _r.instance(); + _accuracy_threshold = 0; _k = 0; _initialized = false; @@ -69,17 +301,211 @@ MLPPKMeans::MLPPKMeans() { MLPPKMeans::~MLPPKMeans() { } +// This simply computes r_nk void MLPPKMeans::_evaluate() { -} -void MLPPKMeans::_compute_mu() { + ERR_FAIL_COND(!_initialized); + + MLPPLinAlg alg; + + if (_r->size() != Size2i(_k, _input_set->size().y)) { + _r->resize(Size2i(_k, _input_set->size().y)); + } + + int r_size_y = _r->size().y; + int r_size_x = _r->size().x; + + Ref closest_centroid; + closest_centroid.instance(); + closest_centroid->resize(_mu->size().x); + + Ref input_set_i_tempv; + input_set_i_tempv.instance(); + input_set_i_tempv->resize(_input_set->size().x); + + Ref mu_j_tempv; + mu_j_tempv.instance(); + mu_j_tempv->resize(_mu->size().x); + + real_t closest_centroid_current_dist = 0; + int closest_centroid_index = 0; + + _r->fill(0); + + for (int i = 0; i < r_size_y; ++i) { + _mu->get_row_into_mlpp_vector(0, closest_centroid); + _input_set->get_row_into_mlpp_vector(i, input_set_i_tempv); + + closest_centroid_current_dist = alg.euclidean_distance(input_set_i_tempv, closest_centroid); + + for (int j = 0; j < r_size_x; ++j) { + _mu->get_row_into_mlpp_vector(j, mu_j_tempv); + + bool is_centroid_closer = alg.euclidean_distance(input_set_i_tempv, mu_j_tempv) < closest_centroid_current_dist; + + if (is_centroid_closer) { + _mu->get_row_into_mlpp_vector(j, closest_centroid); + closest_centroid_current_dist = alg.euclidean_distance(input_set_i_tempv, closest_centroid); + closest_centroid_index = j; + } + } + + _r->set_element(i, closest_centroid_index, 1); + } } -void MLPPKMeans::_centroid_initialization(int k) { +// This simply computes or re-computes mu_k +void MLPPKMeans::_compute_mu() { + MLPPLinAlg alg; + + int mu_size_y = _mu->size().y; + int r_size_y = _r->size().y; + + Ref num; + num.instance(); + num->resize(_r->size().x); + + Ref input_set_j_tempv; + input_set_j_tempv.instance(); + input_set_j_tempv->resize(_input_set->size().x); + + Ref mat_tempv; + mat_tempv.instance(); + mat_tempv->resize(_input_set->size().x); + + Ref mu_tempv; + mu_tempv.instance(); + mu_tempv->resize(_mu->size().x); + + for (int i = 0; i < mu_size_y; ++i) { + num->fill(0); + + real_t den = 0; + for (int j = 0; j < r_size_y; ++j) { + _input_set->get_row_into_mlpp_vector(j, input_set_j_tempv); + + real_t r_j_i = _r->get_element(j, i); + + alg.scalar_multiplyv(_r->get_element(j, i), input_set_j_tempv, mat_tempv); + alg.additionv(num, mat_tempv, num); + + den += r_j_i; + } + + alg.scalar_multiplyv(real_t(1) / real_t(den), num, mu_tempv); + + _mu->set_row_mlpp_vector(i, mu_tempv); + } } -void MLPPKMeans::_kmeanspp_initialization(int k) { + +void MLPPKMeans::_centroid_initialization() { + RandomPCG rand; + rand.randomize(); + + Size2i mu_size = Size2i(_input_set->size().x, _k); + + if (_mu->size() != mu_size) { + _mu->resize(mu_size); + } + + Ref mu_tempv; + mu_tempv.instance(); + mu_tempv->resize(_mu->size().x); + + int input_set_size_y_rand = _input_set->size().y - 1; + + for (int i = 0; i < _k; ++i) { + int indx = rand.random(0, input_set_size_y_rand); + + _input_set->get_row_into_mlpp_vector(indx, mu_tempv); + _mu->set_row_mlpp_vector(i, mu_tempv); + } +} + +void MLPPKMeans::_kmeanspp_initialization() { + MLPPLinAlg alg; + + RandomPCG rand; + rand.randomize(); + + Size2i mu_size = Size2i(_input_set->size().x, _k); + + if (_mu->size() != mu_size) { + _mu->resize(mu_size); + } + + int input_set_size_y = _input_set->size().y; + + Ref mu_tempv; + mu_tempv.instance(); + mu_tempv->resize(_mu->size().x); + + _input_set->get_row_into_mlpp_vector(rand.random(0, input_set_size_y - 1), mu_tempv); + _mu->set_row_mlpp_vector(0, mu_tempv); + + Ref input_set_j_tempv; + input_set_j_tempv.instance(); + input_set_j_tempv->resize(_input_set->size().x); + + Ref farthest_centroid; + farthest_centroid.instance(); + farthest_centroid->resize(_input_set->size().x); + + for (int i = 1; i < _k - 1; ++i) { + for (int j = 0; j < input_set_size_y; ++j) { + _input_set->get_row_into_mlpp_vector(j, input_set_j_tempv); + + real_t max_dist = 0; + // SUM ALL THE SQUARED DISTANCES, CHOOSE THE ONE THAT'S FARTHEST + // AS TO SPREAD OUT THE CLUSTER CENTROIDS. + real_t sum = 0; + for (int k = 0; k < i; k++) { + _mu->get_row_into_mlpp_vector(k, mu_tempv); + + sum += alg.euclidean_distance(input_set_j_tempv, mu_tempv); + } + + if (sum * sum > max_dist) { + farthest_centroid->set_from_mlpp_vector(input_set_j_tempv); + max_dist = sum * sum; + } + } + + _mu->set_row_mlpp_vector(i, farthest_centroid); + } } real_t MLPPKMeans::_cost() { - return 0; + ERR_FAIL_COND_V(!_initialized, 0); + + MLPPLinAlg alg; + + Ref input_set_i_tempv; + input_set_i_tempv.instance(); + input_set_i_tempv->resize(_input_set->size().x); + + Ref mu_j_tempv; + mu_j_tempv.instance(); + mu_j_tempv->resize(_mu->size().x); + + Ref sub_tempv; + sub_tempv.instance(); + sub_tempv->resize(_input_set->size().x); + + int r_size_y = _r->size().y; + int r_size_x = _r->size().x; + + real_t sum = 0; + for (int i = 0; i < r_size_y; i++) { + _input_set->get_row_into_mlpp_vector(i, input_set_i_tempv); + + for (int j = 0; j < r_size_x; j++) { + _mu->get_row_into_mlpp_vector(j, mu_j_tempv); + + alg.subtractionv(input_set_i_tempv, mu_j_tempv, sub_tempv); + sum += _r->get_element(i, j) * alg.norm_sqv(sub_tempv); + } + } + + return sum; } void MLPPKMeans::_bind_methods() { diff --git a/mlpp/kmeans/kmeans.h b/mlpp/kmeans/kmeans.h index 8dbea48..4b86fd6 100644 --- a/mlpp/kmeans/kmeans.h +++ b/mlpp/kmeans/kmeans.h @@ -46,13 +46,11 @@ public: ~MLPPKMeans(); protected: - - void _evaluate(); void _compute_mu(); - void _centroid_initialization(int k); - void _kmeanspp_initialization(int k); + void _centroid_initialization(); + void _kmeanspp_initialization(); real_t _cost(); static void _bind_methods(); diff --git a/mlpp/lin_alg/lin_alg.cpp b/mlpp/lin_alg/lin_alg.cpp index 13c0e5c..d4132fa 100644 --- a/mlpp/lin_alg/lin_alg.cpp +++ b/mlpp/lin_alg/lin_alg.cpp @@ -1002,6 +1002,42 @@ std::vector MLPPLinAlg::scalarMultiply(real_t scalar, std::vector MLPPLinAlg::scalar_multiplynv(real_t scalar, const Ref &a) { + ERR_FAIL_COND_V(!a.is_valid(), Ref()); + + Ref out; + out.instance(); + + int size = a->size(); + + out->resize(size); + + const real_t *a_ptr = a->ptr(); + real_t *out_ptr = out->ptrw(); + + for (int i = 0; i < size; ++i) { + out_ptr[i] = a_ptr[i] * scalar; + } + + return out; +} +void MLPPLinAlg::scalar_multiplyv(real_t scalar, const Ref &a, Ref out) { + ERR_FAIL_COND(!a.is_valid() || !out.is_valid()); + + int size = a->size(); + + if (unlikely(out->size() != size)) { + out->resize(size); + } + + const real_t *a_ptr = a->ptr(); + real_t *out_ptr = out->ptrw(); + + for (int i = 0; i < size; ++i) { + out_ptr[i] = a_ptr[i] * scalar; + } +} + std::vector MLPPLinAlg::scalarAdd(real_t scalar, std::vector a) { for (int i = 0; i < a.size(); i++) { a[i] += scalar; @@ -1018,6 +1054,47 @@ std::vector MLPPLinAlg::addition(std::vector a, std::vector MLPPLinAlg::additionnv(const Ref &a, const Ref &b) { + ERR_FAIL_COND_V(!a.is_valid() || !b.is_valid(), Ref()); + + int size = a->size(); + + ERR_FAIL_COND_V(size != b->size(), Ref()); + + Ref out; + out.instance(); + out->resize(size); + + const real_t *a_ptr = a->ptr(); + const real_t *b_ptr = b->ptr(); + real_t *out_ptr = out->ptrw(); + + for (int i = 0; i < size; ++i) { + out_ptr[i] = a_ptr[i] + b_ptr[i]; + } + + return out; +} +void MLPPLinAlg::additionv(const Ref &a, const Ref &b, Ref out) { + ERR_FAIL_COND(!a.is_valid() || !b.is_valid() || !out.is_valid()); + + int size = a->size(); + + ERR_FAIL_COND(size != b->size()); + + if (unlikely(out->size() != size)) { + out->resize(size); + } + + const real_t *a_ptr = a->ptr(); + const real_t *b_ptr = b->ptr(); + real_t *out_ptr = out->ptrw(); + + for (int i = 0; i < size; ++i) { + out_ptr[i] = a_ptr[i] + b_ptr[i]; + } +} + std::vector MLPPLinAlg::subtraction(std::vector a, std::vector b) { std::vector c; c.resize(a.size()); @@ -1027,6 +1104,52 @@ std::vector MLPPLinAlg::subtraction(std::vector a, std::vector MLPPLinAlg::subtractionnv(const Ref &a, const Ref &b) { + ERR_FAIL_COND_V(!a.is_valid() || !b.is_valid(), Ref()); + + int size = a->size(); + + ERR_FAIL_COND_V(size != b->size(), Ref()); + + Ref out; + out.instance(); + + if (unlikely(size == 0)) { + return out; + } + + out->resize(size); + + const real_t *a_ptr = a->ptr(); + const real_t *b_ptr = b->ptr(); + real_t *out_ptr = out->ptrw(); + + for (int i = 0; i < size; ++i) { + out_ptr[i] = a_ptr[i] - b_ptr[i]; + } + + return out; +} +void MLPPLinAlg::subtractionv(const Ref &a, const Ref &b, Ref out) { + ERR_FAIL_COND(!a.is_valid() || !b.is_valid() || !out.is_valid()); + + int size = a->size(); + + ERR_FAIL_COND(size != b->size()); + + if (unlikely(out->size() != size)) { + out->resize(size); + } + + const real_t *a_ptr = a->ptr(); + const real_t *b_ptr = b->ptr(); + real_t *out_ptr = out->ptrw(); + + for (int i = 0; i < size; ++i) { + out_ptr[i] = a_ptr[i] - b_ptr[i]; + } +} + std::vector MLPPLinAlg::subtractMatrixRows(std::vector a, std::vector> B) { for (int i = 0; i < B.size(); i++) { a = subtraction(a, B[i]); @@ -1271,6 +1394,18 @@ real_t MLPPLinAlg::norm_sq(std::vector a) { } return n_sq; } +real_t MLPPLinAlg::norm_sqv(const Ref &a) { + ERR_FAIL_COND_V(!a.is_valid(), 0); + + int size = a->size(); + const real_t *a_ptr = a->ptr(); + + real_t n_sq = 0; + for (int i = 0; i < size; ++i) { + n_sq += a_ptr[i] * a_ptr[i]; + } + return n_sq; +} real_t MLPPLinAlg::sum_elements(std::vector a) { real_t sum = 0; diff --git a/mlpp/lin_alg/lin_alg.h b/mlpp/lin_alg/lin_alg.h index dd8adfd..f73e105 100644 --- a/mlpp/lin_alg/lin_alg.h +++ b/mlpp/lin_alg/lin_alg.h @@ -164,13 +164,20 @@ public: std::vector elementWiseDivision(std::vector a, std::vector b); std::vector scalarMultiply(real_t scalar, std::vector a); + Ref scalar_multiplynv(real_t scalar, const Ref &a); + void scalar_multiplyv(real_t scalar, const Ref &a, Ref out); std::vector scalarAdd(real_t scalar, std::vector a); std::vector addition(std::vector a, std::vector b); + Ref additionnv(const Ref &a, const Ref &b); + void additionv(const Ref &a, const Ref &b, Ref out); std::vector subtraction(std::vector a, std::vector b); + Ref subtractionnv(const Ref &a, const Ref &b); + void subtractionv(const Ref &a, const Ref &b, Ref out); + std::vector subtractMatrixRows(std::vector a, std::vector> B); std::vector log(std::vector a); @@ -220,6 +227,7 @@ public: real_t norm_2(std::vector a); real_t norm_sq(std::vector a); + real_t norm_sqv(const Ref &a); real_t sum_elements(std::vector a); diff --git a/mlpp/lin_alg/mlpp_vector.cpp b/mlpp/lin_alg/mlpp_vector.cpp index 8eac4f3..3a151e0 100644 --- a/mlpp/lin_alg/mlpp_vector.cpp +++ b/mlpp/lin_alg/mlpp_vector.cpp @@ -46,4 +46,6 @@ void MLPPVector::_bind_methods() { ClassDB::bind_method(D_METHOD("set_from_mlpp_vector", "from"), &MLPPVector::set_from_mlpp_vector); ClassDB::bind_method(D_METHOD("set_from_pool_vector", "from"), &MLPPVector::set_from_pool_vector); + + ClassDB::bind_method(D_METHOD("is_equal_approx", "with", "tolerance"), &MLPPVector::is_equal_approx, CMP_EPSILON); } diff --git a/mlpp/lin_alg/mlpp_vector.h b/mlpp/lin_alg/mlpp_vector.h index f919c25..68b2418 100644 --- a/mlpp/lin_alg/mlpp_vector.h +++ b/mlpp/lin_alg/mlpp_vector.h @@ -2,6 +2,7 @@ #define MLPP_VECTOR_H #include "core/math/math_defs.h" +#include "core/math/math_funcs.h" #include "core/containers/pool_vector.h" #include "core/containers/sort_array.h" @@ -243,7 +244,10 @@ public: } _FORCE_INLINE_ void set_from_mlpp_vectorr(const MLPPVector &p_from) { - resize(p_from.size()); + if (_size != p_from.size()) { + resize(p_from.size()); + } + for (int i = 0; i < p_from._size; i++) { _data[i] = p_from._data[i]; } @@ -251,13 +255,21 @@ public: _FORCE_INLINE_ void set_from_mlpp_vector(const Ref &p_from) { ERR_FAIL_COND(!p_from.is_valid()); - resize(p_from->size()); + + if (_size != p_from->size()) { + resize(p_from->size()); + } + for (int i = 0; i < p_from->_size; i++) { _data[i] = p_from->_data[i]; } } _FORCE_INLINE_ void set_from_vector(const Vector &p_from) { + if (_size != p_from.size()) { + resize(p_from.size()); + } + resize(p_from.size()); for (int i = 0; i < _size; i++) { _data[i] = p_from[i]; @@ -265,13 +277,36 @@ public: } _FORCE_INLINE_ void set_from_pool_vector(const PoolRealArray &p_from) { - resize(p_from.size()); - typename PoolRealArray::Read r = p_from.read(); + if (_size != p_from.size()) { + resize(p_from.size()); + } + + PoolRealArray::Read r = p_from.read(); for (int i = 0; i < _size; i++) { _data[i] = r[i]; } } + _FORCE_INLINE_ bool is_equal_approx(const Ref &p_with, real_t tolerance = static_cast(CMP_EPSILON)) const { + ERR_FAIL_COND_V(!p_with.is_valid(), false); + + if (unlikely(this == p_with.ptr())) { + return true; + } + + if (_size != p_with->size()) { + return false; + } + + for (int i = 0; i < _size; ++i) { + if (!Math::is_equal_approx(_data[i], p_with->_data[i], tolerance)) { + return false; + } + } + + return true; + } + String to_string(); _FORCE_INLINE_ MLPPVector() { diff --git a/mlpp/utilities/utilities.cpp b/mlpp/utilities/utilities.cpp index a3b80a5..558b1e0 100644 --- a/mlpp/utilities/utilities.cpp +++ b/mlpp/utilities/utilities.cpp @@ -7,6 +7,7 @@ #include "utilities.h" #include "core/math/math_funcs.h" +#include "core/log/logger.h" #include #include @@ -314,6 +315,14 @@ void MLPPUtilities::CostInfo(int epoch, real_t cost_prev, real_t Cost) { std::cout << Cost << std::endl; } +void MLPPUtilities::cost_info(int epoch, real_t cost_prev, real_t cost) { + String str = "This is epoch: " + itos(epoch) + ","; + str += "The cost function has been minimized by " + String::num(cost_prev - cost); + str += ", Current Cost:" + String::num(cost); + + PLOG_MSG(str); +} + std::vector>> MLPPUtilities::createMiniBatches(std::vector> inputSet, int n_mini_batch) { int n = inputSet.size(); diff --git a/mlpp/utilities/utilities.h b/mlpp/utilities/utilities.h index 816b1dc..1b1c22b 100644 --- a/mlpp/utilities/utilities.h +++ b/mlpp/utilities/utilities.h @@ -12,6 +12,7 @@ #include "core/math/math_defs.h" #include "core/containers/vector.h" #include "core/variant/variant.h" +#include "core/string/ustring.h" #include "../lin_alg/mlpp_matrix.h" #include "../lin_alg/mlpp_vector.h" @@ -48,6 +49,7 @@ public: static void UI(std::vector weights, std::vector initial, real_t bias); static void UI(std::vector>, std::vector bias); static void CostInfo(int epoch, real_t cost_prev, real_t Cost); + static void cost_info(int epoch, real_t cost_prev, real_t cost); static std::vector>> createMiniBatches(std::vector> inputSet, int n_mini_batch); static std::tuple>>, std::vector>> createMiniBatches(std::vector> inputSet, std::vector outputSet, int n_mini_batch); diff --git a/test/mlpp_tests.cpp b/test/mlpp_tests.cpp index 9822cbb..a1962bf 100644 --- a/test/mlpp_tests.cpp +++ b/test/mlpp_tests.cpp @@ -3,6 +3,8 @@ #include "core/math/math_funcs.h" +#include "core/log/logger.h" + //TODO remove #include #include @@ -538,18 +540,23 @@ void MLPPTests::test_naive_bayes() { alg.printVector(GNB.modelSetTest(alg.transpose(inputSet))); } void MLPPTests::test_k_means(bool ui) { - MLPPLinAlg alg; - // KMeans - /* std::vector> inputSet = { { 32, 0, 7 }, { 2, 28, 17 }, { 0, 9, 23 } }; - MLPPKMeans kmeans(inputSet, 3, "KMeans++"); - kmeans.train(3, ui); - std::cout << std::endl; - alg.printMatrix(kmeans.modelSetTest(inputSet)); // Returns the assigned centroids to each of the respective training examples - std::cout << std::endl; - alg.printVector(kmeans.silhouette_scores()); - */ + + Ref input_set; + input_set.instance(); + input_set->set_from_std_vectors(inputSet); + + Ref kmeans; + kmeans.instance(); + kmeans->set_input_set(input_set); + kmeans->set_k(3); + kmeans->set_mean_type(MLPPKMeans::MEAN_TYPE_KMEANSPP); + + kmeans->train(3, ui); + + PLOG_MSG(kmeans->model_set_test(input_set)->to_string()); + PLOG_MSG(kmeans->silhouette_scores()->to_string()); } void MLPPTests::test_knn(bool ui) { MLPPLinAlg alg;