Ported MLPPKMeans logic.

This commit is contained in:
Relintai 2023-01-29 15:46:55 +01:00
parent bd67fcecc6
commit 27d187c67a
9 changed files with 651 additions and 29 deletions

View File

@ -8,6 +8,8 @@
#include "../lin_alg/lin_alg.h"
#include "../utilities/utilities.h"
#include "core/math/random_pcg.h"
#include <climits>
#include <iostream>
#include <random>
@ -37,29 +39,259 @@ void MLPPKMeans::set_mean_type(const MLPPKMeans::MeanType val) {
}
void MLPPKMeans::initialize() {
ERR_FAIL_COND(!_input_set.is_valid());
if (_mean_type == MEAN_TYPE_KMEANSPP) {
_kmeanspp_initialization(_k);
_kmeanspp_initialization();
} else {
_centroid_initialization(_k);
_centroid_initialization();
}
_initialized = true;
}
Ref<MLPPMatrix> MLPPKMeans::model_set_test(const Ref<MLPPMatrix> &X) {
return Ref<MLPPMatrix>();
ERR_FAIL_COND_V(!X.is_valid(), Ref<MLPPMatrix>());
ERR_FAIL_COND_V(!_initialized, Ref<MLPPMatrix>());
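// Returns a matrix with one row per sample in X; row i holds the coordinates of the
// centroid (row of _mu) that is closest to X's row i.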
MLPPLinAlg alg;
int input_set_size_y = _input_set->size().y;
Ref<MLPPMatrix> closest_centroids;
closest_centroids.instance();
closest_centroids->resize(Size2i(_mu->size().x, input_set_size_y));
Ref<MLPPVector> closest_centroid;
closest_centroid.instance();
closest_centroid->resize(_mu->size().x);
Ref<MLPPVector> tmp_xiv;
tmp_xiv.instance();
tmp_xiv->resize(X->size().x);
Ref<MLPPVector> tmp_mujv;
tmp_mujv.instance();
tmp_mujv->resize(_mu->size().x);
int r0_size = _r->size().x;
for (int i = 0; i < input_set_size_y; ++i) {
_mu->get_row_into_mlpp_vector(0, closest_centroid);
X->get_row_into_mlpp_vector(i, tmp_xiv);
for (int j = 0; j < r0_size; ++j) {
_mu->get_row_into_mlpp_vector(j, tmp_mujv);
bool is_centroid_closer = alg.euclidean_distance(tmp_xiv, tmp_mujv) < alg.euclidean_distance(tmp_xiv, closest_centroid);
if (is_centroid_closer) {
closest_centroid->set_from_mlpp_vector(tmp_mujv);
}
}
closest_centroids->set_row_mlpp_vector(i, closest_centroid);
}
return closest_centroids;
}
Ref<MLPPVector> MLPPKMeans::model_test(const Ref<MLPPVector> &x) {
return Ref<MLPPVector>();
ERR_FAIL_COND_V(!x.is_valid(), Ref<MLPPVector>());
ERR_FAIL_COND_V(!_initialized, Ref<MLPPVector>());
MLPPLinAlg alg;
Ref<MLPPVector> closest_centroid;
closest_centroid.instance();
closest_centroid->resize(_mu->size().x);
_mu->get_row_into_mlpp_vector(0, closest_centroid);
int mu_size_y = _mu->size().y;
Ref<MLPPVector> tmp_mujv;
tmp_mujv.instance();
tmp_mujv->resize(_mu->size().x);
for (int j = 0; j < mu_size_y; ++j) {
_mu->get_row_into_mlpp_vector(j, tmp_mujv);
if (alg.euclidean_distance(x, tmp_mujv) < alg.euclidean_distance(x, closest_centroid)) {
closest_centroid->set_from_mlpp_vector(tmp_mujv);
}
}
return closest_centroid;
}
void MLPPKMeans::train(int epoch_num, bool UI) {
ERR_FAIL_COND(!_input_set.is_valid());
if (!_initialized) {
initialize();
}
real_t cost_prev = 0;
int epoch = 1;
_evaluate();
while (true) {
// STEPS OF THE ALGORITHM
// 1. DETERMINE r_nk
// 2. DETERMINE J
// 3. DETERMINE mu_k
// STOP IF CONVERGED, ELSE REPEAT
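// The objective being minimized is J(r, mu) = sum_i sum_k r_ik * ||x_i - mu_k||^2.
// Note that this loop runs for a fixed epoch_num; it does not test J for convergence.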
cost_prev = _cost();
_compute_mu();
_evaluate();
// UI PORTION
if (UI) {
MLPPUtilities::cost_info(epoch, cost_prev, _cost());
}
epoch++;
if (epoch > epoch_num) {
break;
}
}
}
real_t MLPPKMeans::score() {
return 0;
return _cost();
}
Ref<MLPPVector> MLPPKMeans::silhouette_scores() {
return Ref<MLPPVector>();
ERR_FAIL_COND_V(!_initialized, Ref<MLPPVector>());
MLPPLinAlg alg;
Ref<MLPPMatrix> closest_centroids = model_set_test(_input_set);
ERR_FAIL_COND_V(!closest_centroids.is_valid(), Ref<MLPPVector>());
int input_set_size_y = _input_set->size().y;
int input_set_size_x = _input_set->size().x;
int mu_size_y = _mu->size().y;
int closest_centroids_size_y = closest_centroids->size().y;
Ref<MLPPVector> silhouette_scores;
silhouette_scores.instance();
silhouette_scores->resize(input_set_size_y);
Ref<MLPPVector> input_set_i_tempv;
input_set_i_tempv.instance();
input_set_i_tempv->resize(input_set_size_x);
Ref<MLPPVector> input_set_j_tempv;
input_set_j_tempv.instance();
input_set_j_tempv->resize(input_set_size_x);
Ref<MLPPVector> input_set_k_tempv;
input_set_k_tempv.instance();
input_set_k_tempv->resize(input_set_size_x);
Ref<MLPPVector> r_i_tempv;
r_i_tempv.instance();
r_i_tempv->resize(_r->size().x);
Ref<MLPPVector> r_j_tempv;
r_j_tempv.instance();
r_j_tempv->resize(_r->size().x);
Ref<MLPPVector> closest_centroids_i_tempv;
closest_centroids_i_tempv.instance();
closest_centroids_i_tempv->resize(closest_centroids->size().x);
Ref<MLPPVector> closest_centroids_k_tempv;
closest_centroids_k_tempv.instance();
closest_centroids_k_tempv->resize(closest_centroids->size().x);
Ref<MLPPVector> mu_j_tempv;
mu_j_tempv.instance();
mu_j_tempv->resize(_mu->size().x);
for (int i = 0; i < input_set_size_y; ++i) {
_r->get_row_into_mlpp_vector(i, r_i_tempv);
_input_set->get_row_into_mlpp_vector(i, input_set_i_tempv);
// COMPUTING a[i]
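// a[i]: the intra-cluster term of the silhouette; distances from sample i to the other
// samples assigned to the same cluster (matching rows of _r) are accumulated and normalized.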
real_t a = 0;
for (int j = 0; j < input_set_size_y; ++j) {
if (i == j) {
continue;
}
_r->get_row_into_mlpp_vector(j, r_j_tempv);
if (r_i_tempv->is_equal_approx(r_j_tempv)) {
_input_set->get_row_into_mlpp_vector(j, input_set_j_tempv);
a += alg.euclidean_distance(input_set_i_tempv, input_set_j_tempv);
}
}
// NORMALIZE a[i]
a /= closest_centroids->size().x - 1;
closest_centroids->get_row_into_mlpp_vector(i, closest_centroids_i_tempv);
// COMPUTING b[i]
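// b[i]: the inter-cluster term of the silhouette; the smallest normalized distance sum from
// sample i to the members of any cluster it does not belong to. The silhouette score is then
// (b - a) / max(a, b), which lies in [-1, 1].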
real_t b = INT_MAX;
for (int j = 0; j < mu_size_y; ++j) {
_mu->get_row_into_mlpp_vector(j, mu_j_tempv);
if (!closest_centroids_i_tempv->is_equal_approx(mu_j_tempv)) {
real_t sum = 0;
for (int k = 0; k < input_set_size_y; ++k) {
_input_set->get_row_into_mlpp_vector(k, input_set_k_tempv);
sum += alg.euclidean_distance(input_set_i_tempv, input_set_k_tempv);
}
// NORMALIZE b[i]
real_t k_cluster_size = 0;
for (int k = 0; k < closest_centroids_size_y; ++k) {
closest_centroids->get_row_into_mlpp_vector(k, closest_centroids_k_tempv);
if (closest_centroids_k_tempv->is_equal_approx(mu_j_tempv)) {
++k_cluster_size;
}
}
if (sum / k_cluster_size < b) {
b = sum / k_cluster_size;
}
}
}
silhouette_scores->set_element(i, (b - a) / fmax(a, b));
// Or the expanded version:
// if(a < b) {
// silhouette_scores->set_element(i, 1 - a/b);
// }
// else if(a == b){
// silhouette_scores->set_element(i, 0);
// }
// else{
// silhouette_scores->set_element(i, b/a - 1);
// }
}
return silhouette_scores;
}
MLPPKMeans::MLPPKMeans() {
_mu.instance();
_r.instance();
_accuracy_threshold = 0;
_k = 0;
_initialized = false;
@ -69,17 +301,211 @@ MLPPKMeans::MLPPKMeans() {
MLPPKMeans::~MLPPKMeans() {
}
// This simply computes r_nk
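// r_nk is the hard assignment indicator: r_nk = 1 when centroid k is the nearest centroid
// to sample n, and 0 otherwise, so every row of _r contains exactly one 1.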
void MLPPKMeans::_evaluate() {
}
void MLPPKMeans::_compute_mu() {
ERR_FAIL_COND(!_initialized);
MLPPLinAlg alg;
if (_r->size() != Size2i(_k, _input_set->size().y)) {
_r->resize(Size2i(_k, _input_set->size().y));
}
int r_size_y = _r->size().y;
int r_size_x = _r->size().x;
Ref<MLPPVector> closest_centroid;
closest_centroid.instance();
closest_centroid->resize(_mu->size().x);
Ref<MLPPVector> input_set_i_tempv;
input_set_i_tempv.instance();
input_set_i_tempv->resize(_input_set->size().x);
Ref<MLPPVector> mu_j_tempv;
mu_j_tempv.instance();
mu_j_tempv->resize(_mu->size().x);
real_t closest_centroid_current_dist = 0;
int closest_centroid_index = 0;
_r->fill(0);
for (int i = 0; i < r_size_y; ++i) {
_mu->get_row_into_mlpp_vector(0, closest_centroid);
_input_set->get_row_into_mlpp_vector(i, input_set_i_tempv);
closest_centroid_current_dist = alg.euclidean_distance(input_set_i_tempv, closest_centroid);
closest_centroid_index = 0; // reset per sample; otherwise a stale index from the previous row can leak through
for (int j = 0; j < r_size_x; ++j) {
_mu->get_row_into_mlpp_vector(j, mu_j_tempv);
bool is_centroid_closer = alg.euclidean_distance(input_set_i_tempv, mu_j_tempv) < closest_centroid_current_dist;
if (is_centroid_closer) {
_mu->get_row_into_mlpp_vector(j, closest_centroid);
closest_centroid_current_dist = alg.euclidean_distance(input_set_i_tempv, closest_centroid);
closest_centroid_index = j;
}
}
_r->set_element(i, closest_centroid_index, 1);
}
}
void MLPPKMeans::_centroid_initialization(int k) {
// This simply computes or re-computes mu_k
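// mu_k = (sum_n r_nk * x_n) / (sum_n r_nk): each centroid is moved to the mean of the
// samples currently assigned to it.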
void MLPPKMeans::_compute_mu() {
MLPPLinAlg alg;
int mu_size_y = _mu->size().y;
int r_size_y = _r->size().y;
Ref<MLPPVector> num;
num.instance();
num->resize(_r->size().x);
Ref<MLPPVector> input_set_j_tempv;
input_set_j_tempv.instance();
input_set_j_tempv->resize(_input_set->size().x);
Ref<MLPPVector> mat_tempv;
mat_tempv.instance();
mat_tempv->resize(_input_set->size().x);
Ref<MLPPVector> mu_tempv;
mu_tempv.instance();
mu_tempv->resize(_mu->size().x);
for (int i = 0; i < mu_size_y; ++i) {
num->fill(0);
real_t den = 0;
for (int j = 0; j < r_size_y; ++j) {
_input_set->get_row_into_mlpp_vector(j, input_set_j_tempv);
real_t r_j_i = _r->get_element(j, i);
alg.scalar_multiplyv(_r->get_element(j, i), input_set_j_tempv, mat_tempv);
alg.additionv(num, mat_tempv, num);
den += r_j_i;
}
alg.scalar_multiplyv(real_t(1) / real_t(den), num, mu_tempv);
_mu->set_row_mlpp_vector(i, mu_tempv);
}
}
void MLPPKMeans::_kmeanspp_initialization(int k) {
void MLPPKMeans::_centroid_initialization() {
RandomPCG rand;
rand.randomize();
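// Forgy-style initialization: each of the _k centroids is set to a training sample picked
// uniformly at random (the same sample may be picked more than once).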
Size2i mu_size = Size2i(_input_set->size().x, _k);
if (_mu->size() != mu_size) {
_mu->resize(mu_size);
}
Ref<MLPPVector> mu_tempv;
mu_tempv.instance();
mu_tempv->resize(_mu->size().x);
int input_set_size_y_rand = _input_set->size().y - 1;
for (int i = 0; i < _k; ++i) {
int indx = rand.random(0, input_set_size_y_rand);
_input_set->get_row_into_mlpp_vector(indx, mu_tempv);
_mu->set_row_mlpp_vector(i, mu_tempv);
}
}
void MLPPKMeans::_kmeanspp_initialization() {
MLPPLinAlg alg;
RandomPCG rand;
rand.randomize();
Size2i mu_size = Size2i(_input_set->size().x, _k);
if (_mu->size() != mu_size) {
_mu->resize(mu_size);
}
int input_set_size_y = _input_set->size().y;
Ref<MLPPVector> mu_tempv;
mu_tempv.instance();
mu_tempv->resize(_mu->size().x);
_input_set->get_row_into_mlpp_vector(rand.random(0, input_set_size_y - 1), mu_tempv);
_mu->set_row_mlpp_vector(0, mu_tempv);
Ref<MLPPVector> input_set_j_tempv;
input_set_j_tempv.instance();
input_set_j_tempv->resize(_input_set->size().x);
Ref<MLPPVector> farthest_centroid;
farthest_centroid.instance();
farthest_centroid->resize(_input_set->size().x);
for (int i = 1; i < _k; ++i) {
real_t max_dist = 0;
for (int j = 0; j < input_set_size_y; ++j) {
_input_set->get_row_into_mlpp_vector(j, input_set_j_tempv);
// SUM THE DISTANCES TO THE ALREADY CHOSEN CENTROIDS AND PICK THE POINT
// THAT IS FARTHEST, SO AS TO SPREAD OUT THE CLUSTER CENTROIDS.
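// Note: this is a deterministic "farthest point" flavor of k-means++ seeding; classic
// k-means++ instead samples the next centroid with probability proportional to D(x)^2.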
real_t sum = 0;
for (int k = 0; k < i; k++) {
_mu->get_row_into_mlpp_vector(k, mu_tempv);
sum += alg.euclidean_distance(input_set_j_tempv, mu_tempv);
}
if (sum * sum > max_dist) {
farthest_centroid->set_from_mlpp_vector(input_set_j_tempv);
max_dist = sum * sum;
}
}
_mu->set_row_mlpp_vector(i, farthest_centroid);
}
}
real_t MLPPKMeans::_cost() {
return 0;
ERR_FAIL_COND_V(!_initialized, 0);
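// Distortion (inertia): J = sum_i sum_j r_ij * ||x_i - mu_j||^2. Since each row of _r is
// one-hot, only the assigned centroid of each sample contributes. Lower is better.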
MLPPLinAlg alg;
Ref<MLPPVector> input_set_i_tempv;
input_set_i_tempv.instance();
input_set_i_tempv->resize(_input_set->size().x);
Ref<MLPPVector> mu_j_tempv;
mu_j_tempv.instance();
mu_j_tempv->resize(_mu->size().x);
Ref<MLPPVector> sub_tempv;
sub_tempv.instance();
sub_tempv->resize(_input_set->size().x);
int r_size_y = _r->size().y;
int r_size_x = _r->size().x;
real_t sum = 0;
for (int i = 0; i < r_size_y; i++) {
_input_set->get_row_into_mlpp_vector(i, input_set_i_tempv);
for (int j = 0; j < r_size_x; j++) {
_mu->get_row_into_mlpp_vector(j, mu_j_tempv);
alg.subtractionv(input_set_i_tempv, mu_j_tempv, sub_tempv);
sum += _r->get_element(i, j) * alg.norm_sqv(sub_tempv);
}
}
return sum;
}
void MLPPKMeans::_bind_methods() {

View File

@ -46,13 +46,11 @@ public:
~MLPPKMeans();
protected:
void _evaluate();
void _compute_mu();
void _centroid_initialization(int k);
void _kmeanspp_initialization(int k);
void _centroid_initialization();
void _kmeanspp_initialization();
real_t _cost();
static void _bind_methods();

View File

@ -1002,6 +1002,42 @@ std::vector<real_t> MLPPLinAlg::scalarMultiply(real_t scalar, std::vector<real_t
return a;
}
Ref<MLPPVector> MLPPLinAlg::scalar_multiplynv(real_t scalar, const Ref<MLPPVector> &a) {
ERR_FAIL_COND_V(!a.is_valid(), Ref<MLPPVector>());
Ref<MLPPVector> out;
out.instance();
int size = a->size();
out->resize(size);
const real_t *a_ptr = a->ptr();
real_t *out_ptr = out->ptrw();
for (int i = 0; i < size; ++i) {
out_ptr[i] = a_ptr[i] * scalar;
}
return out;
}
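// In-place variant: writes scalar * a into the caller-provided `out`, resizing it only when
// needed, so hot loops (e.g. MLPPKMeans::_compute_mu) can reuse a scratch vector instead of
// allocating a new one per call. additionv() and subtractionv() below follow the same pattern.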
void MLPPLinAlg::scalar_multiplyv(real_t scalar, const Ref<MLPPVector> &a, Ref<MLPPVector> out) {
ERR_FAIL_COND(!a.is_valid() || !out.is_valid());
int size = a->size();
if (unlikely(out->size() != size)) {
out->resize(size);
}
const real_t *a_ptr = a->ptr();
real_t *out_ptr = out->ptrw();
for (int i = 0; i < size; ++i) {
out_ptr[i] = a_ptr[i] * scalar;
}
}
std::vector<real_t> MLPPLinAlg::scalarAdd(real_t scalar, std::vector<real_t> a) {
for (int i = 0; i < a.size(); i++) {
a[i] += scalar;
@ -1018,6 +1054,47 @@ std::vector<real_t> MLPPLinAlg::addition(std::vector<real_t> a, std::vector<real
return c;
}
Ref<MLPPVector> MLPPLinAlg::additionnv(const Ref<MLPPVector> &a, const Ref<MLPPVector> &b) {
ERR_FAIL_COND_V(!a.is_valid() || !b.is_valid(), Ref<MLPPVector>());
int size = a->size();
ERR_FAIL_COND_V(size != b->size(), Ref<MLPPVector>());
Ref<MLPPVector> out;
out.instance();
out->resize(size);
const real_t *a_ptr = a->ptr();
const real_t *b_ptr = b->ptr();
real_t *out_ptr = out->ptrw();
for (int i = 0; i < size; ++i) {
out_ptr[i] = a_ptr[i] + b_ptr[i];
}
return out;
}
void MLPPLinAlg::additionv(const Ref<MLPPVector> &a, const Ref<MLPPVector> &b, Ref<MLPPVector> out) {
ERR_FAIL_COND(!a.is_valid() || !b.is_valid() || !out.is_valid());
int size = a->size();
ERR_FAIL_COND(size != b->size());
if (unlikely(out->size() != size)) {
out->resize(size);
}
const real_t *a_ptr = a->ptr();
const real_t *b_ptr = b->ptr();
real_t *out_ptr = out->ptrw();
for (int i = 0; i < size; ++i) {
out_ptr[i] = a_ptr[i] + b_ptr[i];
}
}
std::vector<real_t> MLPPLinAlg::subtraction(std::vector<real_t> a, std::vector<real_t> b) {
std::vector<real_t> c;
c.resize(a.size());
@ -1027,6 +1104,52 @@ std::vector<real_t> MLPPLinAlg::subtraction(std::vector<real_t> a, std::vector<r
return c;
}
Ref<MLPPVector> MLPPLinAlg::subtractionnv(const Ref<MLPPVector> &a, const Ref<MLPPVector> &b) {
ERR_FAIL_COND_V(!a.is_valid() || !b.is_valid(), Ref<MLPPVector>());
int size = a->size();
ERR_FAIL_COND_V(size != b->size(), Ref<MLPPVector>());
Ref<MLPPVector> out;
out.instance();
if (unlikely(size == 0)) {
return out;
}
out->resize(size);
const real_t *a_ptr = a->ptr();
const real_t *b_ptr = b->ptr();
real_t *out_ptr = out->ptrw();
for (int i = 0; i < size; ++i) {
out_ptr[i] = a_ptr[i] - b_ptr[i];
}
return out;
}
void MLPPLinAlg::subtractionv(const Ref<MLPPVector> &a, const Ref<MLPPVector> &b, Ref<MLPPVector> out) {
ERR_FAIL_COND(!a.is_valid() || !b.is_valid() || !out.is_valid());
int size = a->size();
ERR_FAIL_COND(size != b->size());
if (unlikely(out->size() != size)) {
out->resize(size);
}
const real_t *a_ptr = a->ptr();
const real_t *b_ptr = b->ptr();
real_t *out_ptr = out->ptrw();
for (int i = 0; i < size; ++i) {
out_ptr[i] = a_ptr[i] - b_ptr[i];
}
}
std::vector<real_t> MLPPLinAlg::subtractMatrixRows(std::vector<real_t> a, std::vector<std::vector<real_t>> B) {
for (int i = 0; i < B.size(); i++) {
a = subtraction(a, B[i]);
@ -1271,6 +1394,18 @@ real_t MLPPLinAlg::norm_sq(std::vector<real_t> a) {
}
return n_sq;
}
real_t MLPPLinAlg::norm_sqv(const Ref<MLPPVector> &a) {
ERR_FAIL_COND_V(!a.is_valid(), 0);
int size = a->size();
const real_t *a_ptr = a->ptr();
real_t n_sq = 0;
for (int i = 0; i < size; ++i) {
n_sq += a_ptr[i] * a_ptr[i];
}
return n_sq;
}
real_t MLPPLinAlg::sum_elements(std::vector<real_t> a) {
real_t sum = 0;

View File

@ -164,13 +164,20 @@ public:
std::vector<real_t> elementWiseDivision(std::vector<real_t> a, std::vector<real_t> b);
std::vector<real_t> scalarMultiply(real_t scalar, std::vector<real_t> a);
Ref<MLPPVector> scalar_multiplynv(real_t scalar, const Ref<MLPPVector> &a);
void scalar_multiplyv(real_t scalar, const Ref<MLPPVector> &a, Ref<MLPPVector> out);
std::vector<real_t> scalarAdd(real_t scalar, std::vector<real_t> a);
std::vector<real_t> addition(std::vector<real_t> a, std::vector<real_t> b);
Ref<MLPPVector> additionnv(const Ref<MLPPVector> &a, const Ref<MLPPVector> &b);
void additionv(const Ref<MLPPVector> &a, const Ref<MLPPVector> &b, Ref<MLPPVector> out);
std::vector<real_t> subtraction(std::vector<real_t> a, std::vector<real_t> b);
Ref<MLPPVector> subtractionnv(const Ref<MLPPVector> &a, const Ref<MLPPVector> &b);
void subtractionv(const Ref<MLPPVector> &a, const Ref<MLPPVector> &b, Ref<MLPPVector> out);
std::vector<real_t> subtractMatrixRows(std::vector<real_t> a, std::vector<std::vector<real_t>> B);
std::vector<real_t> log(std::vector<real_t> a);
@ -220,6 +227,7 @@ public:
real_t norm_2(std::vector<real_t> a);
real_t norm_sq(std::vector<real_t> a);
real_t norm_sqv(const Ref<MLPPVector> &a);
real_t sum_elements(std::vector<real_t> a);

View File

@ -46,4 +46,6 @@ void MLPPVector::_bind_methods() {
ClassDB::bind_method(D_METHOD("set_from_mlpp_vector", "from"), &MLPPVector::set_from_mlpp_vector);
ClassDB::bind_method(D_METHOD("set_from_pool_vector", "from"), &MLPPVector::set_from_pool_vector);
ClassDB::bind_method(D_METHOD("is_equal_approx", "with", "tolerance"), &MLPPVector::is_equal_approx, CMP_EPSILON);
}

View File

@ -2,6 +2,7 @@
#define MLPP_VECTOR_H
#include "core/math/math_defs.h"
#include "core/math/math_funcs.h"
#include "core/containers/pool_vector.h"
#include "core/containers/sort_array.h"
@ -243,7 +244,10 @@ public:
}
_FORCE_INLINE_ void set_from_mlpp_vectorr(const MLPPVector &p_from) {
resize(p_from.size());
if (_size != p_from.size()) {
resize(p_from.size());
}
for (int i = 0; i < p_from._size; i++) {
_data[i] = p_from._data[i];
}
@ -251,13 +255,21 @@ public:
_FORCE_INLINE_ void set_from_mlpp_vector(const Ref<MLPPVector> &p_from) {
ERR_FAIL_COND(!p_from.is_valid());
resize(p_from->size());
if (_size != p_from->size()) {
resize(p_from->size());
}
for (int i = 0; i < p_from->_size; i++) {
_data[i] = p_from->_data[i];
}
}
_FORCE_INLINE_ void set_from_vector(const Vector<real_t> &p_from) {
if (_size != p_from.size()) {
resize(p_from.size());
}
resize(p_from.size());
for (int i = 0; i < _size; i++) {
_data[i] = p_from[i];
@ -265,13 +277,36 @@ public:
}
_FORCE_INLINE_ void set_from_pool_vector(const PoolRealArray &p_from) {
resize(p_from.size());
typename PoolRealArray::Read r = p_from.read();
if (_size != p_from.size()) {
resize(p_from.size());
}
PoolRealArray::Read r = p_from.read();
for (int i = 0; i < _size; i++) {
_data[i] = r[i];
}
}
_FORCE_INLINE_ bool is_equal_approx(const Ref<MLPPVector> &p_with, real_t tolerance = static_cast<real_t>(CMP_EPSILON)) const {
ERR_FAIL_COND_V(!p_with.is_valid(), false);
if (unlikely(this == p_with.ptr())) {
return true;
}
if (_size != p_with->size()) {
return false;
}
for (int i = 0; i < _size; ++i) {
if (!Math::is_equal_approx(_data[i], p_with->_data[i], tolerance)) {
return false;
}
}
return true;
}
String to_string();
_FORCE_INLINE_ MLPPVector() {

View File

@ -7,6 +7,7 @@
#include "utilities.h"
#include "core/math/math_funcs.h"
#include "core/log/logger.h"
#include <fstream>
#include <iostream>
@ -314,6 +315,14 @@ void MLPPUtilities::CostInfo(int epoch, real_t cost_prev, real_t Cost) {
std::cout << Cost << std::endl;
}
void MLPPUtilities::cost_info(int epoch, real_t cost_prev, real_t cost) {
String str = "This is epoch: " + itos(epoch) + ", ";
str += "the cost function has been minimized by " + String::num(cost_prev - cost);
str += ", current cost: " + String::num(cost);
PLOG_MSG(str);
}
std::vector<std::vector<std::vector<real_t>>> MLPPUtilities::createMiniBatches(std::vector<std::vector<real_t>> inputSet, int n_mini_batch) {
int n = inputSet.size();

View File

@ -12,6 +12,7 @@
#include "core/math/math_defs.h"
#include "core/containers/vector.h"
#include "core/variant/variant.h"
#include "core/string/ustring.h"
#include "../lin_alg/mlpp_matrix.h"
#include "../lin_alg/mlpp_vector.h"
@ -48,6 +49,7 @@ public:
static void UI(std::vector<real_t> weights, std::vector<real_t> initial, real_t bias);
static void UI(std::vector<std::vector<real_t>>, std::vector<real_t> bias);
static void CostInfo(int epoch, real_t cost_prev, real_t Cost);
static void cost_info(int epoch, real_t cost_prev, real_t cost);
static std::vector<std::vector<std::vector<real_t>>> createMiniBatches(std::vector<std::vector<real_t>> inputSet, int n_mini_batch);
static std::tuple<std::vector<std::vector<std::vector<real_t>>>, std::vector<std::vector<real_t>>> createMiniBatches(std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet, int n_mini_batch);

View File

@ -3,6 +3,8 @@
#include "core/math/math_funcs.h"
#include "core/log/logger.h"
//TODO remove
#include <cmath>
#include <ctime>
@ -538,18 +540,23 @@ void MLPPTests::test_naive_bayes() {
alg.printVector(GNB.modelSetTest(alg.transpose(inputSet)));
}
void MLPPTests::test_k_means(bool ui) {
MLPPLinAlg alg;
// KMeans
/*
std::vector<std::vector<real_t>> inputSet = { { 32, 0, 7 }, { 2, 28, 17 }, { 0, 9, 23 } };
MLPPKMeans kmeans(inputSet, 3, "KMeans++");
kmeans.train(3, ui);
std::cout << std::endl;
alg.printMatrix(kmeans.modelSetTest(inputSet)); // Returns the assigned centroids to each of the respective training examples
std::cout << std::endl;
alg.printVector(kmeans.silhouette_scores());
*/
Ref<MLPPMatrix> input_set;
input_set.instance();
input_set->set_from_std_vectors(inputSet);
Ref<MLPPKMeans> kmeans;
kmeans.instance();
kmeans->set_input_set(input_set);
kmeans->set_k(3);
kmeans->set_mean_type(MLPPKMeans::MEAN_TYPE_KMEANSPP);
kmeans->train(3, ui);
PLOG_MSG(kmeans->model_set_test(input_set)->to_string());
PLOG_MSG(kmeans->silhouette_scores()->to_string());
}
void MLPPTests::test_knn(bool ui) {
MLPPLinAlg alg;