pmlpp (mirror of https://github.com/Relintai/pmlpp.git)

Commit b8e3f41fda (parent e6afa5b715)

Fix more crashes and issues in MLPPMLP. Also added more tests for it.
@@ -86,6 +86,8 @@ void MLPPMLP::gradient_descent(real_t learning_rate, int max_epoch, bool UI) {
 	real_t cost_prev = 0;
 	int epoch = 1;
 
+	y_hat->fill(0);
+
 	forward_pass();
 
 	while (true) {
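Note: the new y_hat->fill(0) zeroes the cached prediction vector before the first forward pass, presumably so the cost and UI paths never read stale or uninitialized values. A minimal sketch of the same idea with a plain buffer (the names below are hypothetical, not the engine's API):

    #include <algorithm>
    #include <vector>

    // Minimal sketch with a plain buffer (hypothetical names, not the engine's
    // API): reset the reused prediction buffer to a known state before the
    // training loop so nothing downstream reads leftover values.
    void reset_predictions(std::vector<double> &y_hat_buf) {
        std::fill(y_hat_buf.begin(), y_hat_buf.end(), 0.0); // like y_hat->fill(0)
    }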
@@ -99,10 +101,10 @@ void MLPPMLP::gradient_descent(real_t learning_rate, int max_epoch, bool UI) {
 		Ref<MLPPVector> D2_1 = alg.mat_vec_multv(alg.transposem(a2), error);
 
 		// weights and bias updation for layer 2
-		weights2 = alg.subtractionnv(weights2, alg.scalar_multiplynv(learning_rate / n, D2_1));
-		weights2 = regularization.reg_weightsv(weights2, lambda, alpha, reg);
+		weights2->set_from_mlpp_vector(alg.subtractionnv(weights2, alg.scalar_multiplynv(learning_rate / static_cast<real_t>(n), D2_1)));
+		weights2->set_from_mlpp_vector(regularization.reg_weightsv(weights2, lambda, alpha, reg));
 
-		bias2 -= learning_rate * alg.sum_elementsv(error) / n;
+		bias2 -= learning_rate * alg.sum_elementsv(error) / static_cast<real_t>(n);
 
 		// Calculating the weight/bias for layer 1
 
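Note: weights2 is a reference-counted handle (Ref<MLPPVector>), so the old assignment rebinds the handle to a brand-new vector returned by the algebra call, while the new set_from_mlpp_vector copies the result into the object the handle already points to; rebinding can leave any other holder of the old vector out of sync, which appears to be the kind of stale-state problem the commit targets. The static_cast<real_t>(n) just makes the scalar division explicit. The sketch below illustrates the rebinding pitfall with std::shared_ptr as a stand-in; it is an analogy, not the engine's Ref implementation:

    #include <iostream>
    #include <memory>
    #include <vector>

    // Analogy only: Ref<T> is reference counted, a bit like std::shared_ptr.
    // Rebinding the handle creates a brand-new object and silently detaches
    // every other holder of the old one; writing through the handle (what
    // set_from_mlpp_vector does conceptually) keeps all holders in sync.
    int main() {
        auto weights = std::make_shared<std::vector<double>>(2, 1.0);
        std::weak_ptr<std::vector<double>> observer = weights; // another part of the model

        weights = std::make_shared<std::vector<double>>(2, 3.0); // buggy pattern: rebind
        std::cout << "observer expired after rebinding: " << observer.expired() << "\n"; // 1

        observer = weights;
        (*weights)[0] = 5.0; // fixed pattern: update in place through the handle
        std::cout << "observer expired after in-place write: " << observer.expired() << "\n"; // 0
        return 0;
    }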
@@ -111,10 +113,10 @@ void MLPPMLP::gradient_descent(real_t learning_rate, int max_epoch, bool UI) {
 		Ref<MLPPMatrix> D1_3 = alg.matmultm(alg.transposem(input_set), D1_2);
 
 		// weight an bias updation for layer 1
-		weights1 = alg.subtractionm(weights1, alg.scalar_multiplym(learning_rate / n, D1_3));
-		weights1 = regularization.reg_weightsm(weights1, lambda, alpha, reg);
+		weights1->set_from_mlpp_matrix(alg.subtractionm(weights1, alg.scalar_multiplym(learning_rate / n, D1_3)));
+		weights1->set_from_mlpp_matrix(regularization.reg_weightsm(weights1, lambda, alpha, reg));
 
-		bias1 = alg.subtract_matrix_rows(bias1, alg.scalar_multiplym(learning_rate / n, D1_2));
+		bias1->set_from_mlpp_vector(alg.subtract_matrix_rows(bias1, alg.scalar_multiplym(learning_rate / n, D1_2)));
 
 		forward_pass();
 
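Note: the same in-place update pattern is applied to the layer-1 parameters here. For reference, D1_3 = X^T * D1_2 is the weight gradient, and the bias step performed by subtract_matrix_rows amounts to subtracting the scaled column-wise sum of D1_2. A plain std::vector rendering of what these calls compute (hypothetical helper, not MLPPLinAlg):

    #include <cstddef>
    #include <vector>

    // Plain std::vector rendering of the layer-1 update in this hunk
    // (hypothetical helper, not MLPPLinAlg): the weight gradient is
    // X^T * D1_2, the bias gradient is the column-wise sum of D1_2, and both
    // are scaled by learning_rate / n before being subtracted in place.
    using Matrix = std::vector<std::vector<double>>;

    void update_layer1(Matrix &weights1, std::vector<double> &bias1,
                       const Matrix &X, const Matrix &D1_2, double learning_rate) {
        const std::size_t n = X.size();              // number of samples
        const std::size_t in_dim = weights1.size();  // input features
        const std::size_t hidden_dim = bias1.size(); // hidden units
        const double scale = learning_rate / static_cast<double>(n);

        for (std::size_t j = 0; j < in_dim; ++j) {
            for (std::size_t h = 0; h < hidden_dim; ++h) {
                double grad = 0.0; // (X^T * D1_2)[j][h]
                for (std::size_t i = 0; i < n; ++i) {
                    grad += X[i][j] * D1_2[i][h];
                }
                weights1[j][h] -= scale * grad; // in-place, no handle rebinding
            }
        }
        for (std::size_t h = 0; h < hidden_dim; ++h) {
            double grad = 0.0; // column sum of D1_2
            for (std::size_t i = 0; i < n; ++i) {
                grad += D1_2[i][h];
            }
            bias1[h] -= scale * grad;
        }
    }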
@@ -126,6 +128,7 @@ void MLPPMLP::gradient_descent(real_t learning_rate, int max_epoch, bool UI) {
 			std::cout << "Layer 2:" << std::endl;
 			MLPPUtilities::print_ui_vb(weights2, bias2);
 		}
+
 		epoch++;
 
 		if (epoch > max_epoch) {
@@ -159,9 +162,9 @@ void MLPPMLP::sgd(real_t learning_rate, int max_epoch, bool UI) {
 	y_hat_row_tmp.instance();
 	y_hat_row_tmp->resize(1);
 
-	Ref<MLPPMatrix> lz2;
+	Ref<MLPPVector> lz2;
 	lz2.instance();
-	Ref<MLPPMatrix> la2;
+	Ref<MLPPVector> la2;
 	la2.instance();
 
 	while (true) {
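Note: lz2 and la2 are SGD scratch buffers for a single sample, so they are vectors rather than matrices; propagatev fills them with the hidden pre-activations z2 = W1^T x + b1 and the activations a2 = sigmoid(z2) for that one row. A self-contained sketch of that per-sample step (hypothetical names, plain std::vector instead of MLPPVector/MLPPMatrix):

    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Per-sample forward step (hypothetical names): z2 = W1^T x + b1 and
    // a2 = sigmoid(z2) are vectors of length hidden_dim, which is why the SGD
    // scratch buffers are vectors too.
    std::vector<double> propagate_one_sample(
            const std::vector<std::vector<double>> &weights1, // in_dim x hidden_dim
            const std::vector<double> &bias1,                 // hidden_dim
            const std::vector<double> &x,                     // in_dim
            std::vector<double> &z2_out) {                    // filled by this call
        const std::size_t hidden_dim = bias1.size();
        z2_out.assign(hidden_dim, 0.0);
        for (std::size_t h = 0; h < hidden_dim; ++h) {
            for (std::size_t j = 0; j < x.size(); ++j) {
                z2_out[h] += weights1[j][h] * x[j];
            }
            z2_out[h] += bias1[h];
        }
        std::vector<double> a2_out(hidden_dim);
        for (std::size_t h = 0; h < hidden_dim; ++h) {
            a2_out[h] = 1.0 / (1.0 + std::exp(-z2_out[h])); // sigmoid
        }
        return a2_out;
    }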
@@ -171,32 +174,33 @@ void MLPPMLP::sgd(real_t learning_rate, int max_epoch, bool UI) {
 		real_t output_element = output_set->get_element(output_Index);
 		output_set_row_tmp->set_element(0, output_element);
 
-		real_t y_hat = evaluatev(input_set_row_tmp);
-		y_hat_row_tmp->set_element(0, y_hat);
+		real_t ly_hat = evaluatev(input_set_row_tmp);
+		y_hat_row_tmp->set_element(0, ly_hat);
 		propagatev(input_set_row_tmp, lz2, la2);
 		cost_prev = cost(y_hat_row_tmp, output_set_row_tmp);
-		real_t error = y_hat - output_element;
+		real_t error = ly_hat - output_element;
 
 		// Weight updation for layer 2
-		Ref<MLPPVector> D2_1 = alg.scalar_multiplym(error, a2);
-		weights2 = alg.subtractionm(weights2, alg.scalar_multiplym(learning_rate, D2_1));
-		weights2 = regularization.reg_weightsm(weights2, lambda, alpha, reg);
+		Ref<MLPPVector> D2_1 = alg.scalar_multiplynv(error, la2);
+
+		weights2->set_from_mlpp_vector(alg.subtractionnv(weights2, alg.scalar_multiplynv(learning_rate, D2_1)));
+		weights2->set_from_mlpp_vector(regularization.reg_weightsv(weights2, lambda, alpha, reg));
 
 		// Bias updation for layer 2
 		bias2 -= learning_rate * error;
 
 		// Weight updation for layer 1
-		Ref<MLPPVector> D1_1 = alg.scalar_multiplym(error, weights2);
-		Ref<MLPPVector> D1_2 = alg.hadamard_productm(D1_1, avn.sigmoid_derivm(z2));
+		Ref<MLPPVector> D1_1 = alg.scalar_multiplynv(error, weights2);
+		Ref<MLPPVector> D1_2 = alg.hadamard_productnv(D1_1, avn.sigmoid_derivv(lz2));
 		Ref<MLPPMatrix> D1_3 = alg.outer_product(input_set_row_tmp, D1_2);
 
-		weights1 = alg.subtractionm(weights1, alg.scalar_multiplym(learning_rate, D1_3));
-		weights1 = regularization.reg_weightsm(weights1, lambda, alpha, reg);
+		weights1->set_from_mlpp_matrix(alg.subtractionm(weights1, alg.scalar_multiplym(learning_rate, D1_3)));
+		weights1->set_from_mlpp_matrix(regularization.reg_weightsm(weights1, lambda, alpha, reg));
 		// Bias updation for layer 1
 
-		bias1 = alg.subtractionm(bias1, alg.scalar_multiplym(learning_rate, D1_2));
+		bias1->set_from_mlpp_vector(alg.subtractionnv(bias1, alg.scalar_multiplynv(learning_rate, D1_2)));
 
-		y_hat = evaluatev(input_set_row_tmp);
+		ly_hat = evaluatev(input_set_row_tmp);
 
 		if (UI) {
 			MLPPUtilities::cost_info(epoch, cost_prev, cost_prev);
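Note: the local real_t y_hat shadowed the member Ref<MLPPVector> y_hat for the rest of the loop body; renaming it ly_hat removes the ambiguity, and the switch to the vector-flavoured algebra calls (scalar_multiplynv, hadamard_productnv, sigmoid_derivv) keeps the single-sample path in vector form throughout. A tiny illustration of the shadowing trap (hypothetical types, illustration only):

    #include <iostream>

    // Hypothetical types: a local that reuses a member's name shadows the
    // member for the rest of the scope, which is the trap the
    // y_hat -> ly_hat rename avoids.
    struct Model {
        double y_hat = 0.0; // member holding the cached prediction

        void buggy_step(double prediction) {
            double y_hat = prediction; // shadows the member
            y_hat *= 2.0;              // only the local copy changes
        }

        void fixed_step(double prediction) {
            double ly_hat = prediction; // distinct name, no shadowing
            y_hat = ly_hat * 2.0;       // member updated on purpose
        }
    };

    int main() {
        Model m;
        m.buggy_step(0.5);
        std::cout << "after buggy_step: " << m.y_hat << "\n"; // 0
        m.fixed_step(0.5);
        std::cout << "after fixed_step: " << m.y_hat << "\n"; // 1
        return 0;
    }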
@@ -240,22 +244,21 @@ void MLPPMLP::mbgd(real_t learning_rate, int max_epoch, int mini_batch_size, boo
 			Ref<MLPPMatrix> current_input = batches.input_sets[i];
 			Ref<MLPPVector> current_output = batches.output_sets[i];
 
-			Ref<MLPPVector> y_hat = evaluatem(current_input);
-			propagatev(current_input, lz2, la2);
-			cost_prev = cost(y_hat, current_output);
+			Ref<MLPPVector> ly_hat = evaluatem(current_input);
+			propagatem(current_input, lz2, la2);
+			cost_prev = cost(ly_hat, current_output);
 
 			// Calculating the errors
-			Ref<MLPPVector> error = alg.subtractionnv(y_hat, current_output);
+			Ref<MLPPVector> error = alg.subtractionnv(ly_hat, current_output);
 
 			// Calculating the weight/bias gradients for layer 2
-
-			Ref<MLPPVector> D2_1 = alg.mat_vec_multv(alg.transposem(a2), error);
+			Ref<MLPPVector> D2_1 = alg.mat_vec_multv(alg.transposem(la2), error);
 
 			real_t lr_d_cos = learning_rate / static_cast<real_t>(current_output->size());
 
 			// weights and bias updation for layser 2
-			weights2 = alg.subtractionnv(weights2, alg.scalar_multiplynv(lr_d_cos, D2_1));
-			weights2 = regularization.reg_weightsm(weights2, lambda, alpha, reg);
+			weights2->set_from_mlpp_vector(alg.subtractionnv(weights2, alg.scalar_multiplynv(lr_d_cos, D2_1)));
+			weights2->set_from_mlpp_vector(regularization.reg_weightsv(weights2, lambda, alpha, reg));
 
 			// Calculating the bias gradients for layer 2
 			real_t b_gradient = alg.sum_elementsv(error);
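Note: the mini-batch path now propagates the current batch itself (propagatem, matrix in and matrix out) and builds the layer-2 gradient from that batch's activations la2 rather than the full-dataset member a2, with the step scaled by learning_rate divided by the batch size. A plain-vector sketch of that update (hypothetical helper, not the library's API):

    #include <cstddef>
    #include <vector>

    // Plain-vector sketch of the layer-2 mini-batch update (hypothetical helper):
    // the gradient a2^T * error is built from the current batch's own activations,
    // and the step is scaled by learning_rate / batch_size, mirroring lr_d_cos.
    void minibatch_layer2_update(std::vector<double> &weights2,                    // hidden_dim
                                 const std::vector<std::vector<double>> &batch_a2, // batch x hidden_dim
                                 const std::vector<double> &batch_error,           // batch
                                 double learning_rate) {
        const std::size_t batch_size = batch_error.size();
        const double lr_d_cos = learning_rate / static_cast<double>(batch_size);
        for (std::size_t h = 0; h < weights2.size(); ++h) {
            double grad = 0.0; // (a2^T * error)[h]
            for (std::size_t i = 0; i < batch_size; ++i) {
                grad += batch_a2[i][h] * batch_error[i];
            }
            weights2[h] -= lr_d_cos * grad; // in-place update
        }
    }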
@@ -264,23 +267,20 @@ void MLPPMLP::mbgd(real_t learning_rate, int max_epoch, int mini_batch_size, boo
 			bias2 -= learning_rate * b_gradient / current_output->size();
 
 			//Calculating the weight/bias for layer 1
-
 			Ref<MLPPMatrix> D1_1 = alg.outer_product(error, weights2);
-
-			Ref<MLPPMatrix> D1_2 = alg.hadamard_productm(D1_1, avn.sigmoid_derivm(z2));
-
+			Ref<MLPPMatrix> D1_2 = alg.hadamard_productm(D1_1, avn.sigmoid_derivm(lz2));
 			Ref<MLPPMatrix> D1_3 = alg.matmultm(alg.transposem(current_input), D1_2);
 
 			// weight an bias updation for layer 1
-			weights1 = alg.subtractionm(weights1, alg.scalar_multiplym(lr_d_cos, D1_3));
-			weights1 = regularization.reg_weightsm(weights1, lambda, alpha, reg);
+			weights1->set_from_mlpp_matrix(alg.subtractionm(weights1, alg.scalar_multiplym(lr_d_cos, D1_3)));
+			weights1->set_from_mlpp_matrix(regularization.reg_weightsm(weights1, lambda, alpha, reg));
 
-			bias1 = alg.subtract_matrix_rows(bias1, alg.scalar_multiplym(lr_d_cos, D1_2));
+			bias1->set_from_mlpp_vector(alg.subtract_matrix_rows(bias1, alg.scalar_multiplym(lr_d_cos, D1_2)));
 
 			y_hat = evaluatem(current_input);
 
 			if (UI) {
-				MLPPUtilities::CostInfo(epoch, cost_prev, cost(y_hat, current_output));
+				MLPPUtilities::CostInfo(epoch, cost_prev, cost(ly_hat, current_output));
 				std::cout << "Layer 1:" << std::endl;
 				MLPPUtilities::print_ui_mb(weights1, bias1);
 				std::cout << "Layer 2:" << std::endl;
@@ -300,7 +300,7 @@ void MLPPMLP::mbgd(real_t learning_rate, int max_epoch, int mini_batch_size, boo
 
 real_t MLPPMLP::score() {
 	MLPPUtilities util;
-	return util.performance_mat(y_hat, output_set);
+	return util.performance_vec(y_hat, output_set);
 }
 
 void MLPPMLP::save(const String &fileName) {
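Note: y_hat and output_set are both vectors here, so the vector overload performance_vec is the matching call. The sketch below shows roughly what such a vector-vs-vector score computes for a binary classifier; it is illustrative only, not necessarily MLPPUtilities' exact implementation:

    #include <cstddef>
    #include <vector>

    // Roughly what a vector-vs-vector score does for a binary classifier:
    // threshold each prediction and report the fraction of agreements.
    // This is a sketch, not MLPPUtilities::performance_vec itself.
    double performance_sketch(const std::vector<double> &y_hat,
                              const std::vector<double> &y) {
        if (y.empty()) {
            return 0.0;
        }
        std::size_t correct = 0;
        for (std::size_t i = 0; i < y_hat.size() && i < y.size(); ++i) {
            const double predicted = (y_hat[i] >= 0.5) ? 1.0 : 0.0;
            if (predicted == y[i]) {
                ++correct;
            }
        }
        return static_cast<double>(correct) / static_cast<double>(y.size());
    }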
@@ -346,11 +346,11 @@ void MLPPMLP::initialize() {
 	_initialized = true;
 }
 
-real_t MLPPMLP::cost(const Ref<MLPPVector> &y_hat, const Ref<MLPPVector> &y) {
+real_t MLPPMLP::cost(const Ref<MLPPVector> &p_y_hat, const Ref<MLPPVector> &p_y) {
 	MLPPReg regularization;
 	class MLPPCost cost;
 
-	return cost.log_lossv(y_hat, y) + regularization.reg_termv(weights2, lambda, alpha, reg) + regularization.reg_termm(weights1, lambda, alpha, reg);
+	return cost.log_lossv(p_y_hat, p_y) + regularization.reg_termv(weights2, lambda, alpha, reg) + regularization.reg_termm(weights1, lambda, alpha, reg);
 }
 
 Ref<MLPPVector> MLPPMLP::evaluatem(const Ref<MLPPMatrix> &X) {
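Note: the parameters are renamed p_y_hat and p_y so they no longer shadow the members of the same name; the cost itself is unchanged, binary log loss plus the two regularization terms. For reference, a self-contained sketch of the log-loss part (hypothetical helper, regularization omitted, not MLPPCost itself):

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Sketch of the binary log-loss term the cost above is built on.
    double log_loss_sketch(const std::vector<double> &p_y_hat,
                           const std::vector<double> &p_y) {
        const double eps = 1e-12; // keep log() away from 0
        double sum = 0.0;
        for (std::size_t i = 0; i < p_y.size(); ++i) {
            const double yh = std::min(std::max(p_y_hat[i], eps), 1.0 - eps);
            sum += p_y[i] * std::log(yh) + (1.0 - p_y[i]) * std::log(1.0 - yh);
        }
        return p_y.empty() ? 0.0 : -sum / static_cast<double>(p_y.size());
    }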
@@ -368,7 +368,7 @@ void MLPPMLP::propagatem(const Ref<MLPPMatrix> &X, Ref<MLPPMatrix> z2_out, Ref<M
 	MLPPActivation avn;
 
 	z2_out->set_from_mlpp_matrix(alg.mat_vec_addv(alg.matmultm(X, weights1), bias1));
-	a2_out->set_from_mlpp_matrix(avn.sigmoid_normm(z2));
+	a2_out->set_from_mlpp_matrix(avn.sigmoid_normm(z2_out));
 }
 
 real_t MLPPMLP::evaluatev(const Ref<MLPPVector> &x) {
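Note: the bug here was that the activation was computed from the member z2 instead of the z2_out buffer filled on the previous line, so callers such as sgd and mbgd received activations based on stale (and possibly differently sized) data. The same fix is applied to the vector overload propagatev in the next hunk. A small sketch of the out-parameter pattern being fixed (hypothetical types, illustration only):

    #include <cmath>
    #include <cstddef>
    #include <vector>

    // When a propagate-style helper fills caller-provided buffers, every later
    // step must read the buffer it just filled, not a similarly named member
    // cached by an earlier forward pass.
    struct TinyLayer {
        std::vector<double> z2; // member cache used elsewhere (e.g. forward_pass)

        void propagate(const std::vector<double> &z_in,
                       std::vector<double> &z2_out,
                       std::vector<double> &a2_out) const {
            z2_out = z_in; // freshly computed pre-activations for this call
            a2_out.resize(z2_out.size());
            for (std::size_t i = 0; i < z2_out.size(); ++i) {
                // Correct: sigmoid of z2_out. Reading this->z2 here (the old bug)
                // uses stale data and can even be a size mismatch.
                a2_out[i] = 1.0 / (1.0 + std::exp(-z2_out[i]));
            }
        }
    };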
@@ -386,7 +386,7 @@ void MLPPMLP::propagatev(const Ref<MLPPVector> &x, Ref<MLPPVector> z2_out, Ref<M
 	MLPPActivation avn;
 
 	z2_out->set_from_mlpp_vector(alg.additionnv(alg.mat_vec_multv(alg.transposem(weights1), x), bias1));
-	a2_out->set_from_mlpp_vector(avn.sigmoid_normv(z2));
+	a2_out->set_from_mlpp_vector(avn.sigmoid_normv(z2_out));
 }
 
 void MLPPMLP::forward_pass() {
@@ -396,7 +396,7 @@ void MLPPMLP::forward_pass() {
 	z2->set_from_mlpp_matrix(alg.mat_vec_addv(alg.matmultm(input_set, weights1), bias1));
 	a2->set_from_mlpp_matrix(avn.sigmoid_normm(z2));
 
-	y_hat = avn.sigmoid_normv(alg.scalar_addnv(bias2, alg.mat_vec_multv(a2, weights2)));
+	y_hat->set_from_mlpp_vector(avn.sigmoid_normv(alg.scalar_addnv(bias2, alg.mat_vec_multv(a2, weights2))));
 }
 
 MLPPMLP::MLPPMLP(const Ref<MLPPMatrix> &p_input_set, const Ref<MLPPVector> &p_output_set, int p_n_hidden, MLPPReg::RegularizationType p_reg, real_t p_lambda, real_t p_alpha) {
@@ -389,8 +389,12 @@ void MLPPTests::test_mlp(bool ui) {
 	MLPPLinAlg alg;
 
 	// MLP
-	std::vector<std::vector<real_t>> inputSet = { { 0, 0, 1, 1 }, { 0, 1, 0, 1 } };
-	inputSet = alg.transpose(inputSet);
+	std::vector<std::vector<real_t>> inputSet = {
+		{ 0, 0 },
+		{ 1, 1 },
+		{ 0, 1 },
+		{ 1, 0 }
+	};
 	std::vector<real_t> outputSet = { 0, 1, 1, 0 };
 
 	MLPPMLPOld model(inputSet, outputSet, 2);
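Note: the test now writes the input set one sample per row, so each row pairs with the label at the same index in outputSet and the alg.transpose step is no longer needed. A minimal sketch of the sample-major layout (illustrative values, with real_t standing in for the engine's typedef):

    #include <vector>

    // Sample-major layout: one row per sample, so row i pairs with labels[i]
    // and no transpose step is needed before handing the data to the model.
    using real_t = double;

    std::vector<std::vector<real_t>> sample_major_inputs = {
        { 0, 0 },
        { 1, 1 },
        { 0, 1 },
        { 1, 0 }
    };
    std::vector<real_t> labels = { 0, 1, 1, 0 }; // labels[i] pairs with row i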
@@ -409,7 +413,21 @@ void MLPPTests::test_mlp(bool ui) {
 	MLPPMLP model_new(input_set, output_set, 2);
 	model_new.gradient_descent(0.1, 10000, ui);
 	String res = model_new.model_set_test(input_set)->to_string();
-	res += "\nACCURACY: " + String::num(100 * model_new.score()) + "%";
+	res += "\nACCURACY (gradient_descent): " + String::num(100 * model_new.score()) + "%";
 
+	PLOG_MSG(res);
+
+	MLPPMLP model_new2(input_set, output_set, 2);
+	model_new2.sgd(0.01, 10000, ui);
+	res = model_new2.model_set_test(input_set)->to_string();
+	res += "\nACCURACY (sgd): " + String::num(100 * model_new2.score()) + "%";
+
+	PLOG_MSG(res);
+
+	MLPPMLP model_new3(input_set, output_set, 2);
+	model_new3.mbgd(0.01, 10000, 2, ui);
+	res = model_new3.model_set_test(input_set)->to_string();
+	res += "\nACCURACY (mbgd): " + String::num(100 * model_new3.score()) + "%";
+
 	PLOG_MSG(res);
 }