Cleanups to tests.

2025-04-15 03:28:26 +02:00 · 2023-12-28 18:00:52 +01:00 · 2023-12-28 18:00:52 +01:00 · f409265513
commit f409265513
parent ece344400c
2 changed files with 54 additions and 63 deletions
--- a/test/mlpp_tests.cpp
+++ b/test/mlpp_tests.cpp
@@ -882,40 +882,31 @@ void MLPPTests::test_convolution_tensors_etc() {
 	ERR_PRINT(conv.convolve_2d(conv.gaussian_filter_2d(5, 1), laplacian, 1)->to_string());
 }
 void MLPPTests::test_pca_svd_eigenvalues_eigenvectors(bool ui) {
 	/*
 	MLPPLinAlg alg;
-	// PCA, SVD, eigenvalues & eigenvectors
+	const real_t input_set_arr[] = {
-	std::vector<std::vector<real_t>> inputSet = { { 1, 1 }, { 1, 1 } };
+		1, 1, //
 		1, 1 //
 	};
-	MLPPLinAlg::EigenResultOld eigen = alg.eigen_old(inputSet);
+	Ref<MLPPMatrix> input_set = Ref<MLPPMatrix>(memnew(MLPPMatrix(input_set_arr, 2, 2)));
-	std::cout << "Eigenvectors:" << std::endl;
+	// eigenvalues & eigenvectors
 	alg.printMatrix(eigen.eigen_vectors);
 	std::cout << std::endl;
 	std::cout << "Eigenvalues:" << std::endl;
 	alg.printMatrix(eigen.eigen_values);
-	std::cout << "SVD OLD START" << std::endl;
+	MLPPLinAlg::EigenResult eigen = alg.eigen(input_set);
-	MLPPLinAlg::SVDResultOld svd_old = alg.SVD(inputSet);
+	PLOG_MSG("== Eigen ==");
-	std::cout << "U:" << std::endl;
+	PLOG_MSG("Eigenvectors:");
-	alg.printMatrix(svd_old.U);
+	PLOG_MSG(eigen.eigen_vectors->to_string());
-	std::cout << "S:" << std::endl;
+	PLOG_MSG("Eigenvalues:");
-	alg.printMatrix(svd_old.S);
+	PLOG_MSG(eigen.eigen_values->to_string());
 	std::cout << "Vt:" << std::endl;
 	alg.printMatrix(svd_old.Vt);
-	std::cout << "SVD OLD FIN" << std::endl;
+	// SVD
-	Ref<MLPPMatrix> input_set;
+	PLOG_MSG("== SVD ==");
 	input_set.instance();
 	input_set->set_from_std_vectors(inputSet);
 	*/
-	/*
+	String str_svd;
 	String str_svd = "SVD\n";
 	MLPPLinAlg::SVDResult svd = alg.svd(input_set);
@@ -928,17 +919,10 @@ void MLPPTests::test_pca_svd_eigenvalues_eigenvectors(bool ui) {
 	str_svd += "\n";
 	PLOG_MSG(str_svd);
 	*/
-	/*
+	// PCA
 	std::cout << "PCA" << std::endl;
-	// PCA done using Jacobi's method to approximate eigenvalues and eigenvectors.
+	PLOG_MSG("== PCA ==");
 	MLPPPCAOld dr_old(inputSet, 1); // 1 dimensional representation.
 	std::cout << std::endl;
 	std::cout << "OLD Dimensionally reduced representation:" << std::endl;
 	alg.printMatrix(dr_old.principalComponents());
 	std::cout << "SCORE: " << dr_old.score() << std::endl;
 	// PCA done using Jacobi's method to approximate eigenvalues and eigenvectors.
 	MLPPPCA dr(input_set, 1); // 1 dimensional representation.
@@ -947,47 +931,54 @@ void MLPPTests::test_pca_svd_eigenvalues_eigenvectors(bool ui) {
 	str += dr.principal_components()->to_string();
 	str += "\nSCORE: " + String::num(dr.score()) + "\n";
 	PLOG_MSG(str);
 	*/
 }
 void MLPPTests::test_nlp_and_data(bool ui) {
 	/*
 	MLPPLinAlg alg;
 	MLPPData data;
 	// NLP/DATA
-	std::string verbText = "I am appearing and thinking, as well as conducting.";
+	String verb_text = "I am appearing and thinking, as well as conducting.";
 	std::cout << "Stemming Example:" << std::endl;
 	std::cout << data.stemming(verbText) << std::endl;
 	std::cout << std::endl;
-	std::vector<std::string> sentences = { "He is a good boy", "She is a good girl", "The boy and girl are good" };
+	data.load_default_suffixes();
-	std::cout << "Bag of Words Example:" << std::endl;
+	data.load_default_stop_words();
 	alg.printMatrix(data.BOW(sentences, "Default"));
 	std::cout << std::endl;
 	std::cout << "TFIDF Example:" << std::endl;
 	alg.printMatrix(data.TFIDF(sentences));
 	std::cout << std::endl;
-	std::cout << "Tokenization:" << std::endl;
+	PLOG_MSG("Stemming Example:");
-	alg.printVector(data.tokenize(verbText));
+	PLOG_MSG(data.stemming(verb_text));
 	std::cout << std::endl;
-	std::cout << "Word2Vec:" << std::endl;
+	Vector<String> sentences = String("He is a good boy|She is a good girl|The boy and girl are good").split("|");
 	std::string textArchive = { "He is a good boy. She is a good girl. The boy and girl are good." };
 	std::vector<std::string> corpus = data.splitSentences(textArchive);
-	MLPPData::WordsToVecResult wtvres = data.word_to_vec(corpus, "CBOW", 2, 2, 0.1, 10000); // Can use either CBOW or Skip-n-gram.
+	PLOG_MSG("Bag of Words Example (BAG_OF_WORDS_TYPE_DEFAULT):");
 	PLOG_MSG(data.bag_of_words(sentences, MLPPData::BAG_OF_WORDS_TYPE_DEFAULT)->to_string());
-	alg.printMatrix(wtvres.word_embeddings);
+	PLOG_MSG("Bag of Words Example (BAG_OF_WORDS_TYPE_BINARY):");
-	std::cout << std::endl;
+	PLOG_MSG(data.bag_of_words(sentences, MLPPData::BAG_OF_WORDS_TYPE_BINARY)->to_string());
-	std::vector<std::string> textArchive2 = { "pizza", "pizza hamburger cookie", "hamburger", "ramen", "sushi", "ramen sushi" };
+	PLOG_MSG("TFIDF Example:");
 	PLOG_MSG(data.tfidf(sentences)->to_string());
-	alg.printMatrix(data.LSA(textArchive2, 2));
+	PLOG_MSG("Tokenization:");
-	//alg.printMatrix(data.BOW(textArchive, "Default"));
+	PLOG_MSG(String(Variant(data.tokenize(verb_text))));
 	std::cout << std::endl;
 	String text_archive = "He is a good boy. She is a good girl. The boy and girl are good.";
 	Vector<String> corpus = data.split_sentences(text_archive);
 	PLOG_MSG("Word2Vec (WORD_TO_VEC_TYPE_CBOW):");
 	MLPPData::WordsToVecResult wtvres = data.word_to_vec(corpus, MLPPData::WORD_TO_VEC_TYPE_CBOW, 2, 2, 0.1, 10000); // Can use either CBOW or Skip-n-gram.
 	PLOG_MSG(wtvres.word_embeddings->to_string());
 	PLOG_MSG("Word2Vec (WORD_TO_VEC_TYPE_SKIPGRAM):");
 	MLPPData::WordsToVecResult wtvres2 = data.word_to_vec(corpus, MLPPData::WORD_TO_VEC_TYPE_SKIPGRAM, 2, 2, 0.1, 10000); // Can use either CBOW or Skip-n-gram.
 	PLOG_MSG(wtvres2.word_embeddings->to_string());
 	Vector<String> text_archive2 = String("pizza|pizza hamburger cookie|hamburger|ramen|sushi|ramen sushi").split("|");
 	PLOG_MSG("LSA:");
 	PLOG_MSG(data.lsa(text_archive2, 2)->to_string());
 	/*
 	std::vector<std::vector<real_t>> inputSet = { { 1, 2 }, { 2, 3 }, { 3, 4 }, { 4, 5 }, { 5, 6 } };
 	std::cout << "Feature Scaling Example:" << std::endl;
 	alg.printMatrix(data.featureScaling(inputSet));
--- a/test/mlpp_tests_old.cpp
+++ b/test/mlpp_tests_old.cpp
@@ -348,7 +348,7 @@ void MLPPTestsOld::test_pca_svd_eigenvalues_eigenvectors(bool ui) {
 void MLPPTestsOld::test_nlp_and_data(bool ui) {
 	MLPPLinAlgOld alg;
-	MLPPData data;
+	MLPPDataOld data;
 	// NLP/DATA
 	std::string verbText = "I am appearing and thinking, as well as conducting.";
@@ -372,15 +372,15 @@ void MLPPTestsOld::test_nlp_and_data(bool ui) {
 	std::string textArchive = { "He is a good boy. She is a good girl. The boy and girl are good." };
 	std::vector<std::string> corpus = data.splitSentences(textArchive);
-	MLPPData::WordsToVecResult wtvres = data.word_to_vec(corpus, "CBOW", 2, 2, 0.1, 10000); // Can use either CBOW or Skip-n-gram.
+	std::tuple<std::vector<std::vector<real_t>>, std::vector<std::string>> wtvres = data.word2Vec(corpus, "CBOW", 2, 2, 0.1, 10000); // Can use either CBOW or Skip-n-gram.
-	alg.printMatrix(wtvres.word_embeddings);
+	alg.printMatrix(std::get<0>(wtvres));
 	std::cout << std::endl;
 	std::vector<std::string> textArchive2 = { "pizza", "pizza hamburger cookie", "hamburger", "ramen", "sushi", "ramen sushi" };
 	alg.printMatrix(data.LSA(textArchive2, 2));
-	//alg.printMatrix(data.BOW(textArchive, "Default"));
+	alg.printMatrix(data.BOW(textArchive2, "Default"));
 	std::cout << std::endl;
 	std::vector<std::vector<real_t>> inputSet = { { 1, 2 }, { 2, 3 }, { 3, 4 }, { 4, 5 }, { 5, 6 } };