From 2dabbb42be847b0a5a59278b664aeba444471d7c Mon Sep 17 00:00:00 2001 From: Relintai Date: Thu, 26 Jan 2023 14:52:49 +0100 Subject: [PATCH] Ported all tests. --- mlpp/data/data.cpp | 119 ++++++ mlpp/data/data.h | 18 + mlpp/lin_alg/lin_alg.cpp | 185 +++++++- mlpp/lin_alg/lin_alg.h | 32 +- test/mlpp_tests.cpp | 892 +++++++++++++++++++-------------------- test/mlpp_tests.h | 2 +- 6 files changed, 779 insertions(+), 469 deletions(-) diff --git a/mlpp/data/data.cpp b/mlpp/data/data.cpp index 2647b92..f4d3973 100644 --- a/mlpp/data/data.cpp +++ b/mlpp/data/data.cpp @@ -200,6 +200,53 @@ void MLPPData::set_data_simple(const String &file_name, std::vector &inp memdelete(file); } +MLPPData::SplitComplexData MLPPData::train_test_split(const Ref &data, double test_size) { + SplitComplexData res; + + res.train.instance(); + res.test.instance(); + + ERR_FAIL_COND_V(!data.is_valid(), res); + + int is = MIN(data->input.size(), data->output.size()); + + Array indices; + indices.resize(is); + + for (int i = 0; i < is; ++i) { + indices[i] = i; + } + + indices.shuffle(); + + int test_input_number = test_size * is; // implicit usage of floor + + for (int i = 0; i < test_input_number; ++i) { + int index = indices[i]; + + res.test->input.push_back(data->input[i]); + res.test->output.push_back(data->output[i]); + } + + for (int i = test_input_number; i < is; ++i) { + int index = indices[i]; + + res.train->input.push_back(data->input[i]); + res.train->output.push_back(data->output[i]); + } + + return res; +} +Array MLPPData::train_test_split_bind(const Ref &data, double test_size) { + SplitComplexData res = train_test_split(data, test_size); + + Array arr; + arr.push_back(res.train); + arr.push_back(res.test); + + return arr; +} + // Loading Datasets std::tuple>, std::vector> MLPPData::loadBreastCancer() { const int BREAST_CANCER_SIZE = 30; // k = 30 @@ -280,6 +327,9 @@ std::tuple, std::vector> MLPPData::loadFiresAndCrime return { inputSet, outputSet }; } +// Note that inputs and outputs should be pairs (technically), but this +// implementation will separate them. (My implementation keeps them tied together.) 
+// Not yet sure whether this is intentional or not (or it's something like a compiler specific difference) std::tuple>, std::vector>, std::vector>, std::vector>> MLPPData::trainTestSplit(std::vector> inputSet, std::vector> outputSet, double testSize) { std::random_device rd; std::default_random_engine generator(rd()); @@ -817,6 +867,73 @@ std::tuple>, std::vector> MLPPData: return { wordEmbeddings, wordList }; } +struct WordsToVecResult { + std::vector> word_embeddings; + std::vector word_list; +}; + +MLPPData::WordsToVecResult MLPPData::word_to_vec(std::vector sentences, std::string type, int windowSize, int dimension, double learning_rate, int max_epoch) { + WordsToVecResult res; + + res.word_list = removeNullByte(removeStopWords(createWordList(sentences))); + + std::vector> segmented_sentences; + segmented_sentences.resize(sentences.size()); + + for (int i = 0; i < sentences.size(); i++) { + segmented_sentences[i] = removeStopWords(sentences[i]); + } + + std::vector inputStrings; + std::vector outputStrings; + + for (int i = 0; i < segmented_sentences.size(); i++) { + for (int j = 0; j < segmented_sentences[i].size(); j++) { + for (int k = windowSize; k > 0; k--) { + if (j - k >= 0) { + inputStrings.push_back(segmented_sentences[i][j]); + + outputStrings.push_back(segmented_sentences[i][j - k]); + } + if (j + k <= segmented_sentences[i].size() - 1) { + inputStrings.push_back(segmented_sentences[i][j]); + outputStrings.push_back(segmented_sentences[i][j + k]); + } + } + } + } + + int inputSize = inputStrings.size(); + + inputStrings.insert(inputStrings.end(), outputStrings.begin(), outputStrings.end()); + + std::vector> BOW = MLPPData::BOW(inputStrings, "Binary"); + + std::vector> inputSet; + std::vector> outputSet; + + for (int i = 0; i < inputSize; i++) { + inputSet.push_back(BOW[i]); + } + + for (int i = inputSize; i < BOW.size(); i++) { + outputSet.push_back(BOW[i]); + } + MLPPLinAlg alg; + MLPPSoftmaxNet *model; + if (type == "Skipgram") { + model = new MLPPSoftmaxNet(outputSet, inputSet, dimension); + } else { // else = CBOW. We maintain it is a default. 
+ model = new MLPPSoftmaxNet(inputSet, outputSet, dimension); + } + model->gradientDescent(learning_rate, max_epoch, false); + + res.word_embeddings = model->getEmbeddings(); + delete model; + + return res; +} + std::vector> MLPPData::LSA(std::vector sentences, int dim) { MLPPLinAlg alg; std::vector> docWordData = BOW(sentences, "Binary"); @@ -946,4 +1063,6 @@ void MLPPData::_bind_methods() { ClassDB::bind_method(D_METHOD("load_mnist_test", "path"), &MLPPData::load_mnist_test); ClassDB::bind_method(D_METHOD("load_california_housing", "path"), &MLPPData::load_california_housing); ClassDB::bind_method(D_METHOD("load_fires_and_crime", "path"), &MLPPData::load_fires_and_crime); + + ClassDB::bind_method(D_METHOD("train_test_split", "data", "test_size"), &MLPPData::train_test_split_bind); } diff --git a/mlpp/data/data.h b/mlpp/data/data.h index b639645..e6fd6fd 100644 --- a/mlpp/data/data.h +++ b/mlpp/data/data.h @@ -10,6 +10,7 @@ // #include "core/string/ustring.h" +#include "core/variant/array.h" #include "core/object/reference.h" @@ -68,6 +69,14 @@ public: void set_data_unsupervised(int k, const String &file_name, std::vector> &inputSet); void set_data_simple(const String &file_name, std::vector &inputSet, std::vector &outputSet); + struct SplitComplexData { + Ref train; + Ref test; + }; + + SplitComplexData train_test_split(const Ref &data, double test_size); + Array train_test_split_bind(const Ref &data, double test_size); + // Load Datasets std::tuple>, std::vector> loadBreastCancer(); std::tuple>, std::vector> loadBreastCancerSVC(); @@ -114,7 +123,16 @@ public: std::vector> BOW(std::vector sentences, std::string = "Default"); std::vector> TFIDF(std::vector sentences); + std::tuple>, std::vector> word2Vec(std::vector sentences, std::string type, int windowSize, int dimension, double learning_rate, int max_epoch); + + struct WordsToVecResult { + std::vector> word_embeddings; + std::vector word_list; + }; + + WordsToVecResult word_to_vec(std::vector sentences, std::string type, int windowSize, int dimension, double learning_rate, int max_epoch); + std::vector> LSA(std::vector sentences, int dim); std::vector createWordList(std::vector sentences); diff --git a/mlpp/lin_alg/lin_alg.cpp b/mlpp/lin_alg/lin_alg.cpp index 02700fc..46a091a 100644 --- a/mlpp/lin_alg/lin_alg.cpp +++ b/mlpp/lin_alg/lin_alg.cpp @@ -11,8 +11,6 @@ #include #include - - std::vector> MLPPLinAlg::gramMatrix(std::vector> A) { return matmult(transpose(A), A); // AtA } @@ -507,7 +505,7 @@ std::vector> MLPPLinAlg::identity(double d) { } std::vector> MLPPLinAlg::cov(std::vector> A) { - MLPPStat stat; + MLPPStat stat; std::vector> covMat; covMat.resize(A.size()); for (int i = 0; i < covMat.size(); i++) { @@ -641,6 +639,131 @@ std::tuple>, std::vector>> M return { eigenvectors, a_new }; } +MLPPLinAlg::EigenResult MLPPLinAlg::eigen(std::vector> A) { + /* + A (the entered parameter) in most use cases will be X'X, XX', etc. and must be symmetric. + That simply means that 1) X' = X and 2) X is a square matrix. This function that computes the + eigenvalues of a matrix is utilizing Jacobi's method. + */ + + double diagonal = true; // Perform the iterative Jacobi algorithm unless and until we reach a diagonal matrix which yields us the eigenvals. 
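+	// The do-while loop below performs the Jacobi sweeps: each pass locates the off-diagonal
+	// entry a_ij of largest magnitude, builds the Givens rotation P with
+	// theta = 0.5 * atan(2 * a_ij / (a_ii - a_jj)) (theta = pi/4 when a_ii == a_jj),
+	// updates A <- P^-1 * A * P, and accumulates P into the running eigenvector matrix,
+	// stopping once every off-diagonal entry rounds to zero.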
+ + std::map val_to_vec; + std::vector> a_new; + std::vector> eigenvectors = identity(A.size()); + do { + double a_ij = A[0][1]; + double sub_i = 0; + double sub_j = 1; + for (int i = 0; i < A.size(); i++) { + for (int j = 0; j < A[i].size(); j++) { + if (i != j && std::abs(A[i][j]) > a_ij) { + a_ij = A[i][j]; + sub_i = i; + sub_j = j; + } else if (i != j && std::abs(A[i][j]) == a_ij) { + if (i < sub_i) { + a_ij = A[i][j]; + sub_i = i; + sub_j = j; + } + } + } + } + + double a_ii = A[sub_i][sub_i]; + double a_jj = A[sub_j][sub_j]; + double a_ji = A[sub_j][sub_i]; + double theta; + + if (a_ii == a_jj) { + theta = M_PI / 4; + } else { + theta = 0.5 * atan(2 * a_ij / (a_ii - a_jj)); + } + + std::vector> P = identity(A.size()); + P[sub_i][sub_j] = -std::sin(theta); + P[sub_i][sub_i] = std::cos(theta); + P[sub_j][sub_j] = std::cos(theta); + P[sub_j][sub_i] = std::sin(theta); + + a_new = matmult(matmult(inverse(P), A), P); + + for (int i = 0; i < a_new.size(); i++) { + for (int j = 0; j < a_new[i].size(); j++) { + if (i != j && std::round(a_new[i][j]) == 0) { + a_new[i][j] = 0; + } + } + } + + bool non_zero = false; + for (int i = 0; i < a_new.size(); i++) { + for (int j = 0; j < a_new[i].size(); j++) { + if (i != j && std::round(a_new[i][j]) != 0) { + non_zero = true; + } + } + } + + if (non_zero) { + diagonal = false; + } else { + diagonal = true; + } + + if (a_new == A) { + diagonal = true; + for (int i = 0; i < a_new.size(); i++) { + for (int j = 0; j < a_new[i].size(); j++) { + if (i != j) { + a_new[i][j] = 0; + } + } + } + } + + eigenvectors = matmult(eigenvectors, P); + A = a_new; + + } while (!diagonal); + + std::vector> a_new_prior = a_new; + + // Bubble Sort. Should change this later. + for (int i = 0; i < a_new.size() - 1; i++) { + for (int j = 0; j < a_new.size() - 1 - i; j++) { + if (a_new[j][j] < a_new[j + 1][j + 1]) { + double temp = a_new[j + 1][j + 1]; + a_new[j + 1][j + 1] = a_new[j][j]; + a_new[j][j] = temp; + } + } + } + + for (int i = 0; i < a_new.size(); i++) { + for (int j = 0; j < a_new.size(); j++) { + if (a_new[i][i] == a_new_prior[j][j]) { + val_to_vec[i] = j; + } + } + } + + std::vector> eigen_temp = eigenvectors; + for (int i = 0; i < eigenvectors.size(); i++) { + for (int j = 0; j < eigenvectors[i].size(); j++) { + eigenvectors[i][j] = eigen_temp[i][val_to_vec[j]]; + } + } + + EigenResult res; + res.eigen_vectors = eigenvectors; + res.eigen_values = a_new; + + return res; +} + std::tuple>, std::vector>, std::vector>> MLPPLinAlg::SVD(std::vector> A) { auto [left_eigenvecs, eigenvals] = eig(matmult(A, transpose(A))); auto [right_eigenvecs, right_eigenvals] = eig(matmult(transpose(A), A)); @@ -655,6 +778,26 @@ std::tuple>, std::vector>, s return { left_eigenvecs, sigma, right_eigenvecs }; } +MLPPLinAlg::SDVResult MLPPLinAlg::svd(std::vector> A) { + EigenResult left_eigen = eigen(matmult(A, transpose(A))); + EigenResult right_eigen = eigen(matmult(transpose(A), A)); + + std::vector> singularvals = sqrt(left_eigen.eigen_values); + std::vector> sigma = zeromat(A.size(), A[0].size()); + for (int i = 0; i < singularvals.size(); i++) { + for (int j = 0; j < singularvals[i].size(); j++) { + sigma[i][j] = singularvals[i][j]; + } + } + + SDVResult res; + res.U = left_eigen.eigen_vectors; + res.S = sigma; + res.Vt = right_eigen.eigen_vectors; + + return res; +} + std::vector MLPPLinAlg::vectorProjection(std::vector a, std::vector b) { double product = dot(a, b) / dot(a, a); return scalarMultiply(product, a); // Projection of vector a onto b. Denotated as proj_a(b). 
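// Minimal usage sketch of the new struct-returning decompositions (illustration only, not part
// of the patch; the 2x2 symmetric matrix is an arbitrary example, and the member names mirror
// the EigenResult/SDVResult structs declared in lin_alg.h):
MLPPLinAlg alg;
std::vector<std::vector<double>> A = { { 2, 1 }, { 1, 2 } };
MLPPLinAlg::EigenResult er = alg.eigen(A); // er.eigen_values holds the diagonalized matrix, er.eigen_vectors the accumulated rotations
MLPPLinAlg::SDVResult sr = alg.svd(A);     // sr.U from eigen(AA^T), sr.Vt from eigen(A^T A), sr.S = sqrt of the eigenvalues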
@@ -686,6 +829,15 @@ std::tuple>, std::vector>> M return { Q, R }; } +MLPPLinAlg::QRDResult MLPPLinAlg::qrd(std::vector> A) { + QRDResult res; + + res.Q = gramSchmidtProcess(A); + res.R = matmult(transpose(res.Q), A); + + return res; +} + std::tuple>, std::vector>> MLPPLinAlg::chol(std::vector> A) { std::vector> L = zeromat(A.size(), A[0].size()); for (int j = 0; j < L.size(); j++) { // Matrices entered must be square. No problem here. @@ -708,6 +860,33 @@ std::tuple>, std::vector>> M return { L, transpose(L) }; // Indeed, L.T is our upper triangular matrix. } +MLPPLinAlg::CholeskyResult MLPPLinAlg::cholesky(std::vector> A) { + std::vector> L = zeromat(A.size(), A[0].size()); + for (int j = 0; j < L.size(); j++) { // Matrices entered must be square. No problem here. + for (int i = j; i < L.size(); i++) { + if (i == j) { + double sum = 0; + for (int k = 0; k < j; k++) { + sum += L[i][k] * L[i][k]; + } + L[i][j] = std::sqrt(A[i][j] - sum); + } else { // That is, i!=j + double sum = 0; + for (int k = 0; k < j; k++) { + sum += L[i][k] * L[j][k]; + } + L[i][j] = (A[i][j] - sum) / L[j][j]; + } + } + } + + CholeskyResult res; + res.L = L; + res.Lt = transpose(L); // Indeed, L.T is our upper triangular matrix. + + return res; +} + double MLPPLinAlg::sum_elements(std::vector> A) { double sum = 0; for (int i = 0; i < A.size(); i++) { diff --git a/mlpp/lin_alg/lin_alg.h b/mlpp/lin_alg/lin_alg.h index bb3c343..85109b5 100644 --- a/mlpp/lin_alg/lin_alg.h +++ b/mlpp/lin_alg/lin_alg.h @@ -11,7 +11,6 @@ #include #include - class MLPPLinAlg { public: // MATRIX FUNCTIONS @@ -98,16 +97,45 @@ public: std::tuple>, std::vector>> eig(std::vector> A); + struct EigenResult { + std::vector> eigen_vectors; + std::vector> eigen_values; + }; + + EigenResult eigen(std::vector> A); + std::tuple>, std::vector>, std::vector>> SVD(std::vector> A); + struct SDVResult { + std::vector> U; + std::vector> S; + std::vector> Vt; + }; + + SDVResult svd(std::vector> A); + std::vector vectorProjection(std::vector a, std::vector b); std::vector> gramSchmidtProcess(std::vector> A); std::tuple>, std::vector>> QRD(std::vector> A); + struct QRDResult { + std::vector> Q; + std::vector> R; + }; + + QRDResult qrd(std::vector> A); + std::tuple>, std::vector>> chol(std::vector> A); + struct CholeskyResult { + std::vector> L; + std::vector> Lt; + }; + + CholeskyResult cholesky(std::vector> A); + double sum_elements(std::vector> A); std::vector flatten(std::vector> A); @@ -231,6 +259,4 @@ public: private: }; - - #endif /* LinAlg_hpp */ \ No newline at end of file diff --git a/test/mlpp_tests.cpp b/test/mlpp_tests.cpp index 660c658..34d2440 100644 --- a/test/mlpp_tests.cpp +++ b/test/mlpp_tests.cpp @@ -321,632 +321,601 @@ void MLPPTests::test_probit_regression(bool ui) { std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; } void MLPPTests::test_c_log_log_regression(bool ui) { - //MLPPStat stat; - //MLPPLinAlg alg; - //MLPPActivation avn; - //MLPPCost cost; - //MLPPData data; - //MLPPConvolutions conv; + MLPPLinAlg alg; - // // CLOGLOG REGRESSION - // std::vector> inputSet = {{1,2,3,4,5,6,7,8}, {0,0,0,0,1,1,1,1}}; - // std::vector outputSet = {0,0,0,0,1,1,1,1}; - // CLogLogReg model(alg.transpose(inputSet), outputSet); - // model.SGD(0.1, 10000, 0); - // alg.printVector(model.modelSetTest(alg.transpose(inputSet))); - // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; + // CLOGLOG REGRESSION + std::vector> inputSet = { { 1, 2, 3, 4, 5, 6, 7, 8 }, { 0, 0, 0, 0, 1, 1, 1, 1 } }; + std::vector outputSet = { 0, 
0, 0, 0, 1, 1, 1, 1 }; + MLPPCLogLogReg model(alg.transpose(inputSet), outputSet); + model.SGD(0.1, 10000, ui); + alg.printVector(model.modelSetTest(alg.transpose(inputSet))); + std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; } void MLPPTests::test_exp_reg_regression(bool ui) { - //MLPPStat stat; - //MLPPLinAlg alg; - //MLPPActivation avn; - //MLPPCost cost; - //MLPPData data; - //MLPPConvolutions conv; + MLPPLinAlg alg; - // // EXPREG REGRESSION - // std::vector> inputSet = {{0,1,2,3,4}}; - // std::vector outputSet = {1,2,4,8,16}; - // ExpReg model(alg.transpose(inputSet), outputSet); - // model.SGD(0.001, 10000, 0); - // alg.printVector(model.modelSetTest(alg.transpose(inputSet))); - // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; + // EXPREG REGRESSION + std::vector> inputSet = { { 0, 1, 2, 3, 4 } }; + std::vector outputSet = { 1, 2, 4, 8, 16 }; + MLPPExpReg model(alg.transpose(inputSet), outputSet); + model.SGD(0.001, 10000, ui); + alg.printVector(model.modelSetTest(alg.transpose(inputSet))); + std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; } void MLPPTests::test_tanh_regression(bool ui) { - //MLPPStat stat; - //MLPPLinAlg alg; - //MLPPActivation avn; - //MLPPCost cost; - //MLPPData data; - //MLPPConvolutions conv; + MLPPLinAlg alg; - // // TANH REGRESSION - // std::vector> inputSet = {{4,3,0,-3,-4}, {0,0,0,1,1}}; - // std::vector outputSet = {1,1,0,-1,-1}; - // TanhReg model(alg.transpose(inputSet), outputSet); - // model.SGD(0.1, 10000, 0); - // alg.printVector(model.modelSetTest(alg.transpose(inputSet))); - // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; + // TANH REGRESSION + std::vector> inputSet = { { 4, 3, 0, -3, -4 }, { 0, 0, 0, 1, 1 } }; + std::vector outputSet = { 1, 1, 0, -1, -1 }; + MLPPTanhReg model(alg.transpose(inputSet), outputSet); + model.SGD(0.1, 10000, ui); + alg.printVector(model.modelSetTest(alg.transpose(inputSet))); + std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; } void MLPPTests::test_softmax_regression(bool ui) { - //MLPPStat stat; - //MLPPLinAlg alg; - //MLPPActivation avn; - //MLPPCost cost; - //MLPPData data; - //MLPPConvolutions conv; + MLPPLinAlg alg; + MLPPData data; - // // SOFTMAX REGRESSION - // auto [inputSet, outputSet] = data.loadIris(); - // SoftmaxReg model(inputSet, outputSet); - // model.SGD(0.1, 10000, 1); - // alg.printMatrix(model.modelSetTest(inputSet)); - // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; + // SOFTMAX REGRESSION + Ref dt = data.load_iris(_iris_data_path); + MLPPSoftmaxReg model(dt->input, dt->output); + model.SGD(0.1, 10000, ui); + alg.printMatrix(model.modelSetTest(dt->input)); + std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; } void MLPPTests::test_support_vector_classification(bool ui) { //MLPPStat stat; - //MLPPLinAlg alg; + MLPPLinAlg alg; //MLPPActivation avn; //MLPPCost cost; - //MLPPData data; + MLPPData data; //MLPPConvolutions conv; - // // SUPPORT VECTOR CLASSIFICATION - // auto [inputSet, outputSet] = data.loadBreastCancerSVC(); - // SVC model(inputSet, outputSet, 1); - // model.SGD(0.00001, 100000, 1); - // alg.printVector(model.modelSetTest(inputSet)); - // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; - - // SoftmaxReg model(inputSet, outputSet); - // model.SGD(0.001, 20000, 0); - // alg.printMatrix(model.modelSetTest(inputSet)); + // SUPPORT VECTOR CLASSIFICATION + Ref dt = 
data.load_breast_cancer_svc(_breast_cancer_svm_data_path); + MLPPSVC model(dt->input, dt->output, ui); + model.SGD(0.00001, 100000, ui); + alg.printVector(model.modelSetTest(dt->input)); + std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; } void MLPPTests::test_mlp(bool ui) { - //MLPPStat stat; - //MLPPLinAlg alg; - //MLPPActivation avn; - //MLPPCost cost; - //MLPPData data; - //MLPPConvolutions conv; + MLPPLinAlg alg; - // // MLP - // std::vector> inputSet = {{0,0,1,1}, {0,1,0,1}}; - // inputSet = alg.transpose(inputSet); - // std::vector outputSet = {0,1,1,0}; + // MLP + std::vector> inputSet = { { 0, 0, 1, 1 }, { 0, 1, 0, 1 } }; + inputSet = alg.transpose(inputSet); + std::vector outputSet = { 0, 1, 1, 0 }; - // MLP model(inputSet, outputSet, 2); - // model.gradientDescent(0.1, 10000, 0); - // alg.printVector(model.modelSetTest(inputSet)); - // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; + MLPPMLP model(inputSet, outputSet, 2); + model.gradientDescent(0.1, 10000, ui); + alg.printVector(model.modelSetTest(inputSet)); + std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; } void MLPPTests::test_soft_max_network(bool ui) { - //MLPPStat stat; - //MLPPLinAlg alg; - //MLPPActivation avn; - //MLPPCost cost; - //MLPPData data; - //MLPPConvolutions conv; + MLPPLinAlg alg; + MLPPData data; - // // SOFTMAX NETWORK - // auto [inputSet, outputSet] = data.loadWine(); - // SoftmaxNet model(inputSet, outputSet, 1); - // model.gradientDescent(0.01, 100000, 1); - // alg.printMatrix(model.modelSetTest(inputSet)); - // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; + // SOFTMAX NETWORK + Ref dt = data.load_wine(_wine_data_path); + + MLPPSoftmaxNet model(dt->input, dt->output, 1); + model.gradientDescent(0.01, 100000, ui); + alg.printMatrix(model.modelSetTest(dt->input)); + std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; } void MLPPTests::test_autoencoder(bool ui) { - //MLPPStat stat; - //MLPPLinAlg alg; - //MLPPActivation avn; - //MLPPCost cost; - //MLPPData data; - //MLPPConvolutions conv; + MLPPLinAlg alg; - // // AUTOENCODER - // std::vector> inputSet = {{1,2,3,4,5,6,7,8,9,10}, {3,5,9,12,15,18,21,24,27,30}}; - // AutoEncoder model(alg.transpose(inputSet), 5); - // model.SGD(0.001, 300000, 0); - // alg.printMatrix(model.modelSetTest(alg.transpose(inputSet))); - // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; + // AUTOENCODER + std::vector> inputSet = { { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }, { 3, 5, 9, 12, 15, 18, 21, 24, 27, 30 } }; + MLPPAutoEncoder model(alg.transpose(inputSet), 5); + model.SGD(0.001, 300000, ui); + alg.printMatrix(model.modelSetTest(alg.transpose(inputSet))); + std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; } void MLPPTests::test_dynamically_sized_ann(bool ui) { - //MLPPStat stat; - //MLPPLinAlg alg; - //MLPPActivation avn; - //MLPPCost cost; - //MLPPData data; - //MLPPConvolutions conv; + MLPPLinAlg alg; // DYNAMICALLY SIZED ANN // Possible Weight Init Methods: Default, Uniform, HeNormal, HeUniform, XavierNormal, XavierUniform // Possible Activations: Linear, Sigmoid, Swish, Softplus, Softsign, CLogLog, Ar{Sinh, Cosh, Tanh, Csch, Sech, Coth}, GaussianCDF, GELU, UnitStep // Possible Loss Functions: MSE, RMSE, MBE, LogLoss, CrossEntropy, HingeLoss - // std::vector> inputSet = {{0,0,1,1}, {0,1,0,1}}; - // std::vector outputSet = {0,1,1,0}; - // ANN ann(alg.transpose(inputSet), outputSet); - // ann.addLayer(2, "Cosh"); - // 
ann.addOutputLayer("Sigmoid", "LogLoss"); + std::vector> inputSet = { { 0, 0, 1, 1 }, { 0, 1, 0, 1 } }; + std::vector outputSet = { 0, 1, 1, 0 }; + MLPPANN ann(alg.transpose(inputSet), outputSet); + ann.addLayer(2, "Cosh"); + ann.addOutputLayer("Sigmoid", "LogLoss"); - // ann.AMSGrad(0.1, 10000, 1, 0.9, 0.999, 0.000001, 1); - // ann.Adadelta(1, 1000, 2, 0.9, 0.000001, 1); - // ann.Momentum(0.1, 8000, 2, 0.9, true, 1); + ann.AMSGrad(0.1, 10000, 1, 0.9, 0.999, 0.000001, ui); + ann.Adadelta(1, 1000, 2, 0.9, 0.000001, ui); + ann.Momentum(0.1, 8000, 2, 0.9, true, ui); - //ann.setLearningRateScheduler("Step", 0.5, 1000); - // ann.gradientDescent(0.01, 30000); - // alg.printVector(ann.modelSetTest(alg.transpose(inputSet))); - // std::cout << "ACCURACY: " << 100 * ann.score() << "%" << std::endl; + ann.setLearningRateScheduler("Step", 0.5, 1000); + ann.gradientDescent(0.01, 30000); + alg.printVector(ann.modelSetTest(alg.transpose(inputSet))); + std::cout << "ACCURACY: " << 100 * ann.score() << "%" << std::endl; } void MLPPTests::test_wgan(bool ui) { //MLPPStat stat; - //MLPPLinAlg alg; + MLPPLinAlg alg; //MLPPActivation avn; //MLPPCost cost; //MLPPData data; //MLPPConvolutions conv; - /* - std::vector> outputSet = {{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}, - {2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40}}; + std::vector> outputSet = { + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20 }, + { 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40 } + }; - WGAN gan(2, alg.transpose(outputSet)); // our gan is a wasserstein gan (wgan) + MLPPWGAN gan(2, alg.transpose(outputSet)); // our gan is a wasserstein gan (wgan) gan.addLayer(5, "Sigmoid"); gan.addLayer(2, "RELU"); gan.addLayer(5, "Sigmoid"); gan.addOutputLayer(); // User can specify weight init- if necessary. - gan.gradientDescent(0.1, 55000, 0); + gan.gradientDescent(0.1, 55000, ui); std::cout << "GENERATED INPUT: (Gaussian-sampled noise):" << std::endl; alg.printMatrix(gan.generateExample(100)); - */ } void MLPPTests::test_ann(bool ui) { - //MLPPStat stat; - //MLPPLinAlg alg; - //MLPPActivation avn; - //MLPPCost cost; - //MLPPData data; - //MLPPConvolutions conv; + MLPPLinAlg alg; - // typedef std::vector> Matrix; - // typedef std::vector Vector; + std::vector> inputSet = { { 0, 0 }, { 0, 1 }, { 1, 0 }, { 1, 1 } }; // XOR + std::vector outputSet = { 0, 1, 1, 0 }; - // Matrix inputSet = {{0,0}, {0,1}, {1,0}, {1,1}}; // XOR - // Vector outputSet = {0,1,1,0}; + MLPPANN ann(inputSet, outputSet); + ann.addLayer(5, "Sigmoid"); + ann.addLayer(8, "Sigmoid"); // Add more layers as needed. + ann.addOutputLayer("Sigmoid", "LogLoss"); + ann.gradientDescent(1, 20000, ui); - // ANN ann(inputSet, outputSet); - // ann.addLayer(5, "Sigmoid"); - // ann.addLayer(8, "Sigmoid"); // Add more layers as needed. - // ann.addOutputLayer("Sigmoid", "LogLoss"); - // ann.gradientDescent(1, 20000, 1); - - // Vector predictions = ann.modelSetTest(inputSet); - // alg.printVector(predictions); // Testing out the model's preds for train set. - // std::cout << "ACCURACY: " << 100 * ann.score() << "%" << std::endl; // Accuracy. + std::vector predictions = ann.modelSetTest(inputSet); + alg.printVector(predictions); // Testing out the model's preds for train set. + std::cout << "ACCURACY: " << 100 * ann.score() << "%" << std::endl; // Accuracy. 
} void MLPPTests::test_dynamically_sized_mann(bool ui) { - //MLPPStat stat; - //MLPPLinAlg alg; - //MLPPActivation avn; - //MLPPCost cost; - //MLPPData data; - //MLPPConvolutions conv; + MLPPLinAlg alg; + MLPPData data; - // // DYNAMICALLY SIZED MANN (Multidimensional Output ANN) - // std::vector> inputSet = {{1,2,3},{2,4,6},{3,6,9},{4,8,12}}; - // std::vector> outputSet = {{1,5}, {2,10}, {3,15}, {4,20}}; + // DYNAMICALLY SIZED MANN (Multidimensional Output ANN) + std::vector> inputSet = { { 1, 2, 3 }, { 2, 4, 6 }, { 3, 6, 9 }, { 4, 8, 12 } }; + std::vector> outputSet = { { 1, 5 }, { 2, 10 }, { 3, 15 }, { 4, 20 } }; - // MANN mann(inputSet, outputSet); - // mann.addOutputLayer("Linear", "MSE"); - // mann.gradientDescent(0.001, 80000, 0); - // alg.printMatrix(mann.modelSetTest(inputSet)); - // std::cout << "ACCURACY: " << 100 * mann.score() << "%" << std::endl; - - // std::vector> inputSet; - // std::vector tempOutputSet; - // data.setData(4, "/Users/marcmelikyan/Desktop/Data/Iris.csv", inputSet, tempOutputSet); - // std::vector> outputSet = data.oneHotRep(tempOutputSet, 3); + MLPPMANN mann(inputSet, outputSet); + mann.addOutputLayer("Linear", "MSE"); + mann.gradientDescent(0.001, 80000, 0); + alg.printMatrix(mann.modelSetTest(inputSet)); + std::cout << "ACCURACY: " << 100 * mann.score() << "%" << std::endl; } void MLPPTests::test_train_test_split_mann(bool ui) { - //MLPPStat stat; - //MLPPLinAlg alg; - //MLPPActivation avn; - //MLPPCost cost; - //MLPPData data; - //MLPPConvolutions conv; + MLPPLinAlg alg; + MLPPData data; // TRAIN TEST SPLIT CHECK - // std::vector> inputSet1 = {{1,2,3,4,5,6,7,8,9,10}, {3,5,9,12,15,18,21,24,27,30}}; - // std::vector> outputSet1 = {{2,4,6,8,10,12,14,16,18,20}}; - // auto [inputSet, outputSet, inputTestSet, outputTestSet] = data.trainTestSplit(alg.transpose(inputSet1), alg.transpose(outputSet1), 0.2); - // alg.printMatrix(inputSet); - // alg.printMatrix(outputSet); - // alg.printMatrix(inputTestSet); - // alg.printMatrix(outputTestSet); + std::vector> inputSet1 = { { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }, { 3, 5, 9, 12, 15, 18, 21, 24, 27, 30 } }; + std::vector> outputSet1 = { { 2, 4, 6, 8, 10, 12, 14, 16, 18, 20 } }; - // alg.printMatrix(inputSet); - // alg.printMatrix(outputSet); + Ref d; + d.instance(); - // MANN mann(inputSet, outputSet); - // mann.addLayer(100, "RELU", "XavierNormal"); - // mann.addOutputLayer("Softmax", "CrossEntropy", "XavierNormal"); - // mann.gradientDescent(0.1, 80000, 1); - // alg.printMatrix(mann.modelSetTest(inputSet)); - // std::cout << "ACCURACY: " << 100 * mann.score() << "%" << std::endl; + d->input = alg.transpose(inputSet1); + d->output = alg.transpose(outputSet1); + + MLPPData::SplitComplexData split_data = data.train_test_split(d, 0.2); + + alg.printMatrix(split_data.train->input); + alg.printMatrix(split_data.train->output); + alg.printMatrix(split_data.test->input); + alg.printMatrix(split_data.test->output); + + MLPPMANN mann(split_data.train->input, split_data.train->output); + mann.addLayer(100, "RELU", "XavierNormal"); + mann.addOutputLayer("Softmax", "CrossEntropy", "XavierNormal"); + mann.gradientDescent(0.1, 80000, 1); + alg.printMatrix(mann.modelSetTest(split_data.test->input)); + std::cout << "ACCURACY: " << 100 * mann.score() << "%" << std::endl; } -void MLPPTests::test_naive_bayes(bool ui) { - //MLPPStat stat; - //MLPPLinAlg alg; - //MLPPActivation avn; - //MLPPCost cost; - //MLPPData data; - //MLPPConvolutions conv; +void MLPPTests::test_naive_bayes() { + MLPPLinAlg alg; - // // NAIVE BAYES - // std::vector> 
inputSet = {{1,1,1,1,1}, {0,0,1,1,1}, {0,0,1,0,1}}; - // std::vector outputSet = {0,1,0,1,1}; + // NAIVE BAYES + std::vector> inputSet = { { 1, 1, 1, 1, 1 }, { 0, 0, 1, 1, 1 }, { 0, 0, 1, 0, 1 } }; + std::vector outputSet = { 0, 1, 0, 1, 1 }; - // MultinomialNB MNB(alg.transpose(inputSet), outputSet, 2); - // alg.printVector(MNB.modelSetTest(alg.transpose(inputSet))); + MLPPMultinomialNB MNB(alg.transpose(inputSet), outputSet, 2); + alg.printVector(MNB.modelSetTest(alg.transpose(inputSet))); - // BernoulliNB BNB(alg.transpose(inputSet), outputSet); - // alg.printVector(BNB.modelSetTest(alg.transpose(inputSet))); + MLPPBernoulliNB BNB(alg.transpose(inputSet), outputSet); + alg.printVector(BNB.modelSetTest(alg.transpose(inputSet))); - // GaussianNB GNB(alg.transpose(inputSet), outputSet, 2); - // alg.printVector(GNB.modelSetTest(alg.transpose(inputSet))); + MLPPGaussianNB GNB(alg.transpose(inputSet), outputSet, 2); + alg.printVector(GNB.modelSetTest(alg.transpose(inputSet))); } void MLPPTests::test_k_means(bool ui) { - //MLPPStat stat; - //MLPPLinAlg alg; - //MLPPActivation avn; - //MLPPCost cost; - //MLPPData data; - //MLPPConvolutions conv; + MLPPLinAlg alg; - // // KMeans - // std::vector> inputSet = {{32, 0, 7}, {2, 28, 17}, {0, 9, 23}}; - // KMeans kmeans(inputSet, 3, "KMeans++"); - // kmeans.train(3, 1); - // std::cout << std::endl; - // alg.printMatrix(kmeans.modelSetTest(inputSet)); // Returns the assigned centroids to each of the respective training examples - // std::cout << std::endl; - // alg.printVector(kmeans.silhouette_scores()); + // KMeans + std::vector> inputSet = { { 32, 0, 7 }, { 2, 28, 17 }, { 0, 9, 23 } }; + MLPPKMeans kmeans(inputSet, 3, "KMeans++"); + kmeans.train(3, ui); + std::cout << std::endl; + alg.printMatrix(kmeans.modelSetTest(inputSet)); // Returns the assigned centroids to each of the respective training examples + std::cout << std::endl; + alg.printVector(kmeans.silhouette_scores()); } void MLPPTests::test_knn(bool ui) { - //MLPPStat stat; - //MLPPLinAlg alg; - //MLPPActivation avn; - //MLPPCost cost; - //MLPPData data; - //MLPPConvolutions conv; + MLPPLinAlg alg; - // // kNN - // std::vector> inputSet = {{1,2,3,4,5,6,7,8}, {0,0,0,0,1,1,1,1}}; - // std::vector outputSet = {0,0,0,0,1,1,1,1}; - // kNN knn(alg.transpose(inputSet), outputSet, 8); - // alg.printVector(knn.modelSetTest(alg.transpose(inputSet))); - // std::cout << "ACCURACY: " << 100 * knn.score() << "%" << std::endl; + // kNN + std::vector> inputSet = { { 1, 2, 3, 4, 5, 6, 7, 8 }, { 0, 0, 0, 0, 1, 1, 1, 1 } }; + std::vector outputSet = { 0, 0, 0, 0, 1, 1, 1, 1 }; + MLPPKNN knn(alg.transpose(inputSet), outputSet, 8); + alg.printVector(knn.modelSetTest(alg.transpose(inputSet))); + std::cout << "ACCURACY: " << 100 * knn.score() << "%" << std::endl; } void MLPPTests::test_convolution_tensors_etc() { - //MLPPStat stat; - //MLPPLinAlg alg; - //MLPPActivation avn; - //MLPPCost cost; - //MLPPData data; - //MLPPConvolutions conv; + MLPPLinAlg alg; + MLPPData data; + MLPPConvolutions conv; - // // CONVOLUTION, POOLING, ETC.. - // std::vector> input = { - // {1}, - // }; + // CONVOLUTION, POOLING, ETC.. 
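+	// The 1x1 "image" below is pushed three times so that rgb2xyz receives a three-channel
+	// (R, G, B) tensor to convert into XYZ color space.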
+ std::vector> input = { + { 1 }, + }; - // std::vector>> tensorSet; - // tensorSet.push_back(input); - // tensorSet.push_back(input); - // tensorSet.push_back(input); + std::vector>> tensorSet; + tensorSet.push_back(input); + tensorSet.push_back(input); + tensorSet.push_back(input); - // alg.printTensor(data.rgb2xyz(tensorSet)); + alg.printTensor(data.rgb2xyz(tensorSet)); - // std::vector> input = { - // {62,55,55,54,49,48,47,55}, - // {62,57,54,52,48,47,48,53}, - // {61,60,52,49,48,47,49,54}, - // {63,61,60,60,63,65,68,65}, - // {67,67,70,74,79,85,91,92}, - // {82,95,101,106,114,115,112,117}, - // {96,111,115,119,128,128,130,127}, - // {109,121,127,133,139,141,140,133}, - // }; + std::vector> input2 = { + { 62, 55, 55, 54, 49, 48, 47, 55 }, + { 62, 57, 54, 52, 48, 47, 48, 53 }, + { 61, 60, 52, 49, 48, 47, 49, 54 }, + { 63, 61, 60, 60, 63, 65, 68, 65 }, + { 67, 67, 70, 74, 79, 85, 91, 92 }, + { 82, 95, 101, 106, 114, 115, 112, 117 }, + { 96, 111, 115, 119, 128, 128, 130, 127 }, + { 109, 121, 127, 133, 139, 141, 140, 133 }, + }; - // Transforms trans; + MLPPTransforms trans; - // alg.printMatrix(trans.discreteCosineTransform(input)); + alg.printMatrix(trans.discreteCosineTransform(input2)); - // alg.printMatrix(conv.convolve(input, conv.getPrewittVertical(), 1)); // Can use padding - // alg.printMatrix(conv.pool(input, 4, 4, "Max")); // Can use Max, Min, or Average pooling. + alg.printMatrix(conv.convolve(input2, conv.getPrewittVertical(), 1)); // Can use padding + alg.printMatrix(conv.pool(input2, 4, 4, "Max")); // Can use Max, Min, or Average pooling. - // std::vector>> tensorSet; - // tensorSet.push_back(input); - // tensorSet.push_back(input); - // alg.printVector(conv.globalPool(tensorSet, "Average")); // Can use Max, Min, or Average global pooling. + std::vector>> tensorSet2; + tensorSet2.push_back(input2); + tensorSet2.push_back(input2); + alg.printVector(conv.globalPool(tensorSet2, "Average")); // Can use Max, Min, or Average global pooling. - // std::vector> laplacian = {{1, 1, 1}, {1, -4, 1}, {1, 1, 1}}; - // alg.printMatrix(conv.convolve(conv.gaussianFilter2D(5, 1), laplacian, 1)); + std::vector> laplacian = { { 1, 1, 1 }, { 1, -4, 1 }, { 1, 1, 1 } }; + alg.printMatrix(conv.convolve(conv.gaussianFilter2D(5, 1), laplacian, 1)); } void MLPPTests::test_pca_svd_eigenvalues_eigenvectors(bool ui) { - //MLPPStat stat; - //MLPPLinAlg alg; - //MLPPActivation avn; - //MLPPCost cost; - //MLPPData data; - //MLPPConvolutions conv; + MLPPLinAlg alg; - // // PCA, SVD, eigenvalues & eigenvectors - // std::vector> inputSet = {{1,1}, {1,1}}; - // auto [Eigenvectors, Eigenvalues] = alg.eig(inputSet); - // std::cout << "Eigenvectors:" << std::endl; - // alg.printMatrix(Eigenvectors); - // std::cout << std::endl; - // std::cout << "Eigenvalues:" << std::endl; - // alg.printMatrix(Eigenvalues); + // PCA, SVD, eigenvalues & eigenvectors + std::vector> inputSet = { { 1, 1 }, { 1, 1 } }; - // auto [U, S, Vt] = alg.SVD(inputSet); + MLPPLinAlg::EigenResult eigen = alg.eigen(inputSet); - // // PCA done using Jacobi's method to approximate eigenvalues and eigenvectors. - // PCA dr(inputSet, 1); // 1 dimensional representation. 
- // std::cout << std::endl; - // std::cout << "Dimensionally reduced representation:" << std::endl; - // alg.printMatrix(dr.principalComponents()); - // std::cout << "SCORE: " << dr.score() << std::endl; + std::cout << "Eigenvectors:" << std::endl; + alg.printMatrix(eigen.eigen_vectors); + std::cout << std::endl; + std::cout << "Eigenvalues:" << std::endl; + alg.printMatrix(eigen.eigen_values); + + std::cout << "SVD" << std::endl; + + MLPPLinAlg::SDVResult svd = alg.svd(inputSet); + + std::cout << "U:" << std::endl; + alg.printMatrix(svd.U); + std::cout << "S:" << std::endl; + alg.printMatrix(svd.S); + std::cout << "Vt:" << std::endl; + alg.printMatrix(svd.Vt); + + std::cout << "PCA" << std::endl; + + // PCA done using Jacobi's method to approximate eigenvalues and eigenvectors. + MLPPPCA dr(inputSet, 1); // 1 dimensional representation. + std::cout << std::endl; + std::cout << "Dimensionally reduced representation:" << std::endl; + alg.printMatrix(dr.principalComponents()); + std::cout << "SCORE: " << dr.score() << std::endl; } void MLPPTests::test_nlp_and_data(bool ui) { - //MLPPStat stat; - //MLPPLinAlg alg; - //MLPPActivation avn; - //MLPPCost cost; - //MLPPData data; - //MLPPConvolutions conv; + MLPPLinAlg alg; + MLPPData data; - // // NLP/DATA - // std::string verbText = "I am appearing and thinking, as well as conducting."; - // std::cout << "Stemming Example:" << std::endl; - // std::cout << data.stemming(verbText) << std::endl; - // std::cout << std::endl; + // NLP/DATA + std::string verbText = "I am appearing and thinking, as well as conducting."; + std::cout << "Stemming Example:" << std::endl; + std::cout << data.stemming(verbText) << std::endl; + std::cout << std::endl; - // std::vector sentences = {"He is a good boy", "She is a good girl", "The boy and girl are good"}; - // std::cout << "Bag of Words Example:" << std::endl; - // alg.printMatrix(data.BOW(sentences, "Default")); - // std::cout << std::endl; - // std::cout << "TFIDF Example:" << std::endl; - // alg.printMatrix(data.TFIDF(sentences)); - // std::cout << std::endl; + std::vector sentences = { "He is a good boy", "She is a good girl", "The boy and girl are good" }; + std::cout << "Bag of Words Example:" << std::endl; + alg.printMatrix(data.BOW(sentences, "Default")); + std::cout << std::endl; + std::cout << "TFIDF Example:" << std::endl; + alg.printMatrix(data.TFIDF(sentences)); + std::cout << std::endl; - // std::cout << "Tokenization:" << std::endl; - // alg.printVector(data.tokenize(verbText)); - // std::cout << std::endl; + std::cout << "Tokenization:" << std::endl; + alg.printVector(data.tokenize(verbText)); + std::cout << std::endl; - // std::cout << "Word2Vec:" << std::endl; - // std::string textArchive = {"He is a good boy. She is a good girl. The boy and girl are good."}; - // std::vector corpus = data.splitSentences(textArchive); - // auto [wordEmbeddings, wordList] = data.word2Vec(corpus, "CBOW", 2, 2, 0.1, 10000); // Can use either CBOW or Skip-n-gram. - // alg.printMatrix(wordEmbeddings); - // std::cout << std::endl; + std::cout << "Word2Vec:" << std::endl; + std::string textArchive = { "He is a good boy. She is a good girl. The boy and girl are good." }; + std::vector corpus = data.splitSentences(textArchive); - // std::vector textArchive = {"pizza", "pizza hamburger cookie", "hamburger", "ramen", "sushi", "ramen sushi"}; + MLPPData::WordsToVecResult wtvres = data.word_to_vec(corpus, "CBOW", 2, 2, 0.1, 10000); // Can use either CBOW or Skip-n-gram. 
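+	// word_to_vec arguments: (sentences, type, windowSize, dimension, learning_rate, max_epoch).
+	// Passing "Skipgram" instead of "CBOW" swaps the input/output bag-of-words sets fed to
+	// MLPPSoftmaxNet (see MLPPData::word_to_vec in data.cpp); any other string falls back to CBOW.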
- // alg.printMatrix(data.LSA(textArchive, 2)); - // //alg.printMatrix(data.BOW(textArchive, "Default")); - // std::cout << std::endl; + alg.printMatrix(wtvres.word_embeddings); + std::cout << std::endl; - // std::vector> inputSet = {{1,2},{2,3},{3,4},{4,5},{5,6}}; - // std::cout << "Feature Scaling Example:" << std::endl; - // alg.printMatrix(data.featureScaling(inputSet)); - // std::cout << std::endl; + std::vector textArchive2 = { "pizza", "pizza hamburger cookie", "hamburger", "ramen", "sushi", "ramen sushi" }; - // std::cout << "Mean Centering Example:" << std::endl; - // alg.printMatrix(data.meanCentering(inputSet)); - // std::cout << std::endl; + alg.printMatrix(data.LSA(textArchive2, 2)); + //alg.printMatrix(data.BOW(textArchive, "Default")); + std::cout << std::endl; - // std::cout << "Mean Normalization Example:" << std::endl; - // alg.printMatrix(data.meanNormalization(inputSet)); - // std::cout << std::endl; + std::vector> inputSet = { { 1, 2 }, { 2, 3 }, { 3, 4 }, { 4, 5 }, { 5, 6 } }; + std::cout << "Feature Scaling Example:" << std::endl; + alg.printMatrix(data.featureScaling(inputSet)); + std::cout << std::endl; + + std::cout << "Mean Centering Example:" << std::endl; + alg.printMatrix(data.meanCentering(inputSet)); + std::cout << std::endl; + + std::cout << "Mean Normalization Example:" << std::endl; + alg.printMatrix(data.meanNormalization(inputSet)); + std::cout << std::endl; } void MLPPTests::test_outlier_finder(bool ui) { - //MLPPStat stat; - //MLPPLinAlg alg; - //MLPPActivation avn; - //MLPPCost cost; - //MLPPData data; - //MLPPConvolutions conv; + MLPPLinAlg alg; - // // Outlier Finder - // std::vector inputSet = {1,2,3,4,5,6,7,8,9,23554332523523}; - // OutlierFinder outlierFinder(2); // Any datapoint outside of 2 stds from the mean is marked as an outlier. - // alg.printVector(outlierFinder.modelTest(inputSet)); + // Outlier Finder + std::vector inputSet = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 23554332523523 }; + MLPPOutlierFinder outlierFinder(2); // Any datapoint outside of 2 stds from the mean is marked as an outlier. 
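+	// With a threshold of 2 standard deviations, modelTest should single out only the huge
+	// value 23554332523523 from inputSet above.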
+ alg.printVector(outlierFinder.modelTest(inputSet)); } void MLPPTests::test_new_math_functions() { - //MLPPStat stat; - //MLPPLinAlg alg; - //MLPPActivation avn; - //MLPPCost cost; - //MLPPData data; - //MLPPConvolutions conv; + MLPPLinAlg alg; + MLPPActivation avn; + MLPPData data; - // // Testing new Functions - // double z_s = 0.001; - // std::cout << avn.logit(z_s) << std::endl; - // std::cout << avn.logit(z_s, 1) << std::endl; + // Testing new Functions + double z_s = 0.001; + std::cout << avn.logit(z_s) << std::endl; + std::cout << avn.logit(z_s, 1) << std::endl; - // std::vector z_v = {0.001}; - // alg.printVector(avn.logit(z_v)); - // alg.printVector(avn.logit(z_v, 1)); + std::vector z_v = { 0.001 }; + alg.printVector(avn.logit(z_v)); + alg.printVector(avn.logit(z_v, 1)); - // std::vector> Z_m = {{0.001}}; - // alg.printMatrix(avn.logit(Z_m)); - // alg.printMatrix(avn.logit(Z_m, 1)); + std::vector> Z_m = { { 0.001 } }; + alg.printMatrix(avn.logit(Z_m)); + alg.printMatrix(avn.logit(Z_m, 1)); - // std::cout << alg.trace({{1,2}, {3,4}}) << std::endl; - // alg.printMatrix(alg.pinverse({{1,2}, {3,4}})); - // alg.printMatrix(alg.diag({1,2,3,4,5})); - // alg.printMatrix(alg.kronecker_product({{1,2,3,4,5}}, {{6,7,8,9,10}})); - // alg.printMatrix(alg.matrixPower({{5,5},{5,5}}, 2)); - // alg.printVector(alg.solve({{1,1}, {1.5, 4.0}}, {2200, 5050})); + std::cout << alg.trace({ { 1, 2 }, { 3, 4 } }) << std::endl; + alg.printMatrix(alg.pinverse({ { 1, 2 }, { 3, 4 } })); + alg.printMatrix(alg.diag({ 1, 2, 3, 4, 5 })); + alg.printMatrix(alg.kronecker_product({ { 1, 2, 3, 4, 5 } }, { { 6, 7, 8, 9, 10 } })); + alg.printMatrix(alg.matrixPower({ { 5, 5 }, { 5, 5 } }, 2)); + alg.printVector(alg.solve({ { 1, 1 }, { 1.5, 4.0 } }, { 2200, 5050 })); - // std::vector> matrixOfCubes = {{1,2,64,27}}; - // std::vector vectorOfCubes = {1,2,64,27}; - // alg.printMatrix(alg.cbrt(matrixOfCubes)); - // alg.printVector(alg.cbrt(vectorOfCubes)); - // std::cout << alg.max({{1,2,3,4,5}, {6,5,3,4,1}, {9,9,9,9,9}}) << std::endl; - // std::cout << alg.min({{1,2,3,4,5}, {6,5,3,4,1}, {9,9,9,9,9}}) << std::endl; + std::vector> matrixOfCubes = { { 1, 2, 64, 27 } }; + std::vector vectorOfCubes = { 1, 2, 64, 27 }; + alg.printMatrix(alg.cbrt(matrixOfCubes)); + alg.printVector(alg.cbrt(vectorOfCubes)); + std::cout << alg.max({ { 1, 2, 3, 4, 5 }, { 6, 5, 3, 4, 1 }, { 9, 9, 9, 9, 9 } }) << std::endl; + std::cout << alg.min({ { 1, 2, 3, 4, 5 }, { 6, 5, 3, 4, 1 }, { 9, 9, 9, 9, 9 } }) << std::endl; - // std::vector chicken; - // data.getImage("../../Data/apple.jpeg", chicken); - // alg.printVector(chicken); + //std::vector chicken; + //data.getImage("../../Data/apple.jpeg", chicken); + //alg.printVector(chicken); - // std::vector> P = {{12, -51, 4}, {6, 167, -68}, {-4, 24, -41}}; - // alg.printMatrix(P); + std::vector> P = { { 12, -51, 4 }, { 6, 167, -68 }, { -4, 24, -41 } }; + alg.printMatrix(P); - // alg.printMatrix(alg.gramSchmidtProcess(P)); + alg.printMatrix(alg.gramSchmidtProcess(P)); - // auto [Q, R] = alg.QRD(P); // It works! + MLPPLinAlg::QRDResult qrd_result = alg.qrd(P); // It works! - // alg.printMatrix(Q); + alg.printMatrix(qrd_result.Q); - // alg.printMatrix(R); + alg.printMatrix(qrd_result.R); } void MLPPTests::test_positive_definiteness_checker() { //MLPPStat stat; - //MLPPLinAlg alg; + MLPPLinAlg alg; //MLPPActivation avn; //MLPPCost cost; //MLPPData data; //MLPPConvolutions conv; - // // Checking positive-definiteness checker. For Cholesky Decomp. 
- // std::vector> A = - // { - // {1,-1,-1,-1}, - // {-1,2,2,2}, - // {-1,2,3,1}, - // {-1,2,1,4} - // }; + // Checking positive-definiteness checker. For Cholesky Decomp. + std::vector> A = { + { 1, -1, -1, -1 }, + { -1, 2, 2, 2 }, + { -1, 2, 3, 1 }, + { -1, 2, 1, 4 } + }; - // std::cout << std::boolalpha << alg.positiveDefiniteChecker(A) << std::endl; - // auto [L, Lt] = alg.chol(A); // works. - // alg.printMatrix(L); - // alg.printMatrix(Lt); + std::cout << std::boolalpha << alg.positiveDefiniteChecker(A) << std::endl; + MLPPLinAlg::CholeskyResult chres = alg.cholesky(A); // works. + alg.printMatrix(chres.L); + alg.printMatrix(chres.Lt); } + +// double f(double x){ +// return x*x*x + 2*x - 2; +// } + +double f(double x) { + return sin(x); +} + +double f_prime(double x) { + return 2 * x; +} + +double f_prime_2var(std::vector x) { + return 2 * x[0] + x[1]; +} +/* + y = x^3 + 2x - 2 + y' = 3x^2 + 2 + y'' = 6x + y''(2) = 12 +*/ + +// double f_mv(std::vector x){ +// return x[0] * x[0] + x[0] * x[1] * x[1] + x[1] + 5; +// } + +/* + Where x, y = x[0], x[1], this function is defined as: + f(x, y) = x^2 + xy^2 + y + 5 + ∂f/∂x = 2x + 2y + ∂^2f/∂x∂y = 2 +*/ + +double f_mv(std::vector x) { + return x[0] * x[0] * x[0] + x[0] + x[1] * x[1] * x[1] * x[0] + x[2] * x[2] * x[1]; +} + +/* + Where x, y = x[0], x[1], this function is defined as: + f(x, y) = x^3 + x + xy^3 + yz^2 + + fy = 3xy^2 + 2yz + fyy = 6xy + 2z + fyyz = 2 + + ∂^2f/∂y^2 = 6xy + 2z + ∂^3f/∂y^3 = 6x + + ∂f/∂z = 2zy + ∂^2f/∂z^2 = 2y + ∂^3f/∂z^3 = 0 + + ∂f/∂x = 3x^2 + 1 + y^3 + ∂^2f/∂x^2 = 6x + ∂^3f/∂x^3 = 6 + + ∂f/∂z = 2zy + ∂^2f/∂z^2 = 2z + + ∂f/∂y = 3xy^2 + ∂^2f/∂y∂x = 3y^2 + +*/ + void MLPPTests::test_numerical_analysis() { - //MLPPStat stat; - //MLPPLinAlg alg; - //MLPPActivation avn; - //MLPPCost cost; - //MLPPData data; - //MLPPConvolutions conv; + MLPPLinAlg alg; + MLPPConvolutions conv; // Checks for numerical analysis class. - //NumericalAnalysis numAn; + MLPPNumericalAnalysis numAn; - //std::cout << numAn.quadraticApproximation(f, 0, 1) << std::endl; + std::cout << numAn.quadraticApproximation(f, 0, 1) << std::endl; - // std::cout << numAn.cubicApproximation(f, 0, 1.001) << std::endl; + std::cout << numAn.cubicApproximation(f, 0, 1.001) << std::endl; - // std::cout << f(1.001) << std::endl; + std::cout << f(1.001) << std::endl; - // std::cout << numAn.quadraticApproximation(f_mv, {0, 0, 0}, {1, 1, 1}) << std::endl; + std::cout << numAn.quadraticApproximation(f_mv, { 0, 0, 0 }, { 1, 1, 1 }) << std::endl; - // std::cout << numAn.numDiff(&f, 1) << std::endl; - // std::cout << numAn.newtonRaphsonMethod(&f, 1, 1000) << std::endl; - //std::cout << numAn.invQuadraticInterpolation(&f, {100, 2,1.5}, 10) << std::endl; + std::cout << numAn.numDiff(&f, 1) << std::endl; + std::cout << numAn.newtonRaphsonMethod(&f, 1, 1000) << std::endl; + std::cout << numAn.invQuadraticInterpolation(&f, { 100, 2, 1.5 }, 10) << std::endl; - // std::cout << numAn.numDiff(&f_mv, {1, 1}, 1) << std::endl; // Derivative w.r.t. x. + std::cout << numAn.numDiff(&f_mv, { 1, 1 }, 1) << std::endl; // Derivative w.r.t. x. 
- // alg.printVector(numAn.jacobian(&f_mv, {1, 1})); + alg.printVector(numAn.jacobian(&f_mv, { 1, 1 })); - //std::cout << numAn.numDiff_2(&f, 2) << std::endl; + std::cout << numAn.numDiff_2(&f, 2) << std::endl; - //std::cout << numAn.numDiff_3(&f, 2) << std::endl; + std::cout << numAn.numDiff_3(&f, 2) << std::endl; - // std::cout << numAn.numDiff_2(&f_mv, {2, 2, 500}, 2, 2) << std::endl; - //std::cout << numAn.numDiff_3(&f_mv, {2, 1000, 130}, 0, 0, 0) << std::endl; + std::cout << numAn.numDiff_2(&f_mv, { 2, 2, 500 }, 2, 2) << std::endl; + std::cout << numAn.numDiff_3(&f_mv, { 2, 1000, 130 }, 0, 0, 0) << std::endl; - // alg.printTensor(numAn.thirdOrderTensor(&f_mv, {1, 1, 1})); - // std::cout << "Our Hessian." << std::endl; - // alg.printMatrix(numAn.hessian(&f_mv, {2, 2, 500})); + alg.printTensor(numAn.thirdOrderTensor(&f_mv, { 1, 1, 1 })); + std::cout << "Our Hessian." << std::endl; + alg.printMatrix(numAn.hessian(&f_mv, { 2, 2, 500 })); - // std::cout << numAn.laplacian(f_mv, {1,1,1}) << std::endl; + std::cout << numAn.laplacian(f_mv, { 1, 1, 1 }) << std::endl; - // std::vector>> tensor; - // tensor.push_back({{1,2}, {1,2}, {1,2}}); - // tensor.push_back({{1,2}, {1,2}, {1,2}}); + std::vector>> tensor; + tensor.push_back({ { 1, 2 }, { 1, 2 }, { 1, 2 } }); + tensor.push_back({ { 1, 2 }, { 1, 2 }, { 1, 2 } }); - // alg.printTensor(tensor); + alg.printTensor(tensor); - // alg.printMatrix(alg.tensor_vec_mult(tensor, {1,2})); + alg.printMatrix(alg.tensor_vec_mult(tensor, { 1, 2 })); - // std::cout << numAn.cubicApproximation(f_mv, {0, 0, 0}, {1, 1, 1}) << std::endl; + std::cout << numAn.cubicApproximation(f_mv, { 0, 0, 0 }, { 1, 1, 1 }) << std::endl; + std::cout << numAn.eulerianMethod(f_prime, { 1, 1 }, 1.5, 0.000001) << std::endl; + std::cout << numAn.eulerianMethod(f_prime_2var, { 2, 3 }, 2.5, 0.00000001) << std::endl; - // std::cout << numAn.eulerianMethod(f_prime, {1, 1}, 1.5, 0.000001) << std::endl; + std::vector> A = { + { 1, 0, 0, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 1 } + }; - // std::cout << numAn.eulerianMethod(f_prime_2var, {2, 3}, 2.5, 0.00000001) << std::endl; + alg.printMatrix(conv.dx(A)); + alg.printMatrix(conv.dy(A)); - // alg.printMatrix(conv.dx(A)); - // alg.printMatrix(conv.dy(A)); + alg.printMatrix(conv.gradOrientation(A)); - // alg.printMatrix(conv.gradOrientation(A)); + std::vector> h = conv.harrisCornerDetection(A); - // std::vector> A = - // { - // {1,0,0,0}, - // {0,0,0,0}, - // {0,0,0,0}, - // {0,0,0,1} - // }; + for (int i = 0; i < h.size(); i++) { + for (int j = 0; j < h[i].size(); j++) { + std::cout << h[i][j] << " "; + } + std::cout << std::endl; + } // Harris detector works. Life is good! - // std::vector> h = conv.harrisCornerDetection(A); - - // for(int i = 0; i < h.size(); i++){ - // for(int j = 0; j < h[i].size(); j++){ - // std::cout << h[i][j] << " "; - // } - // std::cout << std::endl; - // } // Harris detector works. Life is good! 
- - // std::vector a = {3,4,4}; - // std::vector b = {4,4,4}; - // alg.printVector(alg.cross(a,b)); + std::vector a = { 3, 4, 4 }; + std::vector b = { 4, 4, 4 }; + alg.printVector(alg.cross(a, b)); } void MLPPTests::test_support_vector_classification_kernel(bool ui) { - //MLPPStat stat; - //MLPPLinAlg alg; - //MLPPActivation avn; - //MLPPCost cost; - //MLPPData data; - //MLPPConvolutions conv; + MLPPLinAlg alg; + MLPPData data; //SUPPORT VECTOR CLASSIFICATION (kernel method) - // std::vector> inputSet; - // std::vector outputSet; - // data.setData(30, "/Users/marcmelikyan/Desktop/Data/BreastCancerSVM.csv", inputSet, outputSet); + Ref dt = data.load_breast_cancer_svc(_breast_cancer_svm_data_path); - // std::vector> inputSet; - // std::vector outputSet; - // data.setData(4, "/Users/marcmelikyan/Desktop/Data/IrisSVM.csv", inputSet, outputSet); + MLPPDualSVC kernelSVM(dt->input, dt->output, 1000); + kernelSVM.gradientDescent(0.0001, 20, ui); + std::cout << "SCORE: " << kernelSVM.score() << std::endl; - // DualSVC kernelSVM(inputSet, outputSet, 1000); - // kernelSVM.gradientDescent(0.0001, 20, 1); + std::vector> linearlyIndependentMat = { + { 1, 2, 3, 4 }, + { 234538495, 4444, 6111, 55 } + }; - // std::vector> linearlyIndependentMat = - - // { - // {1,2,3,4}, - // {234538495,4444,6111,55} - // }; - - // std::cout << "True of false: linearly independent?: " << std::boolalpha << alg.linearIndependenceChecker(linearlyIndependentMat) << std::endl; + std::cout << "True of false: linearly independent?: " << std::boolalpha << alg.linearIndependenceChecker(linearlyIndependentMat) << std::endl; } void MLPPTests::is_approx_equalsd(double a, double b, const String &str) { @@ -1083,7 +1052,6 @@ void MLPPTests::_bind_methods() { ClassDB::bind_method(D_METHOD("test_tanh_regression", "ui"), &MLPPTests::test_tanh_regression, false); ClassDB::bind_method(D_METHOD("test_softmax_regression", "ui"), &MLPPTests::test_softmax_regression, false); ClassDB::bind_method(D_METHOD("test_support_vector_classification", "ui"), &MLPPTests::test_support_vector_classification, false); - ClassDB::bind_method(D_METHOD("test_logistic_regression", "ui"), &MLPPTests::test_logistic_regression, false); ClassDB::bind_method(D_METHOD("test_mlp", "ui"), &MLPPTests::test_mlp, false); ClassDB::bind_method(D_METHOD("test_soft_max_network", "ui"), &MLPPTests::test_soft_max_network, false); @@ -1094,7 +1062,7 @@ void MLPPTests::_bind_methods() { ClassDB::bind_method(D_METHOD("test_dynamically_sized_mann", "ui"), &MLPPTests::test_dynamically_sized_mann, false); ClassDB::bind_method(D_METHOD("test_train_test_split_mann", "ui"), &MLPPTests::test_train_test_split_mann, false); - ClassDB::bind_method(D_METHOD("test_naive_bayes", "ui"), &MLPPTests::test_naive_bayes, false); + ClassDB::bind_method(D_METHOD("test_naive_bayes"), &MLPPTests::test_naive_bayes); ClassDB::bind_method(D_METHOD("test_k_means", "ui"), &MLPPTests::test_k_means, false); ClassDB::bind_method(D_METHOD("test_knn", "ui"), &MLPPTests::test_knn, false); diff --git a/test/mlpp_tests.h b/test/mlpp_tests.h index 1dbb4c1..735d93d 100644 --- a/test/mlpp_tests.h +++ b/test/mlpp_tests.h @@ -44,7 +44,7 @@ public: void test_dynamically_sized_mann(bool ui = false); void test_train_test_split_mann(bool ui = false); - void test_naive_bayes(bool ui = false); + void test_naive_bayes(); void test_k_means(bool ui = false); void test_knn(bool ui = false);
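// Minimal round-trip sketch for the other struct-based decompositions added in this patch
// (illustration only; the first matrix reuses P from test_new_math_functions, the second is an
// arbitrary symmetric positive-definite example):
MLPPLinAlg alg;
std::vector<std::vector<double>> P = { { 12, -51, 4 }, { 6, 167, -68 }, { -4, 24, -41 } };
MLPPLinAlg::QRDResult qr = alg.qrd(P);           // Q = gramSchmidtProcess(P), R = Q^T * P
alg.printMatrix(alg.matmult(qr.Q, qr.R));        // should reproduce P

std::vector<std::vector<double>> A = { { 4, 2 }, { 2, 3 } };
MLPPLinAlg::CholeskyResult ch = alg.cholesky(A); // A must be symmetric positive definite
alg.printMatrix(alg.matmult(ch.L, ch.Lt));       // should reproduce A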