Reworked more methods.

This commit is contained in:
Relintai 2023-12-28 21:06:16 +01:00
parent edb6cd4dbf
commit 074af18c64
5 changed files with 95 additions and 94 deletions

View File

@ -15,6 +15,7 @@
#include "../lin_alg/lin_alg_old.h" #include "../lin_alg/lin_alg_old.h"
#include "../softmax_net/softmax_net.h" #include "../softmax_net/softmax_net.h"
#include "../stat/stat_old.h" #include "../stat/stat_old.h"
#include "data_old.h"
#include <algorithm> #include <algorithm>
#include <cmath> #include <cmath>
@ -430,9 +431,10 @@ std::tuple<std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>> M
const int ONE_HOT_NUM = 3; const int ONE_HOT_NUM = 3;
std::vector<std::vector<real_t>> inputSet; std::vector<std::vector<real_t>> inputSet;
std::vector<real_t> tempOutputSet; std::vector<real_t> tempOutputSet;
MLPPDataOld d;
setData(IRIS_SIZE, "/Users/marcmelikyan/Desktop/Data/Iris.csv", inputSet, tempOutputSet); setData(IRIS_SIZE, "/Users/marcmelikyan/Desktop/Data/Iris.csv", inputSet, tempOutputSet);
std::vector<std::vector<real_t>> outputSet = oneHotRep(tempOutputSet, ONE_HOT_NUM); std::vector<std::vector<real_t>> outputSet = d.oneHotRep(tempOutputSet, ONE_HOT_NUM);
return { inputSet, outputSet }; return { inputSet, outputSet };
} }
@ -441,9 +443,10 @@ std::tuple<std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>> M
const int ONE_HOT_NUM = 3; const int ONE_HOT_NUM = 3;
std::vector<std::vector<real_t>> inputSet; std::vector<std::vector<real_t>> inputSet;
std::vector<real_t> tempOutputSet; std::vector<real_t> tempOutputSet;
MLPPDataOld d;
setData(WINE_SIZE, "MLPP/Data/Datasets/Iris.csv", inputSet, tempOutputSet); setData(WINE_SIZE, "MLPP/Data/Datasets/Iris.csv", inputSet, tempOutputSet);
std::vector<std::vector<real_t>> outputSet = oneHotRep(tempOutputSet, ONE_HOT_NUM); std::vector<std::vector<real_t>> outputSet = d.oneHotRep(tempOutputSet, ONE_HOT_NUM);
return { inputSet, outputSet }; return { inputSet, outputSet };
} }
@ -452,9 +455,10 @@ std::tuple<std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>> M
const int ONE_HOT_NUM = 10; const int ONE_HOT_NUM = 10;
std::vector<std::vector<real_t>> inputSet; std::vector<std::vector<real_t>> inputSet;
std::vector<real_t> tempOutputSet; std::vector<real_t> tempOutputSet;
MLPPDataOld d;
setData(MNIST_SIZE, "MLPP/Data/Datasets/MnistTrain.csv", inputSet, tempOutputSet); setData(MNIST_SIZE, "MLPP/Data/Datasets/MnistTrain.csv", inputSet, tempOutputSet);
std::vector<std::vector<real_t>> outputSet = oneHotRep(tempOutputSet, ONE_HOT_NUM); std::vector<std::vector<real_t>> outputSet = d.oneHotRep(tempOutputSet, ONE_HOT_NUM);
return { inputSet, outputSet }; return { inputSet, outputSet };
} }
@ -463,9 +467,10 @@ std::tuple<std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>> M
const int ONE_HOT_NUM = 10; const int ONE_HOT_NUM = 10;
std::vector<std::vector<real_t>> inputSet; std::vector<std::vector<real_t>> inputSet;
std::vector<real_t> tempOutputSet; std::vector<real_t> tempOutputSet;
MLPPDataOld d;
setData(MNIST_SIZE, "MLPP/Data/Datasets/MnistTest.csv", inputSet, tempOutputSet); setData(MNIST_SIZE, "MLPP/Data/Datasets/MnistTest.csv", inputSet, tempOutputSet);
std::vector<std::vector<real_t>> outputSet = oneHotRep(tempOutputSet, ONE_HOT_NUM); std::vector<std::vector<real_t>> outputSet = d.oneHotRep(tempOutputSet, ONE_HOT_NUM);
return { inputSet, outputSet }; return { inputSet, outputSet };
} }
@ -1117,80 +1122,40 @@ void MLPPData::setInputNames(std::string fileName, std::vector<std::string> &inp
dataFile.close(); dataFile.close();
} }
std::vector<std::vector<real_t>> MLPPData::featureScaling(std::vector<std::vector<real_t>> X) { Ref<MLPPMatrix> MLPPData::feature_scaling(const Ref<MLPPMatrix> &p_X) {
MLPPLinAlgOld alg; Ref<MLPPMatrix> X = p_X->transposen();
X = alg.transpose(X);
std::vector<real_t> max_elements, min_elements;
max_elements.resize(X.size());
min_elements.resize(X.size());
for (uint32_t i = 0; i < X.size(); i++) { Size2i x_size = X->size();
max_elements[i] = alg.max(X[i]);
min_elements[i] = alg.min(X[i]); LocalVector<real_t> max_elements;
LocalVector<real_t> min_elements;
max_elements.resize(x_size.y);
min_elements.resize(x_size.y);
Ref<MLPPVector> row_tmp;
row_tmp.instance();
row_tmp->resize(x_size.x);
for (int i = 0; i < x_size.y; ++i) {
X->row_get_into_mlpp_vector(i, row_tmp);
max_elements[i] = row_tmp->max_element();
min_elements[i] = row_tmp->min_element();
} }
for (uint32_t i = 0; i < X.size(); i++) { for (int i = 0; i < x_size.y; i++) {
for (uint32_t j = 0; j < X[i].size(); j++) { real_t maxe = max_elements[i];
X[i][j] = (X[i][j] - min_elements[i]) / (max_elements[i] - min_elements[i]); real_t mine = min_elements[i];
for (int j = 0; j < x_size.x; j++) {
real_t xij = X->element_get(i, j);
X->element_set(i, j, (xij - mine) / (maxe - mine));
} }
} }
return alg.transpose(X);
}
std::vector<std::vector<real_t>> MLPPData::meanNormalization(std::vector<std::vector<real_t>> X) { return X->transposen();
MLPPLinAlgOld alg;
MLPPStatOld stat;
// (X_j - mu_j) / std_j, for every j
X = meanCentering(X);
for (uint32_t i = 0; i < X.size(); i++) {
X[i] = alg.scalarMultiply(1 / stat.standardDeviation(X[i]), X[i]);
}
return X;
}
std::vector<std::vector<real_t>> MLPPData::meanCentering(std::vector<std::vector<real_t>> X) {
MLPPStatOld stat;
for (uint32_t i = 0; i < X.size(); i++) {
real_t mean_i = stat.mean(X[i]);
for (uint32_t j = 0; j < X[i].size(); j++) {
X[i][j] -= mean_i;
}
}
return X;
}
std::vector<std::vector<real_t>> MLPPData::oneHotRep(std::vector<real_t> tempOutputSet, int n_class) {
std::vector<std::vector<real_t>> outputSet;
outputSet.resize(tempOutputSet.size());
for (uint32_t i = 0; i < tempOutputSet.size(); i++) {
for (int j = 0; j <= n_class - 1; j++) {
if (tempOutputSet[i] == j) {
outputSet[i].push_back(1);
} else {
outputSet[i].push_back(0);
}
}
}
return outputSet;
}
std::vector<real_t> MLPPData::reverseOneHot(std::vector<std::vector<real_t>> tempOutputSet) {
std::vector<real_t> outputSet;
//uint32_t n_class = tempOutputSet[0].size();
for (uint32_t i = 0; i < tempOutputSet.size(); i++) {
int current_class = 1;
for (uint32_t j = 0; j < tempOutputSet[i].size(); j++) {
if (tempOutputSet[i][j] == 1) {
break;
} else {
current_class++;
}
}
outputSet.push_back(current_class);
}
return outputSet;
} }
Ref<MLPPMatrix> MLPPData::mean_centering(const Ref<MLPPMatrix> &p_X) { Ref<MLPPMatrix> MLPPData::mean_centering(const Ref<MLPPMatrix> &p_X) {
@ -1207,7 +1172,7 @@ Ref<MLPPMatrix> MLPPData::mean_centering(const Ref<MLPPMatrix> &p_X) {
x_row_tmp->resize(x_size.x); x_row_tmp->resize(x_size.x);
for (int i = 0; i < x_size.y; ++i) { for (int i = 0; i < x_size.y; ++i) {
X->row_get_into_mlpp_vector(i, x_row_tmp); p_X->row_get_into_mlpp_vector(i, x_row_tmp);
real_t mean_i = stat.meanv(x_row_tmp); real_t mean_i = stat.meanv(x_row_tmp);
@ -1219,6 +1184,30 @@ Ref<MLPPMatrix> MLPPData::mean_centering(const Ref<MLPPMatrix> &p_X) {
return X; return X;
} }
// Mean-normalizes p_X: mean-centers it via mean_centering(), then multiplies
// every row by the reciprocal of that row's standard deviation, i.e.
// (X_j - mu_j) / std_j for every row j.
//
// Returns the matrix produced by mean_centering() with its rows rescaled, or
// an empty Ref when p_X (or the centered matrix) is not valid.
Ref<MLPPMatrix> MLPPData::mean_normalization(const Ref<MLPPMatrix> &p_X) {
	ERR_FAIL_COND_V(!p_X.is_valid(), Ref<MLPPMatrix>());

	MLPPStat stat;

	Ref<MLPPMatrix> X = mean_centering(p_X);
	ERR_FAIL_COND_V(!X.is_valid(), Ref<MLPPMatrix>());

	Size2i x_size = X->size();

	// Scratch vector reused for every row so the loop does not allocate.
	Ref<MLPPVector> x_row_tmp;
	x_row_tmp.instance();
	x_row_tmp->resize(x_size.x);

	for (int i = 0; i < x_size.y; i++) {
		X->row_get_into_mlpp_vector(i, x_row_tmp);

		// NOTE(review): the reciprocal is taken without checking the deviation;
		// presumably a constant row yields a zero deviation and non-finite
		// output here - confirm whether callers need a guard.
		x_row_tmp->scalar_multiply((real_t)1 / stat.standard_deviationv(x_row_tmp));

		X->row_set_mlpp_vector(i, x_row_tmp);
	}

	return X;
}
Ref<MLPPMatrix> MLPPData::one_hot_rep(const Ref<MLPPVector> &temp_output_set, int n_class) { Ref<MLPPMatrix> MLPPData::one_hot_rep(const Ref<MLPPVector> &temp_output_set, int n_class) {
ERR_FAIL_COND_V(!temp_output_set.is_valid(), Ref<MLPPMatrix>()); ERR_FAIL_COND_V(!temp_output_set.is_valid(), Ref<MLPPMatrix>());
@ -1243,6 +1232,24 @@ Ref<MLPPMatrix> MLPPData::one_hot_rep(const Ref<MLPPVector> &temp_output_set, in
return output_set; return output_set;
} }
std::vector<real_t> MLPPData::reverseOneHot(std::vector<std::vector<real_t>> tempOutputSet) {
std::vector<real_t> outputSet;
//uint32_t n_class = tempOutputSet[0].size();
for (uint32_t i = 0; i < tempOutputSet.size(); i++) {
int current_class = 1;
for (uint32_t j = 0; j < tempOutputSet[i].size(); j++) {
if (tempOutputSet[i][j] == 1) {
break;
} else {
current_class++;
}
}
outputSet.push_back(current_class);
}
return outputSet;
}
void MLPPData::load_default_suffixes() { void MLPPData::load_default_suffixes() {
// Our list of suffixes which we use to compare against // Our list of suffixes which we use to compare against
suffixes = String("eer er ion ity ment ness or sion ship th able ible al ant ary ful ic ious ous ive less y ed en ing ize ise ly ward wise").split_spaces(); suffixes = String("eer er ion ity ment ness or sion ship th able ible al ant ary ful ic ious ous ive less y ed en ing ize ise ly ward wise").split_spaces();

View File

@ -176,14 +176,11 @@ public:
// Extra // Extra
void setInputNames(std::string fileName, std::vector<std::string> &inputNames); void setInputNames(std::string fileName, std::vector<std::string> &inputNames);
std::vector<std::vector<real_t>> featureScaling(std::vector<std::vector<real_t>> X); Ref<MLPPMatrix> feature_scaling(const Ref<MLPPMatrix> &X);
std::vector<std::vector<real_t>> meanNormalization(std::vector<std::vector<real_t>> X);
std::vector<std::vector<real_t>> meanCentering(std::vector<std::vector<real_t>> X);
std::vector<std::vector<real_t>> oneHotRep(std::vector<real_t> tempOutputSet, int n_class);
std::vector<real_t> reverseOneHot(std::vector<std::vector<real_t>> tempOutputSet);
Ref<MLPPMatrix> mean_centering(const Ref<MLPPMatrix> &X); Ref<MLPPMatrix> mean_centering(const Ref<MLPPMatrix> &X);
Ref<MLPPMatrix> mean_normalization(const Ref<MLPPMatrix> &X);
Ref<MLPPMatrix> one_hot_rep(const Ref<MLPPVector> &temp_output_set, int n_class); Ref<MLPPMatrix> one_hot_rep(const Ref<MLPPVector> &temp_output_set, int n_class);
std::vector<real_t> reverseOneHot(std::vector<std::vector<real_t>> tempOutputSet);
template <class T> template <class T>
std::vector<T> vecToSet(std::vector<T> inputSet) { std::vector<T> vecToSet(std::vector<T> inputSet) {

View File

@ -5,7 +5,7 @@
// //
#include "pca_old.h" #include "pca_old.h"
#include "../data/data.h" #include "../data/data_old.h"
#include "../lin_alg/lin_alg_old.h" #include "../lin_alg/lin_alg_old.h"
#include <iostream> #include <iostream>
@ -19,7 +19,7 @@ MLPPPCAOld::MLPPPCAOld(std::vector<std::vector<real_t>> inputSet, int k) :
std::vector<std::vector<real_t>> MLPPPCAOld::principalComponents() { std::vector<std::vector<real_t>> MLPPPCAOld::principalComponents() {
MLPPLinAlgOld alg; MLPPLinAlgOld alg;
MLPPData data; MLPPDataOld data;
MLPPLinAlgOld::SVDResultOld svr_res = alg.SVD(alg.cov(inputSet)); MLPPLinAlgOld::SVDResultOld svr_res = alg.SVD(alg.cov(inputSet));
X_normalized = data.meanCentering(inputSet); X_normalized = data.meanCentering(inputSet);

View File

@ -978,20 +978,18 @@ void MLPPTests::test_nlp_and_data(bool ui) {
PLOG_MSG("LSA:"); PLOG_MSG("LSA:");
PLOG_MSG(data.lsa(text_archive2, 2)->to_string()); PLOG_MSG(data.lsa(text_archive2, 2)->to_string());
/* std::vector<std::vector<real_t>> input_set_vec = { { 1, 2 }, { 2, 3 }, { 3, 4 }, { 4, 5 }, { 5, 6 } };
std::vector<std::vector<real_t>> inputSet = { { 1, 2 }, { 2, 3 }, { 3, 4 }, { 4, 5 }, { 5, 6 } };
std::cout << "Feature Scaling Example:" << std::endl;
alg.printMatrix(data.featureScaling(inputSet));
std::cout << std::endl;
std::cout << "Mean Centering Example:" << std::endl; Ref<MLPPMatrix> input_set = Ref<MLPPMatrix>(memnew(MLPPMatrix(input_set_vec)));
alg.printMatrix(data.meanCentering(inputSet));
std::cout << std::endl;
std::cout << "Mean Normalization Example:" << std::endl; PLOG_MSG("Feature Scaling Example:");
alg.printMatrix(data.meanNormalization(inputSet)); PLOG_MSG(data.feature_scaling(input_set)->to_string());
std::cout << std::endl;
*/ PLOG_MSG("Mean Centering Example:");
PLOG_MSG(data.mean_centering(input_set)->to_string());
PLOG_MSG("Mean Normalization Example:");
PLOG_MSG(data.mean_normalization(input_set)->to_string());
} }
void MLPPTests::test_outlier_finder(bool ui) { void MLPPTests::test_outlier_finder(bool ui) {
MLPPLinAlg alg; MLPPLinAlg alg;

View File

@ -400,7 +400,6 @@ void MLPPTestsOld::test_outlier_finder(bool ui) {
MLPPLinAlgOld alg; MLPPLinAlgOld alg;
// Outlier Finder // Outlier Finder
//std::vector<real_t> inputSet = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 23554332523523 };
std::vector<real_t> inputSet = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 23554332 }; std::vector<real_t> inputSet = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 23554332 };
MLPPOutlierFinderOld outlierFinderOld(2); // Any datapoint outside of 2 stds from the mean is marked as an outlier. MLPPOutlierFinderOld outlierFinderOld(2); // Any datapoint outside of 2 stds from the mean is marked as an outlier.
alg.printVector(outlierFinderOld.modelTest(inputSet)); alg.printVector(outlierFinderOld.modelTest(inputSet));