mirror of
https://github.com/Relintai/pmlpp.git
synced 2024-11-08 13:12:09 +01:00
Reworked more methods.
This commit is contained in:
parent
edb6cd4dbf
commit
074af18c64
@ -15,6 +15,7 @@
|
|||||||
#include "../lin_alg/lin_alg_old.h"
|
#include "../lin_alg/lin_alg_old.h"
|
||||||
#include "../softmax_net/softmax_net.h"
|
#include "../softmax_net/softmax_net.h"
|
||||||
#include "../stat/stat_old.h"
|
#include "../stat/stat_old.h"
|
||||||
|
#include "data_old.h"
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
@ -430,9 +431,10 @@ std::tuple<std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>> M
|
|||||||
const int ONE_HOT_NUM = 3;
|
const int ONE_HOT_NUM = 3;
|
||||||
std::vector<std::vector<real_t>> inputSet;
|
std::vector<std::vector<real_t>> inputSet;
|
||||||
std::vector<real_t> tempOutputSet;
|
std::vector<real_t> tempOutputSet;
|
||||||
|
MLPPDataOld d;
|
||||||
|
|
||||||
setData(IRIS_SIZE, "/Users/marcmelikyan/Desktop/Data/Iris.csv", inputSet, tempOutputSet);
|
setData(IRIS_SIZE, "/Users/marcmelikyan/Desktop/Data/Iris.csv", inputSet, tempOutputSet);
|
||||||
std::vector<std::vector<real_t>> outputSet = oneHotRep(tempOutputSet, ONE_HOT_NUM);
|
std::vector<std::vector<real_t>> outputSet = d.oneHotRep(tempOutputSet, ONE_HOT_NUM);
|
||||||
return { inputSet, outputSet };
|
return { inputSet, outputSet };
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -441,9 +443,10 @@ std::tuple<std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>> M
|
|||||||
const int ONE_HOT_NUM = 3;
|
const int ONE_HOT_NUM = 3;
|
||||||
std::vector<std::vector<real_t>> inputSet;
|
std::vector<std::vector<real_t>> inputSet;
|
||||||
std::vector<real_t> tempOutputSet;
|
std::vector<real_t> tempOutputSet;
|
||||||
|
MLPPDataOld d;
|
||||||
|
|
||||||
setData(WINE_SIZE, "MLPP/Data/Datasets/Iris.csv", inputSet, tempOutputSet);
|
setData(WINE_SIZE, "MLPP/Data/Datasets/Iris.csv", inputSet, tempOutputSet);
|
||||||
std::vector<std::vector<real_t>> outputSet = oneHotRep(tempOutputSet, ONE_HOT_NUM);
|
std::vector<std::vector<real_t>> outputSet = d.oneHotRep(tempOutputSet, ONE_HOT_NUM);
|
||||||
return { inputSet, outputSet };
|
return { inputSet, outputSet };
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -452,9 +455,10 @@ std::tuple<std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>> M
|
|||||||
const int ONE_HOT_NUM = 10;
|
const int ONE_HOT_NUM = 10;
|
||||||
std::vector<std::vector<real_t>> inputSet;
|
std::vector<std::vector<real_t>> inputSet;
|
||||||
std::vector<real_t> tempOutputSet;
|
std::vector<real_t> tempOutputSet;
|
||||||
|
MLPPDataOld d;
|
||||||
|
|
||||||
setData(MNIST_SIZE, "MLPP/Data/Datasets/MnistTrain.csv", inputSet, tempOutputSet);
|
setData(MNIST_SIZE, "MLPP/Data/Datasets/MnistTrain.csv", inputSet, tempOutputSet);
|
||||||
std::vector<std::vector<real_t>> outputSet = oneHotRep(tempOutputSet, ONE_HOT_NUM);
|
std::vector<std::vector<real_t>> outputSet = d.oneHotRep(tempOutputSet, ONE_HOT_NUM);
|
||||||
return { inputSet, outputSet };
|
return { inputSet, outputSet };
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -463,9 +467,10 @@ std::tuple<std::vector<std::vector<real_t>>, std::vector<std::vector<real_t>>> M
|
|||||||
const int ONE_HOT_NUM = 10;
|
const int ONE_HOT_NUM = 10;
|
||||||
std::vector<std::vector<real_t>> inputSet;
|
std::vector<std::vector<real_t>> inputSet;
|
||||||
std::vector<real_t> tempOutputSet;
|
std::vector<real_t> tempOutputSet;
|
||||||
|
MLPPDataOld d;
|
||||||
|
|
||||||
setData(MNIST_SIZE, "MLPP/Data/Datasets/MnistTest.csv", inputSet, tempOutputSet);
|
setData(MNIST_SIZE, "MLPP/Data/Datasets/MnistTest.csv", inputSet, tempOutputSet);
|
||||||
std::vector<std::vector<real_t>> outputSet = oneHotRep(tempOutputSet, ONE_HOT_NUM);
|
std::vector<std::vector<real_t>> outputSet = d.oneHotRep(tempOutputSet, ONE_HOT_NUM);
|
||||||
return { inputSet, outputSet };
|
return { inputSet, outputSet };
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1117,80 +1122,40 @@ void MLPPData::setInputNames(std::string fileName, std::vector<std::string> &inp
|
|||||||
dataFile.close();
|
dataFile.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPData::featureScaling(std::vector<std::vector<real_t>> X) {
|
Ref<MLPPMatrix> MLPPData::feature_scaling(const Ref<MLPPMatrix> &p_X) {
|
||||||
MLPPLinAlgOld alg;
|
Ref<MLPPMatrix> X = p_X->transposen();
|
||||||
X = alg.transpose(X);
|
|
||||||
std::vector<real_t> max_elements, min_elements;
|
|
||||||
max_elements.resize(X.size());
|
|
||||||
min_elements.resize(X.size());
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < X.size(); i++) {
|
Size2i x_size = X->size();
|
||||||
max_elements[i] = alg.max(X[i]);
|
|
||||||
min_elements[i] = alg.min(X[i]);
|
LocalVector<real_t> max_elements;
|
||||||
|
LocalVector<real_t> min_elements;
|
||||||
|
|
||||||
|
max_elements.resize(x_size.y);
|
||||||
|
min_elements.resize(x_size.y);
|
||||||
|
|
||||||
|
Ref<MLPPVector> row_tmp;
|
||||||
|
row_tmp.instance();
|
||||||
|
row_tmp->resize(x_size.x);
|
||||||
|
|
||||||
|
for (int i = 0; i < x_size.y; ++i) {
|
||||||
|
X->row_get_into_mlpp_vector(i, row_tmp);
|
||||||
|
|
||||||
|
max_elements[i] = row_tmp->max_element();
|
||||||
|
min_elements[i] = row_tmp->min_element();
|
||||||
}
|
}
|
||||||
|
|
||||||
for (uint32_t i = 0; i < X.size(); i++) {
|
for (int i = 0; i < x_size.y; i++) {
|
||||||
for (uint32_t j = 0; j < X[i].size(); j++) {
|
real_t maxe = max_elements[i];
|
||||||
X[i][j] = (X[i][j] - min_elements[i]) / (max_elements[i] - min_elements[i]);
|
real_t mine = min_elements[i];
|
||||||
|
|
||||||
|
for (int j = 0; j < x_size.x; j++) {
|
||||||
|
real_t xij = X->element_get(i, j);
|
||||||
|
|
||||||
|
X->element_set(i, j, (xij - mine) / (maxe - mine));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return alg.transpose(X);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPData::meanNormalization(std::vector<std::vector<real_t>> X) {
|
return X->transposen();
|
||||||
MLPPLinAlgOld alg;
|
|
||||||
MLPPStatOld stat;
|
|
||||||
// (X_j - mu_j) / std_j, for every j
|
|
||||||
|
|
||||||
X = meanCentering(X);
|
|
||||||
for (uint32_t i = 0; i < X.size(); i++) {
|
|
||||||
X[i] = alg.scalarMultiply(1 / stat.standardDeviation(X[i]), X[i]);
|
|
||||||
}
|
|
||||||
return X;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPData::meanCentering(std::vector<std::vector<real_t>> X) {
|
|
||||||
MLPPStatOld stat;
|
|
||||||
for (uint32_t i = 0; i < X.size(); i++) {
|
|
||||||
real_t mean_i = stat.mean(X[i]);
|
|
||||||
for (uint32_t j = 0; j < X[i].size(); j++) {
|
|
||||||
X[i][j] -= mean_i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return X;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPData::oneHotRep(std::vector<real_t> tempOutputSet, int n_class) {
|
|
||||||
std::vector<std::vector<real_t>> outputSet;
|
|
||||||
outputSet.resize(tempOutputSet.size());
|
|
||||||
for (uint32_t i = 0; i < tempOutputSet.size(); i++) {
|
|
||||||
for (int j = 0; j <= n_class - 1; j++) {
|
|
||||||
if (tempOutputSet[i] == j) {
|
|
||||||
outputSet[i].push_back(1);
|
|
||||||
} else {
|
|
||||||
outputSet[i].push_back(0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return outputSet;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<real_t> MLPPData::reverseOneHot(std::vector<std::vector<real_t>> tempOutputSet) {
|
|
||||||
std::vector<real_t> outputSet;
|
|
||||||
//uint32_t n_class = tempOutputSet[0].size();
|
|
||||||
for (uint32_t i = 0; i < tempOutputSet.size(); i++) {
|
|
||||||
int current_class = 1;
|
|
||||||
for (uint32_t j = 0; j < tempOutputSet[i].size(); j++) {
|
|
||||||
if (tempOutputSet[i][j] == 1) {
|
|
||||||
break;
|
|
||||||
} else {
|
|
||||||
current_class++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
outputSet.push_back(current_class);
|
|
||||||
}
|
|
||||||
|
|
||||||
return outputSet;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Ref<MLPPMatrix> MLPPData::mean_centering(const Ref<MLPPMatrix> &p_X) {
|
Ref<MLPPMatrix> MLPPData::mean_centering(const Ref<MLPPMatrix> &p_X) {
|
||||||
@ -1207,7 +1172,7 @@ Ref<MLPPMatrix> MLPPData::mean_centering(const Ref<MLPPMatrix> &p_X) {
|
|||||||
x_row_tmp->resize(x_size.x);
|
x_row_tmp->resize(x_size.x);
|
||||||
|
|
||||||
for (int i = 0; i < x_size.y; ++i) {
|
for (int i = 0; i < x_size.y; ++i) {
|
||||||
X->row_get_into_mlpp_vector(i, x_row_tmp);
|
p_X->row_get_into_mlpp_vector(i, x_row_tmp);
|
||||||
|
|
||||||
real_t mean_i = stat.meanv(x_row_tmp);
|
real_t mean_i = stat.meanv(x_row_tmp);
|
||||||
|
|
||||||
@ -1219,6 +1184,30 @@ Ref<MLPPMatrix> MLPPData::mean_centering(const Ref<MLPPMatrix> &p_X) {
|
|||||||
return X;
|
return X;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Ref<MLPPMatrix> MLPPData::mean_normalization(const Ref<MLPPMatrix> &p_X) {
|
||||||
|
MLPPLinAlg alg;
|
||||||
|
MLPPStat stat;
|
||||||
|
|
||||||
|
// (X_j - mu_j) / std_j, for every j
|
||||||
|
|
||||||
|
Ref<MLPPMatrix> X = mean_centering(p_X);
|
||||||
|
Size2i x_size = X->size();
|
||||||
|
|
||||||
|
Ref<MLPPVector> x_row_tmp;
|
||||||
|
x_row_tmp.instance();
|
||||||
|
x_row_tmp->resize(x_size.x);
|
||||||
|
|
||||||
|
for (int i = 0; i < x_size.y; i++) {
|
||||||
|
X->row_get_into_mlpp_vector(i, x_row_tmp);
|
||||||
|
|
||||||
|
x_row_tmp->scalar_multiply((real_t)1 / stat.standard_deviationv(x_row_tmp));
|
||||||
|
|
||||||
|
X->row_set_mlpp_vector(i, x_row_tmp);
|
||||||
|
}
|
||||||
|
|
||||||
|
return X;
|
||||||
|
}
|
||||||
|
|
||||||
Ref<MLPPMatrix> MLPPData::one_hot_rep(const Ref<MLPPVector> &temp_output_set, int n_class) {
|
Ref<MLPPMatrix> MLPPData::one_hot_rep(const Ref<MLPPVector> &temp_output_set, int n_class) {
|
||||||
ERR_FAIL_COND_V(!temp_output_set.is_valid(), Ref<MLPPMatrix>());
|
ERR_FAIL_COND_V(!temp_output_set.is_valid(), Ref<MLPPMatrix>());
|
||||||
|
|
||||||
@ -1243,6 +1232,24 @@ Ref<MLPPMatrix> MLPPData::one_hot_rep(const Ref<MLPPVector> &temp_output_set, in
|
|||||||
return output_set;
|
return output_set;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::vector<real_t> MLPPData::reverseOneHot(std::vector<std::vector<real_t>> tempOutputSet) {
|
||||||
|
std::vector<real_t> outputSet;
|
||||||
|
//uint32_t n_class = tempOutputSet[0].size();
|
||||||
|
for (uint32_t i = 0; i < tempOutputSet.size(); i++) {
|
||||||
|
int current_class = 1;
|
||||||
|
for (uint32_t j = 0; j < tempOutputSet[i].size(); j++) {
|
||||||
|
if (tempOutputSet[i][j] == 1) {
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
current_class++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
outputSet.push_back(current_class);
|
||||||
|
}
|
||||||
|
|
||||||
|
return outputSet;
|
||||||
|
}
|
||||||
|
|
||||||
void MLPPData::load_default_suffixes() {
|
void MLPPData::load_default_suffixes() {
|
||||||
// Our list of suffixes which we use to compare against
|
// Our list of suffixes which we use to compare against
|
||||||
suffixes = String("eer er ion ity ment ness or sion ship th able ible al ant ary ful ic ious ous ive less y ed en ing ize ise ly ward wise").split_spaces();
|
suffixes = String("eer er ion ity ment ness or sion ship th able ible al ant ary ful ic ious ous ive less y ed en ing ize ise ly ward wise").split_spaces();
|
||||||
|
@ -176,14 +176,11 @@ public:
|
|||||||
|
|
||||||
// Extra
|
// Extra
|
||||||
void setInputNames(std::string fileName, std::vector<std::string> &inputNames);
|
void setInputNames(std::string fileName, std::vector<std::string> &inputNames);
|
||||||
std::vector<std::vector<real_t>> featureScaling(std::vector<std::vector<real_t>> X);
|
Ref<MLPPMatrix> feature_scaling(const Ref<MLPPMatrix> &X);
|
||||||
std::vector<std::vector<real_t>> meanNormalization(std::vector<std::vector<real_t>> X);
|
|
||||||
std::vector<std::vector<real_t>> meanCentering(std::vector<std::vector<real_t>> X);
|
|
||||||
std::vector<std::vector<real_t>> oneHotRep(std::vector<real_t> tempOutputSet, int n_class);
|
|
||||||
std::vector<real_t> reverseOneHot(std::vector<std::vector<real_t>> tempOutputSet);
|
|
||||||
|
|
||||||
Ref<MLPPMatrix> mean_centering(const Ref<MLPPMatrix> &X);
|
Ref<MLPPMatrix> mean_centering(const Ref<MLPPMatrix> &X);
|
||||||
|
Ref<MLPPMatrix> mean_normalization(const Ref<MLPPMatrix> &X);
|
||||||
Ref<MLPPMatrix> one_hot_rep(const Ref<MLPPVector> &temp_output_set, int n_class);
|
Ref<MLPPMatrix> one_hot_rep(const Ref<MLPPVector> &temp_output_set, int n_class);
|
||||||
|
std::vector<real_t> reverseOneHot(std::vector<std::vector<real_t>> tempOutputSet);
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
std::vector<T> vecToSet(std::vector<T> inputSet) {
|
std::vector<T> vecToSet(std::vector<T> inputSet) {
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
//
|
//
|
||||||
|
|
||||||
#include "pca_old.h"
|
#include "pca_old.h"
|
||||||
#include "../data/data.h"
|
#include "../data/data_old.h"
|
||||||
#include "../lin_alg/lin_alg_old.h"
|
#include "../lin_alg/lin_alg_old.h"
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
@ -19,7 +19,7 @@ MLPPPCAOld::MLPPPCAOld(std::vector<std::vector<real_t>> inputSet, int k) :
|
|||||||
|
|
||||||
std::vector<std::vector<real_t>> MLPPPCAOld::principalComponents() {
|
std::vector<std::vector<real_t>> MLPPPCAOld::principalComponents() {
|
||||||
MLPPLinAlgOld alg;
|
MLPPLinAlgOld alg;
|
||||||
MLPPData data;
|
MLPPDataOld data;
|
||||||
|
|
||||||
MLPPLinAlgOld::SVDResultOld svr_res = alg.SVD(alg.cov(inputSet));
|
MLPPLinAlgOld::SVDResultOld svr_res = alg.SVD(alg.cov(inputSet));
|
||||||
X_normalized = data.meanCentering(inputSet);
|
X_normalized = data.meanCentering(inputSet);
|
||||||
|
@ -978,20 +978,18 @@ void MLPPTests::test_nlp_and_data(bool ui) {
|
|||||||
PLOG_MSG("LSA:");
|
PLOG_MSG("LSA:");
|
||||||
PLOG_MSG(data.lsa(text_archive2, 2)->to_string());
|
PLOG_MSG(data.lsa(text_archive2, 2)->to_string());
|
||||||
|
|
||||||
/*
|
std::vector<std::vector<real_t>> input_set_vec = { { 1, 2 }, { 2, 3 }, { 3, 4 }, { 4, 5 }, { 5, 6 } };
|
||||||
std::vector<std::vector<real_t>> inputSet = { { 1, 2 }, { 2, 3 }, { 3, 4 }, { 4, 5 }, { 5, 6 } };
|
|
||||||
std::cout << "Feature Scaling Example:" << std::endl;
|
|
||||||
alg.printMatrix(data.featureScaling(inputSet));
|
|
||||||
std::cout << std::endl;
|
|
||||||
|
|
||||||
std::cout << "Mean Centering Example:" << std::endl;
|
Ref<MLPPMatrix> input_set = Ref<MLPPMatrix>(memnew(MLPPMatrix(input_set_vec)));
|
||||||
alg.printMatrix(data.meanCentering(inputSet));
|
|
||||||
std::cout << std::endl;
|
|
||||||
|
|
||||||
std::cout << "Mean Normalization Example:" << std::endl;
|
PLOG_MSG("Feature Scaling Example:");
|
||||||
alg.printMatrix(data.meanNormalization(inputSet));
|
PLOG_MSG(data.feature_scaling(input_set)->to_string());
|
||||||
std::cout << std::endl;
|
|
||||||
*/
|
PLOG_MSG("Mean Centering Example:");
|
||||||
|
PLOG_MSG(data.mean_centering(input_set)->to_string());
|
||||||
|
|
||||||
|
PLOG_MSG("Mean Normalization Example:");
|
||||||
|
PLOG_MSG(data.mean_normalization(input_set)->to_string());
|
||||||
}
|
}
|
||||||
void MLPPTests::test_outlier_finder(bool ui) {
|
void MLPPTests::test_outlier_finder(bool ui) {
|
||||||
MLPPLinAlg alg;
|
MLPPLinAlg alg;
|
||||||
|
@ -400,7 +400,6 @@ void MLPPTestsOld::test_outlier_finder(bool ui) {
|
|||||||
MLPPLinAlgOld alg;
|
MLPPLinAlgOld alg;
|
||||||
|
|
||||||
// Outlier Finder
|
// Outlier Finder
|
||||||
//std::vector<real_t> inputSet = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 23554332523523 };
|
|
||||||
std::vector<real_t> inputSet = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 23554332 };
|
std::vector<real_t> inputSet = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 23554332 };
|
||||||
MLPPOutlierFinderOld outlierFinderOld(2); // Any datapoint outside of 2 stds from the mean is marked as an outlier.
|
MLPPOutlierFinderOld outlierFinderOld(2); // Any datapoint outside of 2 stds from the mean is marked as an outlier.
|
||||||
alg.printVector(outlierFinderOld.modelTest(inputSet));
|
alg.printVector(outlierFinderOld.modelTest(inputSet));
|
||||||
|
Loading…
Reference in New Issue
Block a user