Removed new things from MLPPDataOld.

This commit is contained in:
Relintai 2023-02-13 18:36:08 +01:00
parent 0a51b845f7
commit adc61e6aaa
2 changed files with 1 additions and 538 deletions

View File

@ -20,387 +20,6 @@
#include <random> #include <random>
#include <sstream> #include <sstream>
// MLPPDataOldESimple: pairs one input vector with one output vector.

// Returns the reference currently stored as the input vector.
Ref<MLPPVector> MLPPDataOldESimple::get_input() {
	return _input;
}

// Replaces the stored input vector reference with val.
void MLPPDataOldESimple::set_input(const Ref<MLPPVector> &val) {
	_input = val;
}

// Returns the reference currently stored as the output vector.
Ref<MLPPVector> MLPPDataOldESimple::get_output() {
	return _output;
}

// Replaces the stored output vector reference with val.
void MLPPDataOldESimple::set_output(const Ref<MLPPVector> &val) {
	_output = val;
}

// Instances fresh objects for both the input and the output reference.
void MLPPDataOldESimple::instance_data() {
	_input.instance();
	_output.instance();
}

// Registers the accessors, the "input"/"output" properties and instance_data() with ClassDB.
void MLPPDataOldESimple::_bind_methods() {
	ClassDB::bind_method(D_METHOD("get_input"), &MLPPDataOldESimple::get_input);
	ClassDB::bind_method(D_METHOD("set_input", "val"), &MLPPDataOldESimple::set_input);
	ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "input", PROPERTY_HINT_RESOURCE_TYPE, "MLPPVector"), "set_input", "get_input");

	// "get_output" has to resolve to get_output(); it used to be bound to get_input(),
	// which made the "output" property read back the input vector.
	ClassDB::bind_method(D_METHOD("get_output"), &MLPPDataOldESimple::get_output);
	ClassDB::bind_method(D_METHOD("set_output", "val"), &MLPPDataOldESimple::set_output);
	ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "output", PROPERTY_HINT_RESOURCE_TYPE, "MLPPVector"), "set_output", "get_output");

	ClassDB::bind_method(D_METHOD("instance_data"), &MLPPDataOldESimple::instance_data);
}
// MLPPDataOldSimple: pairs one input matrix with one output vector.

// Returns the reference currently stored as the input matrix.
Ref<MLPPMatrix> MLPPDataOldSimple::get_input() {
	return _input;
}

// Replaces the stored input matrix reference with val.
void MLPPDataOldSimple::set_input(const Ref<MLPPMatrix> &val) {
	_input = val;
}

// Returns the reference currently stored as the output vector.
Ref<MLPPVector> MLPPDataOldSimple::get_output() {
	return _output;
}

// Replaces the stored output vector reference with val.
void MLPPDataOldSimple::set_output(const Ref<MLPPVector> &val) {
	_output = val;
}

// Instances fresh objects for both the input and the output reference.
void MLPPDataOldSimple::instance_data() {
	_input.instance();
	_output.instance();
}

// Registers the accessors, the "input"/"output" properties and instance_data() with ClassDB.
void MLPPDataOldSimple::_bind_methods() {
	ClassDB::bind_method(D_METHOD("get_input"), &MLPPDataOldSimple::get_input);
	ClassDB::bind_method(D_METHOD("set_input", "val"), &MLPPDataOldSimple::set_input);
	ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "input", PROPERTY_HINT_RESOURCE_TYPE, "MLPPMatrix"), "set_input", "get_input");

	// "get_output" has to resolve to get_output(); it used to be bound to get_input(),
	// so the "output" property (declared as MLPPVector) returned the input matrix.
	ClassDB::bind_method(D_METHOD("get_output"), &MLPPDataOldSimple::get_output);
	ClassDB::bind_method(D_METHOD("set_output", "val"), &MLPPDataOldSimple::set_output);
	ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "output", PROPERTY_HINT_RESOURCE_TYPE, "MLPPVector"), "set_output", "get_output");

	ClassDB::bind_method(D_METHOD("instance_data"), &MLPPDataOldSimple::instance_data);
}
// MLPPDataOldComplex: pairs one input matrix with one output matrix.

// Returns the reference currently stored as the input matrix.
Ref<MLPPMatrix> MLPPDataOldComplex::get_input() {
	return _input;
}

// Replaces the stored input matrix reference with val.
void MLPPDataOldComplex::set_input(const Ref<MLPPMatrix> &val) {
	_input = val;
}

// Returns the reference currently stored as the output matrix.
Ref<MLPPMatrix> MLPPDataOldComplex::get_output() {
	return _output;
}

// Replaces the stored output matrix reference with val.
void MLPPDataOldComplex::set_output(const Ref<MLPPMatrix> &val) {
	_output = val;
}

// Instances fresh objects for both the input and the output reference.
void MLPPDataOldComplex::instance_data() {
	_input.instance();
	_output.instance();
}

// Registers the accessors, the "input"/"output" properties and instance_data() with ClassDB.
void MLPPDataOldComplex::_bind_methods() {
	ClassDB::bind_method(D_METHOD("get_input"), &MLPPDataOldComplex::get_input);
	ClassDB::bind_method(D_METHOD("set_input", "val"), &MLPPDataOldComplex::set_input);
	ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "input", PROPERTY_HINT_RESOURCE_TYPE, "MLPPMatrix"), "set_input", "get_input");

	// "get_output" has to resolve to get_output(); it used to be bound to get_input(),
	// which made the "output" property read back the input matrix.
	ClassDB::bind_method(D_METHOD("get_output"), &MLPPDataOldComplex::get_output);
	ClassDB::bind_method(D_METHOD("set_output", "val"), &MLPPDataOldComplex::set_output);
	ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "output", PROPERTY_HINT_RESOURCE_TYPE, "MLPPMatrix"), "set_output", "get_output");

	ClassDB::bind_method(D_METHOD("instance_data"), &MLPPDataOldComplex::instance_data);
}
// Loading Datasets
// Loads the CSV at path into a new MLPPDataOldSimple via set_data_supervised(),
// reading 30 feature columns per line.
Ref<MLPPDataOldSimple> MLPPDataOld::load_breast_cancer(const String &path) {
	const int feature_count = 30; // passed as k

	Ref<MLPPDataOldSimple> dataset;
	dataset.instance();
	dataset->instance_data();

	Ref<MLPPMatrix> inputs = dataset->get_input();
	Ref<MLPPVector> outputs = dataset->get_output();
	set_data_supervised(feature_count, path, inputs, outputs);

	return dataset;
}
// Loads the CSV at path into a new MLPPDataOldSimple via set_data_supervised(),
// reading 30 feature columns per line. The body is the same as load_breast_cancer();
// only the file supplied by the caller differs.
Ref<MLPPDataOldSimple> MLPPDataOld::load_breast_cancer_svc(const String &path) {
	const int feature_count = 30; // passed as k

	Ref<MLPPDataOldSimple> dataset;
	dataset.instance();
	dataset->instance_data();

	Ref<MLPPMatrix> inputs = dataset->get_input();
	Ref<MLPPVector> outputs = dataset->get_output();
	set_data_supervised(feature_count, path, inputs, outputs);

	return dataset;
}
// Loads the CSV at path into a new MLPPDataOldComplex: 4 feature columns per line,
// with the label column expanded into 3 one-hot columns by one_hot_rep().
Ref<MLPPDataOldComplex> MLPPDataOld::load_iris(const String &path) {
	const int feature_count = 4;
	const int class_count = 3;

	Ref<MLPPDataOldComplex> dataset;
	dataset.instance();
	dataset->instance_data();

	// Labels are read into a scratch vector first; the dataset stores the one-hot matrix.
	Ref<MLPPVector> raw_labels;
	raw_labels.instance();

	set_data_supervised(feature_count, path, dataset->get_input(), raw_labels);
	dataset->set_output(one_hot_rep(raw_labels, class_count));

	return dataset;
}
// Loads the CSV at path into a new MLPPDataOldComplex: 4 feature columns per line,
// with the label column expanded into 3 one-hot columns by one_hot_rep().
Ref<MLPPDataOldComplex> MLPPDataOld::load_wine(const String &path) {
	// NOTE(review): 4 is the same value load_iris() uses; confirm it matches the
	// column count of the wine CSV this is meant to read.
	const int feature_count = 4;
	const int class_count = 3;

	Ref<MLPPDataOldComplex> dataset;
	dataset.instance();
	dataset->instance_data();

	// Labels are read into a scratch vector first; the dataset stores the one-hot matrix.
	Ref<MLPPVector> raw_labels;
	raw_labels.instance();

	set_data_supervised(feature_count, path, dataset->get_input(), raw_labels);
	dataset->set_output(one_hot_rep(raw_labels, class_count));

	return dataset;
}
// Loads the CSV at path into a new MLPPDataOldComplex: 784 feature columns per line,
// with the label column expanded into 10 one-hot columns by one_hot_rep().
Ref<MLPPDataOldComplex> MLPPDataOld::load_mnist_train(const String &path) {
	const int feature_count = 784;
	const int class_count = 10;

	Ref<MLPPDataOldComplex> dataset;
	dataset.instance();
	dataset->instance_data();

	// Labels are read into a scratch vector first; the dataset stores the one-hot matrix.
	Ref<MLPPVector> raw_labels;
	raw_labels.instance();

	set_data_supervised(feature_count, path, dataset->get_input(), raw_labels);
	dataset->set_output(one_hot_rep(raw_labels, class_count));

	return dataset;
}
// Loads the CSV at path into a new MLPPDataOldComplex: 784 feature columns per line,
// with the label column expanded into 10 one-hot columns by one_hot_rep().
// The body is the same as load_mnist_train(); only the file supplied by the caller differs.
Ref<MLPPDataOldComplex> MLPPDataOld::load_mnist_test(const String &path) {
	const int feature_count = 784;
	const int class_count = 10;

	Ref<MLPPDataOldComplex> dataset;
	dataset.instance();
	dataset->instance_data();

	// Labels are read into a scratch vector first; the dataset stores the one-hot matrix.
	Ref<MLPPVector> raw_labels;
	raw_labels.instance();

	set_data_supervised(feature_count, path, dataset->get_input(), raw_labels);
	dataset->set_output(one_hot_rep(raw_labels, class_count));

	return dataset;
}
// Loads the CSV at path into a new MLPPDataOldSimple via set_data_supervised(),
// reading 13 feature columns per line.
Ref<MLPPDataOldSimple> MLPPDataOld::load_california_housing(const String &path) {
	const int feature_count = 13; // passed as k

	Ref<MLPPDataOldSimple> dataset;
	dataset.instance();
	dataset->instance_data();

	Ref<MLPPMatrix> inputs = dataset->get_input();
	Ref<MLPPVector> outputs = dataset->get_output();
	set_data_supervised(feature_count, path, inputs, outputs);

	return dataset;
}
// Loads the CSV at path into a new MLPPDataOldESimple via set_data_simple().
// No column count is passed: set_data_simple() consumes fields in (input, output) pairs.
Ref<MLPPDataOldESimple> MLPPDataOld::load_fires_and_crime(const String &path) {
	Ref<MLPPDataOldESimple> dataset;
	dataset.instance();
	dataset->instance_data();

	Ref<MLPPVector> inputs = dataset->get_input();
	Ref<MLPPVector> outputs = dataset->get_output();
	set_data_simple(path, inputs, outputs);

	return dataset;
}
// MULTIVARIATE SUPERVISED
void MLPPDataOld::set_data_supervised(int k, const String &file_name, Ref<MLPPMatrix> input_set, Ref<MLPPVector> output_set) {
ERR_FAIL_COND(!input_set.is_valid() || !output_set.is_valid());
MLPPLinAlg alg;
Vector<Vector<real_t>> input_set_tmp;
input_set_tmp.resize(k);
Vector<real_t> output_set_tmp;
FileAccess *file = FileAccess::open(file_name, FileAccess::READ);
ERR_FAIL_COND(!file);
while (!file->eof_reached()) {
Vector<String> ll = file->get_csv_line();
for (int i = 0; i < k; ++i) {
input_set_tmp.write[i].push_back(static_cast<real_t>(ll[i].to_double()));
}
output_set_tmp.push_back(static_cast<real_t>(ll[k].to_double()));
}
file->close();
memdelete(file);
output_set->set_from_vector(output_set_tmp);
input_set->set_from_vectors(input_set_tmp);
input_set = alg.transposem(input_set);
}
void MLPPDataOld::set_data_unsupervised(int k, const String &file_name, Ref<MLPPMatrix> input_set) {
ERR_FAIL_COND(!input_set.is_valid());
MLPPLinAlg alg;
Vector<Vector<real_t>> input_set_tmp;
input_set_tmp.resize(k);
FileAccess *file = FileAccess::open(file_name, FileAccess::READ);
ERR_FAIL_COND(!file);
while (!file->eof_reached()) {
Vector<String> ll = file->get_csv_line();
for (int i = 0; i < k; ++i) {
input_set_tmp.write[i].push_back(static_cast<real_t>(ll[i].to_double()));
}
}
file->close();
memdelete(file);
input_set->set_from_vectors(input_set_tmp);
input_set = alg.transposem(input_set);
}
// Reads a CSV file whose fields come in (input, output) pairs.
//
// file_name  - path handed to FileAccess::open().
// input_set  - receives the first field of every pair.
// output_set - receives the second field of every pair.
//
// Both output objects must already be valid; nothing is written if the file can't be opened.
void MLPPDataOld::set_data_simple(const String &file_name, Ref<MLPPVector> input_set, Ref<MLPPVector> output_set) {
	ERR_FAIL_COND(!input_set.is_valid() || !output_set.is_valid());

	FileAccess *file = FileAccess::open(file_name, FileAccess::READ);
	ERR_FAIL_COND(!file);

	Vector<real_t> input_set_tmp;
	Vector<real_t> output_set_tmp;

	while (!file->eof_reached()) {
		Vector<String> ll = file->get_csv_line();

		// Only complete pairs are consumed. A dangling last field (odd field count,
		// or the single field of an empty line) is ignored rather than indexing ll[i + 1]
		// past the end.
		for (int i = 0; i + 1 < ll.size(); i += 2) {
			input_set_tmp.push_back(static_cast<real_t>(ll[i].to_double()));
			output_set_tmp.push_back(static_cast<real_t>(ll[i + 1].to_double()));
		}
	}

	file->close();
	memdelete(file);

	input_set->set_from_vector(input_set_tmp);
	output_set->set_from_vector(output_set_tmp);
}
// Randomly partitions the rows of data into a test set and a train set.
//
// data      - dataset whose input and output matrices are split row by row.
// test_size - fraction of rows that go to the test set; values outside [0, 1] are clamped.
//
// Returns a SplitComplexData whose train/test members are always instanced. They are
// left empty when data, or its input/output matrix, is not valid.
MLPPDataOld::SplitComplexData MLPPDataOld::train_test_split(Ref<MLPPDataOldComplex> data, real_t test_size) {
	SplitComplexData res;

	res.train.instance();
	res.train->instance_data();
	res.test.instance();
	res.test->instance_data();

	ERR_FAIL_COND_V(!data.is_valid(), res);

	Ref<MLPPMatrix> orig_input = data->get_input();
	Ref<MLPPMatrix> orig_output = data->get_output();

	ERR_FAIL_COND_V(!orig_input.is_valid() || !orig_output.is_valid(), res);

	Size2i orig_input_size = orig_input->size();
	Size2i orig_output_size = orig_output->size();

	// Only rows present in both matrices can be split.
	int is = MIN(orig_input_size.y, orig_output_size.y);

	Array indices;
	indices.resize(is);

	for (int i = 0; i < is; ++i) {
		indices[i] = i;
	}

	indices.shuffle();

	Ref<MLPPVector> orig_input_row_tmp;
	orig_input_row_tmp.instance();
	orig_input_row_tmp->resize(orig_input_size.x);

	Ref<MLPPVector> orig_output_row_tmp;
	orig_output_row_tmp.instance();
	orig_output_row_tmp->resize(orig_output_size.x);

	// Keep the ratio inside [0, 1] so neither partition can get a negative size.
	// The negated comparison also sends NaN to 0.
	real_t ratio = test_size;
	if (!(ratio > 0)) {
		ratio = 0;
	} else if (ratio > 1) {
		ratio = 1;
	}

	int test_input_number = ratio * is; // implicit usage of floor

	Ref<MLPPMatrix> res_test_input = res.test->get_input();
	Ref<MLPPMatrix> res_test_output = res.test->get_output();

	res_test_input->resize(Size2i(orig_input_size.x, test_input_number));
	res_test_output->resize(Size2i(orig_output_size.x, test_input_number));

	// The first test_input_number shuffled indices form the test set.
	for (int i = 0; i < test_input_number; ++i) {
		int index = indices[i];

		orig_input->get_row_into_mlpp_vector(index, orig_input_row_tmp);
		orig_output->get_row_into_mlpp_vector(index, orig_output_row_tmp);

		// Store the extracted rows (previously the whole source matrices were passed here).
		res_test_input->set_row_mlpp_vector(i, orig_input_row_tmp);
		res_test_output->set_row_mlpp_vector(i, orig_output_row_tmp);
	}

	Ref<MLPPMatrix> res_train_input = res.train->get_input();
	Ref<MLPPMatrix> res_train_output = res.train->get_output();

	int train_input_number = is - test_input_number;

	res_train_input->resize(Size2i(orig_input_size.x, train_input_number));
	res_train_output->resize(Size2i(orig_output_size.x, train_input_number));

	// The remaining shuffled indices form the train set. They start right after the
	// test indices; offsetting by train_input_number instead would overlap the test
	// rows or run past the end of indices.
	for (int i = 0; i < train_input_number; ++i) {
		int index = indices[test_input_number + i];

		orig_input->get_row_into_mlpp_vector(index, orig_input_row_tmp);
		orig_output->get_row_into_mlpp_vector(index, orig_output_row_tmp);

		res_train_input->set_row_mlpp_vector(i, orig_input_row_tmp);
		res_train_output->set_row_mlpp_vector(i, orig_output_row_tmp);
	}

	return res;
}
// Wrapper around train_test_split() that returns the result as an Array:
// element 0 is the train set, element 1 is the test set.
Array MLPPDataOld::train_test_split_bind(const Ref<MLPPDataOldComplex> &data, real_t test_size) {
	SplitComplexData split = train_test_split(data, test_size);

	Array packed;
	packed.push_back(split.train);
	packed.push_back(split.test);

	return packed;
}
// Loading Datasets // Loading Datasets
std::tuple<std::vector<std::vector<real_t>>, std::vector<real_t>> MLPPDataOld::loadBreastCancer() { std::tuple<std::vector<std::vector<real_t>>, std::vector<real_t>> MLPPDataOld::loadBreastCancer() {
const int BREAST_CANCER_SIZE = 30; // k = 30 const int BREAST_CANCER_SIZE = 30; // k = 30
@ -1210,67 +829,3 @@ std::vector<real_t> MLPPDataOld::reverseOneHot(std::vector<std::vector<real_t>>
return outputSet; return outputSet;
} }
// Returns a new matrix of the same size as p_X in which the mean of each row
// (as computed by MLPPStat::meanv) has been subtracted from every element of that row.
// Returns a null reference if p_X is not valid. p_X itself is not modified.
Ref<MLPPMatrix> MLPPDataOld::mean_centering(const Ref<MLPPMatrix> &p_X) {
	ERR_FAIL_COND_V(!p_X.is_valid(), Ref<MLPPMatrix>());

	MLPPStat stat;

	Ref<MLPPMatrix> X;
	X.instance();
	X->resize(p_X->size());

	Size2i x_size = X->size();

	Ref<MLPPVector> x_row_tmp;
	x_row_tmp.instance();
	x_row_tmp->resize(x_size.x);

	for (int i = 0; i < x_size.y; ++i) {
		// The mean has to come from the source matrix; X was only resized above and
		// holds no meaningful values yet.
		p_X->get_row_into_mlpp_vector(i, x_row_tmp);

		real_t mean_i = stat.meanv(x_row_tmp);

		for (int j = 0; j < x_size.x; ++j) {
			X->set_element(i, j, p_X->get_element(i, j) - mean_i);
		}
	}

	return X;
}
// Builds a one-hot matrix from a vector of class labels.
//
// temp_output_set - labels; each value is truncated to int before comparison.
// n_class         - number of columns in the result.
//
// The result has one row per label. Element (i, j) is 1 when label i equals j and 0
// otherwise. Returns a null reference if temp_output_set is not valid.
Ref<MLPPMatrix> MLPPDataOld::one_hot_rep(const Ref<MLPPVector> &temp_output_set, int n_class) {
	ERR_FAIL_COND_V(!temp_output_set.is_valid(), Ref<MLPPMatrix>());

	const int label_count = temp_output_set->size();
	const real_t *labels = temp_output_set->ptr();

	Ref<MLPPMatrix> one_hot;
	one_hot.instance();
	one_hot->resize(Size2i(n_class, label_count));

	for (int row = 0; row < label_count; ++row) {
		const int label = static_cast<int>(labels[row]);

		for (int col = 0; col < n_class; ++col) {
			one_hot->set_element(row, col, (label == col) ? 1 : 0);
		}
	}

	return one_hot;
}
// Registers the dataset loaders and the train/test split wrapper with ClassDB.
void MLPPDataOld::_bind_methods() {
// Dataset loaders; each one is registered with a single "path" argument.
ClassDB::bind_method(D_METHOD("load_breast_cancer", "path"), &MLPPDataOld::load_breast_cancer);
ClassDB::bind_method(D_METHOD("load_breast_cancer_svc", "path"), &MLPPDataOld::load_breast_cancer_svc);
ClassDB::bind_method(D_METHOD("load_iris", "path"), &MLPPDataOld::load_iris);
ClassDB::bind_method(D_METHOD("load_wine", "path"), &MLPPDataOld::load_wine);
ClassDB::bind_method(D_METHOD("load_mnist_train", "path"), &MLPPDataOld::load_mnist_train);
ClassDB::bind_method(D_METHOD("load_mnist_test", "path"), &MLPPDataOld::load_mnist_test);
ClassDB::bind_method(D_METHOD("load_california_housing", "path"), &MLPPDataOld::load_california_housing);
ClassDB::bind_method(D_METHOD("load_fires_and_crime", "path"), &MLPPDataOld::load_fires_and_crime);
// Registered under the name "train_test_split" but bound to train_test_split_bind(),
// the variant that returns an Array instead of a SplitComplexData struct.
ClassDB::bind_method(D_METHOD("train_test_split", "data", "test_size"), &MLPPDataOld::train_test_split_bind);
}

View File

@ -11,101 +11,12 @@
#include "core/math/math_defs.h" #include "core/math/math_defs.h"
#include "core/string/ustring.h"
#include "core/variant/array.h"
#include "core/object/reference.h"
#include "../lin_alg/mlpp_matrix.h"
#include "../lin_alg/mlpp_vector.h"
#include <string> #include <string>
#include <tuple> #include <tuple>
#include <vector> #include <vector>
// Dataset holder derived from Reference: one MLPPVector input and one MLPPVector output.
// NOTE(review): the class line and the first "public:" line below carry a second column
// from the side-by-side diff rendering this text was taken from.
class MLPPDataOldESimple : public Reference { class MLPPDataOld {
GDCLASS(MLPPDataOldESimple, Reference);
public: public:
// Accessors for the stored input vector reference.
Ref<MLPPVector> get_input();
void set_input(const Ref<MLPPVector> &val);
// Accessors for the stored output vector reference.
Ref<MLPPVector> get_output();
void set_output(const Ref<MLPPVector> &val);
// Instances the input and output objects.
void instance_data();
protected:
static void _bind_methods();
Ref<MLPPVector> _input;
Ref<MLPPVector> _output;
};
// Dataset holder derived from Reference: one MLPPMatrix input and one MLPPVector output.
class MLPPDataOldSimple : public Reference {
GDCLASS(MLPPDataOldSimple, Reference);
public:
// Accessors for the stored input matrix reference.
Ref<MLPPMatrix> get_input();
void set_input(const Ref<MLPPMatrix> &val);
// Accessors for the stored output vector reference.
Ref<MLPPVector> get_output();
void set_output(const Ref<MLPPVector> &val);
// Instances the input and output objects.
void instance_data();
protected:
static void _bind_methods();
Ref<MLPPMatrix> _input;
Ref<MLPPVector> _output;
};
// Dataset holder derived from Reference: one MLPPMatrix input and one MLPPMatrix output.
class MLPPDataOldComplex : public Reference {
GDCLASS(MLPPDataOldComplex, Reference);
public:
// Accessors for the stored input matrix reference.
Ref<MLPPMatrix> get_input();
void set_input(const Ref<MLPPMatrix> &val);
// Accessors for the stored output matrix reference.
Ref<MLPPMatrix> get_output();
void set_output(const Ref<MLPPMatrix> &val);
// Instances the input and output objects.
void instance_data();
protected:
static void _bind_methods();
Ref<MLPPMatrix> _input;
Ref<MLPPMatrix> _output;
};
class MLPPDataOld : public Reference {
GDCLASS(MLPPDataOld, Reference);
public:
// Load Datasets
Ref<MLPPDataOldSimple> load_breast_cancer(const String &path);
Ref<MLPPDataOldSimple> load_breast_cancer_svc(const String &path);
Ref<MLPPDataOldComplex> load_iris(const String &path);
Ref<MLPPDataOldComplex> load_wine(const String &path);
Ref<MLPPDataOldComplex> load_mnist_train(const String &path);
Ref<MLPPDataOldComplex> load_mnist_test(const String &path);
Ref<MLPPDataOldSimple> load_california_housing(const String &path);
Ref<MLPPDataOldESimple> load_fires_and_crime(const String &path);
void set_data_supervised(int k, const String &file_name, Ref<MLPPMatrix> input_set, Ref<MLPPVector> output_set);
void set_data_unsupervised(int k, const String &file_name, Ref<MLPPMatrix> input_set);
void set_data_simple(const String &file_name, Ref<MLPPVector> input_set, Ref<MLPPVector> output_set);
struct SplitComplexData {
Ref<MLPPDataOldComplex> train;
Ref<MLPPDataOldComplex> test;
};
SplitComplexData train_test_split(Ref<MLPPDataOldComplex> data, real_t test_size);
Array train_test_split_bind(const Ref<MLPPDataOldComplex> &data, real_t test_size);
// Load Datasets // Load Datasets
std::tuple<std::vector<std::vector<real_t>>, std::vector<real_t>> loadBreastCancer(); std::tuple<std::vector<std::vector<real_t>>, std::vector<real_t>> loadBreastCancer();
std::tuple<std::vector<std::vector<real_t>>, std::vector<real_t>> loadBreastCancerSVC(); std::tuple<std::vector<std::vector<real_t>>, std::vector<real_t>> loadBreastCancerSVC();
@ -174,9 +85,6 @@ public:
std::vector<std::vector<real_t>> oneHotRep(std::vector<real_t> tempOutputSet, int n_class); std::vector<std::vector<real_t>> oneHotRep(std::vector<real_t> tempOutputSet, int n_class);
std::vector<real_t> reverseOneHot(std::vector<std::vector<real_t>> tempOutputSet); std::vector<real_t> reverseOneHot(std::vector<std::vector<real_t>> tempOutputSet);
Ref<MLPPMatrix> mean_centering(const Ref<MLPPMatrix> &X);
Ref<MLPPMatrix> one_hot_rep(const Ref<MLPPVector> &temp_output_set, int n_class);
template <class T> template <class T>
std::vector<T> vecToSet(std::vector<T> inputSet) { std::vector<T> vecToSet(std::vector<T> inputSet) {
std::vector<T> setInputSet; std::vector<T> setInputSet;