From 5ff5afff483ab15a5d27b1966265d003d2c8ee52 Mon Sep 17 00:00:00 2001 From: Relintai Date: Thu, 9 Feb 2023 11:40:16 +0100 Subject: [PATCH] Now MLPPData's helper classes use MLPPVector and MLPPMatrix. --- mlpp/data/data.cpp | 258 +++++++++++++++++++++++++++++------ mlpp/data/data.h | 45 ++++-- mlpp/lin_alg/mlpp_matrix.cpp | 72 ++++++++++ mlpp/lin_alg/mlpp_matrix.h | 58 +------- mlpp/lin_alg/mlpp_vector.cpp | 25 ++++ mlpp/lin_alg/mlpp_vector.h | 27 +--- test/mlpp_tests.cpp | 98 ++++++------- 7 files changed, 407 insertions(+), 176 deletions(-) diff --git a/mlpp/data/data.cpp b/mlpp/data/data.cpp index 310033a..41d4c32 100644 --- a/mlpp/data/data.cpp +++ b/mlpp/data/data.cpp @@ -20,13 +20,97 @@ #include #include +Ref MLPPDataESimple::get_input() { + return _input; +} +void MLPPDataESimple::set_input(const Ref &val) { + _input = val; +} + +Ref MLPPDataESimple::get_output() { + return _output; +} +void MLPPDataESimple::set_output(const Ref &val) { + _output = val; +} + +void MLPPDataESimple::instance_data() { + _input.instance(); + _output.instance(); +} + void MLPPDataESimple::_bind_methods() { + ClassDB::bind_method(D_METHOD("get_input"), &MLPPDataESimple::get_input); + ClassDB::bind_method(D_METHOD("set_input", "val"), &MLPPDataESimple::set_input); + ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "input", PROPERTY_HINT_RESOURCE_TYPE, "MLPPVector"), "set_input", "get_input"); + + ClassDB::bind_method(D_METHOD("get_output"), &MLPPDataESimple::get_output); + ClassDB::bind_method(D_METHOD("set_output", "val"), &MLPPDataESimple::set_output); + ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "output", PROPERTY_HINT_RESOURCE_TYPE, "MLPPVector"), "set_output", "get_output"); + + ClassDB::bind_method(D_METHOD("instance_data"), &MLPPDataESimple::instance_data); +} + +Ref MLPPDataSimple::get_input() { + return _input; +} +void MLPPDataSimple::set_input(const Ref &val) { + _input = val; +} + +Ref MLPPDataSimple::get_output() { + return _output; +} +void MLPPDataSimple::set_output(const 
Ref &val) { + _output = val; +} + +void MLPPDataSimple::instance_data() { + _input.instance(); + _output.instance(); } void MLPPDataSimple::_bind_methods() { + ClassDB::bind_method(D_METHOD("get_input"), &MLPPDataSimple::get_input); + ClassDB::bind_method(D_METHOD("set_input", "val"), &MLPPDataSimple::set_input); + ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "input", PROPERTY_HINT_RESOURCE_TYPE, "MLPPMatrix"), "set_input", "get_input"); + + ClassDB::bind_method(D_METHOD("get_output"), &MLPPDataSimple::get_output); + ClassDB::bind_method(D_METHOD("set_output", "val"), &MLPPDataSimple::set_output); + ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "output", PROPERTY_HINT_RESOURCE_TYPE, "MLPPVector"), "set_output", "get_output"); + + ClassDB::bind_method(D_METHOD("instance_data"), &MLPPDataSimple::instance_data); +} + +Ref MLPPDataComplex::get_input() { + return _input; +} +void MLPPDataComplex::set_input(const Ref &val) { + _input = val; +} + +Ref MLPPDataComplex::get_output() { + return _output; +} +void MLPPDataComplex::set_output(const Ref &val) { + _output = val; +} + +void MLPPDataComplex::instance_data() { + _input.instance(); + _output.instance(); } void MLPPDataComplex::_bind_methods() { + ClassDB::bind_method(D_METHOD("get_input"), &MLPPDataComplex::get_input); + ClassDB::bind_method(D_METHOD("set_input", "val"), &MLPPDataComplex::set_input); + ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "input", PROPERTY_HINT_RESOURCE_TYPE, "MLPPMatrix"), "set_input", "get_input"); + + ClassDB::bind_method(D_METHOD("get_output"), &MLPPDataComplex::get_output); + ClassDB::bind_method(D_METHOD("set_output", "val"), &MLPPDataComplex::set_output); + ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "output", PROPERTY_HINT_RESOURCE_TYPE, "MLPPMatrix"), "set_output", "get_output"); + + ClassDB::bind_method(D_METHOD("instance_data"), &MLPPDataComplex::instance_data); } // Loading Datasets @@ -35,8 +119,9 @@ Ref MLPPData::load_breast_cancer(const String &path) { Ref data; data.instance(); + 
data->instance_data(); - set_data_supervised(BREAST_CANCER_SIZE, path, data->input, data->output); + set_data_supervised(BREAST_CANCER_SIZE, path, data->get_input(), data->get_output()); return data; } @@ -46,8 +131,9 @@ Ref MLPPData::load_breast_cancer_svc(const String &path) { Ref data; data.instance(); + data->instance_data(); - set_data_supervised(BREAST_CANCER_SIZE, path, data->input, data->output); + set_data_supervised(BREAST_CANCER_SIZE, path, data->get_input(), data->get_output()); return data; } @@ -56,13 +142,15 @@ Ref MLPPData::load_iris(const String &path) { const int IRIS_SIZE = 4; const int ONE_HOT_NUM = 3; - std::vector tempOutputSet; + Ref temp_output_set; + temp_output_set.instance(); Ref data; data.instance(); + data->instance_data(); - set_data_supervised(IRIS_SIZE, path, data->input, tempOutputSet); - data->output = oneHotRep(tempOutputSet, ONE_HOT_NUM); + set_data_supervised(IRIS_SIZE, path, data->get_input(), temp_output_set); + data->set_output(one_hot_rep(temp_output_set, ONE_HOT_NUM)); return data; } @@ -71,13 +159,15 @@ Ref MLPPData::load_wine(const String &path) { const int WINE_SIZE = 4; const int ONE_HOT_NUM = 3; - std::vector tempOutputSet; + Ref temp_output_set; + temp_output_set.instance(); Ref data; data.instance(); + data->instance_data(); - set_data_supervised(WINE_SIZE, path, data->input, tempOutputSet); - data->output = oneHotRep(tempOutputSet, ONE_HOT_NUM); + set_data_supervised(WINE_SIZE, path, data->get_input(), temp_output_set); + data->set_output(one_hot_rep(temp_output_set, ONE_HOT_NUM)); return data; } @@ -86,14 +176,15 @@ Ref MLPPData::load_mnist_train(const String &path) { const int MNIST_SIZE = 784; const int ONE_HOT_NUM = 10; - std::vector> inputSet; - std::vector tempOutputSet; + Ref temp_output_set; + temp_output_set.instance(); Ref data; data.instance(); + data->instance_data(); - set_data_supervised(MNIST_SIZE, path, data->input, tempOutputSet); - data->output = oneHotRep(tempOutputSet, ONE_HOT_NUM); + 
set_data_supervised(MNIST_SIZE, path, data->get_input(), temp_output_set); + data->set_output(one_hot_rep(temp_output_set, ONE_HOT_NUM)); return data; } @@ -101,14 +192,16 @@ Ref MLPPData::load_mnist_train(const String &path) { Ref MLPPData::load_mnist_test(const String &path) { const int MNIST_SIZE = 784; const int ONE_HOT_NUM = 10; - std::vector> inputSet; - std::vector tempOutputSet; + + Ref temp_output_set; + temp_output_set.instance(); Ref data; data.instance(); + data->instance_data(); - set_data_supervised(MNIST_SIZE, path, data->input, tempOutputSet); - data->output = oneHotRep(tempOutputSet, ONE_HOT_NUM); + set_data_supervised(MNIST_SIZE, path, data->get_input(), temp_output_set); + data->set_output(one_hot_rep(temp_output_set, ONE_HOT_NUM)); return data; } @@ -118,8 +211,9 @@ Ref MLPPData::load_california_housing(const String &path) { Ref data; data.instance(); + data->instance_data(); - set_data_supervised(CALIFORNIA_HOUSING_SIZE, path, data->input, data->output); + set_data_supervised(CALIFORNIA_HOUSING_SIZE, path, data->get_input(), data->get_output()); return data; } @@ -129,18 +223,24 @@ Ref MLPPData::load_fires_and_crime(const String &path) { Ref data; data.instance(); + data->instance_data(); - set_data_simple(path, data->input, data->output); + set_data_simple(path, data->get_input(), data->get_output()); return data; } // MULTIVARIATE SUPERVISED -void MLPPData::set_data_supervised(int k, const String &file_name, std::vector> &inputSet, std::vector &outputSet) { +void MLPPData::set_data_supervised(int k, const String &file_name, Ref input_set, Ref output_set) { + ERR_FAIL_COND(!input_set.is_valid() || !output_set.is_valid()); + MLPPLinAlg alg; - inputSet.resize(k); + Vector> input_set_tmp; + input_set_tmp.resize(k); + + Vector output_set_tmp; FileAccess *file = FileAccess::open(file_name, FileAccess::READ); @@ -150,21 +250,28 @@ void MLPPData::set_data_supervised(int k, const String &file_name, std::vector ll = file->get_csv_line(); for (int i = 
0; i < k; ++i) { - inputSet[i].push_back(static_cast(ll[i].to_double())); + input_set_tmp.write[i].push_back(static_cast(ll[i].to_double())); } - outputSet.push_back(static_cast(ll[k].to_double())); + output_set_tmp.push_back(static_cast(ll[k].to_double())); } - inputSet = alg.transpose(inputSet); - + file->close(); memdelete(file); + + output_set->set_from_vector(output_set_tmp); + + input_set->set_from_vectors(input_set_tmp); + input_set = alg.transposem(input_set); } -void MLPPData::set_data_unsupervised(int k, const String &file_name, std::vector> &inputSet) { +void MLPPData::set_data_unsupervised(int k, const String &file_name, Ref input_set) { + ERR_FAIL_COND(!input_set.is_valid()); + MLPPLinAlg alg; - inputSet.resize(k); + Vector> input_set_tmp; + input_set_tmp.resize(k); FileAccess *file = FileAccess::open(file_name, FileAccess::READ); @@ -174,41 +281,60 @@ void MLPPData::set_data_unsupervised(int k, const String &file_name, std::vector Vector ll = file->get_csv_line(); for (int i = 0; i < k; ++i) { - inputSet[i].push_back(static_cast(ll[i].to_double())); + input_set_tmp.write[i].push_back(static_cast(ll[i].to_double())); } } - inputSet = alg.transpose(inputSet); - + file->close(); memdelete(file); + + input_set->set_from_vectors(input_set_tmp); + input_set = alg.transposem(input_set); } -void MLPPData::set_data_simple(const String &file_name, std::vector &inputSet, std::vector &outputSet) { +void MLPPData::set_data_simple(const String &file_name, Ref input_set, Ref output_set) { + ERR_FAIL_COND(!input_set.is_valid() || !output_set.is_valid()); + FileAccess *file = FileAccess::open(file_name, FileAccess::READ); ERR_FAIL_COND(!file); + Vector input_set_tmp; + Vector output_set_tmp; + while (!file->eof_reached()) { Vector ll = file->get_csv_line(); for (int i = 0; i < ll.size(); i += 2) { - inputSet.push_back(static_cast(ll[i].to_double())); - outputSet.push_back(static_cast(ll[i + 1].to_double())); + input_set_tmp.push_back(static_cast(ll[i].to_double())); + 
output_set_tmp.push_back(static_cast(ll[i + 1].to_double())); } } + file->close(); memdelete(file); + + input_set->set_from_vector(input_set_tmp); + output_set->set_from_vector(output_set_tmp); } -MLPPData::SplitComplexData MLPPData::train_test_split(const Ref &data, real_t test_size) { +MLPPData::SplitComplexData MLPPData::train_test_split(Ref data, real_t test_size) { SplitComplexData res; res.train.instance(); + res.train->instance_data(); res.test.instance(); + res.test->instance_data(); ERR_FAIL_COND_V(!data.is_valid(), res); - int is = MIN(data->input.size(), data->output.size()); + Ref orig_input = data->get_input(); + Ref orig_output = data->get_output(); + + Size2i orig_input_size = orig_input->size(); + Size2i orig_output_size = orig_output->size(); + + int is = MIN(orig_input_size.y, orig_output_size.y); Array indices; indices.resize(is); @@ -219,20 +345,48 @@ MLPPData::SplitComplexData MLPPData::train_test_split(const Ref indices.shuffle(); + Ref orig_input_row_tmp; + orig_input_row_tmp.instance(); + orig_input_row_tmp->resize(orig_input_size.x); + + Ref orig_output_row_tmp; + orig_output_row_tmp.instance(); + orig_output_row_tmp->resize(orig_output_size.x); + int test_input_number = test_size * is; // implicit usage of floor + Ref res_test_input = res.test->get_input(); + Ref res_test_output = res.test->get_output(); + + res_test_input->resize(Size2i(orig_input_size.x, test_input_number)); + res_test_output->resize(Size2i(orig_output_size.x, test_input_number)); + for (int i = 0; i < test_input_number; ++i) { int index = indices[i]; - res.test->input.push_back(data->input[i]); - res.test->output.push_back(data->output[i]); + orig_input->get_row_into_mlpp_vector(index, orig_input_row_tmp); + orig_output->get_row_into_mlpp_vector(index, orig_output_row_tmp); + + res_test_input->set_row_mlpp_vector(i, orig_input_row_tmp); + res_test_output->set_row_mlpp_vector(i, orig_output_row_tmp); } - for (int i = test_input_number; i < is; ++i) { - int index = indices[i]; + Ref 
res_train_input = res.train->get_input(); + Ref res_train_output = res.train->get_output(); - res.train->input.push_back(data->input[i]); - res.train->output.push_back(data->output[i]); + int train_input_number = is - test_input_number; + + res_train_input->resize(Size2i(orig_input_size.x, train_input_number)); + res_train_output->resize(Size2i(orig_output_size.x, train_input_number)); + + for (int i = 0; i < train_input_number; ++i) { + int index = indices[test_input_number + i]; + + orig_input->get_row_into_mlpp_vector(index, orig_input_row_tmp); + orig_output->get_row_into_mlpp_vector(index, orig_output_row_tmp); + + res_train_input->set_row_mlpp_vector(i, orig_input_row_tmp); + res_train_output->set_row_mlpp_vector(i, orig_output_row_tmp); } return res; @@ -1081,6 +1235,30 @@ Ref MLPPData::mean_centering(const Ref &p_X) { return X; } +Ref MLPPData::one_hot_rep(const Ref &temp_output_set, int n_class) { + ERR_FAIL_COND_V(!temp_output_set.is_valid(), Ref()); + + Ref output_set; + output_set.instance(); + + int temp_output_set_size = temp_output_set->size(); + const real_t *temp_output_set_ptr = temp_output_set->ptr(); + + output_set->resize(Size2i(n_class, temp_output_set_size)); + + for (int i = 0; i < temp_output_set_size; ++i) { + for (int j = 0; j <= n_class - 1; ++j) { + if (static_cast(temp_output_set_ptr[i]) == j) { + output_set->set_element(i, j, 1); + } else { + output_set->set_element(i, j, 0); + } + } + } + + return output_set; +} + void MLPPData::_bind_methods() { ClassDB::bind_method(D_METHOD("load_breast_cancer", "path"), &MLPPData::load_breast_cancer); ClassDB::bind_method(D_METHOD("load_breast_cancer_svc", "path"), &MLPPData::load_breast_cancer_svc); diff --git a/mlpp/data/data.h b/mlpp/data/data.h index 3b7141c..b868025 100644 --- a/mlpp/data/data.h +++ b/mlpp/data/data.h @@ -27,33 +27,57 @@ class MLPPDataESimple : public Reference { GDCLASS(MLPPDataESimple, Reference); public: - std::vector input; - std::vector output; + Ref get_input(); + void set_input(const 
Ref &val); + + Ref get_output(); + void set_output(const Ref &val); + + void instance_data(); protected: static void _bind_methods(); + + Ref _input; + Ref _output; }; class MLPPDataSimple : public Reference { GDCLASS(MLPPDataSimple, Reference); public: - std::vector> input; - std::vector output; + Ref get_input(); + void set_input(const Ref &val); + + Ref get_output(); + void set_output(const Ref &val); + + void instance_data(); protected: static void _bind_methods(); + + Ref _input; + Ref _output; }; class MLPPDataComplex : public Reference { GDCLASS(MLPPDataComplex, Reference); public: - std::vector> input; - std::vector> output; + Ref get_input(); + void set_input(const Ref &val); + + Ref get_output(); + void set_output(const Ref &val); + + void instance_data(); protected: static void _bind_methods(); + + Ref _input; + Ref _output; }; class MLPPData : public Reference { @@ -70,16 +94,16 @@ public: Ref load_california_housing(const String &path); Ref load_fires_and_crime(const String &path); - void set_data_supervised(int k, const String &file_name, std::vector> &inputSet, std::vector &outputSet); - void set_data_unsupervised(int k, const String &file_name, std::vector> &inputSet); - void set_data_simple(const String &file_name, std::vector &inputSet, std::vector &outputSet); + void set_data_supervised(int k, const String &file_name, Ref input_set, Ref output_set); + void set_data_unsupervised(int k, const String &file_name, Ref input_set); + void set_data_simple(const String &file_name, Ref input_set, Ref output_set); struct SplitComplexData { Ref train; Ref test; }; - SplitComplexData train_test_split(const Ref &data, real_t test_size); + SplitComplexData train_test_split(Ref data, real_t test_size); Array train_test_split_bind(const Ref &data, real_t test_size); // Load Datasets @@ -151,6 +175,7 @@ public: std::vector reverseOneHot(std::vector> tempOutputSet); Ref mean_centering(const Ref &X); + Ref one_hot_rep(const Ref &temp_output_set, int n_class); 
template std::vector vecToSet(std::vector inputSet) { diff --git a/mlpp/lin_alg/mlpp_matrix.cpp b/mlpp/lin_alg/mlpp_matrix.cpp index 4e5122c..f63d4ff 100644 --- a/mlpp/lin_alg/mlpp_matrix.cpp +++ b/mlpp/lin_alg/mlpp_matrix.cpp @@ -22,6 +22,78 @@ String MLPPMatrix::to_string() { return str; } +std::vector MLPPMatrix::to_flat_std_vector() const { + std::vector ret; + ret.resize(data_size()); + real_t *w = &ret[0]; + memcpy(w, _data, sizeof(real_t) * data_size()); + return ret; +} + +void MLPPMatrix::set_from_std_vectors(const std::vector> &p_from) { + if (p_from.size() == 0) { + reset(); + return; + } + + resize(Size2i(p_from[0].size(), p_from.size())); + + if (data_size() == 0) { + reset(); + return; + } + + for (uint32_t i = 0; i < p_from.size(); ++i) { + const std::vector &r = p_from[i]; + + ERR_CONTINUE(r.size() != static_cast(_size.x)); + + int start_index = i * _size.x; + + const real_t *from_ptr = &r[0]; + for (int j = 0; j < _size.x; j++) { + _data[start_index + j] = from_ptr[j]; + } + } +} + +std::vector> MLPPMatrix::to_std_vector() { + std::vector> ret; + + ret.resize(_size.y); + + for (int i = 0; i < _size.y; ++i) { + std::vector row; + + for (int j = 0; j < _size.x; ++j) { + row.push_back(_data[calculate_index(i, j)]); + } + + ret[i] = row; + } + + return ret; +} + +void MLPPMatrix::set_row_std_vector(int p_index_y, const std::vector &p_row) { + ERR_FAIL_COND(p_row.size() != static_cast(_size.x)); + ERR_FAIL_INDEX(p_index_y, _size.y); + + int ind_start = p_index_y * _size.x; + + const real_t *row_ptr = &p_row[0]; + + for (int i = 0; i < _size.x; ++i) { + _data[ind_start + i] = row_ptr[i]; + } +} + +MLPPMatrix::MLPPMatrix(const std::vector> &p_from) { + _data = NULL; + + set_from_std_vectors(p_from); +} + void MLPPMatrix::_bind_methods() { ClassDB::bind_method(D_METHOD("add_row", "row"), &MLPPMatrix::add_row_pool_vector); ClassDB::bind_method(D_METHOD("add_row_mlpp_vector", "row"), &MLPPMatrix::add_row_mlpp_vector); diff --git a/mlpp/lin_alg/mlpp_matrix.h 
b/mlpp/lin_alg/mlpp_matrix.h index 70ca15a..6b026c4 100644 --- a/mlpp/lin_alg/mlpp_matrix.h +++ b/mlpp/lin_alg/mlpp_matrix.h @@ -644,59 +644,11 @@ public: } // TODO: These are temporary - std::vector to_flat_std_vector() const { - std::vector ret; - ret.resize(data_size()); - real_t *w = &ret[0]; - memcpy(w, _data, sizeof(real_t) * data_size()); - return ret; - } - - _FORCE_INLINE_ void set_from_std_vectors(const std::vector> &p_from) { - if (p_from.size() == 0) { - reset(); - return; - } - - resize(Size2i(p_from[0].size(), p_from.size())); - - if (data_size() == 0) { - reset(); - return; - } - - for (uint32_t i = 0; i < p_from.size(); ++i) { - const std::vector &r = p_from[i]; - - ERR_CONTINUE(r.size() != static_cast(_size.x)); - - int start_index = i * _size.x; - - const real_t *from_ptr = &r[0]; - for (int j = 0; j < _size.x; j++) { - _data[start_index + j] = from_ptr[j]; - } - } - } - - _FORCE_INLINE_ void set_row_std_vector(int p_index_y, const std::vector &p_row) { - ERR_FAIL_COND(p_row.size() != static_cast(_size.x)); - ERR_FAIL_INDEX(p_index_y, _size.y); - - int ind_start = p_index_y * _size.x; - - const real_t *row_ptr = &p_row[0]; - - for (int i = 0; i < _size.x; ++i) { - _data[ind_start + i] = row_ptr[i]; - } - } - - MLPPMatrix(const std::vector> &p_from) { - _data = NULL; - - set_from_std_vectors(p_from); - } + std::vector to_flat_std_vector() const; + void set_from_std_vectors(const std::vector> &p_from); + std::vector> to_std_vector(); + void set_row_std_vector(int p_index_y, const std::vector &p_row); + MLPPMatrix(const std::vector> &p_from); protected: static void _bind_methods(); diff --git a/mlpp/lin_alg/mlpp_vector.cpp b/mlpp/lin_alg/mlpp_vector.cpp index 0caa64c..7636467 100644 --- a/mlpp/lin_alg/mlpp_vector.cpp +++ b/mlpp/lin_alg/mlpp_vector.cpp @@ -16,6 +16,31 @@ String MLPPVector::to_string() { return str; } +std::vector MLPPVector::to_std_vector() const { + std::vector ret; + ret.resize(size()); + real_t *w = &ret[0]; + memcpy(w, _data, 
sizeof(real_t) * _size); + return ret; +} + +void MLPPVector::set_from_std_vector(const std::vector &p_from) { + resize(p_from.size()); + for (int i = 0; i < _size; i++) { + _data[i] = p_from[i]; + } +} + +MLPPVector::MLPPVector(const std::vector &p_from) { + _size = 0; + _data = NULL; + + resize(p_from.size()); + for (int i = 0; i < _size; i++) { + _data[i] = p_from[i]; + } +} + void MLPPVector::_bind_methods() { ClassDB::bind_method(D_METHOD("push_back", "elem"), &MLPPVector::push_back); ClassDB::bind_method(D_METHOD("add_mlpp_vector", "other"), &MLPPVector::push_back); diff --git a/mlpp/lin_alg/mlpp_vector.h b/mlpp/lin_alg/mlpp_vector.h index c18a487..d7f4e33 100644 --- a/mlpp/lin_alg/mlpp_vector.h +++ b/mlpp/lin_alg/mlpp_vector.h @@ -385,30 +385,9 @@ public: } // TODO: These are temporary - std::vector to_std_vector() const { - std::vector ret; - ret.resize(size()); - real_t *w = &ret[0]; - memcpy(w, _data, sizeof(real_t) * _size); - return ret; - } - - _FORCE_INLINE_ void set_from_std_vector(const std::vector &p_from) { - resize(p_from.size()); - for (int i = 0; i < _size; i++) { - _data[i] = p_from[i]; - } - } - - MLPPVector(const std::vector &p_from) { - _size = 0; - _data = NULL; - - resize(p_from.size()); - for (int i = 0; i < _size; i++) { - _data[i] = p_from[i]; - } - } + std::vector to_std_vector() const; + void set_from_std_vector(const std::vector &p_from); + MLPPVector(const std::vector &p_from); protected: static void _bind_methods(); diff --git a/test/mlpp_tests.cpp b/test/mlpp_tests.cpp index 008f09f..1f1d4f0 100644 --- a/test/mlpp_tests.cpp +++ b/test/mlpp_tests.cpp @@ -182,7 +182,7 @@ void MLPPTests::test_univariate_linear_regression() { Ref ds = data.load_fires_and_crime(_fires_and_crime_data_path); - MLPPUniLinRegOld model_old(ds->input, ds->output); + MLPPUniLinRegOld model_old(ds->get_input()->to_std_vector(), ds->get_output()->to_std_vector()); std::vector slr_res = { 24.1095, 28.4829, 29.8082, 26.0974, 27.2902, 61.0851, 30.4709, 25.0372, 
25.5673, 35.9046, @@ -191,17 +191,9 @@ void MLPPTests::test_univariate_linear_regression() { 27.8203, 20.6637, 22.5191, 53.796, 38.9527, 30.8685, 20.3986 }; - is_approx_equals_dvec(dstd_vec_to_vec(model_old.modelSetTest(ds->input)), dstd_vec_to_vec(slr_res), "stat.mode(x)"); + is_approx_equals_dvec(dstd_vec_to_vec(model_old.modelSetTest(ds->get_input()->to_std_vector())), dstd_vec_to_vec(slr_res), "stat.mode(x)"); - Ref input; - input.instance(); - input->set_from_std_vector(ds->input); - - Ref output; - output.instance(); - output->set_from_std_vector(ds->output); - - MLPPUniLinReg model(input, output); + MLPPUniLinReg model(ds->get_input(), ds->get_output()); std::vector slr_res_n = { 24.109467, 28.482935, 29.808228, 26.097408, 27.290173, 61.085152, 30.470875, 25.037172, 25.567291, @@ -215,7 +207,7 @@ void MLPPTests::test_univariate_linear_regression() { slr_res_v.instance(); slr_res_v->set_from_std_vector(slr_res_n); - Ref res = model.model_set_test(input); + Ref res = model.model_set_test(ds->get_input()); if (!slr_res_v->is_equal_approx(res)) { ERR_PRINT("!slr_res_v->is_equal_approx(res)"); @@ -230,10 +222,10 @@ void MLPPTests::test_multivariate_linear_regression_gradient_descent(bool ui) { Ref ds = data.load_california_housing(_california_housing_data_path); - MLPPLinReg model(ds->input, ds->output); // Can use Lasso, Ridge, ElasticNet Reg + MLPPLinReg model(ds->get_input()->to_std_vector(), ds->get_output()->to_std_vector()); // Can use Lasso, Ridge, ElasticNet Reg model.gradientDescent(0.001, 30, ui); - alg.printVector(model.modelSetTest(ds->input)); + alg.printVector(model.modelSetTest(ds->get_input()->to_std_vector())); } void MLPPTests::test_multivariate_linear_regression_sgd(bool ui) { @@ -242,10 +234,10 @@ void MLPPTests::test_multivariate_linear_regression_sgd(bool ui) { Ref ds = data.load_california_housing(_california_housing_data_path); - MLPPLinReg model(ds->input, ds->output); // Can use Lasso, Ridge, ElasticNet Reg + MLPPLinReg 
model(ds->get_input()->to_std_vector(), ds->get_output()->to_std_vector()); // Can use Lasso, Ridge, ElasticNet Reg model.SGD(0.00000001, 300000, ui); - alg.printVector(model.modelSetTest(ds->input)); + alg.printVector(model.modelSetTest(ds->get_input()->to_std_vector())); } void MLPPTests::test_multivariate_linear_regression_mbgd(bool ui) { @@ -254,10 +246,10 @@ void MLPPTests::test_multivariate_linear_regression_mbgd(bool ui) { Ref ds = data.load_california_housing(_california_housing_data_path); - MLPPLinReg model(ds->input, ds->output); // Can use Lasso, Ridge, ElasticNet Reg + MLPPLinReg model(ds->get_input()->to_std_vector(), ds->get_output()->to_std_vector()); // Can use Lasso, Ridge, ElasticNet Reg model.MBGD(0.001, 10000, 2, ui); - alg.printVector(model.modelSetTest(ds->input)); + alg.printVector(model.modelSetTest(ds->get_input()->to_std_vector())); } void MLPPTests::test_multivariate_linear_regression_normal_equation(bool ui) { @@ -266,10 +258,10 @@ void MLPPTests::test_multivariate_linear_regression_normal_equation(bool ui) { Ref ds = data.load_california_housing(_california_housing_data_path); - MLPPLinReg model(ds->input, ds->output); // Can use Lasso, Ridge, ElasticNet Reg + MLPPLinReg model(ds->get_input()->to_std_vector(), ds->get_output()->to_std_vector()); // Can use Lasso, Ridge, ElasticNet Reg model.normalEquation(); - alg.printVector(model.modelSetTest(ds->input)); + alg.printVector(model.modelSetTest(ds->get_input()->to_std_vector())); } void MLPPTests::test_multivariate_linear_regression_adam() { @@ -278,8 +270,8 @@ void MLPPTests::test_multivariate_linear_regression_adam() { Ref ds = data.load_california_housing(_california_housing_data_path); - MLPPLinReg adamModel(alg.transpose(ds->input), ds->output); - alg.printVector(adamModel.modelSetTest(ds->input)); + MLPPLinReg adamModel(alg.transpose(ds->get_input()->to_std_vector()), ds->get_output()->to_std_vector()); + alg.printVector(adamModel.modelSetTest(ds->get_input()->to_std_vector())); 
std::cout << "ACCURACY: " << 100 * adamModel.score() << "%" << std::endl; } @@ -294,11 +286,11 @@ void MLPPTests::test_multivariate_linear_regression_score_sgd_adam(bool ui) { real_t scoreSGD = 0; real_t scoreADAM = 0; for (int i = 0; i < TRIAL_NUM; i++) { - MLPPLinReg modelf(alg.transpose(ds->input), ds->output); + MLPPLinReg modelf(alg.transpose(ds->get_input()->to_std_vector()), ds->get_output()->to_std_vector()); modelf.MBGD(0.001, 5, 1, ui); scoreSGD += modelf.score(); - MLPPLinReg adamModelf(alg.transpose(ds->input), ds->output); + MLPPLinReg adamModelf(alg.transpose(ds->get_input()->to_std_vector()), ds->get_output()->to_std_vector()); adamModelf.Adam(0.1, 5, 1, 0.9, 0.999, 1e-8, ui); // Change batch size = sgd, bgd scoreADAM += adamModelf.score(); } @@ -317,9 +309,9 @@ void MLPPTests::test_multivariate_linear_regression_epochs_gradient_descent(bool std::cout << "Total epoch num: 300" << std::endl; std::cout << "Method: 1st Order w/ Jacobians" << std::endl; - MLPPLinReg model3(alg.transpose(ds->input), ds->output); // Can use Lasso, Ridge, ElasticNet Reg + MLPPLinReg model3(alg.transpose(ds->get_input()->to_std_vector()), ds->get_output()->to_std_vector()); // Can use Lasso, Ridge, ElasticNet Reg model3.gradientDescent(0.001, 300, ui); - alg.printVector(model3.modelSetTest(ds->input)); + alg.printVector(model3.modelSetTest(ds->get_input()->to_std_vector())); } void MLPPTests::test_multivariate_linear_regression_newton_raphson(bool ui) { @@ -331,10 +323,10 @@ void MLPPTests::test_multivariate_linear_regression_newton_raphson(bool ui) { std::cout << "--------------------------------------------" << std::endl; std::cout << "Total epoch num: 300" << std::endl; std::cout << "Method: Newtonian 2nd Order w/ Hessians" << std::endl; - MLPPLinReg model2(alg.transpose(ds->input), ds->output); + MLPPLinReg model2(alg.transpose(ds->get_input()->to_std_vector()), ds->get_output()->to_std_vector()); model2.NewtonRaphson(1.5, 300, ui); - 
alg.printVector(model2.modelSetTest(ds->input)); + alg.printVector(model2.modelSetTest(ds->get_input()->to_std_vector())); } void MLPPTests::test_logistic_regression(bool ui) { @@ -343,9 +335,9 @@ void MLPPTests::test_logistic_regression(bool ui) { // LOGISTIC REGRESSION Ref dt = data.load_breast_cancer(_breast_cancer_data_path); - MLPPLogReg model(dt->input, dt->output); + MLPPLogReg model(dt->get_input()->to_std_vector(), dt->get_output()->to_std_vector()); model.SGD(0.001, 100000, ui); - alg.printVector(model.modelSetTest(dt->input)); + alg.printVector(model.modelSetTest(dt->get_input()->to_std_vector())); std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; } void MLPPTests::test_probit_regression(bool ui) { @@ -355,9 +347,9 @@ void MLPPTests::test_probit_regression(bool ui) { // PROBIT REGRESSION Ref dt = data.load_breast_cancer(_breast_cancer_data_path); - MLPPProbitReg model(dt->input, dt->output); + MLPPProbitReg model(dt->get_input()->to_std_vector(), dt->get_output()->to_std_vector()); model.SGD(0.001, 10000, ui); - alg.printVector(model.modelSetTest(dt->input)); + alg.printVector(model.modelSetTest(dt->get_input()->to_std_vector())); std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; } void MLPPTests::test_c_log_log_regression(bool ui) { @@ -399,9 +391,9 @@ void MLPPTests::test_softmax_regression(bool ui) { // SOFTMAX REGRESSION Ref dt = data.load_iris(_iris_data_path); - MLPPSoftmaxReg model(dt->input, dt->output); + MLPPSoftmaxReg model(dt->get_input()->to_std_vector(), dt->get_output()->to_std_vector()); model.SGD(0.1, 10000, ui); - alg.printMatrix(model.modelSetTest(dt->input)); + alg.printMatrix(model.modelSetTest(dt->get_input()->to_std_vector())); std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; } void MLPPTests::test_support_vector_classification(bool ui) { @@ -414,9 +406,9 @@ void MLPPTests::test_support_vector_classification(bool ui) { // SUPPORT VECTOR CLASSIFICATION Ref dt = 
data.load_breast_cancer_svc(_breast_cancer_svm_data_path); - MLPPSVC model(dt->input, dt->output, ui); + MLPPSVC model(dt->get_input()->to_std_vector(), dt->get_output()->to_std_vector(), ui); model.SGD(0.00001, 100000, ui); - alg.printVector(model.modelSetTest(dt->input)); + alg.printVector(model.modelSetTest(dt->get_input()->to_std_vector())); std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; } @@ -473,9 +465,9 @@ void MLPPTests::test_soft_max_network(bool ui) { // SOFTMAX NETWORK Ref dt = data.load_wine(_wine_data_path); - MLPPSoftmaxNet model(dt->input, dt->output, 1); + MLPPSoftmaxNet model(dt->get_input()->to_std_vector(), dt->get_output()->to_std_vector(), 1); model.gradientDescent(0.01, 100000, ui); - alg.printMatrix(model.modelSetTest(dt->input)); + alg.printMatrix(model.modelSetTest(dt->get_input()->to_std_vector())); std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl; } void MLPPTests::test_autoencoder(bool ui) { @@ -598,24 +590,32 @@ void MLPPTests::test_train_test_split_mann(bool ui) { std::vector> inputSet1 = { { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }, { 3, 5, 9, 12, 15, 18, 21, 24, 27, 30 } }; std::vector> outputSet1 = { { 2, 4, 6, 8, 10, 12, 14, 16, 18, 20 } }; + Ref input_set_1; + input_set_1.instance(); + input_set_1->set_from_std_vectors(inputSet1); + + Ref output_set_1; + output_set_1.instance(); + output_set_1->set_from_std_vectors(outputSet1); + Ref d; d.instance(); - d->input = alg.transpose(inputSet1); - d->output = alg.transpose(outputSet1); + d->set_input(alg.transposem(input_set_1)); + d->set_output(alg.transposem(output_set_1)); MLPPData::SplitComplexData split_data = data.train_test_split(d, 0.2); - alg.printMatrix(split_data.train->input); - alg.printMatrix(split_data.train->output); - alg.printMatrix(split_data.test->input); - alg.printMatrix(split_data.test->output); + PLOG_MSG(split_data.train->get_input()->to_string()); + PLOG_MSG(split_data.train->get_output()->to_string()); + 
PLOG_MSG(split_data.test->get_input()->to_string()); + PLOG_MSG(split_data.test->get_output()->to_string()); - MLPPMANN mann(split_data.train->input, split_data.train->output); + MLPPMANN mann(split_data.train->get_input()->to_std_vector(), split_data.train->get_output()->to_std_vector()); mann.addLayer(100, "RELU", "XavierNormal"); mann.addOutputLayer("Softmax", "CrossEntropy", "XavierNormal"); - mann.gradientDescent(0.1, 80000, 1); - alg.printMatrix(mann.modelSetTest(split_data.test->input)); + mann.gradientDescent(0.1, 80000, ui); + alg.printMatrix(mann.modelSetTest(split_data.test->get_input()->to_std_vector())); std::cout << "ACCURACY: " << 100 * mann.score() << "%" << std::endl; } @@ -866,7 +866,7 @@ void MLPPTests::test_new_math_functions() { // Testing new Functions real_t z_s = 0.001; std::cout << avn.logit(z_s) << std::endl; - std::cout << avn.logit(z_s, 1) << std::endl; + std::cout << avn.logit(z_s, true) << std::endl; std::vector z_v = { 0.001 }; alg.printVector(avn.logit(z_v)); @@ -1071,7 +1071,7 @@ void MLPPTests::test_support_vector_classification_kernel(bool ui) { //SUPPORT VECTOR CLASSIFICATION (kernel method) Ref dt = data.load_breast_cancer_svc(_breast_cancer_svm_data_path); - MLPPDualSVC kernelSVM(dt->input, dt->output, 1000); + MLPPDualSVC kernelSVM(dt->get_input()->to_std_vector(), dt->get_output()->to_std_vector(), 1000); kernelSVM.gradientDescent(0.0001, 20, ui); std::cout << "SCORE: " << kernelSVM.score() << std::endl;