Reworked and cleaned up MLPPKNN. Also registered it to the engine.

This commit is contained in:
Relintai 2023-01-28 01:02:57 +01:00
parent 4deb34c852
commit 02a44dddf7
11 changed files with 414 additions and 72 deletions

View File

@ -8,77 +8,172 @@
#include "../lin_alg/lin_alg.h"
#include "../utilities/utilities.h"
#include <algorithm>
#include <iostream>
#include <map>
#include "core/containers/hash_map.h"
#include "core/containers/vector.h"
MLPPKNN::MLPPKNN(std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet, int k) :
inputSet(inputSet), outputSet(outputSet), k(k) {
// Returns the training input matrix (one sample per row). May be an invalid Ref if unset.
Ref<MLPPMatrix> MLPPKNN::get_input_set() {
return _input_set;
}
// Sets the training input matrix. No copy is made; the Ref is shared with the caller.
void MLPPKNN::set_input_set(const Ref<MLPPMatrix> &val) {
_input_set = val;
}
std::vector<real_t> MLPPKNN::modelSetTest(std::vector<std::vector<real_t>> X) {
std::vector<real_t> y_hat;
for (int i = 0; i < X.size(); i++) {
y_hat.push_back(modelTest(X[i]));
// Returns the training labels, one entry per input row. May be an invalid Ref if unset.
Ref<MLPPVector> MLPPKNN::get_output_set() {
return _output_set;
}
// Sets the training labels. No validation against _input_set's row count is performed here.
void MLPPKNN::set_output_set(const Ref<MLPPVector> &val) {
_output_set = val;
}
// Returns k, the number of nearest neighbors consulted per prediction.
int MLPPKNN::get_k() {
return _k;
}
// Sets k. Values <= 0 yield empty neighbor sets in nearest_neighbors().
void MLPPKNN::set_k(const int val) {
_k = val;
}
// Classifies every row of X and returns one predicted class per row.
// X: matrix whose rows are samples with the same column count as the training set.
PoolIntArray MLPPKNN::model_set_test(const Ref<MLPPMatrix> &X) {
	ERR_FAIL_COND_V(!X.is_valid(), PoolIntArray());

	// Reuse one scratch vector for every row instead of allocating per row.
	Ref<MLPPVector> row;
	row.instance();

	int row_count = X->size().y;

	PoolIntArray y_hat;
	y_hat.resize(row_count);

	{
		// One scoped write lock; PoolIntArray::set() would re-lock on every element.
		PoolIntArray::Write y_hat_w = y_hat.write();
		int *y_hat_ptr = y_hat_w.ptr();

		for (int i = 0; i < row_count; i++) {
			X->get_row_into_mlpp_vector(i, row);
			y_hat_ptr[i] = model_test(row);
		}
	}

	return y_hat;
}
int MLPPKNN::modelTest(std::vector<real_t> x) {
return determineClass(nearestNeighbors(x));
int MLPPKNN::model_test(const Ref<MLPPVector> &x) {
return determine_class(nearest_neighbors(x));
}
real_t MLPPKNN::score() {
MLPPUtilities util;
return util.performance(modelSetTest(inputSet), outputSet);
MLPPUtilities util;
return util.performance_pool_int_array_vec(model_set_test(_input_set), _output_set);
}
int MLPPKNN::determineClass(std::vector<real_t> knn) {
std::map<int, int> class_nums;
for (int i = 0; i < outputSet.size(); i++) {
class_nums[outputSet[i]] = 0;
}
for (int i = 0; i < knn.size(); i++) {
for (int j = 0; j < outputSet.size(); j++) {
if (knn[i] == outputSet[j]) {
class_nums[outputSet[j]]++;
}
}
}
int max = class_nums[outputSet[0]];
int final_class = outputSet[0];
for (int i = 0; i < outputSet.size(); i++) {
if (class_nums[outputSet[i]] > max) {
max = class_nums[outputSet[i]];
}
}
for (auto [c, v] : class_nums) {
if (v == max) {
final_class = c;
}
}
return final_class;
// Legacy std::vector-based constructor kept for API compatibility.
// Previously this silently discarded inputSet/outputSet (only _k was stored),
// leaving the model unusable; now the data is converted into the engine types.
MLPPKNN::MLPPKNN(std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet, int k) {
	_input_set.instance();
	_input_set->set_from_std_vectors(inputSet);

	_output_set.instance();
	_output_set->set_from_std_vector(outputSet);

	_k = k;
}
std::vector<real_t> MLPPKNN::nearestNeighbors(std::vector<real_t> x) {
// Default constructor used by the engine's ClassDB; sets a neutral k of 0
// (nearest_neighbors() then returns an empty set until set_k() is called).
MLPPKNN::MLPPKNN() {
_k = 0;
}
// Nothing to release: _input_set/_output_set are Refs and clean up themselves.
MLPPKNN::~MLPPKNN() {
}
// Private Model Functions
// Private Model Functions

// Returns the row indices of the _k training samples nearest to x
// (Euclidean metric). Fewer than _k indices are returned if the
// training set has fewer than _k rows.
PoolIntArray MLPPKNN::nearest_neighbors(const Ref<MLPPVector> &x) {
	ERR_FAIL_COND_V(!_input_set.is_valid(), PoolIntArray());

	MLPPLinAlg alg;

	// The nearest neighbors found so far (training-set row indices).
	PoolIntArray knn;
	// Rows already selected; they must not be picked again.
	HashMap<int, bool> skip_map;

	Ref<MLPPVector> tmpv1;
	tmpv1.instance();
	Ref<MLPPVector> tmpv2;
	tmpv2.instance();

	int input_set_rows = _input_set->size().y;

	// Each pass selects the nearest not-yet-used row.
	for (int i = 0; i < _k; ++i) {
		// BUGFIX: the baseline used to be hard-coded to row 0; once row 0 was
		// in skip_map, a pass could end on an already-used row and silently
		// add nothing, returning fewer than _k neighbors.
		int neighbor = -1;

		for (int j = 0; j < input_set_rows; j++) {
			if (skip_map.has(j)) {
				continue;
			}

			if (neighbor == -1) {
				// First unused row becomes the initial candidate.
				neighbor = j;
				continue;
			}

			_input_set->get_row_into_mlpp_vector(j, tmpv1);
			_input_set->get_row_into_mlpp_vector(neighbor, tmpv2);

			// Squared distance preserves ordering (sqrt is monotone), so the
			// sqrt per comparison can be skipped.
			if (alg.euclidean_distance_squared(x, tmpv1) < alg.euclidean_distance_squared(x, tmpv2)) {
				neighbor = j;
			}
		}

		if (neighbor == -1) {
			// Every row is already used; _k exceeds the training-set size.
			break;
		}

		knn.push_back(neighbor);
		skip_map.set(neighbor, true);
	}

	return knn;
}
// Majority vote over the given neighbors.
// knn holds training-set ROW INDICES (as produced by nearest_neighbors);
// each neighbor's class label is looked up in _output_set and counted.
// BUGFIX: the previous implementation compared the indices themselves
// against the label values (knn[i] == output[j]), which both voted for the
// wrong classes and multiplied each vote by the label's frequency.
int MLPPKNN::determine_class(const PoolIntArray &knn) {
	ERR_FAIL_COND_V(!_output_set.is_valid(), 0);

	int output_set_size = _output_set->size();

	ERR_FAIL_COND_V(output_set_size == 0, 0);

	const real_t *os_ptr = _output_set->ptr();

	PoolIntArray::Read knn_r = knn.read();
	const int *knn_ptr = knn_r.ptr();
	int knn_size = knn.size();

	// Vote tally per class label; the winner is tracked in the same pass.
	HashMap<int, int> class_nums;

	int final_class = static_cast<int>(os_ptr[0]);
	int max_votes = 0;

	for (int i = 0; i < knn_size; ++i) {
		int index = knn_ptr[i];

		if (index < 0 || index >= output_set_size) {
			// Defensive: ignore out-of-range neighbor indices.
			continue;
		}

		int label = static_cast<int>(os_ptr[index]);

		int votes = (class_nums.has(label) ? class_nums[label] : 0) + 1;
		class_nums[label] = votes;

		// Ties resolve to the label that reached the count first.
		if (votes > max_votes) {
			max_votes = votes;
			final_class = label;
		}
	}

	return final_class;
}
// Registers the scriptable API and editor properties with ClassDB.
void MLPPKNN::_bind_methods() {
	ClassDB::bind_method(D_METHOD("get_input_set"), &MLPPKNN::get_input_set);
	ClassDB::bind_method(D_METHOD("set_input_set", "value"), &MLPPKNN::set_input_set);
	ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "input_set", PROPERTY_HINT_RESOURCE_TYPE, "MLPPMatrix"), "set_input_set", "get_input_set");

	ClassDB::bind_method(D_METHOD("get_output_set"), &MLPPKNN::get_output_set);
	ClassDB::bind_method(D_METHOD("set_output_set", "value"), &MLPPKNN::set_output_set);
	ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "output_set", PROPERTY_HINT_RESOURCE_TYPE, "MLPPVector"), "set_output_set", "get_output_set");

	ClassDB::bind_method(D_METHOD("get_k"), &MLPPKNN::get_k);
	ClassDB::bind_method(D_METHOD("set_k", "value"), &MLPPKNN::set_k);
	// BUGFIX: k is an integer neighbor count; it was registered as VECTOR2,
	// which breaks editor editing and GDScript property access.
	ADD_PROPERTY(PropertyInfo(Variant::INT, "k"), "set_k", "get_k");

	ClassDB::bind_method(D_METHOD("model_set_test", "X"), &MLPPKNN::model_set_test);
	ClassDB::bind_method(D_METHOD("model_test", "x"), &MLPPKNN::model_test);
	ClassDB::bind_method(D_METHOD("score"), &MLPPKNN::score);
}

View File

@ -10,26 +10,44 @@
#include "core/math/math_defs.h"
#include <vector>
#include "core/object/reference.h"
#include "../lin_alg/mlpp_matrix.h"
#include "../lin_alg/mlpp_vector.h"
class MLPPKNN : public Reference {
GDCLASS(MLPPKNN, Reference);
class MLPPKNN {
public:
MLPPKNN(std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet, int k);
std::vector<real_t> modelSetTest(std::vector<std::vector<real_t>> X);
int modelTest(std::vector<real_t> x);
Ref<MLPPMatrix> get_input_set();
void set_input_set(const Ref<MLPPMatrix> &val);
Ref<MLPPVector> get_output_set();
void set_output_set(const Ref<MLPPVector> &val);
int get_k();
void set_k(const int val);
PoolIntArray model_set_test(const Ref<MLPPMatrix> &X);
int model_test(const Ref<MLPPVector> &x);
real_t score();
private:
MLPPKNN(std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet, int k);
MLPPKNN();
~MLPPKNN();
protected:
// Private Model Functions
std::vector<real_t> nearestNeighbors(std::vector<real_t> x);
int determineClass(std::vector<real_t> knn);
PoolIntArray nearest_neighbors(const Ref<MLPPVector> &x);
int determine_class(const PoolIntArray &knn);
static void _bind_methods();
// Model Inputs and Parameters
std::vector<std::vector<real_t>> inputSet;
std::vector<real_t> outputSet;
int k;
Ref<MLPPMatrix> _input_set;
Ref<MLPPVector> _output_set;
int _k;
};
#endif /* kNN_hpp */

View File

@ -5,6 +5,9 @@
//
#include "lin_alg.h"
#include "core/math/math_funcs.h"
#include "../stat/stat.h"
#include <cmath>
#include <iostream>
@ -1220,6 +1223,43 @@ real_t MLPPLinAlg::euclideanDistance(std::vector<real_t> a, std::vector<real_t>
return std::sqrt(dist);
}
// Euclidean (L2) distance between two vectors of equal size.
// Delegates to euclidean_distance_squared so the validation and the
// accumulation loop exist in exactly one place; the squared variant
// already ERR_FAILs (returning 0) on invalid or mismatched inputs.
real_t MLPPLinAlg::euclidean_distance(const Ref<MLPPVector> &a, const Ref<MLPPVector> &b) {
	return Math::sqrt(euclidean_distance_squared(a, b));
}
// Squared Euclidean distance: sum over i of (a_i - b_i)^2.
// Cheaper than euclidean_distance when only the ordering of distances matters.
real_t MLPPLinAlg::euclidean_distance_squared(const Ref<MLPPVector> &a, const Ref<MLPPVector> &b) {
	ERR_FAIL_COND_V(!a.is_valid() || !b.is_valid(), 0);

	int size = a->size();

	ERR_FAIL_COND_V(size != b->size(), 0);

	const real_t *a_ptr = a->ptr();
	const real_t *b_ptr = b->ptr();

	real_t sum = 0;

	for (int i = 0; i < size; ++i) {
		real_t diff = a_ptr[i] - b_ptr[i];
		sum += diff * diff;
	}

	return sum;
}
// L2 norm of a: sqrt of the sum of squares (norm_sq is defined elsewhere in this file).
real_t MLPPLinAlg::norm_2(std::vector<real_t> a) {
return std::sqrt(norm_sq(a));
}

View File

@ -10,6 +10,9 @@
#include "core/math/math_defs.h"
#include "../lin_alg/mlpp_matrix.h"
#include "../lin_alg/mlpp_vector.h"
#include <tuple>
#include <vector>
@ -211,6 +214,8 @@ public:
std::vector<real_t> round(std::vector<real_t> a);
real_t euclideanDistance(std::vector<real_t> a, std::vector<real_t> b);
real_t euclidean_distance(const Ref<MLPPVector> &a, const Ref<MLPPVector> &b);
real_t euclidean_distance_squared(const Ref<MLPPVector> &a, const Ref<MLPPVector> &b);
real_t norm_2(std::vector<real_t> a);

View File

@ -25,7 +25,7 @@ String MLPPMatrix::to_string() {
void MLPPMatrix::_bind_methods() {
ClassDB::bind_method(D_METHOD("add_row", "row"), &MLPPMatrix::add_row_pool_vector);
ClassDB::bind_method(D_METHOD("remove_row", "index"), &MLPPMatrix::remove_row);
ClassDB::bind_method(D_METHOD("remove_unordered", "index"), &MLPPMatrix::remove_unordered);
ClassDB::bind_method(D_METHOD("remove_row_unordered", "index"), &MLPPMatrix::remove_row_unordered);
ClassDB::bind_method(D_METHOD("swap_row", "index_1", "index_2"), &MLPPMatrix::swap_row);
ClassDB::bind_method(D_METHOD("clear"), &MLPPMatrix::clear);
@ -40,7 +40,12 @@ void MLPPMatrix::_bind_methods() {
ClassDB::bind_method(D_METHOD("get_element", "index_x", "index_y"), &MLPPMatrix::get_element_bind);
ClassDB::bind_method(D_METHOD("set_element", "index_x", "index_y", "val"), &MLPPMatrix::set_element_bind);
ClassDB::bind_method(D_METHOD("get_row_pool_vector", "index_y"), &MLPPMatrix::get_row_pool_vector);
ClassDB::bind_method(D_METHOD("get_row_mlpp_vector", "index_y"), &MLPPMatrix::get_row_mlpp_vector);
ClassDB::bind_method(D_METHOD("get_row_into_mlpp_vector", "index_y", "target"), &MLPPMatrix::get_row_into_mlpp_vector);
ClassDB::bind_method(D_METHOD("set_row_pool_vector", "index_y", "row"), &MLPPMatrix::set_row_pool_vector);
ClassDB::bind_method(D_METHOD("set_row_mlpp_vector", "index_y", "row"), &MLPPMatrix::set_row_mlpp_vector);
ClassDB::bind_method(D_METHOD("fill", "val"), &MLPPMatrix::fill);

View File

@ -21,7 +21,7 @@ class MLPPMatrix : public Reference {
GDCLASS(MLPPMatrix, Reference);
public:
real_t *ptr() {
real_t *ptrw() {
return _data;
}
@ -95,7 +95,7 @@ public:
// Removes the item copying the last value into the position of the one to
// remove. It's generally faster than `remove`.
void remove_unordered(int p_index) {
void remove_row_unordered(int p_index) {
ERR_FAIL_INDEX(p_index, _size.y);
--_size.y;
@ -203,6 +203,85 @@ public:
_data[p_index_x * p_index_y] = p_val;
}
// Copies row p_index_y into a new Vector<real_t>.
// Returns an empty vector when the matrix has zero columns.
_FORCE_INLINE_ Vector<real_t> get_row_vector(int p_index_y) {
	ERR_FAIL_INDEX_V(p_index_y, _size.y, Vector<real_t>());

	Vector<real_t> ret;

	if (unlikely(_size.x == 0)) {
		return ret;
	}

	ret.resize(_size.x);

	int ind_start = p_index_y * _size.x;
	real_t *row_ptr = ret.ptrw();

	for (int i = 0; i < _size.x; ++i) {
		row_ptr[i] = _data[ind_start + i];
	}

	// BUGFIX: this return was missing; falling off the end of a
	// value-returning function is undefined behavior.
	return ret;
}
// Copies row p_index_y into a new PoolRealArray (script-facing row accessor).
_FORCE_INLINE_ PoolRealArray get_row_pool_vector(int p_index_y) {
	ERR_FAIL_INDEX_V(p_index_y, _size.y, PoolRealArray());

	PoolRealArray ret;

	if (unlikely(_size.x == 0)) {
		return ret;
	}

	ret.resize(_size.x);

	int ind_start = p_index_y * _size.x;

	{
		// Scope the write lock so it is released before the array is returned.
		PoolRealArray::Write w = ret.write();
		real_t *row_ptr = w.ptr();

		for (int i = 0; i < _size.x; ++i) {
			row_ptr[i] = _data[ind_start + i];
		}
	}

	// BUGFIX: this return was missing; falling off the end of a
	// value-returning function is undefined behavior.
	return ret;
}
// Copies row p_index_y into a freshly instanced MLPPVector.
_FORCE_INLINE_ Ref<MLPPVector> get_row_mlpp_vector(int p_index_y) {
	ERR_FAIL_INDEX_V(p_index_y, _size.y, Ref<MLPPVector>());

	Ref<MLPPVector> ret;
	ret.instance();

	if (unlikely(_size.x == 0)) {
		return ret;
	}

	ret->resize(_size.x);

	int ind_start = p_index_y * _size.x;
	real_t *row_ptr = ret->ptrw();

	for (int i = 0; i < _size.x; ++i) {
		row_ptr[i] = _data[ind_start + i];
	}

	// BUGFIX: this return was missing; falling off the end of a
	// value-returning function is undefined behavior.
	return ret;
}
// Copies row p_index_y into an existing MLPPVector, resizing it only if its
// length differs from the column count. Allocation-free on repeated calls.
_FORCE_INLINE_ void get_row_into_mlpp_vector(int p_index_y, Ref<MLPPVector> target) const {
	ERR_FAIL_COND(!target.is_valid());
	ERR_FAIL_INDEX(p_index_y, _size.y);

	if (unlikely(target->size() != _size.x)) {
		target->resize(_size.x);
	}

	const int offset = p_index_y * _size.x;
	real_t *dst = target->ptrw();

	for (int i = 0; i < _size.x; ++i) {
		dst[i] = _data[offset + i];
	}
}
_FORCE_INLINE_ void set_row_vector(int p_index_y, const Vector<real_t> &p_row) {
ERR_FAIL_COND(p_row.size() != _size.x);
ERR_FAIL_INDEX(p_index_y, _size.y);
@ -230,6 +309,20 @@ public:
}
}
// Overwrites row p_index_y with the contents of p_row.
// p_row must be valid and exactly one row (column count) long.
_FORCE_INLINE_ void set_row_mlpp_vector(int p_index_y, const Ref<MLPPVector> &p_row) {
	ERR_FAIL_COND(!p_row.is_valid());
	ERR_FAIL_COND(p_row->size() != _size.x);
	ERR_FAIL_INDEX(p_index_y, _size.y);

	const real_t *src = p_row->ptr();
	const int offset = p_index_y * _size.x;

	for (int i = 0; i < _size.x; ++i) {
		_data[offset + i] = src[i];
	}
}
void fill(real_t p_val) {
int ds = data_size();
for (int i = 0; i < ds; i++) {

View File

@ -18,7 +18,7 @@ class MLPPVector : public Reference {
GDCLASS(MLPPVector, Reference);
public:
real_t *ptr() {
real_t *ptrw() {
return _data;
}

View File

@ -5,13 +5,14 @@
//
#include "utilities.h"
#include "core/math/math_funcs.h"
#include <fstream>
#include <iostream>
#include <random>
#include <string>
std::vector<real_t> MLPPUtilities::weightInitialization(int n, std::string type) {
std::random_device rd;
std::default_random_engine generator(rd());
@ -132,6 +133,50 @@ real_t MLPPUtilities::performance(std::vector<std::vector<real_t>> y_hat, std::v
return correct / y_hat.size();
}
// Fraction of entries where y_hat approximately equals output_set (accuracy in [0, 1]).
real_t MLPPUtilities::performance_vec(const Ref<MLPPVector> &y_hat, const Ref<MLPPVector> &output_set) {
	ERR_FAIL_COND_V(!y_hat.is_valid(), 0);
	ERR_FAIL_COND_V(!output_set.is_valid(), 0);

	int size = y_hat->size();

	// Mismatched sizes would read output_set out of range.
	ERR_FAIL_COND_V(size != output_set->size(), 0);

	// BUGFIX: an empty vector previously produced a division by zero.
	if (size == 0) {
		return 0;
	}

	real_t correct = 0;

	for (int i = 0; i < size; i++) {
		if (Math::is_equal_approx(y_hat->get_element(i), output_set->get_element(i))) {
			correct++;
		}
	}

	return correct / size;
}
// Row-wise accuracy: a row counts as correct only when every rounded
// prediction in it matches the target. Returns correct_rows / total_rows.
real_t MLPPUtilities::performance_mat(const Ref<MLPPMatrix> &y_hat, const Ref<MLPPMatrix> &y) {
	ERR_FAIL_COND_V(!y_hat.is_valid(), 0);
	ERR_FAIL_COND_V(!y.is_valid(), 0);

	// Mismatched shapes would read y out of range.
	ERR_FAIL_COND_V(y_hat->size() != y->size(), 0);

	int rows = y_hat->size().y;
	int cols = y_hat->size().x;

	// BUGFIX: zero rows previously produced a division by zero.
	if (rows == 0) {
		return 0;
	}

	real_t correct = 0;

	for (int i = 0; i < rows; i++) {
		int sub_correct = 0;

		for (int j = 0; j < cols; j++) {
			if (Math::round(y_hat->get_element(i, j)) == y->get_element(i, j)) {
				sub_correct++;
			}
		}

		// BUGFIX: this row-complete check used to sit inside the inner loop;
		// it belongs after the whole row has been compared.
		if (sub_correct == cols) {
			correct++;
		}
	}

	return correct / rows;
}
// Accuracy of integer predictions against rounded real-valued targets.
real_t MLPPUtilities::performance_pool_int_array_vec(PoolIntArray y_hat, const Ref<MLPPVector> &output_set) {
	ERR_FAIL_COND_V(!output_set.is_valid(), 0);

	int size = y_hat.size();

	// More predictions than targets would read output_set out of range.
	ERR_FAIL_COND_V(size > output_set->size(), 0);

	// BUGFIX: an empty prediction array previously produced a division by zero.
	if (size == 0) {
		return 0;
	}

	// One read lock instead of a per-element operator[] lock.
	PoolIntArray::Read y_hat_r = y_hat.read();
	const int *y_hat_ptr = y_hat_r.ptr();

	real_t correct = 0;

	for (int i = 0; i < size; i++) {
		if (y_hat_ptr[i] == Math::round(output_set->get_element(i))) {
			correct++;
		}
	}

	return correct / size;
}
void MLPPUtilities::saveParameters(std::string fileName, std::vector<real_t> weights, real_t bias, bool app, int layer) {
std::string layer_info = "";
std::ofstream saveFile;

View File

@ -10,6 +10,11 @@
#include "core/math/math_defs.h"
#include "core/containers/vector.h"
#include "core/variant/variant.h"
#include "../lin_alg/mlpp_matrix.h"
#include "../lin_alg/mlpp_vector.h"
#include <string>
#include <tuple>
@ -29,6 +34,10 @@ public:
real_t performance(std::vector<real_t> y_hat, std::vector<real_t> y);
real_t performance(std::vector<std::vector<real_t>> y_hat, std::vector<std::vector<real_t>> y);
real_t performance_vec(const Ref<MLPPVector> &y_hat, const Ref<MLPPVector> &output_set);
real_t performance_mat(const Ref<MLPPMatrix> &y_hat, const Ref<MLPPMatrix> &y);
real_t performance_pool_int_array_vec(PoolIntArray y_hat, const Ref<MLPPVector> &output_set);
// Parameter Saving Functions
void saveParameters(std::string fileName, std::vector<real_t> weights, real_t bias, bool app = 0, int layer = -1);
void saveParameters(std::string fileName, std::vector<real_t> weights, std::vector<real_t> initial, real_t bias, bool app = 0, int layer = -1);

View File

@ -24,8 +24,10 @@ SOFTWARE.
#include "register_types.h"
#include "mlpp/data/data.h"
#include "mlpp/lin_alg/mlpp_vector.h"
#include "mlpp/lin_alg/mlpp_matrix.h"
#include "mlpp/lin_alg/mlpp_vector.h"
#include "mlpp/knn/knn.h"
#include "test/mlpp_tests.h"
@ -34,6 +36,8 @@ void register_pmlpp_types(ModuleRegistrationLevel p_level) {
ClassDB::register_class<MLPPVector>();
ClassDB::register_class<MLPPMatrix>();
ClassDB::register_class<MLPPKNN>();
ClassDB::register_class<MLPPDataESimple>();
ClassDB::register_class<MLPPDataSimple>();
ClassDB::register_class<MLPPDataComplex>();

View File

@ -553,11 +553,39 @@ void MLPPTests::test_knn(bool ui) {
MLPPLinAlg alg;
// kNN
std::vector<std::vector<real_t>> inputSet = { { 1, 2, 3, 4, 5, 6, 7, 8 }, { 0, 0, 0, 0, 1, 1, 1, 1 } };
std::vector<std::vector<real_t>> inputSet = {
{ 1, 2, 3, 4, 5, 6, 7, 8 },
{ 0, 0, 0, 0, 1, 1, 1, 1 }
};
std::vector<real_t> outputSet = { 0, 0, 0, 0, 1, 1, 1, 1 };
MLPPKNN knn(alg.transpose(inputSet), outputSet, 8);
alg.printVector(knn.modelSetTest(alg.transpose(inputSet)));
std::cout << "ACCURACY: " << 100 * knn.score() << "%" << std::endl;
Ref<MLPPMatrix> ism;
ism.instance();
ism->set_from_std_vectors(alg.transpose(inputSet));
//ERR_PRINT(ism->to_string());
Ref<MLPPVector> osm;
osm.instance();
osm->set_from_std_vector(outputSet);
//ERR_PRINT(osm->to_string());
Ref<MLPPKNN> knn;
knn.instance();
knn->set_k(7);
knn->set_input_set(ism);
knn->set_output_set(osm);
PoolIntArray res = knn->model_set_test(ism);
ERR_PRINT(String(Variant(res)));
ERR_PRINT("ACCURACY: " + itos(100 * knn->score()) + "%");
//(alg.transpose(inputSet), outputSet, 8);
//alg.printVector(knn.modelSetTest(alg.transpose(inputSet)));
//std::cout << "ACCURACY: " << 100 * knn.score() << "%" << std::endl;
}
void MLPPTests::test_convolution_tensors_etc() {