diff --git a/mlpp/outlier_finder/outlier_finder.cpp b/mlpp/outlier_finder/outlier_finder.cpp index cc4f50a..5b66659 100644 --- a/mlpp/outlier_finder/outlier_finder.cpp +++ b/mlpp/outlier_finder/outlier_finder.cpp @@ -5,37 +5,134 @@ // #include "outlier_finder.h" + #include "../stat/stat.h" -#include - -MLPPOutlierFinder::MLPPOutlierFinder(int threshold) : - threshold(threshold) { +real_t MLPPOutlierFinder::get_threshold() { + return _threshold; +} +void MLPPOutlierFinder::set_threshold(real_t val) { + _threshold = val; } -std::vector> MLPPOutlierFinder::modelSetTest(std::vector> inputSet) { - MLPPStat stat; - std::vector> outliers; - outliers.resize(inputSet.size()); - for (int i = 0; i < inputSet.size(); i++) { - for (int j = 0; j < inputSet[i].size(); j++) { - real_t z = (inputSet[i][j] - stat.mean(inputSet[i])) / stat.standardDeviation(inputSet[i]); - if (abs(z) > threshold) { - outliers[i].push_back(inputSet[i][j]); +Vector> MLPPOutlierFinder::model_set_test(const Ref &input_set) { + ERR_FAIL_COND_V(!input_set.is_valid(), Vector>()); + + MLPPStat stat; + + Size2i input_set_size = input_set->size(); + + Vector> outliers; + outliers.resize(input_set_size.y); + + Ref input_set_i_row_tmp; + input_set_i_row_tmp.instance(); + input_set_i_row_tmp->resize(input_set_size.x); + + for (int i = 0; i < input_set_size.y; ++i) { + input_set->get_row_into_mlpp_vector(i, input_set_i_row_tmp); + real_t meanv = stat.meanv(input_set_i_row_tmp); + real_t s_dev_v = stat.standard_deviationv(input_set_i_row_tmp); + + for (int j = 0; j < input_set_size.x; ++j) { + real_t input_set_i_j = input_set->get_element(i, j); + + real_t z = (input_set_i_j - meanv) / s_dev_v; + + if (ABS(z) > _threshold) { + outliers.write[i].push_back(input_set_i_j); } } } + return outliers; } -std::vector MLPPOutlierFinder::modelTest(std::vector inputSet) { - MLPPStat stat; - std::vector outliers; - for (int i = 0; i < inputSet.size(); i++) { - real_t z = (inputSet[i] - stat.mean(inputSet)) / stat.standardDeviation(inputSet); - if (abs(z) > threshold) { - outliers.push_back(inputSet[i]); +Array MLPPOutlierFinder::model_set_test_bind(const Ref &input_set) { + Vector> res = model_set_test(input_set); + + Array arr; + + for (int i = 0; i < res.size(); ++i) { + //will get converted to PoolRealArray + arr.push_back(Variant(res[i])); + } + + return arr; +} + +PoolVector2iArray MLPPOutlierFinder::model_set_test_indices(const Ref &input_set) { + ERR_FAIL_COND_V(!input_set.is_valid(), PoolVector2iArray()); + + MLPPStat stat; + + Size2i input_set_size = input_set->size(); + + PoolVector2iArray outliers; + + Ref input_set_i_row_tmp; + input_set_i_row_tmp.instance(); + input_set_i_row_tmp->resize(input_set_size.x); + + for (int i = 0; i < input_set_size.y; ++i) { + input_set->get_row_into_mlpp_vector(i, input_set_i_row_tmp); + real_t meanv = stat.meanv(input_set_i_row_tmp); + real_t s_dev_v = stat.standard_deviationv(input_set_i_row_tmp); + + for (int j = 0; j < input_set_size.x; ++j) { + real_t z = (input_set->get_element(i, j) - meanv) / s_dev_v; + + if (ABS(z) > _threshold) { + outliers.push_back(Vector2i(j, i)); + } } } + return outliers; } + +PoolRealArray MLPPOutlierFinder::model_test(const Ref &input_set) { + ERR_FAIL_COND_V(!input_set.is_valid(), PoolRealArray()); + + MLPPStat stat; + PoolRealArray outliers; + + real_t mean = stat.meanv(input_set); + real_t s_dev = stat.standard_deviationv(input_set); + + int input_set_size = input_set->size(); + const real_t *input_set_ptr = input_set->ptr(); + + for (int i = 0; i < input_set_size; ++i) { + real_t input_set_i = input_set_ptr[i]; + + real_t z = (input_set_i - mean) / s_dev; + + if (ABS(z) > _threshold) { + outliers.push_back(input_set_i); + } + } + + return outliers; +} + +MLPPOutlierFinder::MLPPOutlierFinder(real_t threshold) { + _threshold = threshold; +} + +MLPPOutlierFinder::MLPPOutlierFinder() { + _threshold = 0; +} +MLPPOutlierFinder::~MLPPOutlierFinder() { +} + +void MLPPOutlierFinder::_bind_methods() { + ClassDB::bind_method(D_METHOD("get_threshold"), &MLPPOutlierFinder::get_threshold); + ClassDB::bind_method(D_METHOD("set_threshold", "val"), &MLPPOutlierFinder::set_threshold); + ADD_PROPERTY(PropertyInfo(Variant::REAL, "threshold"), "set_threshold", "get_threshold"); + + ClassDB::bind_method(D_METHOD("model_set_test", "input_set"), &MLPPOutlierFinder::model_set_test_bind); + ClassDB::bind_method(D_METHOD("model_set_test_indices", "input_set"), &MLPPOutlierFinder::model_set_test_indices); + + ClassDB::bind_method(D_METHOD("model_test", "input_set"), &MLPPOutlierFinder::model_test); +} diff --git a/mlpp/outlier_finder/outlier_finder.h b/mlpp/outlier_finder/outlier_finder.h index 897a889..130195f 100644 --- a/mlpp/outlier_finder/outlier_finder.h +++ b/mlpp/outlier_finder/outlier_finder.h @@ -10,20 +10,34 @@ #include "core/math/math_defs.h" -#include +#include "core/object/reference.h" +#include "../lin_alg/mlpp_matrix.h" +#include "../lin_alg/mlpp_vector.h" + +class MLPPOutlierFinder : public Reference { + GDCLASS(MLPPOutlierFinder, Reference); -class MLPPOutlierFinder { public: - // Cnstr - MLPPOutlierFinder(int threshold); + real_t get_threshold(); + void set_threshold(real_t val); - std::vector> modelSetTest(std::vector> inputSet); - std::vector modelTest(std::vector inputSet); + Vector> model_set_test(const Ref &input_set); + Array model_set_test_bind(const Ref &input_set); - // Variables required - int threshold; + PoolVector2iArray model_set_test_indices(const Ref &input_set); + + PoolRealArray model_test(const Ref &input_set); + + MLPPOutlierFinder(real_t threshold); + + MLPPOutlierFinder(); + ~MLPPOutlierFinder(); + +protected: + static void _bind_methods(); + + real_t _threshold; }; - #endif /* OutlierFinder_hpp */ diff --git a/mlpp/stat/stat.cpp b/mlpp/stat/stat.cpp index 11312f3..622f133 100644 --- a/mlpp/stat/stat.cpp +++ b/mlpp/stat/stat.cpp @@ -133,6 +133,10 @@ real_t MLPPStat::meanv(const Ref &x) { return sum / x_size; } +real_t MLPPStat::standard_deviationv(const Ref &x) { + return Math::sqrt(variancev(x)); +} + real_t MLPPStat::variancev(const Ref &x) { real_t x_mean = meanv(x); diff --git a/mlpp/stat/stat.h b/mlpp/stat/stat.h index b19cc2c..7718834 100644 --- a/mlpp/stat/stat.h +++ b/mlpp/stat/stat.h @@ -39,6 +39,7 @@ public: real_t chebyshevIneq(const real_t k); real_t meanv(const Ref &x); + real_t standard_deviationv(const Ref &x); real_t variancev(const Ref &x); real_t covariancev(const Ref &x, const Ref &y); diff --git a/register_types.cpp b/register_types.cpp index 0406b71..2e94a71 100644 --- a/register_types.cpp +++ b/register_types.cpp @@ -38,6 +38,7 @@ SOFTWARE. #include "mlpp/kmeans/kmeans.h" #include "mlpp/knn/knn.h" +#include "mlpp/outlier_finder/outlier_finder.h" #include "mlpp/pca/pca.h" #include "mlpp/uni_lin_reg/uni_lin_reg.h" #include "mlpp/wgan/wgan.h" @@ -67,6 +68,7 @@ void register_pmlpp_types(ModuleRegistrationLevel p_level) { ClassDB::register_class(); ClassDB::register_class(); ClassDB::register_class(); + ClassDB::register_class(); ClassDB::register_class(); ClassDB::register_class(); diff --git a/test/mlpp_tests.cpp b/test/mlpp_tests.cpp index 1f1d4f0..8505b97 100644 --- a/test/mlpp_tests.cpp +++ b/test/mlpp_tests.cpp @@ -48,6 +48,7 @@ #include "../mlpp/wgan/wgan.h" #include "../mlpp/mlp/mlp_old.h" +#include "../mlpp/outlier_finder/outlier_finder_old.h" #include "../mlpp/pca/pca_old.h" #include "../mlpp/uni_lin_reg/uni_lin_reg_old.h" #include "../mlpp/wgan/wgan_old.h" @@ -855,8 +856,15 @@ void MLPPTests::test_outlier_finder(bool ui) { // Outlier Finder //std::vector inputSet = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 23554332523523 }; std::vector inputSet = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 23554332 }; - MLPPOutlierFinder outlierFinder(2); // Any datapoint outside of 2 stds from the mean is marked as an outlier. - alg.printVector(outlierFinder.modelTest(inputSet)); + MLPPOutlierFinderOld outlierFinderOld(2); // Any datapoint outside of 2 stds from the mean is marked as an outlier. + alg.printVector(outlierFinderOld.modelTest(inputSet)); + + Ref input_set; + input_set.instance(); + input_set->set_from_std_vector(inputSet); + + MLPPOutlierFinder outlier_finder(2); // Any datapoint outside of 2 stds from the mean is marked as an outlier. + PLOG_MSG(Variant(outlier_finder.model_test(input_set))); } void MLPPTests::test_new_math_functions() { MLPPLinAlg alg;