/*************************************************************************/ /* convolutions.cpp */ /*************************************************************************/ /* This file is part of: */ /* PMLPP Machine Learning Library */ /* https://github.com/Relintai/pmlpp */ /*************************************************************************/ /* Copyright (c) 2023-present Péter Magyar. */ /* Copyright (c) 2022-2023 Marc Melikyan */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ /* "Software"), to deal in the Software without restriction, including */ /* without limitation the rights to use, copy, modify, merge, publish, */ /* distribute, sublicense, and/or sell copies of the Software, and to */ /* permit persons to whom the Software is furnished to do so, subject to */ /* the following conditions: */ /* */ /* The above copyright notice and this permission notice shall be */ /* included in all copies or substantial portions of the Software. */ /* */ /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ /* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ /* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ /* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ /* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /*************************************************************************/ #include "convolutions.h" #include "../lin_alg/lin_alg.h" #include "../stat/stat.h" #ifdef USING_SFW #include "sfw.h" #else #include "core/math/math_funcs.h" #endif #include Ref MLPPConvolutions::convolve_2d(const Ref &p_input, const Ref &filter, const int S, const int P) { MLPPLinAlg alg; Ref input = p_input; Size2i input_size = input->size(); int N = input_size.y; int F = filter->size().y; int map_size = (N - F + 2 * P) / S + 1; // This is computed as ⌊map_size⌋ by def- thanks C++! if (P != 0) { Ref padded_input; padded_input.instance(); Size2i pis = Size2i(N + 2 * P, N + 2 * P); padded_input->resize(pis); for (int i = 0; i < pis.y; i++) { for (int j = 0; j < pis.x; j++) { if (i - P < 0 || j - P < 0 || i - P > input_size.y - 1 || j - P > input_size.x - 1) { padded_input->element_set(i, j, 0); } else { padded_input->element_set(i, j, input->element_get(i - P, j - P)); } } } input = padded_input; } Ref feature_map; feature_map.instance(); feature_map->resize(Size2i(map_size, map_size)); Ref filter_flattened = filter->flatten(); Ref convolving_input; convolving_input.instance(); convolving_input->resize(F * F); for (int i = 0; i < map_size; i++) { for (int j = 0; j < map_size; j++) { int current_index = 0; for (int k = 0; k < F; k++) { for (int p = 0; p < F; p++) { real_t val; if (i == 0 && j == 0) { val = input->element_get(i + k, j + p); } else if (i == 0) { val = input->element_get(i + k, j + (S - 1) + p); } else if (j == 0) { val = input->element_get(i + (S - 1) + k, j + p); } else { val = input->element_get(i + (S - 1) + k, j + (S - 1) + p); } convolving_input->element_set(current_index, val); ++current_index; } } feature_map->element_set(i, j, convolving_input->dot(filter_flattened)); } } return feature_map; } Ref MLPPConvolutions::convolve_3d(const Ref &p_input, const Ref &filter, const int S, const int P) { MLPPLinAlg alg; Ref input = p_input; Size3i input_size = input->size(); Size3i filter_size = filter->size(); int N = input_size.y; int F = filter_size.y; int C = filter_size.z / input_size.z; int map_size = (N - F + 2 * P) / S + 1; // This is computed as ⌊map_size⌋ by def. if (P != 0) { Ref padded_input; padded_input.instance(); Ref padded_input_slice; padded_input_slice.instance(); Size2i padded_input_slice_size = Size2i(N + 2 * P, N + 2 * P); padded_input_slice->resize(padded_input_slice_size); padded_input->resize(Size3i(padded_input_slice_size.x, padded_input_slice_size.y, input_size.z)); for (int c = 0; c < input_size.z; c++) { for (int i = 0; i < padded_input_slice_size.y; i++) { for (int j = 0; j < padded_input_slice_size.x; j++) { if (i - P < 0 || j - P < 0 || i - P > input_size.y - 1 || j - P > input_size.x - 1) { padded_input_slice->element_set(i, j, 0); } else { padded_input_slice->element_set(i, j, input->element_get(i - P, j - P, c)); } } } padded_input->z_slice_set_mlpp_matrix(c, padded_input_slice); } input = padded_input; } Ref feature_map; feature_map.instance(); feature_map->resize(Size3i(map_size, map_size, C)); Ref filter_flattened = filter->flatten(); Ref convolving_input; convolving_input.instance(); convolving_input->resize(input_size.z * F * F); for (int c = 0; c < C; c++) { for (int i = 0; i < map_size; i++) { for (int j = 0; j < map_size; j++) { int current_index = 0; for (int t = 0; t < input_size.z; t++) { for (int k = 0; k < F; k++) { for (int p = 0; p < F; p++) { real_t val; if (i == 0 && j == 0) { val = input->element_get(i + k, j + p, t); } else if (i == 0) { val = input->element_get(i + k, j + (S - 1) + p, t); } else if (j == 0) { val = input->element_get(i + (S - 1) + k, j + p, t); } else { val = input->element_get(i + (S - 1) + k, j + (S - 1) + p, t); } convolving_input->element_set(current_index, val); ++current_index; } } } feature_map->element_set(c, i, j, convolving_input->dot(filter_flattened)); } } } return feature_map; } Ref MLPPConvolutions::pool_2d(const Ref &input, const int F, const int S, const PoolType type) { MLPPLinAlg alg; Size2i input_size = input->size(); int N = input_size.y; int map_size = (N - F) / S + 1; Ref pooled_map; pooled_map.instance(); pooled_map->resize(Size2i(map_size, map_size)); Ref pooling_input; pooling_input.instance(); pooling_input->resize(F * F); for (int i = 0; i < map_size; i++) { for (int j = 0; j < map_size; j++) { int current_index = 0; for (int k = 0; k < F; k++) { for (int p = 0; p < F; p++) { real_t val; if (i == 0 && j == 0) { val = input->element_get(i + k, j + p); } else if (i == 0) { val = input->element_get(i + k, j + (S - 1) + p); } else if (j == 0) { val = input->element_get(i + (S - 1) + k, j + p); } else { val = input->element_get(i + (S - 1) + k, j + (S - 1) + p); } pooling_input->element_set(current_index, val); ++current_index; } } if (type == POOL_TYPE_AVERAGE) { MLPPStat stat; pooled_map->element_set(i, j, stat.meanv(pooling_input)); } else if (type == POOL_TYPE_MIN) { pooled_map->element_set(i, j, alg.minvr(pooling_input)); } else { pooled_map->element_set(i, j, alg.maxvr(pooling_input)); } } } return pooled_map; } Ref MLPPConvolutions::pool_3d(const Ref &input, const int F, const int S, const PoolType type) { Size3i input_size = input->size(); Ref z_slice; z_slice.instance(); z_slice->resize(Size2i(input_size.x, input_size.y)); int N = input_size.y; int map_size = (N - F) / S + 1; Ref pooled_map; pooled_map.instance(); pooled_map->resize(Size3i(map_size, map_size, input_size.z)); for (int i = 0; i < input_size.z; i++) { input->z_slice_get_into_mlpp_matrix(i, z_slice); Ref p = pool_2d(z_slice, F, S, type); pooled_map->z_slice_set_mlpp_matrix(i, p); } return pooled_map; } real_t MLPPConvolutions::global_pool_2d(const Ref &input, const PoolType type) { MLPPLinAlg alg; Ref f = input->flatten(); if (type == POOL_TYPE_AVERAGE) { MLPPStat stat; return stat.meanv(f); } else if (type == POOL_TYPE_MIN) { return alg.minvr(f); } else { return alg.maxvr(f); } } Ref MLPPConvolutions::global_pool_3d(const Ref &input, const PoolType type) { Size3i input_size = input->size(); Ref pooled_map; pooled_map.instance(); pooled_map->resize(input_size.z); Ref z_slice; z_slice.instance(); z_slice->resize(Size2i(input_size.x, input_size.y)); for (int i = 0; i < input_size.z; i++) { input->z_slice_get_into_mlpp_matrix(i, z_slice); pooled_map->element_set(i, global_pool_2d(z_slice, type)); } return pooled_map; } real_t MLPPConvolutions::gaussian_2d(const real_t x, const real_t y, const real_t std) { real_t std_sq = std * std; return 1 / (2 * Math_PI * std_sq) * Math::exp(-(x * x + y * y) / 2 * std_sq); } Ref MLPPConvolutions::gaussian_filter_2d(const int size, const real_t std) { Ref filter; filter.instance(); filter->resize(Size2i(size, size)); for (int i = 0; i < size; i++) { for (int j = 0; j < size; j++) { real_t val = gaussian_2d(i - (size - 1) / 2, (size - 1) / 2 - j, std); filter->element_set(i, j, val); } } return filter; } // Indeed a filter could have been used for this purpose, but I decided that it would've just // been easier to carry out the calculation explicitly, mainly because it is more informative, // and also because my convolution algorithm is only built for filters with equally sized // heights and widths. Ref MLPPConvolutions::dx(const Ref &input) { Size2i input_size = input->size(); Ref deriv; // We assume a gray scale image. deriv.instance(); deriv->resize(input_size); for (int i = 0; i < input_size.y; i++) { for (int j = 0; j < input_size.x; j++) { if (j != 0 && j != input_size.y - 1) { deriv->element_set(i, j, input->element_get(i, j + 1) - input->element_get(i, j - 1)); } else if (j == 0) { deriv->element_set(i, j, input->element_get(i, j + 1)); // E0 - 0 = Implicit zero-padding } else { deriv->element_set(i, j, -input->element_get(i, j - 1)); // 0 - E1 = Implicit zero-padding } } } return deriv; } Ref MLPPConvolutions::dy(const Ref &input) { Size2i input_size = input->size(); Ref deriv; // We assume a gray scale image. deriv.instance(); deriv->resize(input_size); for (int i = 0; i < input_size.y; i++) { for (int j = 0; j < input_size.x; j++) { if (i != 0 && i != input_size.y - 1) { deriv->element_set(i, j, input->element_get(i - 1, j) - input->element_get(i + 1, j)); } else if (i == 0) { deriv->element_set(i, j, -input->element_get(i + 1, j)); // 0 - E1 = Implicit zero-padding } else { deriv->element_set(i, j, input->element_get(i - 1, j)); // E0 - 0 =Implicit zero-padding } } } return deriv; } Ref MLPPConvolutions::grad_magnitude(const Ref &input) { MLPPLinAlg alg; Ref x_deriv_2 = dx(input)->hadamard_productn(dx(input)); Ref y_deriv_2 = dy(input)->hadamard_productn(dy(input)); return x_deriv_2->addn(y_deriv_2)->sqrtn(); } Ref MLPPConvolutions::grad_orientation(const Ref &input) { Ref deriv; // We assume a gray scale image. deriv.instance(); deriv->resize(input->size()); Size2i deriv_size = deriv->size(); Ref x_deriv = dx(input); Ref y_deriv = dy(input); for (int i = 0; i < deriv_size.y; i++) { for (int j = 0; j < deriv_size.x; j++) { deriv->element_set(i, j, Math::atan2(y_deriv->element_get(i, j), x_deriv->element_get(i, j))); } } return deriv; } Ref MLPPConvolutions::compute_m(const Ref &input) { Size2i input_size = input->size(); real_t const SIGMA = 1; real_t const GAUSSIAN_SIZE = 3; real_t const GAUSSIAN_PADDING = ((input_size.y - 1) + GAUSSIAN_SIZE - input_size.y) / 2; // Convs must be same. Ref x_deriv = dx(input); Ref y_deriv = dy(input); Ref gaussian_filter = gaussian_filter_2d(GAUSSIAN_SIZE, SIGMA); // Sigma of 1, size of 3. Ref xx_deriv = convolve_2d(x_deriv->hadamard_productn(x_deriv), gaussian_filter, 1, GAUSSIAN_PADDING); Ref yy_deriv = convolve_2d(y_deriv->hadamard_productn(y_deriv), gaussian_filter, 1, GAUSSIAN_PADDING); Ref xy_deriv = convolve_2d(x_deriv->hadamard_productn(y_deriv), gaussian_filter, 1, GAUSSIAN_PADDING); Size2i ds = xx_deriv->size(); Ref M; M.instance(); M->resize(Size3i(ds.x, ds.y, 3)); M->z_slice_set_mlpp_matrix(0, xx_deriv); M->z_slice_set_mlpp_matrix(1, yy_deriv); M->z_slice_set_mlpp_matrix(2, xy_deriv); return M; } Vector> MLPPConvolutions::compute_mv(const Ref &input) { Size2i input_size = input->size(); real_t const SIGMA = 1; real_t const GAUSSIAN_SIZE = 3; real_t const GAUSSIAN_PADDING = ((input_size.y - 1) + GAUSSIAN_SIZE - input_size.y) / 2; // Convs must be same. Ref x_deriv = dx(input); Ref y_deriv = dy(input); Ref gaussian_filter = gaussian_filter_2d(GAUSSIAN_SIZE, SIGMA); // Sigma of 1, size of 3. Ref xx_deriv = convolve_2d(x_deriv->hadamard_productn(x_deriv), gaussian_filter, 1, GAUSSIAN_PADDING); Ref yy_deriv = convolve_2d(y_deriv->hadamard_productn(y_deriv), gaussian_filter, 1, GAUSSIAN_PADDING); Ref xy_deriv = convolve_2d(x_deriv->hadamard_productn(y_deriv), gaussian_filter, 1, GAUSSIAN_PADDING); Vector> M; M.resize(3); M.set(0, xx_deriv); M.set(1, yy_deriv); M.set(2, xy_deriv); return M; } Vector> MLPPConvolutions::harris_corner_detection(const Ref &input) { real_t const k = 0.05; // Empirically determined wherein k -> [0.04, 0.06], though conventionally 0.05 is typically used as well. Vector> M = compute_mv(input); Ref M0 = M[0]; Ref M1 = M[1]; Ref M2 = M[2]; Ref det = M0->hadamard_productn(M1)->subn(M2->hadamard_productn(M2)); Ref trace = M0->addn(M1); // The reason this is not a scalar is because xx_deriv, xy_deriv, yx_deriv, and yy_deriv are not scalars. Ref r = det->subn(trace->hadamard_productn(trace)->scalar_multiplyn(k)); Size2i r_size = r->size(); Vector> image_types; image_types.resize(r_size.y); //alg.printMatrix(r); for (int i = 0; i < r_size.y; i++) { image_types.write[i].resize(r_size.x); for (int j = 0; j < r_size.x; j++) { real_t e = r->element_get(i, j); if (e > 0) { image_types.write[i].write[j] = 'C'; } else if (e < 0) { image_types.write[i].write[j] = 'E'; } else { image_types.write[i].write[j] = 'N'; } } } return image_types; } Ref MLPPConvolutions::get_prewitt_horizontal() const { return _prewitt_horizontal; } Ref MLPPConvolutions::get_prewitt_vertical() const { return _prewitt_vertical; } Ref MLPPConvolutions::get_sobel_horizontal() const { return _sobel_horizontal; } Ref MLPPConvolutions::get_sobel_vertical() const { return _sobel_vertical; } Ref MLPPConvolutions::get_scharr_horizontal() const { return _scharr_horizontal; } Ref MLPPConvolutions::get_scharr_vertical() const { return _scharr_vertical; } Ref MLPPConvolutions::get_roberts_horizontal() const { return _roberts_horizontal; } Ref MLPPConvolutions::get_roberts_vertical() const { return _roberts_vertical; } MLPPConvolutions::MLPPConvolutions() { const real_t prewitt_horizontal_arr[]{ 1, 1, 1, // 0, 0, 0, // -1, -1, -1, // }; const real_t prewitt_vertical_arr[] = { 1, 0, -1, // 1, 0, -1, // 1, 0, -1 // }; const real_t sobel_horizontal_arr[] = { 1, 2, 1, // 0, 0, 0, // -1, -2, -1 // }; const real_t sobel_vertical_arr[] = { -1, 0, 1, // -2, 0, 2, // -1, 0, 1 // }; const real_t scharr_horizontal_arr[] = { 3, 10, 3, // 0, 0, 0, // -3, -10, -3 // }; const real_t scharr_vertical_arr[] = { 3, 0, -3, // 10, 0, -10, // 3, 0, -3 // }; const real_t roberts_horizontal_arr[] = { 0, 1, // -1, 0 // }; const real_t roberts_vertical_arr[] = { 1, 0, // 0, -1 // }; _prewitt_horizontal = Ref(memnew(MLPPMatrix(prewitt_horizontal_arr, 3, 3))); _prewitt_vertical = Ref(memnew(MLPPMatrix(prewitt_vertical_arr, 3, 3))); _sobel_horizontal = Ref(memnew(MLPPMatrix(sobel_horizontal_arr, 3, 3))); _sobel_vertical = Ref(memnew(MLPPMatrix(sobel_vertical_arr, 3, 3))); _scharr_horizontal = Ref(memnew(MLPPMatrix(scharr_horizontal_arr, 3, 3))); _scharr_vertical = Ref(memnew(MLPPMatrix(scharr_vertical_arr, 3, 3))); _roberts_horizontal = Ref(memnew(MLPPMatrix(roberts_horizontal_arr, 2, 2))); _roberts_vertical = Ref(memnew(MLPPMatrix(roberts_vertical_arr, 2, 2))); } void MLPPConvolutions::_bind_methods() { }