//
// kNN.cpp
//
// Created by Marc Melikyan on 10/2/20.
//

#include "knn.h"

#include "../lin_alg/lin_alg.h"
#include "../utilities/utilities.h"

#include <algorithm>
#include <iostream>
#include <map>
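
// This file implements a brute-force k-nearest-neighbors classifier:
// nearestNeighbors() scans the stored training set and returns the labels of the k
// points closest to a query (by Euclidean distance), determineClass() takes the most
// common of those labels, and modelTest() / modelSetTest() wire the two together for
// single and batched predictions. score() evaluates the model on its own training set.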

MLPPKNN::MLPPKNN(std::vector<std::vector<double>> inputSet, std::vector<double> outputSet, int k) :
		inputSet(inputSet), outputSet(outputSet), k(k) {
}

// Predicts a label for every row of X.
std::vector<double> MLPPKNN::modelSetTest(std::vector<std::vector<double>> X) {
	std::vector<double> y_hat;
	for (int i = 0; i < X.size(); i++) {
		y_hat.push_back(modelTest(X[i]));
	}
	return y_hat;
}

// Predicts the label of a single input vector by majority vote among its k nearest neighbors.
int MLPPKNN::modelTest(std::vector<double> x) {
	return determineClass(nearestNeighbors(x));
}

// Scores the model on its own training data: predictions for inputSet are compared
// with outputSet via Utilities::performance.
double MLPPKNN::score() {
	Utilities util;
	return util.performance(modelSetTest(inputSet), outputSet);
}

int MLPPKNN::determineClass(std::vector<double> knn) {
	// knn holds the labels of the k nearest neighbors; pick the most common one.
	std::map<int, int> class_nums;
	for (int i = 0; i < outputSet.size(); i++) {
		class_nums[outputSet[i]] = 0;
	}
	for (int i = 0; i < knn.size(); i++) {
		class_nums[knn[i]]++; // One vote per neighbor for its class
	}

	int max = class_nums[outputSet[0]];
	int final_class = outputSet[0];

	for (int i = 0; i < outputSet.size(); i++) {
		if (class_nums[outputSet[i]] > max) {
			max = class_nums[outputSet[i]];
		}
	}

	for (auto [c, v] : class_nums) {
		if (v == max) {
			final_class = c;
		}
	}

	return final_class;
}
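
// Worked example (illustrative values): with k = 3 and neighbor labels {1, 0, 1},
// class_nums ends up as {0: 1, 1: 2}, so max is 2 and determineClass returns 1.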

std::vector<double> MLPPKNN::nearestNeighbors(std::vector<double> x) {
	LinAlg alg;
	// The labels of the k nearest neighbors
	std::vector<double> knn;

	// Work on copies so chosen points can be removed without touching the stored data set.
	std::vector<std::vector<double>> inputUseSet = inputSet;
	std::vector<double> outputUseSet = outputSet;
	// Perform this loop until all k nearest neighbors are found, appended, and returned
	for (int i = 0; i < k; i++) {
		int neighbor = 0;
		for (int j = 0; j < inputUseSet.size(); j++) {
			bool isNeighborNearer = alg.euclideanDistance(x, inputUseSet[j]) < alg.euclideanDistance(x, inputUseSet[neighbor]);
			if (isNeighborNearer) {
				neighbor = j;
			}
		}
		knn.push_back(outputUseSet[neighbor]); // Store the neighbor's label (not its index) so determineClass can vote on classes
		inputUseSet.erase(inputUseSet.begin() + neighbor); // Remove the chosen point so the next pass finds the next-nearest; this is why we maintain an extra input"Use"Set
		outputUseSet.erase(outputUseSet.begin() + neighbor);
	}
	return knn;
}
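
// Example usage (a minimal sketch; the points, labels, and k = 3 below are made up for
// illustration and assume this class is declared in knn.h as implemented above):
//
//   std::vector<std::vector<double>> X = { { 0.0, 0.0 }, { 0.1, 0.2 }, { 5.0, 5.1 }, { 5.2, 4.9 } };
//   std::vector<double> y = { 0, 0, 1, 1 };
//   MLPPKNN knn(X, y, 3);
//   std::cout << knn.modelTest({ 5.0, 5.0 }) << std::endl; // nearest labels {1, 1, 0} -> prints 1
//   std::cout << knn.score() << std::endl;                 // performance of the model on its own training set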