pmlpp/mlpp/pca/pca.cpp

//
// PCA.cpp
//
// Created by Marc Melikyan on 10/2/20.
//
#include "pca.h"
#include "../data/data.h"
#include "../lin_alg/lin_alg.h"
#include <iostream>
#include <random>

MLPPPCA::MLPPPCA(std::vector<std::vector<double>> inputSet, int k) :
        inputSet(inputSet), k(k) {
}

std::vector<std::vector<double>> MLPPPCA::principalComponents() {
    MLPPLinAlg alg;
    MLPPData data;

    // Eigenvectors of the covariance matrix, obtained via SVD.
    auto [U, S, Vt] = alg.SVD(alg.cov(inputSet));
    X_normalized = data.meanCentering(inputSet);

    // Keep the first k columns of U as the reduced basis U_reduce.
    U_reduce.resize(U.size());
    for (int i = 0; i < k; i++) {
        for (int j = 0; j < U.size(); j++) {
            U_reduce[j].push_back(U[j][i]);
        }
    }

    // Project the mean-centered data onto the reduced basis.
    Z = alg.matmult(alg.transpose(U_reduce), X_normalized);
    return Z;
}
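
// In matrix form, the projection above is Z = U_reduce^T * X_normalized, where
// U_reduce holds the first k columns of U from SVD(cov(inputSet)) and
// X_normalized is the mean-centered data. score() below reports how much of the
// original variance this projection retains.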

// Returns the fraction of variance retained: 1 - (mean reconstruction error / mean total variance).
double MLPPPCA::score() {
    MLPPLinAlg alg;
    std::vector<std::vector<double>> X_approx = alg.matmult(U_reduce, Z);
    double num = 0, den = 0;
    for (int i = 0; i < X_normalized.size(); i++) {
        num += alg.norm_sq(alg.subtraction(X_normalized[i], X_approx[i]));
    }
    num /= X_normalized.size();
    for (int i = 0; i < X_normalized.size(); i++) {
        den += alg.norm_sq(X_normalized[i]);
    }
    den /= X_normalized.size();
    if (den == 0) {
        den += 1e-10; // For numerical sanity, so as not to receive a domain error.
    }
    return 1 - num / den;
}
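
// Minimal usage sketch (illustrative only; assumes pca.h declares the members
// used above and that the row/column layout of inputSet matches what
// MLPPData::meanCentering and MLPPLinAlg::cov expect):
//
//     std::vector<std::vector<double>> X = { { 1.0, 2.0 }, { 2.0, 4.1 }, { 3.0, 6.2 } };
//     MLPPPCA pca(X, 1);                               // keep k = 1 principal component
//     std::vector<std::vector<double>> Z = pca.principalComponents();
//     double retained = pca.score();                   // fraction of variance retained in Z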