2023-02-07 21:18:01 +01:00
|
|
|
//
|
|
|
|
// PCA.cpp
|
|
|
|
//
|
|
|
|
// Created by Marc Melikyan on 10/2/20.
|
|
|
|
//
|
|
|
|
|
|
|
|
#include "pca_old.h"

#include "../data/data.h"
#include "../lin_alg/lin_alg_old.h"

#include <iostream>
#include <random>
#include <utility>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Captures the data set to analyse and k, the number of leading columns of U
// that principalComponents() keeps.
MLPPPCAOld::MLPPPCAOld(std::vector<std::vector<real_t>> inputSet, int k) :
		// inputSet is already a by-value copy, so hand its storage to the member
		// instead of deep-copying the whole matrix a second time.
		inputSet(std::move(inputSet)), k(k) {
}
|
|
|
|
|
|
|
|
std::vector<std::vector<real_t>> MLPPPCAOld::principalComponents() {
|
2023-04-22 17:17:58 +02:00
|
|
|
MLPPLinAlgOld alg;
|
2023-02-07 21:18:01 +01:00
|
|
|
MLPPData data;
|
|
|
|
|
2023-04-22 17:17:58 +02:00
|
|
|
MLPPLinAlgOld::SVDResultOld svr_res = alg.SVD(alg.cov(inputSet));
|
2023-02-07 21:18:01 +01:00
|
|
|
X_normalized = data.meanCentering(inputSet);
|
2023-02-07 21:46:59 +01:00
|
|
|
U_reduce.resize(svr_res.U.size());
|
2023-02-07 21:18:01 +01:00
|
|
|
for (int i = 0; i < k; i++) {
|
2023-02-08 12:58:01 +01:00
|
|
|
for (uint32_t j = 0; j < svr_res.U.size(); j++) {
|
2023-02-07 21:46:59 +01:00
|
|
|
U_reduce[j].push_back(svr_res.U[j][i]);
|
2023-02-07 21:18:01 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
Z = alg.matmult(alg.transpose(U_reduce), X_normalized);
|
|
|
|
return Z;
|
|
|
|
}
|
2023-02-08 12:58:01 +01:00
|
|
|
|
2023-02-07 21:18:01 +01:00
|
|
|
// Simply tells us the percentage of variance maintained.
|
|
|
|
real_t MLPPPCAOld::score() {
|
2023-04-22 17:17:58 +02:00
|
|
|
MLPPLinAlgOld alg;
|
2023-02-07 21:18:01 +01:00
|
|
|
std::vector<std::vector<real_t>> X_approx = alg.matmult(U_reduce, Z);
|
2023-02-08 12:58:01 +01:00
|
|
|
real_t num = 0;
|
|
|
|
real_t den = 0;
|
|
|
|
|
|
|
|
for (uint32_t i = 0; i < X_normalized.size(); i++) {
|
2023-02-07 21:18:01 +01:00
|
|
|
num += alg.norm_sq(alg.subtraction(X_normalized[i], X_approx[i]));
|
|
|
|
}
|
2023-02-08 12:58:01 +01:00
|
|
|
|
2023-02-07 21:18:01 +01:00
|
|
|
num /= X_normalized.size();
|
2023-02-08 12:58:01 +01:00
|
|
|
for (uint32_t i = 0; i < X_normalized.size(); i++) {
|
2023-02-07 21:18:01 +01:00
|
|
|
den += alg.norm_sq(X_normalized[i]);
|
|
|
|
}
|
|
|
|
|
|
|
|
den /= X_normalized.size();
|
|
|
|
if (den == 0) {
|
|
|
|
den += 1e-10; // For numerical sanity as to not recieve a domain error
|
|
|
|
}
|
2023-02-08 12:58:01 +01:00
|
|
|
|
2023-02-07 21:18:01 +01:00
|
|
|
return 1 - num / den;
|
|
|
|
}
|
|
|
|
|