Added LinAlg.euclideanDistance, optimized softmax, rebuilt SO.

This commit is contained in:
novak_99 2021-06-01 18:29:35 -07:00
parent 629f63059c
commit bc00f633ac
10 changed files with 41 additions and 50 deletions

View File

@ -50,16 +50,18 @@ namespace MLPP{
}
// Softmax: a[i] = exp(z[i]) / sum_j exp(z[j]).
// NOTE(review): this span is a rendered diff hunk — it shows BOTH the removed
// O(n^2) implementation (per-element sum recomputation) and the added O(n)
// implementation (exp computed once, single sum). In the committed file only
// the second version remains.
std::vector<double> Activation::softmax(std::vector<double> z){
LinAlg alg;
std::vector<double> a;
a.resize(z.size());
// Removed (old) implementation: recomputes the full exponential sum for
// every output element, giving quadratic time in z.size().
for(int i = 0; i < z.size(); i++){
double sum = 0;
for(int j = 0; j < z.size(); j++){
sum += exp(z[j]);
}
a[i] = exp(z[i]) / sum;
}
// Added (new) implementation: exponentiate once via LinAlg::exp, then
// normalize by the single shared sum — linear time.
std::vector<double> expZ = alg.exp(z);
double sum = 0;
for(int i = 0; i < z.size(); i++){
sum += expZ[i];
}
for(int i = 0; i < z.size(); i++){
a[i] = expZ[i] / sum;
}
return a;
}
@ -80,16 +82,7 @@ namespace MLPP{
double C = -*max_element(z.begin(), z.end());
z = alg.scalarAdd(C, z);
a.resize(z.size());
for(int i = 0; i < z.size(); i++){
double sum = 0;
for(int j = 0; j < z.size(); j++){
sum += exp(z[j]);
}
a[i] = exp(z[i]) / sum;
}
return a;
return softmax(z);
}
std::vector<std::vector<double>> Activation::adjSoftmax(std::vector<std::vector<double>> z){

View File

@ -24,11 +24,13 @@ namespace MLPP{
}
std::vector<std::vector<double>> KMeans::modelSetTest(std::vector<std::vector<double>> X){
LinAlg alg;
std::vector<std::vector<double>> closestCentroids;
for(int i = 0; i < inputSet.size(); i++){
std::vector<double> closestCentroid = mu[0];
for(int j = 0; j < r[0].size(); j++){
if(euclideanDistance(X[i], mu[j]) < euclideanDistance(X[i], closestCentroid)){
bool isCentroidCloser = alg.euclideanDistance(X[i], mu[j]) < alg.euclideanDistance(X[i], closestCentroid);
if(isCentroidCloser){
closestCentroid = mu[j];
}
}
@ -38,9 +40,10 @@ namespace MLPP{
}
std::vector<double> KMeans::modelTest(std::vector<double> x){
LinAlg alg;
std::vector<double> closestCentroid = mu[0];
for(int j = 0; j < mu.size(); j++){
if(euclideanDistance(x, mu[j]) < euclideanDistance(x, closestCentroid)){
if(alg.euclideanDistance(x, mu[j]) < alg.euclideanDistance(x, closestCentroid)){
closestCentroid = mu[j];
}
}
@ -81,6 +84,7 @@ namespace MLPP{
}
std::vector<double> KMeans::silhouette_scores(){
LinAlg alg;
std::vector<std::vector<double>> closestCentroids = modelSetTest(inputSet);
std::vector<double> silhouette_scores;
for(int i = 0; i < inputSet.size(); i++){
@ -88,7 +92,7 @@ namespace MLPP{
double a = 0;
for(int j = 0; j < inputSet.size(); j++){
if(i != j && r[i] == r[j]){
a += euclideanDistance(inputSet[i], inputSet[j]);
a += alg.euclideanDistance(inputSet[i], inputSet[j]);
}
}
// NORMALIZE a[i]
@ -101,7 +105,7 @@ namespace MLPP{
if(closestCentroids[i] != mu[j]){
double sum = 0;
for(int k = 0; k < inputSet.size(); k++){
sum += euclideanDistance(inputSet[i], inputSet[k]);
sum += alg.euclideanDistance(inputSet[i], inputSet[k]);
}
// NORMALIZE b[i]
double k_clusterSize = 0;
@ -130,6 +134,7 @@ namespace MLPP{
// This simply computes r_nk
void KMeans::Evaluate(){
LinAlg alg;
r.resize(inputSet.size());
for(int i = 0; i < r.size(); i++){
@ -139,7 +144,8 @@ namespace MLPP{
for(int i = 0; i < r.size(); i++){
std::vector<double> closestCentroid = mu[0];
for(int j = 0; j < r[0].size(); j++){
if(euclideanDistance(inputSet[i], mu[j]) < euclideanDistance(inputSet[i], closestCentroid)){
bool isCentroidCloser = alg.euclideanDistance(inputSet[i], mu[j]) < alg.euclideanDistance(inputSet[i], closestCentroid);
if(isCentroidCloser){
closestCentroid = mu[j];
}
}
@ -190,6 +196,7 @@ namespace MLPP{
}
void KMeans::kmeansppInitialization(int k){
LinAlg alg;
std::random_device rd;
std::default_random_engine generator(rd());
std::uniform_int_distribution<int> distribution(0, int(inputSet.size() - 1));
@ -203,7 +210,7 @@ namespace MLPP{
AS TO SPREAD OUT THE CLUSTER CENTROIDS. */
double sum = 0;
for(int k = 0; k < mu.size(); k++){
sum += euclideanDistance(inputSet[j], mu[k]);
sum += alg.euclideanDistance(inputSet[j], mu[k]);
}
if(sum * sum > max_dist){
farthestCentroid = inputSet[j];
@ -224,13 +231,4 @@ namespace MLPP{
}
return sum;
}
// Euclidean (L2) distance between two equal-length vectors:
// sqrt(sum_i (A[i] - B[i])^2). Assumes A and B have the same size.
double KMeans::euclideanDistance(std::vector<double> A, std::vector<double> B){
double sumOfSquares = 0;
for(int idx = 0; idx < A.size(); idx++){
double diff = A[idx] - B[idx];
sumOfSquares += diff * diff;
}
return sqrt(sumOfSquares);
}
}

View File

@ -811,6 +811,15 @@ namespace MLPP{
return b;
}
// Euclidean (L2) distance between vectors a and b.
// Accumulates the squared component-wise differences, then takes the root.
// Assumes a and b have the same length.
double LinAlg::euclideanDistance(std::vector<double> a, std::vector<double> b){
double squaredDist = 0;
for(size_t i = 0; i < a.size(); ++i){
const double delta = a[i] - b[i];
squaredDist += delta * delta;
}
return std::sqrt(squaredDist);
}
double LinAlg::norm_sq(std::vector<double> a){
double n_sq = 0;
for(int i = 0; i < a.size(); i++){

View File

@ -137,6 +137,8 @@ namespace MLPP{
double min(std::vector<double> a);
std::vector<double> round(std::vector<double> a);
double euclideanDistance(std::vector<double> a, std::vector<double> b);
double norm_sq(std::vector<double> a);

View File

@ -34,7 +34,6 @@ namespace MLPP{
static void UI(std::vector<double> weights, double bias);
static void UI(std::vector<double> weights, std::vector<double> initial, double bias);
static void UI(std::vector<std::vector<double>>, std::vector<double> bias);
static void CostInfo(int epoch, double cost_prev, double Cost);
// F1 score, Precision/Recall, TP, FP, TN, FN, etc.

View File

@ -65,6 +65,7 @@ namespace MLPP{
}
std::vector<double> kNN::nearestNeighbors(std::vector<double> x){
LinAlg alg;
// The nearest neighbors
std::vector<double> knn;
@ -73,22 +74,14 @@ namespace MLPP{
for(int i = 0; i < k; i++){
int neighbor = 0;
for(int j = 0; j < inputUseSet.size(); j++){
if(euclideanDistance(x, inputUseSet[j]) < euclideanDistance(x, inputUseSet[neighbor])){
bool isNeighborNearer = alg.euclideanDistance(x, inputUseSet[j]) < alg.euclideanDistance(x, inputUseSet[neighbor]);
if(isNeighborNearer){
neighbor = j;
}
}
knn.push_back(neighbor);
inputUseSet.erase(inputUseSet.begin() + neighbor);
inputUseSet.erase(inputUseSet.begin() + neighbor); // This is why we maintain an extra input"Use"Set
}
return knn;
}
// Straight-line (Euclidean) distance between the feature vectors A and B,
// used to rank candidate neighbors. A and B are assumed equal-length.
double kNN::euclideanDistance(std::vector<double> A, std::vector<double> B){
double acc = 0;
int n = A.size();
for(int i = 0; i < n; ++i){
acc += (A[i] - B[i]) * (A[i] - B[i]);
}
return sqrt(acc);
}
}

View File

@ -23,9 +23,6 @@ namespace MLPP{
// Private Model Functions
std::vector<double> nearestNeighbors(std::vector<double> x);
int determineClass(std::vector<double> knn);
// Helper Functions
double euclideanDistance(std::vector<double> A, std::vector<double> B);
// Model Inputs and Parameters
std::vector<std::vector<double>> inputSet;

Binary file not shown.

BIN
a.out

Binary file not shown.

View File

@ -186,6 +186,7 @@ int main() {
// std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl;
// // SOFTMAX REGRESSION
// time_t start = time(0);
// std::vector<std::vector<double>> inputSet;
// std::vector<double> tempOutputSet;
// data.setData(4, "/Users/marcmelikyan/Desktop/Data/Iris.csv", inputSet, tempOutputSet);
@ -194,7 +195,6 @@ int main() {
// SoftmaxReg model(inputSet, outputSet);
// model.SGD(0.001, 20000, 0);
// alg.printMatrix(model.modelSetTest(inputSet));
// std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl;
// // MLP
// std::vector<std::vector<double>> inputSet = {{0,0,1,1}, {0,1,0,1}};
@ -354,11 +354,11 @@ int main() {
// std::cout << avn.softsign(z_s, 1) << std::endl;
// std::vector<double> z_v = {0.001, 5};
// alg.printVector(avn.softsign(z_v));
// alg.printVector(avn.softmax(z_v));
// alg.printVector(avn.softsign(z_v, 1));
// std::vector<std::vector<double>> Z_m = {{0.001, 5}};
// alg.printMatrix(avn.softsign(Z_m));
// alg.printMatrix(avn.softmax(Z_m));
// alg.printMatrix(avn.softsign(Z_m, 1));
// std::cout << alg.trace({{1,2}, {3,4}}) << std::endl;