// // Reg.cpp // // Created by Marc Melikyan on 1/16/21. // #include #include #include "Cost.hpp" #include "LinAlg/LinAlg.hpp" #include "Regularization/Reg.hpp" namespace MLPP{ double Cost::MSE(std::vector y_hat, std::vector y){ double sum = 0; for(int i = 0; i < y_hat.size(); i++){ sum += (y_hat[i] - y[i]) * (y_hat[i] - y[i]); } return sum / 2 * y_hat.size(); } double Cost::MSE(std::vector> y_hat, std::vector> y){ double sum = 0; for(int i = 0; i < y_hat.size(); i++){ for(int j = 0; j < y_hat[i].size(); j++){ sum += (y_hat[i][j] - y[i][j]) * (y_hat[i][j] - y[i][j]); } } return sum / 2 * y_hat.size(); } std::vector Cost::MSEDeriv(std::vector y_hat, std::vector y){ LinAlg alg; return alg.subtraction(y_hat, y); } std::vector> Cost::MSEDeriv(std::vector> y_hat, std::vector> y){ LinAlg alg; return alg.subtraction(y_hat, y); } double Cost::RMSE(std::vector y_hat, std::vector y){ double sum = 0; for(int i = 0; i < y_hat.size(); i++){ sum += (y_hat[i] - y[i]) * (y_hat[i] - y[i]); } return sqrt(sum / y_hat.size()); } double Cost::RMSE(std::vector> y_hat, std::vector> y){ double sum = 0; for(int i = 0; i < y_hat.size(); i++){ for(int j = 0; j < y_hat[i].size(); j++){ sum += (y_hat[i][j] - y[i][j]) * (y_hat[i][j] - y[i][j]); } } return sqrt(sum / y_hat.size()); } std::vector Cost::RMSEDeriv(std::vector y_hat, std::vector y){ LinAlg alg; return alg.scalarMultiply(1/(2*sqrt(MSE(y_hat, y))), MSEDeriv(y_hat, y)); } std::vector> Cost::RMSEDeriv(std::vector> y_hat, std::vector> y){ LinAlg alg; return alg.scalarMultiply(1/(2/sqrt(MSE(y_hat, y))), MSEDeriv(y_hat, y)); } double Cost::MAE(std::vector y_hat, std::vector y){ double sum = 0; for(int i = 0; i < y_hat.size(); i++){ sum += abs((y_hat[i] - y[i])); } return sum / y_hat.size(); } double Cost::MAE(std::vector> y_hat, std::vector> y){ double sum = 0; for(int i = 0; i < y_hat.size(); i++){ for(int j = 0; j < y_hat[i].size(); j++){ sum += abs((y_hat[i][j] - y[i][j])); } } return sum / y_hat.size(); } std::vector Cost::MAEDeriv(std::vector y_hat, std::vector y){ std::vector deriv; deriv.resize(y_hat.size()); for(int i = 0; i < deriv.size(); i++){ if(y_hat[i] < 0){ deriv[i] = -1; } else if(y_hat[i] == 0){ deriv[i] = 0; } else{ deriv[i] = 1; } } return deriv; } std::vector> Cost::MAEDeriv(std::vector> y_hat, std::vector> y){ std::vector> deriv; deriv.resize(y_hat.size()); for(int i = 0; i < deriv.size(); i++){ deriv.resize(y_hat[i].size()); } for(int i = 0; i < deriv.size(); i++){ for(int j = 0; j < deriv[i].size(); j++){ if(y_hat[i][j] < 0){ deriv[i][j] = -1; } else if(y_hat[i][j] == 0){ deriv[i][j] = 0; } else{ deriv[i][j] = 1; } } } return deriv; } double Cost::MBE(std::vector y_hat, std::vector y){ double sum = 0; for(int i = 0; i < y_hat.size(); i++){ sum += (y_hat[i] - y[i]); } return sum / y_hat.size(); } double Cost::MBE(std::vector> y_hat, std::vector> y){ double sum = 0; for(int i = 0; i < y_hat.size(); i++){ for(int j = 0; j < y_hat[i].size(); j++){ sum += (y_hat[i][j] - y[i][j]); } } return sum / y_hat.size(); } std::vector Cost::MBEDeriv(std::vector y_hat, std::vector y){ LinAlg alg; return alg.onevec(y_hat.size()); } std::vector> Cost::MBEDeriv(std::vector> y_hat, std::vector> y){ LinAlg alg; return alg.onemat(y_hat.size(), y_hat[0].size()); } double Cost::LogLoss(std::vector y_hat, std::vector y){ double sum = 0; double eps = 1e-8; for(int i = 0; i < y_hat.size(); i++){ sum += -(y[i] * std::log(y_hat[i] + eps) + (1 - y[i]) * std::log(1 - y_hat[i] + eps)); } return sum / y_hat.size(); } double Cost::LogLoss(std::vector > y_hat, std::vector > y){ double sum = 0; double eps = 1e-8; for(int i = 0; i < y_hat.size(); i++){ for(int j = 0; j < y_hat[i].size(); j++){ sum += -(y[i][j] * std::log(y_hat[i][j] + eps) + (1 - y[i][j]) * std::log(1 - y_hat[i][j] + eps)); } } return sum / y_hat.size(); } std::vector Cost::LogLossDeriv(std::vector y_hat, std::vector y){ LinAlg alg; return alg.addition(alg.scalarMultiply(-1, alg.elementWiseDivision(y, y_hat)), alg.elementWiseDivision(alg.scalarMultiply(-1, alg.scalarAdd(-1, y)), alg.scalarMultiply(-1, alg.scalarAdd(-1, y_hat)))); } std::vector> Cost::LogLossDeriv(std::vector> y_hat, std::vector> y){ LinAlg alg; return alg.addition(alg.scalarMultiply(-1, alg.elementWiseDivision(y, y_hat)), alg.elementWiseDivision(alg.scalarMultiply(-1, alg.scalarAdd(-1, y)), alg.scalarMultiply(-1, alg.scalarAdd(-1, y_hat)))); } double Cost::CrossEntropy(std::vector y_hat, std::vector y){ double sum = 0; for(int i = 0; i < y_hat.size(); i++){ sum += y[i] * std::log(y_hat[i]); } return -1 * sum; } double Cost::CrossEntropy(std::vector> y_hat, std::vector> y){ double sum = 0; for(int i = 0; i < y_hat.size(); i++){ for(int j = 0; j < y_hat[i].size(); j++){ sum += y[i][j] * std::log(y_hat[i][j]); } } return -1 * sum; } std::vector Cost::CrossEntropyDeriv(std::vector y_hat, std::vector y){ LinAlg alg; return alg.scalarMultiply(-1, alg.elementWiseDivision(y, y_hat)); } std::vector> Cost::CrossEntropyDeriv(std::vector> y_hat, std::vector> y){ LinAlg alg; return alg.scalarMultiply(-1, alg.elementWiseDivision(y, y_hat)); } double Cost::HuberLoss(std::vector y_hat, std::vector y, double delta){ LinAlg alg; double sum = 0; for(int i = 0; i < y_hat.size(); i++){ if(abs(y[i] - y_hat[i]) <= delta){ sum += (y[i] - y_hat[i]) * (y[i] - y_hat[i]); } else{ sum += 2 * delta * abs(y[i] - y_hat[i]) - delta * delta; } } return sum; } double Cost::HuberLoss(std::vector> y_hat, std::vector> y, double delta){ LinAlg alg; double sum = 0; for(int i = 0; i < y_hat.size(); i++){ for(int j = 0; j < y_hat[i].size(); j++){ if(abs(y[i][j] - y_hat[i][j]) <= delta){ sum += (y[i][j] - y_hat[i][j]) * (y[i][j] - y_hat[i][j]); } else{ sum += 2 * delta * abs(y[i][j] - y_hat[i][j]) - delta * delta; } } } return sum; } std::vector Cost::HuberLossDeriv(std::vector y_hat, std::vector y, double delta){ LinAlg alg; double sum = 0; std::vector deriv; deriv.resize(y_hat.size()); for(int i = 0; i < y_hat.size(); i++){ if(abs(y[i] - y_hat[i]) <= delta){ deriv.push_back(-(y[i] - y_hat[i])); } else{ if(y_hat[i] > 0 || y_hat[i] < 0){ deriv.push_back(2 * delta * (y_hat[i]/abs(y_hat[i]))); } else{ deriv.push_back(0); } } } return deriv; } std::vector> Cost::HuberLossDeriv(std::vector> y_hat, std::vector> y, double delta){ LinAlg alg; double sum = 0; std::vector> deriv; deriv.resize(y_hat.size()); for(int i = 0; i < deriv.size(); i++){ deriv[i].resize(y_hat[i].size()); } for(int i = 0; i < y_hat.size(); i++){ for(int j = 0; j < y_hat[i].size(); j++){ if(abs(y[i][j] - y_hat[i][j]) <= delta){ deriv[i].push_back(-(y[i][j] - y_hat[i][j])); } else{ if(y_hat[i][j] > 0 || y_hat[i][j] < 0){ deriv[i].push_back(2 * delta * (y_hat[i][j]/abs(y_hat[i][j]))); } else{ deriv[i].push_back(0); } } } } return deriv; } double Cost::HingeLoss(std::vector y_hat, std::vector y){ double sum = 0; for(int i = 0; i < y_hat.size(); i++){ sum += fmax(0, 1 - y[i] * y_hat[i]); } return sum / y_hat.size(); } double Cost::HingeLoss(std::vector> y_hat, std::vector> y){ double sum = 0; for(int i = 0; i < y_hat.size(); i++){ for(int j = 0; j < y_hat[i].size(); j++){ sum += fmax(0, 1 - y[i][j] * y_hat[i][j]); } } return sum / y_hat.size(); } std::vector Cost::HingeLossDeriv(std::vector y_hat, std::vector y){ std::vector deriv; deriv.resize(y_hat.size()); for(int i = 0; i < y_hat.size(); i++){ if(1 - y[i] * y_hat[i] > 0){ deriv[i] = -y[i]; } else{ deriv[i] = 0; } } return deriv; } std::vector> Cost::HingeLossDeriv(std::vector> y_hat, std::vector> y){ std::vector> deriv; for(int i = 0; i < y_hat.size(); i++){ for(int j = 0; j < y_hat[i].size(); j++){ if(1 - y[i][j] * y_hat[i][j] > 0){ deriv[i][j] = -y[i][j]; } else{ deriv[i][j] = 0; } } } return deriv; } double Cost::WassersteinLoss(std::vector y_hat, std::vector y){ double sum = 0; for(int i = 0; i < y_hat.size(); i++){ sum += y_hat[i] * y[i]; } return -sum / y_hat.size(); } double Cost::WassersteinLoss(std::vector> y_hat, std::vector> y){ double sum = 0; for(int i = 0; i < y_hat.size(); i++){ for(int j = 0; j < y_hat[i].size(); j++){ sum += y_hat[i][j] * y[i][j]; } } return -sum / y_hat.size(); } std::vector Cost::WassersteinLossDeriv(std::vector y_hat, std::vector y){ LinAlg alg; return alg.scalarMultiply(-1, y); // Simple. } std::vector> Cost::WassersteinLossDeriv(std::vector> y_hat, std::vector> y){ LinAlg alg; return alg.scalarMultiply(-1, y); // Simple. } double Cost::HingeLoss(std::vector y_hat, std::vector y, std::vector weights, double C){ LinAlg alg; Reg regularization; return C * HingeLoss(y_hat, y) + regularization.regTerm(weights, 1, 0, "Ridge"); } double Cost::HingeLoss(std::vector> y_hat, std::vector> y, std::vector> weights, double C){ LinAlg alg; Reg regularization; return C * HingeLoss(y_hat, y) + regularization.regTerm(weights, 1, 0, "Ridge"); } std::vector Cost::HingeLossDeriv(std::vector y_hat, std::vector y, double C){ LinAlg alg; Reg regularization; return alg.scalarMultiply(C, HingeLossDeriv(y_hat, y)); } std::vector> Cost::HingeLossDeriv(std::vector> y_hat, std::vector> y, double C){ LinAlg alg; Reg regularization; return alg.scalarMultiply(C, HingeLossDeriv(y_hat, y)); } double Cost::dualFormSVM(std::vector alpha, std::vector> X, std::vector y){ LinAlg alg; std::vector> Y = alg.diag(y); // Y is a diagnoal matrix. Y[i][j] = y[i] if i = i, else Y[i][j] = 0. Yt = Y. std::vector> K = alg.matmult(X, alg.transpose(X)); // TO DO: DON'T forget to add non-linear kernelizations. std::vector> Q = alg.matmult(alg.matmult(alg.transpose(Y), K), Y); double alphaQ = alg.matmult(alg.matmult({alpha}, Q), alg.transpose({alpha}))[0][0]; std::vector one = alg.onevec(alpha.size()); return -alg.dot(one, alpha) + 0.5 * alphaQ; } std::vector Cost::dualFormSVMDeriv(std::vector alpha, std::vector> X, std::vector y){ LinAlg alg; std::vector> Y = alg.zeromat(y.size(), y.size()); for(int i = 0; i < y.size(); i++){ Y[i][i] = y[i]; // Y is a diagnoal matrix. Y[i][j] = y[i] if i = i, else Y[i][j] = 0. Yt = Y. } std::vector> K = alg.matmult(X, alg.transpose(X)); // TO DO: DON'T forget to add non-linear kernelizations. std::vector> Q = alg.matmult(alg.matmult(alg.transpose(Y), K), Y); std::vector alphaQDeriv = alg.mat_vec_mult(Q, alpha); std::vector one = alg.onevec(alpha.size()); return alg.subtraction(alphaQDeriv, one); } }