Commit a66308dc78 (parent 7799a27935): added new optimizers. fixed isnan.
@@ -418,6 +418,18 @@ namespace MLPP{
         return B;
     }
 
+    std::vector<double> LinAlg::max(std::vector<double> a, std::vector<double> b){
+        std::vector<double> c;
+        c.resize(a.size());
+        for(int i = 0; i < c.size(); i++){
+            if(a[i] >= b[i]) {
+                c[i] = a[i];
+            }
+            else { c[i] = b[i]; }
+        }
+        return c;
+    }
+
     double LinAlg::max(std::vector<std::vector<double>> A){
         return max(flatten(A));
     }
@@ -945,6 +957,18 @@ namespace MLPP{
         return matmult(A, rotationMatrix);
     }
 
+    std::vector<std::vector<double>> LinAlg::max(std::vector<std::vector<double>> A, std::vector<std::vector<double>> B){
+        std::vector<std::vector<double>> C;
+        C.resize(A.size());
+        for(int i = 0; i < C.size(); i++){
+            C[i].resize(A[0].size());
+        }
+        for(int i = 0; i < A.size(); i++){
+            C[i] = max(A[i], B[i]);
+        }
+        return C;
+    }
+
     double LinAlg::max(std::vector<double> a){
         int max = a[0];
         for(int i = 0; i < a.size(); i++){
@@ -76,6 +76,8 @@ namespace MLPP{
 
         std::vector<std::vector<double>> rotate(std::vector<std::vector<double>> A, double theta, int axis = -1);
 
+        std::vector<std::vector<double>> max(std::vector<std::vector<double>> A, std::vector<std::vector<double>> B);
+
         double max(std::vector<std::vector<double>> A);
 
         double min(std::vector<std::vector<double>> A);
@@ -162,6 +164,8 @@ namespace MLPP{
 
         std::vector<double> cos(std::vector<double> a);
 
+        std::vector<double> max(std::vector<double> a, std::vector<double> b);
+
         double max(std::vector<double> a);
 
         double min(std::vector<double> a);
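As a quick illustration of the new element-wise max overloads, here is a minimal usage sketch; the include path and build setup are assumptions, not part of this commit:

#include <iostream>
#include <vector>
#include "MLPP/LinAlg/LinAlg.hpp" // assumed location of the LinAlg header

int main(){
    MLPP::LinAlg alg;
    std::vector<double> a = {1.0, 5.0, 2.0};
    std::vector<double> b = {3.0, 4.0, 2.5};
    std::vector<double> c = alg.max(a, b); // element-wise maximum -> {3.0, 5.0, 2.5}
    for(double x : c){ std::cout << x << " "; } // prints: 3 5 2.5
    std::cout << std::endl;
    return 0;
}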
@@ -166,6 +166,327 @@ namespace MLPP{
         forwardPass();
     }
 
+    void LinReg::Momentum(double learning_rate, int max_epoch, int mini_batch_size, double gamma, bool UI){
+        LinAlg alg;
+        Reg regularization;
+        double cost_prev = 0;
+        int epoch = 1;
+
+        // Creating the mini-batches
+        int n_mini_batch = n/mini_batch_size;
+        auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
+
+        // Initializing necessary components for Momentum.
+        std::vector<double> v = alg.zerovec(weights.size());
+        while(true){
+            for(int i = 0; i < n_mini_batch; i++){
+                std::vector<double> y_hat = Evaluate(inputMiniBatches[i]);
+                cost_prev = Cost(y_hat, outputMiniBatches[i]);
+
+                std::vector<double> error = alg.subtraction(y_hat, outputMiniBatches[i]);
+
+                // Calculating the weight gradients
+                std::vector<double> gradient = alg.scalarMultiply(1/outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error));
+                std::vector<double> RegDerivTerm = regularization.regDerivTerm(weights, lambda, alpha, reg);
+                std::vector<double> weight_grad = alg.addition(gradient, RegDerivTerm); // Weight_grad_final
+
+                v = alg.addition(alg.scalarMultiply(gamma, v), alg.scalarMultiply(learning_rate, weight_grad));
+
+                weights = alg.subtraction(weights, v);
+
+                // Calculating the bias gradients
+                bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); // As normal
+                y_hat = Evaluate(inputMiniBatches[i]);
+
+                if(UI) {
+                    Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
+                    Utilities::UI(weights, bias);
+                }
+            }
+            epoch++;
+            if(epoch > max_epoch) { break; }
+        }
+        forwardPass();
+    }
+
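For reference, the weight update implemented above is the classical momentum (heavy-ball) rule; as a sketch, with gamma the momentum coefficient, eta the learning rate, and g_t the regularized gradient:

    v_t = \gamma v_{t-1} + \eta g_t, \qquad w \leftarrow w - v_t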
+    void LinReg::NAG(double learning_rate, int max_epoch, int mini_batch_size, double gamma, bool UI){
+        LinAlg alg;
+        Reg regularization;
+        double cost_prev = 0;
+        int epoch = 1;
+
+        // Creating the mini-batches
+        int n_mini_batch = n/mini_batch_size;
+        auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
+
+        // Initializing necessary components for Momentum.
+        std::vector<double> v = alg.zerovec(weights.size());
+        while(true){
+            for(int i = 0; i < n_mini_batch; i++){
+                weights = alg.subtraction(weights, alg.scalarMultiply(gamma, v)); // "Aposterori" calculation
+
+                std::vector<double> y_hat = Evaluate(inputMiniBatches[i]);
+                cost_prev = Cost(y_hat, outputMiniBatches[i]);
+
+                std::vector<double> error = alg.subtraction(y_hat, outputMiniBatches[i]);
+
+                // Calculating the weight gradients
+                std::vector<double> gradient = alg.scalarMultiply(1/outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error));
+                std::vector<double> RegDerivTerm = regularization.regDerivTerm(weights, lambda, alpha, reg);
+                std::vector<double> weight_grad = alg.addition(gradient, RegDerivTerm); // Weight_grad_final
+
+                v = alg.addition(alg.scalarMultiply(gamma, v), alg.scalarMultiply(learning_rate, weight_grad));
+
+                weights = alg.subtraction(weights, v);
+
+                // Calculating the bias gradients
+                bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); // As normal
+                y_hat = Evaluate(inputMiniBatches[i]);
+
+                if(UI) {
+                    Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
+                    Utilities::UI(weights, bias);
+                }
+            }
+            epoch++;
+            if(epoch > max_epoch) { break; }
+        }
+        forwardPass();
+    }
+
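The NAG variant above shifts the weights by the momentum term before evaluating the gradient (the "Aposterori" step); as a sketch in the same notation:

    \tilde{w} = w - \gamma v_{t-1}, \qquad v_t = \gamma v_{t-1} + \eta g(\tilde{w}), \qquad w \leftarrow \tilde{w} - v_t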
+    void LinReg::Adagrad(double learning_rate, int max_epoch, int mini_batch_size, double e, bool UI){
+        LinAlg alg;
+        Reg regularization;
+        double cost_prev = 0;
+        int epoch = 1;
+
+        // Creating the mini-batches
+        int n_mini_batch = n/mini_batch_size;
+        auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
+
+        // Initializing necessary components for Adagrad.
+        std::vector<double> v = alg.zerovec(weights.size());
+        while(true){
+            for(int i = 0; i < n_mini_batch; i++){
+                std::vector<double> y_hat = Evaluate(inputMiniBatches[i]);
+                cost_prev = Cost(y_hat, outputMiniBatches[i]);
+
+                std::vector<double> error = alg.subtraction(y_hat, outputMiniBatches[i]);
+
+                // Calculating the weight gradients
+                std::vector<double> gradient = alg.scalarMultiply(1/outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error));
+                std::vector<double> RegDerivTerm = regularization.regDerivTerm(weights, lambda, alpha, reg);
+                std::vector<double> weight_grad = alg.addition(gradient, RegDerivTerm); // Weight_grad_final
+
+                v = alg.hadamard_product(weight_grad, weight_grad);
+
+                weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, alg.elementWiseDivision(weight_grad, alg.sqrt(alg.scalarAdd(e, v)))));
+
+                // Calculating the bias gradients
+                bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); // As normal
+                y_hat = Evaluate(inputMiniBatches[i]);
+
+                if(UI) {
+                    Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
+                    Utilities::UI(weights, bias);
+                }
+            }
+            epoch++;
+            if(epoch > max_epoch) { break; }
+        }
+        forwardPass();
+    }
+
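Note that the cache v above is overwritten with the current squared gradient on every step; textbook Adagrad accumulates it instead. For reference, the standard rule reads (a sketch, with e playing the role of epsilon):

    v_t = v_{t-1} + g_t^2, \qquad w \leftarrow w - \eta \, g_t / \sqrt{v_t + \epsilon}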
+    void LinReg::Adadelta(double learning_rate, int max_epoch, int mini_batch_size, double b1, double e, bool UI){
+        // Adagrad upgrade. Momentum is applied.
+        LinAlg alg;
+        Reg regularization;
+        double cost_prev = 0;
+        int epoch = 1;
+
+        // Creating the mini-batches
+        int n_mini_batch = n/mini_batch_size;
+        auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
+
+        // Initializing necessary components for Adagrad.
+        std::vector<double> v = alg.zerovec(weights.size());
+        while(true){
+            for(int i = 0; i < n_mini_batch; i++){
+                std::vector<double> y_hat = Evaluate(inputMiniBatches[i]);
+                cost_prev = Cost(y_hat, outputMiniBatches[i]);
+
+                std::vector<double> error = alg.subtraction(y_hat, outputMiniBatches[i]);
+
+                // Calculating the weight gradients
+                std::vector<double> gradient = alg.scalarMultiply(1/outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error));
+                std::vector<double> RegDerivTerm = regularization.regDerivTerm(weights, lambda, alpha, reg);
+                std::vector<double> weight_grad = alg.addition(gradient, RegDerivTerm); // Weight_grad_final
+
+                v = alg.addition(alg.scalarMultiply(b1, v), alg.scalarMultiply(1 - b1, alg.hadamard_product(weight_grad, weight_grad)));
+
+                weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, alg.elementWiseDivision(weight_grad, alg.sqrt(alg.scalarAdd(e, v)))));
+
+                // Calculating the bias gradients
+                bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); // As normal
+                y_hat = Evaluate(inputMiniBatches[i]);
+
+                if(UI) {
+                    Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
+                    Utilities::UI(weights, bias);
+                }
+            }
+            epoch++;
+            if(epoch > max_epoch) { break; }
+        }
+        forwardPass();
+    }
+
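As implemented, the cache here is an exponentially decayed average of squared gradients (an RMSProp-style rule); full Adadelta would additionally track a decayed average of past parameter updates. A sketch of the update actually performed, with b1 as the decay rate:

    v_t = \beta_1 v_{t-1} + (1 - \beta_1) g_t^2, \qquad w \leftarrow w - \eta \, g_t / \sqrt{v_t + \epsilon}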
+    void LinReg::Adam(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI){
+        LinAlg alg;
+        Reg regularization;
+        double cost_prev = 0;
+        int epoch = 1;
+
+        // Creating the mini-batches
+        int n_mini_batch = n/mini_batch_size;
+        auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
+
+        // Initializing necessary components for Adam.
+        std::vector<double> m = alg.zerovec(weights.size());
+
+        std::vector<double> v = alg.zerovec(weights.size());
+        while(true){
+            for(int i = 0; i < n_mini_batch; i++){
+                std::vector<double> y_hat = Evaluate(inputMiniBatches[i]);
+                cost_prev = Cost(y_hat, outputMiniBatches[i]);
+
+                std::vector<double> error = alg.subtraction(y_hat, outputMiniBatches[i]);
+
+                // Calculating the weight gradients
+                std::vector<double> gradient = alg.scalarMultiply(1/outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error));
+                std::vector<double> RegDerivTerm = regularization.regDerivTerm(weights, lambda, alpha, reg);
+                std::vector<double> weight_grad = alg.addition(gradient, RegDerivTerm); // Weight_grad_final
+
+                m = alg.addition(alg.scalarMultiply(b1, m), alg.scalarMultiply(1 - b1, weight_grad));
+                v = alg.addition(alg.scalarMultiply(b2, v), alg.scalarMultiply(1 - b2, alg.exponentiate(weight_grad, 2)));
+
+                std::vector<double> m_hat = alg.scalarMultiply(1/(1 - pow(b1, epoch)), m);
+                std::vector<double> v_hat = alg.scalarMultiply(1/(1 - pow(b2, epoch)), v);
+
+                weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, alg.elementWiseDivision(m_hat, alg.scalarAdd(e, alg.sqrt(v_hat)))));
+
+                // Calculating the bias gradients
+                bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); // As normal
+                y_hat = Evaluate(inputMiniBatches[i]);
+
+                if(UI) {
+                    Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
+                    Utilities::UI(weights, bias);
+                }
+            }
+            epoch++;
+            if(epoch > max_epoch) { break; }
+        }
+        forwardPass();
+    }
+
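The Adam update above follows the standard bias-corrected form, with the epoch counter serving as the timestep t and b1, b2, e standing in for beta_1, beta_2, epsilon:

    m_t = \beta_1 m_{t-1} + (1 - \beta_1) g_t, \qquad v_t = \beta_2 v_{t-1} + (1 - \beta_2) g_t^2
    \hat{m}_t = m_t / (1 - \beta_1^t), \qquad \hat{v}_t = v_t / (1 - \beta_2^t), \qquad w \leftarrow w - \eta \, \hat{m}_t / (\sqrt{\hat{v}_t} + \epsilon)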
+    void LinReg::Adamax(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI){
+        LinAlg alg;
+        Reg regularization;
+        double cost_prev = 0;
+        int epoch = 1;
+
+        // Creating the mini-batches
+        int n_mini_batch = n/mini_batch_size;
+        auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
+
+        std::vector<double> m = alg.zerovec(weights.size());
+
+        std::vector<double> u = alg.zerovec(weights.size());
+        while(true){
+            for(int i = 0; i < n_mini_batch; i++){
+                std::vector<double> y_hat = Evaluate(inputMiniBatches[i]);
+                cost_prev = Cost(y_hat, outputMiniBatches[i]);
+
+                std::vector<double> error = alg.subtraction(y_hat, outputMiniBatches[i]);
+
+                // Calculating the weight gradients
+                std::vector<double> gradient = alg.scalarMultiply(1/outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error));
+                std::vector<double> RegDerivTerm = regularization.regDerivTerm(weights, lambda, alpha, reg);
+                std::vector<double> weight_grad = alg.addition(gradient, RegDerivTerm); // Weight_grad_final
+
+                m = alg.addition(alg.scalarMultiply(b1, m), alg.scalarMultiply(1 - b1, weight_grad));
+                u = alg.max(alg.scalarMultiply(b2, u), alg.abs(weight_grad));
+
+                std::vector<double> m_hat = alg.scalarMultiply(1/(1 - pow(b1, epoch)), m);
+
+                weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, alg.elementWiseDivision(m_hat, u)));
+
+                // Calculating the bias gradients
+                bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); // As normal
+                y_hat = Evaluate(inputMiniBatches[i]);
+
+                if(UI) {
+                    Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
+                    Utilities::UI(weights, bias);
+                }
+            }
+            epoch++;
+            if(epoch > max_epoch) { break; }
+        }
+        forwardPass();
+    }
+
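Adamax replaces the second-moment estimate with an infinity-norm term, which is what the element-wise max added to LinAlg in this commit is used for. A sketch of the implemented rule:

    m_t = \beta_1 m_{t-1} + (1 - \beta_1) g_t, \qquad u_t = \max(\beta_2 u_{t-1}, |g_t|), \qquad w \leftarrow w - \eta \, \hat{m}_t / u_t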
+    void LinReg::Nadam(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI){
+        LinAlg alg;
+        Reg regularization;
+        double cost_prev = 0;
+        int epoch = 1;
+
+        // Creating the mini-batches
+        int n_mini_batch = n/mini_batch_size;
+        auto [inputMiniBatches, outputMiniBatches] = Utilities::createMiniBatches(inputSet, outputSet, n_mini_batch);
+
+        // Initializing necessary components for Adam.
+        std::vector<double> m = alg.zerovec(weights.size());
+        std::vector<double> v = alg.zerovec(weights.size());
+        std::vector<double> m_final = alg.zerovec(weights.size());
+        while(true){
+            for(int i = 0; i < n_mini_batch; i++){
+                std::vector<double> y_hat = Evaluate(inputMiniBatches[i]);
+                cost_prev = Cost(y_hat, outputMiniBatches[i]);
+
+                std::vector<double> error = alg.subtraction(y_hat, outputMiniBatches[i]);
+
+                // Calculating the weight gradients
+                std::vector<double> gradient = alg.scalarMultiply(1/outputMiniBatches[i].size(), alg.mat_vec_mult(alg.transpose(inputMiniBatches[i]), error));
+                std::vector<double> RegDerivTerm = regularization.regDerivTerm(weights, lambda, alpha, reg);
+                std::vector<double> weight_grad = alg.addition(gradient, RegDerivTerm); // Weight_grad_final
+
+                m = alg.addition(alg.scalarMultiply(b1, m), alg.scalarMultiply(1 - b1, weight_grad));
+                v = alg.addition(alg.scalarMultiply(b2, v), alg.scalarMultiply(1 - b2, alg.exponentiate(weight_grad, 2)));
+                m_final = alg.addition(alg.scalarMultiply(b1, m), alg.scalarMultiply((1 - b1)/(1 - pow(b1, epoch)), weight_grad));
+
+                std::vector<double> m_hat = alg.scalarMultiply(1/(1 - pow(b1, epoch)), m);
+                std::vector<double> v_hat = alg.scalarMultiply(1/(1 - pow(b2, epoch)), v);
+
+                weights = alg.subtraction(weights, alg.scalarMultiply(learning_rate, alg.elementWiseDivision(m_final, alg.scalarAdd(e, alg.sqrt(v_hat)))));
+
+                // Calculating the bias gradients
+                bias -= learning_rate * alg.sum_elements(error) / outputMiniBatches[i].size(); // As normal
+                y_hat = Evaluate(inputMiniBatches[i]);
+
+                if(UI) {
+                    Utilities::CostInfo(epoch, cost_prev, Cost(y_hat, outputMiniBatches[i]));
+                    Utilities::UI(weights, bias);
+                }
+            }
+            epoch++;
+            if(epoch > max_epoch) { break; }
+        }
+        forwardPass();
+    }
+
     void LinReg::normalEquation(){
         LinAlg alg;
         Stat stat;
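The Nesterov-style correction in Nadam mixes the first-moment estimate with a bias-corrected current gradient before applying the Adam-style denominator; a sketch of what the code computes, with m_t and \hat{v}_t as in the Adam note above:

    \bar{m}_t = \beta_1 m_t + \frac{1 - \beta_1}{1 - \beta_1^t} g_t, \qquad w \leftarrow w - \eta \, \bar{m}_t / (\sqrt{\hat{v}_t} + \epsilon)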
@@ -181,14 +502,14 @@ namespace MLPP{
             std::vector<double> temp;
             temp.resize(k);
             temp = alg.mat_vec_mult(alg.inverse(alg.matmult(alg.transpose(inputSet), inputSet)), alg.mat_vec_mult(alg.transpose(inputSet), outputSet));
-            if(isnan(temp[0])){
+            if(std::isnan(temp[0])){
                 throw 99;
             }
             else{
                 if(reg == "Ridge") {
                     weights = alg.mat_vec_mult(alg.inverse(alg.addition(alg.matmult(alg.transpose(inputSet), inputSet), alg.scalarMultiply(lambda, alg.identity(k)))), alg.mat_vec_mult(alg.transpose(inputSet), outputSet));
                 }
                 else{ weights = alg.mat_vec_mult(alg.inverse(alg.matmult(alg.transpose(inputSet), inputSet)), alg.mat_vec_mult(alg.transpose(inputSet), outputSet)); }
 
                 bias = stat.mean(outputSet) - alg.dot(weights, x_means);
 
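The isnan to std::isnan change picks up the <cmath> overload for double, which is the portable spelling in C++ (unqualified isnan may resolve to the C macro or not be declared at all, depending on headers and platform). A minimal standalone illustration, not part of the commit:

#include <cmath>
#include <iostream>

int main(){
    double x = std::nan("");                     // a quiet NaN
    std::cout << std::isnan(x) << std::endl;     // prints 1
    std::cout << std::isnan(42.0) << std::endl;  // prints 0
    return 0;
}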
@@ -198,7 +519,6 @@ namespace MLPP{
         catch(int err_num){
             std::cout << "ERR " << err_num << ": Resulting matrix was noninvertible/degenerate, and so the normal equation could not be performed. Try utilizing gradient descent." << std::endl;
         }
 
     }
 
     double LinReg::score(){
@@ -21,6 +21,13 @@ namespace MLPP{
             void gradientDescent(double learning_rate, int max_epoch, bool UI = 1);
             void SGD(double learning_rate, int max_epoch, bool UI = 1);
             void MBGD(double learning_rate, int max_epoch, int mini_batch_size, bool UI = 1);
+            void Momentum(double learning_rate, int max_epoch, int mini_batch_size, double gamma, bool UI = 1);
+            void NAG(double learning_rate, int max_epoch, int mini_batch_size, double gamma, bool UI = 1);
+            void Adagrad(double learning_rate, int max_epoch, int mini_batch_size, double e, bool UI = 1);
+            void Adadelta(double learning_rate, int max_epoch, int mini_batch_size, double b1, double e, bool UI = 1);
+            void Adam(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI = 1);
+            void Adamax(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI = 1);
+            void Nadam(double learning_rate, int max_epoch, int mini_batch_size, double b1, double b2, double e, bool UI = 1);
             void normalEquation();
             double score();
             void save(std::string fileName);
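A minimal usage sketch of the new optimizer entry points, mirroring the Nadam call added to main.cpp further below; the include paths, data, and hyperparameters here are illustrative assumptions:

#include <vector>
#include "MLPP/LinAlg/LinAlg.hpp" // assumed include paths
#include "MLPP/LinReg/LinReg.hpp"

int main(){
    MLPP::LinAlg alg;
    std::vector<std::vector<double>> inputSet = {{1,2,3,4,5}, {3,5,9,12,15}};
    std::vector<double> outputSet = {2,4,6,8,10};

    MLPP::LinReg model(alg.transpose(inputSet), outputSet);
    // learning_rate, max_epoch, mini_batch_size, b1, b2, e, UI
    model.Adam(0.1, 5, 1, 0.9, 0.999, 1e-8, 0);
    return 0;
}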
@@ -7,6 +7,7 @@
 #include <iostream>
 #include <random>
 #include "Reg.hpp"
+#include "LinAlg/LinAlg.hpp"
 #include "Activation/Activation.hpp"
 
 namespace MLPP{
@@ -70,19 +71,48 @@ namespace MLPP{
     }
 
     std::vector<double> Reg::regWeights(std::vector<double> weights, double lambda, double alpha, std::string reg){
-        for(int i = 0; i < weights.size(); i++){
-            weights[i] -= regDerivTerm(weights, lambda, alpha, reg, i);
-        }
-        return weights;
+        LinAlg alg;
+        return alg.subtraction(weights, regDerivTerm(weights, lambda, alpha, reg));
+        // for(int i = 0; i < weights.size(); i++){
+        //     weights[i] -= regDerivTerm(weights, lambda, alpha, reg, i);
+        // }
+        // return weights;
     }
 
     std::vector<std::vector<double>> Reg::regWeights(std::vector<std::vector<double>> weights, double lambda, double alpha, std::string reg){
-        for(int i = 0; i < weights.size(); i++){
-            for(int j = 0; j < weights[i].size(); j++){
-                weights[i][j] -= regDerivTerm(weights, lambda, alpha, reg, i, j);
-            }
-        }
+        LinAlg alg;
+        return alg.subtraction(weights, regDerivTerm(weights, lambda, alpha, reg));
+        // for(int i = 0; i < weights.size(); i++){
+        //     for(int j = 0; j < weights[i].size(); j++){
+        //         weights[i][j] -= regDerivTerm(weights, lambda, alpha, reg, i, j);
+        //     }
+        // }
+        // return weights;
+    }
+
+    std::vector<double> Reg::regDerivTerm(std::vector<double> weights, double lambda, double alpha, std::string reg){
+        std::vector<double> regDeriv;
+        regDeriv.resize(weights.size());
+
+        for(int i = 0; i < regDeriv.size(); i++){
+            regDeriv[i] = regDerivTerm(weights, lambda, alpha, reg, i);
+        }
+        return regDeriv;
+    }
+
+    std::vector<std::vector<double>> Reg::regDerivTerm(std::vector<std::vector<double>> weights, double lambda, double alpha, std::string reg){
+        std::vector<std::vector<double>> regDeriv;
+        regDeriv.resize(weights.size());
+        for(int i = 0; i < regDeriv.size(); i++){
+            regDeriv[i].resize(weights[0].size());
+        }
+
+        for(int i = 0; i < regDeriv.size(); i++){
+            for(int j = 0; j < regDeriv[i].size(); j++){
+                regDeriv[i][j] = regDerivTerm(weights, lambda, alpha, reg, i, j);
+            }
+        }
-        return weights;
+        return regDeriv;
     }
 
     double Reg::regDerivTerm(std::vector<double> weights, double lambda, double alpha, std::string reg, int j){
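The refactor above replaces the element-by-element loops with one vectorized call per overload: regWeights now subtracts the full regularization derivative vector (or matrix) in a single LinAlg call. Conceptually, if R(w) is the penalty whose per-coordinate derivative the scalar regDerivTerm returns (for example lambda * w_j under a Ridge penalty, an assumption here), then:

    \text{regWeights}(w) = w - \nabla_w R(w)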
@@ -19,6 +19,9 @@ namespace MLPP{
             std::vector<double> regWeights(std::vector<double> weights, double lambda, double alpha, std::string reg);
             std::vector<std::vector<double>> regWeights(std::vector<std::vector<double>> weights, double lambda, double alpha, std::string reg);
 
+            std::vector<double> regDerivTerm(std::vector<double> weights, double lambda, double alpha, std::string reg);
+            std::vector<std::vector<double>> regDerivTerm(std::vector<std::vector<double>>, double lambda, double alpha, std::string reg);
+
         private:
             double regDerivTerm(std::vector<double> weights, double lambda, double alpha, std::string reg, int j);
             double regDerivTerm(std::vector<std::vector<double>> weights, double lambda, double alpha, std::string reg, int i, int j);
Binary file not shown.

main.cpp (51 lines changed)
@@ -212,18 +212,43 @@ int main() {
     // alg.printVector(model.modelSetTest(inputSet));
 
     // // MULIVARIATE LINEAR REGRESSION
-    // std::vector<std::vector<double>> inputSet = {{1,2,3,4,5,6,7,8,9,10}, {3,5,9,12,15,18,21,24,27,30}};
-    // std::vector<double> outputSet = {2,4,6,8,10,12,14,16,18,20};
+    std::vector<std::vector<double>> inputSet = {{1,2,3,4,5,6,7,8,9,10}, {3,5,9,12,15,18,21,24,27,30}};
+    std::vector<double> outputSet = {2,4,6,8,10,12,14,16,18,20};
 
-    // LinReg model(alg.transpose(inputSet), outputSet); // Can use Lasso, Ridge, ElasticNet Reg
+    LinReg model(alg.transpose(inputSet), outputSet); // Can use Lasso, Ridge, ElasticNet Reg
 
-    // model.gradientDescent(0.001, 30000, 0);
-    // model.SGD(0.001, 30000, 1);
+    //model.gradientDescent(0.001, 30, 0);
+    //model.SGD(0.001, 30000, 1);
     // model.MBGD(0.001, 10000, 2, 1);
-    // model.normalEquation();
+    //model.normalEquation();
 
-    // alg.printVector(model.modelSetTest((alg.transpose(inputSet))));
-    // std::cout << "ACCURACY: " << 100 * model.score() << "%" << std::endl;
+    LinReg adamModel(alg.transpose(inputSet), outputSet);
+    adamModel.Nadam(0.1, 5, 1, 0.9, 0.999, 1e-8, 0); // Change batch size = sgd, bgd
+    alg.printVector(adamModel.modelSetTest(alg.transpose(inputSet)));
+    std::cout << "ACCURACY: " << 100 * adamModel.score() << "%" << std::endl;
+
+    // const int TRIAL_NUM = 1000;
+
+    // double scoreSGD = 0;
+    // double scoreADAM = 0;
+    // for(int i = 0; i < TRIAL_NUM; i++){
+    //     LinReg model(alg.transpose(inputSet), outputSet);
+    //     model.MBGD(0.001, 5, 1, 0);
+    //     scoreSGD += model.score();
+
+    //     LinReg adamModel(alg.transpose(inputSet), outputSet);
+    //     adamModel.Adam(0.1, 5, 1, 0.9, 0.999, 1e-8, 0); // Change batch size = sgd, bgd
+    //     scoreADAM += adamModel.score();
+    // }
+
+    // std::cout << "ACCURACY, AVG, SGD: " << 100 * scoreSGD/TRIAL_NUM << "%" << std::endl;
+
+    // std::cout << std::endl;
+
+    // std::cout << "ACCURACY, AVG, ADAM: " << 100 * scoreADAM/TRIAL_NUM << "%" << std::endl;
 
     // std::cout << "Total epoch num: 300" << std::endl;
@@ -646,12 +671,12 @@ int main() {
     // std::vector<double> outputSet;
     // data.setData(30, "/Users/marcmelikyan/Desktop/Data/BreastCancerSVM.csv", inputSet, outputSet);
 
-    std::vector<std::vector<double>> inputSet;
-    std::vector<double> outputSet;
-    data.setData(4, "/Users/marcmelikyan/Desktop/Data/IrisSVM.csv", inputSet, outputSet);
+    // std::vector<std::vector<double>> inputSet;
+    // std::vector<double> outputSet;
+    // data.setData(4, "/Users/marcmelikyan/Desktop/Data/IrisSVM.csv", inputSet, outputSet);
 
-    DualSVC kernelSVM(inputSet, outputSet, 1000);
-    kernelSVM.gradientDescent(0.0001, 20, 1);
+    // DualSVC kernelSVM(inputSet, outputSet, 1000);
+    // kernelSVM.gradientDescent(0.0001, 20, 1);
 
 
     return 0;