Fixed warnings in MLPPData.

This commit is contained in:
Relintai 2023-02-12 18:03:17 +01:00
parent 69bc0f6f26
commit bee57218a2
2 changed files with 76 additions and 73 deletions

View File

@ -543,15 +543,15 @@ void MLPPData::setData(int k, std::string fileName, std::vector<std::vector<real
void MLPPData::printData(std::vector<std::string> inputName, std::string outputName, std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet) { void MLPPData::printData(std::vector<std::string> inputName, std::string outputName, std::vector<std::vector<real_t>> inputSet, std::vector<real_t> outputSet) {
MLPPLinAlg alg; MLPPLinAlg alg;
inputSet = alg.transpose(inputSet); inputSet = alg.transpose(inputSet);
for (int i = 0; i < inputSet.size(); i++) { for (uint32_t i = 0; i < inputSet.size(); i++) {
std::cout << inputName[i] << std::endl; std::cout << inputName[i] << std::endl;
for (int j = 0; j < inputSet[i].size(); j++) { for (uint32_t j = 0; j < inputSet[i].size(); j++) {
std::cout << inputSet[i][j] << std::endl; std::cout << inputSet[i][j] << std::endl;
} }
} }
std::cout << outputName << std::endl; std::cout << outputName << std::endl;
for (int i = 0; i < outputSet.size(); i++) { for (uint32_t i = 0; i < outputSet.size(); i++) {
std::cout << outputSet[i] << std::endl; std::cout << outputSet[i] << std::endl;
} }
} }
@ -585,9 +585,9 @@ void MLPPData::setData(int k, std::string fileName, std::vector<std::vector<real
void MLPPData::printData(std::vector<std::string> inputName, std::vector<std::vector<real_t>> inputSet) { void MLPPData::printData(std::vector<std::string> inputName, std::vector<std::vector<real_t>> inputSet) {
MLPPLinAlg alg; MLPPLinAlg alg;
inputSet = alg.transpose(inputSet); inputSet = alg.transpose(inputSet);
for (int i = 0; i < inputSet.size(); i++) { for (uint32_t i = 0; i < inputSet.size(); i++) {
std::cout << inputName[i] << std::endl; std::cout << inputName[i] << std::endl;
for (int j = 0; j < inputSet[i].size(); j++) { for (uint32_t j = 0; j < inputSet[i].size(); j++) {
std::cout << inputSet[i][j] << std::endl; std::cout << inputSet[i][j] << std::endl;
} }
} }
@ -620,12 +620,12 @@ void MLPPData::setData(std::string fileName, std::vector<real_t> &inputSet, std:
void MLPPData::printData(std::string &inputName, std::string &outputName, std::vector<real_t> &inputSet, std::vector<real_t> &outputSet) { void MLPPData::printData(std::string &inputName, std::string &outputName, std::vector<real_t> &inputSet, std::vector<real_t> &outputSet) {
std::cout << inputName << std::endl; std::cout << inputName << std::endl;
for (int i = 0; i < inputSet.size(); i++) { for (uint32_t i = 0; i < inputSet.size(); i++) {
std::cout << inputSet[i] << std::endl; std::cout << inputSet[i] << std::endl;
} }
std::cout << outputName << std::endl; std::cout << outputName << std::endl;
for (int i = 0; i < inputSet.size(); i++) { for (uint32_t i = 0; i < inputSet.size(); i++) {
std::cout << outputSet[i] << std::endl; std::cout << outputSet[i] << std::endl;
} }
} }
@ -634,11 +634,11 @@ void MLPPData::printData(std::string &inputName, std::string &outputName, std::v
std::vector<std::vector<real_t>> MLPPData::rgb2gray(std::vector<std::vector<std::vector<real_t>>> input) { std::vector<std::vector<real_t>> MLPPData::rgb2gray(std::vector<std::vector<std::vector<real_t>>> input) {
std::vector<std::vector<real_t>> grayScale; std::vector<std::vector<real_t>> grayScale;
grayScale.resize(input[0].size()); grayScale.resize(input[0].size());
for (int i = 0; i < grayScale.size(); i++) { for (uint32_t i = 0; i < grayScale.size(); i++) {
grayScale[i].resize(input[0][i].size()); grayScale[i].resize(input[0][i].size());
} }
for (int i = 0; i < grayScale.size(); i++) { for (uint32_t i = 0; i < grayScale.size(); i++) {
for (int j = 0; j < grayScale[i].size(); j++) { for (uint32_t j = 0; j < grayScale[i].size(); j++) {
grayScale[i][j] = 0.299 * input[0][i][j] + 0.587 * input[1][i][j] + 0.114 * input[2][i][j]; grayScale[i][j] = 0.299 * input[0][i][j] + 0.587 * input[1][i][j] + 0.114 * input[2][i][j];
} }
} }
@ -649,8 +649,8 @@ std::vector<std::vector<std::vector<real_t>>> MLPPData::rgb2ycbcr(std::vector<st
MLPPLinAlg alg; MLPPLinAlg alg;
std::vector<std::vector<std::vector<real_t>>> YCbCr; std::vector<std::vector<std::vector<real_t>>> YCbCr;
YCbCr = alg.resize(YCbCr, input); YCbCr = alg.resize(YCbCr, input);
for (int i = 0; i < YCbCr[0].size(); i++) { for (uint32_t i = 0; i < YCbCr[0].size(); i++) {
for (int j = 0; j < YCbCr[0][i].size(); j++) { for (uint32_t j = 0; j < YCbCr[0][i].size(); j++) {
YCbCr[0][i][j] = 0.299 * input[0][i][j] + 0.587 * input[1][i][j] + 0.114 * input[2][i][j]; YCbCr[0][i][j] = 0.299 * input[0][i][j] + 0.587 * input[1][i][j] + 0.114 * input[2][i][j];
YCbCr[1][i][j] = -0.169 * input[0][i][j] - 0.331 * input[1][i][j] + 0.500 * input[2][i][j]; YCbCr[1][i][j] = -0.169 * input[0][i][j] - 0.331 * input[1][i][j] + 0.500 * input[2][i][j];
YCbCr[2][i][j] = 0.500 * input[0][i][j] - 0.419 * input[1][i][j] - 0.081 * input[2][i][j]; YCbCr[2][i][j] = 0.500 * input[0][i][j] - 0.419 * input[1][i][j] - 0.081 * input[2][i][j];
@ -665,8 +665,8 @@ std::vector<std::vector<std::vector<real_t>>> MLPPData::rgb2hsv(std::vector<std:
MLPPLinAlg alg; MLPPLinAlg alg;
std::vector<std::vector<std::vector<real_t>>> HSV; std::vector<std::vector<std::vector<real_t>>> HSV;
HSV = alg.resize(HSV, input); HSV = alg.resize(HSV, input);
for (int i = 0; i < HSV[0].size(); i++) { for (uint32_t i = 0; i < HSV[0].size(); i++) {
for (int j = 0; j < HSV[0][i].size(); j++) { for (uint32_t j = 0; j < HSV[0][i].size(); j++) {
real_t rPrime = input[0][i][j] / 255; real_t rPrime = input[0][i][j] / 255;
real_t gPrime = input[1][i][j] / 255; real_t gPrime = input[1][i][j] / 255;
real_t bPrime = input[2][i][j] / 255; real_t bPrime = input[2][i][j] / 255;
@ -721,7 +721,7 @@ std::vector<std::vector<std::vector<real_t>>> MLPPData::xyz2rgb(std::vector<std:
// TEXT-BASED & NLP // TEXT-BASED & NLP
std::string MLPPData::toLower(std::string text) { std::string MLPPData::toLower(std::string text) {
for (int i = 0; i < text.size(); i++) { for (uint32_t i = 0; i < text.size(); i++) {
text[i] = tolower(text[i]); text[i] = tolower(text[i]);
} }
return text; return text;
@ -729,7 +729,7 @@ std::string MLPPData::toLower(std::string text) {
std::vector<char> MLPPData::split(std::string text) { std::vector<char> MLPPData::split(std::string text) {
std::vector<char> split_data; std::vector<char> split_data;
for (int i = 0; i < text.size(); i++) { for (uint32_t i = 0; i < text.size(); i++) {
split_data.push_back(text[i]); split_data.push_back(text[i]);
} }
return split_data; return split_data;
@ -739,7 +739,7 @@ std::vector<std::string> MLPPData::splitSentences(std::string data) {
std::vector<std::string> sentences; std::vector<std::string> sentences;
std::string currentStr = ""; std::string currentStr = "";
for (int i = 0; i < data.length(); i++) { for (uint32_t i = 0; i < data.length(); i++) {
currentStr.push_back(data[i]); currentStr.push_back(data[i]);
if (data[i] == '.' && data[i + 1] != '.') { if (data[i] == '.' && data[i + 1] != '.') {
sentences.push_back(currentStr); sentences.push_back(currentStr);
@ -751,9 +751,9 @@ std::vector<std::string> MLPPData::splitSentences(std::string data) {
} }
std::vector<std::string> MLPPData::removeSpaces(std::vector<std::string> data) { std::vector<std::string> MLPPData::removeSpaces(std::vector<std::string> data) {
for (int i = 0; i < data.size(); i++) { for (uint32_t i = 0; i < data.size(); i++) {
auto it = data[i].begin(); auto it = data[i].begin();
for (int j = 0; j < data[i].length(); j++) { for (uint32_t j = 0; j < data[i].length(); j++) {
if (data[i][j] == ' ') { if (data[i][j] == ' ') {
data[i].erase(it); data[i].erase(it);
} }
@ -764,7 +764,7 @@ std::vector<std::string> MLPPData::removeSpaces(std::vector<std::string> data) {
} }
std::vector<std::string> MLPPData::removeNullByte(std::vector<std::string> data) { std::vector<std::string> MLPPData::removeNullByte(std::vector<std::string> data) {
for (int i = 0; i < data.size(); i++) { for (uint32_t i = 0; i < data.size(); i++) {
if (data[i] == "\0") { if (data[i] == "\0") {
data.erase(data.begin() + i); data.erase(data.begin() + i);
} }
@ -775,7 +775,7 @@ std::vector<std::string> MLPPData::removeNullByte(std::vector<std::string> data)
std::vector<std::string> MLPPData::segment(std::string text) { std::vector<std::string> MLPPData::segment(std::string text) {
std::vector<std::string> segmented_data; std::vector<std::string> segmented_data;
int prev_delim = 0; int prev_delim = 0;
for (int i = 0; i < text.length(); i++) { for (uint32_t i = 0; i < text.length(); i++) {
if (text[i] == ' ') { if (text[i] == ' ') {
segmented_data.push_back(text.substr(prev_delim, i - prev_delim)); segmented_data.push_back(text.substr(prev_delim, i - prev_delim));
prev_delim = i + 1; prev_delim = i + 1;
@ -800,7 +800,7 @@ std::vector<real_t> MLPPData::tokenize(std::string text) {
std::vector<std::string> segmented_data = segment(text); std::vector<std::string> segmented_data = segment(text);
std::vector<real_t> tokenized_data; std::vector<real_t> tokenized_data;
tokenized_data.resize(segmented_data.size()); tokenized_data.resize(segmented_data.size());
for (int i = 0; i < segmented_data.size(); i++) { for (uint32_t i = 0; i < segmented_data.size(); i++) {
for (int j = i - 1; j >= 0; j--) { for (int j = i - 1; j >= 0; j--) {
if (segmented_data[i] == segmented_data[j]) { if (segmented_data[i] == segmented_data[j]) {
tokenized_data[i] = tokenized_data[j]; tokenized_data[i] = tokenized_data[j];
@ -821,8 +821,8 @@ std::vector<std::string> MLPPData::removeStopWords(std::string text) {
std::vector<std::string> stopWords = { "i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", "yours", "yourself", "yourselves", "he", "him", "his", "himself", "she", "her", "hers", "herself", "it", "its", "itself", "they", "them", "their", "theirs", "themselves", "what", "which", "who", "whom", "this", "that", "these", "those", "am", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had", "having", "do", "does", "did", "doing", "a", "an", "the", "and", "but", "if", "or", "because", "as", "until", "while", "of", "at", "by", "for", "with", "about", "against", "between", "into", "through", "during", "before", "after", "above", "below", "to", "from", "up", "down", "in", "out", "on", "off", "over", "under", "again", "further", "then", "once", "here", "there", "when", "where", "why", "how", "all", "any", "both", "each", "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so", "than", "too", "very", "s", "t", "can", "will", "just", "don", "should", "now" }; std::vector<std::string> stopWords = { "i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", "yours", "yourself", "yourselves", "he", "him", "his", "himself", "she", "her", "hers", "herself", "it", "its", "itself", "they", "them", "their", "theirs", "themselves", "what", "which", "who", "whom", "this", "that", "these", "those", "am", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had", "having", "do", "does", "did", "doing", "a", "an", "the", "and", "but", "if", "or", "because", "as", "until", "while", "of", "at", "by", "for", "with", "about", "against", "between", "into", "through", "during", "before", "after", "above", "below", "to", "from", "up", "down", "in", "out", "on", "off", "over", "under", "again", "further", "then", "once", "here", "there", "when", "where", "why", "how", "all", "any", "both", "each", "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so", "than", "too", "very", "s", "t", "can", "will", "just", "don", "should", "now" };
std::vector<std::string> segmented_data = removeSpaces(segment(toLower(text))); std::vector<std::string> segmented_data = removeSpaces(segment(toLower(text)));
for (int i = 0; i < stopWords.size(); i++) { for (uint32_t i = 0; i < stopWords.size(); i++) {
for (int j = 0; j < segmented_data.size(); j++) { for (uint32_t j = 0; j < segmented_data.size(); j++) {
if (segmented_data[j] == stopWords[i]) { if (segmented_data[j] == stopWords[i]) {
segmented_data.erase(segmented_data.begin() + j); segmented_data.erase(segmented_data.begin() + j);
} }
@ -833,8 +833,8 @@ std::vector<std::string> MLPPData::removeStopWords(std::string text) {
std::vector<std::string> MLPPData::removeStopWords(std::vector<std::string> segmented_data) { std::vector<std::string> MLPPData::removeStopWords(std::vector<std::string> segmented_data) {
std::vector<std::string> stopWords = { "i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", "yours", "yourself", "yourselves", "he", "him", "his", "himself", "she", "her", "hers", "herself", "it", "its", "itself", "they", "them", "their", "theirs", "themselves", "what", "which", "who", "whom", "this", "that", "these", "those", "am", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had", "having", "do", "does", "did", "doing", "a", "an", "the", "and", "but", "if", "or", "because", "as", "until", "while", "of", "at", "by", "for", "with", "about", "against", "between", "into", "through", "during", "before", "after", "above", "below", "to", "from", "up", "down", "in", "out", "on", "off", "over", "under", "again", "further", "then", "once", "here", "there", "when", "where", "why", "how", "all", "any", "both", "each", "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so", "than", "too", "very", "s", "t", "can", "will", "just", "don", "should", "now" }; std::vector<std::string> stopWords = { "i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", "yours", "yourself", "yourselves", "he", "him", "his", "himself", "she", "her", "hers", "herself", "it", "its", "itself", "they", "them", "their", "theirs", "themselves", "what", "which", "who", "whom", "this", "that", "these", "those", "am", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had", "having", "do", "does", "did", "doing", "a", "an", "the", "and", "but", "if", "or", "because", "as", "until", "while", "of", "at", "by", "for", "with", "about", "against", "between", "into", "through", "during", "before", "after", "above", "below", "to", "from", "up", "down", "in", "out", "on", "off", "over", "under", "again", "further", "then", "once", "here", "there", "when", "where", "why", "how", "all", "any", "both", "each", "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so", "than", "too", "very", "s", "t", "can", "will", "just", "don", "should", "now" };
for (int i = 0; i < segmented_data.size(); i++) { for (uint32_t i = 0; i < segmented_data.size(); i++) {
for (int j = 0; j < stopWords.size(); j++) { for (uint32_t j = 0; j < stopWords.size(); j++) {
if (segmented_data[i] == stopWords[j]) { if (segmented_data[i] == stopWords[j]) {
segmented_data.erase(segmented_data.begin() + i); segmented_data.erase(segmented_data.begin() + i);
} }
@ -853,8 +853,8 @@ std::string MLPPData::stemming(std::string text) {
text[text.length() + i] = padding; // ' ' will be our padding value text[text.length() + i] = padding; // ' ' will be our padding value
} }
for (int i = 0; i < text.size(); i++) { for (uint32_t i = 0; i < text.size(); i++) {
for (int j = 0; j < suffixes.size(); j++) { for (uint32_t j = 0; j < suffixes.size(); j++) {
if (text.substr(i, suffixes[j].length()) == suffixes[j] && (text[i + suffixes[j].length()] == ' ' || text[i + suffixes[j].length()] == ',' || text[i + suffixes[j].length()] == '-' || text[i + suffixes[j].length()] == '.' || text[i + suffixes[j].length()] == '!')) { if (text.substr(i, suffixes[j].length()) == suffixes[j] && (text[i + suffixes[j].length()] == ' ' || text[i + suffixes[j].length()] == ',' || text[i + suffixes[j].length()] == '-' || text[i + suffixes[j].length()] == '.' || text[i + suffixes[j].length()] == '!')) {
text.erase(i, suffixes[j].length()); text.erase(i, suffixes[j].length());
} }
@ -879,20 +879,20 @@ std::vector<std::vector<real_t>> MLPPData::BOW(std::vector<std::string> sentence
std::vector<std::vector<std::string>> segmented_sentences; std::vector<std::vector<std::string>> segmented_sentences;
segmented_sentences.resize(sentences.size()); segmented_sentences.resize(sentences.size());
for (int i = 0; i < sentences.size(); i++) { for (uint32_t i = 0; i < sentences.size(); i++) {
segmented_sentences[i] = removeStopWords(sentences[i]); segmented_sentences[i] = removeStopWords(sentences[i]);
} }
std::vector<std::vector<real_t>> bow; std::vector<std::vector<real_t>> bow;
bow.resize(sentences.size()); bow.resize(sentences.size());
for (int i = 0; i < bow.size(); i++) { for (uint32_t i = 0; i < bow.size(); i++) {
bow[i].resize(wordList.size()); bow[i].resize(wordList.size());
} }
for (int i = 0; i < segmented_sentences.size(); i++) { for (uint32_t i = 0; i < segmented_sentences.size(); i++) {
for (int j = 0; j < segmented_sentences[i].size(); j++) { for (uint32_t j = 0; j < segmented_sentences[i].size(); j++) {
for (int k = 0; k < wordList.size(); k++) { for (uint32_t k = 0; k < wordList.size(); k++) {
if (segmented_sentences[i][j] == wordList[k]) { if (segmented_sentences[i][j] == wordList[k]) {
if (type == "Binary") { if (type == "Binary") {
bow[i][k] = 1; bow[i][k] = 1;
@ -913,7 +913,7 @@ std::vector<std::vector<real_t>> MLPPData::TFIDF(std::vector<std::string> senten
std::vector<std::vector<std::string>> segmented_sentences; std::vector<std::vector<std::string>> segmented_sentences;
segmented_sentences.resize(sentences.size()); segmented_sentences.resize(sentences.size());
for (int i = 0; i < sentences.size(); i++) { for (uint32_t i = 0; i < sentences.size(); i++) {
segmented_sentences[i] = removeStopWords(sentences[i]); segmented_sentences[i] = removeStopWords(sentences[i]);
} }
@ -921,13 +921,13 @@ std::vector<std::vector<real_t>> MLPPData::TFIDF(std::vector<std::string> senten
std::vector<int> frequency; std::vector<int> frequency;
frequency.resize(wordList.size()); frequency.resize(wordList.size());
TF.resize(segmented_sentences.size()); TF.resize(segmented_sentences.size());
for (int i = 0; i < TF.size(); i++) { for (uint32_t i = 0; i < TF.size(); i++) {
TF[i].resize(wordList.size()); TF[i].resize(wordList.size());
} }
for (int i = 0; i < segmented_sentences.size(); i++) { for (uint32_t i = 0; i < segmented_sentences.size(); i++) {
std::vector<bool> present(wordList.size(), 0); std::vector<bool> present(wordList.size(), false);
for (int j = 0; j < segmented_sentences[i].size(); j++) { for (uint32_t j = 0; j < segmented_sentences[i].size(); j++) {
for (int k = 0; k < wordList.size(); k++) { for (uint32_t k = 0; k < wordList.size(); k++) {
if (segmented_sentences[i][j] == wordList[k]) { if (segmented_sentences[i][j] == wordList[k]) {
TF[i][k]++; TF[i][k]++;
if (!present[k]) { if (!present[k]) {
@ -943,18 +943,18 @@ std::vector<std::vector<real_t>> MLPPData::TFIDF(std::vector<std::string> senten
std::vector<real_t> IDF; std::vector<real_t> IDF;
IDF.resize(frequency.size()); IDF.resize(frequency.size());
for (int i = 0; i < IDF.size(); i++) { for (uint32_t i = 0; i < IDF.size(); i++) {
IDF[i] = std::log((real_t)segmented_sentences.size() / (real_t)frequency[i]); IDF[i] = std::log((real_t)segmented_sentences.size() / (real_t)frequency[i]);
} }
std::vector<std::vector<real_t>> TFIDF; std::vector<std::vector<real_t>> TFIDF;
TFIDF.resize(segmented_sentences.size()); TFIDF.resize(segmented_sentences.size());
for (int i = 0; i < TFIDF.size(); i++) { for (uint32_t i = 0; i < TFIDF.size(); i++) {
TFIDF[i].resize(wordList.size()); TFIDF[i].resize(wordList.size());
} }
for (int i = 0; i < TFIDF.size(); i++) { for (uint32_t i = 0; i < TFIDF.size(); i++) {
for (int j = 0; j < TFIDF[i].size(); j++) { for (uint32_t j = 0; j < TFIDF[i].size(); j++) {
TFIDF[i][j] = TF[i][j] * IDF[j]; TFIDF[i][j] = TF[i][j] * IDF[j];
} }
} }
@ -968,15 +968,15 @@ std::tuple<std::vector<std::vector<real_t>>, std::vector<std::string>> MLPPData:
std::vector<std::vector<std::string>> segmented_sentences; std::vector<std::vector<std::string>> segmented_sentences;
segmented_sentences.resize(sentences.size()); segmented_sentences.resize(sentences.size());
for (int i = 0; i < sentences.size(); i++) { for (uint32_t i = 0; i < sentences.size(); i++) {
segmented_sentences[i] = removeStopWords(sentences[i]); segmented_sentences[i] = removeStopWords(sentences[i]);
} }
std::vector<std::string> inputStrings; std::vector<std::string> inputStrings;
std::vector<std::string> outputStrings; std::vector<std::string> outputStrings;
for (int i = 0; i < segmented_sentences.size(); i++) { for (uint32_t i = 0; i < segmented_sentences.size(); i++) {
for (int j = 0; j < segmented_sentences[i].size(); j++) { for (uint32_t j = 0; j < segmented_sentences[i].size(); j++) {
for (int k = windowSize; k > 0; k--) { for (int k = windowSize; k > 0; k--) {
if (j - k >= 0) { if (j - k >= 0) {
inputStrings.push_back(segmented_sentences[i][j]); inputStrings.push_back(segmented_sentences[i][j]);
@ -991,7 +991,7 @@ std::tuple<std::vector<std::vector<real_t>>, std::vector<std::string>> MLPPData:
} }
} }
int inputSize = inputStrings.size(); uint32_t inputSize = inputStrings.size();
inputStrings.insert(inputStrings.end(), outputStrings.begin(), outputStrings.end()); inputStrings.insert(inputStrings.end(), outputStrings.begin(), outputStrings.end());
@ -1000,21 +1000,23 @@ std::tuple<std::vector<std::vector<real_t>>, std::vector<std::string>> MLPPData:
std::vector<std::vector<real_t>> inputSet; std::vector<std::vector<real_t>> inputSet;
std::vector<std::vector<real_t>> outputSet; std::vector<std::vector<real_t>> outputSet;
for (int i = 0; i < inputSize; i++) { for (uint32_t i = 0; i < inputSize; i++) {
inputSet.push_back(BOW[i]); inputSet.push_back(BOW[i]);
} }
for (int i = inputSize; i < BOW.size(); i++) { for (uint32_t i = inputSize; i < BOW.size(); i++) {
outputSet.push_back(BOW[i]); outputSet.push_back(BOW[i]);
} }
MLPPLinAlg alg;
MLPPSoftmaxNetOld *model; MLPPSoftmaxNetOld *model;
if (type == "Skipgram") { if (type == "Skipgram") {
model = new MLPPSoftmaxNetOld(outputSet, inputSet, dimension); model = new MLPPSoftmaxNetOld(outputSet, inputSet, dimension);
} else { // else = CBOW. We maintain it is a default. } else { // else = CBOW. We maintain it is a default.
model = new MLPPSoftmaxNetOld(inputSet, outputSet, dimension); model = new MLPPSoftmaxNetOld(inputSet, outputSet, dimension);
} }
model->gradientDescent(learning_rate, max_epoch, 1);
model->gradientDescent(learning_rate, max_epoch, true);
std::vector<std::vector<real_t>> wordEmbeddings = model->getEmbeddings(); std::vector<std::vector<real_t>> wordEmbeddings = model->getEmbeddings();
delete model; delete model;
@ -1034,15 +1036,15 @@ MLPPData::WordsToVecResult MLPPData::word_to_vec(std::vector<std::string> senten
std::vector<std::vector<std::string>> segmented_sentences; std::vector<std::vector<std::string>> segmented_sentences;
segmented_sentences.resize(sentences.size()); segmented_sentences.resize(sentences.size());
for (int i = 0; i < sentences.size(); i++) { for (uint32_t i = 0; i < sentences.size(); i++) {
segmented_sentences[i] = removeStopWords(sentences[i]); segmented_sentences[i] = removeStopWords(sentences[i]);
} }
std::vector<std::string> inputStrings; std::vector<std::string> inputStrings;
std::vector<std::string> outputStrings; std::vector<std::string> outputStrings;
for (int i = 0; i < segmented_sentences.size(); i++) { for (uint32_t i = 0; i < segmented_sentences.size(); i++) {
for (int j = 0; j < segmented_sentences[i].size(); j++) { for (uint32_t j = 0; j < segmented_sentences[i].size(); j++) {
for (int k = windowSize; k > 0; k--) { for (int k = windowSize; k > 0; k--) {
if (j - k >= 0) { if (j - k >= 0) {
inputStrings.push_back(segmented_sentences[i][j]); inputStrings.push_back(segmented_sentences[i][j]);
@ -1057,7 +1059,7 @@ MLPPData::WordsToVecResult MLPPData::word_to_vec(std::vector<std::string> senten
} }
} }
int inputSize = inputStrings.size(); uint32_t inputSize = inputStrings.size();
inputStrings.insert(inputStrings.end(), outputStrings.begin(), outputStrings.end()); inputStrings.insert(inputStrings.end(), outputStrings.begin(), outputStrings.end());
@ -1066,20 +1068,22 @@ MLPPData::WordsToVecResult MLPPData::word_to_vec(std::vector<std::string> senten
std::vector<std::vector<real_t>> inputSet; std::vector<std::vector<real_t>> inputSet;
std::vector<std::vector<real_t>> outputSet; std::vector<std::vector<real_t>> outputSet;
for (int i = 0; i < inputSize; i++) { for (uint32_t i = 0; i < inputSize; i++) {
inputSet.push_back(BOW[i]); inputSet.push_back(BOW[i]);
} }
for (int i = inputSize; i < BOW.size(); i++) { for (uint32_t i = inputSize; i < BOW.size(); i++) {
outputSet.push_back(BOW[i]); outputSet.push_back(BOW[i]);
} }
MLPPLinAlg alg;
MLPPSoftmaxNetOld *model; MLPPSoftmaxNetOld *model;
if (type == "Skipgram") { if (type == "Skipgram") {
model = new MLPPSoftmaxNetOld(outputSet, inputSet, dimension); model = new MLPPSoftmaxNetOld(outputSet, inputSet, dimension);
} else { // else = CBOW. We maintain it is a default. } else { // else = CBOW. We maintain it is a default.
model = new MLPPSoftmaxNetOld(inputSet, outputSet, dimension); model = new MLPPSoftmaxNetOld(inputSet, outputSet, dimension);
} }
model->gradientDescent(learning_rate, max_epoch, false); model->gradientDescent(learning_rate, max_epoch, false);
res.word_embeddings = model->getEmbeddings(); res.word_embeddings = model->getEmbeddings();
@ -1106,7 +1110,7 @@ std::vector<std::vector<real_t>> MLPPData::LSA(std::vector<std::string> sentence
std::vector<std::string> MLPPData::createWordList(std::vector<std::string> sentences) { std::vector<std::string> MLPPData::createWordList(std::vector<std::string> sentences) {
std::string combinedText = ""; std::string combinedText = "";
for (int i = 0; i < sentences.size(); i++) { for (uint32_t i = 0; i < sentences.size(); i++) {
if (i != 0) { if (i != 0) {
combinedText += " "; combinedText += " ";
} }
@ -1138,13 +1142,13 @@ std::vector<std::vector<real_t>> MLPPData::featureScaling(std::vector<std::vecto
max_elements.resize(X.size()); max_elements.resize(X.size());
min_elements.resize(X.size()); min_elements.resize(X.size());
for (int i = 0; i < X.size(); i++) { for (uint32_t i = 0; i < X.size(); i++) {
max_elements[i] = alg.max(X[i]); max_elements[i] = alg.max(X[i]);
min_elements[i] = alg.min(X[i]); min_elements[i] = alg.min(X[i]);
} }
for (int i = 0; i < X.size(); i++) { for (uint32_t i = 0; i < X.size(); i++) {
for (int j = 0; j < X[i].size(); j++) { for (uint32_t j = 0; j < X[i].size(); j++) {
X[i][j] = (X[i][j] - min_elements[i]) / (max_elements[i] - min_elements[i]); X[i][j] = (X[i][j] - min_elements[i]) / (max_elements[i] - min_elements[i]);
} }
} }
@ -1157,18 +1161,17 @@ std::vector<std::vector<real_t>> MLPPData::meanNormalization(std::vector<std::ve
// (X_j - mu_j) / std_j, for every j // (X_j - mu_j) / std_j, for every j
X = meanCentering(X); X = meanCentering(X);
for (int i = 0; i < X.size(); i++) { for (uint32_t i = 0; i < X.size(); i++) {
X[i] = alg.scalarMultiply(1 / stat.standardDeviation(X[i]), X[i]); X[i] = alg.scalarMultiply(1 / stat.standardDeviation(X[i]), X[i]);
} }
return X; return X;
} }
std::vector<std::vector<real_t>> MLPPData::meanCentering(std::vector<std::vector<real_t>> X) { std::vector<std::vector<real_t>> MLPPData::meanCentering(std::vector<std::vector<real_t>> X) {
MLPPLinAlg alg;
MLPPStat stat; MLPPStat stat;
for (int i = 0; i < X.size(); i++) { for (uint32_t i = 0; i < X.size(); i++) {
real_t mean_i = stat.mean(X[i]); real_t mean_i = stat.mean(X[i]);
for (int j = 0; j < X[i].size(); j++) { for (uint32_t j = 0; j < X[i].size(); j++) {
X[i][j] -= mean_i; X[i][j] -= mean_i;
} }
} }
@ -1178,7 +1181,7 @@ std::vector<std::vector<real_t>> MLPPData::meanCentering(std::vector<std::vector
std::vector<std::vector<real_t>> MLPPData::oneHotRep(std::vector<real_t> tempOutputSet, int n_class) { std::vector<std::vector<real_t>> MLPPData::oneHotRep(std::vector<real_t> tempOutputSet, int n_class) {
std::vector<std::vector<real_t>> outputSet; std::vector<std::vector<real_t>> outputSet;
outputSet.resize(tempOutputSet.size()); outputSet.resize(tempOutputSet.size());
for (int i = 0; i < tempOutputSet.size(); i++) { for (uint32_t i = 0; i < tempOutputSet.size(); i++) {
for (int j = 0; j <= n_class - 1; j++) { for (int j = 0; j <= n_class - 1; j++) {
if (tempOutputSet[i] == j) { if (tempOutputSet[i] == j) {
outputSet[i].push_back(1); outputSet[i].push_back(1);
@ -1192,10 +1195,10 @@ std::vector<std::vector<real_t>> MLPPData::oneHotRep(std::vector<real_t> tempOut
std::vector<real_t> MLPPData::reverseOneHot(std::vector<std::vector<real_t>> tempOutputSet) { std::vector<real_t> MLPPData::reverseOneHot(std::vector<std::vector<real_t>> tempOutputSet) {
std::vector<real_t> outputSet; std::vector<real_t> outputSet;
int n_class = tempOutputSet[0].size(); //uint32_t n_class = tempOutputSet[0].size();
for (int i = 0; i < tempOutputSet.size(); i++) { for (uint32_t i = 0; i < tempOutputSet.size(); i++) {
int current_class = 1; int current_class = 1;
for (int j = 0; j < tempOutputSet[i].size(); j++) { for (uint32_t j = 0; j < tempOutputSet[i].size(); j++) {
if (tempOutputSet[i][j] == 1) { if (tempOutputSet[i][j] == 1) {
break; break;
} else { } else {
@ -1209,7 +1212,6 @@ std::vector<real_t> MLPPData::reverseOneHot(std::vector<std::vector<real_t>> tem
} }
Ref<MLPPMatrix> MLPPData::mean_centering(const Ref<MLPPMatrix> &p_X) { Ref<MLPPMatrix> MLPPData::mean_centering(const Ref<MLPPMatrix> &p_X) {
MLPPLinAlg alg;
MLPPStat stat; MLPPStat stat;
Ref<MLPPMatrix> X; Ref<MLPPMatrix> X;
@ -1259,6 +1261,7 @@ Ref<MLPPMatrix> MLPPData::one_hot_rep(const Ref<MLPPVector> &temp_output_set, in
return output_set; return output_set;
} }
void MLPPData::_bind_methods() { void MLPPData::_bind_methods() {
ClassDB::bind_method(D_METHOD("load_breast_cancer", "path"), &MLPPData::load_breast_cancer); ClassDB::bind_method(D_METHOD("load_breast_cancer", "path"), &MLPPData::load_breast_cancer);
ClassDB::bind_method(D_METHOD("load_breast_cancer_svc", "path"), &MLPPData::load_breast_cancer_svc); ClassDB::bind_method(D_METHOD("load_breast_cancer_svc", "path"), &MLPPData::load_breast_cancer_svc);

View File

@ -180,9 +180,9 @@ public:
template <class T> template <class T>
std::vector<T> vecToSet(std::vector<T> inputSet) { std::vector<T> vecToSet(std::vector<T> inputSet) {
std::vector<T> setInputSet; std::vector<T> setInputSet;
for (int i = 0; i < inputSet.size(); i++) { for (uint32_t i = 0; i < inputSet.size(); i++) {
bool new_element = true; bool new_element = true;
for (int j = 0; j < setInputSet.size(); j++) { for (uint32_t j = 0; j < setInputSet.size(); j++) {
if (setInputSet[j] == inputSet[i]) { if (setInputSet[j] == inputSet[i]) {
new_element = false; new_element = false;
} }