Implemented latent semantic analysis

This commit is contained in:
novak_99 2021-12-23 19:13:55 -08:00
parent b7f7e10b73
commit 6d7e048b12
4 changed files with 27 additions and 3 deletions

View File

@ -463,6 +463,22 @@ namespace MLPP{
return {wordEmbeddings, wordList};
}
// Latent semantic analysis (LSA).
//
// Builds the binary bag-of-words matrix for `sentences`, takes its SVD, and
// returns S_trunc * Vt_trunc, where S_trunc is the leading rank x rank diagonal
// block of S and Vt_trunc holds the first `rank` rows of the third SVD factor.
//
// sentences: input documents, one string per document.
// dim:       requested number of components to keep. It is clamped to the
//            number of components the decomposition actually provides.
//
// Returns a matrix with `rank` rows (rank <= dim), or an empty matrix when no
// component can be kept (dim <= 0, or the SVD factors are empty).
std::vector<std::vector<double>> Data::LSA(std::vector<std::string> sentences, int dim){
    LinAlg alg;
    std::vector<std::vector<double>> docWordData = BOW(sentences, "Binary");

    // U is not needed here; it is only named because structured bindings
    // cannot skip an element.
    auto [U, S, Vt] = alg.SVD(docWordData);

    // Largest usable rank: every kept index i must satisfy i < dim, S[i][i]
    // must exist, and Vt[i] must exist. This also yields 0 for dim <= 0.
    int rank = 0;
    while(rank < dim
          && rank < static_cast<int>(S.size())
          && rank < static_cast<int>(S[rank].size())
          && rank < static_cast<int>(Vt.size())){
        rank++;
    }
    if(rank == 0){
        return {};
    }

    std::vector<std::vector<double>> S_trunc = alg.zeromat(rank, rank);
    std::vector<std::vector<double>> Vt_trunc;
    Vt_trunc.reserve(rank);
    for(int i = 0; i < rank; i++){
        S_trunc[i][i] = S[i][i];
        Vt_trunc.push_back(Vt[i]);
    }

    // Multiply by the truncated factor so the inner dimensions agree:
    // (rank x rank) * (rank x n).
    return alg.matmult(S_trunc, Vt_trunc);
}
std::vector<std::string> Data::createWordList(std::vector<std::string> sentences){
std::string combinedText = "";
for(int i = 0; i < sentences.size(); i++){

View File

@ -47,6 +47,7 @@ class Data{
std::vector<std::vector<double>> BOW(std::vector<std::string> sentences, std::string = "Default");
std::vector<std::vector<double>> TFIDF(std::vector<std::string> sentences);
std::tuple<std::vector<std::vector<double>>, std::vector<std::string>> word2Vec(std::vector<std::string> sentences, std::string type, int windowSize, int dimension, double learning_rate, int max_epoch);
// Latent semantic analysis: computes the SVD of the binary bag-of-words matrix built from `sentences` and returns an embedding matrix truncated to `dim` components.
std::vector<std::vector<double>> LSA(std::vector<std::string> sentences, int dim);
std::vector<std::string> createWordList(std::vector<std::string> sentences);

BIN
a.out

Binary file not shown.

View File

@ -487,6 +487,13 @@ int main() {
// alg.printMatrix(wordEmbeddings);
// std::cout << std::endl;
std::vector<std::string> textArchive = {"pizza", "pizza hamburger cookie", "hamburger", "ramen", "sushi", "ramen sushi"};
alg.printMatrix(data.LSA(textArchive, 2));
//alg.printMatrix(data.BOW(textArchive, "Default"));
std::cout << std::endl;
// std::vector<std::vector<double>> inputSet = {{1,2},{2,3},{3,4},{4,5},{5,6}};
// std::cout << "Feature Scaling Example:" << std::endl;
// alg.printMatrix(data.featureScaling(inputSet));
@ -629,9 +636,9 @@ int main() {
// std::cout << std::endl;
// } // Harris detector works. Life is good!
std::vector<double> a = {3,4,4};
std::vector<double> b= {4,4,4};
alg.printVector(alg.cross(a,b));
// std::vector<double> a = {3,4,4};
// std::vector<double> b = {4,4,4};
// alg.printVector(alg.cross(a,b));