mirror of
https://github.com/Relintai/MLPP.git
synced 2025-02-04 15:55:53 +01:00
Implemented latent semantic analysis
This commit is contained in:
parent
b7f7e10b73
commit
6d7e048b12
@ -463,6 +463,22 @@ namespace MLPP{
|
||||
return {wordEmbeddings, wordList};
|
||||
}
|
||||
|
||||
std::vector<std::vector<double>> Data::LSA(std::vector<std::string> sentences, int dim){
|
||||
LinAlg alg;
|
||||
std::vector<std::vector<double>> docWordData = BOW(sentences, "Binary");
|
||||
|
||||
auto [U, S, Vt] = alg.SVD(docWordData);
|
||||
std::vector<std::vector<double>> S_trunc = alg.zeromat(dim, dim);
|
||||
std::vector<std::vector<double>> Vt_trunc;
|
||||
for(int i = 0; i < dim; i++){
|
||||
S_trunc[i][i] = S[i][i];
|
||||
Vt_trunc.push_back(Vt[i]);
|
||||
}
|
||||
|
||||
std::vector<std::vector<double>> embeddings = alg.matmult(S_trunc, Vt);
|
||||
return embeddings;
|
||||
}
|
||||
|
||||
std::vector<std::string> Data::createWordList(std::vector<std::string> sentences){
|
||||
std::string combinedText = "";
|
||||
for(int i = 0; i < sentences.size(); i++){
|
||||
|
@ -47,6 +47,7 @@ class Data{
|
||||
// Encodes `sentences` as a numeric matrix (presumably a bag-of-words
// document/word matrix - confirm against the definition).
// The second argument selects the encoding variant and defaults to "Default";
// LSA calls this with "Binary".
std::vector<std::vector<double>> BOW(std::vector<std::string> sentences, std::string = "Default");
|
||||
std::vector<std::vector<double>> TFIDF(std::vector<std::string> sentences);
|
||||
std::tuple<std::vector<std::vector<double>>, std::vector<std::string>> word2Vec(std::vector<std::string> sentences, std::string type, int windowSize, int dimension, double learning_rate, int max_epoch);
|
||||
// Latent semantic analysis: encodes `sentences` with BOW(..., "Binary"),
// takes the SVD of that matrix, and returns a matrix built from the `dim`
// leading singular values. `dim` is not range-checked, so it must not exceed
// the number of singular values the SVD produces.
std::vector<std::vector<double>> LSA(std::vector<std::string> sentences, int dim);
|
||||
|
||||
std::vector<std::string> createWordList(std::vector<std::string> sentences);
|
||||
|
||||
|
13
main.cpp
13
main.cpp
@ -487,6 +487,13 @@ int main() {
|
||||
// alg.printMatrix(wordEmbeddings);
|
||||
// std::cout << std::endl;
|
||||
|
||||
std::vector<std::string> textArchive = {"pizza", "pizza hamburger cookie", "hamburger", "ramen", "sushi", "ramen sushi"};
|
||||
|
||||
alg.printMatrix(data.LSA(textArchive, 2));
|
||||
//alg.printMatrix(data.BOW(textArchive, "Default"));
|
||||
std::cout << std::endl;
|
||||
|
||||
|
||||
// std::vector<std::vector<double>> inputSet = {{1,2},{2,3},{3,4},{4,5},{5,6}};
|
||||
// std::cout << "Feature Scaling Example:" << std::endl;
|
||||
// alg.printMatrix(data.featureScaling(inputSet));
|
||||
@ -629,9 +636,9 @@ int main() {
|
||||
// std::cout << std::endl;
|
||||
// } // Harris detector works. Life is good!
|
||||
|
||||
std::vector<double> a = {3,4,4};
|
||||
std::vector<double> b= {4,4,4};
|
||||
alg.printVector(alg.cross(a,b));
|
||||
// std::vector<double> a = {3,4,4};
|
||||
// std::vector<double> b = {4,4,4};
|
||||
// alg.printVector(alg.cross(a,b));
|
||||
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user