//
//  Data.hpp
//  MLP
//
//  Created by Marc Melikyan on 11/4/20.
//

#ifndef Data_hpp
#define Data_hpp

#include <algorithm>
#include <string>
#include <tuple>
#include <vector>

namespace MLPP{

// Data groups the library's data helpers behind one class: dataset loaders,
// set/print helpers, image conversions, text & NLP helpers and a handful of
// preprocessing utilities. Only vecToSet is defined in this header; every
// other member is merely declared here and defined elsewhere.
//
// NOTE(review): the element types used below (double / std::string / char)
// and the standard headers included above were restored by hand. Confirm
// that they match the definitions in the implementation file before merging.
class Data{
    public:
        // Load Datasets
        // Each loader returns a tuple of containers; the meaning of each
        // tuple element is defined by the implementation file.
        std::tuple<std::vector<std::vector<double>>, std::vector<double>> loadBreastCancer();
        std::tuple<std::vector<std::vector<double>>, std::vector<double>> loadBreastCancerSVC();
        std::tuple<std::vector<std::vector<double>>, std::vector<std::vector<double>>> loadIris();
        std::tuple<std::vector<std::vector<double>>, std::vector<std::vector<double>>> loadWine();
        std::tuple<std::vector<std::vector<double>>, std::vector<std::vector<double>>> loadMnistTrain();
        std::tuple<std::vector<std::vector<double>>, std::vector<std::vector<double>>> loadMnistTest();
        std::tuple<std::vector<std::vector<double>>, std::vector<double>> loadCaliforniaHousing();
        std::tuple<std::vector<double>, std::vector<double>> loadFiresAndCrime();

        // Takes an input set, an output set and a test size and returns four
        // matrices. NOTE(review): presumably the train/test partitions of the
        // two sets; confirm the tuple order against the definition.
        std::tuple<std::vector<std::vector<double>>, std::vector<std::vector<double>>, std::vector<std::vector<double>>, std::vector<std::vector<double>>> trainTestSplit(std::vector<std::vector<double>> inputSet, std::vector<std::vector<double>> outputSet, double testSize);

        // Supervised
        // setData fills the containers passed by reference; printData takes
        // its containers by value.
        void setData(int k, std::string fileName, std::vector<std::vector<double>>& inputSet, std::vector<double>& outputSet);
        void printData(std::vector<std::string> inputName, std::string outputName, std::vector<std::vector<double>> inputSet, std::vector<double> outputSet);

        // Unsupervised
        void setData(int k, std::string fileName, std::vector<std::vector<double>>& inputSet);
        void printData(std::vector<std::string> inputName, std::vector<std::vector<double>> inputSet);

        // Simple
        void setData(std::string fileName, std::vector<double>& inputSet, std::vector<double>& outputSet);
        void printData(std::string& inputName, std::string& outputName, std::vector<double>& inputSet, std::vector<double>& outputSet);

        // Images
        // rgb2gray returns a container one nesting level shallower than its
        // input; the other conversions keep the input's nesting depth.
        std::vector<std::vector<double>> rgb2gray(std::vector<std::vector<std::vector<double>>> input);
        std::vector<std::vector<std::vector<double>>> rgb2ycbcr(std::vector<std::vector<std::vector<double>>> input);
        std::vector<std::vector<std::vector<double>>> rgb2hsv(std::vector<std::vector<std::vector<double>>> input);
        std::vector<std::vector<std::vector<double>>> rgb2xyz(std::vector<std::vector<std::vector<double>>> input);
        std::vector<std::vector<std::vector<double>>> xyz2rgb(std::vector<std::vector<std::vector<double>>> input);

        // Text-Based & NLP
        std::string toLower(std::string text);
        std::vector<char> split(std::string text);
        std::vector<std::string> splitSentences(std::string data);
        std::vector<std::string> removeSpaces(std::vector<std::string> data);
        std::vector<std::string> removeNullByte(std::vector<std::string> data);
        std::vector<std::string> segment(std::string text);
        std::vector<double> tokenize(std::string text);
        std::vector<std::string> removeStopWords(std::string text);
        std::vector<std::string> removeStopWords(std::vector<std::string> segmented_data);

        std::string stemming(std::string text);

        // The second parameter is unnamed in this declaration and defaults to
        // "Default".
        std::vector<std::vector<double>> BOW(std::vector<std::string> sentences, std::string = "Default");
        std::vector<std::vector<double>> TFIDF(std::vector<std::string> sentences);
        std::tuple<std::vector<std::vector<double>>, std::vector<std::string>> word2Vec(std::vector<std::string> sentences, std::string type, int windowSize, int dimension, double learning_rate, int max_epoch);
        std::vector<std::vector<double>> LSA(std::vector<std::string> sentences, int dim);

        std::vector<std::string> createWordList(std::vector<std::string> sentences);

        // Extra
        void setInputNames(std::string fileName, std::vector<std::string>& inputNames);
        std::vector<std::vector<double>> featureScaling(std::vector<std::vector<double>> X);
        std::vector<std::vector<double>> meanNormalization(std::vector<std::vector<double>> X);
        std::vector<std::vector<double>> meanCentering(std::vector<std::vector<double>> X);
        std::vector<std::vector<double>> oneHotRep (std::vector<double> tempOutputSet, int n_class);
        std::vector<double> reverseOneHot(std::vector<std::vector<double>> tempOutputSet);

        // Returns the distinct elements of inputSet, in order of first
        // appearance.
        //
        // T must be copyable and comparable with operator==. Elements are
        // compared only with ==, so no ordering or hashing is required; the
        // cost is a linear search of the result per input element.
        //
        // @param inputSet values to de-duplicate; taken by const reference
        //                 because it is only read.
        // @return a new vector holding each distinct value once (empty when
        //         inputSet is empty).
        template <class T>
        std::vector<T> vecToSet(const std::vector<T>& inputSet){
            std::vector<T> setInputSet;
            for(const T& candidate : inputSet){
                // std::find stops at the first match, so a value that is
                // already recorded does not cost a full scan of the result.
                if(std::find(setInputSet.begin(), setInputSet.end(), candidate) == setInputSet.end()){
                    setInputSet.push_back(candidate);
                }
            }
            return setInputSet;
        }

    private:
};
}

#endif /* Data_hpp */