Anonymous avatar Anonymous committed 6a03c90

Remove debug prints in tf; add terms building (term matrix with occurrence counts)

Comments (0)

Files changed (3)

src/documentStatistics/tf.hxx

                 ++result(word_id, doc_id);
             }
 
-            std::cout << "document size: " << document.size() << std::endl;
-            std::cout << "original vec:\n" << result.col(doc_id) << std::endl;
             result.col(doc_id) /= (float)document.size();
-            std::cout << "divided vec:\n" << result.col(doc_id) << std::endl;
 
             ++doc_id;
         }

src/documentStatistics/tm.hh

+#ifndef TM_HH_
+# define TM_HH_
+
+# include <basic_types.hh>
+
+namespace stat
+{
+    template <typename C>  // C: type of the corpus; the definition lives in tm.hxx
+    TermDocumentMatrix compute_terms(const C& corpus);  // returns the term-by-document matrix of occurrence counts for `corpus`
+}
+
+// Implementation
+# include "tm.hxx"
+
+#endif /* !TM_HH_ */

src/documentStatistics/tm.hxx

+
+namespace stat
+{
+    /**
+     * Build the raw term/document count matrix of a corpus.
+     *
+     * The result has C::document_type::getNbTerms() rows and corpus.size()
+     * columns. Entry (t, d) is the number of times word id t occurs in the
+     * d-th document, documents being numbered in corpus iteration order.
+     * Counts are left as-is: no normalisation by document length is done.
+     *
+     * @tparam C      corpus type; must expose size(), be iterable over its
+     *                documents, and provide a nested document_type with a
+     *                static getNbTerms().
+     * @param corpus  corpus to scan; each document is iterated to obtain the
+     *                word ids it contains. Every word id is used directly as
+     *                a row index of the result.
+     * @return        the filled TermDocumentMatrix.
+     */
+    template <typename C>
+    TermDocumentMatrix compute_terms(const C& corpus)
+    {
+        TermDocumentMatrix result{TermDocumentMatrix::Zero(C::document_type::getNbTerms(), corpus.size())};
+        int doc_id = 0;
+
+        // Accumulate occurrence counts, one column per document
+        for (const auto& document: corpus)
+        {
+            // No per-word tracing here: printing (and flushing) on every
+            // occurrence made the count O(words) in I/O and tied the
+            // document type to getUrl().
+            for (const auto& word_id: document)
+                ++result(word_id, doc_id);
+
+            ++doc_id;
+        }
+
+        return result;
+    }
+}
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.