Anonymous avatar Anonymous committed aab2409

fix LSA

Comments (0)

Files changed (2)

     k = args["topic"]
     topwords = args["topwords"]
     documents = map(x -> split(x.filename, "/")[end], corpus.documents)
-    models = [lda, nmf]
+    models = [lsa, lda, nmf]
 
     println()
     println("> Options are:")
     for model in models
         println("> Topic modeling with $(model)")
         result = model(m, k)
-        println("Final perplexity: $(perplexity(m, result))")
+        # println("Final perplexity: $(perplexity(m, result))")
 
         # Launch a benchmark
         if args["bench"]
             # Extract topics
             topics = getTopics(result, m.terms, k, topwords)
             for (i, topic) in enumerate(topics)
-                draw(PNG("$(model)/topic$(i)_$(k).png", 15inch, 10inch), plot(x = topic.words, y = topic.coeffs, Geom.bar))
+                if minimum(topic.coeffs) != maximum(topic.coeffs)
+                    draw(PNG("$(model)/topic$(i)_$(k).png", 15inch, 10inch), plot(x = topic.words, y = topic.coeffs, Geom.bar))
+                end
             end
 
             # Print topics

slides/slides.tex

 	\item généralisation de la diagonalisation à des matrices quelconques~;
 	\item $\Sigma$ matrice diagonale avec valeurs singulières~;
 	\item découvre la structure interne~;
-	\item trouver les axes expliquant au mieux la variance des données.
+	\item trouver les axes expliquant au mieux les données.
 \end{itemize}
 \end{frame}
 
 \begin{itemize}
 	\item Sélection des $k$ premières valeurs singulières ;
 	\item sélection des $k$ vecteurs de $U$ et $V$ correspondants ;
-	\item reconstruction de la matrice $M$ (débruitée, information dominante).
+	\item reconstruction de la matrice $M$ (débruitée, information dominante, moins de termes).
 \end{itemize}
 \end{frame}
 
     \item \textit{KMeans} : $V$ a des coefficients $v_{i, j} \in \{0, 1\}$~;
     \item \textit{NMF} : $V$ a des coefficients $v_{i, j} \in \mathbb{R}^+$~;
     \item plus flexible~;
-    \item complexe à approximer.
+    \item plus complexe à approximer.
 \end{itemize}
 \end{frame}
 
 \begin{frame}{Méthode non-paramétrique}
 \begin{itemize}
     \item Hiérarchique \emph{(dendrogramme)}~;
-    %\includegraphics[width=0.8\textwidth]{hierarchical}
+    \includegraphics[width=0.8\textwidth]{images/dendrogramme}
 \end{itemize}
 \end{frame}
 
 \begin{frame}{Heuristiques}
 \begin{itemize}
+	\item Rule of thumb : $k \approx \sqrt{\frac{n}{2}}$~;
+	\item Elbow method~;
+	\item Text : $k \approx \frac{D \times V}{\text{non-zero}}$~;
     \item Méthode basée sur un calcul en kernel-space~;
     \item piste à explorer.
 \end{itemize}
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.