Commits

Grzegorz Chrupała committed 3e24653

Added colada results to slides.

Comments (0)

Files changed (4)

doc/clin-2012/delta-h-alpha-mrr.pdf

 %âăĎÓ\r
 1 0 obj
 <<
-/CreationDate (D:20120118205232)
-/ModDate (D:20120118205232)
+/CreationDate (D:20120118232533)
+/ModDate (D:20120118232533)
 /Title (R Graphics Output)
 /Producer (R 2.13.1)
 /Creator (R)

doc/clin-2012/plots.R

        fill=c("blue","red"),cex=1.2,bg="white")
 dev.off()
 
+pdf('colada-best-mrr-10K.pdf', height=3.7, width=5)
+dat <- read.table('colada-best-mrr-10K.csv', header=T)
+K <- c(50, 200, 800)
+plot(K, dat$mrr[dat$pass==1], ylim=c(0.3, 0.45), 
+    xlab="K", ylab="MRR",
+    type='b', log="x", lwd=2, col="red")
+points(K, dat$mrr[dat$pass==20], type='b', lwd=2, col="blue") 
+legend("topleft",legend=c(expression(j==20), expression(j==1)),
+       fill=c("blue","red"),cex=1.2,bg="white")
+dev.off()

doc/clin-2012/slides.tex

  \end{frame}
 
  \begin{frame}
-   \frametitle{Word classes with online LDA}
+   \frametitle{Word classes with online LDA (coLaDA)}
    \begin{itemize}
    \item $d$ - word type
    \item $w$ - context feature
 
      \item For each $w_i$ in the sentence, sample:
        \begin{equation*}
-         P(z_i|\mathbf{z}_{i-1},\mathbf{w}_i) \propto 
+         P(z_i|\mathbf{z}_{i-1},\mathbf{w}_i, \mathbf{d}_i) \propto 
          \frac{(n_{z,d} + \alpha) \times (n_{z,w} + \beta)}{n_{z,\bullet} + V\beta}
        \end{equation*}
        and update the counts.
 
 
  \begin{frame}
-   \frametitle{Online LDA for word classes} 
+   \frametitle{CoLaDA} 
    \begin{itemize}
    \item oLDA did not work for inferring topics
    \item Key difference: word types $d$ recur
        \item {\bf Without any special arrangements}
        \end{itemize}
      \end{block}
+   \end{itemize}
+ \end{frame}
 
+ \begin{frame}
+   \frametitle{CoLaDA results}
+   \begin{center}
+     \includegraphics[scale=0.8]{colada-best-mrr-10K.pdf}
+   \end{center}
+ \end{frame}
+
+ \begin{frame}
+   \frametitle{Interpretation}
+   \begin{itemize}
+   \item Word recognition for $K \in \{200, 800\}$ similar to (or better
+     than) $\Delta$H
+   \item Multiple passes help a bit
+   \item Best parameters
+     \begin{itemize}
+     \item 1 pass: $\sum_{k=1}^{K} \alpha = 0.1$, $\beta = 0.01$
+     \item 20 passes: $\sum_{k=1}^{K} \alpha = 10$, $\beta = 0.1$
+     \end{itemize}
    \end{itemize}
  \end{frame}
 

src/Entropy/Algorithm.hs~

 import Data.Ord (comparing)
 import Reader (Token,readcorpus)
 import Debug.Trace
-import System 
 import Prelude hiding (sum)
 import SparseVector (plus,dot)
 import Data.Binary (encode,decode,put,get,Binary)
 import Control.Monad (ap)
-import RecipRank
 import qualified Data.ByteString.Lazy as BS
 import Counts (rankNormalize)