Commits

Grzegorz Chrupała  committed d19d10a Merge

Merge

  • Participants
  • Parent commits d39019d, 493d7e1

Comments (0)

Files changed (2)

File src/Entropy/Algorithm.hs

 {-# LANGUAGE NoMonomorphismRestriction , BangPatterns #-}
 module Entropy.Algorithm ( cluster
+                         , clusterBeam
                          , clusterToken
                          , labelToken
                          , clusterWords
              -> ClusterSet (Int,String) 
              -> X (Int,String)
              -> [(Y,ClusterSet (Int,String))]
-clusterToken freeze cs x = 
-            
-    let rs'@(((e,_),(y,_)):_) =  
-            sortBy 
-            (comparing (\((s,y),_) 
-                            -> (realToFrac s::Float, negate y))) 
+clusterToken freeze cs x = map snd . clusterTok freeze cs $ x
+
+clusterTok :: Bool 
+             -> ClusterSet (Int,String) 
+             -> X (Int,String)
+             -> [((Double,Y), (Y,ClusterSet (Int,String)))]
+clusterTok freeze cs x =
+    let rs = rank 
              $ [let cs' = update cs x y 
                 in ((score cs',y),(y,cs'))
                 | y <- nextID cs : ids cs
-
-
                ,  y == nextID cs 
                       || Map.size (countXY cs ! y `Map.intersection` x) > 0
                      ]
-        rs = map snd rs'
-    in  rs
+    in rs
+
+rank :: [((Double,Y),a)] -> [((Double,Y),a)]
+rank = sortBy (comparing (\((s,y),_) -> (realToFrac s :: Float, negate y)))
 
 -- | labelToken: output a single label (from a closed set) 
 labelToken :: ClusterSet (Int,String)
         -> ClusterSet (Int, String)
 cluster freeze = foldl' (\cs x -> snd . head . clusterToken freeze cs $ x) 
                         
+clusterBeam :: Int 
+            -> Bool 
+            -> ClusterSet (Int,String)
+            -> [X (Int, String)] 
+            -> ClusterSet (Int,String)
+clusterBeam sz freeze cs = 
+    let step css x = map (snd . snd)
+                     . take sz
+                     . rank 
+                     . concat 
+                     $ [ take sz . clusterTok freeze cs $ x | cs <- css ]
+    in head . foldl' step (return cs)
 
 normalize :: (Ord k) => Map.Map k Double -> Map.Map k Double
 normalize x = let s = Map.fold (+) 0 x in Map.map (/s) x
 main = do
   (command:args) <- getArgs
   case command of
-    "learn"   -> do let (fids:trainf:_) = args
-                    train <-   fmap readcorpus $ readFile trainf
-                    let xss = concat . examples (read fids) $ train
-                        cs = cluster False empty xss
-                    hPutStrLn stderr . show . Map.size . countXY $ cs
-                    B.writeFile (trainf ++ "." ++ fids ++ ".learn.model") 
-                         . encode $ cs
+    "learn"   -> learn cluster args
+    "learn-beam" -> do 
+                 let (k:args') = args 
+                 learn (clusterBeam (read k)) args'
     "learn-seeded" -> 
                  do let (n:m:seedf:trainf:_) = args
                     seed'  <- fmap decode $ B.readFile seedf
                  printf "VI:  %.4f\n" . vi  $ cs
                  printf "ARI: %.4f\n" . ari $ cs
 
+learn f args = do 
+  let (fids:trainf:_) = args
+  train <-   fmap readcorpus $ readFile trainf
+  let xss = concat . examples (read fids) $ train
+      cs = f False empty xss
+  hPutStrLn stderr . show . Map.size . countXY $ cs
+  B.writeFile (trainf ++ "." ++ fids ++ ".learn.model") 
+            . encode $ cs
+
 teach :: [Int] -> [[Token]] -> (ClusterSet (Int,String),Atom.AtomTable)
 teach fids train = flip Atom.runAtom Atom.empty  $ do
     fmap (makeClusterSet