Commits

Grzegorz Chrupała committed b339144

Add NLP.Scores.Internals

  • Participants
  • Parent commits d0004fe

Comments (0)

Files changed (2)

nlp-scores/NLP/Scores.hs

 -- Information, Variation of Information and the Adjusted Rand Index
 -- on the same pair of clusterings:
 --
--- >>> let cs = counts $ zip "abcabc" "abaaba"
+-- >>> let cs = counts "abcabc" "abaaba"
 -- >>> mapM_ (print . ($ cs)) [mi, ari, vi]
 -- >>> 0.9182958340544894
 -- >>> 0.4444444444444445
 import qualified Data.Map as Map
 import Prelude hiding (sum)
 
+import NLP.Scores.Internals
+
 -- | Accuracy: the proportion of elements in the first sequence equal
 -- to elements at corresponding positions in second
 -- sequence. Sequences should be of equal lengths.
 
 -- | Variation of information: VI(X,Y) = H(X) + H(Y) - 2 MI(X,Y)
 vi :: (Ord a, Ord b) => Counts a b -> Double
-vi cs@(Counts cxy cx cy) = entropy (elems cx) + entropy (elems cy) - 2 * mi cs
+vi cs@(Counts _ cx cy) = entropy (elems cx) + entropy (elems cy) - 2 * mi cs
   where elems = Map.elems
 
 -- | Adjusted Rand Index: <http://en.wikipedia.org/wiki/Rand_index>
         sum2 = sum [ choice ni 2 | ni <- Map.elems cx ]
         sum3 = sum [ choice nj 2 | nj <- Map.elems cy ]
 
--- | A count
-type Count = Double
--- | Count table
-data Counts a b = 
-  Counts 
-  { joint :: !(Map.Map (P a b) Count) -- ^ Counts of both components
-  , marginalFst :: !(Map.Map a Count) -- ^ Counts of the first component
-  , marginalSnd :: !(Map.Map b Count) -- ^ Counts of the second component
-  } 
-data P a b = P !a !b deriving (Eq, Ord)
-
-instance (Ord a, Ord b) => Monoid (Counts a b) where
-    mempty = empty
-    c `mappend` k = 
-        Counts { joint = unionPlus (joint c) (joint k)
-               , marginalFst = unionPlus (marginalFst c) (marginalFst k)
-               , marginalSnd = unionPlus (marginalSnd c) (marginalSnd k)
-               }
-
-unionPlus :: (Num a, Ord k) => Map.Map k a -> Map.Map k a -> Map.Map k a
-unionPlus m = 
-    Map.foldlWithKey' (\z k v -> Map.insertWith' (+) k v z) m
-{-# SPECIALIZE unionPlus :: (Ord k) => 
-  Map.Map k Count -> Map.Map k Count -> Map.Map k Count #-}
-
--- | The empty count table
-empty :: (Ord a, Ord b) => Counts a b
-empty = Counts Map.empty Map.empty Map.empty
-
 -- | The sum of a sequence of numbers
 sum :: (F.Foldable t, Num a) => t a -> a
 sum = F.foldl' (+) 0
 histogram = F.foldl' (\ z k -> Map.insertWith' (+) k 1 z) Map.empty
 
 -- | Creates count table 'Counts'
-counts :: (Ord a, Ord b, F.Foldable t) => t (a, b) -> Counts a b
-counts xys = F.foldl' f empty xys
-    where f cs@(Counts cxy cx cy) (!x,!y) = 
-            cs { joint       = Map.insertWith' (+) (P x y) 1 cxy
+counts :: (Ord a, Ord b, F.Foldable t) => t a -> t b -> Counts a b
+counts xs = foldl' f empty . zipWith P (F.toList xs) . F.toList
+    where f cs@(Counts cxy cx cy) p@(P x y) = 
+            cs { joint       = Map.insertWith' (+) p 1 cxy
                , marginalFst = Map.insertWith' (+) x 1 cx
                , marginalSnd = Map.insertWith' (+) y 1 cy }
 

nlp-scores/nlp-scores.cabal

 -- The package version. See the Haskell package versioning policy
 -- (http://www.haskell.org/haskellwiki/Package_versioning_policy) for
 -- standards guiding when and how versions should be incremented.
-Version:             0.4.5
+Version:             0.5.1
 
 -- A short (one-line) description of the package.
 Synopsis:            Scoring functions commonly used for evaluation in NLP and IR
 
 Library
   -- Modules exported by the library.
-  Exposed-modules:     NLP.Scores
+  Exposed-modules:     NLP.Scores, NLP.Scores.Internals
   
   -- Packages needed in order to build this package.
   Build-depends:  base >= 3 && < 5 ,  containers >= 0.4.2