Commits

Grzegorz Chrupała  committed 9530205 Merge

Merge

  • Participants
  • Parent commits 531f99f, 0796302

Comments (0)

Files changed (2)

File nlp-scores/NLP/Scores.hs

     , ari
     , mi
     , vi
+    -- * Comparing probability distributions
+    , kullbackLeibler
+    , jensenShannon
     -- * Auxiliary types and functions
     , Count
     , Counts
 vi cs@(Counts _ cx cy) = entropy (elems cx) + entropy (elems cy) - 2 * mi cs
   where elems = Map.elems
 
+-- | Kullback-Leibler divergence, measured in bits:
+-- KL(X,Y) = SUM_i P(X=i) log_2(P(X=i)/P(Y=i)).
+--
+-- Neither argument has to be normalized: every weight is divided by the
+-- total of its own container before it is used. A term whose normalized
+-- first weight is exactly 0 contributes 0 to the sum, whatever the value
+-- of the logarithm.
+--
+-- NOTE(review): presumably 'zipWithTF' pairs the elements of both
+-- containers positionally; confirm against its definition.
+kullbackLeibler :: (Floating a, F.Foldable f, T.Traversable t) => t a -> f a -> a
+kullbackLeibler xs ys = sum (zipWithTF term xs ys)
+  where totalX = sum xs
+        totalY = sum ys
+        -- Contribution of one pair of raw weights to the divergence.
+        term !x !y = weighted p (logBase 2 (p / q))
+          where p = x / totalX
+                q = y / totalY
+        -- Multiplication that short-circuits on a zero weight, so the
+        -- logarithm is never multiplied in when the probability is 0.
+        weighted 0 _ = 0
+        weighted w v = w * v
+        {-# INLINE weighted #-}
+
+-- | Jensen-Shannon divergence: JS(X,Y) = 1/2 KL(X,M) + 1/2 KL(Y,M), where
+-- M = (X+Y)/2 is the equal-weight mixture of the two distributions.
+--
+-- The distributions can be unnormalized: each weight is divided by the
+-- total of its own container before the mixture is formed, so inputs with
+-- different total mass still contribute equally to M. (Adding the raw
+-- weights instead would bias M towards the input with the larger total.)
+-- The mixture itself is left summing to 2 rather than 1, which is harmless
+-- because 'kullbackLeibler' normalizes its arguments.
+--
+-- NOTE(review): presumably 'zipWithTF' pairs the elements of both
+-- containers positionally; confirm against its definition.
+jensenShannon :: (Floating a, T.Traversable t, T.Traversable u) => t a -> u a -> a
+jensenShannon xs ys = 0.5 * kullbackLeibler xs zs + 0.5 * kullbackLeibler ys zs
+  where zs = zipWithTF mix xs ys
+        -- Sum of the two normalized probabilities for one outcome.
+        mix x y = x / sx + y / sy
+        sx = sum xs
+        sy = sum ys
+          
 -- | Adjusted Rand Index: <http://en.wikipedia.org/wiki/Rand_index>
 ari :: (Ord a, Ord b) => Counts a b -> Double
 ari (Counts cxy cx cy) =  (sum1 - sum2*sum3/choicen2) 

File nlp-scores/nlp-scores.cabal

 -- The package version. See the Haskell package versioning policy
 -- (http://www.haskell.org/haskellwiki/Package_versioning_policy) for
 -- standards guiding when and how versions should be incremented.
-Version:             0.5.2
+Version:             0.5.4
 
 -- A short (one-line) description of the package.
 Synopsis:            Scoring functions commonly used for evaluation in NLP and IR