# Commits

committed 1a768d2

# nlp-scores/NLP/Scores.hs

-{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE
+    BangPatterns
+  , NoMonomorphismRestriction
+ #-}
-- | Scoring functions commonly used for evaluation of NLP
-- systems. Most functions in this module work on sequences which are
-- instances of 'Data.Foldable', but some take a precomputed table of
, mean
, jaccard
, entropy
+    , histogram
-- * Extracting joint and marginal counts from 'Counts'
, countJoint
, countFst
fromIntegral (Set.size (Set.union a b))
{-# SPECIALIZE jaccard :: (Ord a) => Set.Set a -> Set.Set a -> Double #-}

--- | Entropy: H(X) = -SUM_i P(X=i) log_2(P(X=i))
+-- | Entropy: H(X) = -SUM_i P(X=i) log_2(P(X=i)). @entropy xs@ is the
+-- entropy of the random variable represented by the sequence @xs@,
+-- where each element of @xs@ is the count of one particular
+-- value the random variable can take. If you need to compute the
+-- entropy from a sequence of outcomes, the following will work:
+--
+-- > entropy . elems . histogram
+--
entropy :: (Floating c, F.Foldable t) => t c -> c
-- Folds 'f' over the counts using the 'Sum' monoid, then negates the total.
entropy cx = negate . getSum . F.foldMap  (Sum . f)  \$ cx
-- n: the total of all counts in cx; dividing a count by it gives a proportion.
where n    = sum cx
-- logn: log_2 of the total, named so that f can subtract it from each
-- count's own log.
logn = logBase 2 n
-- f nx: (nx / n) * log_2 (nx / n), with the log of the quotient written as
-- a difference of logs.
-- NOTE(review): a zero count gives 0 * log_2 0, which looks like NaN for IEEE
-- types such as Double; confirm callers never pass zero counts.
f nx = nx / n * (logBase 2 nx - logn)

+-- | @histogram xs@ returns a map of the frequency counts of the
+-- elements in sequence @xs@.
+histogram :: (Num a, Ord k, F.Foldable t) => t k -> Map.Map k a
+histogram = F.foldl' (\ z k -> Map.insertWith' (+) k 1 z) Map.empty
+
-- | Creates count table 'Counts'
counts :: (Ord a, Ord b, F.Foldable t) => t (a, b) -> Counts a b
counts xys = F.foldl' f empty xys

# nlp-scores/nlp-scores.cabal

-- The package version. See the Haskell package versioning policy