-{-# LANGUAGE BangPatterns #-}

+ , NoMonomorphismRestriction

-- | Scoring functions commonly used for evaluation of NLP

-- systems. Most functions in this module work on sequences which are

-- instances of 'Data.Foldable', but some take a precomputed table of

-- * Extracting joint and marginal counts from 'Counts'

fromIntegral (Set.size (Set.union a b))

{-# SPECIALIZE jaccard :: (Ord a) => Set.Set a -> Set.Set a -> Double #-}

--- | Entropy: H(X) = -SUM_i P(X=i) log_2(P(X=i))

+-- | Entropy: H(X) = -SUM_i P(X=i) log_2(P(X=i)). @entropy xs@ is the

+-- entropy of the random variable represented by the sequence @xs@,

+-- where each element of @xs@ is the count of one particular

+-- value the random variable can take. If you need to compute the

+-- entropy from a sequence of outcomes, the following will work:

+-- > entropy . elems . histogram

entropy :: (Floating c, F.Foldable t) => t c -> c

entropy cx = negate . getSum . F.foldMap (Sum . f) $ cx

f nx = nx / n * (logBase 2 nx - logn)

+-- | @histogram xs@ returns the map of the frequency counts of the

+-- elements in sequence @xs@

+histogram :: (Num a, Ord k, F.Foldable t) => t k -> Map.Map k a

+histogram = F.foldl' (\ z k -> Map.insertWith' (+) k 1 z) Map.empty

-- | Creates count table 'Counts'

counts :: (Ord a, Ord b, F.Foldable t) => t (a, b) -> Counts a b

counts xys = F.foldl' f empty xys