# Commits

committed 1a768d2

• Participants
• Parent commits 90dbd15
• Branches default

# File nlp-scores/NLP/Scores.hs

• Ignore whitespace
`-{-# LANGUAGE BangPatterns #-}`
`+{-# LANGUAGE `
`+    BangPatterns `
`+  , NoMonomorphismRestriction`
`+ #-}`
` -- | Scoring functions commonly used for evaluation of NLP`
` -- systems. Most functions in this module work on sequences which are`
` -- instances of 'Data.Foldable', but some take a precomputed table of`
`     , mean`
`     , jaccard`
`     , entropy`
`+    , histogram`
`       -- * Extracting joint and marginal counts from 'Counts'`
`     , countJoint`
`     , countFst`
`   fromIntegral (Set.size (Set.union a b))`
` {-# SPECIALIZE jaccard :: (Ord a) => Set.Set a -> Set.Set a -> Double #-}  `
` `
`--- | Entropy: H(X) = -SUM_i P(X=i) log_2(P(X=i))`
`+-- | Entropy: H(X) = -SUM_i P(X=i) log_2(P(X=i)). @entropy xs@ is the`
`+-- entropy of the random variable represented by the sequence @xs@,`
`+-- where each element of @xs@ is the count of the one particular `
`+-- value the random variable can take. If you need to compute the `
`+-- entropy from a sequence of outcomes, the following will work:`
`+--`
`+-- > entropy . elems . histogram`
`+--`
` entropy :: (Floating c, F.Foldable t) => t c -> c`
` entropy cx = negate . getSum . F.foldMap  (Sum . f)  $ cx`
`     where n    = sum cx`
`           logn = logBase 2 n`
`           f nx = nx / n * (logBase 2 nx - logn)`
` `
`+-- | @histogram xs@ returns the map of the frequency counts of the`
`+-- elements in sequence @xs@`
`+histogram :: (Num a, Ord k, F.Foldable t) => t k -> Map.Map k a`
`+histogram = F.foldl' (\ z k -> Map.insertWith' (+) k 1 z) Map.empty`
`+`
` -- | Creates count table 'Counts'`
` counts :: (Ord a, Ord b, F.Foldable t) => t (a, b) -> Counts a b`
` counts xys = F.foldl' f empty xys`

# File nlp-scores/nlp-scores.cabal

• Ignore whitespace
` -- The package version. See the Haskell package versioning policy`
` -- (http://www.haskell.org/haskellwiki/Package_versioning_policy) for`
` -- standards guiding when and how versions should be incremented.`
`-Version:             0.4.3`
`+Version:             0.4.4`
` `
` -- A short (one-line) description of the package.`
` Synopsis:            Scoring functions commonly used for evaluation in NLP and IR`