# Commits

committed cbdf8d2

Generalized list in function signatures to Foldable.

• Participants
• Parent commits a7682ff
• Branches default

# File nlp-scores/NLP/Scores.hs

• Ignore whitespace
, entropy
)
where
+import qualified Data.Foldable as F
+import Data.Monoid
import Data.List hiding (sum)
import qualified Data.Set as Set
import qualified Data.Map as Map
-- | Accuracy: the proportion of elements in the first list equal to
-- elements at corresponding positions in second list. Lists should be
-- of equal lengths.
-accuracy :: (Eq a, Fractional n) => [a] -> [a] -> n
-accuracy xs = mean . map fromEnum . zipWith (==) xs
+accuracy :: (Eq a, Fractional c, F.Foldable t) =>  t a -> t a -> c
+accuracy xs = mean . map fromEnum . zipWith (==) (F.toList xs) . F.toList
{-# SPECIALIZE accuracy :: [Double] -> [Double] -> Double #-}

-- | Reciprocal rank: the reciprocal of the rank at which the first arguments
-- occurs in the list given as the second argument.
-recipRank :: (Eq a, Fractional n) => a -> [a] -> n
+recipRank :: (Eq a, Fractional b, F.Foldable t) => a -> t a -> b
recipRank y ys =
-    case [ r | (r,y') <- zip [1::Int ..] ys , y' == y ] of
+    case [ r | (r,y') <- zip [1::Int ..] . F.toList \$ ys , y' == y ] of
[]  -> 0
r:_ -> 1/fromIntegral r
{-# SPECIALIZE recipRank :: Double -> [Double] -> Double #-}

-- | Average precision.
-- <http://en.wikipedia.org/wiki/Information_retrieval#Average_precision>
-avgPrecision :: (Fractional n, Ord a) => Set.Set a -> [a] -> n
+avgPrecision :: (Fractional n, Ord a, F.Foldable t) => Set.Set a -> t a -> n
avgPrecision gold _ | Set.size gold == 0 = 0
avgPrecision gold xs =
(/fromIntegral (Set.size gold))
. takeWhile (\(_,_,cum) -> cum <= Set.size gold)
. snd
. mapAccumL (\z (r,rel) -> (z+rel,(r,rel,z+rel))) 0
-    \$ [ (r,fromEnum \$ x `Set.member` gold) | (x,r) <- zip xs [1::Int ..]]
+    \$ [ (r,fromEnum \$ x `Set.member` gold)
+      | (x,r) <- zip (F.toList xs) [1::Int ..]]
{-# SPECIALIZE avgPrecision :: (Ord a) => Set.Set a -> [a] -> Double #-}

-- | Mutual information: MI(X,Y) = H(X) - H(X|Y) = H(Y) - H(Y|X). Also
-- | Count table
data Counts a b =
Counts
-  { joint :: !(Map.Map (P a b) Count)   -- ^ Counts of both components
+  { joint :: !(Map.Map (P a b) Count) -- ^ Counts of both components
, marginalFst :: !(Map.Map a Count) -- ^ Counts of the first component
, marginalSnd :: !(Map.Map b Count) -- ^ Counts of the second component
}

-- | The sum of a list of numbers (without overflowing stack,
-- unlike 'Prelude.sum').
-sum :: (Num a) => [a] -> a
-sum = foldl' (+) 0
+sum :: (F.Foldable t, Num a) => t a -> a
+sum = F.foldl' (+) 0
{-# SPECIALIZE sum :: [Double] -> Double #-}
{-# SPECIALIZE sum :: [Int] -> Int #-}
{-# INLINE sum #-}

-- | The mean of a list of numbers.
-mean :: (Fractional n, Real a) => [a] -> n
+mean :: (F.Foldable t, Fractional n, Real a) => t a -> n
mean xs =
-    let (P tot len) = foldl' (\(P s l) x -> (P (s+x) (l+1))) (P 0 0) xs
+    let (P tot len) = F.foldl' (\(P s l) x -> (P (s+x) (l+1))) (P 0 0) xs
in realToFrac tot/len
{-# SPECIALIZE mean :: [Double] -> Double #-}

{-# SPECIALIZE jaccard :: (Ord a) => Set.Set a -> Set.Set a -> Double #-}

-- | Entropy: H(X) = -SUM_i P(X=i) log_2(P(X=i))
-entropy :: [Count] -> Double
-entropy cx = negate \$ sum [ f nx | nx <- cx ]
+entropy :: (Floating c, F.Foldable t) => t c -> c
+entropy cx = negate . getSum . F.foldMap  (Sum . f)  \$ cx
where n    = sum cx
logn = logBase 2 n
f nx = nx / n * (logBase 2 nx - logn)

-- | Creates count table 'Counts'
-counts :: (Ord a, Ord b) => [(a,b)] -> Counts a b
-counts xys = foldl' f empty xys
+counts :: (Ord a, Ord b, F.Foldable t) => t (a, b) -> Counts a b
+counts xys = F.foldl' f empty xys
where f cs@(Counts cxy cx cy) (!x,!y) =
cs { joint       = Map.insertWith' (+) (P x y) 1 cxy
, marginalFst = Map.insertWith' (+) x 1 cx
, marginalSnd = Map.insertWith' (+) y 1 cy }
-