{-# LANGUAGE BangPatterns #-}

-- | Scoring functions commonly used for evaluation of NLP

--- systems. Most functions in this module work on lists, but some take

--- a precomputed table of 'Counts'. This will give a speedup if you

--- want to compute multiple scores on the same data. For example to

--- compute the Mutual Information, Variation of Information and the

--- Adujusted Rand Index on the same pair of clusterings:

+-- systems. Most functions in this module work on sequences which are

+-- instances of 'Data.Foldable', but some take a precomputed table of

+-- 'Counts'. This will give a speedup if you want to compute multiple

+-- scores on the same data. For example to compute the Mutual

+-- Information, Variation of Information and the Adujusted Rand Index

+-- on the same pair of clusterings:

-- >>> let cs = counts $ zip "abcabc" "abaaba"

-- >>> mapM_ (print . ($ cs)) [mi, ari, vi]

import qualified Data.Map as Map

import Prelude hiding (sum)

--- | Accuracy: the proportion of elements in the first list equal to

--- elements at corresponding positions in second list. Lists should be

+-- | Accuracy: the proportion of elements in the first sequence equal

+-- to elements at corresponding positions in second

+-- sequence. Sequences should be of equal lengths.

accuracy :: (Eq a, Fractional c, F.Foldable t) => t a -> t a -> c

accuracy xs = mean . map fromEnum . zipWith (==) (F.toList xs) . F.toList

{-# SPECIALIZE accuracy :: [Double] -> [Double] -> Double #-}

-- | Reciprocal rank: the reciprocal of the rank at which the first arguments

--- occurs in the ~~list~~ given as the second argument.

+-- occurs in the sequence given as the second argument.

recipRank :: (Eq a, Fractional b, F.Foldable t) => a -> t a -> b

case [ r | (r,y') <- zip [1::Int ..] . F.toList $ ys , y' == y ] of

empty :: (Ord a, Ord b) => Counts a b

empty = Counts Map.empty Map.empty Map.empty

--- | The sum of a list of numbers (without overflowing stack,

--- unlike 'Prelude.sum').

+-- | The sum of a sequence of numbers

sum :: (F.Foldable t, Num a) => t a -> a

{-# SPECIALIZE sum :: [Double] -> Double #-}

{-# SPECIALIZE sum :: [Int] -> Int #-}

--- | The mean of a ~~list~~ of numbers.

+-- | The mean of a sequence of numbers.

mean :: (F.Foldable t, Fractional n, Real a) => t a -> n

let (P tot len) = F.foldl' (\(P s l) x -> (P (s+x) (l+1))) (P 0 0) xs