# HG changeset patch
# User Alexey Khudyakov
# Date 1302289500 -14400
# Node ID 428a78366351de17e9ab1c8667b1a1017bffe9be
# Parent 8aaaa90c1cdb6a4a49f68ab39d0caca93a03755e
Improve documentation. Add examples
diff --git a/Data/Monoid/Statistics.hs b/Data/Monoid/Statistics.hs
--- a/Data/Monoid/Statistics.hs
+++ b/Data/Monoid/Statistics.hs
@@ -15,6 +15,8 @@
, TwoStats(..)
-- * Additional information
-- $info
+ -- * Examples
+ -- $examples
) where
@@ -32,6 +34,7 @@
--
-- Statistic could be calculated with fold over sample. Since
-- accumulator is 'Monoid' such fold could be easily parralelized.
+-- See the examples section for more information.
--
-- Instance must satisfy following law:
--
@@ -92,9 +95,48 @@
-- This indeed proves that monoid could be constructed. Monoid above
-- is completely impractical. It runs in O(n) space. However for some
-- statistics monoids which runs in O(1) space could be
--- implemented. For example mean.
+-- implemented. Simple examples of such statistics are the number of
+-- elements in a sample or the mean of a sample.
--
-- On the other hand some statistics could not be implemented in such
-- way. For example calculation of median require O(n) space. Variance
--- could be implemented in O(1) but such implementation won't be
--- numerically stable.
+-- could be implemented in O(1) but such an implementation will have
+-- problems with numerical stability.
+
+
+
+-- $examples
+--
+-- These examples show how to find maximum and minimum of a sample in
+-- one pass over data.
+--
+-- This is the test data. It is not limited to lists; it could be
+-- anything that can be folded.
+--
+-- > > let xs = [1..100] :: [Double]
+--
+-- Now let's calculate the maximum of the test sample using two
+-- methods. The first is to use the generic function 'evalStatistic'
+-- and the other is to use a fold.
+--
+-- > > evalStatistic xs :: Max
+-- > Max {calcMax = 100.0}
+-- > > foldl (flip pappend) mempty xs :: Max
+-- > Max {calcMax = 100.0}
+--
+-- A more complicated example combines several monoids together,
+-- which allows calculating two statistics in one pass:
+--
+-- > > evalStatistic xs :: TwoStats Min Max
+-- > TwoStats {calcStat1 = Min {calcMin = 1.0}, calcStat2 = Max {calcMax = 100.0}}
+--
+-- The last example shows how to calculate the number of elements,
+-- mean and variance at once:
+--
+-- > > let v = evalStatistic xs :: Variance
+-- > > calcCount v
+-- > 100
+-- > > calcMean v
+-- > 50.5
+-- > > calcStddev v
+-- > 28.86607004772212
diff --git a/Data/Monoid/Statistics/Numeric.hs b/Data/Monoid/Statistics/Numeric.hs
--- a/Data/Monoid/Statistics/Numeric.hs
+++ b/Data/Monoid/Statistics/Numeric.hs
@@ -12,6 +12,7 @@
, Variance(..)
, asVariance
-- ** Ad-hoc accessors
+ -- $accessors
, CalcCount(..)
, CalcMean(..)
, CalcVariance(..)
@@ -190,14 +191,41 @@
-- Ad-hoc type class
----------------------------------------------------------------
+-- $accessors
+--
+-- Monoids 'Count', 'Mean' and 'Variance' form a kind of tower.
+-- Every successive monoid can calculate every statistic that the
+-- previous monoids can. So to avoid replicating accessors for each
+-- statistic, a set of ad-hoc type classes was added.
+--
+-- This approach has a deficiency. It becomes impossible to infer the
+-- type of the monoidal accumulator from the accessor function, so the
+-- following expression will be rejected:
+--
+-- > calcMean $ evalStatistic xs
+--
+-- Indeed type of accumulator is:
+--
+-- > forall a . (StatMonoid a, CalcMean a) => a
+--
+-- Therefore it must be fixed by adding an explicit type annotation.
+-- For example:
+--
+-- > calcMean (evalStatistic xs :: Mean)
+
+
+
+-- | Statistics which can count the number of elements in a sample
class CalcCount m where
-- | Number of elements in sample
calcCount :: m -> Int
+-- | Statistics which can estimate the mean of a sample
class CalcMean m where
-- | Calculate esimate of mean of a sample
calcMean :: m -> Double
+-- | Statistics which can estimate the variance of a sample
class CalcVariance m where
-- | Calculate biased estimate of variance
calcVariance :: m -> Double