Commits

Bryan O'Sullivan committed 800eeea

Refactor the analysis code for reusability.

We now have a nice tidy analyseSample function that other code can use.

  • Participants
  • Parent commits 8230a6c

Comments (0)

Files changed (2)

File Criterion.hs

+{-# LANGUAGE RecordWildCards #-}
 -- |
 -- Module      : Criterion
--- Copyright   : (c) 2009, 2010 Bryan O'Sullivan
+-- Copyright   : (c) 2009, 2010, 2011 Bryan O'Sullivan
 --
 -- License     : BSD-style
 -- Maintainer  : bos@serpentine.com
 
 import Control.Monad ((<=<), forM_, replicateM_, when)
 import Control.Monad.Trans (liftIO)
-import Criterion.Analysis (OutlierVariance(..), classifyOutliers,
-                           outlierVariance, noteOutliers)
+import Criterion.Analysis (OutlierEffect(..), OutlierVariance(..),
+                           SampleAnalysis(..), analyseSample,
+                           classifyOutliers, noteOutliers)
 import Criterion.Config (Config(..), Plot(..), Verbosity(..), fromLJ)
 import Criterion.Environment (Environment(..))
 import Criterion.IO (note, prolix, summary)
 import qualified Data.Vector.Unboxed as U
 import Statistics.Function (create, minMax)
 import Statistics.KernelDensity (epanechnikovPDF)
-import Statistics.Resampling (Resample, resample)
-import Statistics.Resampling.Bootstrap (Estimate(..), bootstrapBCA)
-import Statistics.Sample (mean, stdDev)
+import Statistics.Resampling.Bootstrap (Estimate(..))
 import Statistics.Types (Sample)
-import System.Random.MWC (withSystemRandom)
 import System.Mem (performGC)
 import Text.Printf (printf)
 
                  -> Criterion Sample
 runAndAnalyseOne env _desc b = do
   times <- runBenchmark env b
-  let numSamples = U.length times
-  let ests = [mean,stdDev]
+  ci <- getConfigItem $ fromLJ cfgConfInterval
   numResamples <- getConfigItem $ fromLJ cfgResamples
-  _ <- prolix "bootstrapping with %d resamples\n" numResamples
-  res <- liftIO . withSystemRandom $ \gen ->
-         resample gen ests numResamples times :: IO [Resample]
-  ci <- getConfigItem $ fromLJ cfgConfInterval
-  let [em,es] = bootstrapBCA ci times ests res
-      (effect, v) = outlierVariance em es (fromIntegral $ numSamples)
-      wibble = case effect of
+  _ <- prolix "analysing with %d resamples\n" numResamples
+  SampleAnalysis{..} <- liftIO $ analyseSample ci times numResamples
+  let OutlierVariance{..} = anOutliers
+  let wibble = case ovEffect of
                  Unaffected -> "unaffected" :: String
                  Slight -> "slightly inflated"
                  Moderate -> "moderately inflated"
                  Severe -> "severely inflated"
-  bs "mean" em
+  bs "mean" anMean
   summary ","
-  bs "std dev" es
+  bs "std dev" anStdDev
   summary "\n"
   vrb <- getConfigItem $ fromLJ cfgVerbosity
-  when (vrb == Verbose || (effect > Unaffected && vrb > Quiet)) $ do
+  when (vrb == Verbose || (ovEffect > Unaffected && vrb > Quiet)) $ do
     noteOutliers (classifyOutliers times)
-    _ <- note "variance introduced by outliers: %.3f%%\n" (v * 100)
+    _ <- note "variance introduced by outliers: %.3f%%\n" (ovFraction * 100)
     _ <- note "variance is %s by outliers\n" wibble
     return ()
   return times

File Criterion/Analysis.hs

 -- |
 -- Module      : Criterion.Analysis
--- Copyright   : (c) 2009, 2010 Bryan O'Sullivan
+-- Copyright   : (c) 2009, 2010, 2011 Bryan O'Sullivan
 --
 -- License     : BSD-style
 -- Maintainer  : bos@serpentine.com
 module Criterion.Analysis
     (
       Outliers (..)
+    , OutlierEffect(..)
     , OutlierVariance(..)
+    , SampleAnalysis(..)
+    , analyseSample
     , analyseMean
     , countOutliers
     , classifyOutliers
     , outlierVariance
     ) where
 
+import System.Random.MWC (withSystemRandom)
+import Statistics.Resampling (Resample, resample)
+import Statistics.Resampling.Bootstrap (Estimate(..), bootstrapBCA)
 import Control.Monad (when)
 import Criterion.IO (note)
 import Criterion.Measurement (secs)
 import Data.Monoid (Monoid(..))
 import Statistics.Function (sort)
 import Statistics.Quantile (weightedAvg)
-import Statistics.Resampling.Bootstrap (Estimate(..))
-import Statistics.Sample (mean)
+import Statistics.Sample (mean, stdDev)
 import Statistics.Types (Sample)
 
 -- | Outliers from sample data, calculated using the boxplot
 
 -- | A description of the extent to which outliers in the sample data
 -- affect the sample mean and standard deviation.
-data OutlierVariance = Unaffected -- ^ Less than 1% effect.
-                     | Slight     -- ^ Between 1% and 10%.
-                     | Moderate   -- ^ Between 10% and 50%.
-                     | Severe     -- ^ Above 50% (i.e. measurements
-                                  -- are useless).
-                       deriving (Eq, Ord, Show)
+data OutlierEffect
+    = Unaffected  -- ^ Less than 1% effect.
+    | Slight      -- ^ Between 1% and 10%.
+    | Moderate    -- ^ Between 10% and 50%.
+    | Severe      -- ^ Above 50% (i.e. measurements
+                  -- are useless).
+    deriving (Eq, Ord, Read, Show)
 
 instance Monoid Outliers where
     mempty  = Outliers 0 0 0 0 0
           iqr = q3 - q1
 {-# INLINE classifyOutliers #-}
 
+-- | How strongly the outliers in a sample affect its mean and
+-- standard deviation, expressed both as a category and as a number.
+data OutlierVariance = OutlierVariance
+    { ovEffect   :: OutlierEffect
+      -- ^ Qualitative description of effect.
+    , ovFraction :: Double
+      -- ^ Quantitative description of effect (a fraction between 0 and 1).
+    } deriving (Eq, Read, Show)
+
 -- | Compute the extent to which outliers in the sample data affect
 -- the sample mean and standard deviation.
 outlierVariance :: Estimate     -- ^ Bootstrap estimate of sample mean.
                 -> Estimate     -- ^ Bootstrap estimate of sample
                                 --   standard deviation.
                 -> Double       -- ^ Number of original iterations.
-                -> (OutlierVariance, Double)
-outlierVariance µ σ a = (effect, varOutMin)
+                -> OutlierVariance
+outlierVariance µ σ a = OutlierVariance effect varOutMin
   where
     effect | varOutMin < 0.01 = Unaffected
            | varOutMin < 0.1  = Slight
   noteOutliers . classifyOutliers $ a
   return µ
 
+-- | Result of analysing a sample with 'analyseSample'.
+data SampleAnalysis = SampleAnalysis
+    { anMean     :: Estimate
+      -- ^ Bootstrap estimate of the sample mean.
+    , anStdDev   :: Estimate
+      -- ^ Bootstrap estimate of the sample standard deviation.
+    , anOutliers :: OutlierVariance
+      -- ^ Effect of outliers on the two estimates above, as computed
+      -- by 'outlierVariance'.
+    } deriving (Eq, Show)
+
+-- | Bootstrap a sample to obtain estimates of its mean and standard
+-- deviation, then measure how much outliers affect those estimates.
+--
+-- The random number generator used for resampling is obtained via
+-- 'withSystemRandom'.
+analyseSample :: Double         -- ^ Confidence interval, handed to
+                                --   'bootstrapBCA'.
+              -> Sample         -- ^ Sample data to analyse.
+              -> Int            -- ^ Number of resamples to perform
+                                --   when bootstrapping.
+              -> IO SampleAnalysis
+analyseSample ci samples numResamples = do
+  boots <- withSystemRandom $ \rng ->
+           resample rng estimators numResamples samples :: IO [Resample]
+  let [bootMean,bootStdDev] = bootstrapBCA ci samples estimators boots
+  return SampleAnalysis {
+               anMean = bootMean
+             , anStdDev = bootStdDev
+             , anOutliers = outlierVariance bootMean bootStdDev sampleCount
+             }
+  where
+    -- Listed in the same order in which the pattern above names the
+    -- resulting estimates: mean first, standard deviation second.
+    estimators  = [mean,stdDev]
+    -- 'outlierVariance' takes the number of samples as a 'Double'.
+    sampleCount = fromIntegral (U.length samples)
+
 -- | Display a report of the 'Outliers' present in a 'Sample'.
 noteOutliers :: Outliers -> Criterion ()
 noteOutliers o = do