Commits

Bryan O'Sullivan  committed 1c0eaaa

Add support for more fast jackknife stats

  • Participants
  • Parent commits 66d9ba5

Comments (0)

Files changed (3)

     the jackknife function to potentially use more efficient jackknife
     implementations.
 
-  * jackknifeMean, jackknifeVariance: new functions. These have O(n) cost
-    instead of the O(n^2) cost of the standard jackknife.
+  * jackknifeMean, jackknifeStdDev, jackknifeVariance,
+    jackknifeVarianceUnb: new functions.  These have O(n) cost instead
+    of the O(n^2) cost of the standard jackknife.
 
   * The mean function has been renamed to welfordMean; a new
     implementation of mean has better numerical accuracy in almost all

File Statistics/Resampling.hs

     , jackknife
     , jackknifeMean
     , jackknifeVariance
+    , jackknifeVarianceUnb
+    , jackknifeStdDev
     , resample
     , estimate
     ) where
 import GHC.Generics (Generic)
 import Numeric.Sum (Summation(..), kbn)
 import Statistics.Function (indices)
-import Statistics.Sample (mean, variance)
+import Statistics.Sample (mean, stdDev, variance, varianceUnbiased)
 import Statistics.Types (Estimator(..), Sample)
 import System.Random.MWC (Gen, initialize, uniform, uniformVector)
 import qualified Data.Vector.Generic as G
 
 -- | Run an 'Estimator' over a sample.
 estimate :: Estimator -> Sample -> Double
-estimate Mean           = mean
-estimate Variance       = variance
+estimate Mean             = mean
+estimate Variance         = variance
+estimate VarianceUnbiased = varianceUnbiased
+estimate StdDev           = stdDev
 estimate (Function est) = est
 
 -- | /O(n) or O(n^2)/ Compute a statistical estimate repeatedly over a
 -- sample, each time omitting a successive element.
 jackknife :: Estimator -> Sample -> U.Vector Double
-jackknife Mean sample     = jackknifeMean sample
-jackknife Variance sample = jackknifeVariance sample
+jackknife Mean sample             = jackknifeMean sample
+jackknife Variance sample         = jackknifeVariance sample
+jackknife VarianceUnbiased sample = jackknifeVarianceUnb sample
+jackknife StdDev sample = jackknifeStdDev sample
 jackknife (Function est) sample
   | G.length sample == 1 = singletonErr "jackknife"
   | otherwise            = U.map f . indices $ sample
     l   = fromIntegral (len - 1)
     len = G.length samp
 
--- | /O(n)/ Compute the jackknife variance of a sample.
-jackknifeVariance :: Sample -> U.Vector Double
-jackknifeVariance samp
+-- | /O(n)/ Compute the jackknife variance of a sample with a
+-- correction factor @c@, so we can get either the regular or
+-- \"unbiased\" variance.
+jackknifeVariance_ :: Double -> Sample -> U.Vector Double
+jackknifeVariance_ c samp
   | len == 1  = singletonErr "jackknifeVariance"
   | otherwise = G.zipWith4 go als ars bls brs
   where
     n = fromIntegral len
     go al ar bl br = (al + ar - (b * b) / q) / q
       where b = bl + br
-            q = n - 1
+            q = n - (1 + c)
     len = G.length samp
 
+-- | /O(n)/ Compute the unbiased jackknife variance of a sample.
+jackknifeVarianceUnb :: Sample -> U.Vector Double
+jackknifeVarianceUnb = jackknifeVariance_ 1
+
+-- | /O(n)/ Compute the jackknife variance of a sample.
+jackknifeVariance :: Sample -> U.Vector Double
+jackknifeVariance = jackknifeVariance_ 0
+
+-- | /O(n)/ Compute the jackknife standard deviation of a sample.
+jackknifeStdDev :: Sample -> U.Vector Double
+jackknifeStdDev = G.map sqrt . jackknifeVarianceUnb
+
 pfxSumL :: U.Vector Double -> U.Vector Double
 pfxSumL = G.map kbn . G.scanl add zero
 

File Statistics/Types.hs

 -- when possible.
 data Estimator = Mean
                | Variance
+               | VarianceUnbiased
+               | StdDev
                | Function (Sample -> Double)
 
 -- | Weights for affecting the importance of elements of a sample.