# Commits

committed 3ba5e70 Merge

Merge branch 'master' of github.com:Shimuuar/statistics

• Participants
• Parent commits 52a5ebb, bc6b136
• Branches default

# File Statistics/Distribution/Normal.hs

--   sample. Variance is estimated using maximum likelihood method
--   (biased estimation).
normalFromSample :: S.Sample -> NormalDistribution
-normalFromSample a = normalDistr (S.mean a) (S.stdDev a)
+normalFromSample xs
+  = normalDistr m (sqrt v)
+  where
+    (m,v) = S.meanVariance xs

density :: NormalDistribution -> Double -> Double
density d x = exp (-xm * xm / (2 * sd * sd)) / ndPdfDenom d

# File Statistics/Function.hs

-- non-negative integer.  If the given value is already a power of
-- two, it is returned unchanged.  If negative, zero is returned.
nextHighestPowerOfTwo :: Int -> Int
-nextHighestPowerOfTwo n = o + 1
-    where m = n - 1
-          o = m
-              .|. (m `shiftR` 1)
-              .|. (m `shiftR` 2)
-              .|. (m `shiftR` 4)
-              .|. (m `shiftR` 8)
-              .|. (m `shiftR` 16)
-#if WORD_SIZE_IN_BITS == 64
-              .|. (m `shiftR` 32)
-#endif
+nextHighestPowerOfTwo n
+#if WORD_SIZE_IN_BITS == 64
+  = 1 + _i32
+#else
+  = 1 + i16
+#endif
+  where
+    i0   = n - 1
+    i1   = i0  .|. i0  `shiftR` 1
+    i2   = i1  .|. i1  `shiftR` 2
+    i4   = i2  .|. i2  `shiftR` 4
+    i8   = i4  .|. i4  `shiftR` 8
+    i16  = i8  .|. i8  `shiftR` 16
+    _i32 = i16 .|. i16 `shiftR` 32
+-- It could be implemented as
+--
+-- > nextHighestPowerOfTwo n = 1 + foldl' go (n-1) [1, 2, 4, 8, 16, 32]
+--     where go m i = m .|. m `shiftR` i
+--
+-- But GHC do not inline foldl (probably because it's recursive) and
+-- as result function walks list of boxed ints. Hand rolled version
+-- uses unboxed arithmetic.

# File Statistics/Resampling/Bootstrap.hs

import Control.DeepSeq (NFData)
import Control.Exception (assert)
-import Control.Monad.Par (runPar, parMap)
+-- Workaround for the bug https://github.com/simonmar/monad-par/issues/23
+-- As suggested by Simon Marlow direct scheduler is used.
+--   Issue #45 tracks this workaround.
import Data.Data (Data)
import Data.Typeable (Typeable)
import Data.Vector.Unboxed ((!))

# File tests/Tests/Distribution.hs

cdfTests :: (Param d, Distribution d, QC.Arbitrary d, Show d) => T d -> [Test]
cdfTests t =
[ testProperty "C.D.F. sanity"        \$ cdfSanityCheck         t
-  , testProperty "CDF limit at +∞"      \$ cdfLimitAtPosInfinity  t
-  , testProperty "CDF limit at -∞"      \$ cdfLimitAtNegInfinity  t
-  , testProperty "CDF at +∞ = 1"        \$ cdfAtPosInfinity       t
-  , testProperty "CDF at -∞ = 1"        \$ cdfAtNegInfinity       t
+  , testProperty "CDF limit at +inf"    \$ cdfLimitAtPosInfinity  t
+  , testProperty "CDF limit at -inf"    \$ cdfLimitAtNegInfinity  t
+  , testProperty "CDF at +inf = 1"      \$ cdfAtPosInfinity       t
+  , testProperty "CDF at -inf = 1"      \$ cdfAtNegInfinity       t
, testProperty "CDF is nondecreasing" \$ cdfIsNondecreasing     t
, testProperty "1-CDF is correct"     \$ cdfComplementIsCorrect t
]
-- Check that discrete CDF is correct
discreteCDFcorrect :: (DiscreteDistr d) => T d -> d -> Int -> Int -> Property
discreteCDFcorrect _ d a b
-  = printTestCase (printf "CDF = %g" p1)
-  \$ printTestCase (printf "Sum = %g" p2)
-  \$ printTestCase (printf "Δ   = %g" (abs (p1 - p2)))
+  = printTestCase (printf "CDF   = %g" p1)
+  \$ printTestCase (printf "Sum   = %g" p2)
+  \$ printTestCase (printf "Delta = %g" (abs (p1 - p2)))
\$ abs (p1 - p2) < 3e-10
-- Avoid too large differeneces. Otherwise there is to much to sum
--
where
-- Student-T
testStudentPDF ndf x exact
-      = testAssertion (printf "density (studentT %f) %f  %f" ndf x exact)
+      = testAssertion (printf "density (studentT %f) %f ~ %f" ndf x exact)
\$ eq 1e-5  exact  (density (studentT ndf) x)
testStudentCDF ndf x exact
-      = testAssertion (printf "cumulative (studentT %f) %f  %f" ndf x exact)
+      = testAssertion (printf "cumulative (studentT %f) %f ~ %f" ndf x exact)
\$ eq 1e-5  exact  (cumulative (studentT ndf) x)
-- F-distribution
testFdistrPDF n m x exact
-      = testAssertion (printf "density (fDistribution %i %i) %f  %f [got %f]" n m x exact d)
+      = testAssertion (printf "density (fDistribution %i %i) %f ~ %f [got %f]" n m x exact d)
\$ eq 1e-5  exact d
where d = density (fDistribution n m) x
testFdistrCDF n m x exact
-      = testAssertion (printf "cumulative (fDistribution %i %i) %f  %f [got %f]" n m x exact d)
+      = testAssertion (printf "cumulative (fDistribution %i %i) %f ~ %f [got %f]" n m x exact d)
\$ eq 1e-5  exact d
where d = cumulative (fDistribution n m) x

# File tests/Tests/Function.hs

import Test.Framework
import Test.Framework.Providers.QuickCheck2

+import Tests.Helpers
import Statistics.Function

tests :: Test
tests = testGroup "S.Function"
-  [ testProperty "Sort is sort" p_sort
+  [ testProperty  "Sort is sort"                p_sort
+  , testAssertion "nextHighestPowerOfTwo is OK" p_nextHighestPowerOfTwo
]

where
v = sort \$ U.fromList xs

-
+p_nextHighestPowerOfTwo :: Bool
+p_nextHighestPowerOfTwo
+  = all (\(good, is) -> all ((==good) . nextHighestPowerOfTwo) is) lists
+  where
+    pows  = [1 .. 17]
+    lists = [ (2^m, [2^n+1 .. 2^m]) | (n,m) <- pows `zip` tail pows ]