# HG changeset patch
# User "Lorenzo M. Catucci"
# Date 1222638694 7200
# Branch trunk
# Node ID fc819b27a2fa6c2d2260ece2a45f25f47055cf9e
# Parent c6e5e56e0313fba54f9cf5f32ddf88c360236481
Second attempt to better define standard_deviation.
diff --git a/webhelpers/number.py b/webhelpers/number.py
--- a/webhelpers/number.py
+++ b/webhelpers/number.py
@@ -77,7 +77,7 @@
high = s[center+1]
return mean([low, high])
-def standard_deviation(r):
+def standard_deviation(r,sample=True):
"""Standard deviation, `from the Python Cookbook
`_
@@ -88,6 +88,13 @@
used to detect whether the average has been skewed by a few extremely high
or extremely low values.
+ By default this function computes the bias-corrected sample
+ estimate of the population standard deviation, applying a
+ correction factor of sqrt(N/(N-1)).
+
+ If you'd rather have the function compute the population standard
+ deviation, set sample=False.
+
The following examples are taken from Wikipedia.
http://en.wikipedia.org/wiki/Standard_deviation
@@ -97,9 +104,16 @@
5.773502691896258...
>>> standard_deviation([6, 6, 8, 8])
1.1547005383792515
+ >>> standard_deviation([0, 0, 14, 14], sample=False)
+ 7.0
+ >>> standard_deviation([0, 6, 8, 14], sample=False)
+ 5.0
+ >>> standard_deviation([6, 6, 8, 8], sample=False)
+ 1.0
- (Wikipedia reports 7, 5, and 1 respectively. Some of the difference is
- due to rounding, but the rest may be a bug?)
+ (The results reported in Wikipedia are those expected for whole
+ population statistics and therefore are equal to the ones we get
+ by setting sample=False in the later tests)
.. code-block:: pycon
@@ -107,11 +121,15 @@
# Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
>>> standard_deviation([70, 70, 70, 75, 80, 85, 90, 95, 90, 80, 75, 70]) # doctest: +ELLIPSIS
9.003366373785...
+ >>> standard_deviation([70, 70, 70, 75, 80, 85, 90, 95, 90, 80, 75, 70], sample=False) # doctest: +ELLIPSIS
+ 8.620067027323...
- # Fictitious average mothly temperatures in Montana.
+ # Fictitious average monthly temperatures in Montana.
# Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
>>> standard_deviation([32, 10, 20, 30, 60, 90, 100, 80, 60, 30, 10, 32]) # doctest: +ELLIPSIS
45.1378360405574...
+ >>> standard_deviation([32, 10, 20, 30, 60, 90, 100, 80, 60, 30, 10, 32], sample=False) # doctest: +ELLIPSIS
+ 43.2161878106906...
Most natural and random phenomena follow the normal distribution (aka the
bell curve), which says that most values are close to average but a few are
@@ -134,8 +152,11 @@
"""
avg = average(r)
sdsq = sum([(i - avg) ** 2 for i in r])
- return (sdsq / (len(r) - 1 or 1)) ** 0.5
-
+ if sample:
+ normal_denom=len(r) - 1 or 1
+ else:
+ normal_denom=len(r)
+ return (sdsq / normal_denom) ** 0.5
class SimpleStats(object):
"""Calculate a few simple stats on data.