# HG changeset patch # User "Lorenzo M. Catucci" # Date 1222638694 -7200 # Branch trunk # Node ID fc819b27a2fa6c2d2260ece2a45f25f47055cf9e # Parent c6e5e56e0313fba54f9cf5f32ddf88c360236481 Second attempt to better define standard_deviation. diff --git a/webhelpers/number.py b/webhelpers/number.py --- a/webhelpers/number.py +++ b/webhelpers/number.py @@ -77,7 +77,7 @@ high = s[center+1] return mean([low, high]) -def standard_deviation(r): +def standard_deviation(r,sample=True): """Standard deviation, `from the Python Cookbook `_ @@ -88,6 +88,13 @@ used to detect whether the average has been skewed by a few extremely high or extremely low values. + By default this function computes the sample standard deviation, + i.e. the population formula scaled by Bessel's correction factor + of sqrt(N/(N-1)), which estimates the spread of a larger population. + + To compute the population standard deviation of the data itself + instead, set sample=False. + The following examples are taken from Wikipedia. http://en.wikipedia.org/wiki/Standard_deviation @@ -97,9 +104,16 @@ 5.773502691896258... >>> standard_deviation([6, 6, 8, 8]) 1.1547005383792515 + >>> standard_deviation([0, 0, 14, 14], sample=False) + 7.0 + >>> standard_deviation([0, 6, 8, 14], sample=False) + 5.0 + >>> standard_deviation([6, 6, 8, 8], sample=False) + 1.0 - (Wikipedia reports 7, 5, and 1 respectively. Some of the difference is - due to rounding, but the rest may be a bug?) + (The results reported in Wikipedia are those expected for whole + population statistics and therefore are equal to the ones we get + by setting sample=False in the later tests) .. code-block:: pycon @@ -107,11 +121,15 @@ # Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec >>> standard_deviation([70, 70, 70, 75, 80, 85, 90, 95, 90, 80, 75, 70]) # doctest: +ELLIPSIS 9.003366373785... + >>> standard_deviation([70, 70, 70, 75, 80, 85, 90, 95, 90, 80, 75, 70], sample=False) # doctest: +ELLIPSIS + 8.620067027323... - # Fictitious average mothly temperatures in Montana. 
+ # Fictitious average monthly temperatures in Montana. # Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec >>> standard_deviation([-32, -10, 20, 30, 60, 90, 100, 80, 60, 30, 10, -32]) # doctest: +ELLIPSIS 45.1378360405574... + >>> standard_deviation([-32, -10, 20, 30, 60, 90, 100, 80, 60, 30, 10, -32], sample=False) # doctest: +ELLIPSIS + 43.2161878106906... Most natural and random phenomena follow the normal distribution (aka the bell curve), which says that most values are close to average but a few are @@ -134,8 +152,11 @@ """ avg = average(r) sdsq = sum([(i - avg) ** 2 for i in r]) - return (sdsq / (len(r) - 1 or 1)) ** 0.5 - + if sample: + normal_denom=len(r) - 1 or 1 + else: + normal_denom=len(r) + return (sdsq / normal_denom) ** 0.5 class SimpleStats(object): """Calculate a few simple stats on data.