# Commits

committed 10b83af

• Participants
• Parent commits 0f5c730
• Branches default

# File praxis/benford.py

`+#!/usr/bin/env python2.6`
`+"""Benford's law`
`+`
`+- http://programmingpraxis.com/2010/10/26/benfords-law/`
`+- http://en.wikipedia.org/wiki/Benford's_law`
`+"""`
`+`
`+import math`
`+import sys`
`+`
`+`
`+def leading_counts(iterable, base=10):`
`+  """Count leading digits.`
`+`
`+  Returns a list with index corresponding to leading digit.`
`+`
`+  Zero is counted for any items equal to zero; absolute value is used for negative numbers.`
`+  """`
`+  assert base >= 1`
`+  digits = [0] * base`
`+  for x in iterable:`
`+    x = abs(x)`
`+    while x >= base:`
`+      x //= base`
`+    digits[x] += 1`
`+  return digits`
`+`
`+def distrib(iterable, base=10, drop_zero=True):`
`+  """Return distribution of each leading digit.`
`+`
`+  If drop_zero, all zero values are discarded.  This has no affect if the data has no zeros.`
`+  """`
`+  counts = leading_counts(iterable, base)`
`+  if drop_zero:`
`+    counts[0] = 0`
`+  total = sum(counts)`
`+  return [1.0 * x / total for x in counts]`
`+`
`+`
`+def expected(digit, base=10):`
`+  """Return probability of given digit according to Benford's law."""`
`+  assert base >= 1`
`+  assert 0 <= digit < base`
`+  return math.log(1 + 1.0 / digit, base)`
`+`
`+`
`+def main(argv):`
`+  args = argv[1:]`
`+  if len(args) > 1:`
`+    return "unexpected arguments"`
`+  if not args or args[0] == "-":`
`+    f = sys.stdin`
`+  else:`
`+    f = open(args[0])`
`+  d = distrib(int(x) for x in f)`
`+  h = "n  actual  expected  diff  diff%"`
`+  widths = [len(x) for x in h.split("  ")]`
`+  print h`
`+  print "  ".join("-" * x for x in widths)`
`+  fmt = "%%%dd  %%%d.2f  %%%d.2f  %%%d.2f  %%%d.2f" % tuple(widths)`
`+  for n, x in zip(xrange(1, 10), d[1:]):`
`+    e = expected(n)`
`+    diff = abs(e - x)`
`+    print fmt % (n, 100 * x, 100 * e, 100 * diff, 100 * diff / e)`
`+`
`+if __name__ == "__main__":`
`+  sys.exit(main(sys.argv))`

# File praxis/test_benford.py

`+import random`
`+import unittest`
`+`
`+import benford`
`+`
`+`
`+random_data = []`
`+# 10 of every leading digit (no zeros)`
`+for x in xrange(1, 10):`
`+  for _ in xrange(10):`
`+    random_data.append(int(str(x) + str(random.randint(1, 1000000))))`
`+`
`+`
`+class Test_leading_counts(unittest.TestCase):`
`+  def test(self):`
`+    self.assertEqual([0, 2] + [0] * 8, benford.leading_counts([1, 100]))`
`+`
`+  def test_random_input(self):`
`+    self.assertEqual([0] + [10] * 9, benford.leading_counts(random_data))`
`+    self.assertEqual([1] + [10] * 9, benford.leading_counts([0] + random_data))`
`+`
`+class Test_distrib(unittest.TestCase):`
`+  def test(self):`
`+    p = benford.distrib(random_data)`
`+    self.assertEqual(0, p[0])`
`+    for n, x in enumerate(p[1:]):`
`+      self.assertAlmostEqual(1.0/9, x, places=7, msg="10 ~= %r; n = %r" % (x, n))`