Source

python-stdlib / modstats.py

Full commit
#!/usr/bin/env python
#---
#
# Get statistics for modules from standard Python library
#
#---
"""
  [ ] open patches per module
    [x] get list of all open issues with patches
    [x] get patch files for each issue
    [ ] extract paths from each patch
    [ ] detect modules for the issue using paths
    [ ] print list of modules and issues

"""

from datetime import datetime
from sys import version_info, stdout

# Wall-clock start time; subtracted from datetime.now() at the very end of
# the script to report how long the whole run took
START = datetime.now()

# Python 2/3 compatibility: bind the same names (urllib, xmlrpclib, input)
# on both major versions so the rest of the script can use them unchanged
PY3K = version_info >= (3, 0)
if PY3K:
  import urllib.request as urllib
  import xmlrpc.client as xmlrpclib
else:
  import urllib
  import xmlrpclib
  input = raw_input

# Provides getmodname(), which is used below to map the paths found in a
# patch to a module name
import stdlib

# -- 0. Bootstrap locally --

from os import makedirs, rename
from os.path import abspath, exists, dirname
import sys

# Same compatibility import as at the top of the file, repeated here so the
# bootstrap section does not depend on anything above it
PY3K = sys.version_info >= (3, 0)
if PY3K:
  import urllib.request as urllib
else:
  import urllib

# Everything this script downloads is kept under .locally/ next to it
ROOT = abspath(dirname(__file__))
CACHE = ROOT + '/.locally/files/'   # files fetched from the tracker
LIB = ROOT + '/.locally/lib/'       # downloaded Python modules
if not exists(CACHE):
  print("Creating cache directory")
  makedirs(CACHE)

# 1. create LIB dir to download required modules locally
#    and add it to Python sys.path to make them importable

if not exists(LIB):
  print("Creating directory for additional Python modules")
  makedirs(LIB)
sys.path += [LIB]

# 2. download required Python modules
#
# NOTE(review): the module is fetched over plain HTTP without any integrity
# check and then imported, i.e. executed - consider pinning a checksum.
# NOTE(review): googlecode.com hosting has been shut down, so this URL most
# likely no longer works - confirm and point it at the current location.

required_files = [
  ('patch.py', 'http://python-patch.googlecode.com/svn/trunk/patch.py'),
]
for f, url in required_files:
  if exists(LIB + f):
    print("Skipping " + f + " download")
  else:
    print("Downloading %s into %s" % (f, LIB))
    # Download under a temporary name and rename only after urlretrieve
    # returned successfully; otherwise an interrupted or truncated transfer
    # would leave a partial file that the exists() check above skips on
    # every later run
    partial = LIB + f + '.part'
    urllib.urlretrieve(url, partial)
    rename(partial, LIB + f)

# 3. import downloaded stuff

import patch

# -- /bootstrap --

# -- 1. Get list of all open issues with patches --
#
# Using http://roundup.sourceforge.net/docs/xmlrpc.html
# 
# [ ] patch keyword filtering may not be reliable
#   [ ] get list of all issues with attachments
#   [x] filter patches
#

# XML-RPC proxy for the bugs.python.org Roundup tracker
bpo = xmlrpclib.ServerProxy('http://bugs.python.org', allow_none=True)

# Resolve every status except 'closed' through bpo.lookup(); the results are
# what the 'status' filter below is matched against
open_status = []
for status_name in bpo.list('status'):
  if status_name != 'closed':
    open_status.append(bpo.lookup('status', status_name))
patch_keyword = bpo.lookup('keyword', 'patch')

# Issues that are in one of the open statuses and carry the 'patch' keyword
issue_filter = {'status': open_status, 'keywords': patch_keyword}
open_issues = bpo.filter('issue', None, issue_filter)
print('Open issues with patch keyword: %s' % len(open_issues))


# -- 2. Read patch files for each issue and get paths per issue

# Totals for the final report: all files attached to the listed issues, and
# how many of those files were recognized as patches
files_attached = files_patches = 0

# [x] implement caching for issue files
#  [ ] check what urlretrieve does for incomplete files
def fileget(number):
  """ Download and cache specified file from Roundup
      tracker. Return tuple (filepath, cached).

      number   -- tracker file id as a string; it is appended both to
                  the cache directory and to the download URL
      filepath -- CACHE + number; no file exists at this path if the
                  download failed
      cached   -- True when the file was already in the cache and
                  nothing was downloaded

      Download errors are not raised: the traceback is printed and
      the user is asked to press Enter before the script continues.
  """
  from os import rename

  filename = CACHE + number
  if exists(filename):
    return (filename, True)

  # Fetch under a temporary name and move the file into place only after
  # urlretrieve returned successfully. A truncated or interrupted download
  # then never occupies the cache name, so it cannot be reported as
  # "cached" on the next run. A stale .part file is simply overwritten.
  partial = filename + '.part'
  try:
    urllib.urlretrieve('http://bugs.python.org/file' + number + '/', partial)
    rename(partial, filename)
  except Exception:
    # Exception instead of a bare except, so that Ctrl-C aborts the run
    # rather than being treated as a download error
    from traceback import print_exc
    print_exc()
    print("\nException. Press Enter to continue")
    input()
  return (filename, False)

issue_files = {}    # issue => list of files attached to it
issue_patches = {}  # issue => list of cached paths that parsed as patches
module_issues = {}  # module => list of issues with a patch touching it
for issue in open_issues:
  print(issue)
  issue_files[issue] = bpo.display('issue'+issue, 'files')['files']
  for filen in issue_files[issue]:
    files_attached += 1
    stdout.write('  ' + filen)
    # get file
    path, cached = fileget(filen)
    if cached:
      stdout.write(' (cached)')
    if not exists(path):
      # fileget() reports download errors itself and still returns the
      # path. Without this check patch.fromfile() would be handed a file
      # that does not exist right after the user chose to continue.
      stdout.write(' (download failed)\n')
      continue
    # check if it is a patch
    patchset = patch.fromfile(path)
    if not patchset:
      stdout.write(' (not a patch)\n')
      continue
    files_patches += 1
    issue_patches.setdefault(issue, []).append(path)
    # detect if there is a module for the patch path
    for entry in patchset.items:
      module = (stdlib.getmodname(entry.source) or
                stdlib.getmodname(entry.target))
      if not module:
        # [ ] handle unknown path - ask to add to unknown paths cache
        continue
      stdout.write(' (module: %s)' % module)
      # record each issue at most once per module, even if several files
      # or several entries of one patch map to the same module
      touched = module_issues.setdefault(module, [])
      if issue not in touched:
        touched.append(issue)
    stdout.write('\n')


# Final report: overall totals first, then the number of issues per module
summary = (
  '',
  'Open issues with patch keyword: %s' % len(open_issues),
  'Issues with confirmed patches: %s' % len(issue_patches),
  '',
  'Files attached: %s' % files_attached,
  'Patches: %s' % files_patches,
  '',
)
for line in summary:
  print(line)
for module in module_issues:
  print('  %s: %d' % (module, len(module_issues[module])))
print('')
# [:-4] drops the last four characters of the timedelta string, which are
# normally the trailing microsecond digits
elapsed = datetime.now() - START
print('Finished in ' + str(elapsed)[:-4])