# Source
#
# python-stdlib / modstats.py
#
# Full commit
#---
#
# Get statistics for modules from standard Python library
#
#---
"""
  [ ] open patches per module
    [x] get list of all open issues with patches
    [ ] get patch files for each issue
    [ ] extract paths from each patch
    [ ] detect modules for the issue using paths
    [ ] print list of modules and issues

"""

from os import makedirs
from os.path import abspath, exists, dirname
from sys import version_info

# urllib and xmlrpclib were reorganized in Python 3. Bind the Python 3
# modules to their Python 2 names so the rest of the script can call
# urllib.urlretrieve and xmlrpclib.ServerProxy on either version.
PY3K = version_info >= (3, 0)
if PY3K:
  import urllib.request as urllib
  import xmlrpc.client as xmlrpclib
else:
  import urllib
  import xmlrpclib

# Directory holding this script; downloaded tracker files are cached in a
# hidden subdirectory right under it. CACHE keeps its trailing slash because
# file names are appended to it by plain string concatenation.
ROOT = abspath(dirname(__file__))
CACHE = '%s/.filecache/' % ROOT
if not exists(CACHE):
  print("Creating cache directory")
  makedirs(CACHE)

# -- 1. Get list of all open issues with patches --
#
# Using http://roundup.sourceforge.net/docs/xmlrpc.html
# 
# [ ] patch keyword filtering may not be reliable
#   [ ] get list of all issues with attachments
#   [ ] filter patches
#

# XML-RPC proxy for the tracker. allow_none is required because None is
# passed as an argument to filter() further down.
bpo = xmlrpclib.ServerProxy('http://bugs.python.org', allow_none=True)

# Ids of every status whose name is not 'closed'
open_status = [
  bpo.lookup('status', x)
  for x in bpo.list('status')
  if x != 'closed'
]
# Id of the 'patch' keyword
patch_keyword = bpo.lookup('keyword', 'patch')

# Issues that have one of the open statuses and carry the patch keyword
search_spec = {'status': open_status, 'keywords': patch_keyword}
open_issues = bpo.filter('issue', None, search_spec)
#print(open_issues)
print('Opened issues with patch keyword: %s' % len(open_issues))


# -- 2. read patch files for each issue and get paths per issue

# Running count of attachments seen across all open issues; incremented
# once per attached file in the issue loop below.
files_attached = 0
# NOTE(review): nothing in the visible code writes to this list --
# presumably meant to collect the attachments that are real patches; confirm.
files_patches = []

# [x] implement caching for issue files
#  [ ] check what urlretrieve does for incomplete files
def fileget(number):
  """Download tracker attachment `number` into CACHE unless already there.

  number -- attachment id as a string; it is used both to build the
            download URL and as the name of the cached file

  The download is written to a temporary '<number>.part' file and renamed
  to its final name only after urlretrieve returns. An interrupted or
  failed transfer therefore never leaves a truncated file that later runs
  would report as cached. Any exception raised during the download is
  re-raised after the partial file is removed.
  """
  import os  # local import: the module header only pulls in `makedirs`

  target = CACHE + number
  if exists(target):
    print('  ' + number + ' (cached)')
    return

  print('  ' + number)
  partial = target + '.part'
  try:
    urllib.urlretrieve('http://bugs.python.org/file' + number + '/', partial)
  except BaseException:
    # also covers Ctrl-C: drop the incomplete download before propagating
    if exists(partial):
      os.remove(partial)
    raise
  # target did not exist a moment ago, so a plain rename is sufficient
  os.rename(partial, target)
      

# Record the ids of the files attached to every open issue and make sure
# each of those files is present in the local cache.
issue_patches = {}  # issue id => list of attached file ids
for issue in open_issues:
  print(issue)
  issue_patches[issue] = bpo.display('issue%s' % issue, 'files')['files']
  for filen in issue_patches[issue]:
    files_attached = files_attached + 1
    fileget(filen)