# python-stdlib / modstats.py

#---
#
# Get statistics for modules from standard Python library
#
#---
"""
  [ ] open patches per module
    [x] get list of all open issues with patches
    [x] get patch files for each issue
    [ ] extract paths from each patch
    [ ] detect modules for the issue using paths
    [ ] print list of modules and issues

"""

from os import makedirs
from os.path import abspath, exists, dirname
from sys import version_info
from datetime import datetime

# Wall-clock start; the elapsed time is reported at the end of the run
START = datetime.now()

# Python 2/3 compatibility: whichever branch runs, the rest of the script
# sees the same `urllib`, `xmlrpclib` and `input` names
PY3K = version_info[0] >= 3
if not PY3K:
  import urllib
  import xmlrpclib
  input = raw_input
else:
  import urllib.request as urllib
  import xmlrpc.client as xmlrpclib

# Downloaded issue files are cached in a directory next to this script;
# create it on first run
ROOT = abspath(dirname(__file__))
CACHE = ROOT + '/.filecache/'
if not exists(CACHE):
  print("Creating cache directory")
  makedirs(CACHE)

# -- 1. Get list of all open issues with patches --
#
# Using http://roundup.sourceforge.net/docs/xmlrpc.html
# 
# [ ] patch keyword filtering may not be reliable
#   [ ] get list of all issues with attachments
#   [ ] filter patches
#

# allow_none=True is needed because None is sent as an argument to
# bpo.filter() below
bpo = xmlrpclib.ServerProxy('http://bugs.python.org', allow_none=True)

# Look up the id of every status except 'closed'
open_status = [
  bpo.lookup('status', x)
  for x in bpo.list('status')
  if x != 'closed'
]
# Id of the 'patch' keyword
patch_keyword = bpo.lookup('keyword', 'patch')

# Issues whose status is one of the open ones and that carry the patch
# keyword. Per the Roundup XML-RPC docs, None in place of an id list runs
# the filter over all issues.
open_issues = bpo.filter(
  'issue',
  None,
  {'status':open_status, 'keywords':patch_keyword}
)
print('Open issues with patch keyword: %s' % len(open_issues))


# -- 2. read patch files for each issue and get paths per issue

# Running total of files attached to the open issues processed below
files_attached = 0
# NOTE(review): never read or filled in the code visible here -- presumably
# reserved for the "filter patches" TODO step; confirm before removing
files_patches = []

# [x] implement caching for issue files
#  [x] incomplete downloads never reach the cache: the file is fetched
#      under a temporary '.part' name and renamed only on success
def fileget(number):
  """Download tracker file `number` into CACHE unless it is already there.

  number -- file id as a string; it is used both in the download URL and
            as the name of the cached file

  Returns None. A failed download prints the traceback and waits for the
  user to press Enter, then returns so the caller can continue with the
  next file. Nothing is left in the cache for a failed download, so it is
  retried on the next run.
  """
  from os import remove, rename
  from traceback import print_exc

  target = CACHE + number
  if exists(target):
    print('  ' + number + ' (cached)')
    return

  print('  ' + number)
  # urlretrieve leaves whatever it managed to write on disk when it fails
  # (e.g. ContentTooShortError). Writing straight to `target` would make a
  # truncated file show up as "(cached)" on the next run.
  partial = target + '.part'
  try:
    urllib.urlretrieve('http://bugs.python.org/file' + number + '/', partial)
  except Exception:
    # Exception rather than a bare except, so Ctrl-C still aborts the script
    print_exc()
    print("\nException. Press Enter to continue")
    input()
  else:
    rename(partial, target)
  finally:
    # Only present here if the download or the rename did not complete
    if exists(partial):
      remove(partial)
      

# Maps each issue number to the list of file ids attached to that issue
issue_patches = {}  # 'number' => []
for issue in open_issues:
  print(issue)
  # Ask the tracker only for the 'files' property of this issue
  issue_patches[issue] = bpo.display('issue'+issue, 'files')['files']
  files_attached += len(issue_patches[issue])
  # Fetch every attachment (served from the local cache when present)
  for filen in issue_patches[issue]:
    fileget(filen)

# Summary
print('')
print('Open issues with patch keyword: %s' % len(open_issues))
print('Number of files attached: %s' % files_attached)
print('')
print('Finished in %s' % (datetime.now() - START))
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.