#!/usr/bin/env python
# Get statistics for modules from standard Python library
#   [x] open patches per module
#     [x] get list of all open issues with patches
#     [x] get patch files for each issue
#     [x] extract paths from each patch
#     [x] detect modules for the issue using paths
#     [x] print list of modules and issues


from __future__ import print_function

import re
import sys
import json

from os import makedirs
from os.path import abspath, exists, dirname
from datetime import datetime
from collections import defaultdict, OrderedDict

PY3K = sys.version_info >= (3, 0)
if PY3K:
  # Python 3: alias the renamed stdlib modules to their Python 2 names
  # so the rest of the script can use a single spelling
  import urllib.request as urllib
  import xmlrpc.client as xmlrpclib
else:
  # Python 2: native module names; alias input() to raw_input() so that
  # interactive prompts behave like Python 3's input()
  import urllib
  import xmlrpclib
  input = raw_input

import stdlib


# -- 0. Bootstrap locally --

# remember start time so total runtime can be reported at the end
START = datetime.now()

ROOT = abspath(dirname(__file__))
CACHE = ROOT + '/.locally/files/'  # downloaded issue attachments
LIB = ROOT + '/.locally/lib/'      # downloaded helper modules
if not exists(CACHE):
  print("Creating cache directory")
  # bug fix: the message was printed but the directory was never created
  makedirs(CACHE)

# 1. create LIB dir to download required modules locally
#    and add it to Python sys.path to make them importable

if not exists(LIB):
  print("Creating directory for additional Python modules")
  makedirs(LIB)
sys.path += [LIB]

# 2. download required Python modules

# (filename, url) pairs fetched into LIB when not already present
# NOTE(review): the URL was lost in the source; python-patch is the
# module imported below as `patch` -- confirm the pinned location
required_files = [
  ('patch.py', 'https://raw.github.com/techtonik/python-patch/master/patch.py'),
]
for f, url in required_files:
  if exists(LIB + f):
    print("Skipping " + f + " download")
  else:
    print("Downloading %s into %s" % (f, LIB))
    urllib.urlretrieve(url, LIB + f)

# 3. import downloaded stuff

import patch

# -- /bootstrap --

# -- 1. Get list of all open issues with patches --
# Using the Roundup XML-RPC interface of the tracker
# [ ] patch keyword filtering may not be reliable
#   [ ] get list of all issues with attachments
#   [x] filter patches

# NOTE(review): the endpoint URL was lost in the source; this is the
# standard Roundup XML-RPC endpoint for bugs.python.org -- confirm
bpo = xmlrpclib.ServerProxy('http://bugs.python.org/xmlrpc', allow_none=True)

# ids of every status except 'closed'
open_status = [bpo.lookup('status', x) for x in bpo.list('status')
               if x != 'closed']
patch_keyword = bpo.lookup('keyword', 'patch')

open_issues = bpo.filter('issue', None, {'status':open_status, 'keywords':patch_keyword})
print('Open issues with patch keyword: %s' % len(open_issues))

# -- 2. Read patch files for each issue and get paths per issue

files_attached = 0  # total number of files attached to open issues
files_patches = 0   # how many of those files parse as patches

# [x] implement caching for issue files
#  [ ] check what urlretrieve does for incomplete files
def fileget(number):
  """ Download and cache specified file from Roundup
      tracker. Return tuple (filepath, cached).
  """
  filename = CACHE + number
  cached = False
  if not exists(CACHE + number):
    try:
      # NOTE(review): download URL was lost in the source; Roundup serves
      # attachments at <tracker>/file<number>/ -- confirm
      urllib.urlretrieve('http://bugs.python.org/file' + number + '/', filename)
    except Exception:
      from traceback import print_exc
      print_exc()
      input("\nException. Press Enter to continue")
  else:
    cached = True
  return (filename, cached)

days = 'Mon|Tue|Wed|Thu|Fri|Sat|Sun'
path_re = re.compile(r'^(?:[ab]/)?(?:python[-23][^/]*/)?(.*?)\s*'
                     r'(?:\s(?:%s|20[01]\d|199\d|\(\w+\s)\b.*)?(?:\.orig)?$' %
                     days, re.I)
def cleanpath(source, target):
    """Return a normalized path for a patch entry.

    Prefer *target* unless it is empty or 'dev/null' (then fall back
    to *source*), then strip leading a/, b/ or python* directories and
    trailing dates, '(working copy)' markers or '.orig' extensions.
    """
    use_source = not target or target == 'dev/null'
    candidate = source if use_source else target
    # if this fails the regex is broken
    return path_re.match(candidate).group(1)

issue_files = {}                   # 'number' => []
issue_patches = defaultdict(list)  # 'number' => []
module_issues = defaultdict(list)  # 'module' => [issue numbers]
python_files = defaultdict(set)  # 'path/file' => {(issuen, title), ...}

for issuen in open_issues:
  issue = bpo.display('issue'+issuen, 'files', 'title')
  issue_files[issuen] = issue['files']
  # encode title to get rid of
  #   UnicodeEncodeError: 'charmap' codec can't encode character u'\u2019' in
  #   position 65: character maps to <undefined>
  print('#%s: %s' % (issuen, issue['title'].encode(errors='replace')))
  for filen in issue_files[issuen]:
    files_attached += 1
    print('  ' + filen, end='')
    # retrieve filename (commented for performance reasons)
    #fname = bpo.display('file'+filen, 'name')['name']
    #print('  file%s (%s)' % (filen, fname), end='')
    # get file
    path, cached = fileget(filen)  # [ ] use fname (or filen-fname)
    if cached:
      print(' (cached)', end='')
    # check if it is a patch
    patchset = patch.fromfile(path)
    if patchset:
      files_patches += 1
      # record the confirmed patch (reported in the summary below)
      issue_patches[issuen].append(filen)
      # detect if there is a module for the patch path
      for entry in patchset.items:
        path = cleanpath(entry.source, entry.target)
        python_files[path].add((issuen, issue['title']))
        module = (stdlib.getmodname(entry.source) or
                  stdlib.getmodname(entry.target))
        if module:
          print(' (module: %s)' % module, end='')
          if issuen not in module_issues[module]:
            module_issues[module].append(issuen)
          # [ ] handle unknown path - ask to add to unknown paths cache
    else:
      print(' (not a patch)', end='')
    # terminate the status line built up with end='' prints
    print()

# summary report: one "label: count" line per metric
counts = (
  ('Open issues with patch keyword', len(open_issues)),
  ('Issues with confirmed patches', len(issue_patches)),
  ('Files attached', files_attached),
  ('Patches', files_patches),
)
for label, value in counts:
  print('%s: %s' % (label, value))
for module, issues in module_issues.items():
  print('  %s: %d' % (module, len(issues)))
print('Files affected by patches: %s' % len(python_files))

# serialize as path -> sorted list of issues, ordered by path,
# converting the dict of (path, set(issues)) into an OrderedDict
data = OrderedDict()
for filepath, related in sorted(python_files.items()):
  data[filepath] = sorted(related)
with open('files.json', 'w') as f:
    json.dump(data, f)

# report wall-clock runtime; [:-4] trims the timedelta's microseconds
# down to hundredths of a second (e.g. '0:01:23.45')
print('Finished in ' + str(datetime.now() - START)[:-4])