# python-stdlib / modstats.py

#!/usr/bin/env python
#---
#
# Get statistics for modules from standard Python library
#
#---
"""
  [x] open patches per module
    [x] get list of all open issues with patches
    [x] get patch files for each issue
    [x] extract paths from each patch
    [x] detect modules for the issue using paths
    [x] print list of modules and issues

"""

from __future__ import print_function
from datetime import datetime
from sys import version_info

# Wall-clock start of the run; the "Finished in" line printed at the very
# end of the script is computed relative to this value.
START = datetime.now()

# Python 2/3 compatibility: bind the same `urllib` and `xmlrpclib` names on
# both major versions so the rest of the script can use them unconditionally.
PY3K = version_info >= (3, 0)
if PY3K:
  import urllib.request as urllib
  import xmlrpc.client as xmlrpclib
else:
  import urllib
  import xmlrpclib
  # On Python 2 make input() read a plain line of text, as it does on 3.
  input = raw_input

# Project-local helper module; its getmodname() is used below to map patch
# paths to module names.
import stdlib

# -- 0. Bootstrap locally --
#
# Prepare the working directories under ROOT/.locally/ and fetch the
# single-file modules this script imports, so nothing has to be installed
# system-wide.

from os import makedirs
from os.path import abspath, exists, dirname
import sys

# The version switch also appears at the top of the file; it is repeated
# here so the bootstrap section does not depend on anything above it.
PY3K = sys.version_info >= (3, 0)
if not PY3K:
  import urllib
else:
  import urllib.request as urllib

ROOT = abspath(dirname(__file__))
CACHE = ROOT + '/.locally/files/'  # files downloaded by fileget()
LIB = ROOT + '/.locally/lib/'      # Python modules downloaded below

# 1. create the working directories (CACHE first, then LIB) and add LIB
#    to Python sys.path to make the downloaded modules importable

for directory, notice in (
    (CACHE, "Creating cache directory"),
    (LIB, "Creating directory for additional Python modules"),
):
  if not exists(directory):
    print(notice)
    makedirs(directory)
sys.path.append(LIB)

# 2. download required Python modules (only those not already in LIB)

required_files = [
  ('patch.py', 'http://python-patch.googlecode.com/svn/trunk/patch.py'),
]
for f, url in required_files:
  target = LIB + f
  if not exists(target):
    print("Downloading %s into %s" % (f, LIB))
    urllib.urlretrieve(url, target)
  else:
    print("Skipping " + f + " download")

# 3. import downloaded stuff

import patch

# -- /bootstrap --

# -- 1. Get list of all open issues with patches --
#
# Using http://roundup.sourceforge.net/docs/xmlrpc.html
# 
# [ ] patch keyword filtering may not be reliable
#   [ ] get list of all issues with attachments
#   [x] filter patches
#

bpo = xmlrpclib.ServerProxy('http://bugs.python.org', allow_none=True)

# Look up every status except 'closed'; the collected lookup results are
# what the issue filter below matches against.
open_status = []
for status_name in bpo.list('status'):
  if status_name == 'closed':
    continue
  open_status.append(bpo.lookup('status', status_name))
patch_keyword = bpo.lookup('keyword', 'patch')

# Issues whose status is one of the non-closed ones and which carry the
# 'patch' keyword.
issue_filter = {'status': open_status, 'keywords': patch_keyword}
open_issues = bpo.filter('issue', None, issue_filter)
print('Open issues with patch keyword: %s' % len(open_issues))


# -- 2. Read patch files for each issue and get paths per issue

# Running totals reported in the final summary: every attachment seen, and
# the subset of attachments that parsed as patches.
files_attached = files_patches = 0

# [x] implement caching for issue files
#  [ ] check what urlretrieve does for incomplete files
def fileget(number):
  """ Download and cache specified file from Roundup
      tracker. Return tuple (filepath, cached).

      `number` is the tracker file number as a string; it is used
      both as the cache file name under CACHE and in the download
      URL. `cached` is True only when the file was already present
      in the cache before this call.

      The download goes to a temporary '<filepath>.part' name and is
      renamed into place only after urlretrieve() finished without
      error, so an interrupted or truncated transfer never leaves a
      broken file that later runs would treat as a cache hit.

      On a download error the traceback is printed, the user is asked
      to press Enter, and (filepath, False) is still returned; in that
      case `filepath` does not exist.
  """
  # Function-scope import: the file only imports `makedirs` from os.
  import os

  filename = CACHE + number
  if exists(filename):
    return (filename, True)

  partial = filename + '.part'
  try:
    urllib.urlretrieve('http://bugs.python.org/file' + number + '/', partial)
    os.rename(partial, filename)
  except Exception:
    # Deliberately not a bare `except:` so that Ctrl+C and SystemExit
    # still terminate the script instead of ending up at the prompt.
    from traceback import print_exc
    print_exc()
    print("\nException. Press Enter to continue")
    input()
  finally:
    # After a successful rename there is nothing left to remove; after
    # any failure (including Ctrl+C) drop the incomplete download.
    if exists(partial):
      os.remove(partial)
  return (filename, False)

# Walk every open issue, fetch its attachments, and record which stdlib
# modules its patches touch.
issue_files = {}    # issue number => value of the issue's 'files' property
issue_patches = {}  # issue number => cached paths of files parsed as patches
module_issues = {}  # module name => issue numbers with a patch touching it
for issuen in open_issues:
  issue = bpo.display('issue'+issuen, 'files', 'title')
  issue_files[issuen] = issue['files']
  print('#%s: %s' % (issuen, issue['title']))
  for filen in issue_files[issuen]:
    files_attached += 1
    print('  ' + filen, end='')
    # get file
    path, cached = fileget(filen)  # [ ] use file name (or filen-name)
    if cached:
      print(' (cached)', end='')
    if not exists(path):
      # fileget() returns the target path even when the download failed,
      # so make sure there is something to parse before handing it on.
      print(' (download failed)')
      continue
    # check if it is a patch
    patchset = patch.fromfile(path)
    if not patchset:
      print(' (not a patch)')
      continue
    files_patches += 1
    issue_patches.setdefault(issuen, []).append(path)
    # detect if there is a module for the patch path
    for entry in patchset.items:
      module = (stdlib.getmodname(entry.source) or
                stdlib.getmodname(entry.target))
      if not module:
        # [ ] handle unknown path - ask to add to unknown paths cache
        continue
      print(' (module: %s)' % module, end='')
      # Each issue is listed at most once per module, however many of its
      # files or patch entries touch that module.
      seen = module_issues.setdefault(module, [])
      if issuen not in seen:
        seen.append(issuen)
    print()


# -- Summary: totals first, then the number of issues per module --
print('')
print('Open issues with patch keyword: %s' % len(open_issues))
print('Issues with confirmed patches: %s' % len(issue_patches))
print('')
print('Files attached: %s' % files_attached)
print('Patches: %s' % files_patches)
print('')
for module, issues in module_issues.items():
  print('  %s: %d' % (module, len(issues)))
print('')
# str(timedelta) ends in '.ffffff' only when microseconds is non-zero.
# Trimming four characters unconditionally would cut into the seconds when
# the fraction is absent, so handle both shapes and always show two
# decimal places.
elapsed = datetime.now() - START
elapsed_text = str(elapsed)
if elapsed.microseconds:
  elapsed_text = elapsed_text[:-4]
else:
  elapsed_text += '.00'
print('Finished in ' + elapsed_text)
# Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
# Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
# Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
# Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
# Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
# Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
# Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.