1. anatoly techtonik
  2. python-stdlib


python-stdlib / stdlib.py

stdlib tools

---[Story Board / Use Cases / User Stories]----------------

Task 001 (complete):

  Given a relative path from the Python checkout, detect to
which module it belongs.

  >>> getmodule(relpath)

Task 002:

  Generate module definition file directly from Python source
or at least validate existing definition file against it to
ensure no modules are missing.

Task 003:

  Find modified files in Python installation (to create
patches to be sent upstream).

---[Reorganization Notes]----------------------------------

[ ] 


import os
import json
import sys

__version__ = '1.0'

# Directory that contains this script; stdlib.json is looked up next to it.
DIR = os.path.abspath(os.path.dirname(__file__))
# Module definitions: a mapping of module name -> list of relative paths
# (see how getmodname() iterates it).  Loaded once at import time; the
# file handle is closed as soon as parsing finishes.
with open(os.path.join(DIR, 'stdlib.json')) as _definitions:
    MODULES = json.load(_definitions)

# Example of the MODULES structure loaded from stdlib.json (truncated):
#
#   {'distutils': ['Doc/distutils', ...],
#    'json': ['Doc/library/json.rst', ...],
#    ...}

# --- helpers ---
def pathsplit(pathstr, maxsplit=1):
    """Split a path into a list of its components.

    Repeatedly applies os.path.split() to the leftmost element until
    either nothing changes any more or the list holds more than
    `maxsplit` elements.

    :param pathstr:   path string to split
    :param maxsplit:  left for compatibility with os.path.split; with the
                      default of 1 the result has at most two elements
                      (head and tail).  Pass None to split the path into
                      all of its components.
    :return: list of path components
    """
    path = [pathstr]
    while True:
        oldpath = path[:]
        # replace the leftmost element with its (head, tail) pair
        path[:1] = list(os.path.split(path[0]))
        if path[0] == '':
            # empty head - the leftmost element had no separator left
            path = path[1:]
        elif path[1] == '':
            # empty tail - trailing separator or a bare root
            path = path[:1] + path[2:]
        if path == oldpath:
            # nothing left to split
            return path
        if maxsplit is not None and len(path) > maxsplit:
            return path

def filepathgen(rootdir):
  """Generate relative full filename paths starting from rootdir.

  Walks `rootdir` with os.walk() and yields the path of every file found
  with the leading `rootdir` part removed.  Directories themselves are
  not yielded.

  :param rootdir:  directory to walk
  :return: generator of file paths relative to `rootdir`
  """
  for root, dirs, files in os.walk(rootdir):
    # calculate relative root
    relroot = root.replace(rootdir, '', 1)
    if relroot.startswith(os.sep):  # may happen after replace
      relroot = relroot.replace(os.sep, '', 1)
    for f in files:   # dirs are not checked against map
      yield os.path.join(relroot, f)

def echo(msg):
    """Print msg to the screen without linefeed and flush the output.

    Standard print() function doesn't flush, so progress messages that
    end with a carriage return may not show up until later.
    """
    # NOTE(review): the original docstring ended with "see:" followed by a
    # reference that is missing from this copy, and the function body was
    # missing as well; the body below implements what the docstring states.
    sys.stdout.write(msg)
    sys.stdout.flush()

# --- /helpers ---

def getmodname(relpath):
  """Return module name for the module from standard library given
  its relative path from source checkout directory.

  A path belongs to a module when one of the paths listed for that
  module in MODULES is a prefix of it, compared component by component.

  :param relpath:  path relative to the source checkout directory
  :return: module name, or '' if no module in MODULES matches
  """
  path = pathsplit(relpath, None)
  for m in MODULES:
    for p in MODULES[m]:
      p = pathsplit(p, None)
      # compare whole components, so 'Lib/json' does not match 'Lib/jsonx'
      if path[:len(p)] == p:
        return m
  # give up only after every module has been checked
  return ''

def checklib(srcroot):
  """Given path to the source root directory:
    [x] calculate stdlib coverage % for current module definition
    [ ] show unknown files
    [ ] show files that belong to two modules at once
    [ ] mechanism to ignore .hg and stuff

  Prints every file that maps to a known module together with the module
  name, followed by a summary line with totals and coverage percentage.

  :param srcroot:  path to the source root directory
  :raises SystemExit: if no files are found under `srcroot`
  """
  FILES = []
  for num, path in enumerate(filepathgen(srcroot)):
    FILES.append(path)
    echo("Reading files: %d\r" % (num+1))
  print("Reading files.. Done.")

  if not FILES:
    sys.exit("Error: No files found at '%s'" % srcroot)

  MATCHED = 0
  for path in FILES:
    name = getmodname(path)
    if name:
      print(name, path)
      MATCHED += 1

  # scale the ratio to a percentage to match the '%%' in the format
  print("Total: %s, Matched: %s, Coverage: %.2f%%" %
        (len(FILES), MATCHED, 100.0 * MATCHED / len(FILES)))
  # [ ] .json entries that were not found
  #  [ ] id for .json entries

# Help text for the command line interface.  The version placeholder is
# filled from __version__; the last line intentionally has no newline.
usage = "\n".join([
    "stdlib.py v%s",
    "usage: stdlib.py <command> [params]",
    "",
    "  check     - check sources for Python stdlib, show stats",
    "  test      - run internal tests ",
]) % __version__

if __name__ == '__main__':
  # Command line entry point: dispatch on the first argument.

  if not sys.argv[1:]:
    # NOTE(review): the body of this branch was missing; showing the usage
    # text and exiting is the presumed intent - confirm.
    sys.exit(usage)

  if sys.argv[1] == 'test':
    # NOTE(review): the original list of test paths was lost; the single
    # entry below is taken from the 'json' example of the module
    # definition format - extend as needed.
    for test in [
        'Doc/library/json.rst',
      ]:
      assert getmodname(test) == 'json'
    print("Tests passed ok.")

  elif sys.argv[1] == 'check':
    # pyconfig.h.in is used as the marker of a Python source checkout
    if 'pyconfig.h.in' not in os.listdir('.'):
      print("Error: No Python source code checkout is found.")
      print("       Please run the check from its directory.")
      sys.exit(1)
    # NOTE(review): presumably the check runs against the current
    # directory, as the message above asks - confirm.
    checklib('.')

  else:
    sys.exit('Error: Unknown command "%s"' % ' '.join(sys.argv[1:]))