# hotfiles / hotfiles.py

"""detect hot files

This work is based on the following blog article:

http://google-engtools.blogspot.com/2011/12/bug-prediction-at-google.html

This extension can be configured through the configuration file to avoid
repeating the same parameters again and again on the command line. The
configuration keys live under the ``hotfiles`` section; all of them are
optional. For example::

    [hotfiles]
    pattern = issue\d+
    include.glob = 
      **/*.py
    exclude.glob =
      README*
      .hg*
"""

import re

from math import exp

from mercurial import cmdutil, scmutil, commands
from mercurial.i18n import _
from mercurial.node import nullrev
from mercurial.match import match

# Table handed to cmdutil.command(); the resulting ``command`` decorator is
# what the functions below use to declare themselves as commands.
cmdtable = {}
try:
    command = cmdutil.command(cmdtable)
except AttributeError:
    # cmdutil has no ``command`` attribute: tell the user why before letting
    # the original error propagate.  A parenthesised single argument prints
    # the same text on Python 2 and stays valid syntax on Python 3, which the
    # bare print statement is not.
    print("Mercurial version too old, please upgrade to at least 1.9")
    raise

def _timescale(start, end):
    """Return a function mapping a UTC timestamp onto the start..end span.

    ``start`` maps to 0.0 and ``end`` to 1.0.  When both are equal there is
    nothing to normalise against, so every timestamp is treated as 1.0
    instead of dividing by zero.
    """
    span = end - start

    def scale(when, tz):
        # ``tz`` is only accepted so that a changeset's date() tuple can be
        # passed with ``*``; it is ignored because ``when`` is already UTC.
        if not span:
            return 1.0
        return (float(when) - start) / span

    return scale


def _hotness(t):
    """Return the weight of one matching commit at normalised time ``t``.

    This is the logistic curve 1 / (1 + e^(-12t + 12)): close to 0 for old
    commits (t near 0) and 0.5 for a commit at t == 1.
    """
    try:
        return 1 / (1 + exp((-12 * t) + 12))
    except OverflowError:
        # ``t`` is so far below 0 that the exponential does not fit in a
        # float; the weight is indistinguishable from zero.
        return 0.0


@command('hotfiles',
         [('r', 'rev', '', _('operate up to a given revision'), 'REV'),
          ('p', 'pattern', '', _('pattern to filter issue-fixing commits'),
           'REGEX')
          ] + commands.walkopts,
         _('[-r REV] [-p REGEX]')
)
def hotfiles(ui, repo, node=None, rev='.', pattern=None, **opts):
    """ Print the ten files most likely to contain issues.

    The calculation is based on the number of times a file was touched
    during a bug-fixing commit; later commits weigh more than earlier ones.

    The list of files analysed is based on the files present in the
    specified revision; this list can be filtered through the usage of
    include/exclude patterns on the command line or in the configuration
    file.

    The list of revisions analysed is by default all the non-merge
    revisions up to and including the specified one.  This list can be
    filtered by using a pattern given through the command line or the
    configuration file.
    """
    # Parameters (kept out of the docstring, which doubles as help text):
    #   ui, repo -- the ui and repository objects the command runs against
    #   node     -- optional positional revision; takes precedence over rev
    #   rev      -- value of -r/--rev
    #   pattern  -- value of -p/--pattern, a regular expression searched in
    #               changeset descriptions
    #   opts     -- remaining options; 'include' and 'exclude' are read here
    # Nothing is returned; the result is written through ui.write().

    if not pattern:
        pattern = ui.config('hotfiles', 'pattern')
        if pattern is None:
            ui.debug('No pattern configured, taking all non-merge commits\n')
            pattern = '.*'

    ui.debug('using %s as pattern to filter changesets\n' % pattern)
    fixre = re.compile(pattern)

    # Configured globs come first, command-line ones are appended to them.
    excludeglob = ui.configlist('hotfiles', 'exclude.glob', [])
    excludeglob.extend(opts.get('exclude', []))
    includeglob = ui.configlist('hotfiles', 'include.glob', [])
    includeglob.extend(opts.get('include', []))
    m = match(repo.root, '', None, exclude=excludeglob, include=includeglob)

    if not node:
        node = rev

    target = scmutil.revsingle(repo, node)

    # One (initially empty) list of normalised commit times per selected file.
    data = dict((f, []) for f in target if m(f))
    if not data:
        # Nothing can be reported, so skip the history walk altogether.  This
        # also avoids looking up revision 0 in an empty repository.
        ui.debug('no file selected, nothing to do\n')
        return

    lastrev = target.rev()

    # The first element of a changeset date is already seconds since the
    # epoch in UTC; the second one is only the committer's display offset.
    # Subtracting it would compare local wall-clock times across timezones.
    ti = _timescale(float(repo[0].date()[0]), float(target.date()[0]))

    # The target revision itself is part of the analysed history.
    total = lastrev + 1
    for rv in xrange(total):
        # Report progress first so skipped revisions advance the bar too.
        ui.progress('revisions', rv, total=total)

        parents = [p for p in repo.changelog.parentrevs(rv)
                   if p != nullrev]
        if len(parents) == 2:
            # discard merges
            continue

        ctx = repo[rv]
        if not fixre.search(ctx.description()):
            continue

        t = ti(*ctx.date())

        for f in ctx.files():
            if f in data:
                data[f].append(t)
    ui.progress('revisions', None)

    scores = []
    nfiles = len(data)
    for pos, (f, stamps) in enumerate(data.iteritems()):
        score = sum(_hotness(t) for t in stamps)
        # Files never touched by a matching commit are left out entirely.
        if score != 0:
            scores.append((f, score))
        ui.progress('score', pos, f, total=nfiles)
    ui.progress('score', None)

    # Highest score first; ties are broken by file name so that the output
    # does not depend on dictionary ordering.
    scores.sort(key=lambda item: (-item[1], item[0]))

    for f, score in scores[:10]:
        if ui.verbose:
            ui.write('% 5d: %s\n' % (int(score), f))
        else:
            ui.write('%s\n' % f)
# Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
# Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
# Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
# Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
# Tip: Use the up and down arrow keys to navigate and return to view the file.
# Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
# Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.