hotfiles / hotfiles.py

"""detect hot files

Based on the following blog article:

http://google-engtools.blogspot.com/2011/12/bug-prediction-at-google.html

"""

import re

from math import exp

from mercurial import cmdutil, scmutil
from mercurial.i18n import _
from mercurial.node import nullrev
from mercurial.match import match

# Table handed to cmdutil.command(); the resulting ``command`` decorator is
# what the command definitions in this file are registered with.
cmdtable = {}
try:
    command = cmdutil.command(cmdtable)
except AttributeError:
    # cmdutil has no ``command`` attribute here; tell the user which
    # Mercurial version is needed, then let the original error propagate.
    print("Mercurial version too old, please upgrade to at least 1.9")
    raise

@command('hotfiles',
         [('r', 'rev', '', _('revision'), 'REV'),
          ('p', 'pattern', '', _('pattern to filter issue-fixing commits'), 'REGEX')
          ],
         _('[-r REV] [-p REGEX]')
)
def hotfiles(ui, repo, node=None, rev='.', pattern=None, **opts):
    """list the ten files with the highest time-weighted change score

    Every non-merge changeset up to and including the target revision
    whose description matches the pattern adds a weight of::

        1 / (1 + exp(-12 * t + 12))

    to each file it touched, where t is the changeset date scaled so that
    revision 0 maps to 0 and the target revision maps to 1. Only files
    present in the target revision (and accepted by the configured
    include/exclude patterns) are scored.

    The pattern is taken from -p/--pattern, then from the ``pattern``
    setting of the ``[hotfiles]`` config section, and otherwise defaults
    to ``.*`` (every non-merge changeset). The ``include.glob`` and
    ``exclude.glob`` settings of the same section are handed to the file
    matcher as include/exclude patterns.

    The ten highest-scoring files are written one per line, best first;
    with --verbose each line is prefixed by the integer part of the score.
    """

    if not pattern:
        pattern = ui.config('hotfiles', 'pattern')
        if pattern is None:
            ui.debug('No pattern configured, taking all (non-merge) commits\n')
            pattern = '.*'

    # Terminate the message with a newline, like the debug message above,
    # so it does not run into whatever is written next.
    ui.debug('using %s as pattern to filter changesets\n' % pattern)
    r = re.compile(pattern)

    excludeglob = ui.configlist('hotfiles', 'exclude.glob', [])
    includeglob = ui.configlist('hotfiles', 'include.glob', [])
    m = match(repo.root, '', None, exclude=excludeglob, include=includeglob)

    # A positional revision wins over the -r/--rev option.
    ctx = scmutil.revsingle(repo, node or rev)

    # One (initially empty) list of normalized timestamps per candidate file.
    data = dict((f, []) for f in ctx if m(f))

    rev = ctx.rev()
    if rev == nullrev:
        # Null revision (e.g. an empty repository): there is no revision 0
        # to anchor the time scale and no file to report.
        return

    t, tz = repo[0].date()
    t0 = float(t) - tz
    t, tz = ctx.date()
    t1 = float(t) - tz
    span = t1 - t0

    def ti(t, tz):
        """Scale a (time, offset) date so t0 maps to 0 and t1 maps to 1."""
        if span == 0:
            # The target revision carries the same timestamp as revision 0;
            # treat every changeset as "now" instead of dividing by zero.
            return 1.0
        return (float(t) - tz - t0) / span

    def weight(t):
        """Logistic weight of a normalized timestamp."""
        try:
            return 1.0 / (1.0 + exp((-12 * t) + 12))
        except OverflowError:
            # Changeset dated far before revision 0: exp() overflows and
            # the weight tends to zero.
            return 0.0

    # The target revision itself is part of the history being scored (its
    # date defines t == 1), so iterate up to and including it.
    total = rev + 1
    for rv in xrange(total):
        ctx = repo[rv]

        parents = [p for p in repo.changelog.parentrevs(rv)
                   if p != nullrev]
        if len(parents) == 2:
            # discard merges
            continue
        if not r.search(ctx.description()):
            continue

        t = ti(*ctx.date())

        for f in ctx.files():
            if f in data:
                data[f].append(t)

        ui.progress('revisions', rv, total=total)
    ui.progress('revisions', None)

    d = []
    for pos, (f, ts) in enumerate(data.iteritems()):
        d.append((f, sum(weight(t) for t in ts)))
        ui.progress('score', pos, f, total=len(data))
    ui.progress('score', None)

    # Highest score first; equal scores are ordered by file name so the
    # output does not depend on dict iteration order.
    d.sort(key=lambda item: (-item[1], item[0]))

    for f, s in d[:10]:
        if ui.verbose:
            ui.write('% 5d: %s\n' % (int(s), f))
        else:
            ui.write('%s\n' % f)
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.