Source

wikify / tests / runtest.py

#!/usr/bin/env python
import sys
import unittest
import os.path
import re

# absolute path of the directory that holds this test script
testdir = os.path.split(os.path.abspath(__file__))[0]
# wikify.py lives one level up: put that directory first on the import path
sys.path[:0] = [os.path.split(testdir)[0]]

import wikify


# --- configure wikification rules ---

# --- replacement functions
def make_file_link(match):
    """Return the HTML link for a repository path found in the text.

    *match* must expose the named groups ``sep``, ``path`` and ``lnum``.
    Files are linked with the optional line number kept inside the link
    text (and used as a ``#l<N>`` anchor); for directories (paths ending
    with '/') any matched ``:<N>`` suffix is emitted after the link.
    """
    baseurl = 'http://hg.python.org/cpython/file/default/'
    sep, path = match.group('sep'), match.group('path')
    # the optional group still carries its leading ':'; None -> ''
    lnum = match.group('lnum') or ''
    # the URL always uses '/', the link text keeps the path as written
    target = path.replace('\\', '/')

    if target.endswith('/'):
        # directory: the line number (if any) stays outside the link
        return '<a href="%s%s">%s%s</a>%s' % (baseurl, target, sep, path, lnum)
    if lnum:
        # file with a line number: strip the ':' for the anchor
        return '<a href="%s%s#l%s">%s%s%s</a>' % (baseurl, target, lnum[1:],
                                                  sep, path, lnum)
    # plain file
    return '<a href="%s%s">%s%s</a>' % (baseurl, target, sep, path)

def guess_version(path):
    """Guess the Python branch from version hints in *path*.

    Recognizes either two digits right after "python"/"Python" (e.g.
    ``Python27``) or a dotted ``X.Y`` anywhere in the path.  Returns the
    version as ``'X.Y'`` when it is one of the known branches, otherwise
    ``'default'``.
    """
    known_branches = ['2.5', '2.6', '2.7', '3.1', '3.2', '3.3']
    found = re.search(r'((?<=[Pp]ython)[23]\d|[23]\.\d)', path)
    if found is None:
        return 'default'
    hint = found.group(1)
    # a hint without a dot is two bare digits: '27' -> '2.7'
    candidate = hint if '.' in hint else '.'.join(hint)
    return candidate if candidate in known_branches else 'default'

def make_traceback_link(match):
    """Turn the file/line part of a traceback line into a link.

    *match* must expose the named groups ``fullpath``, ``path``, ``file``
    and ``lnum``.  Only the part of the path after ``Lib/`` becomes the
    link text; the leading part is emitted unchanged in front of it.
    """
    baseurl = 'http://hg.python.org/cpython/file/'
    line_number = match.group('lnum')
    prefix = match.group('path')       # everything up to and including Lib/
    relative = match.group('file')     # what follows Lib/
    # the branch is guessed from the complete path, not just the prefix
    branch = guess_version(match.group('fullpath'))
    # the URL always uses '/', the link text keeps the path as written
    url_path = relative.replace('\\', '/')
    return ('File "%s<a href="%s%s/Lib/%s#l%s">%s</a>", line %s' %
            (prefix, baseurl, branch, url_path, line_number, relative,
             line_number))

def make_pep_link(match):
    """Link a PEP reference to its page on python.org.

    Group 1 of *match* must hold the PEP number; it is zero-padded to
    four digits for the URL, while the link text is the matched text
    exactly as written.
    """
    padded = match.group(1).zfill(4)
    return ('<a href="http://www.python.org/dev/peps/pep-%s/">%s</a>'
            % (padded, match.group(0)))
# --/ replacement functions

# Table of (pattern, replacement) pairs.  The replacement is either a
# template string or one of the replacement functions defined above, which
# receive the match object and return the HTML to insert.
# NOTE(review): '\0' in the templates presumably stands for the whole match
# and is expanded by wikify.RegexpRule -- plain re.sub() would read it as an
# octal escape; confirm in wikify.py.
substitutions = [
    # URLs (adapted from Roundup)
    # replacement does not escape HTML chars
    # (?xi): verbose and case-insensitive; the '#' comments inside the
    # pattern are part of the verbose regex, not Python comments
    (r'''(?xi)\b
         (
          (ht|f)tp(s?)://                   # protocol
          ([\w]+(:\w+)?@)?                  # username/password
          ([\w\-]+)                         # hostname
          ((\.[\w-]+)+)?                    # .domain.etc
         |                                  # ... or ...
          ([\w]+(:\w+)?@)?                  # username/password
          www\.                             # "www."
          ([\w\-]+\.)+                      # hostname
          [\w]{2,5}                         # TLD
         )
         (:[\d]{1,5})?                      # port
         (                                  # path etc.
          /(
            [\w\-$+*()@&=?/~\\#%.,;:!]*
            [\w\-$+*()@&=?/~\\#%]             # end of URL
           )
         )?
     ''', r'<a href="\0">\0</a>'),

    # email
    (r'\b[-+=%/\w\.]+@[\w\.\-]+', r'<a href="mailto:\0">\0</a>'),

    # deadbeeffeed  (hashes with exactly twelve or forty chars)
    # the forty-char rule is listed before the twelve-char one; the named
    # group 'revision' is not used by the template, which links the whole match
    (r'\b[a-fA-F0-9]{40}\b',
     r'<a href="http://hg.python.org/lookup/\0">\0</a>'),
    (r'\b(?P<revision>[a-fA-F0-9]{12})\b',
     r'<a href="http://hg.python.org/lookup/\0">\0</a>'),

    # r12345, r 12345, rev12345, rev. 12345, revision12345, revision 12345
    # in the template \1 is the 'revstr' group and \4 the 'revision' group
    # (groups 2 and 3 are the nested optional 'ev'/'ision' parts)
    (r'\b(?P<revstr>r\.?(ev\.?(ision)?)?\s*)(?P<revision>\d{4,})',
     r'<a href="http://hg.python.org/lookup/r\4">\1\4</a>'),

    # Lib/somefile.py, Lib/somefile.py:123, Modules/somemodule.c:123, ...
    # 'sep' is either empty or a single leading '/'; its lookbehinds keep the
    # rule from firing in the middle of a longer path such as foo/Lib/x.py
    (r'(?P<sep>(?<!\w/)|(?<!\w)/)\b(?P<path>(?:Demo|Doc|Grammar|'
     r'Include|Lib|Mac|Misc|Modules|Parser|PC|PCbuild|Python|'
     r'RISCOS|Tools|Objects)/[-.\w/]+[a-zA-Z0-9]/?)(?P<lnum>:\d{1,5})?',
     make_file_link),

    # traceback lines: File "Lib/somefile.py", line 123 in some_func
    # note: this regex is not 100% accurate, it might get the wrong part of
    # the path or link to non-existing files, but it usually works fine
    # 'path' is the prefix up to and including lib/ (plus an optional
    # pythonX.Y/ directory), 'file' the .py file below it, and 'fullpath'
    # spans both; paths through var/lib or site-packages are excluded
    (r'File "(?P<fullpath>(?P<path>[-.\w/\\:]+(?<!var)[/\\][Ll]ib[/\\]'
     r'(?!.*site-packages)(python[\d.]*[/\\])?)(?P<file>[-.\w/\\]+?\.py))", '
     r'line (?P<lnum>\d{1,5})', make_traceback_link),

    # PEP 8, PEP8, PEP 0008, ...
    # not linked when preceded by '/', '=' or '-' or followed by '/'
    (r'(?i)\b(?<![/=-])PEP\s*(\d{1,4})(?!/)\b', make_pep_link),

    # devguide
    # optionally followed by /page, /page.html and a #fragment
    (r'\bdevguide(/\w+(\.html)?(#[\w-]+)?)?',
     r'<a href="http://docs.python.org/\0">\0</a>'),
]

# wrap every (pattern, replacement) pair in a wikify rule object
rules = [wikify.RegexpRule(a,b) for a,b in substitutions]


# --- rules from Roundup tracker
# roundup/website/issues/extensions/local_replace.py
# List of (compiled regexp, replacement template) pairs.  The templates use
# the standard ``\g<name>`` syntax of ``re.sub()``.
# All literals are raw strings: sequences such as ``\#``, ``\d``, ``\s`` and
# ``\g`` are invalid escapes in normal string literals and trigger a
# DeprecationWarning/SyntaxWarning on recent Python versions.  The resulting
# string values are unchanged.
roundup_rules = [
    # debian:#12345 -> Debian bug tracker
    (re.compile(r'debian:\#(?P<id>\d+)'),
     r'<a href="http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=\g<id>">debian#\g<id></a>'),

    # #12345, # 12345 -> issue in this tracker
    (re.compile(r'\#(?P<ws>\s*)(?P<id>\d+)'),
     r"<a href='issue\g<id>'>#\g<ws>\g<id></a>"),

    # revision 123, rev 123, r123, r 123 -> Subversion revision; the leading
    # whitespace is required (so "wordthatendswithr 123" is left alone) and is
    # re-emitted in front of the link
    (re.compile(r'(?P<prews>\s+)revision(?P<ws>\s*)(?P<revision>\d+)'),
     r"\g<prews><a href='http://svn.roundup-tracker.org/viewvc/roundup?view=rev&rev=\g<revision>'>revision\g<ws>\g<revision></a>"),
    (re.compile(r'(?P<prews>\s+)rev(?P<ws>\s*)(?P<revision>\d+)'),
     r"\g<prews><a href='http://svn.roundup-tracker.org/viewvc/roundup?view=rev&rev=\g<revision>'>rev\g<ws>\g<revision></a>"),
    (re.compile(r'(?P<prews>\s+)(?P<revstr>r|r\s+)(?P<revision>\d+)'),
     r"\g<prews><a href='http://svn.roundup-tracker.org/viewvc/roundup?view=rev&rev=\g<revision>'>\g<revstr>\g<revision></a>"),
]
# the same (compiled regexp, template) pairs, wrapped as wikify rule objects
roundup_rules_wikify = [wikify.RegexpRule(a,b) for a,b in roundup_rules]

# sample text fed to both roundup tests; every line starts with a space and
# "wordthatendswithr 222" is the one line that must stay unlinked
roundup_in = """\
 debian:#222
 revision 222
 wordthatendswithr 222
 r222
 r 222
 #555
"""
# expected result of applying roundup_rules one after another with
# regexp.sub() (see test_native_roundup_rules).  Note the first line: the
# "#222" produced by the debian rule is matched again by the issue rule,
# which nests a second link inside the first one.
roundup_out_bogusre = """\
 <a href="http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=222">debian<a href='issue222'>#222</a></a>
 <a href='http://svn.roundup-tracker.org/viewvc/roundup?view=rev&rev=222'>revision 222</a>
 wordthatendswithr 222
 <a href='http://svn.roundup-tracker.org/viewvc/roundup?view=rev&rev=222'>r222</a>
 <a href='http://svn.roundup-tracker.org/viewvc/roundup?view=rev&rev=222'>r 222</a>
 <a href='issue555'>#555</a>
"""
# expected result of wikify.wikify() with the same rules (see
# test_wikify_roundup_rules); here the debian line carries a single link
roundup_out_wikify = """\
 <a href="http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=222">debian#222</a>
 <a href='http://svn.roundup-tracker.org/viewvc/roundup?view=rev&rev=222'>revision 222</a>
 wordthatendswithr 222
 <a href='http://svn.roundup-tracker.org/viewvc/roundup?view=rev&rev=222'>r222</a>
 <a href='http://svn.roundup-tracker.org/viewvc/roundup?view=rev&rev=222'>r 222</a>
 <a href='issue555'>#555</a>
"""

# --/ configure wikification rules ---


class TestSuite(unittest.TestCase):
    """Tests for the wikification rules configured in this module."""

    @staticmethod
    def _as_text(value):
        """Return *value* as text, decoding it first if it is a byte string."""
        # On Python 2 ``bytes`` is ``str``, so plain strings are still decoded
        # to Unicode (Unicode strings have a better diff).  On Python 3 text
        # is returned untouched, since ``str`` has no ``decode()`` method.
        if isinstance(value, bytes):
            return value.decode()
        return value

    def test_bpo_data(self):
        """bugs.python.org replacement rules"""
        self.maxDiff = None
        # the test file contains the text on odd lines and the expected
        # result on even ones, with comments starting with '##'
        data_path = os.path.join(testdir, 'data_python_tracker.txt')
        # the context manager closes the file even when an assertion fails
        with open(data_path) as f:
            # zip(f, f) pulls two consecutive lines per iteration
            for text, expected_result in zip(f, f):
                if text.startswith('##') and expected_result.startswith('##'):
                    continue  # skip the comments
                p = wikify.wikify(text, rules)
                self.assertEqual(self._as_text(p),
                                 self._as_text(expected_result))

    def test_native_roundup_rules(self):
        # apply the compiled regexps one after another with plain re.sub();
        # later rules also see the HTML generated by earlier ones
        text = roundup_in
        for regexp, replace in roundup_rules:
            text = regexp.sub(replace, text)
        self.assertEqual(roundup_out_bogusre, text)

    def test_wikify_roundup_rules(self):
        # the same rules run through wikify.wikify()
        wikified = wikify.wikify(roundup_in, roundup_rules_wikify)
        self.assertEqual(roundup_out_wikify, wikified)

    def test_order(self):
        # replacing a digit with itself must leave the text unchanged
        # NOTE(review): a single rule object (not a list) is passed here --
        # confirm that wikify.wikify() accepts both forms
        rule = wikify.RegexpRule("([12345])", r"\1")

        data =   "01100220"
        result = "01100220"
        self.assertEqual(wikify.wikify(data, rule), result)



# run the tests
if __name__ == '__main__':
    # executed as a script: hand control to the unittest runner
    unittest.main()