Commits

Amaury Forgeot d'Arc  committed eb449ce Merge

hg merge default

  • Participants
  • Parent commits 835ebb4, d2f2a8c
  • Branches const-correctness

Comments (0)

Files changed (18)

File lib-python/2.7/argparse.py

             # error if this argument is not allowed with other previously
             # seen arguments, assuming that actions that use the default
             # value don't really count as "present"
-            if argument_values is not action.default:
+
+            # XXX PyPy bug-to-bug compatibility: "is" on primitive types
+            # is not consistent in CPython.  We'll assume it is close
+            # enough for ints (which is true only for "small ints"), but
+            # for floats and longs and complexes we'll go for the option
+            # of forcing "is" to say False, like it usually does on
+            # CPython.  A fix is pending on CPython trunk
+            # (http://bugs.python.org/issue18943) but that might change
+            # the details of the semantics and so not be applied to 2.7.
+            # See the line AA below.
+
+            if (argument_values is not action.default or
+                    type(argument_values) in (float, long, complex)):  # AA
                 seen_non_default_actions.add(action)
                 for conflict_action in action_conflicts.get(action, []):
                     if conflict_action in seen_non_default_actions:

File pypy/module/thread/test/test_thread.py

             skip("this OS supports too many threads to check (> 1000)")
         lock = thread.allocate_lock()
         lock.acquire()
+        count = [0]
         def f():
+            count[0] += 1
             lock.acquire()
             lock.release()
+            count[0] -= 1
         try:
             try:
                 for i in range(1000):
             finally:
                 lock.release()
                 # wait a bit to allow most threads to finish now
-                self.busywait(2.0)
+                while count[0] > 10:
+                    print count[0]     # <- releases the GIL
+                print "ok."
         except (thread.error, MemoryError):
             pass
         else:
             raise Exception("could unexpectedly start 1000 threads")
+        # safety: check that we can start a new thread here
+        thread.start_new_thread(lambda: None, ())
 
     def test_stack_size(self):
         import thread

File rpython/jit/codewriter/call.py

         else:
             assert op.opname == 'indirect_call'
             graphs = op.args[-1].value
-            if graphs is None:
-                # special case: handle the indirect call that goes to
-                # the 'instantiate' methods.  This check is a bit imprecise
-                # but it's not too bad if we mistake a random indirect call
-                # for the one to 'instantiate'.
-                from rpython.rtyper.lltypesystem import rclass
-                CALLTYPE = op.args[0].concretetype
-                if (op.opname == 'indirect_call' and len(op.args) == 2 and
-                    CALLTYPE == rclass.OBJECT_VTABLE.instantiate):
-                    graphs = list(self._graphs_of_all_instantiate())
-            #
             if graphs is not None:
                 result = []
                 for graph in graphs:
         # residual call case: we don't need to look into any graph
         return None
 
-    def _graphs_of_all_instantiate(self):
-        for vtable in self.rtyper.lltype2vtable.values():
-            if vtable.instantiate:
-                yield vtable.instantiate._obj.graph
-
     def guess_call_kind(self, op, is_candidate=None):
         if op.opname == 'direct_call':
             funcptr = op.args[0].value

File rpython/jit/codewriter/effectinfo.py

 EffectInfo.MOST_GENERAL = EffectInfo(None, None, None, None,
                                      EffectInfo.EF_RANDOM_EFFECTS,
                                      can_invalidate=True)
-EffectInfo.LEAST_GENERAL = EffectInfo([], [], [], [],
-                                      EffectInfo.EF_ELIDABLE_CANNOT_RAISE,
-                                      can_invalidate=False)
 
 
 def effectinfo_from_writeanalyze(effects, cpu,

File rpython/jit/metainterp/test/test_ajit.py

         self.check_resops(call=2)
 
     def test_merge_guardclass_guardvalue(self):
-        from rpython.rlib.objectmodel import instantiate
         myjitdriver = JitDriver(greens = [], reds = ['x', 'l'])
 
         class A(object):
         self.check_resops(guard_class=0, guard_value=6)
 
     def test_merge_guardnonnull_guardclass(self):
-        from rpython.rlib.objectmodel import instantiate
         myjitdriver = JitDriver(greens = [], reds = ['x', 'l'])
 
         class A(object):
 
 
     def test_merge_guardnonnull_guardvalue(self):
-        from rpython.rlib.objectmodel import instantiate
         myjitdriver = JitDriver(greens = [], reds = ['x', 'l'])
 
         class A(object):
 
 
     def test_merge_guardnonnull_guardvalue_2(self):
-        from rpython.rlib.objectmodel import instantiate
         myjitdriver = JitDriver(greens = [], reds = ['x', 'l'])
 
         class A(object):
 
 
     def test_merge_guardnonnull_guardclass_guardvalue(self):
-        from rpython.rlib.objectmodel import instantiate
         myjitdriver = JitDriver(greens = [], reds = ['x', 'l'])
 
         class A(object):

File rpython/rlib/rsre/rpy.py

-
-from rpython.rlib.rsre import rsre_char
-from rpython.rlib.rsre.rsre_core import match
-from rpython.rlib.rarithmetic import intmask
-
-def get_hacked_sre_compile(my_compile):
-    """Return a copy of the sre_compile module for which the _sre
-    module is a custom module that has _sre.compile == my_compile
-    and CODESIZE == rsre_char.CODESIZE.
-    """
-    import sre_compile, sre_constants, __builtin__, new
-    sre_hacked = new.module("_sre_hacked")
-    sre_hacked.compile = my_compile
-    sre_hacked.MAGIC = sre_compile.MAGIC
-    sre_hacked.CODESIZE = rsre_char.CODESIZE
-    sre_hacked.MAXREPEAT = sre_constants.MAX_REPEAT
-    sre_hacked.getlower = rsre_char.getlower
-    def my_import(name, *args):
-        if name == '_sre':
-            return sre_hacked
-        else:
-            return default_import(name, *args)
-    src = sre_compile.__file__
-    if src.lower().endswith('.pyc') or src.lower().endswith('.pyo'):
-        src = src[:-1]
-    mod = new.module("sre_compile_hacked")
-    default_import = __import__
-    try:
-        __builtin__.__import__ = my_import
-        execfile(src, mod.__dict__)
-    finally:
-        __builtin__.__import__ = default_import
-    return mod
-
-class GotIt(Exception):
-    pass
-def my_compile(pattern, flags, code, *args):
-    raise GotIt([intmask(i) for i in code], flags, args)
-sre_compile_hacked = get_hacked_sre_compile(my_compile)
-
-def get_code(regexp, flags=0, allargs=False):
-    try:
-        sre_compile_hacked.compile(regexp, flags)
-    except GotIt, e:
-        pass
-    else:
-        raise ValueError("did not reach _sre.compile()!")
-    if allargs:
-        return e.args
-    else:
-        return e.args[0]

File rpython/rlib/rsre/rpy/__init__.py

+from ._sre import get_code

File rpython/rlib/rsre/rpy/_sre.py

+
+from rpython.rlib.rsre import rsre_char
+from rpython.rlib.rarithmetic import intmask
+
+
+MAGIC = 20031017
+CODESIZE = rsre_char.CODESIZE
+getlower = rsre_char.getlower
+
+
+class GotIt(Exception):
+    pass
+
+def compile(pattern, flags, code, *args):
+    raise GotIt([intmask(i) for i in code], flags, args)
+
+
+def get_code(regexp, flags=0, allargs=False):
+    from . import sre_compile
+    try:
+        sre_compile.compile(regexp, flags)
+    except GotIt, e:
+        pass
+    else:
+        raise ValueError("did not reach _sre.compile()!")
+    if allargs:
+        return e.args
+    else:
+        return e.args[0]

File rpython/rlib/rsre/rpy/sre_compile.py

+#
+# Secret Labs' Regular Expression Engine
+#
+# convert template to internal format
+#
+# Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
+#
+# See the sre.py file for information on usage and redistribution.
+#
+
+"""Internal support module for sre (copied from CPython 2.7.3)"""
+
+import sys
+from . import _sre, sre_parse
+from .sre_constants import *
+
+assert _sre.MAGIC == MAGIC, "SRE module mismatch"
+
+if _sre.CODESIZE == 2:
+    MAXCODE = 65535
+else:
+    MAXCODE = 0xFFFFFFFFL
+
+def _identityfunction(x):
+    return x
+
+_LITERAL_CODES = set([LITERAL, NOT_LITERAL])
+_REPEATING_CODES = set([REPEAT, MIN_REPEAT, MAX_REPEAT])
+_SUCCESS_CODES = set([SUCCESS, FAILURE])
+_ASSERT_CODES = set([ASSERT, ASSERT_NOT])
+
+def _compile(code, pattern, flags):
+    # internal: compile a (sub)pattern
+    emit = code.append
+    _len = len
+    LITERAL_CODES = _LITERAL_CODES
+    REPEATING_CODES = _REPEATING_CODES
+    SUCCESS_CODES = _SUCCESS_CODES
+    ASSERT_CODES = _ASSERT_CODES
+    for op, av in pattern:
+        if op in LITERAL_CODES:
+            if flags & SRE_FLAG_IGNORECASE:
+                emit(OPCODES[OP_IGNORE[op]])
+                emit(_sre.getlower(av, flags))
+            else:
+                emit(OPCODES[op])
+                emit(av)
+        elif op is IN:
+            if flags & SRE_FLAG_IGNORECASE:
+                emit(OPCODES[OP_IGNORE[op]])
+                def fixup(literal, flags=flags):
+                    return _sre.getlower(literal, flags)
+            else:
+                emit(OPCODES[op])
+                fixup = _identityfunction
+            skip = _len(code); emit(0)
+            _compile_charset(av, flags, code, fixup)
+            code[skip] = _len(code) - skip
+        elif op is ANY:
+            if flags & SRE_FLAG_DOTALL:
+                emit(OPCODES[ANY_ALL])
+            else:
+                emit(OPCODES[ANY])
+        elif op in REPEATING_CODES:
+            if flags & SRE_FLAG_TEMPLATE:
+                raise error, "internal: unsupported template operator"
+                emit(OPCODES[REPEAT])
+                skip = _len(code); emit(0)
+                emit(av[0])
+                emit(av[1])
+                _compile(code, av[2], flags)
+                emit(OPCODES[SUCCESS])
+                code[skip] = _len(code) - skip
+            elif _simple(av) and op is not REPEAT:
+                if op is MAX_REPEAT:
+                    emit(OPCODES[REPEAT_ONE])
+                else:
+                    emit(OPCODES[MIN_REPEAT_ONE])
+                skip = _len(code); emit(0)
+                emit(av[0])
+                emit(av[1])
+                _compile(code, av[2], flags)
+                emit(OPCODES[SUCCESS])
+                code[skip] = _len(code) - skip
+            else:
+                emit(OPCODES[REPEAT])
+                skip = _len(code); emit(0)
+                emit(av[0])
+                emit(av[1])
+                _compile(code, av[2], flags)
+                code[skip] = _len(code) - skip
+                if op is MAX_REPEAT:
+                    emit(OPCODES[MAX_UNTIL])
+                else:
+                    emit(OPCODES[MIN_UNTIL])
+        elif op is SUBPATTERN:
+            if av[0]:
+                emit(OPCODES[MARK])
+                emit((av[0]-1)*2)
+            # _compile_info(code, av[1], flags)
+            _compile(code, av[1], flags)
+            if av[0]:
+                emit(OPCODES[MARK])
+                emit((av[0]-1)*2+1)
+        elif op in SUCCESS_CODES:
+            emit(OPCODES[op])
+        elif op in ASSERT_CODES:
+            emit(OPCODES[op])
+            skip = _len(code); emit(0)
+            if av[0] >= 0:
+                emit(0) # look ahead
+            else:
+                lo, hi = av[1].getwidth()
+                if lo != hi:
+                    raise error, "look-behind requires fixed-width pattern"
+                emit(lo) # look behind
+            _compile(code, av[1], flags)
+            emit(OPCODES[SUCCESS])
+            code[skip] = _len(code) - skip
+        elif op is CALL:
+            emit(OPCODES[op])
+            skip = _len(code); emit(0)
+            _compile(code, av, flags)
+            emit(OPCODES[SUCCESS])
+            code[skip] = _len(code) - skip
+        elif op is AT:
+            emit(OPCODES[op])
+            if flags & SRE_FLAG_MULTILINE:
+                av = AT_MULTILINE.get(av, av)
+            if flags & SRE_FLAG_LOCALE:
+                av = AT_LOCALE.get(av, av)
+            elif flags & SRE_FLAG_UNICODE:
+                av = AT_UNICODE.get(av, av)
+            emit(ATCODES[av])
+        elif op is BRANCH:
+            emit(OPCODES[op])
+            tail = []
+            tailappend = tail.append
+            for av in av[1]:
+                skip = _len(code); emit(0)
+                # _compile_info(code, av, flags)
+                _compile(code, av, flags)
+                emit(OPCODES[JUMP])
+                tailappend(_len(code)); emit(0)
+                code[skip] = _len(code) - skip
+            emit(0) # end of branch
+            for tail in tail:
+                code[tail] = _len(code) - tail
+        elif op is CATEGORY:
+            emit(OPCODES[op])
+            if flags & SRE_FLAG_LOCALE:
+                av = CH_LOCALE[av]
+            elif flags & SRE_FLAG_UNICODE:
+                av = CH_UNICODE[av]
+            emit(CHCODES[av])
+        elif op is GROUPREF:
+            if flags & SRE_FLAG_IGNORECASE:
+                emit(OPCODES[OP_IGNORE[op]])
+            else:
+                emit(OPCODES[op])
+            emit(av-1)
+        elif op is GROUPREF_EXISTS:
+            emit(OPCODES[op])
+            emit(av[0]-1)
+            skipyes = _len(code); emit(0)
+            _compile(code, av[1], flags)
+            if av[2]:
+                emit(OPCODES[JUMP])
+                skipno = _len(code); emit(0)
+                code[skipyes] = _len(code) - skipyes + 1
+                _compile(code, av[2], flags)
+                code[skipno] = _len(code) - skipno
+            else:
+                code[skipyes] = _len(code) - skipyes + 1
+        else:
+            raise ValueError, ("unsupported operand type", op)
+
+def _compile_charset(charset, flags, code, fixup=None):
+    # compile charset subprogram
+    emit = code.append
+    if fixup is None:
+        fixup = _identityfunction
+    for op, av in _optimize_charset(charset, fixup):
+        emit(OPCODES[op])
+        if op is NEGATE:
+            pass
+        elif op is LITERAL:
+            emit(fixup(av))
+        elif op is RANGE:
+            emit(fixup(av[0]))
+            emit(fixup(av[1]))
+        elif op is CHARSET:
+            code.extend(av)
+        elif op is BIGCHARSET:
+            code.extend(av)
+        elif op is CATEGORY:
+            if flags & SRE_FLAG_LOCALE:
+                emit(CHCODES[CH_LOCALE[av]])
+            elif flags & SRE_FLAG_UNICODE:
+                emit(CHCODES[CH_UNICODE[av]])
+            else:
+                emit(CHCODES[av])
+        else:
+            raise error, "internal: unsupported set operator"
+    emit(OPCODES[FAILURE])
+
+def _optimize_charset(charset, fixup):
+    # internal: optimize character set
+    out = []
+    outappend = out.append
+    charmap = [0]*256
+    try:
+        for op, av in charset:
+            if op is NEGATE:
+                outappend((op, av))
+            elif op is LITERAL:
+                charmap[fixup(av)] = 1
+            elif op is RANGE:
+                for i in range(fixup(av[0]), fixup(av[1])+1):
+                    charmap[i] = 1
+            elif op is CATEGORY:
+                # XXX: could append to charmap tail
+                return charset # cannot compress
+    except IndexError:
+        # character set contains unicode characters
+        return _optimize_unicode(charset, fixup)
+    # compress character map
+    i = p = n = 0
+    runs = []
+    runsappend = runs.append
+    for c in charmap:
+        if c:
+            if n == 0:
+                p = i
+            n = n + 1
+        elif n:
+            runsappend((p, n))
+            n = 0
+        i = i + 1
+    if n:
+        runsappend((p, n))
+    if len(runs) <= 2:
+        # use literal/range
+        for p, n in runs:
+            if n == 1:
+                outappend((LITERAL, p))
+            else:
+                outappend((RANGE, (p, p+n-1)))
+        if len(out) < len(charset):
+            return out
+    else:
+        # use bitmap
+        data = _mk_bitmap(charmap)
+        outappend((CHARSET, data))
+        return out
+    return charset
+
+def _mk_bitmap(bits):
+    data = []
+    dataappend = data.append
+    if _sre.CODESIZE == 2:
+        start = (1, 0)
+    else:
+        start = (1L, 0L)
+    m, v = start
+    for c in bits:
+        if c:
+            v = v + m
+        m = m + m
+        if m > MAXCODE:
+            dataappend(v)
+            m, v = start
+    return data
+
+# To represent a big charset, first a bitmap of all characters in the
+# set is constructed. Then, this bitmap is sliced into chunks of 256
+# characters, duplicate chunks are eliminated, and each chunk is
+# given a number. In the compiled expression, the charset is
+# represented by a 16-bit word sequence, consisting of one word for
+# the number of different chunks, a sequence of 256 bytes (128 words)
+# of chunk numbers indexed by their original chunk position, and a
+# sequence of chunks (16 words each).
+
+# Compression is normally good: in a typical charset, large ranges of
+# Unicode will be either completely excluded (e.g. if only cyrillic
+# letters are to be matched), or completely included (e.g. if large
+# subranges of Kanji match). These ranges will be represented by
+# chunks of all one-bits or all zero-bits.
+
+# Matching can be also done efficiently: the more significant byte of
+# the Unicode character is an index into the chunk number, and the
+# less significant byte is a bit index in the chunk (just like the
+# CHARSET matching).
+
+# In UCS-4 mode, the BIGCHARSET opcode still supports only subsets
+# of the basic multilingual plane; an efficient representation
+# for all of UTF-16 has not yet been developed. This means,
+# in particular, that negated charsets cannot be represented as
+# bigcharsets.
+
+def _optimize_unicode(charset, fixup):
+    try:
+        import array
+    except ImportError:
+        return charset
+    charmap = [0]*65536
+    negate = 0
+    try:
+        for op, av in charset:
+            if op is NEGATE:
+                negate = 1
+            elif op is LITERAL:
+                charmap[fixup(av)] = 1
+            elif op is RANGE:
+                for i in xrange(fixup(av[0]), fixup(av[1])+1):
+                    charmap[i] = 1
+            elif op is CATEGORY:
+                # XXX: could expand category
+                return charset # cannot compress
+    except IndexError:
+        # non-BMP characters
+        return charset
+    if negate:
+        if sys.maxunicode != 65535:
+            # XXX: negation does not work with big charsets
+            return charset
+        for i in xrange(65536):
+            charmap[i] = not charmap[i]
+    comps = {}
+    mapping = [0]*256
+    block = 0
+    data = []
+    for i in xrange(256):
+        chunk = tuple(charmap[i*256:(i+1)*256])
+        new = comps.setdefault(chunk, block)
+        mapping[i] = new
+        if new == block:
+            block = block + 1
+            data = data + _mk_bitmap(chunk)
+    header = [block]
+    if _sre.CODESIZE == 2:
+        code = 'H'
+    else:
+        code = 'I'
+    # Convert block indices to byte array of 256 bytes
+    mapping = array.array('b', mapping).tostring()
+    # Convert byte array to word array
+    mapping = array.array(code, mapping)
+    assert mapping.itemsize == _sre.CODESIZE
+    header = header + mapping.tolist()
+    data[0:0] = header
+    return [(BIGCHARSET, data)]
+
+def _simple(av):
+    # check if av is a "simple" operator
+    lo, hi = av[2].getwidth()
+    if lo == 0 and hi == MAXREPEAT:
+        raise error, "nothing to repeat"
+    return lo == hi == 1 and av[2][0][0] != SUBPATTERN
+
+def _compile_info(code, pattern, flags):
+    # internal: compile an info block.  in the current version,
+    # this contains min/max pattern width, and an optional literal
+    # prefix or a character map
+    lo, hi = pattern.getwidth()
+    if lo == 0:
+        return # not worth it
+    # look for a literal prefix
+    prefix = []
+    prefixappend = prefix.append
+    prefix_skip = 0
+    charset = [] # not used
+    charsetappend = charset.append
+    if not (flags & SRE_FLAG_IGNORECASE):
+        # look for literal prefix
+        for op, av in pattern.data:
+            if op is LITERAL:
+                if len(prefix) == prefix_skip:
+                    prefix_skip = prefix_skip + 1
+                prefixappend(av)
+            elif op is SUBPATTERN and len(av[1]) == 1:
+                op, av = av[1][0]
+                if op is LITERAL:
+                    prefixappend(av)
+                else:
+                    break
+            else:
+                break
+        # if no prefix, look for charset prefix
+        if not prefix and pattern.data:
+            op, av = pattern.data[0]
+            if op is SUBPATTERN and av[1]:
+                op, av = av[1][0]
+                if op is LITERAL:
+                    charsetappend((op, av))
+                elif op is BRANCH:
+                    c = []
+                    cappend = c.append
+                    for p in av[1]:
+                        if not p:
+                            break
+                        op, av = p[0]
+                        if op is LITERAL:
+                            cappend((op, av))
+                        else:
+                            break
+                    else:
+                        charset = c
+            elif op is BRANCH:
+                c = []
+                cappend = c.append
+                for p in av[1]:
+                    if not p:
+                        break
+                    op, av = p[0]
+                    if op is LITERAL:
+                        cappend((op, av))
+                    else:
+                        break
+                else:
+                    charset = c
+            elif op is IN:
+                charset = av
+##     if prefix:
+##         print "*** PREFIX", prefix, prefix_skip
+##     if charset:
+##         print "*** CHARSET", charset
+    # add an info block
+    emit = code.append
+    emit(OPCODES[INFO])
+    skip = len(code); emit(0)
+    # literal flag
+    mask = 0
+    if prefix:
+        mask = SRE_INFO_PREFIX
+        if len(prefix) == prefix_skip == len(pattern.data):
+            mask = mask + SRE_INFO_LITERAL
+    elif charset:
+        mask = mask + SRE_INFO_CHARSET
+    emit(mask)
+    # pattern length
+    if lo < MAXCODE:
+        emit(lo)
+    else:
+        emit(MAXCODE)
+        prefix = prefix[:MAXCODE]
+    if hi < MAXCODE:
+        emit(hi)
+    else:
+        emit(0)
+    # add literal prefix
+    if prefix:
+        emit(len(prefix)) # length
+        emit(prefix_skip) # skip
+        code.extend(prefix)
+        # generate overlap table
+        table = [-1] + ([0]*len(prefix))
+        for i in xrange(len(prefix)):
+            table[i+1] = table[i]+1
+            while table[i+1] > 0 and prefix[i] != prefix[table[i+1]-1]:
+                table[i+1] = table[table[i+1]-1]+1
+        code.extend(table[1:]) # don't store first entry
+    elif charset:
+        _compile_charset(charset, flags, code)
+    code[skip] = len(code) - skip
+
+try:
+    unicode
+except NameError:
+    STRING_TYPES = (type(""),)
+else:
+    STRING_TYPES = (type(""), type(unicode("")))
+
+def isstring(obj):
+    for tp in STRING_TYPES:
+        if isinstance(obj, tp):
+            return 1
+    return 0
+
+def _code(p, flags):
+
+    flags = p.pattern.flags | flags
+    code = []
+
+    # compile info block
+    _compile_info(code, p, flags)
+
+    # compile the pattern
+    _compile(code, p.data, flags)
+
+    code.append(OPCODES[SUCCESS])
+
+    return code
+
+def compile(p, flags=0):
+    # internal: convert pattern list to internal format
+
+    if isstring(p):
+        pattern = p
+        p = sre_parse.parse(p, flags)
+    else:
+        pattern = None
+
+    code = _code(p, flags)
+
+    # print code
+
+    # XXX: <fl> get rid of this limitation!
+    if p.pattern.groups > 100:
+        raise AssertionError(
+            "sorry, but this version only supports 100 named groups"
+            )
+
+    # map in either direction
+    groupindex = p.pattern.groupdict
+    indexgroup = [None] * p.pattern.groups
+    for k, i in groupindex.items():
+        indexgroup[i] = k
+
+    return _sre.compile(
+        pattern, flags | p.pattern.flags, code,
+        p.pattern.groups-1,
+        groupindex, indexgroup
+        )

File rpython/rlib/rsre/rpy/sre_constants.py

+#
+# Secret Labs' Regular Expression Engine
+#
+# various symbols used by the regular expression engine.
+# run this script to update the _sre include files!
+#
+# Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved.
+#
+# See the sre.py file for information on usage and redistribution.
+#
+
+"""Internal support module for sre (copied from CPython 2.7.3)"""
+
+# update when constants are added or removed
+
+MAGIC = 20031017
+
+# max code word in this release
+
+MAXREPEAT = 65535
+
+# SRE standard exception (access as sre.error)
+# (use the real re.error exception class)
+from re import error
+
+# operators
+
+FAILURE = "failure"
+SUCCESS = "success"
+
+ANY = "any"
+ANY_ALL = "any_all"
+ASSERT = "assert"
+ASSERT_NOT = "assert_not"
+AT = "at"
+BIGCHARSET = "bigcharset"
+BRANCH = "branch"
+CALL = "call"
+CATEGORY = "category"
+CHARSET = "charset"
+GROUPREF = "groupref"
+GROUPREF_IGNORE = "groupref_ignore"
+GROUPREF_EXISTS = "groupref_exists"
+IN = "in"
+IN_IGNORE = "in_ignore"
+INFO = "info"
+JUMP = "jump"
+LITERAL = "literal"
+LITERAL_IGNORE = "literal_ignore"
+MARK = "mark"
+MAX_REPEAT = "max_repeat"
+MAX_UNTIL = "max_until"
+MIN_REPEAT = "min_repeat"
+MIN_UNTIL = "min_until"
+NEGATE = "negate"
+NOT_LITERAL = "not_literal"
+NOT_LITERAL_IGNORE = "not_literal_ignore"
+RANGE = "range"
+REPEAT = "repeat"
+REPEAT_ONE = "repeat_one"
+SUBPATTERN = "subpattern"
+MIN_REPEAT_ONE = "min_repeat_one"
+
+# positions
+AT_BEGINNING = "at_beginning"
+AT_BEGINNING_LINE = "at_beginning_line"
+AT_BEGINNING_STRING = "at_beginning_string"
+AT_BOUNDARY = "at_boundary"
+AT_NON_BOUNDARY = "at_non_boundary"
+AT_END = "at_end"
+AT_END_LINE = "at_end_line"
+AT_END_STRING = "at_end_string"
+AT_LOC_BOUNDARY = "at_loc_boundary"
+AT_LOC_NON_BOUNDARY = "at_loc_non_boundary"
+AT_UNI_BOUNDARY = "at_uni_boundary"
+AT_UNI_NON_BOUNDARY = "at_uni_non_boundary"
+
+# categories
+CATEGORY_DIGIT = "category_digit"
+CATEGORY_NOT_DIGIT = "category_not_digit"
+CATEGORY_SPACE = "category_space"
+CATEGORY_NOT_SPACE = "category_not_space"
+CATEGORY_WORD = "category_word"
+CATEGORY_NOT_WORD = "category_not_word"
+CATEGORY_LINEBREAK = "category_linebreak"
+CATEGORY_NOT_LINEBREAK = "category_not_linebreak"
+CATEGORY_LOC_WORD = "category_loc_word"
+CATEGORY_LOC_NOT_WORD = "category_loc_not_word"
+CATEGORY_UNI_DIGIT = "category_uni_digit"
+CATEGORY_UNI_NOT_DIGIT = "category_uni_not_digit"
+CATEGORY_UNI_SPACE = "category_uni_space"
+CATEGORY_UNI_NOT_SPACE = "category_uni_not_space"
+CATEGORY_UNI_WORD = "category_uni_word"
+CATEGORY_UNI_NOT_WORD = "category_uni_not_word"
+CATEGORY_UNI_LINEBREAK = "category_uni_linebreak"
+CATEGORY_UNI_NOT_LINEBREAK = "category_uni_not_linebreak"
+
+OPCODES = [
+
+    # failure=0 success=1 (just because it looks better that way :-)
+    FAILURE, SUCCESS,
+
+    ANY, ANY_ALL,
+    ASSERT, ASSERT_NOT,
+    AT,
+    BRANCH,
+    CALL,
+    CATEGORY,
+    CHARSET, BIGCHARSET,
+    GROUPREF, GROUPREF_EXISTS, GROUPREF_IGNORE,
+    IN, IN_IGNORE,
+    INFO,
+    JUMP,
+    LITERAL, LITERAL_IGNORE,
+    MARK,
+    MAX_UNTIL,
+    MIN_UNTIL,
+    NOT_LITERAL, NOT_LITERAL_IGNORE,
+    NEGATE,
+    RANGE,
+    REPEAT,
+    REPEAT_ONE,
+    SUBPATTERN,
+    MIN_REPEAT_ONE
+
+]
+
+ATCODES = [
+    AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY,
+    AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING,
+    AT_LOC_BOUNDARY, AT_LOC_NON_BOUNDARY, AT_UNI_BOUNDARY,
+    AT_UNI_NON_BOUNDARY
+]
+
+CHCODES = [
+    CATEGORY_DIGIT, CATEGORY_NOT_DIGIT, CATEGORY_SPACE,
+    CATEGORY_NOT_SPACE, CATEGORY_WORD, CATEGORY_NOT_WORD,
+    CATEGORY_LINEBREAK, CATEGORY_NOT_LINEBREAK, CATEGORY_LOC_WORD,
+    CATEGORY_LOC_NOT_WORD, CATEGORY_UNI_DIGIT, CATEGORY_UNI_NOT_DIGIT,
+    CATEGORY_UNI_SPACE, CATEGORY_UNI_NOT_SPACE, CATEGORY_UNI_WORD,
+    CATEGORY_UNI_NOT_WORD, CATEGORY_UNI_LINEBREAK,
+    CATEGORY_UNI_NOT_LINEBREAK
+]
+
+def makedict(list):
+    d = {}
+    i = 0
+    for item in list:
+        d[item] = i
+        i = i + 1
+    return d
+
+OPCODES = makedict(OPCODES)
+ATCODES = makedict(ATCODES)
+CHCODES = makedict(CHCODES)
+
+# replacement operations for "ignore case" mode
+OP_IGNORE = {
+    GROUPREF: GROUPREF_IGNORE,
+    IN: IN_IGNORE,
+    LITERAL: LITERAL_IGNORE,
+    NOT_LITERAL: NOT_LITERAL_IGNORE
+}
+
+AT_MULTILINE = {
+    AT_BEGINNING: AT_BEGINNING_LINE,
+    AT_END: AT_END_LINE
+}
+
+AT_LOCALE = {
+    AT_BOUNDARY: AT_LOC_BOUNDARY,
+    AT_NON_BOUNDARY: AT_LOC_NON_BOUNDARY
+}
+
+AT_UNICODE = {
+    AT_BOUNDARY: AT_UNI_BOUNDARY,
+    AT_NON_BOUNDARY: AT_UNI_NON_BOUNDARY
+}
+
+CH_LOCALE = {
+    CATEGORY_DIGIT: CATEGORY_DIGIT,
+    CATEGORY_NOT_DIGIT: CATEGORY_NOT_DIGIT,
+    CATEGORY_SPACE: CATEGORY_SPACE,
+    CATEGORY_NOT_SPACE: CATEGORY_NOT_SPACE,
+    CATEGORY_WORD: CATEGORY_LOC_WORD,
+    CATEGORY_NOT_WORD: CATEGORY_LOC_NOT_WORD,
+    CATEGORY_LINEBREAK: CATEGORY_LINEBREAK,
+    CATEGORY_NOT_LINEBREAK: CATEGORY_NOT_LINEBREAK
+}
+
+CH_UNICODE = {
+    CATEGORY_DIGIT: CATEGORY_UNI_DIGIT,
+    CATEGORY_NOT_DIGIT: CATEGORY_UNI_NOT_DIGIT,
+    CATEGORY_SPACE: CATEGORY_UNI_SPACE,
+    CATEGORY_NOT_SPACE: CATEGORY_UNI_NOT_SPACE,
+    CATEGORY_WORD: CATEGORY_UNI_WORD,
+    CATEGORY_NOT_WORD: CATEGORY_UNI_NOT_WORD,
+    CATEGORY_LINEBREAK: CATEGORY_UNI_LINEBREAK,
+    CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK
+}
+
+# flags
+SRE_FLAG_TEMPLATE = 1 # template mode (disable backtracking)
+SRE_FLAG_IGNORECASE = 2 # case insensitive
+SRE_FLAG_LOCALE = 4 # honour system locale
+SRE_FLAG_MULTILINE = 8 # treat target as multiline string
+SRE_FLAG_DOTALL = 16 # treat target as a single string
+SRE_FLAG_UNICODE = 32 # use unicode locale
+SRE_FLAG_VERBOSE = 64 # ignore whitespace and comments
+SRE_FLAG_DEBUG = 128 # debugging
+
+# flags for INFO primitive
+SRE_INFO_PREFIX = 1 # has prefix
+SRE_INFO_LITERAL = 2 # entire pattern is literal (given by prefix)
+SRE_INFO_CHARSET = 4 # pattern starts with character from given set
+
+if __name__ == "__main__":
+    def dump(f, d, prefix):
+        items = d.items()
+        items.sort(key=lambda a: a[1])
+        for k, v in items:
+            f.write("#define %s_%s %s\n" % (prefix, k.upper(), v))
+    f = open("sre_constants.h", "w")
+    f.write("""\
+/*
+ * Secret Labs' Regular Expression Engine
+ *
+ * regular expression matching engine
+ *
+ * NOTE: This file is generated by sre_constants.py.  If you need
+ * to change anything in here, edit sre_constants.py and run it.
+ *
+ * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
+ *
+ * See the _sre.c file for information on usage and redistribution.
+ */
+
+""")
+
+    f.write("#define SRE_MAGIC %d\n" % MAGIC)
+
+    dump(f, OPCODES, "SRE_OP")
+    dump(f, ATCODES, "SRE")
+    dump(f, CHCODES, "SRE")
+
+    f.write("#define SRE_FLAG_TEMPLATE %d\n" % SRE_FLAG_TEMPLATE)
+    f.write("#define SRE_FLAG_IGNORECASE %d\n" % SRE_FLAG_IGNORECASE)
+    f.write("#define SRE_FLAG_LOCALE %d\n" % SRE_FLAG_LOCALE)
+    f.write("#define SRE_FLAG_MULTILINE %d\n" % SRE_FLAG_MULTILINE)
+    f.write("#define SRE_FLAG_DOTALL %d\n" % SRE_FLAG_DOTALL)
+    f.write("#define SRE_FLAG_UNICODE %d\n" % SRE_FLAG_UNICODE)
+    f.write("#define SRE_FLAG_VERBOSE %d\n" % SRE_FLAG_VERBOSE)
+
+    f.write("#define SRE_INFO_PREFIX %d\n" % SRE_INFO_PREFIX)
+    f.write("#define SRE_INFO_LITERAL %d\n" % SRE_INFO_LITERAL)
+    f.write("#define SRE_INFO_CHARSET %d\n" % SRE_INFO_CHARSET)
+
+    f.close()
+    print "done"

File rpython/rlib/rsre/rpy/sre_parse.py

+#
+# Secret Labs' Regular Expression Engine
+#
+# convert re-style regular expression to sre pattern
+#
+# Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved.
+#
+# See the sre.py file for information on usage and redistribution.
+#
+
+"""Internal support module for sre (copied from CPython 2.7.3)"""
+
+# XXX: show string offset and offending character for all errors
+
+import sys
+
+from .sre_constants import *
+
+SPECIAL_CHARS = ".\\[{()*+?^$|"
+REPEAT_CHARS = "*+?{"
+
+DIGITS = set("0123456789")
+
+OCTDIGITS = set("01234567")
+HEXDIGITS = set("0123456789abcdefABCDEF")
+
+WHITESPACE = set(" \t\n\r\v\f")
+
+ESCAPES = {
+    r"\a": (LITERAL, ord("\a")),
+    r"\b": (LITERAL, ord("\b")),
+    r"\f": (LITERAL, ord("\f")),
+    r"\n": (LITERAL, ord("\n")),
+    r"\r": (LITERAL, ord("\r")),
+    r"\t": (LITERAL, ord("\t")),
+    r"\v": (LITERAL, ord("\v")),
+    r"\\": (LITERAL, ord("\\"))
+}
+
+CATEGORIES = {
+    r"\A": (AT, AT_BEGINNING_STRING), # start of string
+    r"\b": (AT, AT_BOUNDARY),
+    r"\B": (AT, AT_NON_BOUNDARY),
+    r"\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]),
+    r"\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]),
+    r"\s": (IN, [(CATEGORY, CATEGORY_SPACE)]),
+    r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),
+    r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
+    r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
+    r"\Z": (AT, AT_END_STRING), # end of string
+}
+
+FLAGS = {
+    # standard flags
+    "i": SRE_FLAG_IGNORECASE,
+    "L": SRE_FLAG_LOCALE,
+    "m": SRE_FLAG_MULTILINE,
+    "s": SRE_FLAG_DOTALL,
+    "x": SRE_FLAG_VERBOSE,
+    # extensions
+    "t": SRE_FLAG_TEMPLATE,
+    "u": SRE_FLAG_UNICODE,
+}
+
+class Pattern:
+    # master pattern object.  keeps track of global attributes
+    def __init__(self):
+        self.flags = 0
+        self.open = []
+        self.groups = 1
+        self.groupdict = {}
+    def opengroup(self, name=None):
+        gid = self.groups
+        self.groups = gid + 1
+        if name is not None:
+            ogid = self.groupdict.get(name, None)
+            if ogid is not None:
+                raise error, ("redefinition of group name %s as group %d; "
+                              "was group %d" % (repr(name), gid,  ogid))
+            self.groupdict[name] = gid
+        self.open.append(gid)
+        return gid
+    def closegroup(self, gid):
+        self.open.remove(gid)
+    def checkgroup(self, gid):
+        return gid < self.groups and gid not in self.open
+
+class SubPattern:
+    # a subpattern, in intermediate form
+    def __init__(self, pattern, data=None):
+        self.pattern = pattern
+        if data is None:
+            data = []
+        self.data = data
+        self.width = None
+    def dump(self, level=0):
+        nl = 1
+        seqtypes = type(()), type([])
+        for op, av in self.data:
+            print level*"  " + op,; nl = 0
+            if op == "in":
+                # member sublanguage
+                print; nl = 1
+                for op, a in av:
+                    print (level+1)*"  " + op, a
+            elif op == "branch":
+                print; nl = 1
+                i = 0
+                for a in av[1]:
+                    if i > 0:
+                        print level*"  " + "or"
+                    a.dump(level+1); nl = 1
+                    i = i + 1
+            elif type(av) in seqtypes:
+                for a in av:
+                    if isinstance(a, SubPattern):
+                        if not nl: print
+                        a.dump(level+1); nl = 1
+                    else:
+                        print a, ; nl = 0
+            else:
+                print av, ; nl = 0
+            if not nl: print
+    def __repr__(self):
+        return repr(self.data)
+    def __len__(self):
+        return len(self.data)
+    def __delitem__(self, index):
+        del self.data[index]
+    def __getitem__(self, index):
+        if isinstance(index, slice):
+            return SubPattern(self.pattern, self.data[index])
+        return self.data[index]
+    def __setitem__(self, index, code):
+        self.data[index] = code
+    def insert(self, index, code):
+        self.data.insert(index, code)
+    def append(self, code):
+        self.data.append(code)
+    def getwidth(self):
+        # determine the width (min, max) for this subpattern
+        if self.width:
+            return self.width
+        lo = hi = 0L
+        UNITCODES = (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY)
+        REPEATCODES = (MIN_REPEAT, MAX_REPEAT)
+        for op, av in self.data:
+            if op is BRANCH:
+                i = sys.maxint
+                j = 0
+                for av in av[1]:
+                    l, h = av.getwidth()
+                    i = min(i, l)
+                    j = max(j, h)
+                lo = lo + i
+                hi = hi + j
+            elif op is CALL:
+                i, j = av.getwidth()
+                lo = lo + i
+                hi = hi + j
+            elif op is SUBPATTERN:
+                i, j = av[1].getwidth()
+                lo = lo + i
+                hi = hi + j
+            elif op in REPEATCODES:
+                i, j = av[2].getwidth()
+                lo = lo + long(i) * av[0]
+                hi = hi + long(j) * av[1]
+            elif op in UNITCODES:
+                lo = lo + 1
+                hi = hi + 1
+            elif op == SUCCESS:
+                break
+        self.width = int(min(lo, sys.maxint)), int(min(hi, sys.maxint))
+        return self.width
+
+class Tokenizer:
+    def __init__(self, string):
+        self.string = string
+        self.index = 0
+        self.__next()
+    def __next(self):
+        if self.index >= len(self.string):
+            self.next = None
+            return
+        char = self.string[self.index]
+        if char[0] == "\\":
+            try:
+                c = self.string[self.index + 1]
+            except IndexError:
+                raise error, "bogus escape (end of line)"
+            char = char + c
+        self.index = self.index + len(char)
+        self.next = char
+    def match(self, char, skip=1):
+        if char == self.next:
+            if skip:
+                self.__next()
+            return 1
+        return 0
+    def get(self):
+        this = self.next
+        self.__next()
+        return this
+    def tell(self):
+        return self.index, self.next
+    def seek(self, index):
+        self.index, self.next = index
+
+def isident(char):
+    return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_"
+
+def isdigit(char):
+    return "0" <= char <= "9"
+
+def isname(name):
+    # check that group name is a valid string
+    if not isident(name[0]):
+        return False
+    for char in name[1:]:
+        if not isident(char) and not isdigit(char):
+            return False
+    return True
+
+def _class_escape(source, escape):
+    # handle escape code inside character class
+    code = ESCAPES.get(escape)
+    if code:
+        return code
+    code = CATEGORIES.get(escape)
+    if code:
+        return code
+    try:
+        c = escape[1:2]
+        if c == "x":
+            # hexadecimal escape (exactly two digits)
+            while source.next in HEXDIGITS and len(escape) < 4:
+                escape = escape + source.get()
+            escape = escape[2:]
+            if len(escape) != 2:
+                raise error, "bogus escape: %s" % repr("\\" + escape)
+            return LITERAL, int(escape, 16) & 0xff
+        elif c in OCTDIGITS:
+            # octal escape (up to three digits)
+            while source.next in OCTDIGITS and len(escape) < 4:
+                escape = escape + source.get()
+            escape = escape[1:]
+            return LITERAL, int(escape, 8) & 0xff
+        elif c in DIGITS:
+            raise error, "bogus escape: %s" % repr(escape)
+        if len(escape) == 2:
+            return LITERAL, ord(escape[1])
+    except ValueError:
+        pass
+    raise error, "bogus escape: %s" % repr(escape)
+
+def _escape(source, escape, state):
+    # handle escape code in expression
+    code = CATEGORIES.get(escape)
+    if code:
+        return code
+    code = ESCAPES.get(escape)
+    if code:
+        return code
+    try:
+        c = escape[1:2]
+        if c == "x":
+            # hexadecimal escape
+            while source.next in HEXDIGITS and len(escape) < 4:
+                escape = escape + source.get()
+            if len(escape) != 4:
+                raise ValueError
+            return LITERAL, int(escape[2:], 16) & 0xff
+        elif c == "0":
+            # octal escape
+            while source.next in OCTDIGITS and len(escape) < 4:
+                escape = escape + source.get()
+            return LITERAL, int(escape[1:], 8) & 0xff
+        elif c in DIGITS:
+            # octal escape *or* decimal group reference (sigh)
+            if source.next in DIGITS:
+                escape = escape + source.get()
+                if (escape[1] in OCTDIGITS and escape[2] in OCTDIGITS and
+                    source.next in OCTDIGITS):
+                    # got three octal digits; this is an octal escape
+                    escape = escape + source.get()
+                    return LITERAL, int(escape[1:], 8) & 0xff
+            # not an octal escape, so this is a group reference
+            group = int(escape[1:])
+            if group < state.groups:
+                if not state.checkgroup(group):
+                    raise error, "cannot refer to open group"
+                return GROUPREF, group
+            raise ValueError
+        if len(escape) == 2:
+            return LITERAL, ord(escape[1])
+    except ValueError:
+        pass
+    raise error, "bogus escape: %s" % repr(escape)
+
+def _parse_sub(source, state, nested=1):
+    # parse an alternation: a|b|c
+
+    items = []
+    itemsappend = items.append
+    sourcematch = source.match
+    while 1:
+        itemsappend(_parse(source, state))
+        if sourcematch("|"):
+            continue
+        if not nested:
+            break
+        if not source.next or sourcematch(")", 0):
+            break
+        else:
+            raise error, "pattern not properly closed"
+
+    if len(items) == 1:
+        return items[0]
+
+    subpattern = SubPattern(state)
+    subpatternappend = subpattern.append
+
+    # check if all items share a common prefix
+    while 1:
+        prefix = None
+        for item in items:
+            if not item:
+                break
+            if prefix is None:
+                prefix = item[0]
+            elif item[0] != prefix:
+                break
+        else:
+            # all subitems start with a common "prefix".
+            # move it out of the branch
+            for item in items:
+                del item[0]
+            subpatternappend(prefix)
+            continue # check next one
+        break
+
+    # check if the branch can be replaced by a character set
+    for item in items:
+        if len(item) != 1 or item[0][0] != LITERAL:
+            break
+    else:
+        # we can store this as a character set instead of a
+        # branch (the compiler may optimize this even more)
+        set = []
+        setappend = set.append
+        for item in items:
+            setappend(item[0])
+        subpatternappend((IN, set))
+        return subpattern
+
+    subpattern.append((BRANCH, (None, items)))
+    return subpattern
+
+def _parse_sub_cond(source, state, condgroup):
+    item_yes = _parse(source, state)
+    if source.match("|"):
+        item_no = _parse(source, state)
+        if source.match("|"):
+            raise error, "conditional backref with more than two branches"
+    else:
+        item_no = None
+    if source.next and not source.match(")", 0):
+        raise error, "pattern not properly closed"
+    subpattern = SubPattern(state)
+    subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
+    return subpattern
+
+_PATTERNENDERS = set("|)")
+_ASSERTCHARS = set("=!<")
+_LOOKBEHINDASSERTCHARS = set("=!")
+_REPEATCODES = set([MIN_REPEAT, MAX_REPEAT])
+
+def _parse(source, state):
+    # parse a simple pattern
+    subpattern = SubPattern(state)
+
+    # precompute constants into local variables
+    subpatternappend = subpattern.append
+    sourceget = source.get
+    sourcematch = source.match
+    _len = len
+    PATTERNENDERS = _PATTERNENDERS
+    ASSERTCHARS = _ASSERTCHARS
+    LOOKBEHINDASSERTCHARS = _LOOKBEHINDASSERTCHARS
+    REPEATCODES = _REPEATCODES
+
+    while 1:
+
+        if source.next in PATTERNENDERS:
+            break # end of subpattern
+        this = sourceget()
+        if this is None:
+            break # end of pattern
+
+        if state.flags & SRE_FLAG_VERBOSE:
+            # skip whitespace and comments
+            if this in WHITESPACE:
+                continue
+            if this == "#":
+                while 1:
+                    this = sourceget()
+                    if this in (None, "\n"):
+                        break
+                continue
+
+        if this and this[0] not in SPECIAL_CHARS:
+            subpatternappend((LITERAL, ord(this)))
+
+        elif this == "[":
+            # character set
+            set = []
+            setappend = set.append
+##          if sourcematch(":"):
+##              pass # handle character classes
+            if sourcematch("^"):
+                setappend((NEGATE, None))
+            # check remaining characters
+            start = set[:]
+            while 1:
+                this = sourceget()
+                if this == "]" and set != start:
+                    break
+                elif this and this[0] == "\\":
+                    code1 = _class_escape(source, this)
+                elif this:
+                    code1 = LITERAL, ord(this)
+                else:
+                    raise error, "unexpected end of regular expression"
+                if sourcematch("-"):
+                    # potential range
+                    this = sourceget()
+                    if this == "]":
+                        if code1[0] is IN:
+                            code1 = code1[1][0]
+                        setappend(code1)
+                        setappend((LITERAL, ord("-")))
+                        break
+                    elif this:
+                        if this[0] == "\\":
+                            code2 = _class_escape(source, this)
+                        else:
+                            code2 = LITERAL, ord(this)
+                        if code1[0] != LITERAL or code2[0] != LITERAL:
+                            raise error, "bad character range"
+                        lo = code1[1]
+                        hi = code2[1]
+                        if hi < lo:
+                            raise error, "bad character range"
+                        setappend((RANGE, (lo, hi)))
+                    else:
+                        raise error, "unexpected end of regular expression"
+                else:
+                    if code1[0] is IN:
+                        code1 = code1[1][0]
+                    setappend(code1)
+
+            # XXX: <fl> should move set optimization to compiler!
+            if _len(set)==1 and set[0][0] is LITERAL:
+                subpatternappend(set[0]) # optimization
+            elif _len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL:
+                subpatternappend((NOT_LITERAL, set[1][1])) # optimization
+            else:
+                # XXX: <fl> should add charmap optimization here
+                subpatternappend((IN, set))
+
+        elif this and this[0] in REPEAT_CHARS:
+            # repeat previous item
+            if this == "?":
+                min, max = 0, 1
+            elif this == "*":
+                min, max = 0, MAXREPEAT
+
+            elif this == "+":
+                min, max = 1, MAXREPEAT
+            elif this == "{":
+                if source.next == "}":
+                    subpatternappend((LITERAL, ord(this)))
+                    continue
+                here = source.tell()
+                min, max = 0, MAXREPEAT
+                lo = hi = ""
+                while source.next in DIGITS:
+                    lo = lo + source.get()
+                if sourcematch(","):
+                    while source.next in DIGITS:
+                        hi = hi + sourceget()
+                else:
+                    hi = lo
+                if not sourcematch("}"):
+                    subpatternappend((LITERAL, ord(this)))
+                    source.seek(here)
+                    continue
+                if lo:
+                    min = int(lo)
+                if hi:
+                    max = int(hi)
+                if max < min:
+                    raise error, "bad repeat interval"
+            else:
+                raise error, "not supported"
+            # figure out which item to repeat
+            if subpattern:
+                item = subpattern[-1:]
+            else:
+                item = None
+            if not item or (_len(item) == 1 and item[0][0] == AT):
+                raise error, "nothing to repeat"
+            if item[0][0] in REPEATCODES:
+                raise error, "multiple repeat"
+            if sourcematch("?"):
+                subpattern[-1] = (MIN_REPEAT, (min, max, item))
+            else:
+                subpattern[-1] = (MAX_REPEAT, (min, max, item))
+
+        elif this == ".":
+            subpatternappend((ANY, None))
+
+        elif this == "(":
+            group = 1
+            name = None
+            condgroup = None
+            if sourcematch("?"):
+                group = 0
+                # options
+                if sourcematch("P"):
+                    # python extensions
+                    if sourcematch("<"):
+                        # named group: skip forward to end of name
+                        name = ""
+                        while 1:
+                            char = sourceget()
+                            if char is None:
+                                raise error, "unterminated name"
+                            if char == ">":
+                                break
+                            name = name + char
+                        group = 1
+                        if not isname(name):
+                            raise error, "bad character in group name"
+                    elif sourcematch("="):
+                        # named backreference
+                        name = ""
+                        while 1:
+                            char = sourceget()
+                            if char is None:
+                                raise error, "unterminated name"
+                            if char == ")":
+                                break
+                            name = name + char
+                        if not isname(name):
+                            raise error, "bad character in group name"
+                        gid = state.groupdict.get(name)
+                        if gid is None:
+                            raise error, "unknown group name"
+                        subpatternappend((GROUPREF, gid))
+                        continue
+                    else:
+                        char = sourceget()
+                        if char is None:
+                            raise error, "unexpected end of pattern"
+                        raise error, "unknown specifier: ?P%s" % char
+                elif sourcematch(":"):
+                    # non-capturing group
+                    group = 2
+                elif sourcematch("#"):
+                    # comment
+                    while 1:
+                        if source.next is None or source.next == ")":
+                            break
+                        sourceget()
+                    if not sourcematch(")"):
+                        raise error, "unbalanced parenthesis"
+                    continue
+                elif source.next in ASSERTCHARS:
+                    # lookahead assertions
+                    char = sourceget()
+                    dir = 1
+                    if char == "<":
+                        if source.next not in LOOKBEHINDASSERTCHARS:
+                            raise error, "syntax error"
+                        dir = -1 # lookbehind
+                        char = sourceget()
+                    p = _parse_sub(source, state)
+                    if not sourcematch(")"):
+                        raise error, "unbalanced parenthesis"
+                    if char == "=":
+                        subpatternappend((ASSERT, (dir, p)))
+                    else:
+                        subpatternappend((ASSERT_NOT, (dir, p)))
+                    continue
+                elif sourcematch("("):
+                    # conditional backreference group
+                    condname = ""
+                    while 1:
+                        char = sourceget()
+                        if char is None:
+                            raise error, "unterminated name"
+                        if char == ")":
+                            break
+                        condname = condname + char
+                    group = 2
+                    if isname(condname):
+                        condgroup = state.groupdict.get(condname)
+                        if condgroup is None:
+                            raise error, "unknown group name"
+                    else:
+                        try:
+                            condgroup = int(condname)
+                        except ValueError:
+                            raise error, "bad character in group name"
+                else:
+                    # flags
+                    if not source.next in FLAGS:
+                        raise error, "unexpected end of pattern"
+                    while source.next in FLAGS:
+                        state.flags = state.flags | FLAGS[sourceget()]
+            if group:
+                # parse group contents
+                if group == 2:
+                    # anonymous group
+                    group = None
+                else:
+                    group = state.opengroup(name)
+                if condgroup:
+                    p = _parse_sub_cond(source, state, condgroup)
+                else:
+                    p = _parse_sub(source, state)
+                if not sourcematch(")"):
+                    raise error, "unbalanced parenthesis"
+                if group is not None:
+                    state.closegroup(group)
+                subpatternappend((SUBPATTERN, (group, p)))
+            else:
+                while 1:
+                    char = sourceget()
+                    if char is None:
+                        raise error, "unexpected end of pattern"
+                    if char == ")":
+                        break
+                    raise error, "unknown extension"
+
+        elif this == "^":
+            subpatternappend((AT, AT_BEGINNING))
+
+        elif this == "$":
+            subpattern.append((AT, AT_END))
+
+        elif this and this[0] == "\\":
+            code = _escape(source, this, state)
+            subpatternappend(code)
+
+        else:
+            raise error, "parser error"
+
+    return subpattern
+
+def parse(str, flags=0, pattern=None):
+    # parse 're' pattern into list of (opcode, argument) tuples
+
+    source = Tokenizer(str)
+
+    if pattern is None:
+        pattern = Pattern()
+    pattern.flags = flags
+    pattern.str = str
+
+    p = _parse_sub(source, pattern, 0)
+
+    tail = source.get()
+    if tail == ")":
+        raise error, "unbalanced parenthesis"
+    elif tail:
+        raise error, "bogus characters at end of regular expression"
+
+    if flags & SRE_FLAG_DEBUG:
+        p.dump()
+
+    if not (flags & SRE_FLAG_VERBOSE) and p.pattern.flags & SRE_FLAG_VERBOSE:
+        # the VERBOSE flag was switched on inside the pattern.  to be
+        # on the safe side, we'll parse the whole thing again...
+        return parse(str, p.pattern.flags)
+
+    return p
+
+def parse_template(source, pattern):
+    # parse 're' replacement string into list of literals and
+    # group references
+    s = Tokenizer(source)
+    sget = s.get
+    p = []
+    a = p.append
+    def literal(literal, p=p, pappend=a):
+        if p and p[-1][0] is LITERAL:
+            p[-1] = LITERAL, p[-1][1] + literal
+        else:
+            pappend((LITERAL, literal))
+    sep = source[:0]
+    if type(sep) is type(""):
+        makechar = chr
+    else:
+        makechar = unichr
+    while 1:
+        this = sget()
+        if this is None:
+            break # end of replacement string
+        if this and this[0] == "\\":
+            # group
+            c = this[1:2]
+            if c == "g":
+                name = ""
+                if s.match("<"):
+                    while 1:
+                        char = sget()
+                        if char is None:
+                            raise error, "unterminated group name"
+                        if char == ">":
+                            break
+                        name = name + char
+                if not name:
+                    raise error, "bad group name"
+                try:
+                    index = int(name)
+                    if index < 0:
+                        raise error, "negative group number"
+                except ValueError:
+                    if not isname(name):
+                        raise error, "bad character in group name"
+                    try:
+                        index = pattern.groupindex[name]
+                    except KeyError:
+                        raise IndexError, "unknown group name"
+                a((MARK, index))
+            elif c == "0":
+                if s.next in OCTDIGITS:
+                    this = this + sget()
+                    if s.next in OCTDIGITS:
+                        this = this + sget()
+                literal(makechar(int(this[1:], 8) & 0xff))
+            elif c in DIGITS:
+                isoctal = False
+                if s.next in DIGITS:
+                    this = this + sget()
+                    if (c in OCTDIGITS and this[2] in OCTDIGITS and
+                        s.next in OCTDIGITS):
+                        this = this + sget()
+                        isoctal = True
+                        literal(makechar(int(this[1:], 8) & 0xff))
+                if not isoctal:
+                    a((MARK, int(this[1:])))
+            else:
+                try:
+                    this = makechar(ESCAPES[this][1])
+                except KeyError:
+                    pass
+                literal(this)
+        else:
+            literal(this)
+    # convert template to groups and literals lists
+    i = 0
+    groups = []
+    groupsappend = groups.append
+    literals = [None] * len(p)
+    for c, s in p:
+        if c is MARK:
+            groupsappend((i, s))
+            # literal[i] is already None
+        else:
+            literals[i] = s
+        i = i + 1
+    return groups, literals
+
+def expand_template(template, match):
+    g = match.group
+    sep = match.string[:0]
+    groups, literals = template
+    literals = literals[:]
+    try:
+        for index, group in groups:
+            literals[index] = s = g(group)
+            if s is None:
+                raise error, "unmatched group"
+    except IndexError:
+        raise error, "invalid group reference"
+    return sep.join(literals)

File rpython/rlib/rsre/rsre_re.py

 class Scanner:
     # This class is copied directly from re.py.
     def __init__(self, lexicon, flags=0):
-        from sre_constants import BRANCH, SUBPATTERN
-        import sre_parse
+        from rpython.rlib.rsre.rpy.sre_constants import BRANCH, SUBPATTERN
+        from rpython.rlib.rsre.rpy import sre_parse
         self.lexicon = lexicon
         # combine phrases into a compound pattern
         p = []

File rpython/rlib/rsre/test/test_char.py

     assert not rsre_char.is_uni_word(ord(','))
 
 def test_category():
-    from sre_constants import CHCODES
+    from rpython.rlib.rsre.rpy.sre_constants import CHCODES
     cat = rsre_char.category_dispatch
     #
     assert     cat(CHCODES["category_digit"], ord('1'))

File rpython/rtyper/lltypesystem/rpbc.py

     # no __init__ here, AbstractClassesPBCRepr.__init__ is good enough
 
     def _instantiate_runtime_class(self, hop, vtypeptr, r_instance):
-        v_instantiate = hop.genop('getfield', [vtypeptr, hop.inputconst(Void, "instantiate")], resulttype=vtypeptr.concretetype.TO.instantiate)
-        possible_graphs = hop.inputconst(Void,
-            [desc.getclassdef(None).my_instantiate_graph for desc in self.s_pbc.descriptions]
-        )
-        v_inst = hop.genop('indirect_call', [v_instantiate, possible_graphs], resulttype=vtypeptr.concretetype.TO.instantiate.TO.RESULT)
+        graphs = []
+        for desc in self.s_pbc.descriptions:
+            classdef = desc.getclassdef(None)
+            assert hasattr(classdef, 'my_instantiate_graph')
+            graphs.append(classdef.my_instantiate_graph)
+        c_graphs = hop.inputconst(Void, graphs)
+        #
+        # "my_instantiate = typeptr.instantiate"
+        c_name = hop.inputconst(Void, 'instantiate')
+        v_instantiate = hop.genop('getfield', [vtypeptr, c_name],
+                                 resulttype = rclass.OBJECT_VTABLE.instantiate)
+        # "my_instantiate()"
+        v_inst = hop.genop('indirect_call', [v_instantiate, c_graphs],
+                           resulttype = rclass.OBJECTPTR)
         return hop.genop('cast_pointer', [v_inst], resulttype=r_instance)
 
     def getlowleveltype(self):

File rpython/rtyper/rbuiltin.py

     assert isinstance(hop.args_r[0], rclass.InstanceRepr)
     return hop.args_r[0].rtype_isinstance(hop)
 
-def ll_instantiate(typeptr):   # NB. used by rpbc.ClassesPBCRepr as well
-    my_instantiate = typeptr.instantiate
-    return my_instantiate()
-
 def rtype_instantiate(hop):
     hop.exception_cannot_occur()
     s_class = hop.args_s[0]
     if len(s_class.descriptions) != 1:
         # instantiate() on a variable class
         vtypeptr, = hop.inputargs(rclass.get_type_repr(hop.rtyper))
-        v_inst = hop.gendirectcall(ll_instantiate, vtypeptr)
-        return hop.genop('cast_pointer', [v_inst],    # v_type implicit in r_result
-                         resulttype = hop.r_result.lowleveltype)
-
+        r_class = hop.args_r[0]
+        return r_class._instantiate_runtime_class(hop, vtypeptr,
+                                                  hop.r_result.lowleveltype)
     classdef = s_class.any_description().getuniqueclassdef()
     return rclass.rtype_new_instance(hop.rtyper, classdef, hop.llops)
 

File rpython/translator/backendopt/gilanalysis.py

             return False
         else:
             return False
-
-    def analyze_instantiate_call(self, seen=None):
-        return False
                 
     def analyze_simple_operation(self, op, graphinfo):
         return False

File rpython/translator/backendopt/graphanalyze.py

                         result, self.analyze_direct_call(graph, seen))
         return result
 
-    def analyze_instantiate_call(self, seen=None):
-        return self.top_result()
-
     def analyze_link(self, graph, link):
         return self.bottom_result()
 
     def compute_graph_info(self, graph):
         return None
 
-    def analyze(self, op, seen=None, graphinfo=None, block=None):
+    def analyze(self, op, seen=None, graphinfo=None):
         if op.opname == "direct_call":
             graph = get_graph(op.args[0], self.translator)
             if graph is None:
         elif op.opname == "indirect_call":
             graphs = op.args[-1].value
             if graphs is None:
-                if block is not None:
-                    v_func = op.args[0]
-                    for op1 in block.operations:
-                        if (v_func is op1.result and
-                            op1.opname == 'getfield' and
-                            op1.args[0].concretetype == rclass.CLASSTYPE and
-                            op1.args[1].value == 'instantiate'):
-                            x = self.analyze_instantiate_call(seen)
-                            if self.verbose and x:
-                                self.dump_info('analyze_instantiate(%s): %r' % (
-                                    graphs, x))
-                            return x
                 if self.verbose:
                     self.dump_info('%s to unknown' % (op,))
                 return self.top_result()
                 for op in block.operations:
                     result = self.add_to_result(
                         result,
-                        self.analyze(op, seen, graphinfo, block=block)
+                        self.analyze(op, seen, graphinfo)
                     )
                     if self.is_top_result(result):
                         break
             graphs = self.translator.graphs
         for graph in graphs:
             for block, op in graph.iterblockops():
-                self.analyze(op, block=block)
+                self.analyze(op)
 
 
 class Dependency(object):

File rpython/translator/backendopt/test/test_writeanalyze.py

         assert not wa.analyze(op_call_m)
 
     def test_instantiate(self):
-        # instantiate is interesting, because it leads to one of the few cases of
-        # an indirect call without a list of graphs
         from rpython.rlib.objectmodel import instantiate
         class A:
             pass
         t, wa = self.translate(f, [int])
         fgraph = graphof(t, f)
         result = wa.analyze(fgraph.startblock.operations[0])
-        assert result is top_set
+        assert not result
 
     def test_llexternal(self):