Commits

Matt Chaput committed c4e2d98

Removed obsolete code.

  • Participants
  • Parent commits c321944

Comments (0)

Files changed (5)

File obsolete/filters.py

-#===============================================================================
-# Copyright 2007 Matt Chaput
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-# 
-#    http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#===============================================================================
-
-import re
-
-from genshi.core import Namespace, QName, Attrs, Stream, StreamEventKind #@UnresolvedImport
-from genshi.core import END, START, TEXT #@UnresolvedImport
-
-from matchers import start, starta, end, BREAK
-from config import WIKI_NS_URL
-
-WIKI = Namespace(WIKI_NS_URL)
-
-startingIndent = re.compile("^ +")
-
-
-def JoinText(stream):
-    buf = None
-    bufpos = None
-    for ev in stream:
-        kind, data, pos = ev
-        if kind is TEXT and buf is None:
-            buf = data
-            bufpos = pos
-        elif kind is TEXT:
-            buf += data
-        else:
-            if buf is not None:
-                yield (TEXT, buf, bufpos)
-                buf = None
-            yield ev
-    if buf is not None:
-        yield (TEXT, buf, bufpos)
-
-
-def Blockifier(stream):
-    """Transforms raw linear events from the interpreter into
-    hierarchical blocks based on break events and indentation.
-    """
-    
-    # This filter essentially transforms...
-    #
-    # event1 event2 <BREAK/> event3 event4
-    #
-    # Into...
-    # 
-    # <b> <bc> event1 event2 </bc> </b> <b> <bc> event3 event4 </bc> </b>
-    #
-    # However, we want to arrange blocks hierarchically
-    # based on indentation, so...
-    #
-    # <BREAK indent="0"/> event <BREAK indent="4"/> event
-    #
-    # becomes...
-    #
-    # <b>
-    #  <bc> event1 </bc>
-    #  <indent>
-    #   <b> <bc> event2 </bc> </b>
-    #  </indent>
-    # </b>
-    
-    # The stack of queued up block headers.
-    stack = []
-    
-    inblock = False
-    emitted_bc = False
-
-    for ev in stream:
-        kind, data, _ = ev
-        
-        if kind is BREAK:
-            # The break event has the structure:
-            # (BREAK, (indent, right_indent, attrs), pos)
-            indent, rightindent, attrs = data
-            
-            if inblock:
-                # Get the indent and attributes of the last
-                # block header on the stack
-                currentindent = stack[-1][0]
-                
-                if emitted_bc:
-                    yield end(WIKI.bc)
-                
-                if indent > currentindent:
-                    yield start(WIKI.indent)
-                else:
-                    # Pop back to the indentation level of the new block.
-                    # This closes off all queued blocks that have a higher
-                    # indentation level than the new block.
-                    
-                    in_ancestors = False
-                    while len(stack) > 0 and indent <= stack[-1][0]:
-                        stack.pop()
-                        if in_ancestors:
-                            yield end(WIKI.indent)
-                        else:
-                            in_ancestors = True
-                        yield end(WIKI.b)
-                        
-            # Add the new block to the queue.
-            stack.append((indent, rightindent, attrs))
-            yield starta(WIKI.b, attrs)
-            emitted_bc = False
-            inblock = True
-            
-        else:
-            if not inblock:
-                stack.append((0, 0, Attrs()))
-                yield start(WIKI.b)
-                yield start(WIKI.bc)
-                inblock = True
-                emitted_bc = True
-            elif not emitted_bc:
-                yield start(WIKI.bc)
-                emitted_bc = True
-            yield ev
-    
-    # Close off all queued blocks.
-    if emitted_bc:
-        yield end(WIKI.bc)
-    
-    in_ancestors = False
-    while len(stack) > 0:
-        stack.pop()
-        if in_ancestors:
-            yield end(WIKI.indent)
-        else:
-            in_ancestors = True
-        yield end(WIKI.b)
-
-
-def DoubleBreakFilter(stream):
-    """Removes <BREAK><BREAK>... events from the stream so you don't
-    end up with empty paragraphs. This would be a good place to put
-    logic about BREAKS overriding each other; currently the newest
-    BREAK always overrides the previous one."""
-    
-    buffer = []
-    
-    for ev in stream:
-        if ev[0] is BREAK:
-            buffer = [ev]
-        
-        elif ev[0] is TEXT and ev[1].strip() == "":
-            buffer.append(ev)
-        
-        else:
-            if buffer:
-                for ev2 in buffer:
-                    yield ev2
-                buffer = []
-                
-            yield ev
-    
-#    for ev in stream:
-#        if ev[0] is BREAK:
-#            buffer = ev
-#        else:
-#            if buffer:
-#                yield buffer
-#                buffer = None
-#            yield ev
-
-
-class BlockGrouper(object):
-    def __init__(self, types):
-        self.typeset = set(types)
-        
-    def __call__(self, stream, level = 0):
-        typeset = self.typeset
-        stream = iter(stream)
-        currenttype = None
-        
-        for ev in stream:
-            kind, data, _ = ev
-            if kind is START and data[0] == WIKI.b:
-                attrs = data[1]
-                t = attrs.get("affinity", attrs.get("type"))
-                
-                if currenttype and t != currenttype:
-                    yield end(WIKI.group)
-                    currenttype = None
-                
-                if t in typeset and not currenttype:
-                    yield start(WIKI.group, type = t)
-                    currenttype = t
-                
-                yield ev
-            
-            elif kind is START and data[0] == WIKI.indent:
-                yield ev
-                for ev2 in self.__call__(stream, level + 1):
-                    yield ev2
-                    
-            elif kind is END and data == WIKI.indent:
-                if currenttype:
-                    yield end(WIKI.group)
-                yield ev
-                return
-            
-            else:
-                yield ev
-        
-        if currenttype:
-            yield end(WIKI.group)
-
-
-class PushbackNone(object): pass
-class PushbackStream(object):
-    def __init__(self, wrapped):
-        self.wrapped = iter(wrapped)
-        self.buf = PushbackNone
-        
-    def __iter__(self):
-        return self
-    
-    def push(self, val):
-        if self.buf is not PushbackNone:
-            raise Exception("You can only push back one value")
-        self.buf = val
-    
-    def next(self):
-        if self.buf is not PushbackNone:
-            n = self.buf
-            self.buf = PushbackNone
-            return n
-        else:
-            return self.wrapped.next()
-
-class PopHeading(Exception):
-    def __init__(self, level):
-        self.level = level
-
-def HeadingSorter(stream, level = -1, gathering = False):
-    """Converts linear heading structures, e.g.
-    
-    heading1
-    TEXT
-    heading2
-    TEXT
-    
-    ...into hiearchical structures with subordinate
-    content inside the <indent> section of the parent
-    heading, i.e.:
-    
-    heading1
-        TEXT
-        heading2
-            TEXT
-    """
-    
-    #TODO: use this implementation in the Blockifier, or possibly unite them
-    
-    if not isinstance(stream, PushbackStream):
-        stream = PushbackStream(stream)
-    
-    # Whether we're currently inside a heading block.
-    in_heading = False
-    
-    # The level of the level we're currently inside. This is compared
-    # to the "level" variable the function was called with.
-    heading_level = -1
-    
-    for ev in stream:
-        kind, data, _ = ev
-        
-        if kind is START and data[0] == WIKI.b and (data[1].get("type") == "heading" or data[1].get("role") == "heading"):
-            # This is the start of a heading block.
-            
-            heading_level = int(data[1].get("level", "0"))
-            
-            # If the level of this block is less than or equal to the level
-            # we're currently at, raise an exception to pop out of this
-            # execution context back to a previous level.
-            if heading_level <= level:
-                stream.push(ev)
-                raise PopHeading(heading_level)
-            
-            # Otherwise, yield the block start and remember that we're inside
-            # a heading block.
-            yield ev
-            in_heading = True
-            
-        elif kind is START and data[0] == WIKI.b:
-            yield ev
-        
-            # This block is not a heading, reset the in_heading flag.
-            in_heading = False
-        elif kind is START and data[0] == WIKI.indent:
-            yield ev
-        
-            # This is the start of an indent block. If a heading has an
-            # indent, it shouldn't gather subsequent blocks also, so reset
-            # the in_heading flag.
-            in_heading = False
-            
-            # Push a new recursive context for the contents of the indent block.
-            for ee in HeadingSorter(stream):
-                yield ee
-        elif kind is END and data == WIKI.indent:
-            # Leave an indent block, raising StopIteration to pop out
-            # of the indent context.
-            
-            if gathering:
-                stream.push(ev)
-            else:
-                yield ev
-            raise StopIteration
-        elif kind is END and data == WIKI.b and in_heading:
-            # OK, we've reached the end of a block. in_heading
-            # is True, which means it was a heading block and 
-            # it didn't have an indent. So, instead of emitting
-            # the block end, we should start gathering blocks.
-            
-            started = False
-            try:
-                # Push a new recursive context for gathering subsequent blocks.
-                for ee in HeadingSorter(stream, heading_level, gathering = True):
-                    # Only emit the start of the indent block when we gather
-                    # the first event.
-                    if not started:
-                        yield start(WIKI.indent)
-                        started = True
-                    
-                    yield ee
-                
-                # Emit the end of the indent block (if we gathered any events)
-                if started:
-                    yield end(WIKI.indent)
-                
-                # Finally, yield the original end-of-block event!
-                yield ev
-                
-            except PopHeading, e:
-                # Somewhere down in the recursive context(s) we created above,
-                # they raised HeadingException to pop back up.
-                
-                # Emit the end of the indent block (if we gathered any events)
-                if started:
-                    yield end(WIKI.indent)
-                
-                # Emit the original end-of-block event.
-                yield ev
-            
-                if e.level <= level:
-                    # Is the exception trying to pop back to an even higher level
-                    # than we are at? If so, re-raise the exception.
-                    raise PopHeading(e.level)
-        else:
-            yield ev
-
-
-def HeadingLevels(stream):
-    level = 1
-    queue = []
-    for ev in stream:
-        kind, data, pos = ev
-        if kind is START and data[0] == WIKI.b and (data[1].get("type") == "heading" or data[1].get("role") == "heading"):
-            level += 1
-            attrs = data[1] | [('level', str(level))]
-            queue.append(True)
-            yield (kind, (data[0], attrs), pos)
-        elif kind is START and data[0] == WIKI.b:
-            queue.append(False)
-            yield ev
-        elif kind is END and data == WIKI.b:
-            if queue.pop():
-                level -= 1
-            yield ev
-        else:
-            yield ev
-
-
-def PseudoXmlFilter(stream, gathering = False):
-    stream = iter(stream)
-    for ev in stream:
-        kind, data, _ = ev
-        
-        if kind is START and data[0] == WIKI.indent:
-            yield ev
-            for ev2 in PseudoXmlFilter(stream, gathering = False):
-                yield ev2
-        
-        elif kind is END and data == WIKI.indent:
-            if not gathering:
-                yield ev
-            return
-        
-        elif kind is START and data[0] == WIKI.b and data[1].get("type") == "px":
-            bcstart = stream.next()
-            assert bcstart[0] is START and bcstart[1][0] == WIKI.bc
-            
-            elstart = stream.next()
-            assert elstart[0] is START
-            qname = elstart[1][0]
-            yield elstart
-            
-            for ev2 in takeuntil(stream, qname, False):
-                yield ev2
-            
-            bcend = stream.next()
-            assert bcend[0] is END and bcend[1] == WIKI.bc
-            
-            nextev = stream.next()
-            if nextev[0] is START and nextev[1][0] == WIKI.indent:
-                for ev2 in PseudoXmlFilter(stream, gathering = True):
-                    yield ev2
-                yield end(qname)
-                
-                endb = stream.next()
-                assert endb[0] is END and endb[1] == WIKI.b
-                
-            elif nextev[0] is END and nextev[1] == WIKI.b:
-                yield end(qname)
-            else:
-                raise Exception("Expecting indent or /b, found %s" % repr(nextev))
-        else:
-            yield ev
-
-def takeuntil(stream, qname, emitlast):
-    level = 1
-    for ev in stream:
-        if ev[0] is START and ev[1][0] == qname:
-            level += 1
-            yield ev
-        elif ev[0] is END and ev[1] == qname:
-            level -= 1
-            if level < 1 and not emitlast:
-                return
-            yield ev
-            if level < 1:
-                return
-        else:
-            yield ev
-
-class IsoElement(object):
-    def __init__(self, qnames):
-        self.qnames = qnames
-
-    def __call__(self, stream):
-        stream = iter(stream)
-        
-        acceptable = set(self.qnames)
-        
-        for ev in stream:
-            kind, data, _ = ev
-            
-            if kind is START and data[0] == WIKI.b:
-                queue = []
-                cancelled = False
-                
-                for ev2 in stream:
-                    if ev2[0] is END and ev2[1] == WIKI.b:
-                        # Got to the end of the paragraph, and we haven't
-                        # cancelled yet, so this paragraph can be stripped.
-                        
-                        break
-                    
-                    elif ev2[0] is START and ev2[1][0] in acceptable:
-                        # Found a start element that's in the list of qnames
-                        # to extract. queue up the element and its contents.
-                        
-                        queue.append(ev2)
-                        for ev3 in takeuntil(stream, ev2[1][0], True):
-                            queue.append(ev3)
-                            
-                    elif ev2[0] is START and ev2[1][0] == WIKI.bc:
-                        # Found the <bc> start tag. This is the only other
-                        # start tag that won't trigger cancellation.
-                        
-                        queue.append(ev2)
-                        
-                    elif (ev2[0] is TEXT and ev2[1].strip() != "")\
-                         or ev2[0] is START:
-                        # Found a non-empty text node, or a start tag other
-                        # than bc or one of the qnames to extract. So, cancel
-                        # the operation.
-                        
-                        queue.append(ev2)
-                        cancelled = True
-                        break
-                    
-                    else:
-                        # Anything else, queue it up.
-                        queue.append(ev2)
-                
-                if cancelled:
-                    # Replay everything we've seen so far and move on.
-                    
-                    yield ev
-                    for qev in queue:
-                        yield qev
-                else:
-                    # Replay the queue, throwing away the bc tags.
-                    
-                    for qev in queue:
-                        if not ((qev[0] is START and qev[1][0] == WIKI.bc)\
-                                or (qev[0] is END and qev[1] == WIKI.bc)):
-                            yield qev
-            else:
-                yield ev
-                        
-def takenext(stream, kind, qname):
-    ev = stream.next()
-    assert ev[0] is kind
-    if kind is START:
-        assert ev[1][0] == qname
-        return ev[1][1]
-    elif kind is END: assert ev[1] == qname
-
-def RetroAttrs(stream):
-    stream = iter(stream)
-    
-    attrs = None
-    lastpos = None
-    taking = False
-    queue = []
-    
-    for ev in stream:
-        kind, data, pos = ev
-        addev = True
-        
-        if taking and kind is START and data[0] == WIKI.b and data[1].get("type") == "blockattr":
-            addev = False
-            takenext(stream, START, WIKI.bc)
-            
-            blockattrs = takenext(stream, START, WIKI.blockattr)
-            attrs |= [(QName(blockattrs.get("name")), blockattrs.get("value"))]
-            
-            takenext(stream, END, WIKI.blockattr)
-            takenext(stream, END, WIKI.bc)
-            takenext(stream, END, WIKI.b)
-        
-        elif kind is START and data[0] == WIKI.b:
-            if queue:
-                yield (START, (WIKI.b, attrs), lastpos)
-                for ev2 in queue[1:]:
-                    yield ev2
-                queue = []
-            
-            taking = True
-            attrs = data[1]
-            lastpos = pos
-        
-        elif kind is START and data[0] == WIKI.shortcut and data[1].get("name") == "Id":
-            addev = False
-            attrs |= [(QName("id"), data[1].get("value"))]
-            takenext(stream, END, WIKI.shortcut)
-        
-        elif kind is END and data == WIKI.b:
-            if queue:
-                yield (START, (WIKI.b, attrs), lastpos)
-                for ev2 in queue[1:]:
-                    yield ev2
-                queue = []
-            
-            taking = False
-        
-        if addev:
-            if taking:
-                queue.append(ev)
-            else:
-                yield ev
-            
-    assert len(queue) == 0
-    
-
-
-
-    

File obsolete/matchers.py

-#===============================================================================
-# Copyright 2007 Matt Chaput
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-# 
-#    http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#===============================================================================
-
-import re
-from textwrap import dedent
-from collections import defaultdict
-
-from genshi.core import Namespace, QName, Attrs, Stream, StreamEventKind #@UnresolvedImport
-from genshi.core import END, START, TEXT #@UnresolvedImport
-from config import WIKI_NS_URL, XHTML_NS_URL
-
-WIKI = Namespace(WIKI_NS_URL)
-
-BREAK = StreamEventKind("BREAK")
-
-DEFAULT_NAMESPACES = {"w": WIKI_NS_URL, "h": XHTML_NS_URL}
-
-def d2a(d):
-    return Attrs([(QName(n), v) for n, v in d.iteritems() if v is not None])
-
-def start(qname, **kw):
-    return START, (qname, d2a(kw)), (None, -1, -1)
-def starta(qname, attrs):
-    return START, (qname, attrs), (None, -1, -1)
-
-def end(qname):
-    return END, qname, (None, -1, -1)
-
-def text(t):
-    return TEXT, t, (None, -1, -1)
-
-def br(ctx, indent, attrs = Attrs(), right_indent = None):
-    if right_indent is None:
-        right_indent = indent
-    ctx["indent"] = indent
-    ctx["right_indent"] = right_indent
-    ctx["block_ctx"] = {}
-    return BREAK, (indent, right_indent, attrs), (None, -1, -1)
-
-def justtext(events):
-    return u"".join([ev[1] for ev in events if ev[0] is TEXT])
-
-def ensure(m):
-    if isinstance(m, basestring):
-        return Literal(m)
-    return m
-
-newline = "((^\n*)|((\n *)*\n))"
-
-
-class ConfigurationError(Exception): pass
-class ParserFailure(Exception): pass
-class ParserSuccess(Exception): pass
-
-class Matcher(object):
-    def __init__(self):
-        raise NotImplementedError
-    
-    def __repr__(self):
-        return "%s()" % self.__class__.__name__
-    
-    def __add__(self, other):
-        other = ensure(other)
-        if isinstance(other, Matcher):
-            return Sequence([self, other])
-        else:
-            raise NotImplementedError
-        
-    def __radd__(self, other):
-        other = ensure(other)
-        if isinstance(other, Matcher):
-            return Sequence([other, self])
-        else:
-            raise NotImplementedError
-        
-    def __or__(self, other):
-        other = ensure(other)
-        if isinstance(other, Matcher):
-            return Choice([self, other])
-        else:
-            raise NotImplementedError
-        
-    def __ror__(self, other):
-        other = ensure(other)
-        if isinstance(other, Matcher):
-            return Choice([other, self])
-        else:
-            raise NotImplementedError
-    
-    #def __rshift__(self, name):
-    #    if not isinstance(name, basestring): raise NotImplementedError
-    #    return Capture(name, self)
-    
-    def parse(self, input, position, ctx):
-        raise NotImplementedError
-    
-    def match(self, input):
-        return self.parse(input, 0, {})
-    
-    def terminate(self, matcher):
-        return self
-    
-    def is_optional(self):
-        return False
-
-class Escape(Matcher):
-    """Matches a pattern (usually backslash) that escapes the next character."""
-    
-    def __init__(self, escape_chars):
-        self.first_chars = escape_chars
-    
-    def __repr__(self):
-        return "%s(%s)" % (self.__class__.__name__, repr(self.first_chars))
-    
-    def __unicode__(self):
-        return u"%s." % self.first_chars
-    
-    def parse(self, input, position, ctx):
-        if input[position] == self.first_chars:
-            if not position >= len(input) - 1:
-                yield position + 2
-                yield text(input[position + 1])
-
-class Literal(Matcher):
-    def __init__(self, text, suppress = False):
-        self.text = text
-        self.first_chars = text[0]
-        self.suppress = suppress
-    
-    def __repr__(self):
-        return "%s(%s)" % (self.__class__.__name__, repr(self.text))
-    
-    def __unicode__(self):
-        return '"%s"' % self.text.encode("unicode-escape")
-    
-    def parse(self, input, position, ctx):
-        t = self.text
-        right = position+len(t)
-        if input[position:right] == t:
-            if not self.suppress:
-                yield text(t)
-            yield right
-
-
-class MultiMatcher(Matcher):
-    def __init__(self, matchers):
-        raise NotImplementedError
-    
-    def __repr__(self):
-        return "%s(%s)" % (self.__class__.__name__, repr(self.matchers))
-
-class Sequence(MultiMatcher):
-    def __init__(self, matchers):
-        self.matchers = [ensure(m) for m in matchers]
-        self.terminated = False
-        self.first_chars = self.matchers[0].first_chars
-    
-    def __unicode__(self):
-        return u"(%s)" % (" + ".join([unicode(m) for m in self.matchers]))
-    
-    def __add__(self, other):
-        other = ensure(other)
-        if isinstance(other, Matcher):
-            return Sequence(self.matchers + [other])
-        else:
-            raise NotImplementedError
-        
-    def __radd__(self, other):
-        other = ensure(other)
-        if isinstance(other, Matcher):
-            return Sequence([other] + self.matchers)
-        else:
-            raise NotImplementedError
-    
-    def add_terminals(self):
-        for i in xrange(0, len(self.matchers) - 1):
-            self.matchers[i] = self.matchers[i].terminate(self.matchers[i+1])
-            # TODO: If the next matcher in the sequence is optional, we need
-            # to add a matcher for it too.
-    
-    def output(self, queue):
-        for mq in queue:
-            for ev in mq:
-                yield ev
-    
-    def parse(self, input, position, ctx):
-        if not self.terminated:
-            self.add_terminals()
-            self.terminated = True
-        
-        matchers = self.matchers
-        queue = []
-        
-        nctx = ctx.copy()
-        for m in matchers:
-            mq = []
-            matched = False
-            
-            try:
-                for ev in m.parse(input, position, nctx):
-                    matched = True
-                    if isinstance(ev, int):
-                        position = ev
-                    else:
-                        mq.append(ev)
-            except ParserFailure:
-                return
-            except ParserSuccess:
-                matched = True
-                pass
-            
-            if matched:
-                queue.append(mq)
-            else:
-                return
-        
-        ctx.update(nctx)
-        for ev in self.output(queue):
-            yield ev
-        yield position
-
-def Fences(qname, start_matcher, content_matcher, end_matcher):
-    return Element(qname, Sequence([
-                                    Hide(start_matcher),
-                                    content_matcher,
-                                    Hide(end_matcher)
-                                    ]))
-
-def RegexFences(qname, start_pattern, start_firstchars, content_matcher, end_pattern, end_firstchars):
-    return Fences(qname,
-                  RegexLiteral(start_pattern, start_firstchars),
-                  content_matcher,
-                  RegexLiteral(end_pattern, end_firstchars))
-
-def RegexSpan(qname, start_pattern, start_firstchars,
-              content_list,
-              end_pattern, end_firstchars):
-    return RegexFences(qname,
-                       start_pattern, start_firstchars,
-                       BMixed(content_list, inline_only = True, fail_at_end = True),
-                       end_pattern, end_firstchars)
-
-class Choice(MultiMatcher):
-    def __init__(self, matchers):
-        self.matchers = matchers
-        self.by_char = self._matcher_hash(matchers)
-        self.first_chars = "".join(self.by_char.keys())
-    
-    def __unicode__(self):
-        return u"(%s)" % (" | ".join([unicode(m) for m in self.matchers]))
-    
-    def __or__(self, other):
-        other = ensure(other)
-        if isinstance(other, Matcher):
-            return Choice(self.matchers + [other])
-        else:
-            raise NotImplementedError
-        
-    def __ror__(self, other):
-        other = ensure(other)
-        if isinstance(other, Matcher):
-            return Choice([other] + self.matchers)
-        else:
-            raise NotImplementedError
-        
-    def _matcher_hash(self, ms):
-        by_char = {}
-        for m in ms:
-            for c in m.first_chars:
-                if c in by_char:
-                    by_char[c].append(m)
-                else:
-                    by_char[c] = [m]
-        return by_char
-    
-    def parse(self, input, position, ctx):
-        matched = False
-        by_char = self.by_char
-        ch = input[position]
-        
-        if ch in by_char:
-            for m in by_char[ch]:
-                matched = False
-                for ev in m.parse(input, position, ctx):
-                    matched = True
-                    yield ev
-                
-                if matched:
-                    return
-
-
-class Optional(Matcher):
-    def __init__(self, matcher):
-        self.matcher = ensure(matcher)
-        self.first_chars = self.matcher.first_chars
-    
-    def __repr__(self):
-        return "%s(%s)" % (self.__class__.__name__, repr(self.matcher))
-    
-    def __unicode__(self):
-        return u"%s?" % unicode(self.matcher)
-    
-    def is_optional(self): return True
-    
-    def parse(self, input, position, ctx):
-        matched = False
-        for ev in self.matcher.parse(input, position, ctx):
-            matched = True
-            yield ev
-        
-        if not matched:
-            yield position
-            
-class ZeroOrMore(Matcher):
-    def __init__(self, matcher):
-        self.matcher = matcher
-        self.first_chars = matcher.first_chars
-    
-    def __repr__(self):
-        return "%s(%s)" % (self.__class__.__name__, repr(self.matcher))
-    
-    def __unicode__(self):
-        return u"%s+" % unicode(self.matcher)
-    
-    def is_optional(self): return True
-    
-    def parse(self, input, position, ctx):
-        matched = True
-        while matched:
-            matched = False
-            for ev in self.matcher.parse(input, position, ctx):
-                matched = True
-                if isinstance(ev, int):
-                    position = ev
-                else:
-                    yield ev
-        
-        yield position
-
-class OneOrMore(Matcher):
-    def __init__(self, matcher):
-        self.matcher = matcher
-        self.first_chars = matcher.first_chars
-    
-    def __repr__(self):
-        return "%s(%s)" % (self.__class__.__name__, repr(self.matcher))
-    
-    def __unicode__(self):
-        return u"%s+" % unicode(self.matcher)
-    
-    def parse(self, input, position, ctx):
-        matched = True
-        once = False
-        while matched:
-            matched = False
-            for ev in self.matcher.parse(input, position, ctx):
-                matched = True
-                once = True
-                if isinstance(ev, int):
-                    position = ev
-                else:
-                    yield ev
-        
-        if once:
-            yield position
-
-
-class Mixed(MultiMatcher):
-    def __init__(self, matchers):
-        self.matchers = matchers
-        self._by_char = None
-        self.first_chars = None
-    
-    def __unicode__(self):
-        return u"<%s>" % (", ".join([unicode(m) for m in self.matchers]))
-    
-    def __add__(self, other):
-        other = ensure(other)
-        if self.__class__ is other.__class__:
-            return self.__class__(self.matchers + other.matchers)
-        elif isinstance(other, Matcher):
-            return Sequence([self, other])
-        else:
-            raise NotImplementedError
-    
-    def by_char(self):
-        if self._by_char is None:
-            _by_char = defaultdict(list)
-            for m in self.matchers:
-                for ch in m.first_chars:
-                    _by_char[ch].append(m)
-                    
-            self._by_char = _by_char
-        return self._by_char
-    
-    def terminate(self, matcher):
-        ei = 0
-        while ei < len(self.matchers) and isinstance(self.matchers[ei], Escape):
-            ei += 1
-        
-        return Mixed(self.matchers[0:ei] + [SucceedBefore(matcher)] + self.matchers[ei:])
-    
-    def next(self, input, position, ctx, all_matchers, by_char):
-        matched = False
-        previous = position
-        
-        while position < len(input):
-            matchers = None
-            ch = input[position]
-            if position == 0:
-                matchers = all_matchers
-            elif ch in by_char:
-                matchers = by_char[ch]
-            
-            if matchers:
-                for matcher in matchers:
-                    for ev in matcher.parse(input, position, ctx):
-                        if not matched:
-                            matched = True
-                            if position > previous:
-                                yield text(input[previous:position])
-                        
-                        yield ev
-                    
-                    if matched:
-                        return
-            
-            position += 1
-        
-        if not matched:
-            yield text(input[previous:position])
-            yield position
-    
-    def parse(self, input, position, ctx):
-        """
-        Parses input text into events using the configured matchers.
-        """
-        
-        next = self.next
-        matchers = self.matchers
-        by_char = self.by_char()
-        
-        try:
-            while position < len(input):
-                lastpos = position
-                for ev in next(input, position, ctx, matchers, by_char):
-                    if isinstance(ev, int):
-                        position = ev
-                    else:
-                        yield ev
-                if position <= lastpos:
-                    raise Exception("Parser did not move forward")
-        except ParserSuccess:
-            yield position
-        except:
-            raise
-        
-class BMixed(Mixed):
-    def __init__(self, matchers, inline_only = False, fail_at_end = False):
-        super(BMixed, self).__init__(matchers)
-        self.inline_only = inline_only
-        self.fail_at_end = fail_at_end
-    
-    def terminate(self, matcher):
-        ei = 0
-        while ei < len(self.matchers) and isinstance(self.matchers[ei], Escape):
-            ei += 1
-        
-        return BMixed(self.matchers[0:ei] + [SucceedBefore(matcher)] + self.matchers[ei:],
-                      inline_only = self.inline_only)
-    
-    def parse(self, input, position, ctx):
-        """
-        Parses input text into events using the configured matchers.
-        Buffers the output in case a ParserFailure exception is thrown.
-        """
-        
-        next = self.next
-        matchers = self.matchers
-        by_char = self.by_char()
-        
-        nctx = ctx.copy()
-        queue = []
-        
-        try:
-            while position < len(input):
-                for ev in next(input, position, ctx, matchers, by_char):
-                    if isinstance(ev, int):
-                        position = ev
-                    elif ev[0] is BREAK: # TODO: Push this down to LineBreak
-                        if self.inline_only: raise ParserFailure
-                        queue.append(ev)
-                    else:
-                        queue.append(ev)
-                        
-            if self.fail_at_end:
-                return
-        except ParserFailure:
-            return
-        except ParserSuccess:
-            pass
-    
-        ctx.update(nctx)
-        for ev in queue:
-            yield ev
-        yield position
-
-
-class Wrapper(Matcher):
-    def __init__(self, matcher):
-        self.matcher = ensure(matcher)
-    
-    def _first_chars(self):
-        return self.matcher.first_chars
-    first_chars = property(_first_chars)
-    
-    def __repr__(self):
-        return "%s(%s)" % (self.__class__.__name__, repr(self.matcher))
-    
-    def terminate(self, matcher):
-        return self.__class__(self.matcher.terminate(matcher))
-    
-    def is_optional(self):
-        return self.matcher.is_optional()
-
-class Print(Wrapper):
-    def __init__(self, name, matcher):
-        self.name = name
-        self.matcher = ensure(matcher)
-    
-    def terminate(self, matcher):
-        return Print(self.name, self.matcher.terminate(matcher))
-    
-    def parse(self, input, position, ctx):
-        print "[%s] {" % self.name
-        for ev in self.matcher.parse(input, position, ctx):
-            print "[%s] %s" % (self.name, ev)
-            yield ev
-        print "[%s] }" % self.name
-
-class Forward(Wrapper):
-    def __init__(self):
-        self.matcher = None
-        self.first_chars = None
-        
-    def __lshift__(self, matcher):
-        self.matcher = ensure(matcher)
-        
-    def parse(self, input, position, ctx):
-        if self.matcher is None:
-            raise ConfigurationError("Forward matcher was never defined")
-        return self.matcher.parse(input, position, ctx)
-
-class Hide(Wrapper):
-    def parse(self, input, position, ctx):
-        for ev in self.matcher.parse(input, position, ctx):
-            if isinstance(ev, int):
-                yield ev
-
-class Not(Wrapper):
-    def parse(self, input, position, ctx):
-        for _ in self.matcher.parse(input, position, ctx):
-            raise ParserFailure
-
-class SucceedAfter(Wrapper):
-    def parse(self, input, position, ctx):
-        for ev in self.matcher.parse(input, position, ctx):
-            yield ev
-        raise ParserSuccess
-
-class SucceedBefore(Wrapper):
-    def __init__(self, matcher):
-        if isinstance(matcher, Mixed):
-            raise ValueError(matcher)
-        Wrapper.__init__(self, matcher)
-    
-    def parse(self, input, position, ctx):
-        for ev in self.matcher.parse(input, position, ctx): #@UnusedVariable
-            yield position
-            raise ParserSuccess
-
-class TextOf(Wrapper):
-    def parse(self, input, position, ctx):
-        queue = []
-        for ev in self.matcher.parse(input, position, ctx):
-            if isinstance(ev, int):
-                yield ev
-            elif ev[0] is TEXT:
-                queue.append(ev[1])
-        yield text("".join(queue))
-
-class BreakAfter(Wrapper):
-    def parse(self, input, position, ctx):
-        matched = False
-        for ev in self.matcher.parse(input, position, ctx):
-            matched = True
-            yield ev
-        if matched:
-            yield br(ctx, 0)
-
-class CaptureStream(Wrapper):
-    def __init__(self, name, matcher):
-        super(CaptureStream, self).__init__(matcher)
-        self.name = name
-    
-    def terminate(self, matcher):
-        return self.__class__(self.name, self.matcher.terminate(matcher))
-    
-    def __repr__(self):
-        return "%s(%s, %s)" % (self.__class__.__name__, repr(self.name), repr(self.matcher))
-
-    def parse(self, input, position, ctx):
-        queue = None
-        for ev in self.matcher.parse(input, position, ctx):
-            if not isinstance(ev, int) and ev[0] is not BREAK:
-                if not queue: queue = []
-                queue.append(ev)
-            yield ev
-        
-        if queue:
-            ctx[self.name] = Stream(queue)
-
-class CaptureString(CaptureStream):
-    def parse(self, input, position, ctx):
-        queue = []
-        for ev in self.matcher.parse(input, position, ctx):
-            if isinstance(ev, int):
-                yield ev
-            else:
-                if ev[0] is TEXT:
-                    queue.append(ev[1])
-                yield ev
-        
-        if len(queue) > 0:
-            ctx[self.name] = "".join(queue)
-
-class Attribute(CaptureStream):
-    def parse(self, input, position, ctx):
-        queue = []
-        for ev in self.matcher.parse(input, position, ctx):
-            if isinstance(ev, int):
-                yield ev
-            elif ev[0] is TEXT:
-                queue.append(ev[1])
-        if len(queue) > 0:
-            if "attributes" not in ctx:
-                raise ConfigurationError("Attribute used outside an Element")
-            
-            ctx["attributes"][self.name] = "".join(queue)
-
-class Element(Wrapper):
-    def __init__(self, qname, matcher):
-        super(Element, self).__init__(matcher)
-        self.qname = qname
-    
-    def __repr__(self):
-        return "%s(%s, %s)" % (self.__class__.__name__, repr(self.qname), repr(self.matcher))
-    
-    def terminate(self, matcher):
-        return self.__class__(self.qname, self.matcher.terminate(matcher))
-    
-    def parse(self, input, position, ctx):
-        queue = None
-        nctx = ctx.copy()
-        nctx["attributes"] = {}
-        for ev in self.matcher.parse(input, position, nctx):
-            if queue is None: queue = []
-            queue.append(ev)
-        
-        if queue:
-            yield starta(self.qname, d2a(nctx["attributes"]))
-            
-            del nctx["attributes"]
-            ctx.update(nctx)
-            
-            for ev in queue:
-                yield ev
-            yield end(self.qname)
-
-class BreakBuffer(Wrapper):
-    def parse(self, input, position, ctx):
-        queue = None
-        nctx = ctx.copy()
-        nctx["attributes"] = {}
-        for ev in self.matcher.parse(input, position, nctx):
-            if queue is None: queue = []
-            queue.append(ev)
-        
-        if queue:
-            addattrs = [(k, v) for k, v in nctx["attributes"].iteritems() if v is not None]
-            for ev in queue:
-                if isinstance(ev, tuple) and ev[0] is BREAK:
-                    indent, right_indent, attrs = ev[1]
-                    yield (BREAK, (indent, right_indent, attrs | addattrs), ev[2])
-                else:
-                    yield ev
-
-
-class RegexMatcher(Matcher):
-    def __init__(self, pattern, first_chars):
-        self.pattern = pattern
-        self.expression = re.compile(pattern, re.DOTALL | re.MULTILINE)
-        self.first_chars = first_chars
-    
-    def __repr__(self):
-        return "%s(%s)" % (self.__class__.__name__, repr(self.pattern))
-    
-    def output(self, ctx, match):
-        raise NotImplementedError
-    
-    def parse(self, input, position, ctx):
-        match = self.expression.match(input, position)
-        if match:
-            for ev in self.output(ctx, match):
-                yield ev
-
-class RegexText(RegexMatcher):
-    """Parses a regular expression and returns the match
-    as text."""
-    
-    def output(self, ctx, match):
-        yield match.end()
-        yield text(match.group(0))
-
-class RegexLiteral(RegexMatcher):
-    """Parses a regular expression and extracts literal text. Use
-    this to allow users to escape long passages.
-    Make sure the literal text is in group 1."""
-    
-    def output(self, ctx, match):
-        yield match.end()
-        if match.lastindex:
-            yield text(match.group(1))
-        else:
-            yield text(match.group(0))
-
-class RegexElement(RegexMatcher):
-    def __init__(self, qname, pattern, first_chars):
-        super(RegexElement, self).__init__(pattern, first_chars)
-        self.qname = qname
-        
-    def __repr__(self):
-        return "%s(%s, %s)" % (self.__class__.__name__, repr(self.qname), repr(self.pattern))
-        
-    def output(self, ctx, match):
-        yield match.end()
-        yield start(self.qname, **match.groupdict())
-        yield match.group(1)
-        yield end(self.qname)
-
-class RegexAttributes(RegexMatcher):
-    def output(self, ctx, match):
-        if "attributes" not in ctx:
-            raise ConfigurationError("RegexAttributes used outside an Element")
-        
-        ctx["attributes"].update(match.groupdict())
-        
-        yield match.end()
-
-class RegexProperty(RegexMatcher):
-    def output(self, ctx, match):
-        ctx[match.group("name")] = match.group("value")
-        yield match.end()
-
-
-class LineBreak(Matcher):
-    """Deals with line breaks and blank lines between paragraphs
-    defining blocks."""
-    
-    def __init__(self):
-        self.first_chars = "\n"
-        self.expression = re.compile("(\n *)*\n(?P<indent> *)")
-    
-    def parse(self, input, position, ctx):
-        match = self.expression.match(input, position)
-        if match:
-            yield match.end()
-            indent = len(match.group("indent"))
-            if match.group(0).count("\n") > 1 or indent != ctx.get("right_indent", 0):
-                yield br(ctx, indent)
-            else:
-                yield text(" ")
-
-class BlockStart(Matcher):
-    def __init__(self, type):
-        self.type = type
-        self.first_chars = "\n"
-        self.expression = re.compile("(\n *)*\n(?P<indent> *)")
-        
-    def parse(self, input, position, ctx):
-        match = self.expression.match(input, position)
-        if match:
-            indent = len(match.group("indent"))
-            yield match.end()
-            yield br(ctx, indent, d2a(dict(type = self.type)))
-
-class BlockStartSequence(Matcher):
-    def __init__(self, type, matchers):
-        self.type = type
-        self.sequence = Sequence(matchers)
-        
-        self.first_chars = "\n"
-        self.expression = re.compile("(\n *)*\n(?P<indent> *)")
-        
-    def parse(self, input, position, ctx):
-        match = self.expression.match(input, position)
-        if match:
-            indent = len(match.group("indent"))
-            
-            nctx = ctx.copy()
-            nctx["attributes"] = {}
-            ls = list(self.sequence.parse(input, match.end(), nctx))
-            
-            if len(ls) > 0:
-                yield br(ctx, indent, d2a(dict(type = self.type, **nctx["attributes"])))
-                for ev in ls:
-                    yield ev
-                    
-
-class BlockDivision(Matcher):
-    """Matches a pattern that marks the start of a new block,
-    such as a bullet at the beginning of a line."""
-    
-    def __init__(self, type, pattern, first_chars = "\n", placeholder = False):
-        self.type = type
-        self.pattern = pattern
-        self.expression = re.compile(pattern, re.MULTILINE)
-        self.first_chars = first_chars
-        self.placeholder = placeholder
-    
-    def __repr__(self):
-        return "%s(%s, %s)" % (self.__class__.__name__, repr(self.type), repr(self.pattern))
-    
-    def output(self, ctx, match, indent, right_indent):
-        yield br(ctx, indent, right_indent = right_indent, attrs = d2a({"type": self.type}))
-    
-    def parse(self, input, position, ctx):
-        match = self.expression.match(input, position)
-        if match:
-            indent = 0
-            gd = match.groupdict()
-            if gd.get("indent"):
-                indent = len(gd["indent"])
-            
-            right_indent = indent
-            if gd.get("extra"):
-                right_indent = indent + len(gd["extra"])
-            
-            yield match.end()
-            for ev in self.output(ctx, match, indent, right_indent):
-                yield ev
-                
-            if self.placeholder:
-                yield start(WIKI.ph)
-                yield end(WIKI.ph)
-
-class SimpleMarker(BlockDivision):
-    """Matches a block-level key/value pattern and emits a marker."""
-    
-    def output(self, ctx, match, indent, right_indent):
-        gd = match.groupdict()
-        if "indent" in gd:
-            del gd["indent"]
-        if "extra" in gd:
-            del gd["extra"]
-        
-        qname = WIKI[self.type]
-        yield br(ctx, indent, d2a(dict(type = self.type, affinity = "")))
-        yield start(qname, **gd)
-        yield end(qname)
-        yield br(ctx, indent)
-        
-class SimpleHeading(BlockDivision):
-    def __init__(self, pattern, first_chars, level):
-        super(SimpleHeading, self).__init__("heading", pattern, first_chars)
-        self.level = level
-    
-    def __repr__(self):
-        return "%s(%s, level=%s)" % (self.__class__.__name__, repr(repr(self.pattern), self.level))
-    
-    def output(self, ctx, match, indent, right_indent):
-        id = match.groupdict().get("id")
-        yield br(ctx, indent, d2a(dict(type = self.type, affinity = "", level = self.level, id = id)))
-
-#class Section(BlockDivision):
-#    def __init__(self, pattern, first_chars, level):
-#        super(SimpleHeading, self).__init__("heading", pattern, first_chars)
-#        self.level = level
-#        
-#    def output(self, ctx, match, indent, right_indent):
-#        id = match.groupdict().get("id")
-#        yield br(ctx, indent, mkattrs(type = self.type, affinity = "", level = self.level, id = id))
-
-class PatternHeading(BlockDivision):
-    def __init__(self, type, pattern, first_chars, level = None, fn = len, role = None):
-        super(PatternHeading, self).__init__(type, pattern, first_chars)
-        self.level = level
-        self.fn = fn
-        self.role = role
-
-    def __repr__(self):
-        return "%s(%s, fn=%s)" % (self.__class__.__name__, repr(self.pattern), repr(self.fn))
-
-    def output(self, ctx, match, indent, right_indent):
-        gd = match.groupdict()
-        level = 2
-        if self.level is not None:
-            level = self.level
-        elif gd.get("level"):
-            level = self.fn(gd["level"])
-        
-        yield br(ctx, indent, d2a(dict(type = self.type, affinity = "", level = unicode(level),
-                                       id = gd.get("id", ""), role = self.role)))
-
-
-class LineEnding(Matcher):
-    def __init__(self, pattern, first_chars, must_indent = False):
-        self.expression = re.compile(pattern)
-        self.first_chars = first_chars
-        self.must_indent = must_indent
-    
-    def parse(self, input, position, ctx):
-        match = self.expression.match(input, position)
-        if match:
-            gd = match.groupdict()
-            
-            last_indent = ctx["indent"]
-            indent = last_indent
-            
-            indented = False
-            if self.must_indent and "indent" in gd:
-                    indent = len(gd["indent"])
-                    if indent > last_indent:
-                        indented = True
-                
-            if indented or not self.must_indent:
-                if "putback" in gd:
-                    yield match.start("putback")
-                else:
-                    yield match.end()
-                
-                yield br(ctx, indent)
-
-
-#class Block(Wrapper):
-#    def __init__(self, type, matcher):
-#        super(self.__class__, self).__init__(matcher)
-#        self.type = type
-#        
-#    def __repr__(self):
-#        return "%s(%s, %s)" % (self.__class__.__name__, repr(self.type), repr(self.matcher))
-#    
-#    def terminate(self, matcher):
-#        return self.__class__(self.type, self.matcher.terminate(matcher))
-#
-#    def parse(self, input, position, ctx):
-#        nctx = ctx.copy()
-#        queue = iter(self.matcher.parse(input, position, nctx))
-#        if len(queue) > 0:
-#            a = [("type", self.type)]
-#            a += [(n, v) for n, v in nctx.get("attributes", {}).iteritems() if n != "type" and n != "indent"]
-#            yield br(nctx["indent"], attrs = Attrs(a))
-#            for ev in queue:
-#                yield ev
-#            yield br(nctx["indent"])
-
-
-def parse_xml_attributes(input):
-    d = {}
-    
-    if input:
-        m = Xml._attr_expr.search(input)
-        while m:
-            d[m.group("attrname")] = m.group("attrval")
-            m = Xml._attr_expr.search(input, m.end())
-        
-    return d2a(d)
-
-def parse_xml_name(input, namespaces):
-    colon = input.find(":")
-    if colon > -1:
-        return QName("%s}%s" % (namespaces[input[:colon]], input[colon+1:]))
-    else:
-        return QName(input)
-
-class Xml(object):
-    """Parses raw XML in the wiki markup into events."""
-    
-    _name = r"([-A-Za-z0-9_.]+:)?[-A-Za-z0-9_.]+"
-    _tagname = r"(?P<tagname>" + _name + ")"
-    _attr = r"\s+(?P<attrname>" + _name + r")\s*=\s*\"(?P<attrval>[^\"]*)\""
-    _attrs = "(?P<attrs>(" + _attr + ")*)"
-    _starttag = "(?<!<)<" + _tagname + _attrs + r"\s*(?P<empty>/?)>(?!>)"
-    _endtag = "</" + _tagname + ">"
-    
-    _attr_expr = re.compile(_attr)
-    
-    _start_expr = re.compile(_starttag)
-    _end_expr = re.compile(_endtag)
-    first_chars = "<"
-    
-    def __init__(self, namespaces = None, matchers = [], escape = None):
-        if namespaces is None:
-            namespaces = DEFAULT_NAMESPACES
-        self.namespaces = namespaces
-        self.matchers = matchers
-    
-    def __repr__(self):
-        return "%s(%s)" % (self.__class__.__name__, repr(self.matchers))
-    
-    def parse(self, input, position, ctx):
-        match = self._start_expr.match(input, position)
-        if match:
-            name = match.group("tagname")
-            qname = parse_xml_name(name, self.namespaces)
-            attrs = parse_xml_attributes(match.group("attrs"))
-            isempty = match.group("empty") == "/"
-            
-            if isempty:
-                yield match.end()
-                yield starta(qname, attrs)
-                yield end(qname)
-            else:
-                if input.find("</%s>" % name, match.end()) < 0:
-                    return
-                
-                started = False
-                parser = BMixed([EndTagMatcher(name, qname), self], fail_at_end = True)
-                for ev in parser.parse(input, match.end(), ctx):
-                    if not started:
-                        yield starta(qname, attrs)
-                        started = True
-                    yield ev
-
-class EndTagMatcher(Matcher):
-    """Helper for Xml, finds the end tag corresponding
-    to a start tag."""
-    
-    def __init__(self, name, qname):
-        self.first_chars = Xml.first_chars
-        self.name = name
-        self.qname = qname
-        
-    def __repr__(self):
-        return "%s('</%s>')" % (self.__class__.__name__, self.name)
-    
-    def parse(self, input, position, ctx):
-        match = Xml._end_expr.match(input, position)
-        if match:
-            if match.group("tagname") == self.name:
-                yield match.end()
-                yield end(self.qname)
-                raise ParserSuccess
-            else:
-                raise ParserFailure
-
-
-class Substitution(Matcher):
-    def __init__(self, subs, first_chars):
-        self.first_chars = first_chars
-        self.subcount = len(subs)
-        
-        self.texts = []
-        pattern = ""
-        for i, (pat, text) in enumerate(subs.iteritems()):
-            if i > 0:
-                pattern += "|"
-            pattern += "(?P<g%s>%s)" % (i, pat)
-            self.texts.append(text)
-        
-            i += 1
-        self.expression = re.compile(pattern, re.MULTILINE | re.UNICODE)
-
-    def parse(self, input, position, ctx):
-        match = self.expression.match(input, position)
-        if match:
-            if match.lastgroup:
-                yield match.end()
-                yield text(self.texts[int(match.lastgroup[1:])])
-
-class SmartQuotes(Matcher):
-    """Turns ASCII straight-quotes into unicode curly quotes."""
-    
-    def __init__(self):
-        self.first_chars = '"'
-    
-    def parse(self, input, position, ctx):
-        if input[position] == '"':
-            if "block_ctx" in ctx:
-                inquote = ctx["block_ctx"].get("inquote", False)
-            else:
-                ctx["block_ctx"] = {}
-                inquote = False
-            
-            if inquote:
-                codepoint = 8221 # close quote
-            else:
-                codepoint = 8220 # open quote
-            
-            yield position + 1
-            ctx["block_ctx"]["inquote"] = not(inquote)
-            yield text(unichr(codepoint))
-
-class CharacterEntities(Matcher):
-    """Turns XML-style numeric entities into characters."""
-    
-    expression = re.compile("&#(x(?P<hex>[A-Fa-f0-9]{1,4})|(?P<dec>[0-9]{1,5}));")
-    
-    def __init__(self):
-        self.first_chars = "&"
-    
-    def parse(self, input, position, ctx):
-        match = CharacterEntities.expression.match(input, position)
-        if match:
-            if match.group(1).startswith("x"):
-                codepoint = int(match.group("hex"), 16)
-            else:
-                codepoint = int(match.group("dec"))
-            
-            yield match.end()
-            yield text(unichr(codepoint))
-
-# newline + "(?P<indent> *)" + _tagname + _attrs + "\\s*>>(?P<rest>[^\n]*)"
-
-class PseudoXml(Matcher):
-    def __init__(self, pattern, first_chars, namespaces = None, rest_matchers = None):
-        self.pattern = pattern
-        self.expression = re.compile(pattern)
-        self.first_chars = first_chars
-        
-        if namespaces is None:
-            namespaces = DEFAULT_NAMESPACES
-        self.namespaces = namespaces
-        
-        self.rest_matchers = rest_matchers
-        self.subp = None
-        if rest_matchers:
-            self.subp = Mixed(rest_matchers)
-    
-    def __repr__(self):
-        return "%s(%s, namespaces=%s, rest_matchers=%s" % (self.__class__.__name__,
-                                                           repr(self.pattern),
-                                                           repr(self.namespaces),
-                                                           repr(self.rest_matchers))
-    
-    def parse(self, input, position, ctx):
-        match = self.expression.match(input, position)
-        if match:
-            gd = match.groupdict()
-            
-            indent = 0
-            if gd.get("indent"):
-                indent = len(gd["indent"])
-            
-            attrs = Attrs()
-            if gd.get("attrs"):
-                attrs = parse_xml_attributes(gd["attrs"])
-                
-            rest = gd.get("rest")
-            qname = parse_xml_name(gd.get("tagname"), self.namespaces)
-            attrs = parse_xml_attributes(gd.get("attrs"))
-            
-            yield match.end()
-            yield br(ctx, indent, d2a(dict(type = "px", affinity = "")))
-            yield starta(qname, attrs)
-            
-            if self.subp:
-                for ev in self.subp.parse(rest, 0, ctx):
-                    yield ev
-            elif rest and rest != '':
-                yield text(rest)
-            
-            yield end(qname)
-            yield br(ctx, indent)
-
-class Codeblock(Matcher):
-    def __init__(self, qname, pattern, first_chars):
-        self.qname = qname
-        self.expression = re.compile(pattern)
-        self.first_chars = first_chars
-    
-    def parse(self, input, position, ctx):
-        match = self.expression.match(input, position)
-        if match:
-            gd = match.groupdict()
-            
-            content = gd.get("content")
-            if content:
-                del gd["content"]
-                yield match.end()
-                yield start(self.qname, **gd)
-                yield text(dedent(content))
-                yield end(self.qname)
-
-#class ShortcutMatcher(Matcher):
-#    """Matches a textbook-style shortcut."""
-#    
-#    def __init__(self, pattern, first_chars):
-#        self.first_chars = first_chars
-#        self.expression = re.compile(pattern)
-#        
-#    def parse(self, input, position, ctx):
-#        match = self.expression.match(input, position)
-#        if match:
-#            content = match.group("content")
-#            name = match.group("name")
-#            value = match.group("value")
-#            if name is not None and value is not None:
-#                yield match.end()
-#                yield start(WIKI.shortcut, {"name": name, "value": value})
-#                if content is not None:
-#                    yield text(content) #TODO: parse this
-#                yield end(WIKI.shortcut)
-#
-
-if __name__ == '__main__':
-    input = "This is a test [of the|thingy] bit."
-    m = Sequence([Literal("["), Optional(Literal("of the|")), Literal("thingy]")])
-    mx = Mixed([m])
-    #print repr(iter(mx.parse(input)))
-    
-    p = Mixed([Literal("Hi"), Literal("Hello")])
-    print p
-    print unicode(p)
-    
-    esc = Escape("\\")
-    ln_title = Mixed([esc])
-    ln_ref = Mixed([esc])
-    ln = Element(WIKI.shortcut, Hide("[") + ln_title + Hide("|") + Attribute("ref", ln_ref) + Hide("]"))
-    print "ln=", ln
-    mx = Mixed([ln])
-    
-    b = iter(mx.match("Hi there this [is a big|Hello] to all the friends of Potter"))
-    print b
-    print Stream(b)
-    print "\n"
-    
-    t_content = Mixed([esc])
-    t = Element(WIKI.h, Hide("== ") + Optional(Element(WIKI.pre, "|>")) + t_content + Hide(" =="))
-    tx = Mixed([t])
-    
-    b = iter(tx.match("Hi there this == |>Heading 2 == to all the friends of Potter"))
-    print b
-    print Stream(b)
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-        
-        
-        

File obsolete/textbook.py

-from wikiparser.matchers import *
-
-HTML = Namespace("http://www.w3.org/1999/xhtml")
-
-ESC = Escape("\\")
-XML = Xml()
-LB = LineBreak()
-
-COMMENT = Element(WIKI.comment, RegexLiteral("(?:\n+ *)*<!--(.*?)-->", "<"))
-
-VAR = Element(HTML.var, RegexLiteral("<<([^\n]+?)>>", "<"))
-MATH = Element(WIKI.math, RegexLiteral("\\$\\$(.*?)\\$\\$", "$"))
-LONG_ESC = RegexLiteral("@<(.*?)>@", "@")
-
-TYPOGRAPHY = Substitution({
-                     "(?<=\w)'(?=[Ss])": unichr(8217),
-                     "(?<=\w)'(?=[Ll]{2})": unichr(8217),
-                     "(?<=\w)'(?=[Tt])": unichr(8217),
-                     "(?<=\w)'(?=[Nn][Tt])": unichr(8217),
-            
-                     "(?<!-)---(?!-)": unichr(8212),
-                     "(?<!-)--(?!-)": unichr(8211),
-            
-                     "(^|(?<=\s))<-($|(?=\s))": " " + unichr(8592) + " ",
-                     "(^|(?<=\s))->($|(?=\s))": " " + unichr(8594) + " ",
-                     "(^|(?<=\s))<=($|(?=\s))": " " + unichr(8804) + " ",
-                     "(^|(?<=\s))>=($|(?=\s))": " " + unichr(8805) + " ",
-                     "(^|(?<=\s))<=>($|(?=\s))": " " + unichr(8660) + " ",
-            
-                     "(^|(?<=\s))1/4($|(?=\s))": " " + unichr(188) + " ",
-                     "(^|(?<=\s))1/2($|(?=\s))": " " + unichr(189) + " ",
-                     "(^|(?<=\s))3/4($|(?=\s))": " " + unichr(190) + " ",
-            
-                     "\\.\\.\\.": unichr(8230),
-                     "(?<=\d)x(?=\d)": unichr(215),
-                     "\\(c\\)(?=[ ,.])": unichr(169),
-                     "\\(tm\\)(?=[ ,.])": unichr(8482),
-                     "\\(r\\)(?=[ ,.])": unichr(174)
-                     }, "'-<=13.x(")
-QUOTES = SmartQuotes()
-ENTITY_CHARS = CharacterEntities()
-
-# Keyboard/mouse key notation. _key_pattern accepts the named keys listed
-# below, F plus one or two digits, or a single character from the final
-# character class (digits, capitals and some punctuation).
-_key_pattern = "LMB|MMB|RMB|mouse_wheel|Alt|Shift|Ctrl|Space|Up|Down|Left|Right|Tab|PgUp|PgDn|Home|End|Ins|Del|Backspace|Esc|Enter|Apple|Command|F[0-9]{1,2}|[-0-9A-Z!+~;',.<>/?\\[\\]]"
-# NOTE(review): presumably the characters a key may start with. "B"
-# (Backspace) and most single capitals allowed by [A-Z] in _key_pattern are
-# absent -- verify against RegexText whether those keys can still match.
-_key_firstchars = "LMRmASCSUDLRTPHEIF-!+~;',.<>/?[]0123456789"
-
-# A single key, a "+" separator with optional surrounding spaces, and the
-# full "((key + key ...))" construct.
-KEY = Element(WIKI.key, RegexText(_key_pattern, _key_firstchars))
-KEY_SEP = RegexText(" *\\+ *", " +")
-KEYS = Element(WIKI.keys, Hide("((") + KEY + ZeroOrMore(KEY_SEP + KEY) + Hide("))"))
-
-# Matchers shared by the strong/em content lists and _styled_content below.
-_text_content = [ESC, LONG_ESC, VAR, XML, COMMENT, TYPOGRAPHY, QUOTES, ENTITY_CHARS, KEYS]
-
-# Delimiter patterns for the inline spans: "__" for UI, "*" for strong and
-# "_" for em. An opening delimiter must be at the start of the text or
-# follow a non-word character; a closing delimiter must be at the end of the
-# text or precede a non-word character.
-_ui_start = "(^|(?<=\W))__"
-_ui_end = "__($|(?=\W))"
-_strong_start = "(^|(?<=\W))\\*"
-_strong_end = "\\*($|(?=\W))"
-_em_start = "(^|(?<=\W))_"
-_em_end = "_($|(?=\W))"
-
-# The content lists start empty because the spans refer to each other; they
-# are filled in place (+=) further down, once every span object exists.
-# NOTE(review): this relies on RegexSpan keeping a reference to the list it
-# is given rather than copying it -- confirm in RegexSpan.
-_ui_content = []
-_strong_content = []
-_em_content = []
-
-UI = RegexSpan(WIKI.ui, _ui_start, "_", _ui_content, _ui_end, "_")
-STRONG = RegexSpan(HTML.strong, _strong_start, "*", _strong_content, _strong_end, "*")
-EM = RegexSpan(HTML.em, _em_start, "_", _em_content, _em_end, "_")
-
-# Backtick-delimited inline code; only ESC, VAR and LB are matched inside.
-CODE = Hide("`") + Element(WIKI.code, BMixed([ESC, VAR, LB], inline_only = True)) + Hide("`")
-
-# Fill the content lists now that UI, STRONG, EM and CODE are defined.
-_ui_content += [LB, ESC, VAR, STRONG]
-_strong_content += [LB] + _text_content + [UI, EM, CODE]
-_em_content += [LB] + _text_content + [STRONG, CODE]
-
-# Content list used for the title part of SHORTCUT_TITLED.
-_styled_content = [LB] + _text_content + [UI, STRONG, EM, CODE]
-
-# Link target: an optional "Name:" prefix (starting with a capital letter)
-# followed by a value that contains no "|", "]", '"', "[" or newline.
-_link_pattern = "((?P<name>[A-Z][-_. A-Za-z0-9]*):)?(?P<value>[^|\\]\"\\[\n]*)"
-
-# "[styled title|target]" and "[target]" shortcuts.
-SHORTCUT_TITLED = Element(WIKI.shortcut, Hide("[") + BMixed(_styled_content, inline_only = True) + RegexAttributes("\\|%s\\]" % _link_pattern, "|" ))
-SHORTCUT_NOTITLE = Element(WIKI.shortcut, RegexAttributes("\\[%s\\]" % _link_pattern, "["))
-
-# "{{{ ... }}}" block with an optional "#!lang" first line.
-# NOTE(review): inside the lang character class " -_" is a range from space
-# to underscore, not three literal characters -- confirm this is intended.
-CODEBLOCK = Codeblock(WIKI.codeblock, "{{{([ \r\n\t]*#!(?P<lang>[ -_A-Za-z0-9]*)\n)?(?P<content>(.|[\r\n])*?)}}}", "{")
-
-# Inline matchers, without and with the line-break matcher LB appended.
-_inline_content = [ESC, LONG_ESC, CODEBLOCK, XML, COMMENT, SHORTCUT_NOTITLE, SHORTCUT_TITLED, TYPOGRAPHY, QUOTES, UI, STRONG, EM, CODE, VAR, KEYS, MATH]
-_inline_content_lb = _inline_content + [LB]
-
-# Optionally indented tag name and attributes (patterns taken from the Xml
-# class) followed by ">>"; the remainder of the line is captured as "rest"
-# and handed to the inline matchers via rest_matchers.
-PSEUDOXML = PseudoXml(newline + "(?P<indent> *)" + Xml._tagname + Xml._attrs + "\\s*>>(?P<rest>[^\n]*)",
-                      "\n", rest_matchers = _inline_content)
-
-# "#name: value" property lines; the name may carry a "prefix:" part.
-# PROP_TOP has no indentation group, while PROP_EMBEDDED requires at least
-# one leading space (captured as "indent") and may follow blank lines.
-# NOTE(review): `newline` is defined outside this section; presumably a
-# start-of-line pattern -- confirm.
-PROP_TOP = RegexProperty(newline + "#(?P<name>([A-Za-z][-_.A-Za-z0-9]*:)?[A-Za-z][-_.A-Za-z0-9]*) *: *(?P<value>[^\n]*)", "\n")
-PROP_EMBEDDED = SimpleMarker("blockattr", "(^|((\n *)*\n))(?P<indent> +)#(?P<name>([A-Za-z][-_.A-Za-z0-9]*:)?[A-Za-z][-_.A-Za-z0-9]*) *: *(?P<value>[^\n]*)", "\n")
-
-# Section marker "@id" and title marker "=" (a single "=", optional spaces,
-# not followed by another "=").
-_section_start = PatternHeading("section", newline + "@(?P<id>[A-Za-z0-9_]+)", "\n", level = 1, role = "heading")
-_title_start = PatternHeading("title", newline + "= *(?!=)", "\n", level = 0)
-# End of a title: either a trailing run of "=" with an optional "(id)"
-# ahead of LB, or simply the position ahead of LB.
-_heading_close = Choice([RegexText(" *=+ *(\\((?P<id>[A-Za-z0-9]+)\\))?", " =") + SucceedBefore(LB), SucceedBefore(LB)])
-
-# Section line: the "@id" marker followed by content up to the newline,
-# wrapped in WIKI.sn.
-SECTION = BreakAfter(_section_start +\
-                     Element(WIKI.sn, BMixed([Not(LB)])) +\
-                     SucceedBefore("\n"))
-
-# Title line: optional pretitle terminated by "|>", the inline title
-# content, then either a posttitle introduced by "<|" or the plain close.
-TITLE = _title_start +\
-        CaptureStream("title",
-                      Optional(Element(WIKI.pretitle, CaptureStream("pretitle", BMixed([LB], inline_only = True)) + Hide("|>"))) +\
-                      BMixed(_inline_content_lb, inline_only = True) +\
-                      Choice([Element(WIKI.posttitle, Hide("<|") + CaptureStream("posttitle", BMixed([LB], inline_only = True)) + Hide(_heading_close)),
-                              Hide(_heading_close)
-                              ])
-                      )
-
-# Heading line: optional indentation, two or more "=" captured as "level",
-# inline content, then an optional closing run of "=" with an optional
-# "(id)" ahead of LB.
-# NOTE(review): fn = len is presumably applied to the "level" group to turn
-# the number of "=" into a heading level -- confirm in PatternHeading.
-HEADING = BreakAfter(
-                     BreakBuffer(
-                                 PatternHeading("heading",
-                                                newline + "(?P<indent> *)(?P<level>==+) *",
-                                                "\n",
-                                                fn = len)
-                                 + BMixed(_inline_content_lb, inline_only = True)
-                                 + Choice([
-                                           RegexAttributes(" *=+ *(\\((?P<id>[A-Za-z0-9_]+)\\))?", " =") + SucceedBefore(LB),
-                                           SucceedBefore(LB)
-                                           ])
-                                 )
-                     
-                     )
-
-# List items: optional indentation, then a run of "-" or "*" (bullet) or a
-# run of "#" with an optional "." (ordinal), each followed by one space.
-BULLET = BlockDivision("bullet", newline + "(?P<indent> *)(?P<extra>[-*]+ )")
-ORDINAL = BlockDivision("ord", newline + "(?P<indent> *)(?P<extra>\\#+\\.? )")
-
-# Admonitions introduced by a literal "TIP:", "NOTE:" or "WARNING:" label.
-TIP = BlockDivision("tip", newline + "(?P<indent> *)TIP:", placeholder = True)
-NOTE = BlockDivision("note", newline + "(?P<indent> *)NOTE:", placeholder = True)
-WARNING = BlockDivision("warning", newline + "(?P<indent> *)WARNING:", placeholder = True)
-
-#EXAMPLE = BlockStart("examplefile") + Hide(RegexText("EXAMPLE: *"))
-
-# ITEM ("::" prefix) and DEFN (no prefix): inline content ended by a ":"
-# at the end of the line; the following line's indentation is captured.
-# SUMMARY: inline content between triple double quotes, captured as
-# "summary" and wrapped in Hide.
-ITEM = BlockStart("item") + Hide("::") + BMixed(_inline_content_lb, inline_only = True) + LineEnding(":(?P<putback>\s*\n(?P<indent> *))", ":", must_indent = True)
-DEFN = BlockStart("defn") + BMixed(_inline_content_lb, inline_only = True) + LineEnding(":(?P<putback>\s*\n(?P<indent> *))", ":", must_indent = True)
-SUMMARY = Hide(BlockStart("summary") + Hide('"""') + CaptureStream("summary", BMixed(_inline_content_lb, inline_only = True)) + Hide('"""'))
-
-# Block-level matchers, with LB last.
-_block_content = [CODEBLOCK, PSEUDOXML, PROP_TOP, PROP_EMBEDDED, SECTION, HEADING, TITLE, BULLET, ORDINAL, TIP, NOTE, WARNING, ITEM, DEFN, SUMMARY, LB]
-
-# Complete matcher list: inline matchers first, then block matchers.
-main_matchers = _inline_content + _block_content
-
-
-
-        
-            
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-

File obsolete/textify.py

-import re, textwrap
-from collections import defaultdict
-
-from genshi.core import Namespace, START, END, TEXT
-
-from config import WIKI_NS_URL
-
-
-# genshi namespace object for the wiki namespace URL from config.
-WIKI = Namespace(WIKI_NS_URL)
-
-
-# Matches one or more consecutive CR, LF, tab or space characters.
-_normalize_exp = re.compile("[\r\n\t ]+")
-
-# (character, ASCII replacement) pairs used as the default table of dechar().
-_charmap = [
-            # Right and left double quotation marks (U+201D, U+201C).
-            (unichr(8221), '"'),
-            (unichr(8220),'"'),
-            
-            # Right single quotation mark, em dash, en dash.
-            (unichr(8217), "'"),
-            (unichr(8212), "---"),
-            (unichr(8211), "--"),
-            
-            # Arrows and comparison signs (U+2190, U+2192, U+2264, U+2265,
-            # U+21D4).
-            (unichr(8592), "<-"),
-            (unichr(8594), "->"),
-            (unichr(8804), "<="),
-            (unichr(8805), ">="),
-            (unichr(8660), "<=>"),
-            
-            # Vulgar fractions (U+00BC, U+00BD, U+00BE).
-            (unichr(188), "1/4"),
-            (unichr(189), "1/2"),
-            (unichr(190), "3/4"),
-            
-            # Ellipsis, multiplication sign, copyright, trade mark,
-            # registered sign.
-            (unichr(8230), "..."),
-            (unichr(215), "x"),
-            (unichr(169), "(c)"),
-            (unichr(8482), "(tm)"),
-            (unichr(174), "(r)")
-            ]
-
-def dechar(text, charmap = _charmap):
-    for char, replacement in charmap:
-        text = text.replace(char, replacement)
-    return text
-
-
-def addindent(first, subsequent, text, trim = False):
-    lines = text.split("\n")
-    if trim and lines[0] == '': lines.pop(0)
-    if trim and lines[-1] == '': lines.pop()
-    
-    buf = ""
-    for i, line in enumerate(lines):
-        if i == 0:
-            buf += first
-        else:
-            buf += subsequent
-        
-        buf += line + "\n"
-    
-    return buf
-    
-
-class Block(object):
-    def __init__(self, wrapper, indent, wrap, before, after, first, subsequent, hidden):
-        self.wrapper = wrapper
-        self.indent = indent
-        self.wrap = wrap
-        self.before = before
-        self.after = after
-        self.first = first
-        self.subsequent = subsequent
-        self.hidden = hidden
-        
-        self.buffer = ""
-        self.children = []
-    
-    def __repr__(self):
-        return "Block()"
-    
-    def is_empty(self):
-        return self.buffer.strip() == ""
-    
-    def text(self, text, xlate = True):
-        if xlate: text = dechar(text)
-        self.buffer += text
-    
-    def append(self, block):
-        self.children.append(block)
-    
-    def dump(self, level = 0):
-        print "  " * level, "| indent=%s %r" % (self.indent, self.buffer)
-        if self.children:
-            print "  " * level, "["
-            for child in self.children:
-                child.dump(level = level+1)
-            print "  " * level, "]"
-    
-    def render(self, indent = None):
-        wrapper = self.wrapper
-        if self.hidden:
-            return ""
-        
-        if indent is None:
-            indent = self.indent
-        
-        t = "\n" * self.before
-        
-        if not self.is_empty():
-            diff = max(indent - self.indent, 0)
-            pretab = " " * diff
-            posttab_f = self.first + (" " * (self.indent - len(self.first)))
-            posttab_s = self.subsequent + (" " * (self.indent - len(self.subsequent)))
-            
-            if self.wrap:
-                txt = self.buffer.replace("\n", " ").strip()
-                wrapper.initial_indent = pretab + posttab_f
-                wrapper.subsequent_indent = pretab + posttab_s
-                t += wrapper.fill(txt)
-                
-                if not t.rstrip(" ").endswith("\n"):
-                    t += "\n"
-            else:
-                t += addindent(pretab + posttab_f, pretab + posttab_s, self.buffer)
-        
-        if self.children:
-            for child in self.children:
-                t += child.render(indent + child.indent)
-        
-        t += "\n" * self.after
-        return t
-
-
-class Textifier(object):
-    def __init__(self, metadata, encoding = "ascii",
-                 indent = 0, before = 0, after = 0,
-                 unknown = None, reader = None):