Commits

Matt Chaput  committed 1d6bc47

Added wikiparser.textify and wikiparser.match.core.Events. Renamed Replace to EventsIf.

  • Participants
  • Parent commits 0e78428

Comments (0)

Files changed (4)

File tests/test_matches.py

 import unittest
 import re
 
-from wikiparser.markup import BREAK, StreamTree, tostring
+from wikiparser.markup import ATTR, BREAK, StreamTree, tostring
 from wikiparser.match.blocks import LineStart, BlockStart, BreakBefore, BreakAfter, PyConsole
-from wikiparser.match.core import Suppress, OneOrMore, ZeroOrMore, StringEnd, Not, FollowedBy
-from wikiparser.match.data import Element
+from wikiparser.match.core import (Literal, Events,
+                                   Suppress, OneOrMore, ZeroOrMore, StringEnd, Not,
+                                   FollowedBy)
+from wikiparser.match.data import Element, Attr
 from wikiparser.match.text import Mixed, Regex, Until
 
 
                                   '>>> a = 5\n>>> b = 2\n>>> print a + b\n7\n',
                                   (BREAK, (0, 0, {})), 'Nice, huh?\n', 85
                                   ])
-
+        
+    def test_orelse(self):
+        # Events("item") is the last alternative of the choice, so it supplies
+        # the attribute value when the regex finds nothing between the colons.
+        matcher = (Suppress(":")
+                   + Attr("type", Regex("[^:]+") | Events("item"))
+                   + Suppress(":"))
+        
+        # A type name is present: the regex alternative provides the value.
+        explicit = list(matcher.stream(":blah:"))
+        self.assertEqual(explicit, [(ATTR, ('type', 'blah', True)), 6])
+        
+        # No type name: the predefined "item" event is used instead.
+        fallback = list(matcher.stream("::"))
+        self.assertEqual(fallback, [(ATTR, ('type', 'item', True)), 2])
 
 if __name__ == '__main__':
     unittest.main()

File wikiparser/formats/textbook/matchers.py

 #===============================================================================
 
 from wikiparser.markup import HTML, WIKI
-from wikiparser.match.blocks import BlockStart, BreakAfter, IndentChange, LineStart, StartMarker
-from wikiparser.match.core import Choice, Escape, FollowedBy, Optional, Suppress, ZeroOrMore
-from wikiparser.match.data import AttrIf, Element, RegexAttr
+from wikiparser.match.blocks import (BlockStart, BreakAfter, IndentChange, LineStart,
+                                     StartMarker)
+from wikiparser.match.core import (Choice, Escape, Events, FollowedBy, Optional, Suppress,
+                                   ZeroOrMore)
+from wikiparser.match.data import Attr, AttrIf, Element, RegexAttr
 from wikiparser.match.text import (CharacterEntities, Mixed, Regex, SmartQuotes,
                                    Substitution, Until)
 from wikiparser.match.xml import Xml
 
 bullet = LineStart(type="bullet") + StartMarker("[*-]+ ", first_chars="*-")
 ordinal = LineStart(type="ord") + StartMarker("#+\\.? ", first_chars="#")
-item = (LineStart(type="item")
-        + Suppress("::")
+item = (LineStart()
+        + Suppress(":") + Attr("type", Regex("[A-Za-z_][A-Za-z_0-9]*") | Events("item")) + Suppress(":")
         + Mixed(inlinecontent_lb, ":", inlineonly=True)
         + Suppress(":\n"))
 

File wikiparser/match/core.py

 from collections import defaultdict
 
 from wikiparser.events import BREAK, br, isbr
+from wikiparser.filters.streams import textof
 
 
 def ensure(m, ismatcher=True):
 
 class Apply(Wrapper):
     """Buffers events from the sub-matcher, then calls a function on the queued
-    events. The events returned by the function, if any, are then yielded.
+    events. Any event(s) returned by the function are then emitted.
     """
 
-    def __init__(self, function, matcher):
+    def __init__(self, function, matcher, astext=False):
+        """
+        :param function: the function to call on the wrapped matcher's events.
+        :param matcher: the wrapped matcher.
+        :param astext: if True, the text from the wrapped matcher's events is
+            extracted and the function is called on the extracted string,
+            instead of on a list of events.
+        """
+        
         Wrapper.__init__(self, matcher)
         self.function = function
+        self.astext = astext
     
     def parse(self, input, position, ctx):
         queue = []
                 queue.append(ev)
                 
         if queue:
-            for ev in self.function(queue):
-                yield ev
+            if self.astext:
+                yield self.function(textof(queue))
+            else:
+                for ev in self.function(queue):
+                    yield ev
             yield position
 
 
             yield ev
 
 
-class Replace(Wrapper):
-    """Replaces the output of the wrapped matcher with a pre-defined stream.
+class Events(Matcher):
+    """Emits a fixed sequence of events and then the position it was given,
+    unchanged.
+    
+    This works as a kind of "else" clause when it is the last matcher in a
+    Choice matcher: it supplies pre-determined events in the case where none
+    of the other matchers match.
+    
+    For example, say you want a matcher that either matches a pattern like
+    `:typename:`, or allows the "typename" to be missing, in which case it
+    should be set to "item"::
+    
+        Suppress(":") + Attr("type", Regex("[^:]+") | Events("item")) + Suppress(":")
+    """
+    
+    def __init__(self, *events):
+        """
+        :param events: any number of arguments; they are stored and emitted,
+            in the order given, whenever this matcher is parsed.
+        """
+        
+        self.events = events
+    
+    def parse(self, input, position, ctx):
+        # First the predefined events...
+        for event in self.events:
+            yield event
+        # ...then the position, exactly as it was received.
+        yield position
+
+
+class EventsIf(Wrapper):
+    """If the wrapped matcher matches, replaces its events with a predefined
+    stream.
     """
     
     def __init__(self, matcher, *events):

File wikiparser/textify.py

+#===============================================================================
+# Copyright 2010 Matt Chaput
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#    http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+import textwrap
+
+
+# Typographic characters and the plain-text sequences that stand in for them.
+# Each entry is a (character, replacement) pair. The characters are written as
+# unicode escapes, which denote the same code points as the unichr() calls they
+# replace but are also valid on Python 3, where unichr() does not exist.
+_charmap = [
+            (u"\u201d", '"'),     # right double quotation mark
+            (u"\u201c", '"'),     # left double quotation mark
+            
+            (u"\u2019", "'"),     # right single quotation mark
+            (u"\u2018", "'"),     # left single quotation mark (was missing)
+            (u"\u2014", "---"),   # em dash
+            (u"\u2013", "--"),    # en dash
+            
+            (u"\u2190", "<-"),    # leftwards arrow
+            (u"\u2192", "->"),    # rightwards arrow
+            (u"\u2264", "<="),    # less-than or equal to
+            (u"\u2265", ">="),    # greater-than or equal to
+            (u"\u21d4", "<=>"),   # left right double arrow
+            
+            (u"\u00bc", "1/4"),   # vulgar fraction one quarter
+            (u"\u00bd", "1/2"),   # vulgar fraction one half
+            (u"\u00be", "3/4"),   # vulgar fraction three quarters
+            
+            (u"\u2026", "..."),   # horizontal ellipsis
+            (u"\u00d7", "x"),     # multiplication sign
+            (u"\u00a9", "(c)"),   # copyright sign
+            (u"\u2122", "(tm)"),  # trade mark sign
+            (u"\u00ae", "(r)")    # registered sign
+            ]
+
+def dechar(text, charmap = _charmap):
+    """Returns ``text`` with every character listed in ``charmap`` replaced
+    by its substitute.
+    
+    :param text: the string to convert.
+    :param charmap: a sequence of ``(character, replacement)`` pairs, applied
+        one after another in the order given. Defaults to the module's
+        ``_charmap`` table.
+    """
+    
+    converted = text
+    for original, substitute in charmap:
+        converted = converted.replace(original, substitute)
+    return converted
+
+
+def addindent(first, subsequent, text, trim = False):
+    """Prefixes every line of ``text`` with an indent string.
+    
+    :param first: the prefix for the first line.
+    :param subsequent: the prefix for each of the following lines.
+    :param text: the text to indent; it is split on newline characters.
+    :param trim: if True, one empty line is removed from the start and one
+        from the end of the text (when present) before indenting.
+    :returns: the prefixed lines, each terminated by a newline. The result is
+        an empty string when trimming leaves no lines at all.
+    """
+    
+    lines = text.split("\n")
+    if trim:
+        # Test for an empty list before indexing: for text == "" the first
+        # pop removes the only element, and lines[-1] would then raise
+        # IndexError.
+        if lines and lines[0] == '':
+            lines.pop(0)
+        if lines and lines[-1] == '':
+            lines.pop()
+    
+    # Build the result in one join instead of repeated string concatenation.
+    return "".join((subsequent if i else first) + line + "\n"
+                   for i, line in enumerate(lines))
+
+
+class Textifier(object):
+    """Renders a tree of ``block`` and ``columns`` elements as plain text.
+    
+    Nodes are accessed through the ElementTree element interface: the code
+    uses ``get()``, ``find()``, ``findall()``, ``tag``, ``text`` and iteration
+    over child elements.
+    """
+    
+    def __init__(self, width=72):
+        """
+        :param width: the default line width, used when neither the caller
+            nor the node being rendered supplies one.
+        """
+        
+        self._wrapper = textwrap.TextWrapper()
+        self._width = int(width)
+
+    def render_children(self, node, indent=0, width=None):
+        """Renders the ``block`` and ``columns`` children of ``node`` in
+        document order and returns the concatenated text. Children with any
+        other tag are ignored.
+        
+        :param node: the parent element.
+        :param indent: the indentation passed on to each child.
+        :param width: the line width passed on to each child.
+        """
+        
+        parts = []
+        for kid in node:
+            if kid.tag == "block":
+                parts.append(self.render_block(kid, indent=indent, width=width))
+            elif kid.tag == "columns":
+                parts.append(self.render_columns(kid, width, indent=indent))
+        return "".join(parts)
+
+    def render_block(self, node, indent=0, width=None):
+        """Renders a ``block`` element, followed by its children, as text.
+        
+        The text comes from the block's ``text`` child element. These
+        attributes of the block are recognized:
+        
+        ``hidden``
+            "true" makes the block (and its children) render as nothing.
+        ``width``
+            overrides the line width.
+        ``indent``
+            a number of columns, added to the inherited ``indent``.
+        ``wrap``
+            "false" keeps the text's own line breaks instead of re-wrapping.
+        ``before``, ``after``
+            the number of newlines to emit before and after the block.
+        ``first``, ``rest``
+            prefixes for the first line and for the following lines; each is
+            padded with spaces up to the indent. Both default to "".
+        ``filter``
+            "false" turns off the ``dechar`` character substitution.
+        
+        :param node: the block element.
+        :param indent: the indentation inherited from the enclosing element.
+        :param width: the line width inherited from the enclosing element;
+            None means the width given to the constructor.
+        """
+        
+        if node.get("hidden") == "true":
+            return ''
+        
+        text = ''
+        tnode = node.find("text")
+        if tnode is not None:
+            # An empty <text/> element has text None.
+            text = tnode.text or ''
+        
+        if width is None:
+            width = self._width
+        width = int(node.get("width", str(width)))
+        # The attribute is a string: convert it before adding the inherited
+        # indent (adding first raised TypeError).
+        indent = int(node.get("indent", "0")) + indent
+        wrap = node.get("wrap") != "false"
+        before = int(node.get("before", "0"))
+        after = int(node.get("after", "0"))
+        # Default to "" so blocks without these attributes can be rendered.
+        first = node.get("first", "")
+        rest = node.get("rest", "")
+        
+        if node.get("filter") != "false":
+            text = dechar(text)
+        
+        t = "\n" * before
+        if text.strip():
+            # NOTE(review): the original also prepended " " * max(indent -
+            # indent, 0), which is always the empty string; it is dropped
+            # here. Confirm whether an extra left margin was intended.
+            first_indent = first + (" " * (indent - len(first)))
+            rest_indent = rest + (" " * (indent - len(rest)))
+            
+            if wrap:
+                wrapper = self._wrapper
+                wrapper.width = width
+                wrapper.initial_indent = first_indent
+                wrapper.subsequent_indent = rest_indent
+                # Was "buffer.replace(...)": the text of the block is meant.
+                t += wrapper.fill(text.replace("\n", " ").strip())
+                
+                if not t.rstrip(" ").endswith("\n"):
+                    t += "\n"
+            else:
+                t += addindent(first_indent, rest_indent, text)
+        
+        t += self.render_children(node, indent=indent, width=width)
+        t += "\n" * after
+        return t
+
+    def render_columns(self, node, width, indent=0):
+        """Renders the ``block`` children of a ``columns`` element side by
+        side.
+        
+        The element's ``tabs`` attribute is a whitespace-separated list of
+        column positions, one per block, giving where each block starts. A
+        block ends where the next one starts; the last block ends at
+        ``width``.
+        
+        :param node: the columns element.
+        :param width: the right edge of the last column; None means the width
+            given to the constructor.
+        :param indent: accepted for symmetry with ``render_block``; the tab
+            stops alone position the columns, so it is not used.
+        :returns: the combined rows, each terminated by a newline.
+        :raises Exception: if the number of tab stops differs from the number
+            of blocks.
+        """
+        
+        if width is None:
+            width = self._width
+        # split() without an argument yields [] for a missing or empty
+        # attribute, where split(" ") yielded [''] and int('') failed.
+        tabs = [int(t) for t in node.get("tabs", "").split()]
+        blocks = node.findall("block")
+        if len(tabs) != len(blocks):
+            raise Exception("columns element has %s sub-blocks but only %s tab stops" % (len(blocks), len(tabs)))
+        if not blocks:
+            return ""
+        tabs.append(width)
+        
+        # Each block is rendered in absolute coordinates: indented to its tab
+        # stop and wrapped at the next one (the wrapper's width includes the
+        # indent). The slice line[left:right] below then cuts out exactly the
+        # block's own column.
+        texts = [self.render_block(block, indent=tabs[i], width=tabs[i+1])
+                 for i, block in enumerate(blocks)]
+        
+        linelists = [text.splitlines() for text in texts]
+        maxlines = max(len(linelist) for linelist in linelists)
+        rows = [""] * maxlines
+        last = len(linelists) - 1
+        for i, linelist in enumerate(linelists):
+            left = tabs[i]
+            right = tabs[i+1]
+            colwidth = right - left
+            for lineno in range(maxlines):
+                # A column with fewer lines than the tallest one contributes
+                # blank cells, so the columns to its right stay aligned.
+                line = linelist[lineno] if lineno < len(linelist) else ""
+                fragment = line[left:right]
+                if i < last:
+                    # The last column is not padded.
+                    fragment = fragment.ljust(colwidth)
+                rows[lineno] += fragment
+        return "".join(row + "\n" for row in rows)
+
+
+
+