Commits

Anonymous committed e2fc4ed

Inline markup processing refactored; links are required to close (again).

Comments (0)

Files changed (5)

-Version 0.?.? (Nov ?? 2007)
----------------------------
+Version 0.3 (Nov ?? 2007)
+-------------------------
 
- * links don't have to close! (makes them more robust)
-
+ * big refactoring of inline wiki markup processing
 
 Version 0.2.1 (Nov 6 2007)
 --------------------------

creoleparser/core.py

 escape_char = '~'
 esc_neg_look = '(?<!' + re.escape(escape_char) + ')'
 esc_to_remove = re.compile(''.join([r'(?<!',re.escape(escape_char),')',re.escape(escape_char),r'(?!([ \n]|$))']))
+element_store = {}
+store_id_seq = 1
 
-def fragmentize(text,wiki_elements,remove_escapes = True):
+def fill_from_store(text):
+    frags = []
+    mo = re.search(r'<<<(\d*?)>>>',text,re.DOTALL)
+    if mo:
+        if mo.start():
+            frags.append(text[:mo.start()])
+        frags.append(element_store[mo.group(1)])
+        if mo.end() < len(text):
+            frags.extend(fill_from_store(text[mo.end():]))
+    else:
+        frags = [text]
+    return frags
+
+def fragmentize(text,wiki_elements, remove_escapes = True):
 
     """Takes a string of wiki markup and outputs a list of genshi
     Fragments (Elements and strings).
     # remove escape characters 
     if not wiki_elements:
         if remove_escapes:
-            return [esc_to_remove.sub('',text)]
-        else:
-            return [text]
+            text = esc_to_remove.sub('',text)
+        return fill_from_store(text)
 
     # If the first supplied wiki_element is actually a list of elements, \
     # search for all of them and match the closest one only.
          
     frags = []
     if mo:
-        # call again for leading text and extend the result list 
-        if mo.start():
-            frags.extend(fragmentize(text[:mo.start()],wiki_elements[1:]))
-
-        # append the found wiki element to the result list
-        frags.append(wiki_element._build(mo))
-
-        # make the source output easier to read
-        if wiki_element.append_newline:
-            frags.append('\n')
-
-        # call again for trailing text and extend the result list
-        if mo.end() < len(text):
-            frags.extend(fragmentize(text[mo.end():],wiki_elements))
+        frags = wiki_element._process(mo, text, wiki_elements)
+##        # call again for leading text and extend the result list 
+##        if mo.start():
+##            frags.extend(fragmentize(text[:mo.start()],wiki_elements[1:]))
+##
+##        # append the found wiki element to the result list
+##        frags.append(wiki_element._build(mo))
+##
+##        # make the source output easier to read
+##        if wiki_element.append_newline:
+##            frags.append('\n')
+##
+##        # call again for trailing text and extend the result list
+##        if mo.end() < len(text):
+##            frags.extend(fragmentize(text[mo.end():],wiki_elements))
     else:
         frags = fragmentize(text,wiki_elements[1:])
 

creoleparser/dialects.py

         #self.strong.child_tags = [self.em,self.br,self.link,self.img,self.http_link]
         #self.link.child_tags = [(self.strong, self.em), self.img]
 
+##        if use_additions:
+##            self.tt = InlineElement('tt', '##',[(self.strong,self.em,self.link),self.br,self.img,self.http_link])
+##            self.strong.child_tags = [(self.em,self.tt,self.link),self.br,self.img,self.http_link]
+##            self.em.child_tags = [(self.strong,self.tt,self.link),self.br,self.img,self.http_link]
+##            self.link.child_tags = [(self.strong, self.em,self.tt), self.img]
+##            header_children = [self.no_wiki,(self.strong, self.em, self.tt,self.link),
+##                               self.br,self.img,self.http_link]
+##
+##        else:
+##            self.em.child_tags = [(self.strong,self.link),self.br,self.img,self.http_link]
+##            self.strong.child_tags = [(self.em,self.link),self.br,self.img,self.http_link]
+##            self.link.child_tags = [(self.strong, self.em), self.img]
+##            header_children = [self.no_wiki,(self.strong, self.em, self.link),
+##                               self.br,self.img,self.http_link]
+
         if use_additions:
-            self.tt = InlineElement('tt', '##',[(self.strong,self.em,self.link),self.br,self.img,self.http_link])
-            self.strong.child_tags = [(self.em,self.tt,self.link),self.br,self.img,self.http_link]
-            self.em.child_tags = [(self.strong,self.tt,self.link),self.br,self.img,self.http_link]
-            self.link.child_tags = [(self.strong, self.em,self.tt), self.img]
-            header_children = [self.no_wiki,(self.strong, self.em, self.tt,self.link),
-                               self.br,self.img,self.http_link]
+            self.tt = InlineElement('tt', '##',[self.strong,self.link,self.br,self.img,self.http_link,self.em])
+            self.strong.child_tags = [self.tt,self.link,self.br,self.img,self.http_link,self.em]
+            self.em.child_tags = [self.strong,self.tt,self.link,self.br,self.img,self.http_link]
+            self.link.child_tags = [self.strong, self.tt, self.img,self.em]
+            header_children = [self.no_wiki,self.strong, self.tt,self.link,
+                               self.br,self.img,self.http_link,self.em]
 
         else:
-            self.em.child_tags = [(self.strong,self.link),self.br,self.img,self.http_link]
-            self.strong.child_tags = [(self.em,self.link),self.br,self.img,self.http_link]
-            self.link.child_tags = [(self.strong, self.em), self.img]
-            header_children = [self.no_wiki,(self.strong, self.em, self.link),
-                               self.br,self.img,self.http_link]
-                
+            self.em.child_tags = [self.strong,self.link,self.br,self.img,self.http_link]
+            self.strong.child_tags = [self.link,self.br,self.img,self.http_link,self.em]
+            self.link.child_tags = [self.strong, self.img,self.em]
+            header_children = [self.no_wiki,self.strong, self.link,
+                               self.br,self.img,self.http_link,self.em]
+            
         self.hr = LoneElement('hr','----',[])
         #self.lone_br = LoneElement('br',r'\\',[])
         self.blank_line = BlankLine()

creoleparser/elements.py

 
 import genshi.builder as bldr
 
-from core import escape_char, esc_neg_look, fragmentize
+from core import escape_char, esc_neg_look, fragmentize, \
+     element_store, store_id_seq
 
 
 __docformat__ = 'restructuredtext en'
         """
         pass
 
+    def _process(self, mo, text, wiki_elements):
+        frags = []
+        # call again for leading text and extend the result list 
+        if mo.start():
+            frags.extend(fragmentize(text[:mo.start()],wiki_elements[1:]))
+        # append the found wiki element to the result list
+        frags.append(self._build(mo))
+        # make the source output easier to read
+        if self.append_newline:
+            frags.append('\n')
+        # call again for trailing text and extend the result list
+        if mo.end() < len(text):
+            frags.extend(fragmentize(text[mo.end():],wiki_elements))
+        return frags
+        
     def __repr__(self):
         return "<WikiElement "+str(self.tag)+">"
 
             return esc_neg_look + re.escape(self.token) + content + end
         else:
             content = '(.+?)'
-            return esc_neg_look + re.escape(self.token[0]) +\
-                   content + '(' + esc_neg_look + re.escape(self.token[1]) +\
-                   r'|$)'
+            return esc_neg_look + re.escape(self.token[0]) + content + esc_neg_look + re.escape(self.token[1])
+
+    def _process(self, mo, text, wiki_elements):
+
+        global store_id_seq
+        processed = self._build(mo)
+        store_id = str(store_id_seq) # str(hash(processed))
+        element_store[store_id] = processed
+        store_id_seq = store_id_seq + 1
+        text = ''.join([text[:mo.start()],'<<<',store_id,'>>>',
+                        text[mo.end():]])
+        frags = fragmentize(text,wiki_elements)
+        return frags
+
              
 
 class Link(InlineElement):
     def pre_escape_pattern(self):
         return '(' + re.escape(self.token[0]) + '.*?)' + \
                '(' + re.escape(self.delimiter) + '.*?' + \
-               '(' + re.escape(self.token[1]) + '|$))'
+               re.escape(self.token[1]) + ')'
         
     def _build(self,mo):
         body = mo.group(1).split(escape_char + self.delimiter, 1)
     def pre_escape_pattern(self):
         return '(' + re.escape(self.token[0]) + '.*?)' + \
                '(' + re.escape(self.delimiter) + '.*?' + \
-               '(' + re.escape(self.token[1]) + '|$))'
+               re.escape(self.token[1]) + ')'
 
     def _build(self,mo):
         body = mo.group(1).split(escape_char+self.delimiter,1)

creoleparser/tests.py

     assert creole_to_xhtml('{{{no **wiki** in here}}} but //here// is fine') == \
             '<p><tt>no **wiki** in here</tt> but <em>here</em> is fine</p>\n'
     assert creole_to_xhtml('steve **is strong //you know\n dude{{{not **weak**}}}\n') == \
-            '<p>steve <strong>is strong <em>you know\n dude</em></strong><tt>not **weak**</tt></p>\n'
+            '<p>steve <strong>is strong <em>you know\n dude<tt>not **weak**</tt></em></strong></p>\n'
 
     assert creole_to_xhtml(
 r"""   |= Item|= Size|= Price |