1. Aram Dulyan
  2. html2textile

Commits

Aram Dulyan  committed b8a3c40

Ensured that simple tags with attributes do not pollute the output when they contain links.

  • Participants
  • Parent commits 7f01ee7
  • Branches default

Comments (0)

Files changed (2)

File html2textile.py

View file
  • Ignore whitespace
     'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'div', 'br', 'blockquote',
 )
 
+# A regular expression to match additional attributes that can be passed to
+# textile tags
+TAG_SUFFIX = r'(\([^\)]+\)){0,2}(\{[^\}]+\})?(\[[^\]]+\])?'
+
 # A regular expression to match any whitespace or lone paragraph/heading
 # declarations at the end of a string
 WHITESPACE_SUFFIX = re.compile(
-    r'((p|bq|h[1-6])(\([^\)]+\)){0,2}(\{[^\}]+\})?(\[[^\]]+\])?\.)?\s*$')
+    r'((p|bq|h[1-6])%s\.)?\s*$' % TAG_SUFFIX)
 
 def safe_url(text, characters):
     """
                 and self.open_simple_tags[0] == tag):
             return False
         for stack, idx in self._reverse_stack_iter():
-            if stack[idx].endswith(tag):
-                stack[idx] = stack[idx][:-len(tag)]
+            match = re.search(
+                r'%s%s$' % (re.escape(tag), TAG_SUFFIX),
+                stack[idx])
+            if match:
+                stack[idx] = stack[idx][:-len(match.group(0))]
                 return True
             break
         return False

File runtests.py

View file
  • Ignore whitespace
 >>> convert('<em><strong><a href="http://example.com">Link.</a></strong></em>')
 "_*Link.*_":http://example.com
 
+Test that tag attributes do not interfere with the process of moving simple
+tags inside the link text. Unfortunately, the attributes are not preserved.
+>>> convert('<p><span style="color: navy"><span><a href="example.com">Link</a></span></span></p>')
+"%Link%":example.com
+
 
 Test classes and IDs.
 >>> convert('<p class="some_class" id="some_id">Some text.</p>')
 ## 
 ### Three.
 
+
 KNOWN ISSUES:
 
 #>>> convert('<p>---<em><br />*Note.</em>')