Commits

Blue committed 573d53b

Allow some extra posts that contain formally invalid xml to be parsed.

  • Participants
  • Parent commits 2f992c6

Comments (0)

Files changed (2)

File spotnet/post.py

     pass
 
 
+def wrap_with_cdata(xml_string, tag):
+    cdata_open = '<![CDATA['
+    cdata_close = ']]>'
+    open_tag_start = xml_string.find('<%s>' % tag)
+    open_tag_end = open_tag_start + 2 + len(tag)
+    if open_tag_start >= 0:
+        content_start = xml_string[open_tag_end:open_tag_end+len(cdata_open)]
+        if content_start != cdata_open:
+            close_tag_start = xml_string.find('</%s>' % tag)
+            if close_tag_start >= 0:
+                close_tag_end = close_tag_start + 3 + len(tag)
+                wrapped = [
+                    xml_string[:open_tag_end],
+                    cdata_open,
+                    xml_string[open_tag_end:close_tag_start],
+                    cdata_close,
+                    xml_string[close_tag_start:],
+                ]
+                return ''.join(wrapped)
+    return xml_string
+
+
 class MessageHeaders(UserDict):
     def __init__(self, rawpost):
         self.rawpost = rawpost
         try:
             xml = parseString(xml_string)
         except Exception as e:
-            raise InvalidPostXml("Post has invalid XML data for header X-XML: %s" % e)
+            # try wrapping the title and description in a cdata section
+            wrapped = xml_string
+            wrapped = wrap_with_cdata(wrapped, 'Title')
+            wrapped = wrap_with_cdata(wrapped, 'Description')
+            try:
+                xml = parseString(wrapped)
+            except Exception:
+                raise InvalidPostXml("Post has invalid XML data for header X-XML: %s" % e)
         doc = xml.documentElement
         if not doc.tagName in ('Spotnet', 'SpotNet'):
             raise InvalidPostXml(

File spotnet/tests/parsing.py

         import unittest
 
 from spotnet import settings
-from spotnet.post import RawPost, InvalidPost
+from spotnet.post import RawPost, InvalidPost, wrap_with_cdata
 from spotnet.models import Post
 
 
             [u'01a03', u'01b03'],
         )
 
+    def test_wrapping_in_cdata(self):
+        self.assertEqual(wrap_with_cdata(
+            "<test><Spam>abc</Spam></test>"
+        , 'Spam'), 
+            "<test><Spam><![CDATA[abc]]></Spam></test>"
+        )
+
+    def test_wrapping_in_cdata_with_multi_tags(self):
+        "If the xml contains the tag several times, only once occurance is wrapped."
+        xml = "<test><Spam>abc</Spam><other>xmf</other><Spam>xxx</Spam></test>"
+        self.assertEqual(wrap_with_cdata(
+            "<test><Spam>abc</Spam><other>xmf</other><Spam>xxx</Spam></test>"
+        , 'Spam'), 
+            "<test><Spam><![CDATA[abc]]></Spam><other>xmf</other><Spam>xxx</Spam></test>"
+        )
+
 
 class EncodingParsingTest(ParsingTest):