Commits

Blue committed e52c8e7

Fixed errors with decoding post fields and updated the tests accordingly.

  • Participants
  • Parent commits cc83c5c

Comments (0)

Files changed (2)

File spotnet/post.py

         return d
 
     def decode_string(self, string):
-        if string is None or isinstance(string, unicode):
+        if string is None:
             return string
-        elif isinstance(string, str):
+        elif isinstance(string, (str, unicode)):
             decoded = email.header.decode_header(string)
             if decoded[0][1] is None:
                 decoded[0] = (decoded[0][0], 'utf8')  # a sensible default, since it's probably ascii

File spotnet/tests/parsing.py

 from mock import Mock
 from spotnet import settings
 from spotnet.post import RawPost
+from spotnet.models import Post
 
 
 class ParsingTest(unittest.TestCase):
-    def construct(self, headers, content, add_lines=True):
+    def construct(self, headers, content, xml=None):
+        """Construct a RawPost object from headers and content.
+
+        This automatically adds all required headers:
+        Lines, From and Date.
+        To override one of them, pass a value for it in the headers argument.
+        To omit one of them, pass it in the headers argument
+        with a value of None.
+        """
         if isinstance(content, str):
             lines = 1
             content = [content]
         else:
             lines = len(content)
-        if add_lines and 'Lines' not in headers:
+        if 'Lines' not in headers:
             headers['Lines'] = lines
+        if 'From' not in headers:
+            headers['From'] = 'Testuser'
+        if 'Date' not in headers:
+            headers['Date'] = 'Thu, 25 Sep 2003 10:49:41 -0300'
+        if xml:
+            if 'X-XML' in headers:
+                raise ValueError
+            else:
+                headers['X-XML'] = '<Spotnet><Posting>%s</Posting></Spotnet>' % ''.join(
+            '<%(key)s><![CDATA[%(val)s]]></%(key)s>' % dict(key=k, val=v) for k, v in xml.iteritems()
+        )
         return self.parse_to_post(
-            ['%s: %s' % (k, v) for k, v in headers.iteritems()]
+            ['%s: %s' % (k, v) for k, v in headers.iteritems() if v is not None]
         +
             ['']
         +
         ))
 
     def parse_to_post(self, content):
-        return RawPost(None, [None, None, None, content])
+        # we create a test-wide unique postnumber
+        # and messageid, so that they don't violate
+        # database unique constraints
+        import random
+        postnumber = random.randrange(0, 10000000)
+        messageid = '<testmessage-%s-%s@test.com>' % (postnumber, Post.objects.count())
+        return RawPost(postnumber, [None, None, messageid, content])
 
 
 class EncodingParsingTest(ParsingTest):
 
-    def test_parse_html_entity(self):
-        post = self.construct(dict(
+    def test_parse_html_entity_from_headers(self):
+        raw = self.construct(dict(
             Subject='dokter van een pati&#235;nt',
-        ), 'blaat')
-        self.assertEqual(post.subject, u'dokter van een pati\xebnt')
+        ), 'doe &#233;&#233;n ding')
+        # check if the html entities are properly decoded
+        self.assertEqual(raw.subject, u'dokter van een pati\xebnt')
+        self.assertEqual(raw.description, u'doe \xe9\xe9n ding')
+        post = Post.from_raw(raw)
+        # check if the resulting post instance has the correct strings
+        self.assertEqual(post.title, u'dokter van een pati\xebnt')
+        self.assertEqual(post.description, u'doe \xe9\xe9n ding')
+        post.save()
+        post = Post.objects.get(id=post.id)
+        # check if the correct title and description are saved to the database
+        self.assertEqual(post.title, u'dokter van een pati\xebnt')
+        self.assertEqual(post.description, u'doe \xe9\xe9n ding')
 
-    def test_parse_different_encoding_header(self):
-        post = self.construct(dict(
+    def test_parse_html_entity_from_xml(self):
+        raw = self.construct(
+            {},
+            'Not used',
+            dict(
+                Title='dokter van een pati&#235;nt',
+                Description='doe &#233;&#233;n ding',
+            )
+        )
+        # check if the html entities are properly decoded
+        self.assertEqual(raw.subject, u'dokter van een pati\xebnt')
+        self.assertEqual(raw.description, u'doe \xe9\xe9n ding')
+        post = Post.from_raw(raw)
+        # check if the resulting post instance has the correct strings
+        self.assertEqual(post.title, u'dokter van een pati\xebnt')
+        self.assertEqual(post.description, u'doe \xe9\xe9n ding')
+        post.save()
+        post = Post.objects.get(id=post.id)
+        # check if the correct title and description are saved to the database
+        self.assertEqual(post.title, u'dokter van een pati\xebnt')
+        self.assertEqual(post.description, u'doe \xe9\xe9n ding')
+
+    def test_parse_different_encoding_header_from_headers(self):
+        raw = self.construct(dict(
             Subject='=?ISO-8859-7?B?1PHp4e303Pb16+vv8iDM4ezc6u/y?=',
         ), 'blaat')
-        self.assertEqual(post.subject, u'\u03a4\u03c1\u03b9\u03b1\u03bd\u03c4\u03ac\u03c6\u03c5\u03bb\u03bb\u03bf\u03c2 \u039c\u03b1\u03bc\u03ac\u03ba\u03bf\u03c2')
+        self.assertEqual(raw.subject, u'\u03a4\u03c1\u03b9\u03b1\u03bd\u03c4\u03ac\u03c6\u03c5\u03bb\u03bb\u03bf\u03c2 \u039c\u03b1\u03bc\u03ac\u03ba\u03bf\u03c2')
+
+    def test_parse_different_encoding_header_from_xml(self):
+        raw = self.construct(
+            {},
+            'Not used',
+            dict(
+                Title='=?ISO-8859-7?B?1PHp4e303Pb16+vv8iDM4ezc6u/y?=',
+                Description='blaat',
+            )
+        )
+        self.assertEqual(raw.subject, u'\u03a4\u03c1\u03b9\u03b1\u03bd\u03c4\u03ac\u03c6\u03c5\u03bb\u03bb\u03bf\u03c2 \u039c\u03b1\u03bc\u03ac\u03ba\u03bf\u03c2')