Source

django-spotnet / spotnet / tests / parsing.py

try:
    from django.utils import unittest
except ImportError:
    try:
        import unittest2 as unittest
    except ImportError:
        import unittest

from mock import Mock
from spotnet import settings
from spotnet.post import RawPost
from spotnet.models import Post


class ParsingTest(unittest.TestCase):
    """Base test case with helpers that build RawPost objects for tests."""

    def construct(self, headers, content, xml=None):
        """Construct a RawPost object from headers and content.

        This automatically adds all required headers:
        Lines, From and Date.
        To override them, pass a value for these in the headers argument.
        To omit these, pass them to the headers argument
        with a value of None.

        Arguments:
        headers -- mapping of header name to value; it is copied, so the
                   caller's mapping is never modified. Headers whose
                   value is None are left out of the generated message.
        content -- the body, either a single string (one line) or a
                   sequence of lines.
        xml     -- optional mapping of element name to text; when given
                   (and non-empty) it is rendered into an X-XML header,
                   each value wrapped in a CDATA section.

        Raises ValueError when both xml and an explicit X-XML header
        are supplied.

        Returns whatever parse_to_post returns for the generated lines.
        """
        # work on a copy, so the defaults added below do not leak
        # into the dict the caller passed in
        headers = dict(headers)
        # type(u'') is unicode on python 2 and str on python 3, so text
        # of either kind is treated as a single body line
        if isinstance(content, (str, type(u''))):
            content = [content]
        else:
            # accept any sequence of lines (tuple, generator, ...)
            content = list(content)
        # setdefault leaves an explicit None in place, which is how
        # a caller asks for a required header to be omitted
        headers.setdefault('Lines', len(content))
        headers.setdefault('From', 'Testuser')
        headers.setdefault('Date', 'Thu, 25 Sep 2003 10:49:41 -0300')
        if xml:
            if 'X-XML' in headers:
                raise ValueError(
                    "pass either the 'xml' argument or an 'X-XML' header, not both"
                )
            elements = ''.join(
                '<%(key)s><![CDATA[%(val)s]]></%(key)s>' % dict(key=k, val=v)
                for k, v in xml.items()
            )
            headers['X-XML'] = '<Spotnet><Posting>%s</Posting></Spotnet>' % elements
        header_lines = [
            '%s: %s' % (k, v) for k, v in headers.items() if v is not None
        ]
        # headers, then an empty separator line, then the body lines
        return self.parse_to_post(header_lines + [''] + content)

    def parse_to_post(self, content):
        """Wrap the given message lines in a RawPost.

        Arguments:
        content -- the message as passed on by construct
                   (header lines, an empty line, body lines).
        """
        # we pick a random postnumber and derive the messageid from it
        # and the current number of stored posts, so that they are
        # unlikely to violate database unique constraints
        # NOTE: the postnumber is random, not guaranteed unique;
        # a collision between two posts is improbable but possible
        import random
        postnumber = random.randrange(0, 10000000)
        messageid = '<testmessage-%s-%s@test.com>' % (postnumber, Post.objects.count())
        # NOTE(review): the meaning of the two leading None entries is
        # defined by RawPost and is not visible here -- confirm there
        return RawPost(postnumber, [None, None, messageid, content])


class EncodingParsingTest(ParsingTest):
    """Tests that encoded text in a post is decoded when it is parsed.

    Two kinds of encoded input are covered, each supplied once through
    a plain header / body and once through the X-XML header:
    numeric html entities, and a header value that names
    a different character set (ISO-8859-7).
    """

    def _assert_entities_decoded(self, raw):
        """Run the shared html entity checks against a raw post."""
        expected_title = u'dokter van een pati\xebnt'
        expected_description = u'doe \xe9\xe9n ding'
        # the raw post must expose the decoded text
        self.assertEqual(raw.subject, expected_title)
        self.assertEqual(raw.description, expected_description)
        # a Post built from the raw post must carry the same text
        post = Post.from_raw(raw)
        self.assertEqual(post.title, expected_title)
        self.assertEqual(post.description, expected_description)
        # and the text must still be the same after saving the post
        # and fetching it again by id
        post.save()
        post = Post.objects.get(id=post.id)
        self.assertEqual(post.title, expected_title)
        self.assertEqual(post.description, expected_description)

    def test_parse_html_entity_from_headers(self):
        headers = {'Subject': 'dokter van een pati&#235;nt'}
        raw = self.construct(headers, 'doe &#233;&#233;n ding')
        self._assert_entities_decoded(raw)

    def test_parse_html_entity_from_xml(self):
        xml = dict(
            Title='dokter van een pati&#235;nt',
            Description='doe &#233;&#233;n ding',
        )
        # the body is a placeholder, the description comes from the xml
        raw = self.construct({}, 'Not used', xml)
        self._assert_entities_decoded(raw)

    def test_parse_different_encoding_header_from_headers(self):
        headers = {'Subject': '=?ISO-8859-7?B?1PHp4e303Pb16+vv8iDM4ezc6u/y?='}
        raw = self.construct(headers, 'blaat')
        expected = u'\u03a4\u03c1\u03b9\u03b1\u03bd\u03c4\u03ac\u03c6\u03c5\u03bb\u03bb\u03bf\u03c2 \u039c\u03b1\u03bc\u03ac\u03ba\u03bf\u03c2'
        self.assertEqual(raw.subject, expected)

    def test_parse_different_encoding_header_from_xml(self):
        xml = dict(
            Title='=?ISO-8859-7?B?1PHp4e303Pb16+vv8iDM4ezc6u/y?=',
            Description='blaat',
        )
        raw = self.construct({}, 'Not used', xml)
        expected = u'\u03a4\u03c1\u03b9\u03b1\u03bd\u03c4\u03ac\u03c6\u03c5\u03bb\u03bb\u03bf\u03c2 \u039c\u03b1\u03bc\u03ac\u03ba\u03bf\u03c2'
        self.assertEqual(raw.subject, expected)