# Source
#
# django-spotnet / spotnet / tests / parsing.py

try:
    from django.utils import unittest
except ImportError:
    try:
        import unittest2 as unittest
    except ImportError:
        import unittest

from mock import Mock
from spotnet import settings
from spotnet.post import RawPost
from spotnet.models import Post


class ParsingTest(unittest.TestCase):
    """Base test case offering helpers that build RawPost objects."""

    def construct(self, headers, content, xml=None):
        """Construct a RawPost object from headers and content.

        This automatically adds all required headers:
        Lines, From and Date.
        To override them, pass a value for these in the headers argument.
        To omit these, pass them to the headers argument
        with a value of None.

        content is either a single string (one body line) or a list
        of body lines.

        If xml is given, it must be a mapping of element name to text;
        it is serialized into an X-XML header with every value wrapped
        in a CDATA section. ValueError is raised if the headers argument
        already contains an X-XML key.

        The mapping passed as headers is never modified.
        """
        # work on a copy, so the defaults added below
        # do not leak into the caller's dict
        headers = dict(headers)

        if isinstance(content, str):
            content = [content]

        # setdefault keeps explicit values, including None
        # (None means 'omit this header', see the filter below)
        headers.setdefault('Lines', len(content))
        headers.setdefault('From', 'Testuser')
        headers.setdefault('Date', 'Thu, 25 Sep 2003 10:49:41 -0300')

        if xml:
            if 'X-XML' in headers:
                raise ValueError(
                    "Pass either an 'X-XML' header or the xml argument, not both"
                )
            elements = ''.join(
                '<%(key)s><![CDATA[%(val)s]]></%(key)s>' % dict(key=k, val=v)
                for k, v in xml.items()
            )
            headers['X-XML'] = (
                '<Spotnet><Posting>%s</Posting></Spotnet>' % elements
            )

        # headers with a value of None are left out on purpose
        header_lines = [
            '%s: %s' % (k, v) for k, v in headers.items() if v is not None
        ]
        # an empty line separates the headers from the body
        return self.parse_to_post(header_lines + [''] + content)

    def parse_to_post(self, content):
        """Wrap a list of raw lines in a RawPost.

        content is the list of lines (headers, an empty line, body)
        that is handed to RawPost unchanged.
        """
        # we create a random postnumber and a messageid
        # derived from it, so that they are unlikely to violate
        # database unique constraints
        import random
        postnumber = random.randrange(0, 10000000)
        messageid = '<testmessage-%s-%s@test.com>' % (
            postnumber,
            Post.objects.count(),
        )
        # NOTE(review): the two leading None values are placeholders;
        # the exact meaning of each slot is defined by RawPost - confirm there
        return RawPost(postnumber, [None, None, messageid, content])


class EncodingParsingTest(ParsingTest):
    """Tests for decoding of raw bytes, encoded headers and html entities."""

    def _assert_entities_decoded_and_saved(self, raw):
        """Check the decoded title/description on raw, on the Post
        created from it and on that Post after a database round trip.
        """
        title = u'dokter van een pati\xebnt'
        description = u'doe \xe9\xe9n ding'

        # the html entities must be decoded on the raw post already
        self.assertEqual(raw.subject, title)
        self.assertEqual(raw.description, description)

        # the Post instance built from it must carry the same strings
        created = Post.from_raw(raw)
        self.assertEqual(created.title, title)
        self.assertEqual(created.description, description)

        # and so must the copy that is read back from the database
        created.save()
        stored = Post.objects.get(id=created.id)
        self.assertEqual(stored.title, title)
        self.assertEqual(stored.description, description)

    def test_decoding_non_asci(self):
        raw = self.construct({}, 'whatever')

        # a non-ascii byte string ends up as the replacement character,
        # the same character as unicode is returned as is
        byte_result = raw.decode_string('\xb2')
        self.assertEqual(byte_result, u'\ufffd')
        unicode_result = raw.decode_string(u'\xb2')
        self.assertEqual(unicode_result, u'\xb2')

    def test_decoding_different_encodings(self):
        raw = self.construct({}, 'whatever')

        # a byte string holding an encoded header is decoded
        # using the encoding it names
        decoded = raw.decode_string(
            '=?ISO-8859-7?B?1PHp4e303Pb16+vv8iDM4ezc6u/y?='
        )
        self.assertEqual(
            decoded,
            u'\u03a4\u03c1\u03b9\u03b1\u03bd\u03c4\u03ac\u03c6\u03c5\u03bb\u03bb\u03bf\u03c2 \u039c\u03b1\u03bc\u03ac\u03ba\u03bf\u03c2',
        )

        # a unicode string in that same format has already been decoded
        # once, so it must come back unchanged rather than being
        # decoded like a byte string
        already_unicode = u'=?ISO-8859-7?B?1PHp4e303Pb16+vv8iDM4ezc6u/y?='
        self.assertEqual(raw.decode_string(already_unicode), already_unicode)

    def test_decoding_html_entities(self):
        raw = self.construct({}, 'whatever')

        # named, decimal and hex entities,
        # each as a byte string and as a unicode string
        ampersands = (
            '&amp;', u'&amp;',
            '&#38;', u'&#38;',
            '&#x26;', u'&#x26;',
        )
        for entity in ampersands:
            self.assertEqual(raw.decode_string(entity), u'&')

    def test_parsing_html_entity_from_header(self):
        headers = dict(Subject='dokter van een pati&#235;nt')
        raw = self.construct(headers, 'doe &#233;&#233;n ding')
        self._assert_entities_decoded_and_saved(raw)

    def test_parsing_html_entity_from_xml(self):
        xml = dict(
            Title='dokter van een pati&#235;nt',
            Description='doe &#233;&#233;n ding',
        )
        raw = self.construct({}, 'Not used', xml)
        self._assert_entities_decoded_and_saved(raw)

    def test_parsing_different_encoding_header(self):
        headers = dict(
            Subject='=?ISO-8859-7?B?1PHp4e303Pb16+vv8iDM4ezc6u/y?=',
        )
        raw = self.construct(headers, 'blaat')
        self.assertEqual(
            raw.subject,
            u'\u03a4\u03c1\u03b9\u03b1\u03bd\u03c4\u03ac\u03c6\u03c5\u03bb\u03bb\u03bf\u03c2 \u039c\u03b1\u03bc\u03ac\u03ba\u03bf\u03c2',
        )

    def test_parsing_different_encoding_header_from_xml(self):
        xml = dict(
            Title='=?ISO-8859-7?B?1PHp4e303Pb16+vv8iDM4ezc6u/y?=',
            Description='blaat',
        )
        raw = self.construct({}, 'Not used', xml)
        # see test_decoding_different_encodings
        # for why this should not be decoded like from headers
        self.assertEqual(
            raw.subject,
            u'=?ISO-8859-7?B?1PHp4e303Pb16+vv8iDM4ezc6u/y?=',
        )
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.