Commits

Blue committed 2d11814

Also allow formally invalid XML in a few other tags, and fix a bug for posts with intermediate header lines. Add some new tests and a few small fixes.

Comments (0)

Files changed (7)

 language: python
+
 python:
   - 2.6
   - 2.7
   - pypy
+
 env:
   - DJANGO=1.2
   - DJANGO=1.3
   - DJANGO=1.4
+  - DJANGO=https://github.com/django/django/zipball/master
+
 install:
   - pip install -q Django==$DJANGO --use-mirrors
   - pip install pep8 --use-mirrors
   - pip install https://github.com/dcramer/pyflakes/tarball/master
   - pip install -q -e . --use-mirrors
+
 before_script:
   - 'pep8 --exclude=migrations --ignore=E501,E272,E221,E241 spotnet'
   - '[ "$TRAVIS_PYTHON_VERSION" \> 3 ] || pyflakes -x W spotnet'
+
 script:
   - python example_project/manage.py test

spotnet/connection.py

         if self.is_connected():
             try:
                 quitmsg = self._nntp.quit()
-            except (EOFError, nntplib.NNTPProtocolError):
+            except EOFError:
                 # seems to happen for me, but we're still disconnected
                 # TODO: find a way to check if we're really disconnected
                 # and rethrow the exception if not
         # get the full message from the nntp server
         try:
             raw = self.get_raw_post(messageid)
+        except ConnectionError as e:
+            # TODO: don't give up so easily
+            logger("Could not get raw post, error was %r" % e)
+            return False
+        try:
             snp = Post.from_raw(raw)
-        except ConnectionError:
-            # TODO: don't give up so easily
-            return False
         except InvalidPost as e:
             logger(
                 "Skipped invalid post %s %s: %s"
             )
             return False
         except DatabaseError as e:
+            # TODO: this is due to a bug in python
+            # source: http://stackoverflow.com/questions/3487377/how-can-i-check-a-python-unicode-string-to-see-that-it-actually-is-proper-unic
+            # and the fact that some posts aren't really textual posts
+            # but some sort of encoded nzb files (my guess)
             if not str(e).startswith('invalid byte sequence for encoding'):
                 raise
             else:

spotnet/downloadserver/sabnzbd.py

         if x['status'] is True:
             return ugettext(
                 u"Successfully added url '%(url)s' to Sabnzbd "
-                "server, it was added under the name '%(id)s'."
+                "server, it is named '%(id)s'."
             ) % dict(url=url, id=id)
         else:
             raise DownloadError(
 
 
 def wrap_with_cdata(xml_string, tag):
+    # TODO: this function does not handle spaces (or attributes) in tags!
     cdata_open = '<![CDATA['
     cdata_close = ']]>'
     open_tag_start = xml_string.find('<%s>' % tag)
             close_tag_start = xml_string.find('</%s>' % tag)
             if close_tag_start >= 0:
                 close_tag_end = close_tag_start + 3 + len(tag)
-                wrapped = [
-                    xml_string[:open_tag_end],
-                    cdata_open,
-                    xml_string[open_tag_end:close_tag_start],
-                    cdata_close,
-                    xml_string[close_tag_start:],
-                ]
-                return ''.join(wrapped)
+                content = xml_string[open_tag_end:close_tag_start]
+                if '<' not in content and '>' not in content:
+                    wrapped = [
+                        xml_string[:open_tag_end],
+                        cdata_open,
+                        content,
+                        cdata_close,
+                        xml_string[close_tag_start:],
+                    ]
+                    return ''.join(wrapped)
     return xml_string
 
+def get_line_header(line):
+    "Returns the header itself (which tests to True) if the line is a header line, or None."
+    # Header lines must have all of the following:
+    #  * Nonempty
+    #  * Start with a capital
+    #  * Contain a colon
+    #  * The colon must be followed by a space
+    #  * The colon must not be preceded by any spaces
+    if not line:
+        return None
+    if not line[0].isupper():
+        return None
+    colon = line.find(':')
+    if colon == -1:
+        return None
+    if not line[colon:colon+2] == ': ':
+        return None
+    if ' ' in line[:colon]:
+        return None
+    else:
+        return line[:colon]
+
 
 class MessageHeaders(UserDict):
     def __init__(self, rawpost):
                 pass
             else:
                 # this must be a header or an intermediate line
-                line_header = message.isheader(line)
+                line_header = get_line_header(line)
                 if line_header:
                     # this line contains a header
-                    if line_header == header.lower():
+                    if line_header.lower() == header.lower():
                         # this is the header we're looking for
                         result.append(line[len(header) + 2:])
                         last_line_contains_header = True
             elif line[0] == ' ':
                 # leave continuations untouched
                 new_messagefile.write(line)
-            elif self.rawpost.message.isheader(line):
+            elif get_line_header(line):
                 # leave headers untouched
                 new_messagefile.write(line)
             else:
         except Exception as e:
             # try wrapping the title and description in a cdata section
             wrapped = xml_string
-            wrapped = wrap_with_cdata(wrapped, 'Title')
-            wrapped = wrap_with_cdata(wrapped, 'Description')
+            wrap_tags = [
+                'Title',
+                'Description',
+                'Image',
+                'Tag',
+                'Website',
+                'Description',
+            ]
+            for tag in wrap_tags:
+                wrapped = wrap_with_cdata(wrapped, tag)
             try:
                 xml = parseString(wrapped)
             except Exception:

spotnet/templates/spotnet/paginate.html

 {% if page.number > 1 %}
     <li class="walk"><a href="?page=1">{% trans "First" %}</a></li>
 {% endif %}
-{% if page.number > 2 %}
+{% if page.number > 1 %}
     <li class="walk"><a href="?page={{ page.previous_page_number }}">{% trans "Previous" %}</a></li>
 {% else %}
     <li class="disable"><span>{% trans "Previous" %}</span></li>

spotnet/tests/parsing.py

             'Something: blaat',
             'Test: A',
             'B',  # the intermediate line
+            'Test: C',
             'Blaat: lkxjljsdf',
-            'Test: C',
+            'Test: D',
             'Another: doebi',
             '',  # start of body, the rest should be ignored by the tested method
-            'Test: im the body but i look line a header',
+            'Test: im the body but i look like a header',
         ]))
         self.assertEqual(
             raw.headers.join_with_intermediate('Test'),
-            'A\nB\nC',
+            'A\nB\nC\nD',
         )
 
         # reparse and verify
         raw.headers.resparse_with_intermediate()
+        # verify other headers are unchanged
+        self.assertEqual(
+            raw.headers['Something'],
+            'blaat',
+        )
+        self.assertEqual(
+            raw.headers['Blaat'],
+            'lkxjljsdf',
+        )
         # make sure that headers following the intermediate line
         # are parsed as headers and not the body, as the initial
         # parsing method would do (that method interprets the intermediate
         # make sure everything following the empty line is parsed as the body
         self.assertEqual(
             raw.get_content(),
-            'Test: im the body but i look line a header',
+            'Test: im the body but i look like a header',
         )
 
     def test_parse_xml_header_with_intermediate_lines(self):
             [u'01a03', u'01b03'],
         )
 
+    def test_xml_contains_invalid_ampersand(self):
+        raw = RawPost((None, 123, '<blaat@free.pt>', [
+            'X-XML: <Spotnet><Posting><Description>a&b</Description></Posting></Spotnet>',
+        ]))
+        self.assertEqual(raw.extra['Description'], 'a&b')
+
     def test_wrapping_in_cdata(self):
         self.assertEqual(
             wrap_with_cdata(
             "<test><Spam><![CDATA[abc]]></Spam></test>"
         )
 
+    def test_wrapping_in_cdata_while_containing_tags(self):
+        self.assertEqual(
+            wrap_with_cdata(
+                "<test><Spam>ab<Ham>cd</Ham>ef</Spam></test>",
+                'Spam',
+            ),
+            "<test><Spam>ab<Ham>cd</Ham>ef</Spam></test>",
+        )
+
     def test_wrapping_in_cdata_with_multi_tags(self):
         """If the xml contains the tag several times,
         only one occurrence is wrapped.
             request,
             post.image_segments,
             decompress=False,
-            mimetype='image/jpeg',
+            #mimetype  # TODO
         )