Commits

Blue  committed 2f992c6

Lots of changes.

  • Participants
  • Parent commits 074f905

Comments (0)

Files changed (9)

File example_project/exproj/index/templates/index.html

   <ul>
     <li><a href="{% url admin:index %}">The django builtin admin</a></li>
     <li><a href="{% url spotnet:index %}">The spotnet pages</a></li>
+    <li><a href="{% url spotnet-stats:index %}">Spotnet statistics</a></li>
   </ul>
   <p>To customize this app to your specific needs, please visit the documentation for all the options available.</p>
   <ul>

File spotnet/connection.py

                     self.add_dispose_message(
                         postnumber,
                         messageid,
-                        poster,
-                        subject,
+                        poster.decode('utf8', 'ignore'),
+                        subject.decode('utf8', 'ignore'),
                         logger=lambda x: logger('  %s' % x),
                     )
                 else:
     def is_spotnet_post(self, messageid, subject):
         # we limit ourselves here since not all posts provide real spotnet posts
         # (they are the files that make up the downloads themselves)
-        return TITLE_SELECTION_REGEXP.match(subject) is None and \
-            not messageid.startswith('<part')
+        return (TITLE_SELECTION_REGEXP.match(subject) is None and
+            # this seems to conclusively indicate a download file
+            not messageid.startswith('<part') and
+            # this one too
+            not '$' in messageid)
 
     def is_dispose_message(self, subject):
         return subject.startswith('DISPOSE ')  # and '@' in subject
         # we check existence by messageid since it is unique and consistent across nntp servers
         return Post.objects.filter(messageid=messageid[:80]).exists()
 
-    def dispose_message_exists(self, messageid, personid):
+    def dispose_message_exists(self, messageid, personid, logger):
         # we check existence by messageid since it is unique and consistent across nntp servers
-        return PostMarker.objects.filter(messageid=messageid[:80], person_id=personid[:180]).exists()
+        try:
+            return PostMarker.objects.filter(messageid=messageid[:80], person_id=personid[:180]).exists()
+        except DatabaseError as e:
+            if not str(e).startswith('invalid byte sequence for encoding'):
+                raise
+            else:
+                # same situation as in the add_post method
+                logger("Skipped invalid dispose message %s: %s" % (messageid, e))
+                return False
 
     @transaction.commit_on_success
     def add_post(self, postnumber, messageid, logger=noop):
 
         dispose_messageid = '<%s>' % subject[len('DISPOSE '):]
 
-        if self.dispose_message_exists(dispose_messageid, poster):
+        if self.dispose_message_exists(dispose_messageid, poster, logger):
             if False:  # verbosity > 1
                 logger(
                     "Skipped existing dispose message %s: %s"

File spotnet/post.py

             )
         if len(doc.childNodes) >= 2:
             signature = doc.childNodes[1]
-            if not signature.tagName == 'Signature':
+            if signature.nodeType in (3, 4):
+                if signature.nodeValue.strip():
+                    raise InvalidPostXml(
+                        "XML for spotnet post has a second child node "
+                        "with text for 'Spotnet'"
+                    )
+                else:
+                    pass  # it's whitespace, just ignore it
+            elif not signature.tagName == 'Signature':
                 raise InvalidPostXml(
                     "XML for spotnet post has a second child node "
                     "not called 'Signature' for 'Spotnet'"
                 )
-            # TODO: use the signature
+            else:
+                pass  # TODO: use the signature
         else:
             signature = None
         # assemble dict of content
         d = {}
         for e in main.childNodes:
-            if len(e.childNodes) == 1 and e.childNodes[0].nodeType in (3, 4):
-                # if it has one child that is a textnode or cdata node, add it to the dict
-                d[e.tagName] = e.childNodes[0].nodeValue
+            if len(e.childNodes) == 0:
+                pass  # an empty value, this is equivalent to a None value
+            elif e.childNodes[0].nodeType in (3, 4):
+                if len(e.childNodes) == 1:
+                    # if it has one child that is a textnode or cdata node, add it to the dict
+                    d[e.tagName] = e.childNodes[0].nodeValue
+                elif e.tagName == 'Category':
+                    # this is a category tag that includes subcategories
+                    # like: <Category>01<Sub>01a03</Sub><Sub>01b03</Sub></Category>
+                    category_node = e.childNodes[0]
+                    d['Category'] = category_node.nodeValue
+                    d['Subcategories'] = []
+                    for sub in e.childNodes[1:]:
+                        if sub.nodeType in (3, 4):
+                            raise InvalidPostXml(
+                                "Unexpected text node in 'Category' node"
+                            )
+                        elif sub.tagName != 'Sub':
+                            raise InvalidPostXml(
+                                "Unexpected node %r in 'Category' node" % sub.tagName
+                            )
+                        else:
+                            assert sub.nodeType == 1
+                            assert sub.childNodes[0].nodeType in (3, 4)
+                            d['Subcategories'].append(sub.childNodes[0].nodeValue)
+                else:
+                    raise InvalidPostXml(
+                        "XML for spotnet post has more than one child "
+                        "for first node in Posting"
+                    )
             elif e.tagName == 'Category':
+                # TODO: do we still need this?
                 d['Category'] = []
                 d['Subcategories'] = []
                 for cat_node in e.childNodes:
-                    if cat_node.nodeType == 3:
+                    if cat_node.nodeType in (3, 4):
                         # a main category
                         d['Category'].append(cat_node.nodeValue)
                     if cat_node.nodeType == 1:
 
     @property
     def poster(self):
-        p = self.headers['From'].split('<', 1)[0].strip()
-        return self.decode_entities(p, 'ignore')
+        try:
+            p = self.headers['From'].split('<', 1)[0].strip()
+        except KeyError:
+            raise InvalidPost("Post does not have a From header")
+        else:
+            return self.decode_entities(p, 'ignore')
 
     @property
     def subject(self):

File spotnet/stats/functions.py

-from datetime import date, timedelta
+from datetime import date as date_base, timedelta
 from django.utils.translation import ugettext as _
+from django.conf import settings as django_settings
 from spotnet.models import Post
 from spotnet.settings import CATEGORY_MAPPING
 
 
+def date(year, month, day):
+    """Build a date from year, month and day, coercing each part with int().
+
+    Wraps the datetime.date class (imported here as date_base), so the
+    parts may be given as anything int() accepts, e.g. numeric strings.
+    """
+    year_int, month_int, day_int = int(year), int(month), int(day)
+    return date_base(year_int, month_int, day_int)
+
+
 def posts_per_day():
     """Returns the number of posts for each day.
 
     results.append((_('Unknown'), ) + unknown_category)
 
     return results
+
+
+def spotnet_table_filesize():
+    "Get the filesize of the table storing the spotnet posts"
+
+    from django.db import connection, transaction
+    cursor = connection.cursor()
+
+    if cursor.db.vendor == 'sqlite':
+        return None
+    elif cursor.db.vendor == 'mysql':
+        query = """
+SELECT
+  (DATA_LENGTH + INDEX_LENGTH - DATA_FREE
+FROM INFORMATION_SCHEMA.TABLES
+WHERE
+  TABLE_SCHEMA = '%s'
+AND
+  TABLE_NAME = '%s'""" % (
+        django_settings.DATABASES['default']['name'],
+        Post._meta.db_table,
+    )
+    elif cursor.db.vendor == 'postgresql':
+        query = "SELECT pg_total_relation_size('%s')" % (Post._meta.db_table, )
+    else:
+        raise Exception("Unsupported database type")
+
+    cursor.execute(query)
+    data = cursor.fetchone()
+
+    return data[0]

File spotnet/stats/templates/spotnet_stats/general.html

       <td>{{ total_filesize|filesizeformat }}</td>
     </tr>
     <tr>
-      <th>{% trans "First post" %}</th>
+      <th>{% trans "Oldest post" %}</th>
       <td>{{ first_post }}</td>
     </tr>
     <tr>
-      <th>{% trans "Last post" %}</th>
+      <th>{% trans "Newest post" %}</th>
       <td>{{ last_post }} ({{ last_post|timesince }} {% trans "ago" %})</td>
     </tr>
+{% if table_filesize %}
     <tr>
-      <th>{% trans "X" %}</th>
-      <td></td>
+      <th>{% trans "Post table size" %}</th>
+      <td>{{ table_filesize|filesizeformat }}</td>
     </tr>
+{% endif %}
   </table>
 
   <h2>{% trans "Posts per category" %}</h2>

File spotnet/stats/views.py

         )
 
 from django.db.models import Sum
-from functions import posts_per_day, posts_per_category
+from functions import posts_per_day, posts_per_category, spotnet_table_filesize
 from spotnet.models import Post
 
 
         dict(
             total_posts = Post.objects.count(),
             category_posts = posts_per_category(),
+            table_filesize = spotnet_table_filesize(),
             total_filesize = Post.objects.aggregate(s=Sum('size'))['s'],
             first_post = Post.objects.order_by('posted').only('posted')[0].posted,
             last_post = Post.objects.order_by('-posted').only('posted')[0].posted,

File spotnet/templates/spotnet/viewpost.html

   </tr>
   <tr>
     <th>{% trans "Has nzb" %}</th>
-    <td>{% if post.has_nzb %}{% trans "True" %}{% else %}{% trans "False" %}{% endif %}</td>
+    <td>{% if post.has_nzb %}<a href="{% url spotnet:download_nzb post.id %}">{% trans "True" %}</a>{% else %}{% trans "False" %}{% endif %}</td>
   </tr>
 </table>
 

File spotnet/tests/parsing.py

             'X-XML: im the body but i look line a header',
         )
 
+    def test_parse_xml_category_with_subcategories(self):
+        """A Category node holding text followed by Sub nodes is parsed
+        into a category plus a list of subcategories."""
+        raw = RawPost((None, 123, '<blaat@free.pt>', [
+            'Something: blaat',
+            'X-XML: <Spotnet><Posting><Category>01<Sub>01a03</Sub><Sub>01b03</Sub></Category></Posting></Spotnet>',
+            '',
+            'content',
+        ]))
+        # the raw parsed values: the leading text and the text of each Sub
+        self.assertEqual(
+            raw.extra['Category'],
+            '01',
+        )
+        self.assertEqual(
+            raw.extra['Subcategories'],
+            ['01a03', '01b03'],
+        )
+        self.assertEqual(
+            raw.category,
+            1,
+        )
+        self.assertEqual(
+            raw.subcategories,
+            [u'01a03', u'01b03'],
+        )
+
 
 class EncodingParsingTest(ParsingTest):
 

File spotnet/views.py

 import json
 from django.http import HttpResponse, HttpResponseRedirect, \
     HttpResponseNotFound, HttpResponseForbidden, Http404
+from django.core.servers.basehttp import FileWrapper
 from django.template import RequestContext
 from django.shortcuts import get_object_or_404
 from django.contrib import messages
 
 @authenticate
 def download_nzb(request, id):
-    pass  # TODO
+    post = get_object_or_404(Post, id=id)
+    nzb = post.get_nzb_file()
+    nzb.seek(0, 2)
+    filesize = nzb.tell()
+    nzb.seek(0)
+    response = HttpResponse(FileWrapper(nzb), mimetype='application/x-nzb')
+    response['Content-Disposition'] = (u'attachment; filename=%s.nzb' % post.title).encode('us-ascii', 'replace')
+    response['Content-Length'] = unicode(filesize)
+    return response