# django-spotnet / spotnet / connection.py

import nntplib
import socket
import errno
import os
import re
import sys
import settings
import zlib
from cStringIO import StringIO
from django.db import transaction, IntegrityError
from django.db.utils import DatabaseError
from models import Post, PostMarker
from post import RawPost, InvalidPost
from nzb import decode_nzb, DecodeNzbError

# TODO:
# maybe decode headers using a function (new in python 3?)
# http://docs.python.org/py3k/library/nntplib.html?highlight=nntp#nntplib.decode_header

def noop(x):
    """Default logger callback: accept one message and discard it."""
    return None


# Fallback credentials handed to nntplib by Connection.connect() when
# settings.SERVER_USERNAME / settings.SERVER_PASSWORD are None.
NEWSSERVER_UNAUTH_USERNAME = None
NEWSSERVER_UNAUTH_PASSWORD = None


# Matches subjects of ordinary binary (non spotnet) posts:
#   [title]"filename"[[ -] 12,34[ ]MB[ -]] yEnc[ (1/234)]
# with optional parts in [].
# Written as a raw string so that \d and \( reach the regex engine untouched
# instead of relying on Python passing unknown string escapes through.
TITLE_SELECTION_REGEXP = re.compile(
    r'^(.+)?".+"(( -)? \d+([,.]\d+)? ?[kKMGTPEZY][bB]( -)?)? yEnc( \(\d+/\d+\))?$'
)


class ConnectionError(Exception):
    """Raised when talking to the news server (or decoding its data) fails.

    NOTE: on Python 3 this name shadows the builtin ``ConnectionError``
    inside this module.
    """


class ConnectError(ConnectionError):
    """Raised by ``Connection.connect`` when the session cannot be set up."""


class NotFoundError(Exception):
    """Raised when the server reports that a requested article is missing."""


#
# Two kinds of ids are used in this module:
#
# - messageid : universally unique id, eg: '<3ae6b2dcccdf42988a85dc314370ba0123@free.pt>'
# - postnumber : server dependent id, eg: '65903' (always use strings in nntplib!)
#


class Connection(object):
    """Class to connect to the nntp server with."""

    def __init__(self, connect=True):
        self._nntp = None
        if connect:
            self.connect()

    # public methods

    def is_connected(self):
        return self._nntp is not None

    def connect(self):
        """Open a session to the server configured in ``settings``.

        On Python older than 3.2 the credentials are passed straight to the
        ``nntplib.NNTP`` constructor.  On 3.2 and newer the connection is
        first upgraded with STARTTLS and the login happens afterwards, so the
        credentials travel over the encrypted channel.

        Raises:
            ConnectError: wrapping any ``nntplib.NNTPError`` or
                ``socket.error`` raised while connecting, upgrading or
                logging in.
        """
        # fall back to the module-level "unauthenticated" credentials
        username = settings.SERVER_USERNAME
        if username is None:
            username = NEWSSERVER_UNAUTH_USERNAME
        password = settings.SERVER_PASSWORD
        if password is None:
            password = NEWSSERVER_UNAUTH_PASSWORD

        try:
            # sys.version is a *string*; only sys.version_info can be
            # compared against a version tuple.
            if sys.version_info[:2] < (3, 2):
                nntp = nntplib.NNTP(
                    host=settings.SERVER_HOST,
                    port=settings.SERVER_PORT,
                    user=username,
                    password=password,
                    readermode=settings.SERVER_READERMODE,
                    usenetrc=False,
                )
            else:
                # try to encrypt connection using ssl
                nntp = nntplib.NNTP(
                    host=settings.SERVER_HOST,
                    port=settings.SERVER_PORT,
                    readermode=settings.SERVER_READERMODE,
                    usenetrc=False,
                )
                # this 'starttls' method is introduced in python 3.2
                nntp.starttls(ssl_context=None)
                # login, now that we might be encrypted; usenetrc=False keeps
                # login() from silently reading ~/.netrc, in line with the
                # constructor call above
                nntp.login(
                    user=username,
                    password=password,
                    usenetrc=False,
                )
        except (nntplib.NNTPError, socket.error) as e:
            raise ConnectError(e)
        self._nntp = nntp

    def disconnect(self):
        if self.is_connected():
            try:
                quitmsg = self._nntp.quit()
            except EOFError:
                # seems to happen for me, but we're still disconnected
                # TODO: find a way to check if we're really disconnected
                # and rethrow the exception if not
                pass
            except socket.error as e:
                if e.errno != errno.EPIPE:
                    raise
            self._nntp = None

    def update(self, logger=noop):
        """Retrieve all new posts of every group in ``settings.UPDATE_GROUPS``.

        ``logger`` is called with one string per progress message; messages
        coming from the per-group update are indented by two spaces.
        """
        logger("Updating spotnet")

        def indented(message):
            return logger('  %s' % message)

        for groupname in settings.UPDATE_GROUPS:
            self.update_group(groupname, logger=indented)

    # private update methods

    def update_group(self, groupname, logger=noop):
        logger("Updating group '%s'" % groupname)
        try:
            gresp = self._nntp.group(groupname)
        except (nntplib.NNTPError, socket.error) as e:
            logger("Failed updating group '%s': %s" % (groupname, e))
            return ConnectionError(e)

        first_on_server, last = gresp[2], gresp[3]  # first < last
        last_in_db = self.get_last_postnumber_in_db()

        first_options = [int(first_on_server)]
        if last_in_db is not None:
            first_options.append(int(last_in_db))
        if settings.UPDATE_MINPOST is not None:
            first_options.append(settings.UPDATE_MINPOST)
        first = max(first_options)

        last = int(last)
        curstart = first
        last_added = None
        while curstart < last + settings.UPDATE_EXTRA:
            x = self.update_group_postnumbers(
                groupname,
                curstart,
                curstart + settings.UPDATE_BULK_COUNT,
                logger=lambda x: logger('  %s' % x),
            )
            if x:
                last_added = x
            curstart += settings.UPDATE_BULK_COUNT

        # store last added post's postnumber
        # since there's not always a way to obtain it
        # from the last messageid alone
        if settings.UPDATE_LAST_STORAGE:
            self.set_last_postnumber_in_db(last_added)

    def update_group_postnumbers(self, groupname, start, end, logger=noop):
        logger("Updating group '%s', block [%d, %d]" % (groupname, start, end))
        try:
            xover = self._nntp.xover(str(start), str(end))
        except (nntplib.NNTPError, socket.error) as e:
            logger(
                "Error updating group '%s', block [%d, %d]: %s"
                % (groupname, start, end, e)
            )
            raise ConnectionError(e)
        last_added = None
        index = 0
        while index < len(xover[1]):
            post = xover[1][index]
            # we limit ourselves here since not all posts
            # seem to provide real spotnet posts
            if post[4][:-1].split('@', 1)[-1] not in settings.UPDATE_DISCARD_ENDINGS:
                if TITLE_SELECTION_REGEXP.match(post[1]) is not None:
                    # TODO: what to do when this is a dispose message?
                    #logger("Skipped non spotnet post %s %s based on subject: %s" % (post[0], post[4], post[1]))
                    index += 1
                    continue
                elif ' yEnc ' in post[1]:
                    logger("Warning: adding post %s %s that might not be from spotnet, subject is: %s" % (post[0], post[4], post[1]))
                if not Post.objects.filter(messageid=post[4]).exists():
                    try:
                        if self.add_post(
                            post[0],
                            post[4],
                            logger=lambda x: logger('  %s' % x),
                        ):
                            last_added = post[0]
                    except socket.error as e:
                        if e.errno == errno.ECONNRESET:
                            # this happens, just reconnect and proceed
                            self.disconnect()
                            self.connect()
                            index -= 1
                        else:
                            raise
            #else:
            #    if self.add_post(post[0], post[4]):
            #        raise Exception("Discarded real post!")
            index += 1
        return last_added

    def get_post_header(self, header, post):
        index = 0
        h = '%s: ' % header
        while not post[3][index].startswith(h) and index < len(post[3]):
            index += 1
        assert post[3][index].startswith(h), \
            "Post %s does not have a '%s' header!" % (post[2], header)
        return post[3][index][len(h):]

    @transaction.commit_on_success
    def add_post(self, postnumber, messageid, logger=noop):
        """Store article ``messageid`` in the database (this does not post
        anything to the server).

        An article whose subject starts with ``'DISPOSE '`` is recorded as a
        negative ``PostMarker`` for the message id named in the subject; any
        other article is parsed into a ``RawPost`` and saved as a ``Post``.

        Returns:
            True only when a new ``Post`` was saved.  False when the article
            could not be fetched, was a dispose message, was invalid, or
            already existed.
        """
        try:
            article = self._nntp.article(messageid)
        except (nntplib.NNTPError, socket.error):
            # TODO: don't give up so easily
            return False

        # check for dispose messages
        subject = self.get_post_header('Subject', article)
        if subject.startswith('DISPOSE '):  # and '@' in subject:
            # it's a dispose message
            disposed_id = '<%s>' % subject[len('DISPOSE '):]
            try:
                PostMarker.objects.create(
                    messageid=disposed_id[:80],
                    person_id=('$%s' % self.get_post_header('From', article))[:180],
                    is_good=False,
                )
            except IntegrityError:
                # this marker must already exist
                pass
            except DatabaseError as e:
                logger("Skipped invalid dispose message %s %s: %s" % (postnumber, messageid, e))
            return False

        # if this isn't a dispose message, add it as a real post
        try:
            raw = RawPost(postnumber, article)
        except InvalidPost as e:
            logger("Skipped invalid post %s %s: %s" % (postnumber, messageid, e))
            return False

        record = Post.from_raw(raw)
        try:
            record.save()
        except IntegrityError as e:
            if str(e) != 'column messageid is not unique':
                raise
            # this post must already exist
            logger(
                "Skipped existing post %s: %s"
                % (raw.postnumber, raw.messageid)
            )
            return False
        except DatabaseError as e:
            # TODO: this is due to a bug in python
            # source: http://stackoverflow.com/questions/3487377/how-can-i-check-a-python-unicode-string-to-see-that-it-actually-is-proper-unic
            # and the fact that some posts aren't really textual posts
            # but some sort of encoded nzb files (my guess)
            if not str(e).startswith('invalid byte sequence for encoding'):
                raise
            logger("Skipped invalid post %s %s: %s" % (postnumber, messageid, e))
            return False

        logger("Added post %s: %s" % (raw.postnumber, raw.messageid))
        return True

    def get_raw_post(self, messageid):
        """Fetch article ``messageid`` and wrap it in a ``RawPost``.

        Raises ``ConnectionError`` when the server request fails.
        """
        try:
            article = self._nntp.article(messageid)
        except (nntplib.NNTPError, socket.error) as e:
            raise ConnectionError(e)
        # TODO: maybe we do need the postnumber
        return RawPost(None, article)

    def verify_post(self, post):
        """Return True when no ``settings.VERIFICATION_KEYS`` are configured.

        Verification against configured keys is not implemented yet and
        raises ``NotImplementedError``.
        """
        keys = settings.VERIFICATION_KEYS
        if keys is not None and len(keys) != 0:
            raise NotImplementedError  # TODO
        return True

    # public functionality methods

    def get_nzb(self, post):
        """Retrieve the nzb for a post; returned is the decoded nzb content.

        The bodies of all articles listed in ``post.nzb`` are downloaded into
        one buffer, in order, and the result is passed to ``decode_nzb``.

        Raises:
            NotFoundError: if the server answers one of the body requests
                with status code 430.
            nntplib.NNTPTemporaryError: for any other temporary error.
            ConnectionError: if the downloaded data cannot be decoded.
        """
        assert self.is_connected()
        zipped = StringIO()  # TODO: maybe replace this with os.tmpfile
        try:
            for messageid in post.nzb:
                self._nntp.body('<%s>' % messageid, zipped)
            content = zipped.getvalue()
        except nntplib.NNTPTemporaryError as e:
            # compare the status code only: the text that follows it is
            # free-form and differs from server to server
            if e.response[:3] == '430':
                raise NotFoundError(e.response)
            raise
        finally:
            zipped.close()

        try:
            return decode_nzb(content)
        except DecodeNzbError as e:
            raise ConnectionError(e.msg)

    def get_comments(self, post):
        "Retrieves the comments for a post"
        assert self.is_connected()
        raise NotImplementedError  # TODO

    # internal utility methods

    def get_last_messageid_in_db(self):
        """Return the messageid of the stored post with the latest
        ``posted`` value, or None when no posts are stored."""
        newest_first = Post.objects.order_by('-posted').only('messageid')
        try:
            newest = newest_first[0]
        except IndexError:
            return None
        return newest.messageid

    def get_last_postnumber_in_db(self):
        # to be server independend,
        # we get the last messageid
        # and then get the corresponding
        # postnumber
        messageid = self.get_last_messageid_in_db()
        if messageid is None:
            return None
        try:
            stat = self._nntp.stat(messageid)
        except (nntplib.NNTPError, socket.error) as e:
            return None
        else:
            return stat[1]

    def set_last_postnumber_in_db(self, last_added):
        """Persist ``last_added``: not implemented yet.

        Does nothing while ``settings.UPDATE_LAST_STORAGE`` is falsy and
        raises ``NotImplementedError`` otherwise.
        """
        if not settings.UPDATE_LAST_STORAGE:
            return
        raise NotImplementedError
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.