Commits

Colin Copeland committed 80b58c3

add aggregator app (extracted from djangoproject.com)

Comments (0)

Files changed (16)

lib/copelco/apps/aggregator/__init__.py

Empty file added.

lib/copelco/apps/aggregator/admin.py

+from django.contrib import admin
+
+from copelco.apps.aggregator import models as aggregator
+from copelco.apps.aggregator.tasks import UpdateFeed
+
+
+class FeedAdmin(admin.ModelAdmin):
+    list_display = ('title', 'feed_type', 'public_url')
+    list_filter = ('feed_type', 'is_defunct')
+    ordering = ('title',)
+    search_fields = ('title', 'public_url')
+    
+    def save_model(self, request, obj, form, change):
+        obj.save()
+        UpdateFeed.delay(feed_id=obj.pk)
+
+
+class FeedItemAdmin(admin.ModelAdmin):
+    list_display = ('title', 'feed', 'date_modified')
+    list_filter = ('date_modified', 'feed',)
+    ordering = ('-date_modified',)
+    search_fields = ('feed__title', 'feed__public_url', 'title')
+    date_heirarchy = ('date_modified',)
+
+
+admin.site.register(aggregator.Feed, FeedAdmin)
+admin.site.register(aggregator.FeedItem, FeedItemAdmin)

lib/copelco/apps/aggregator/feeds.py

+from __future__ import absolute_import
+
+from django.core import urlresolvers
+from django.contrib.syndication.views import Feed
+from django.shortcuts import get_object_or_404
+from .models import FeedType, FeedItem
+
+class BaseCommunityAggregatorFeed(Feed):
+    def item_title(self, item):
+        return item.title
+
+    def item_description(self, item):
+        return item.summary
+
+    def item_guid(self, item):
+        return item.guid
+
+    def item_link(self, item):
+        return item.link
+
+    def item_author_name(self, item):
+        return item.feed.title
+
+    def item_author_link(self, item):
+        return item.feed.public_url
+
+    def item_pubdate(self, item):
+        return item.date_modified
+
+class CommunityAggregatorFeed(BaseCommunityAggregatorFeed):
+    def get_object(self, request, slug=None):
+        return get_object_or_404(FeedType, slug=slug)
+
+    def items(self, obj):
+        qs = FeedItem.objects.filter(feed__feed_type=obj)
+        qs = qs.order_by('-date_modified')
+        qs = qs.select_related('feed', 'feed__feed_type')
+        return qs[:25]
+
+    def title(self, obj):
+        return "Django community aggregator: %s" % obj.name
+
+    def link(self, obj):
+        return urlresolvers.reverse('aggregator-feed', args=[obj.slug])
+
+    def description(self, obj):
+        return self.title(obj)
+
+class CommunityAggregatorFirehoseFeed(BaseCommunityAggregatorFeed):
+    title = 'Django community aggregator firehose'
+    description = 'All activity from the Django community aggregator'
+
+    def link(self):
+        return urlresolvers.reverse('aggregator-firehose-feed')
+
+    def items(self):
+        qs = FeedItem.objects.order_by('-date_modified').select_related('feed')
+        return qs[:50]

lib/copelco/apps/aggregator/forms.py

+from __future__ import absolute_import
+
+from django import forms
+from django.forms import widgets
+from .models import Feed, FeedType
+
+class FeedModelForm(forms.ModelForm):
+    title = forms.CharField(max_length=250,
+                            help_text="title of the resource / blog.")
+    feed_url = forms.URLField(label='Feed URL',
+                              help_text="link to the RSS/Atom feed. Please only use Django-specific feeds.")
+    public_url = forms.URLField(label='Public URL',
+                                help_text="link to main page (i.e. blog homepage)")
+
+    class Meta:
+        model = Feed
+        exclude = ('is_defunct', 'feed_type', 'owner')

lib/copelco/apps/aggregator/management/__init__.py

Empty file added.

lib/copelco/apps/aggregator/management/commands/__init__.py

Empty file added.

lib/copelco/apps/aggregator/management/commands/mark_defunct_feeds.py

+import urllib2
+from django.core.management.base import BaseCommand
+from django_website.apps.aggregator.models import Feed
+
+class Command(BaseCommand):
+    """
+    Mark people with 404'ing feeds as defunct.
+    """
+    def handle(self, *args, **kwargs):
+        verbose = kwargs.get('verbosity')
+        for f in Feed.objects.all():
+            try:
+                r = urllib2.urlopen(f.feed_url)
+            except urllib2.HTTPError, e:
+                if e.code == 404 or e.code == 500:
+                    if verbose:
+                        print "%s on %s; marking defunct" % (e.code, f)
+                    f.is_defunct = True
+                    f.save()
+                else:
+                    raise

lib/copelco/apps/aggregator/management/commands/update_feeds.py

+import datetime
+import feedparser
+import optparse
+import os
+import socket
+import sys
+import time
+import threading
+import Queue
+from django.core.management.base import BaseCommand
+from django_website.aggregator.models import Feed, FeedItem
+
+class Command(BaseCommand):
+    """
+    Update feeds for Django community page.  Requires Mark Pilgrim's excellent
+    Universal Feed Parser (http://feedparser.org)
+    """
+    LOCKFILE = "/tmp/update_feeds.lock"
+    
+    option_list = BaseCommand.option_list + (
+        optparse.make_option('-t', '--threads',
+            metavar='NUM',
+            type='int',
+            default=4,
+            help='Number of updater threads (default: 4).'
+        ),
+    )
+
+    def handle(self, *args, **kwargs):
+        try:
+            lockfile = os.open(self.LOCKFILE, os.O_CREAT | os.O_EXCL)
+        except OSError:
+            print >> sys.stderr, "Lockfile exists (%s). Aborting." % self.LOCKFILE
+            sys.exit(1)
+
+        try:
+            verbose = int(kwargs['verbosity']) > 0
+        except (KeyError, TypeError, ValueError):
+            verbose = True
+            
+        try:
+            socket.setdefaulttimeout(15)
+            self.update_feeds(verbose=verbose, num_threads=kwargs['threads'])
+        except:
+            sys.exit(1)
+        finally:
+            os.close(lockfile)
+            os.unlink(self.LOCKFILE)
+
+    def update_feeds(self, verbose=False, num_threads=4):
+        feed_queue = Queue.Queue()
+        for feed in Feed.objects.filter(is_defunct=False):
+            feed_queue.put(feed)
+
+        threadpool = []
+        for i in range(num_threads):
+            threadpool.append(FeedUpdateWorker(q=feed_queue, verbose=verbose))
+            
+        [t.start() for t in threadpool]
+        [t.join() for t in threadpool]
+
+class FeedUpdateWorker(threading.Thread):
+    
+    def __init__(self, q, verbose, **kwargs):
+        super(FeedUpdateWorker, self).__init__(**kwargs)
+        self.daemon = True
+        self.verbose = verbose
+        self.q = q
+        
+    def run(self):
+        while 1:
+            try:
+                feed = self.q.get_nowait()
+            except Queue.Empty:
+                return
+            self.update_feed(feed)
+            self.q.task_done()
+            
+    def update_feed(self, feed):
+        if self.verbose:
+            print feed
+        
+        parsed_feed = feedparser.parse(feed.feed_url)
+        for entry in parsed_feed.entries:
+            # Parse out the entry, handling all the fun stuff that feeds can do.
+            title = entry.title
+            guid = entry.get("id", entry.link)
+            link = entry.link
+
+            if not guid:
+                guid = link
+                        
+            if hasattr(entry, "summary"):
+                content = entry.summary
+            elif hasattr(entry, "content"):
+                content = entry.content[0].value
+            elif hasattr(entry, "description"):
+                content = entry.description
+            else:
+                content = u""
+
+            try:
+                if entry.has_key('modified_parsed'):
+                    date_modified = datetime.datetime.fromtimestamp(time.mktime(entry.modified_parsed))
+                elif parsed_feed.feed.has_key('modified_parsed'):
+                    date_modified = datetime.datetime.fromtimestamp(time.mktime(parsed_feed.feed.modified_parsed))
+                elif parsed_feed.has_key('modified'):
+                    date_modified = datetime.datetime.fromtimestamp(time.mktime(parsed_feed.modified))
+                else:
+                    date_modified = datetime.datetime.now()
+            except TypeError:
+                date_modified = datetime.datetime.now()
+            
+            FeedItem.objects.create_or_update_by_guid(guid,
+                feed = feed,
+                title = title,
+                link = link,
+                summary = content,
+                date_modified = date_modified
+            )

lib/copelco/apps/aggregator/models.py

+from django.db import models
+
+
+class Feed(models.Model):
+    FEED_TYPES = (
+        ('rss', 'Really Simple Syndication (RSS)'),
+    )
+
+    title = models.CharField(max_length=500)
+    feed_url = models.URLField(unique=True, max_length=500)
+    public_url = models.URLField(max_length=500)
+    is_defunct = models.BooleanField()
+    feed_type = models.CharField(choices=FEED_TYPES, max_length=16)
+
+    def __unicode__(self):
+        return self.title
+
+
+class FeedItemManager(models.Manager):
+    def create_or_update_by_guid(self, guid, **kwargs):
+        """
+        Look up a FeedItem by GUID, updating it if it exists, and creating
+        it if it doesn't.
+        
+        We don't limit it by feed because an item could be in another feed if
+        some feeds are themselves aggregators. That's also why we don't update
+        the feed field if the feed item already exists.
+        
+        Returns (item, created) like get_or_create().
+        """
+        try:
+            item = self.get(guid=guid)
+        
+        except self.model.DoesNotExist:
+            # Create a new item
+            kwargs['guid'] = guid
+            item = self.create(**kwargs)
+            
+        else:
+            # Update an existing one.
+            kwargs.pop('feed', None)
+            
+            # Don't update the date since most feeds get this wrong.
+            kwargs.pop('date_modified')
+            
+            for k,v in kwargs.items():
+                setattr(item, k, v)
+            item.save()
+            
+        return item
+
+
+class FeedItem(models.Model):
+    feed = models.ForeignKey(Feed)
+    title = models.CharField(max_length=500)
+    link = models.URLField(max_length=500)
+    summary = models.TextField(blank=True)
+    date_modified = models.DateTimeField()
+    guid = models.CharField(max_length=500, unique=True, db_index=True)
+
+    objects = FeedItemManager()
+
+    class Meta:
+        ordering = ("-date_modified",)
+
+    def __unicode__(self):
+        return self.title
+
+    def get_absolute_url(self):
+        return self.link
+

lib/copelco/apps/aggregator/tasks.py

+import time
+import datetime
+import feedparser
+
+from celery.task import Task
+from celery.registry import tasks
+
+from copelco.apps.aggregator.models import Feed, FeedItem
+
+
+class UpdateFeed(Task):
+    def run(self, feed_id):
+        logger = self.get_logger()
+        logger.debug("Looking up feed #{0}".format(feed_id))
+        feed = Feed.objects.get(pk=feed_id)
+        parsed_feed = feedparser.parse(feed.feed_url)
+        for entry in parsed_feed.entries:
+            # Parse out the entry, handling all the fun stuff that feeds can do.
+            title = entry.title
+            logger.debug("Title: {0}".format(title))
+            guid = entry.get("id", entry.link)
+            link = entry.link
+
+            if not guid:
+                guid = link
+                        
+            if hasattr(entry, "summary"):
+                content = entry.summary
+            elif hasattr(entry, "content"):
+                content = entry.content[0].value
+            elif hasattr(entry, "description"):
+                content = entry.description
+            else:
+                content = u""
+
+            try:
+                if entry.has_key('modified_parsed'):
+                    date_modified = datetime.datetime.fromtimestamp(time.mktime(entry.modified_parsed))
+                elif parsed_feed.feed.has_key('modified_parsed'):
+                    date_modified = datetime.datetime.fromtimestamp(time.mktime(parsed_feed.feed.modified_parsed))
+                elif parsed_feed.has_key('modified'):
+                    date_modified = datetime.datetime.fromtimestamp(time.mktime(parsed_feed.modified))
+                else:
+                    date_modified = datetime.datetime.now()
+            except TypeError:
+                date_modified = datetime.datetime.now()
+            
+            FeedItem.objects.create_or_update_by_guid(guid,
+                feed = feed,
+                title = title,
+                link = link,
+                summary = content,
+                date_modified = date_modified
+            )
+tasks.register(UpdateFeed)
+

lib/copelco/apps/aggregator/templatetags/__init__.py

Empty file added.

lib/copelco/apps/aggregator/templatetags/aggregator.py

+from __future__ import absolute_import
+
+from django import template
+from .models import Feed
+
+class FeedListNode(template.Node):
+    def __init__(self, varname):
+        self.varname = varname
+
+    def render(self, context):
+        context[self.varname] = Feed.objects.filter(is_defunct=False)
+        return ''
+
+def do_get_feed_list(parser, token):
+    """
+    {% get_feed_list as feed_list %}
+    """
+    bits = token.contents.split()
+    if len(bits) != 3:
+        raise template.TemplateSyntaxError, "'%s' tag takes two arguments" % bits[0]
+    if bits[1] != "as":
+        raise template.TemplateSyntaxError, "First argument to '%s' tag must be 'as'" % bits[0]
+    return FeedListNode(bits[2])
+
+register = template.Library()
+register.tag('get_feed_list', do_get_feed_list)

lib/copelco/apps/aggregator/urls.py

+from __future__ import absolute_import
+
+from django.conf.urls.defaults import *
+from . import views
+
+urlpatterns = patterns('', 
+    url(r'^$', 
+        views.index,
+        name = 'community-index'
+    ),
+    url(r'^mine/$', 
+        views.my_feeds,
+        name = 'community-my-feeds'
+    ),
+    url(
+        r'^(?P<feed_type_slug>[-\w]+)/$',
+        views.feed_list,
+        name = "community-feed-list"
+    ),
+    url(
+        r'^add/(?P<feed_type_slug>[-\w]+)/$',
+        views.add_feed,
+        name = 'community-add-feed'
+    ),
+    url(
+        r'^edit/(?P<feed_id>\d+)/$',
+        views.edit_feed,
+        name = 'community-edit-feed'
+    ),
+    url(
+        r'^delete/(?P<feed_id>\d+)/$',
+        views.delete_feed,
+        name = 'community-delete-feed'
+    ),
+)
+

lib/copelco/apps/aggregator/views.py

+from __future__ import absolute_import
+
+from django.shortcuts import render_to_response, get_object_or_404, redirect
+from django.template import RequestContext
+from django.contrib.auth.decorators import login_required
+from django.http import HttpResponseRedirect
+from django.core.urlresolvers import reverse
+from django.views.generic.list_detail import object_list
+from .models import FeedItem, Feed, FeedType
+from .forms import FeedModelForm
+from ..shortcuts import render
+
+def index(request):
+    """
+    Displays the latest feeds of each type.
+    """
+    ctx = {'feedtype_list': FeedType.objects.all()}
+    return render(request, 'aggregator/index.html', ctx)
+
+def feed_list(request, feed_type_slug):
+    """
+    Shows the latest feeds for the given type.
+    """
+    feed_type = get_object_or_404(FeedType, slug=feed_type_slug)
+    return object_list(request, 
+        queryset = FeedItem.objects.filter(feed__feed_type=feed_type), 
+        paginate_by = 25,
+        extra_context = {'feed_type': feed_type},
+    )
+
+@login_required
+def my_feeds(request):
+    """
+    Lets the user see, edit, and delete all of their owned feeds.
+    """
+    feed_types = FeedType.objects.all()
+    if not request.user.is_superuser:
+        feed_types = feed_types.filter(can_self_add=True)
+
+    ctx = {
+        'feeds': Feed.objects.filter(owner=request.user),
+        'feed_types': feed_types
+    }
+    return render(request, 'aggregator/my-feeds.html', ctx)
+
+@login_required
+def add_feed(request, feed_type_slug):
+    """
+    Lets users add new feeds to the aggregator.
+
+    Users only get to add new feeds of types indicated as "can self add."
+    """
+    ft = get_object_or_404(FeedType, slug=feed_type_slug, can_self_add=True)
+    if not ft.can_self_add and not request.user.is_superuser:
+        return render(request, 'aggregator/denied.html')
+        
+    instance = Feed(feed_type=ft, owner=request.user)
+    f = FeedModelForm(request.POST or None, instance=instance)
+    if f.is_valid():
+        f.save()
+        return redirect('community-index')
+
+    ctx = {'form': f, 'feed_type': ft, 'adding': True}
+    return render(request, 'aggregator/edit-feed.html', ctx)
+
+@login_required
+def edit_feed(request, feed_id):
+    """
+    Lets a user edit a feed they've previously added.
+
+    Only feeds the user "owns" can be edited.
+    """
+    feed = get_object_or_404(Feed, pk=feed_id, owner=request.user)
+    f = FeedModelForm(request.POST or None, instance=feed)
+    if f.is_valid():
+        f.save()
+        return redirect('community-my-feeds')
+    
+    ctx = {'form': f, 'feed': feed, 'adding': False}
+    return render(request, 'aggregator/edit-feed.html', ctx)
+
+@login_required
+def delete_feed(request, feed_id):
+    """
+    Lets a user delete a feed they've previously added.
+
+    Only feeds the user "owns" can be deleted.
+    """
+    feed = get_object_or_404(Feed, pk=feed_id, owner=request.user)
+    if request.method == 'POST':
+        feed.delete()
+        return redirect('community-my-feeds')
+    return render(request, 'aggregator/delete-confirm.html', {'feed': feed})

lib/copelco/settings.py

     'djcelery',
     'copelco.apps.default',
     'copelco.apps.estate',
+    'copelco.apps.aggregator',
 ]
 
 STATICFILES_DIRS = (

requirements/apps.txt

 BeautifulSoup==3.2.0
 celery==2.2.6
 django-celery==2.2.4
+feedparser==5.0.1