Commits

Dries Desmet committed 29ec627

Working state

Comments (0)

Files changed (8)

example/manage.py

File contents unchanged.

ibpcatalog/ibpparser.py

+from bs4 import BeautifulSoup
+import urllib2
+
+
+class IpbParser(object):
+    """Fetch a feed URL and extract its ``entry`` elements as dicts.
+
+    The document is downloaded and parsed when the instance is created;
+    call ``parse()`` afterwards to populate ``self.results``.
+    """
+
+    def __init__(self, url):
+        # Download the document eagerly; no error handling is done here, so
+        # any failure from urlopen()/read() propagates to the caller.
+        self.page = urllib2.urlopen(url).read()
+        # Parse the raw page with BeautifulSoup using the 'lxml' parser.
+        self.soup = BeautifulSoup(self.page, 'lxml')
+        
+
+    def parse(self):
+        """
+        Build ``self.results``: a list of dicts, one per ``entry`` tag
+        found in ``self.soup``.
+
+        Each dict has the keys ``doc_id``, ``title``, ``productname``,
+        ``companyname``, ``category`` and ``calculatedurl``; every value is
+        the corresponding tag's string encoded as UTF-8.
+
+        Nothing is returned; read ``self.results`` after calling.
+        """
+        # import ipdb; ipdb.set_trace()
+        self.results=[]
+
+        for entry in self.soup.find_all('entry'):
+            # NOTE(review): every lookup below assumes the child tag exists
+            # and has a single string child; if find() or .string yields
+            # None, .encode() raises AttributeError -- confirm the feed
+            # always provides these fields.
+            self.results.append({
+                'doc_id': entry.id.string.encode('utf-8'),
+                'title': entry.title.string.encode('utf-8'),
+                'productname': entry.find('d:productname').string.encode('utf-8'),
+                'companyname': entry.find('d:companyname').string.encode('utf-8'),
+                'category': entry.find('d:categorydescription').string.encode('utf-8'),
+                'calculatedurl': entry.find('d:calculatedurl').string.encode('utf-8'),
+
+            })

ibpcatalog/management/__init__.py

Empty file added.

ibpcatalog/management/commands/__init__.py

Empty file added.

ibpcatalog/management/commands/doparse.py

+from django.core.management.base import BaseCommand, CommandError
+from ibpcatalog.ibpparser import IpbParser
+from ibpcatalog.models import Feed, Entry
+
+class Command(BaseCommand):
+    """Management command that parses every Feed and stores new entries."""
+
+
+    def handle(self, *args, **options):
+        """
+        Parse each Feed's URL and save an Entry for every parsed result
+        that is not already stored.
+
+        A result counts as already stored when an Entry with the same
+        ``doc_id`` and ``title`` exists. Progress is printed to stdout.
+        """
+
+
+        for feed in Feed.objects.all():
+            url = feed.url
+            # Constructing the parser downloads and parses the feed document.
+            ipbparser = IpbParser(url)
+            print "Parsing", url
+            ipbparser.parse()
+
+            print "Number of entries: ",len(ipbparser.results)
+            for result in ipbparser.results:
+                # import ipdb; ipdb.set_trace()
+                try:
+                    # Look for an existing row matching both doc_id and title.
+                    # NOTE(review): the fetched object is never used, and only
+                    # DoesNotExist is handled -- if several rows match, get()
+                    # is expected to raise instead; confirm duplicates cannot
+                    # occur.
+                    database_entry = Entry.objects.get(doc_id=result['doc_id'], title=result['title'])
+                    print "Entry already in the database."
+                    
+                except Entry.DoesNotExist:
+                    # No matching row: create one from the parsed fields and
+                    # link it to the feed currently being processed.
+                    e = Entry(
+                        feed = feed,
+                        doc_id = result['doc_id'],
+                        title = result['title'],
+                        productname = result['productname'],
+                        companyname = result['companyname'],
+                        category = result['category'],
+                        calculatedurl = result['calculatedurl']
+                    )
+                    e.save()
+                    print e,"saved..."
+                    

ibpcatalog/models.py

 
 class Feed(models.Model):
     """
-    (SimpleModel description)
+    TODO: 
     """
     
     name = models.CharField(max_length=255)
     @models.permalink
     def get_absolute_url(self):
         return ('feed_detail', [str(self.slug)])
+
+class Entry(models.Model):
+    """One catalog entry parsed from a Feed's document."""
+    
+    # Feed this entry was parsed from.
+    feed = models.ForeignKey('Feed')
+    # Text of the entry's ``id`` tag as extracted by the parser.
+    doc_id = models.CharField(max_length=255)
+    # Text of the entry's ``title`` tag.
+    title = models.CharField(max_length=255)
+    # Populated from the parser's 'productname' key.
+    productname = models.CharField(max_length=255)
+    # Populated from the parser's 'companyname' key.
+    companyname = models.CharField(max_length=255)
+    # Populated from the parser's 'category' key.
+    category = models.CharField(max_length=255)
+    # Populated from the parser's 'calculatedurl' key.
+    calculatedurl = models.URLField()
     

ibpcatalog/templates/ibpcatalog/feed_detail.html

 {% extends "base.html" %}
 
 {% block content %}
-<dl>
-<dt><dfn>{{ object }}</dfn></dt>
-<dd>{{ object.url }}</dd>
-</dl>
+<ul>
+{% for entry in entry_list %}
+<li>
+	<h1>{{ entry.companyname }}</h1>
+	<table>
+		<tr>
+			<td>Product</td>
+			<td>Titel</td>
+			<td>Link</td>
+		</tr>
+		<tr>
+			<td>{{ entry.productname }}</td>
+			<td>{{ entry.title }}</td>
+			<td><a href="{{ entry.calculatedurl }}">{{ entry.calculatedurl }}</a></td>
+		</tr>
+	</table>
 
-<h1>Entries: {{ enries }}</h1>
-<h1>Number of entries: {{ nentries }} </h1>
-
-{% for entry in entries %}
-<h2>ID: {{ entry.id }}</h2>
-<dl>
-<dt><dfn>entry content</dt></dfn>
-<dd>{{ entry.content }}</dd>
-<dt><dfn>entry link</dt></dfn>
-<dd><a href="{{ entry.link }}">{{ entry.link }}</a></dd>
-<dt><dfn>enclosures</dt></dfn>
-<dd>{{ entry.enclosures }}</dd>
-</dl>
+</li>
 {% endfor %}
+</ul>
 {% endblock %}

ibpcatalog/views.py

 from django.views.generic import ListView, DetailView
-from models import Feed
+from models import Feed, Entry
+
+
 
 class FeedList(ListView):
     model = Feed
 
 class FeedDetail(DetailView):
     model = Feed
-    
+
     def get_context_data(self, **kwargs):
         # Call the base implementation first to get a context
         context = super(FeedDetail, self).get_context_data(**kwargs)
-        import urllib2
-        from BeautifulSoup import BeautifulSoup
-        
-        page = urllib2.urlopen(self.object.url).read()
-        soup = BeautifulSoup(page)        
-        
-        soup.prettify()
-        
-        
-        
-
-       
-
-        context['entries'] = soup
-        # context['feedinfo'] = f.bozo
-        context['nentries'] = len(soup)
+        # Add in a QuerySet of all entries, ordered by company name
+        context['entry_list'] = Entry.objects.order_by('companyname')
+        context['companies'] = Entry.objects.values('companyname').distinct()
         return context