Commits

Benoit Boissinot committed 08f77b4

add link / better html

Comments (0)

Files changed (4)

     if c:
         for t in c.findAll('a'):
             flatten(t)
+        for t in c.findAll('p', {'class': 'titre'}):
+            t.attrs = []
+            t.name = 'h1'
+            if t.parent.name == 'div':
+                flatten(t.parent)
+        for t in c.findAll('p', {'class': re.compile('rubrique_\d+')}):
+            level = int(t['class'][9:].lstrip('0'))+1
+            t.attrs = []
+            t.name = 'h%d' % level
         return date, url, cgi.escape(c.renderContents()).decode('utf8')
     raise Exception("No body")
 
 
 defaultfeed = 10
 
+def newarticle(d):
+    try:
+        date, url, html = fetch(d)
+    except Exception, err:
+        logging.info("Failed to fetch %s" % err)
+        return
+    link = "http://www.legifrance.gouv.fr/rechJO.do?champJour=%02d&champMois=%02d&champAnnee=%04d&bouton=Rechercher" % (d.day, d.month, d.year)
+    a = FeedArticle(title="Journal Officiel du %s" % date, link=cgi.escape(link),
+                    id=url, html=html, timestamp=datetime.combine(d, time(6)))
+    a.put()
+
 class Feed(object):
     def __init__(self):
         self.title = "JORF"
-        self.id = "http://www.example.com"
+        self.id = "http://jorf2rss.appspot.com"
         now = datetime.now()
         today = now.date()
         self.last_update = LastUpdate.get_or_insert("fetch", timestamp=now)
         if not last:
             for i in range(defaultfeed):
                 try:
-                    date, url, html = fetch(today - timedelta(i))
+                    newarticle(today - timedelta(i))
                 except Exception, err:
-                    logging.info("Failed fetch %s" % err)
-                    continue
-                a = FeedArticle(title="Journal Officiel du %s" % date, id=url, html=html, timestamp=datetime.combine(today-timedelta(i), time(6)))
-                a.put()
+                    logging.info("Failed to fetch %s" % err)
 
         if self.last_update.timestamp + timedelta(hours=6) < now and last[0].timestamp.date() != today:
             try:
-                date, url, html = fetch(today)
+                newarticle(today)
             except Exception, err:
-                    logging.info("Failed fetch %s" % err)
+                logging.info("Failed to fetch %s" % err)
             else:
-                a = FeedArticle(title="Journal Officiel du %s" % date, id=url, html=html, timestamp=datetime.combine(today-timedelta(i), time(6)))
-                a.put()
                 self.last_update.timestamp = now
                 self.last_update.put()
 
 class FeedArticle(db.Model):
     title = db.StringProperty(required=True)
     id = db.StringProperty(required=True)
+    link = db.StringProperty(required=True)
     html = db.TextProperty(required=True)
     timestamp = db.DateTimeProperty(required=True)
 
     <entry>
         <title>{{ article.title }}</title>
         <id>{{ article.id }}</id>
+        <link rel="alternate" href="{{ article.link }}"/>
         <updated>{{ article.timestamp }}</updated>
         <content type="html">
 		{{ article.html }}
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.