Commits

Jakub Zalewski committed 5e51a21

Added `hn_url` and `date_posted` to saved stories.

  • Participants
  • Parent commits 1cac142

Comments (0)

Files changed (1)

File hackernews.py

 from pyquery import PyQuery as pq
 import pystache
 import json
+import datetime
 
 BASE_PATH = os.path.dirname(__file__)
 COOKIE = os.path.join(BASE_PATH, 'hackernews.cookie')
 
     # Grab the stories.
     J = pq(kwargs['r'].content)
-    stories = J('table table td.title')
+    stories = J('table table td.title')    
+    stories_meta = J('table table td.subtext')
+    story_meta = []
+    for meta in stories_meta:
+        hn_url = J('a:last', meta).attr('href')
+        J('span, a', meta).remove()
+        days, unit = J(meta).text().replace('by', '').replace('|', '').replace('ago', '').strip().split()
+        if unit != 'days':
+            days = 0
+        date_posted = (datetime.datetime.now() + datetime.timedelta(days=-int(days))).strftime('%Y-%m-%d')
+        story_meta.append({'date_posted': date_posted, 'hn_url': hn_url})
+        i = 0
 
     for story in stories:
         title = J(story).text()
         url = J('a', story).attr('href')
 
-        # Skip digit-only <td>s and the 'More' link.        
-        if not re.match('\d+|\/x\?', title):
+        # Skip digit-only <td>s and the 'More' link.      
+        if not re.match('\d+', title) and not re.match('\/x\?', url):
 
             # For HN links, make absolute URL.
             if not url.startswith('http'):
             kwargs['saved'].append({
                 'title': title,
                 'url': url,
+                'date_posted': story_meta[i]['date_posted'],
+                'hn_url': story_meta[i]['hn_url'],
             })
 
     # If we're getting all saved stories.