Snippets

Dennis Hedegaard poegetter.py

Created by Dennis Hedegaard

File poegetter.py Added

  • Ignore whitespace
  • Hide word diff
+#!/usr/local/bin/python
+import os
+import requests
+import sqlite3
+from contextlib import closing
+
+from bs4 import BeautifulSoup as BS4
+
+
+URLTEMPLATE = (r'http://www.pathofexile.com/forum/view-forum'
+               r'/%(forumid)s/page/%(page)s')
+FORUMS = {
+    'duelist': 40,
+    'marauder': 23,
+    'ranger': 24,
+    'scion': 436,
+    'shadow': 303,
+    'templar': 41,
+    'witch': 22,
+}
+DBFILE = os.path.abspath(os.path.join(
+    os.path.dirname(__file__), os.path.splitext(__file__)[0] + '.db',
+))
+
+if os.path.exists(DBFILE):
+    os.unlink(DBFILE)
+
+dbi = sqlite3.connect(DBFILE)
+
+with closing(dbi.cursor()) as csr:
+    csr.execute('''
+create table if not exists threads(
+    forum text not null,
+    title text not null,
+    views integer not null,
+    replies integer not null,
+    page integer not null,
+    url text primary key
+);
+    ''')
+    csr.execute(
+        'create index if not exists threads_idx_forum on threads(forum);')
+    csr.execute(
+        'create index if not exists threads_idx_title on threads(title);')
+    csr.execute(
+        'create index if not exists threads_idx_views on threads(views);')
+    csr.execute(
+        'create index if not exists threads_idx_replies on threads(replies);')
+    csr.execute(
+        'create index if not exists threads_idx_page on threads(page);')
+    csr.execute('begin;')
+
+for forum, forumid in FORUMS.items():
+    pageno = 1
+    while True:
+        url = URLTEMPLATE % {'page': pageno, 'forumid': forumid}
+        body = requests.get(url).text
+
+        if 'No Threads' in body:
+            break
+
+        soup = BS4(body)
+        table = soup.find('table', id='view_forum_table')
+        for row in table.find_all('tr'):
+            thread = row.find('td', class_='thread')
+            if thread:
+                link = thread.find(class_='title').find('a')
+
+                url = link['href']
+                if url.startswith('/'):
+                    url = 'http://www.pathofexile.com%s' % url
+
+                title = link.text
+                views = int(row.find('td', class_='views').text)
+                replies = int(row.find('td', class_='replies').text)
+                with closing(dbi.cursor()) as csr:
+                    csr.execute(
+                        'insert into threads values (?, ?, ?, ?, ?, ?)',
+                        (forum, title, views, replies, pageno, url))
+        print 'FORUM %s: PAGE %s' % (forum, pageno)
+        pageno += 1
+
+with closing(dbi.cursor()) as csr:
+    try:
+        csr.execute('commit;')
+    except:
+        pass
+
+try:
+    dbi.close()
+except:
+    pass
HTTPS SSH

You can clone a snippet to your computer for local editing. Learn more.