#!/usr/local/bin/python
"""Scrape Path of Exile class-forum thread listings into a SQLite database.

For every forum in FORUMS the script walks the paginated thread listing
until a page containing the text 'No Threads' is returned, and stores one
row per thread (forum, title, views, replies, page, url) in the ``threads``
table of a database file that sits next to this script.  The database file
is deleted and rebuilt on every run.
"""
import os
import sqlite3
from contextlib import closing

import requests
from bs4 import BeautifulSoup as BS4

URLTEMPLATE = (r'http://www.pathofexile.com/forum/view-forum'
               r'/%(forumid)s/page/%(page)s')

# Forum name -> numeric forum id used in the listing URL.
FORUMS = {
    'duelist': 40,
    'marauder': 23,
    'ranger': 24,
    'scion': 436,
    'shadow': 303,
    'templar': 41,
    'witch': 22,
}

# "<script path without extension>.db".  Resolve __file__ to an absolute path
# first: joining dirname(__file__) with a relative __file__ would repeat the
# directory part (e.g. "dir/dir/script.db") when run as `python dir/script.py`.
DBFILE = os.path.splitext(os.path.abspath(__file__))[0] + '.db'

# Every run starts from an empty database.
if os.path.exists(DBFILE):
    os.unlink(DBFILE)

dbi = sqlite3.connect(DBFILE)
try:
    with closing(dbi.cursor()) as csr:
        csr.execute('''create table if not exists threads(
            forum text not null,
            title text not null,
            views integer not null,
            replies integer not null,
            page integer not null,
            url text primary key);
        ''')
        csr.execute('create index if not exists threads_idx_forum on threads(forum);')
        csr.execute('create index if not exists threads_idx_title on threads(title);')
        csr.execute('create index if not exists threads_idx_views on threads(views);')
        csr.execute('create index if not exists threads_idx_replies on threads(replies);')
        csr.execute('create index if not exists threads_idx_page on threads(page);')

        # The sqlite3 module opens a transaction implicitly before the first
        # INSERT; all rows are committed together by dbi.commit() below.
        for forum, forumid in FORUMS.items():
            pageno = 1
            while True:
                pageurl = URLTEMPLATE % {'page': pageno, 'forumid': forumid}
                # A timeout keeps a stalled connection from hanging the run.
                response = requests.get(pageurl, timeout=30)
                body = response.text
                # The listing signals "past the last page" with this text.
                if 'No Threads' in body:
                    break
                # Anything else that is an HTTP error is a real failure.
                response.raise_for_status()

                # Name the parser explicitly so results do not depend on
                # which optional parsers happen to be installed.
                soup = BS4(body, 'html.parser')
                table = soup.find('table', id='view_forum_table')
                if table is None:
                    raise RuntimeError(
                        'no thread table found at %s' % pageurl)

                for row in table.find_all('tr'):
                    thread = row.find('td', class_='thread')
                    if thread is None:
                        # Header or other non-thread row.
                        continue
                    link = thread.find(class_='title').find('a')
                    threadurl = link['href']
                    if threadurl.startswith('/'):
                        threadurl = 'http://www.pathofexile.com%s' % threadurl
                    title = link.text
                    views = int(row.find('td', class_='views').text)
                    replies = int(row.find('td', class_='replies').text)
                    # "or ignore": a thread can reappear on a later page if
                    # the listing shifts while it is being crawled; keep the
                    # first sighting instead of failing on the primary key.
                    csr.execute(
                        'insert or ignore into threads values (?, ?, ?, ?, ?, ?)',
                        (forum, title, views, replies, pageno, threadurl))

                print('FORUM %s: PAGE %s' % (forum, pageno))
                pageno += 1

    # Let a failed commit surface instead of silently losing the scrape.
    dbi.commit()
finally:
    dbi.close()
Comments (0)
HTTPS SSH
You can clone a snippet to your computer for local editing.
Learn more.