# Created by Kamil Banach
# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup, Tag
import re, urllib2
def loadPage(code):
    """Fetch a page over HTTP(S) and return the raw response body.

    Args:
        code: The full URL to request. (Despite the parameter name, the
            value is passed straight to ``urllib2.Request`` as the URL.)

    Returns:
        The response body exactly as returned by ``read()``.

    Raises:
        urllib2.URLError: Propagated from ``opener.open`` on network or
            HTTP failures (no errors are caught here).
    """
    request = urllib2.Request(code)

    # An explicit HTTPHandler is installed only so that its debug level can
    # be raised to 1.
    handler = urllib2.HTTPHandler()
    handler.set_http_debuglevel(1)

    opener = urllib2.build_opener(handler)
    # NOTE(review): the 'Mozillla' spelling is kept byte-for-byte from the
    # original header value; confirm whether it is intentional.
    opener.addheaders = [('user-agent', 'Mozillla/5.0')]

    response = opener.open(request)
    try:
        return response.read()
    finally:
        # Always release the underlying connection, even if read() fails.
        response.close()
# Script body: for every code listed in files.txt (one per line), download
# the corresponding page and print "<code>;<heading text>" for each <h1>
# element found on it.

# URL prefix of the page to fetch; the code read from files.txt is appended
# as the value of the `kod` query parameter.
urlPrefix = "https://usos.wat.edu.pl/kontroler.php?_action=katalog2/przedmioty/pokazPrzedmiot&kod="

# `with` guarantees the input file is closed even if a request fails midway.
with open("files.txt") as codeList:
    for code in codeList:
        # Strip the trailing newline/whitespace once and reuse the result.
        courseCode = code.rstrip()
        if not courseCode:
            # Skip blank lines (e.g. a trailing empty line) instead of
            # issuing a request with an empty `kod` value.
            continue

        content = loadPage(urlPrefix + courseCode)
        soup = BeautifulSoup(content)

        for h1 in soup.find_all("h1"):
            # Parenthesised single expression: prints the same text under
            # the Python 2 print statement.
            print(courseCode + ";" + h1.get_text())
|