Source

owen&yufei / 234 / douban_parse3.py

The default branch has multiple heads

Full commit
#!/usr/bin/env python
#coding=utf-8

# Imports
import urllib2
from BeautifulSoup import BeautifulSoup 

#赋值抓取、关闭
url = open('douban_parse3.html','r')
html = url.read()
p_h = BeautifulSoup(html)

#输出 a,b。
print "网站名称:", p_h.title.text
print "有多少个类:", len(p_h.findAll('class'))
print "有多少个div:", len(p_h.findAll('div'))
print "有多少个标题:", len(p_h.findAll('title'))
print "有多少个脚本(script):", len(p_h.findAll('script'))
print '=' * 70
print "最后一个div内容"
print p_h.findAll('div')[-1].prettify()
print '=' * 70
print "最后一个脚本内容"
print p_h.findAll('script')[-1].prettify()