Source

owen&yufei / 234 / h_parse2.py

The default branch has multiple heads

#!/usr/bin/env python
#coding=utf-8

# Imports
import urllib2
from BeautifulSoup import BeautifulSoup 

#赋值抓取、关闭
u = open('douban.html','r')
h = u.read()
p_h = BeautifulSoup(h)

#输出 a,b。
print '网站名称:', p_h.title.text
print "有多少个div:" len(p_h.findAll('div'))
print "有多少个标题:" len(p_h.findAll('title'))
print "有多少个类:" len(p_h.findAll('class'))
print "有多少个脚本:" len(p_h.findAll('script'))