Commits

Mauro Baraldi committed 5fcb959

Crawler for the www.batchpcb.com site. They don't have a search feature. =/

Comments (0)

Files changed (1)

+#!/usr/bin/env python
+# Crawling items list from http://www.batchpcb.com
+# They don't have a search in site!! =/
+
+import os
+import requests
+from BeautifulSoup import BeautifulSoup
+
+base_dir = os.path.dirname(__file__)
+results = os.path.join(base_dir, 'boards.csv')
+
+base_url = 'https://www.batchpcb.com/?pcb_page=%i'
+item_url = 'https://www.batchpcb.com/pcbs/%i'
+
+html = lambda url, page: BeautifulSoup(requests.get(url % page, verify=False).content)
+
+pages = int(html(base_url, 1).find('div', {'class':'pagination'}).findAll('li')[-2].text)
+
+with open(results,'wb') as fp:
+    for page in range(1,pages+1):
+        print 'Page %s' % page
+        items = html(base_url, page).findAll('div',{'class':'span3 design boxshadow'})
+        print '  id, model, layers, price, width, height, area'
+        for item in items:
+            _id_ = int(item.a['href'].replace('/pcbs/',''))
+            _item_ = html(item_url, _id_)
+            info = _item_.find('div', {'id':'info'}).find('table').findAll('tr')
+            model = _item_.find('h2', {'class':'bread'}).text
+            layers = info[4].text.replace('Layers:','')
+            price = info[5].text.replace('Price:','')
+            width = info[6].text.replace('Width:','')
+            height = info[7].text.replace('Height:','')
+            area = info[8].text.replace('Area:','')
+            print '  %i, %s, %s, %s, %s, %s, %s' % (_id_, model, layers, price, width, height, area)
+            fp.write('%i, %s, %s, %s, %s, %s, %s/n' % (_id_, model, layers, price, width, height, area))