Commits

Lammert Hilarides committed f452481 Draft

update scheepsbouw_nl scraper

Comments (0)

Files changed (1)

scraper/management/commands/scrape_scheepsbouw_nl.py

                 # Build the absolute link: base_url + the href of the first <a class="item_title"> element, converted to unicode
             link = base_url + smart_unicode(li.cssselect('a[class=item_title]')[0].get('href'))
             title = smart_unicode(li.cssselect('a[class=item_title]')[0].text_content().strip())
-            description = smart_unicode(li.cssselect('div[class=item_description]')[0].text_content().strip())
-
+            if len(li.cssselect('div[class=item_description]')) > 0:
+                description = smart_unicode(li.cssselect('div[class=item_description]')[0].text_content().strip())
+            elif len(li.cssselect('div[class=item_description_photo]')) > 0:
+                description = smart_unicode(li.cssselect('div[class=item_description_photo]')[0].text_content().strip())
             if not scraper.items.filter(link=link):
                 item = ScraperItem(scraper=scraper, guid=link, link=link, title=title[:255], description=description[:1024], pubdate=timezone.localtime(timezone.now()))
                 item.save()