Commits

Petar Marić committed 686b927

Updated author and video metadata markers as TED updated their HTML layout

  • Participants
  • Parent commits 108f2b4

Comments (0)

Files changed (1)

File metaTED/crawler/get_talk_info.py

+# -*- coding: utf-8 -*-
 import logging
 from lxml import html
 from lxml.cssselect import CSSSelector
 
 _EXTERNALLY_HOSTED_DOWNLOADS_SELECTOR = CSSSelector('div#external_player')
 
-_VIDEO_PLAYER_SELECTOR = CSSSelector('div#videoPlayerSWF + script')
+_VIDEO_PLAYER_SELECTOR = CSSSelector('body script:last-child')
 # Compiled patterns keyed by metadata field name. Each pattern has exactly one
 # capture group holding the field's value. Presumably they are searched against
 # the text of the element matched by _VIDEO_PLAYER_SELECTOR -- confirm against
 # the caller.
 # Raw strings keep the regex escapes (\w, \d) out of Python's own string-escape
 # processing, which warns about unrecognised sequences in newer versions. The
 # compiled patterns are unchanged.
 _VIDEO_PLAYER_METADATA = {
     # en:"<value>",  -- greedy: runs to the last '",' on the same line
     'event': re.compile(r'en:"(.+)",'),
     # fd:"<word> <digits>",  -- only the digits are captured
     'filming-year': re.compile(r'fd:"\w+ (\d+)",'),
     # pd:"<word> <digits>",  -- only the digits are captured
     'publishing-year': re.compile(r'pd:"\w+ (\d+)",'),
 }
 
-_AUTHOR_BIO_XPATH = XPath('//a[text()="Full bio and more links"]')
+_AUTHOR_BIO_XPATH = XPath(u'//a[text()="Full bio »"]')
 
 _THEME_SELECTOR = CSSSelector('ul.relatedThemes li a')