Petar Marić committed 1d2a163

Updated author markers and detection code as TED updated their HTML layout

Comments (0)

Files changed (1)


 import logging
 from lxml import html
 from lxml.cssselect import CSSSelector
+from lxml.etree import XPath
 from os.path import splitext
 import re
 from urlparse import urljoin, urlsplit
     'publishing-year': re.compile('pd:\"\w+ (\d+)\",'),
-_AUTHOR_SELECTOR = CSSSelector('div#accordion div p strong')
+_AUTHOR_BIO_XPATH = XPath('//a[text()="Full bio and more links"]')
 _THEME_SELECTOR = CSSSelector('ul.relatedThemes li a')
     Tries to guess the author, or returns 'Unknown' if no author was found.
-    elements = _AUTHOR_SELECTOR(document)
+    elements = _AUTHOR_BIO_XPATH(document)
     if elements:
-        return _clean_up_file_name(elements[0].text)
+        author_bio_url = urljoin(SITE_URL, elements[0].get('href'))
+        author_bio_document = html.parse(author_bio_url)
+        return _clean_up_file_name(
+            author_bio_document.find('/head/title').text.split('|')[0].strip()
+        )
     logging.warning("Failed to guess the author of '%s'", talk_url)
     return 'Unknown'
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.