1. Petar Marić
  2. metaTED

Commits

Petar Marić  committed d10f68b

Updated author and talk theme markers as TED updated their HTML layout

  • Participants
  • Parent commits 56c48e4
  • Branches default

Comments (0)

Files changed (3)

File CHANGELOG

View file
+metaTED 1.0.6 (July 3, 2011)
+
+    * Updated author and talk theme markers as TED updated their HTML layout.
+
 metaTED 1.0.5 (March 22, 2010)
 
     * Updated talk theme markers as TED updated their HTML layout.

File README

View file
 .. _pip: http://pip.openplans.org/
 .. _hosted on bitbucket: http://bitbucket.org/petar/metated/
 
-New in metaTED 1.0.5
+New in metaTED 1.0.6
 ====================
 
-    * Updated talk theme markers as TED updated their HTML layout.
+    * Updated author and talk theme markers as TED updated their HTML layout.

File metaTED/crawler/get_talk_info.py

View file
 _INVALID_FILE_NAME_CHARS_RE = re.compile('[^\w\.\- ]+')
 _FILMING_YEAR_RE = re.compile('fd:\"\w+ (\d+)\",')
 _PUBLISHING_YEAR_RE = re.compile('pd:\"\w+ (\d+)\",')
-_THEME_RE = re.compile('Other talks from &(?:quot|ldquo);(.+)&(?:quot|rdquo);')
 
 
 class NoDownloadsFound(Exception):
     """
     Tries to guess the author, or returns 'Unknown' if no author was found.
     """
-    element = soup.find(id='tagline').findNextSibling('h3')
+    element = soup.find(id='accordion').findAll('div', recursive=False)[1].p.strong
     if element:
-        return _clean_up_file_name(element.string.split('About ', 1)[1])
+        return _clean_up_file_name(element.string)
     else:
         logging.warning(
             "Failed to guess the author of '%s'",
     """
     Tries to guess the talk's theme, or returns 'Unknown' if no theme was found.
     """
-    element = soup.find('div', 'related').h3
+    element = soup.find('ul', 'relatedThemes').li.a
     if element:
-        match = _THEME_RE.search(element.string)
-        if match:
-            return _clean_up_file_name(match.group(1), True)
-    
+        return _clean_up_file_name(element.string)
+
     logging.warning(
         "Failed to guess the theme of '%s'",
         talk_url