Petar Marić avatar Petar Marić committed 2a41f32

Split `year` from talk_info metadata into `filming-year` and `publishing-year`, which automagically adds two new downloads groupings

Comments (0)

Files changed (2)

metaTED/crawler/get_talk_info.py

     return _talk_list_document_cache
 
 
-def _guess_year(talk_url, document):
+def _guess_filming_year(talk_url, document):
     """
-    Tries to guess the filming year, or if it's not available - the publishing
-    year.
-    
-    Returns year as string, or 'Unknown' if no date was found.
+    Tries to guess the filming year, or returns 'Unknown' if no filming year was
+    found.
     """
     elements = _VIDEO_PLAYER_SELECTOR(document)
     if elements:
-        year_txt = elements[0].text
-        match = _FILMING_YEAR_RE.search(year_txt)
-        if match is None:
-            logging.debug("Failed to guess the filming year of '%s'", talk_url)
-            match = _PUBLISHING_YEAR_RE.search(year_txt)
+        match = _FILMING_YEAR_RE.search(elements[0].text)
         if match:
             return match.group(1)
     
-    logging.warning(
-        "Failed to guess both the publishing and filming year of '%s'",
-        talk_url
-    )
+    logging.warning("Failed to guess the filming year of '%s'", talk_url)
+    return 'Unknown'
+
+
+def _guess_publishing_year(talk_url, document):
+    """
+    Tries to guess the publishing year, or returns 'Unknown' if no publishing
+    year was found.
+    """
+    elements = _VIDEO_PLAYER_SELECTOR(document)
+    if elements:
+        match = _PUBLISHING_YEAR_RE.search(elements[0].text)
+        if match:
+            return match.group(1)
+    
+    logging.warning("Failed to guess the publishing year of '%s'", talk_url)
     return 'Unknown'
 
 
             )
     
     return {
-        'year': _guess_year(talk_url, document),
+        'filming-year': _guess_filming_year(talk_url, document),
+        'publishing-year': _guess_publishing_year(talk_url, document),
         'author': _guess_author(talk_url, document),
         'theme': _guess_theme(talk_url, document),
         'qualities': qualities,

metaTED/metalink.py

 
 
 def _get_metalink_description(quality, group_by):
-    group_part = group_by and " grouped by %s" % group_by or ''
+    group_part = group_by and " grouped by %s" % group_by.replace('-', ' ') or ''
     return "Download TED talks%s encoded in %s quality" % (group_part, quality)
 
 
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.