Commits

Petar Marić committed 9f6232f

Code review and Python 2.6-ization, style cleanup

Comments (0)

Files changed (7)

metaTED/__init__.py

         help='Write generated metalinks to specified output directory'
     )
     (options, _) = parser.parse_args()
-
+    
     # Configure logging
     log_level = options.verbosity or logging.INFO
     logging.basicConfig(level=log_level, format="[%(levelname)s] %(message)s")
-
+    
     # Generate metalinks
     from metaTED.metalink import generate_metalinks
     generate_metalinks(options.output_dir)
 
-
 if __name__ == '__main__':
     main()
     sync=1 # Minimizes data loss on various processing errors
 )
 
-cache = cached_storage._cache
+cache = cached_storage._cache

metaTED/crawler/get_downloadable_talks.py

+from concurrent import futures
 import logging
-from concurrent import futures
 from multiprocessing import cpu_count
-from metaTED.cache import cached_storage
-from metaTED.crawler.get_talk_info import get_talk_info, ExternallyHostedDownloads, NoDownloadsFound
-from metaTED.crawler.get_talks_urls import get_talks_urls
+from .get_talk_info import get_talk_info, ExternallyHostedDownloads, NoDownloadsFound
+from .get_talks_urls import get_talks_urls
+from ..cache import cached_storage
 
 
 _PAGINATE_BY = 20
         raise NoDownloadableTalksFound('No downloadable talks found')
     
     logging.info(
-        "Found %d downloadable talks in total",
+        "Found %d downloadable talk(s) in total",
         len(downloadable_talks)
     )
     return downloadable_talks

metaTED/crawler/get_talk_info.py

+import logging
+from lxml import html
+from lxml.cssselect import CSSSelector
+from os.path import splitext
 import re
-import logging
-from lxml.cssselect import CSSSelector
-from lxml import html
-from os.path import splitext
 from urlparse import urljoin, urlsplit
-from metaTED import SITE_URL
-from metaTED.crawler.get_talks_urls import TALKS_LIST_URL
-
-
-AVAILABLE_VIDEO_QUALITIES = {
-    'low': 'Low',
-    'standard': 'Regular',
-    'high': 'High',
-}
+from .get_talks_urls import TALKS_LIST_URL
+from .. import SITE_URL
 
 
 _HTML_ENTITY_RE = re.compile(r'&(#?[xX]?[0-9a-fA-F]+|\w{1,8});')
 
 _THEME_SELECTOR = CSSSelector('ul.relatedThemes li a')
 
+AVAILABLE_VIDEO_QUALITIES = {
+    'low': 'Low',
+    'standard': 'Regular',
+    'high': 'High',
+}
 _QUALITIES_XPATH_FMT = "//a[@href='%s']/ancestor::node()[name()='tr']/td[5]/a"
 
 
     # Should be clean now
     return file_name
 
-
 _talk_list_document_cache = None
 def _get_talk_list_document():
     global _talk_list_document_cache
     
     return _talk_list_document_cache
 
-
 def _guess_video_player_metadata(name, regexp, talk_url, document):
     elements = _VIDEO_PLAYER_SELECTOR(document)
     if elements:
     logging.warning("Failed to guess the %s of '%s'", name, talk_url)
     return 'Unknown'
 
-
 def _guess_author(talk_url, document):
     """
     Tries to guess the author, or returns 'Unknown' if no author was found.
     if elements:
         return _clean_up_file_name(elements[0].text)
     
-    logging.warning(
-        "Failed to guess the author of '%s'",
-        talk_url
-    )
+    logging.warning("Failed to guess the author of '%s'", talk_url)
     return 'Unknown'
 
-
 def _guess_theme(talk_url, document):
     """
     Tries to guess the talk's theme, or returns 'Unknown' if no theme was found.
     if elements:
         return _clean_up_file_name(elements[0].text)
     
-    logging.warning(
-        "Failed to guess the theme of '%s'",
-        talk_url
-    )
+    logging.warning("Failed to guess the theme of '%s'", talk_url)
     return 'Unknown'
 
-
 def _get_download_urls_dict(talk_url):
     """
     Returns a dictionary of all download URLs for a given talk URL, mapping 
         )
     )
 
-
 def get_talk_info(talk_url):
     document = html.parse(talk_url)
     file_base_name = _clean_up_file_name(
         document.find('/head/title').text.split('|')[0].strip(),
-        True
+        replace_first_colon_with_dash=True
     )
     
     # Downloads not hosted by TED!
                 talk_url
             )
             qualities_missing.append(name)
-
-    if len(qualities_found) == 0: # No downloads found!
+    
+    if not qualities_found: # No downloads found!
         raise NoDownloadsFound(talk_url)
-
-    if len(qualities_missing) > 0: # Some found, but not all
+    
+    if qualities_missing: # Some found, but not all
         # Use what you got, emulate the rest with the first discovered quality
         emulator_name = qualities_found[0]
         emulator = qualities[emulator_name]

metaTED/crawler/get_talks_urls.py

 import logging
+from lxml import html
 from lxml.cssselect import CSSSelector
-from lxml import html
-from metaTED import SITE_URL
 from urlparse import urljoin
+from .. import SITE_URL
 
 
 TALKS_LIST_URL = "http://www.ted.com/talks/quick-list"

metaTED/metalink.py

+from email.utils import formatdate
+from jinja2 import Environment, PackageLoader
+import logging
 import os
-import logging
-from jinja2 import Environment, PackageLoader
-from metaTED import __version__
-from metaTED.cache import cached_storage
-from metaTED.crawler.get_downloadable_talks import get_downloadable_talks
-from metaTED.crawler.get_talk_info import AVAILABLE_VIDEO_QUALITIES
-
-try:
-    from email.utils import formatdate
-except ImportError:
-    from email.Utils import formatdate # Python 2.4 fallback
+from . import __version__
+from .cache import cached_storage
+from .crawler.get_downloadable_talks import get_downloadable_talks
+from .crawler.get_talk_info import AVAILABLE_VIDEO_QUALITIES
 
 
 _METALINK_BASE_URL = "http://metated.petarmaric.com/metalinks/%s"
             'download_url': quality_info['download_url'],
             'full_file_path': full_file_path
         })
+    
     return downloads
 
-
 def _get_metalink_file_name(quality, group_by):
-    group_part = group_by and "-grouped-by-%s" % group_by or ''
-    return "TED-talks%s-in-%s-quality.metalink" % (group_part, quality)
-
+    return "TED-talks%s-in-%s-quality.metalink" % (
+        "-grouped-by-%s" % group_by if group_by else '',
+        quality
+    )
 
 def _get_metalink_description(quality, group_by):
-    group_part = group_by and " grouped by %s" % group_by.replace('-', ' ') or ''
-    return "Download TED talks%s encoded in %s quality" % (group_part, quality)
-
+    return "Download TED talks%s encoded in %s quality" % (
+        " grouped by %s" % group_by.replace('-', ' ') if group_by else '',
+        quality
+    )
 
 def _get_group_downloads_by(downloadable_talks):
     # Also generate metalinks with no grouped downloads
     logging.debug("Downloads can be grouped by '%s'", groups)
     return groups
 
-
 def generate_metalinks(output_dir=None):
     output_dir = os.path.abspath(output_dir or '')
     if not os.path.exists(output_dir):
     # Prepare the template upfront, because it can be reused between metalinks
     env = Environment(loader=PackageLoader('metaTED'))
     template = env.get_template('template.metalink')
-
+    
     # Use the same dates/times for all metalinks because they should, in my
     # opinion, point out when the metalinks were being generated and not when
     # they were physically written to disk
                 'description': metalink_description
             })
             logging.info("Generated '%s' metalink", metalink_file_name)
+    
     return {
         'metaTED_version': __version__,
         'first_published_on': first_published_on,
         'console_scripts': ['metaTED=metaTED:main']
     },
     install_requires=open('requirements.txt').read().splitlines()
-)
+)