Petar Marić committed 63db35e

Switched from `urllib2` to `requests`; removed page caching because it was never used

Comments (0)

Files changed (2)

metaTED/crawler/__init__.py

 import logging
-import urllib2
 import metaTED
-from metaTED.cache import cache
+import requests
 
 
 _DEFAULT_RETRY_TIMES = 5
 
 
-_opener = urllib2.build_opener()
-_opener.addheaders = [('User-agent', 'metaTED/%s' % metaTED.__version__)]
-
-
 def urlread(fullurl, max_retries=_DEFAULT_RETRY_TIMES):
-    # Check in-memory cache before requesting url
-    logging.debug("Searching cache for '%s' contents...", fullurl)
-    if fullurl in cache:
-        logging.debug("Found the cached version of '%s' contents", fullurl)
-        return cache[fullurl]
-    logging.debug("Failed to find the cached version of '%s' contents", fullurl)
-
     saved_exception = None
     for try_num in xrange(1, max_retries+1):
         try:
                 try_num,
                 max_retries
             )
-            data = _opener.open(fullurl).read()
+            r = requests.get(
+                url=fullurl,
+                headers={
+                    'User-Agent': "metaTED/%s" % metaTED.__version__,
+            })
+            
+            # Check if we made a bad request
+            r.raise_for_status()
+            
             logging.debug("Successfully read data from '%s'", fullurl)
-            cache[fullurl] = data
-            return data
-        except urllib2.URLError, e:
+            return r.content
+        except requests.RequestException, e:
             if try_num == max_retries:
                 log_func = logging.fatal
                 message = "Giving up! Could not read data from '%s': %s"
             log_func(message, fullurl, e)
     
     # Re-raise the last exception because crawler used up all retries
-    raise saved_exception
+    raise saved_exception
 BeautifulSoup>=3.1
 Jinja2>=2.1
+requests>=0.7
 shove>=0.2.2
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.