1. mt3
  2. grab

Commits

Gregory Petukhov  committed 90c2822

Add auto_referer option

  • Participants
  • Parent commits 29dc6a1
  • Branches default

Comments (0)

Files changed (2)

File grab/base.py

View file
  • Ignore whitespace
         user_agent_file = os.path.join(PACKAGE_DIR, 'user_agent.txt'),
         referer = None,
         reuse_referer = True,
+        auto_referer = True,
 
         # Cookies
         cookies = {},

File grab/transport/curl.py

View file
  • Ignore whitespace
 from StringIO import StringIO
 import threading
 import random
+from urlparse import urlsplit, urlunsplit
 
 from ..base import (GrabError, GrabMisuseError, UploadContent, UploadFile,
                     GrabTimeoutError, GrabNetworkError)
         """
         import pycurl
 
-        url = self.config['url']
-        if isinstance(url, unicode):
-            url = url.encode('utf-8')
-        self.curl.setopt(pycurl.URL, url)
+        request_url = self.config['url']
+        if isinstance(request_url, unicode):
+            request_url = request_url.encode('utf-8')
+        self.curl.setopt(pycurl.URL, request_url)
+
         self.curl.setopt(pycurl.FOLLOWLOCATION, 1 if self.config['follow_location'] else 0)
         self.curl.setopt(pycurl.MAXREDIRS, 5)
         self.curl.setopt(pycurl.CONNECTTIMEOUT, self.config['connect_timeout'])
             # To correctly support cookies in 302-redirects
             self.curl.setopt(pycurl.COOKIEFILE, '')
 
+        if self.config['auto_referer'] and self.config['referer'] is None:
+            urlinfo = urlsplit(request_url)
+            # build scheme + hostname (port and userinfo are not included)
+            self.config['referer'] = '%s://%s' % (urlinfo.scheme, urlinfo.hostname)
+
         if self.config['referer']:
             self.curl.setopt(pycurl.REFERER, str(self.config['referer']))