Chris Adams  committed b6594f7

Added a default timeout to Retriever, Spider

This makes it easy for client scripts to expose a command-line option that changes the request timeout.

  • Participants
  • Parent commits 0dbc0e6

Comments (0)

Files changed (2)

File bin/

     parser = optparse.OptionParser(__doc__.strip())
     parser.add_option("--max-connections", type="int", default="2", help="Set the number of simultaneous connections to the remote server(s)")
+    parser.add_option("--timeout", type="int", default="15", help="Set the number of seconds to wait for a request to load")
     parser.add_option("--format", dest="report_format", default="text", help='Generate the report as HTML or text')
     parser.add_option("-o", "--report", "--output", dest="report_file", default=sys.stdout, help='Save report to a file instead of stdout')
     parser.add_option("--follow-offsite-redirects", action="store_true", default=False, help="Follow redirects which lead to outside servers to check for 404s")
             logging.critical("Cannot perform HTML validation. Try `pip install pytidylib` or see")
-    spider = QASpider(validate_html=options.validate_html, max_simultaneous_connections=options.max_connections)
+    spider = QASpider(
+        validate_html=options.validate_html,
+        max_simultaneous_connections=options.max_connections,
+        default_request_timeout=options.timeout
+    )
     spider.skip_media = options.skip_media
     spider.skip_resources = options.skip_resources
     spider.follow_offsite_redirects = options.follow_offsite_redirects

File webtoolbox/

         request_args = {
                 "follow_redirects": False,
                 "max_redirects": 5,
-                "request_timeout": 15
+                "request_timeout": self.default_request_timeout
     # servers but you might want to check them for reporting purposes:
     follow_offsite_redirects = False
+    #: This is the default time in seconds which we'll wait to receive a response:
+    default_request_timeout = 15
     #: All urls processed by this spider as a URL-keyed list of :class:URLStatus elements
     site_structure = defaultdict(URLStatus)
     url_history = set()
     redirect_map = {}
-    def __init__(self, log_name="Spider", **kwargs):
+    def __init__(self, log_name="Spider", default_request_timeout=15, **kwargs):
         """Create a new Spider, optionally with a custom logging name"""
         super(Spider, self).__init__(**kwargs)
+        self.default_request_timeout = default_request_timeout
         self.log = logging.getLogger(log_name)