Commits

Evgeniy Tatarkin committed c17ef3e

UrllibAdapterMiddleware is now included by default in SimpleDownloader

  • Participants
  • Parent commits 6f0c954

Comments (0)

Files changed (4)

File examples/pythonnews.py

 import logging
 from pomp.core.base import BaseCrawler, BasePipeline
 from pomp.core.item import Item, Field
-from pomp.contrib import SimpleDownloader, UrllibAdapterMiddleware
+from pomp.contrib import SimpleDownloader
 
 
 logging.basicConfig(level=logging.DEBUG)
     from pomp.core.engine import Pomp
 
     pomp = Pomp(
-        downloader=SimpleDownloader(middlewares=[UrllibAdapterMiddleware()]),
+        downloader=SimpleDownloader(),
         pipelines=[PrintPipeline()],
     )
 

File pomp/contrib/__init__.py

 
     TIMEOUT = 5
 
+    def __init__(self, *args, **kwargs):
+        super(SimpleDownloader, self).__init__(*args, **kwargs)
+        # insert urllib adapter middleware by default
+        self.middlewares.insert(0, UrllibAdapterMiddleware())
+
     def get(self, requests):
         responses = []
         for request in iterator(requests):

File pomp/core/base.py

 class BaseDownloader(object):
 
     def __init__(self, middlewares=None):
-        self.middlewares = middlewares or ()
+        self.middlewares = middlewares or []
 
     def process(self, urls, callback, crawler):
         # start downloading and processing

File tests/test_threaded.py

 from nose.tools import assert_set_equal
 from pomp.core.base import BaseCrawler, BaseDownloaderMiddleware
 from pomp.core.engine import Pomp
-from pomp.contrib import ThreadedDownloader, UrllibAdapterMiddleware
+from pomp.contrib import ThreadedDownloader
 from pomp.core.base import CRAWL_WIDTH_FIRST_METHOD
 
 from mockserver import HttpServer, make_sitemap
         req_resp_midlleware = RequestResponseMiddleware(prefix_url=self.httpd.location)
         pomp = Pomp(
             downloader=ThreadedDownloader(
-                middlewares=[UrllibAdapterMiddleware(), req_resp_midlleware]
+                middlewares=[req_resp_midlleware]
             ),
             pipelines=[],
         )