Evgeniy Tatarkin avatar Evgeniy Tatarkin committed 001da56

rename next_urls to next_requests

Comments (0)

Files changed (7)

 
 Release date to be decided.
 
+- next_url renamed to next_requests
 - async support
 - Twisted support
 - concurrent future support
  * lock in pomp.dive incrementing _in_process variable
  * option to limit `request per host`
- * downloader middlewares can return more than one request/response/exception
+ * downloader middlewares can return more than one request/response
  * plain threads support and async behavior
  * gevent support
  * tornado support

pomp/core/base.py

     def __init__(self):
         self._in_process = 0
 
-    def next_url(self, page):
-        """Getting next urls for processing.
+    def next_requests(self, page):
+        """Getting next requests for processing.
  
         Called after `extract_items` method.
 

pomp/core/engine.py

                     map(lambda i: pipe.process(crawler, i), items)
                 ))
 
-        urls = crawler.next_url(response)
+        urls = crawler.next_requests(response)
         if crawler.is_depth_first():
             if urls:
                 self.downloader.process(
 
         self.stop_deferred = defer.Deferred()
 
-        next_urls = self.downloader.process(
+        next_requests = self.downloader.process(
             iterator(crawler.ENTRY_URL),
             self.response_callback,
             crawler
         )
 
         if not crawler.is_depth_first():
-            self._call_next_urls(next_urls, crawler)
+            self._call_next_requests(next_requests, crawler)
         return self.stop_deferred
 
-    def _call_next_urls(self, next_urls, crawler):
-        deferreds = [n for n in next_urls if n and isinstance(n, defer.Deferred)]
+    def _call_next_requests(self, next_requests, crawler):
+        deferreds = [n for n in next_requests if n and isinstance(n, defer.Deferred)]
         if deferreds: # async behavior
             d = DeferredList(deferreds)
-            d.add_callback(self._on_next_urls, crawler)
+            d.add_callback(self._on_next_requests, crawler)
         else: # sync behavior
-            self._on_next_urls(next_urls, crawler)
+            self._on_next_requests(next_requests, crawler)
 
-    def _on_next_urls(self, next_urls, crawler):
-        for urls in next_urls:
+    def _on_next_requests(self, next_requests, crawler):
+        for requests in next_requests:
 
-            if not urls:
+            if not requests:
                 continue
 
-            _urls = self.downloader.process(
-                iterator(urls),
+            _requests = self.downloader.process(
+                iterator(requests),
                 self.response_callback,
                 crawler
             )
 
-            self._call_next_urls(_urls, crawler)
+            self._call_next_requests(_requests, crawler)
 
         if not self.stoped and not crawler.in_process():
             self._stop(crawler)

tests/test_contrib_urllib.py

                 return exception
 
         class MockCrawler(BaseCrawler):
-            def next_url(self, response):
+            def next_requests(self, response):
                 return
 
             def extract_items(self, response):

tests/test_simple_crawler.py

         super(Crawler, self).__init__()
         self.crawled_urls = []
 
-    def next_url(self, response):
+    def next_requests(self, response):
         url = 'http://python.org/1/trash'
         result = url if url not in self.crawled_urls else None
         self.crawled_urls.append(url)
     def __init__(self):
         super(DummyCrawler, self).__init__()
 
-    def next_url(self, response):
+    def next_requests(self, response):
         res = response.body.get('links', [])
         return res
 
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.