Commits

Evgeniy Tatarkin  committed 6607f1f

better generator usage

  • Participants
  • Parent commits a1f446d

Comments (0)

Files changed (4)

 
 Here you can see the full list of changes between pomp releases.
 
+Version 0.1.1
+-------------
+
+Not released yet
+
+- better generator usage
+
 
 Version 0.1.0
 -------------

File pomp/__init__.py

-__version__ = (0, 1, 0, 'final', 0)
+__version__ = (0, 1, 1, 'dev', 0)

File pomp/core/base.py

         self.response_middlewares.reverse()
 
     def process(self, urls, callback, crawler):
-        return list(self._lazy_process_async(urls, callback, crawler))
+        return self._lazy_process_async(urls, callback, crawler)
 
     def _lazy_process_async(self, urls, callback, crawler):
         if not urls:

File pomp/core/engine.py

 """
 Engine
 """
+import types
 import logging
 
 import defer
         log.info('Process %s', response)
         items = crawler.process(response)
 
-        if items:
-            for pipe in self.pipelines:
-                items = filter(None, list(
-                    map(lambda i: pipe.process(crawler, i), items)
-                ))
+        # pipe items
+        for pipe in self.pipelines:
+            items = filter(
+                None,
+                [pipe.process(crawler, i) for i in items],
+            )
 
         urls = crawler.next_requests(response)
         if crawler.is_depth_first():
 
         if not crawler.is_depth_first():
             self._call_next_requests(next_requests, crawler)
+        else:
+            # depth-first mode: requests are not dispatched from here,
+            # so a generator must be exhausted explicitly to run it
+            if isinstance(next_requests, types.GeneratorType):
+                list(next_requests)  # fire generator
         return self.stop_deferred
 
     def _call_next_requests(self, next_requests, crawler):
-        deferreds = [
-            n for n in next_requests if n and isinstance(n, defer.Deferred)]
+        # separate deferred and regular requests
+        # fire generator
+        deferreds = []
+        other = []
+        for r in filter(None, next_requests):
+            if isinstance(r, defer.Deferred):
+                deferreds.append(r)
+            else:
+                other.append(r)
+
         if deferreds:  # async behavior
             d = DeferredList(deferreds)
             d.add_callback(self._on_next_requests, crawler)
-        else:  # sync behavior
-            self._on_next_requests(next_requests, crawler)
+
+        if other:  # sync behavior
+            self._on_next_requests(other, crawler)
 
     def _on_next_requests(self, next_requests, crawler):
-        for requests in next_requests:
-
-            if not requests:
-                continue
-
+        # execute request by downloader
+        for requests in filter(None, next_requests):
             _requests = self.downloader.process(
                 iterator(requests),
                 self.response_callback,