Commits

Gregory Petukhov  committed b7fba82

Add a `callback` option to the Task constructor that allows passing any callable object as the handler for responses generated by that task

  • Participants
  • Parent commits e439141

Comments (0)

Files changed (3)

File grab/spider/base.py

         to the network task queue.
         """
 
-        if handler is None:
-            raise SpiderMisuseError('Handler is not defined for task %s' % res['task'].name)
-
         try:
             handler_name = handler.__name__
         except AttributeError:
             handler = None
 
-        #callback = res['task'].get('callback')
+        callback = res['task'].get('callback')
 
-        if handler is None:
-            raise NoTaskHandler('No handler defined for task %s' % res['task'].name)
+        if handler is None and callback is None:
+            raise NoTaskHandler('No handler or callback defined for task %s' % res['task'].name)
         else:
-            self.execute_task_handler(res, handler)
+            self.execute_task_handler(res, callback or handler)
 
     def change_proxy(self, task, grab):
         """

File grab/spider/task.py

                  disable_cache=False, refresh_cache=False,
                  valid_status=[], use_proxylist=True,
                  cache_timeout=None, delay=0,
-                 raw=False,
+                 raw=False, callback=None,
                  **kwargs):
         """
         Create `Task` object.
                 if `raw` is False (the default), a failed response is put back
                 into the task queue or, if the tries limit is reached, processing of this
                 request is finished.
+            :param callback: if you pass some function in `callback` option then the
+                network response will be passed to this callback and the usual 'task_*'
+                handler will be ignored and no error will be raised if such 'task_*' handler
+                does not exist.
             Any non-standard named arguments passed to `Task` constructor will be saved as
             attributes of the object. You can get their values later as attributes or with
             `get` method, which lets you supply a default value if the attribute does not exist.
         self.use_proxylist = use_proxylist
         self.cache_timeout = cache_timeout
         self.raw = raw
+        self.callback = callback
         for key, value in kwargs.items():
             setattr(self, key, value)
 

File test/spider_task.py

 from unittest import TestCase
+import cPickle as pickle
 
 import grab.spider.base
 from grab import Grab
         bot.add_task(Task('page', url=SERVER.BASE_URL, raw=True))
         bot.run()
         self.assertEqual(2, len(bot.codes))
+
+    def test_task_callback(self):
+        class TestSpider(Spider):
+            def task_page(self, grab, task):
+                self.meta['tokens'].append('0_handler')
+
+        class FuncWithState(object):
+            def __init__(self, tokens):
+                self.tokens = tokens
+
+            def __call__(self, grab, task):
+                self.tokens.append('1_func')
+
+        tokens = []
+        func = FuncWithState(tokens)
+
+        bot = TestSpider()
+        bot.meta['tokens'] = tokens
+        bot.setup_queue()
+        # classic handler
+        bot.add_task(Task('page', url=SERVER.BASE_URL))
+        # callback option overrides classic handler
+        bot.add_task(Task('page', url=SERVER.BASE_URL, callback=func))
+        # callback and null task name
+        bot.add_task(Task(name=None, url=SERVER.BASE_URL, callback=func))
+        # callback and default task name
+        bot.add_task(Task(url=SERVER.BASE_URL, callback=func))
+        bot.run()
+        self.assertEqual(['0_handler', '1_func', '1_func', '1_func'],
+                         sorted(tokens))
+
+
+    #def test_task_callback_serialization(self):
+        # 8-(
+        # FIX: pickling the spider instance completely does not work
+        # 8-(
+
+        #class FuncWithState(object):
+            #def __init__(self, tokens):
+                #self.tokens = tokens
+
+            #def __call__(self, grab, task):
+                #self.tokens.append('func')
+
+        #tokens = []
+        #func = FuncWithState(tokens)
+
+        #bot = SimpleSpider()
+        #bot.setup_queue()
+        ##bot.add_task(Task(url=SERVER.BASE_URL, callback=func))
+
+        #dump = pickle.dumps(bot)
+        #bot2 = pickle.loads(dump)
+
+        #bot.run()
+        #self.assertEqual(['func'], tokens)