Commits

Gregory Petukhov  committed 8df4550 Merge

Automated merge with ssh://bitbucket.org/lorien/grab

  • Participants
  • Parent commits f84da5b, acf8207

Comments (0)

Files changed (2)

File grab/spider/base.py

 from .pattern import SpiderPattern
 from .stat  import SpiderStat
 from .transport.multicurl import MulticurlTransport
-from .transport.threadpool import ThreadPoolTransport
 from ..proxylist import ProxyList
 
 DEFAULT_TASK_PRIORITY = 100
     def prepare(self):
         """
         You can do additional spider customization here
-        before it has started working.
+        before it has started working. Simply redefine
+        this method in your Spider class.
         """
 
     def sigusr1_handler(self, signal, frame):
 
     def prepare_before_run(self):
         """
-        Configure all things required to begin
-        executing tasks in main `run` method.
+        Configure all things required to start
+        main work loop.
         """
 
         # If queue is still not configured
         self.prepare()
 
         # Init task generator
+        # TODO: task generator should work in separate OS process
         self.task_generator_object = self.task_generator()
         self.task_generator_enabled = True
 
                 if self.task_generator_enabled:
                     self.process_task_generator()
 
+
                 # Increase task counters
                 self.inc_count('task')
                 self.inc_count('task-%s' % res['task'].name)

File grab/tools/files.py

             os.unlink(os.path.join(root, fname))
         for _dir in dirs:
             shutil.rmtree(os.path.join(root, _dir))
+
+
+def smart_copy_file(filename, dst_root):
+    dir_path, fname = os.path.split(filename)
+    dst_dir = os.path.join(dst_root, dir_path)
+    if not os.path.exists(dst_dir):
+        os.makedirs(dst_dir)
+    shutil.copy(filename, dst_dir)