Commits

Evgeniy Tatarkin committed ce38b5e

add logs

Files changed (4)

examples/pythonnews.py

 Extract python news from python.org
 """
 import re
+import logging
 from pomp.core.base import BaseCrawler, BasePipeline
 from pomp.core.item import Item, Field
 from pomp.contrib import SimpleDownloader
 
 
+logging.basicConfig(level=logging.DEBUG)
 news_re = re.compile(r'<h2 class="news">(.*?)</h2>([\s\S]*?)<div class="pubdate">(.*?)</div>')
 
 

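A minimal stdlib-only sketch (no pomp API involved, only the logger name used by the engine changes below) of why this single basicConfig call is enough to surface the engine's new log messages when the example runs:

import logging

# Same call as in the hunk above: installs a stderr handler on the root
# logger and lowers its level to DEBUG.
logging.basicConfig(level=logging.DEBUG)

# The engine-side logger (see pomp/core/engine.py below) has no handler of
# its own, so its records propagate up to the root handler configured here.
log = logging.getLogger('pomp.engine')
log.info('Process %s', 'http://python.org/news')
# -> INFO:pomp.engine:Process http://python.org/news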
pomp/core/base.py

 CRAWL_DEPTH_FIRST_METHOD = 'depth'
 CRAWL_WIDTH_FIRST_METHOD = 'width'
 
-
 class BaseCrawler(object):
     ENTRY_URL = None
     CRAWL_METHOD = CRAWL_DEPTH_FIRST_METHOD

pomp/core/engine.py

 """
 Engine
 """
+import logging
 import itertools
 from pomp.core.utils import iterator
 
 
+log = logging.getLogger('pomp.engine')
+
+
 class Pomp(object):
 
     def __init__(self, downloader, pipelines=None):
 
     def response_callback(self, crawler, url, page):
 
+        log.info('Process %s', url)
         items = crawler.process(url, page)
 
         if items:
 
     def pump(self, crawler):
 
+        log.info('Start crawler: %s', crawler)
+
         for pipe in self.pipelines:
+            log.info('Start pipe: %s', pipe)
             pipe.start()
 
         try:
         finally:
 
             for pipe in self.pipelines:
+                log.info('Stop pipe: %s', pipe)
                 pipe.stop() 
+
+
+        log.info('Stop crawler: %s', crawler)
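The engine only creates a module-level logger; handlers and levels stay under the caller's control. A hedged sketch (assumed application-side configuration, plain stdlib, not part of this commit) of tuning just the 'pomp.engine' logger while keeping everything else quiet:

import logging

# Assumed application-side setup: show crawl progress, silence other loggers.
logging.basicConfig(
    level=logging.WARNING,
    format='%(asctime)s %(name)s %(levelname)s %(message)s',
)
logging.getLogger('pomp.engine').setLevel(logging.INFO)

# The diff passes url/crawler as logging arguments ('Process %s', url) rather
# than formatting eagerly; interpolation only happens if the record is emitted.
logging.getLogger('pomp.engine').info('Process %s', 'http://python.org/news')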

tests/test_simple_crawler.py

+import logging
 from nose.tools import assert_equal
 from pomp.core.base import BaseCrawler, BaseDownloader, BasePipeline
 from pomp.core.base import CRAWL_WIDTH_FIRST_METHOD
 from pomp.core.item import Item, Field
 
 
+logging.basicConfig(level=logging.DEBUG)
+
+
 class DummyItem(Item):
     value = Field()
     url = Field()
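Beyond enabling DEBUG output during the test run, the new logger can also be asserted against. A rough sketch (the ListHandler helper and the test itself are assumptions, not part of this commit) using only the stdlib and the nose.tools import already present in the file:

import logging

from nose.tools import assert_equal

logging.basicConfig(level=logging.DEBUG)


class ListHandler(logging.Handler):
    # Hypothetical helper: collects formatted messages for assertions.
    def __init__(self):
        logging.Handler.__init__(self)
        self.messages = []

    def emit(self, record):
        self.messages.append(record.getMessage())


def test_engine_log_messages():
    handler = ListHandler()
    log = logging.getLogger('pomp.engine')  # logger name added in this commit
    log.addHandler(handler)
    try:
        log.info('Start crawler: %s', 'dummy')
        assert_equal(handler.messages, ['Start crawler: dummy'])
    finally:
        log.removeHandler(handler)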