Commits

Gregory Petukhov committed 423c284

Better logging messages in grab.spider.pattern

  • Participants
  • Parent commits 609d2d6

Comments (0)

Files changed (3)

 NULL = object()
 NULL_BYTE = chr(0)
 
+#rex_script = re.compile(r'<script[^>]*>.+?</script>', re.S)
+#rex_style = re.compile(r'<style[^>]*>.+?</style>', re.S)
+#rex_comment = re.compile(r'<!--.*?-->', re.S)
+
+#def simplify_html(html, targets=['script', 'style', 'comment']):
+    #if 'script' in targets:
+        #html = rex_script.sub(' ', html)
+    #if 'style' in targets:
+        #html = rex_style.sub(' ', html)
+    #if 'comment' in targets:
+        #html = rex_comment.sub(' ', html)
+    #return html
+
 class LXMLExtension(object):
     def extra_reset(self):
         self._lxml_tree = None
                 body = '<html></html>'
             start = time.time()
 
+            #body = simplify_html(body)
             try:
                 self._lxml_tree = fromstring(body)
             except ParserError as ex:

grab/spider/pattern.py

         was found.
         """
 
-        logger.error('This method is deprecated. Use process_next_page method instead.')
+        logger.error('Method next_page_task is deprecated. Use process_next_page method instead.')
         nav = grab.xpath(xpath, None)
         if nav is not None:
             url = grab.make_url_absolute(nav.get('href'))
 
             self.follow_links(grab, '//div[@class="topic"]/a/@href', 'topic')
         """
-        logger.error('This method is deprecated. Use process_links method instead.')
+        logger.error('Method follow_links is deprecated. Use process_links method instead.')
 
         urls = []
         for url in grab.xpath_list(xpath):
+from grab.spider import Spider, Task
+
+class VkSpider(Spider):
+    """Spider that prepares a login form submission on m.vk.com per account.
+
+    Accounts are read from ``self.meta['accounts']``; each entry is a
+    ``"login:password"`` string.
+    """
+
+    initial_urls = ['http://m.vk.com']
+
+    def task_initial(self, grab, task):
+        """Fill in the login form for every account and yield a ``login`` task.
+
+        The form is submitted with ``make_request=False`` and the resulting
+        ``grab`` object is passed on to the new task together with the
+        original account string.
+        """
+        for account in self.meta['accounts']:
+            # Split on the first colon only, so a password that itself
+            # contains ":" does not break the two-value unpacking.
+            login, password = account.split(":", 1)
+            grab.set_input("email", login)
+            grab.set_input("pass", password)
+            grab.submit(make_request=False)
+            yield Task(name="login", grab=grab, account=account)
+
+
+bot = VkSpider()
+bot.run()