Commits

Gregory Petukhov committed 5ad4238

Add Kit extension that provides `grab.kit` property. Refactor Doc extension.

Comments (0)

Files changed (7)

 from .ext.pquery import PyqueryExtension
 from .ext.ftp import FTPExtension
 from .ext.doc import DocExtension
+from .ext.kit import KitExtension
 
 __all__ = ('Grab', 'UploadContent', 'UploadFile')
 
 
 class Grab(LXMLExtension, FormExtension, PyqueryExtension,
            DjangoExtension, TextExtension, RegexpExtension,
-           FTPExtension, DocExtension):
+           FTPExtension, DocExtension, KitExtension):
 
     # Points which could be handled in extension classes
     extension_points = ('config', 'init', 'reset')
-# Copyright: 2011, Grigoriy Petukhov
+# Copyright: 2013, Grigoriy Petukhov
 # Author: Grigoriy Petukhov (http://lorien.name)
-# License: BSD
+# License: MIT
 from __future__ import absolute_import
 from grab.selector import XpathSelector
 
+class DocInterface(object):
+    def __init__(self, grab):
+        self.grab = grab
+
+    def select(self, *args, **kwargs):
+        return XpathSelector(self.grab.tree).select(*args, **kwargs)
+
+
 class DocExtension(object):
     def extra_reset(self):
         self._doc = None
     @property
     def doc(self):
         """
-        Return Selector object bined to the `self.tree`
+        Return the DocInterface object, which provides
+        shortcuts for faster access to Selector functions.
         """
         
-        return XpathSelector(self.tree)
+        if not self._doc:
+            self._doc = DocInterface(self)
+        return self._doc
+# Copyright: 2013, Grigoriy Petukhov
+# Author: Grigoriy Petukhov (http://lorien.name)
+# License: MIT
+from __future__ import absolute_import
+from grab.selector import KitSelector
+
+class KitInterface(object):
+    def __init__(self, grab):
+        self.grab = grab
+
+    def select(self, *args, **kwargs):
+        qt_doc = self.grab.transport.kit.page.mainFrame().documentElement()
+        return KitSelector(qt_doc).select(*args, **kwargs)
+
+
+class KitExtension(object):
+    def extra_reset(self):
+        self._kit = None
+
+    @property
+    def kit(self):
+        """
+        Return the KitInterface object, which provides
+        methods for accessing Kit-transport-related functions.
+        """
+        
+        if not self._kit:
+            self._kit = KitInterface(self)
+        return self._kit

test/doc_extension.py

 </html>
 """
 
-XML = """
-<root>
-    <man>
-        <age>25</age>
-        <weight><![CDATA[30]]></weight>
-    </man>
-</root>
-"""
-
-
 class DocExtensionTest(TestCase):
     def setUp(self):
         SERVER.reset()
 
     def test_extension_in_general(self):
         self.assertTrue(self.g.doc)
+
+    def test_select_method(self):
+        self.assertEqual('test', self.g.doc.select('//h1').text())

test/kit_extension.py

+# coding: utf-8
+from unittest import TestCase
+from grab import Grab, DataNotFound
+
+from .tornado_util import SERVER
+
+GRAB_TRANSPORT = 'grab.transport.kit.KitTransport'
+
+HTML = u"""
+<html>
+    <body>
+        <h1>test</h1>
+    </body>
+</html>
+"""
+
+class KitExtensionTest(TestCase):
+    def setUp(self):
+        SERVER.reset()
+        SERVER.RESPONSE['get'] = HTML
+        self.g = Grab(transport=GRAB_TRANSPORT)
+        self.g.go(SERVER.BASE_URL)
+
+    def test_extension_in_general(self):
+        self.assertTrue(self.g.kit)
+
+    def test_select_method(self):
+        self.assertEqual('test', self.g.kit.select('h1').text())

test/kit_live_sites.py

+# coding: utf-8
+from unittest import TestCase
+
+from grab import Grab, GrabMisuseError
+
+GRAB_TRANSPORT = 'grab.transport.kit.KitTransport'
+
+class KitLiveSitesTestCase(TestCase):
+    def test_dumpz_copyright(self):
+        g = Grab(transport=GRAB_TRANSPORT)
+        g.go('http://dumpz.org')
+        self.assertTrue('Grigoriy Petukhov' in g.response.body)
+
+    def test_dumpz_codemirror(self):
+        g = Grab(transport='grab.transport.curl.CurlTransport')
+        g.go('http://dumpz.org')
+        self.assertFalse('<div class="CodeMirror' in g.response.runtime_body)
+
+        g = Grab(transport=GRAB_TRANSPORT)
+        g.go('http://dumpz.org')
+        # Dumpz.org embeds the CodeMirror JavaScript editor,
+        # which builds some of its HTML at run time
+        self.assertTrue('<div class="CodeMirror' in g.response.runtime_body)

test/selector_kit.py

+# coding: utf-8
+from unittest import TestCase
+
+#import os
+#import sys
+#root = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+#sys.path.insert(0, root)
+
+from util import GRAB_TRANSPORT, ignore_transport, only_transport
+from tornado_util import SERVER
+from grab.selector import KitSelector
+from grab import Grab
+
+HTML = """
+<html>
+    <body>
+        <h1>test</h1>
+        <ul>
+            <li>one</li>
+            <li>two</li>
+            <li>three</li>
+            <li class="zzz" id="6">z 4 foo</li>
+        </ul>
+        <ul id="second-list">
+            <li class="li-1">yet one</li>
+            <li class="li-2">yet two</li>
+        </ul>
+    </body>
+</html>
+"""
+
+class KitSelectorTestCase(TestCase):
+    def setUp(self):
+        g = Grab(transport='grab.transport.kit.KitTransport')
+        SERVER.RESPONSE['get'] = HTML
+        g.go(SERVER.BASE_URL)
+        self.qt_doc = g.transport.kit.page.mainFrame().documentElement()
+
+    def test_in_general(self):
+        sel = KitSelector(self.qt_doc)
+
+    def test_select_node(self):
+        sel = KitSelector(self.qt_doc).select('h1')[0]
+        self.assertEquals('test', sel.node.toInnerXml())
+
+    def test_html(self):
+        sel = KitSelector(self.qt_doc).select('h1')[0]
+        self.assertEquals('<h1>test</h1>', sel.html())
+
+    def test_textselector(self):
+        self.assertEquals('one', KitSelector(self.qt_doc).select('li').text())
+
+    def test_number(self):
+        self.assertEquals(4, KitSelector(self.qt_doc).select('li.zzz').number())
+
+    # TODO
+    # test the ID selector (#6)
+
+    #def test_text_selector(self):
+        #sel = KitSelector(self.qt_doc).select('//li/text()').one()
+        #self.assertTrue(isinstance(sel, TextSelector))
+
+    ## TODO: add --pyquery flag to runtest script
+    ##def test_select_pyquery(self):
+        ##root = Selector(self.qt_doc)
+        ##self.assertEquals('test', root.select(pyquery='h1')[0].node.text)
+        ##self.assertEquals('z 4 foo', root.select(pyquery='body')[0].select(pyquery='#6')[0].node.text)
+
+    def test_select_select(self):
+        root = KitSelector(self.qt_doc)
+        self.assertEquals(set(['one', 'yet one']),
+                          set([x.text() for x in root.select('ul').select('li:first-child')]),
+                          )
+
+    def test_text_list(self):
+        root = KitSelector(self.qt_doc)
+        self.assertEquals(set(['one', 'yet one']),
+                          set(root.select('ul > li:first-child').text_list()),
+                          )
+
+    def test_attr_list(self):
+        root = KitSelector(self.qt_doc)
+        self.assertEquals(set(['li-1', 'li-2']),
+                          set(root.select('ul[id=second-list] > li')\
+                                  .attr_list('class'))
+                          )
+
+
+class TestSelectorList(TestCase):
+    def setUp(self):
+        g = Grab(transport='grab.transport.kit.KitTransport')
+        SERVER.RESPONSE['get'] = HTML
+        g.go(SERVER.BASE_URL)
+        self.qt_doc = g.transport.kit.page.mainFrame().documentElement()
+
+    def test_one(self):
+        sel = KitSelector(self.qt_doc).select('ul > li')
+        self.assertEquals('one', unicode(sel.one().node.toPlainText()))
+        self.assertEquals('one', sel.text())
+
+    def test_number(self):
+        sel = KitSelector(self.qt_doc).select('li:nth-child(4)')
+        self.assertEquals(4, sel.number())
+
+    def test_exists(self):
+        sel = KitSelector(self.qt_doc).select('li:nth-child(4)')
+        self.assertEquals(True, sel.exists())
+
+        sel = KitSelector(self.qt_doc).select('li:nth-child(5)')
+        self.assertEquals(False, sel.exists())