Commits

Benjamin Wohlwend  committed ae37938

ensure that a pyquery instance that was instantiated using the
html parser continues to use that parser for new HTML fragments

  • Participants
  • Parent commits ca98385

Comments (0)

Files changed (2)

File pyquery/pyquery.py

         html = None
         elements = []
         self._base_url = None
-        parser = kwargs.get('parser')
+        self.parser = kwargs.get('parser', None)
         if 'parser' in kwargs:
             del kwargs['parser']
         if not kwargs and len(args) == 1 and isinstance(args[0], basestring) \
                 self._base_url = url
             else:
                 raise ValueError('Invalid keyword arguments %s' % kwargs)
-            elements = fromstring(html, parser)
+            elements = fromstring(html, self.parser)
         else:
             # get nodes
 
             # get context
             if isinstance(context, basestring):
                 try:
-                    elements = fromstring(context, parser)
+                    elements = fromstring(context, self.parser)
                 except Exception, e:
                     raise ValueError('%r, %s' % (e, context))
             elif isinstance(context, self.__class__):
             for tag in self:
                 for child in tag.getchildren():
                     tag.remove(child)
-                root = etree.fromstring('<root>' + new_html + '</root>')
+                root = fromstring('<root>' + new_html + '</root>', self.parser)[0]
                 children = root.getchildren()
                 if children:
                     tag.extend(children)
 
     def _get_root(self, value):
         if  isinstance(value, basestring):
-            root = etree.fromstring('<root>' + value + '</root>')
+            root = fromstring('<root>' + value + '</root>', self.parser)[0]
         elif isinstance(value, etree._Element):
             root = self.__class__(value)
         elif isinstance(value, PyQuery):

File pyquery/test.py

         val = d('a:last').html()
         assert val == ' My link text 2', repr(val)
 
+class TestHTMLParser(unittest.TestCase):
+    xml = "<div>I'm valid XML</div>"
+    html = '''
+    <div class="portlet">
+        Behind you, a three-headed HTML&dash;Entity!
+    </div>
+    '''
+    
+    def test_parser_persistance(self):
+        d = pq(self.xml, parser='xml')
+        self.assertRaises(etree.XMLSyntaxError, lambda: d.after(self.html))
+        d = pq(self.xml, parser='html')
+        d.after(self.html) # this should not fail
+        
 if __name__ == '__main__':
     fails, total = unittest.main()
     if fails == 0: