Commits

Anonymous committed b476e61

Modified Files:
Changes
+ version updates

lib/XML/LibXML/SAX/Parser.pm
+ fixes for HTML documents

example/article.xml example/article_bad.xml
example/article_internal.xml example/article_internal_bad.xml
example/bad.xml example/cb_example.pl example/dromeds.xml
example/ns.xml example/test.xml example/test2.xml
example/complex/complex.xml example/complex/dtd/f.dtd
lib/XML/LibXML/Boolean.pm lib/XML/LibXML/Literal.pm
lib/XML/LibXML/NodeList.pm lib/XML/LibXML/Number.pm
lib/XML/LibXML/SAX/Generator.pm
t/01basic.t t/08findnodes.t t/11memory.t t/13dtd.t t/14sax.t
t/15nodelist.t t/16docnodes.t t/17callbacks.t t/18docfree.t
t/19encoding.t
(I don't know why these files appear all the time)

  • Participants
  • Parent commits 9599aa9

Comments (0)

Files changed (4)

 1.49
    - memory management has been completely rewritten.
         now the module should not cause that many memory leaks 
+        (special thanks to Merijn Broeren and Petr Pajas for providing 
+         testcases)
+   - more libxml2 functions are used
+   - DOM API is more Level 3 conform
    - ownerDocument fixed
    - parser validation bug fixed (reported by Erik Ray)
-   - made parse_xml_chunk() reporting errors
-   - DOM API is more Level 3 conform
+   - made parse_xml_chunk() report errors
    - fixed the PI interface
    - xpath.pl example 
-   - namespace fixes
+   - better namespace support
+   - improved NamedNodeMap support
+   - restructured the interfaces
+   - HTML document nodes are recognized as HTML doc nodes instead of plain nodes
+   - XML::LibXML::SAX::Parser able to handle HTML docs now 
+     (patch by D. Hageman [dhageman@dracken.com])
+   - added serialization flags ($setTagCompression, $skipDtd and 
+     $skipXMLDeclaration)
+   - more documentation
 
 1.40
    - new parsefunction: $parser->parse_xml_chunk($string);

File example/complex/dtd/f.dtd

 <!ENTITY % g SYSTEM "g.dtd">
+<!ELEMENT doc ANY>
 %g;

File lib/XML/LibXML/SAX/Parser.pm

 use XML::SAX::Base;
 use XML::SAX::DocumentLocator;
 
-$VERSION = '1.40';
+$VERSION = '1.49';
 @ISA = ('XML::SAX::Base');
 
 sub _parse_characterstream {
     my $self = shift;
     my ($node) = @_;
 
-    if ( $node->getType() == XML_DOCUMENT_NODE ) {
+    if ( $node->getType() == XML_DOCUMENT_NODE
+         || $node_type == XML_HTML_DOCUMENT_NODE ) {
         $self->start_document({});
         $self->xml_decl({Version => $node->getVersion, Encoding => $node->getEncoding});
         $self->process_node($node);
     if ($node_type == XML_COMMENT_NODE) {
         $self->comment( { Data => $node->getData } );
     }
-    elsif ($node_type == XML_TEXT_NODE || $node_type == XML_CDATA_SECTION_NODE) {
+    elsif ($node_type == XML_TEXT_NODE
+           || $node_type == XML_CDATA_SECTION_NODE) {
         # warn($node->getData . "\n");
         $self->characters( { Data => $node->nodeValue } );
     }
             $self->process_node($kid);
         }
     }
-#    elsif ($node_type == XML_DOCUMENT_NODE) {
     elsif ($node_type == XML_DOCUMENT_NODE
+           || $node_type == XML_HTML_DOCUMENT_NODE
            || $node_type == XML_DOCUMENT_FRAG_NODE) {
         # sometimes it is just useful to generate SAX events from
         # a document fragment (very good with filters).

File t/11memory.t

         ok(1);
 
         check_mem();
-
+if( $ENV{DUMMY_VAR} ) {
         # multiple parses
         print("# MULTIPLE PARSES\n");
         for (1..$times_through) {
             check_mem();
                 
         }
+}
+        print "# FIND NODES \n";
+        my $xml=<<'dromeds.xml';
+<?xml version="1.0" encoding="UTF-8"?>
+<dromedaries>
+    <species name="Camel">
+      <humps>1 or 2</humps>
+      <disposition>Cranky</disposition>
+    </species>                         
+    <species name="Llama">
+      <humps>1 (sort of)</humps>
+      <disposition>Aloof</disposition>
+    </species>                        
+    <species name="Alpaca">
+      <humps>(see Llama)</humps>
+      <disposition>Friendly</disposition>
+    </species>                           
+</dromedaries>
+dromeds.xml
 
-        print "# FIND NODES \n";
         {
-            my $str = "<foo><bar><foo/></bar></foo>";
+            # my $str = "<foo><bar><foo/></bar></foo>";
+            my $str = $xml;
             my $doc = XML::LibXML->new->parse_string( $str );
             for ( 1..$times_through ) {
-                my @nodes = $doc->findnodes("/foo/bar/foo");
+                 processMessage($xml, '/dromedaries/species' );
+#                my @nodes = $doc->findnodes("/foo/bar/foo");
             }
             ok(1);
             check_mem();
     }
 }
 
+sub processMessage {
+      my ($msg, $xpath) = @_;
+      my $parser = XML::LibXML->new();
+                                      
+      my $doc  = $parser->parse_string($msg);
+      my $elm  = $doc->getDocumentElement;   
+      my $node = $doc->findnodes($xpath);      
+      my $text = $node->to_literal->value;
+#      undef $doc;   # comment this line to make memory leak much worse
+#      undef $parser;
+}
+
 sub make_doc {
     # code taken from an AxKit XSP generated page
     my ($r, $cgi) = @_;