Anonymous avatar Anonymous committed 0100aed

Modified Files:
Changes README MANIFEST
+ notes about what is going on.

perl-libxml-sax.c perl-libxml-mm.c LibXML.xs
- push parser and SAX parser segfaults and memory leaks

LibXML.pm
+ more docs

t/02parse.t
+ more tests

Added Files:
LICENSE
+ make people happy

Comments (0)

Files changed (9)

 Revision history for Perl extension XML::LibXML
 
 *NOTE:* 
-Version 1.53 fixes potentional buffer overflows were possible with earlier
-versions of the package. It is recommended to upgrade any XML::LibXML since
-1.49 to 1.53 to avoid such problems.
+Version 1.54 fixes potentional buffer overflows were possible with earlier
+versions of the package. 
 
 1.54
    - Catalog interface is aware about libxml2 configuration
    - fixed default iterator of XML::LibXML::Iterator
    - interface for raw libxml2 DOM building functions 
      (currently just addChild(), others will follow)
-   - fixed memory leak in XML::LibXML::SAX
    - fixed memory leak in push parser code
    - fixed namespace handling if nodes are imported to a new DOM.
-   - fixed segmentation fault during validation
+   - fixed segmentation fault during validation under libxml2 2.4.25
    - fixed bad CDATA handing in XML::LibXML::SAX::Builder
    - fixed namespace handing in XML::LibXML::SAX
    - fixed attribute handing in XML::LibXML::SAX
+   - fixed memory leak       in XML::LibXML::SAX
+   - improved parser testsuite
    - more documentation 
 
 1.53
+XML::LibXML is dual licensed under the same terms as Perl itself.
+
+This means at your choice, either the Perl Artistic License, or
+the GNU GPL version 1 or higher.
+
+
             $MatchCB $ReadCB $OpenCB $CloseCB );
 use Carp;
 
-use XML::LibXML::Common qw(:encoding :libxml :w3c);
+use XML::LibXML::Common qw(:encoding :libxml);
 
 use XML::LibXML::NodeList;
 use IO::Handle; # for FH reads called as methods
 
-$VERSION = "1.53";
+
+$VERSION = "1.54";
 require Exporter;
 require DynaLoader;
 
 @ISA = qw(DynaLoader Exporter);
 
+#-------------------------------------------------------------------------#
+# export information                                                      #
+#-------------------------------------------------------------------------#
+%EXPORT_TAGS = (
+                all => [qw(
+                           XML_ELEMENT_NODE
+                           XML_ATTRIBUTE_NODE
+                           XML_TEXT_NODE
+                           XML_CDATA_SECTION_NODE
+                           XML_ENTITY_REF_NODE
+                           XML_ENTITY_NODE
+                           XML_PI_NODE
+                           XML_COMMENT_NODE
+                           XML_DOCUMENT_NODE
+                           XML_DOCUMENT_TYPE_NODE
+                           XML_DOCUMENT_FRAG_NODE
+                           XML_NOTATION_NODE
+                           XML_HTML_DOCUMENT_NODE
+                           XML_DTD_NODE
+                           XML_ELEMENT_DECL
+                           XML_ATTRIBUTE_DECL
+                           XML_ENTITY_DECL
+                           XML_NAMESPACE_DECL
+                           XML_XINCLUDE_END
+                           XML_XINCLUDE_START
+                           encodeToUTF8
+                           decodeFromUTF8
+                          )],
+                libxml => [qw(
+                           XML_ELEMENT_NODE
+                           XML_ATTRIBUTE_NODE
+                           XML_TEXT_NODE
+                           XML_CDATA_SECTION_NODE
+                           XML_ENTITY_REF_NODE
+                           XML_ENTITY_NODE
+                           XML_PI_NODE
+                           XML_COMMENT_NODE
+                           XML_DOCUMENT_NODE
+                           XML_DOCUMENT_TYPE_NODE
+                           XML_DOCUMENT_FRAG_NODE
+                           XML_NOTATION_NODE
+                           XML_HTML_DOCUMENT_NODE
+                           XML_DTD_NODE
+                           XML_ELEMENT_DECL
+                           XML_ATTRIBUTE_DECL
+                           XML_ENTITY_DECL
+                           XML_NAMESPACE_DECL
+                           XML_XINCLUDE_END
+                           XML_XINCLUDE_START
+                          )],
+                encoding => [qw(
+                                encodeToUTF8
+                                decodeFromUTF8
+                               )],
+               );
+
+@EXPORT_OK = (
+              @{$EXPORT_TAGS{all}},
+             );
+
+@EXPORT = (
+           @{$EXPORT_TAGS{all}},
+          );
+
+#-------------------------------------------------------------------------#
+# initialization of the global variables                                  #
+#-------------------------------------------------------------------------#
 $skipDTD            = 0;
 $skipXMLDeclaration = 0;
 $setTagCompression  = 0;
 $OpenCB  = undef;
 $CloseCB = undef;
 
+#-------------------------------------------------------------------------#
+# bootstrapping                                                           #
+#-------------------------------------------------------------------------#
 bootstrap XML::LibXML $VERSION;
 
+#-------------------------------------------------------------------------#
+# parser constructor                                                      #
+#-------------------------------------------------------------------------#
 sub new {
     my $class = shift;
     my %options = @_;
     return $self;
 }
 
+#-------------------------------------------------------------------------#
+# callback functions                                                      #
+#-------------------------------------------------------------------------#
 sub match_callback {
     my $self = shift;
     if ( ref $self ) {
     }
 }
 
+#-------------------------------------------------------------------------#
+# member variable manipulation                                            #
+#-------------------------------------------------------------------------#
 sub validation {
     my $self = shift;
     $self->{XML_LIBXML_VALIDATION} = shift if scalar @_;
     return $self->{XML_LIBXML_GDOME};
 }
 
+
+#-------------------------------------------------------------------------#
+# set the optional SAX(2) handler                                         #
+#-------------------------------------------------------------------------#
 sub set_handler {
     my $self = shift;
     if ( defined $_[0] ) {
     }
 }
 
+#-------------------------------------------------------------------------#
+# helper functions                                                        #
+#-------------------------------------------------------------------------#
 sub _auto_expand {
     my ( $self, $result, $uri ) = @_;
 
     return $result;
 }
 
+sub __read {
+    read($_[0], $_[1], $_[2]);
+}
+
+sub __write {
+    if ( ref( $_[0] ) ) {
+        $_[0]->write( $_[1], $_[2] );
+    }
+    else {
+        $_[0]->write( $_[1] );
+    }
+}
+
+#-------------------------------------------------------------------------#
+# parsing functions                                                       #
+#-------------------------------------------------------------------------#
+# all parsing functions handle normal as SAX parsing at the same time.
+# note that SAX parsing is handled incomplete! use XML::LibXML::SAX for
+# complete parsing sequences
+#-------------------------------------------------------------------------#
 sub parse_string {
     my $self = shift;
     croak("parse already in progress") if $self->{_State_};
     if ( defined $self->{SAX} ) {
         my $string = shift;
         $self->{SAX_ELSTACK} = [];
-        eval { $self->_parse_sax_string($string); };
+        eval {
+            $self->_parse_sax_string($string);
+        };
         my $err = $@;
         $self->{_State_} = 0;
         if ($err) {
 
     $self->{_State_} = 1;
     if ( defined $self->{SAX} ) {
-        eval { $result = $self->_parse_sax_xml_chunk( @_ ); };
+        eval {
+            $self->_parse_sax_xml_chunk( @_ );
+
+            # this is required for XML::GenericChunk.
+            # in normal case is_filter is not defined, an thus the parsing
+            # will be terminated. in case of a SAX filter the parsing is not
+            # finished at that state. therefore we must not reset the parsing
+            unless ( $self->{IS_FILTER} ) {
+                $result = $self->{HANDLER}->end_document();
+            }
+        };
     }
     else {
         eval { $result = $self->_parse_xml_chunk( @_ ); };
     return $result;
 }
 
+sub parse_balanced_chunk {
+    my $self = shift;
+    return $self->parse_xml_chunk( @_ );
+}
+
 sub processXIncludes {
     my $self = shift;
     my $doc = shift;
     return $self->_processXIncludes($doc || " ");
 }
 
+
+#-------------------------------------------------------------------------#
+# push parser interface                                                   #
+#-------------------------------------------------------------------------#
 sub init_push {
     my $self = shift;
 
     }
 
     if ( defined $self->{SAX} ) {
-        $self->{SAX_ELSTACK} = [];
         $self->{CONTEXT} = $self->_start_push(1);
     }
     else {
     }
 }
 
+# this function should be promoted!
+# the reason is because libxml2 uses xmlParseChunk() for this purpose!
+sub parse_chunk {
+    my $self = shift;
+    my $chunk = shift;
+    my $terminate = shift;
+
+    if ( not defined $self->{CONTEXT} ) {
+        $self->init_push();
+    }
+
+    if ( defined $chunk and length $chunk ) {
+        $self->_push( $self->{CONTEXT}, $chunk );
+    }
+
+    if ( $terminate ) {
+        return $self->finish_push();
+    }
+}
+
+
 sub finish_push {
     my $self = shift;
     my $restore = shift || 0;
     my $retval;
 
     if ( defined $self->{SAX} ) {
-        eval { $retval = $self->_end_sax_push( $self->{CONTEXT} ); };
+        eval {
+            $self->_end_sax_push( $self->{CONTEXT} );
+            $retval = $self->{HANDLER}->end_document( {} );
+        };
     }
     else {
         eval { $retval = $self->_end_push( $self->{CONTEXT}, $restore ); };
     }
+
     delete $self->{CONTEXT};
+
     if ( $@ ) {
         croak( $@ );
     }
     return $retval;
 }
 
-sub __read {
-    read($_[0], $_[1], $_[2]);
-}
-
-sub __write {
-    if ( ref( $_[0] ) ) {
-        $_[0]->write( $_[1], $_[2] );
-    }
-    else {
-        $_[0]->write( $_[1] );
-    }
-}
-
-
 1;
 
+#-------------------------------------------------------------------------#
+# XML::LibXML::Node Interface                                             #
+#-------------------------------------------------------------------------#
 package XML::LibXML::Node;
 
 sub isSupported {
 
 1;
 
+#-------------------------------------------------------------------------#
+# XML::LibXML::Document Interface                                         #
+#-------------------------------------------------------------------------#
 package XML::LibXML::Document;
 
 use vars qw(@ISA);
     return $retval;
 }
 
+#-------------------------------------------------------------------------#
+# bad style xinclude processing                                           #
+#-------------------------------------------------------------------------#
 sub process_xinclude {
     my $self = shift;
     XML::LibXML->new->processXIncludes( $self );
 
 1;
 
+#-------------------------------------------------------------------------#
+# XML::LibXML::DocumentFragment Interface                                 #
+#-------------------------------------------------------------------------#
 package XML::LibXML::DocumentFragment;
 
 use vars qw(@ISA);
 
 1;
 
+#-------------------------------------------------------------------------#
+# XML::LibXML::Element Interface                                          #
+#-------------------------------------------------------------------------#
 package XML::LibXML::Element;
 
 use vars qw(@ISA);
 
 1;
 
+#-------------------------------------------------------------------------#
+# XML::LibXML::Text Interface                                             #
+#-------------------------------------------------------------------------#
 package XML::LibXML::Text;
 
 use vars qw(@ISA);
 
 1;
 
+#-------------------------------------------------------------------------#
+# XML::LibXML::Attribute Interface                                        #
+#-------------------------------------------------------------------------#
 package XML::LibXML::Attr;
 use vars qw( @ISA ) ;
 @ISA = ('XML::LibXML::Node') ;
 
 1;
 
+#-------------------------------------------------------------------------#
+# XML::LibXML::Dtd Interface                                              #
+#-------------------------------------------------------------------------#
+# this is still under construction
+#
 package XML::LibXML::Dtd;
 use vars qw( @ISA );
 @ISA = ('XML::LibXML::Node');
 
 1;
 
+#-------------------------------------------------------------------------#
+# XML::LibXML::PI Interface                                               #
+#-------------------------------------------------------------------------#
 package XML::LibXML::PI;
 use vars qw( @ISA );
 @ISA = ('XML::LibXML::Node');
 
 1;
 
+#-------------------------------------------------------------------------#
+# XML::LibXML::Namespace Interface                                        #
+#-------------------------------------------------------------------------#
 package XML::LibXML::Namespace;
 
 # this is infact not a node!
 
 1;
 
+#-------------------------------------------------------------------------#
+# XML::LibXML::NamedNodeMap Interface                                     #
+#-------------------------------------------------------------------------#
 package XML::LibXML::NamedNodeMap;
 
 use XML::LibXML::Common qw(:libxml);
 
 package XML::LibXML::_SAXParser;
 
-# this is pseudo class!!!
+# this is pseudo class!!! and it will be removed as soon all functions
+# moved to XS level
 
 use XML::SAX::Exception;
 
-# the cdata section will go to the c-layer soon
-sub cdata_block {
-    my ( $parser, $data ) = @_;
-    $parser->{HANDLER}->start_cdata();
-    $parser->{HANDLER}->characters( $data );
-    $parser->{HANDLER}->end_cdata();
-}
-
 # these functions will use SAX exceptions as soon i know how things really work
 sub warning {
     my ( $parser, $message, $line, $col ) = @_;
 }
 
 1;
+
+#-------------------------------------------------------------------------#
+# XML::LibXML Parser documentation                                        #
+#-------------------------------------------------------------------------#
 __END__
 
 =head1 NAME
 
   my $doc = $parser->parse_html_file($filename);
 
-=head2 Push Parser
+=head2 The Push Parser
 
-XML::LibXML supports also a push parser interface. This allows one to
-parse large documents without actually loading the entire document
-into memory.
+XML::LibXML provides a push parser interface. This allows one to parse
+large documents without actually loading the entire document into
+memory. While parse_file() and parse_fh() won't load the document
+before parsing either. While parse_file() forces the data to be a
+wellformed XML file, parse_fh() may be used to parse data comming from
+any kind of source that delivers wellformed XML
+documents. parse_fh()'s parsing ability is limited to single
+documents. For a programmer there is no chance to interrupt the
+parsing process if for example multiple XML documents are recieved
+through the same channel. XML::LibXML's push parser works around this
+limitation and provides an interface to libxml2's pushparser. This
+parser will parse the data the application provides to it at the time
+they are pushed into the parser, rather than pulling the data itself.
 
-The interface is devided into two parts:
+Through this it is possible to preprocess incoming data if required -
+i.e. in the given example to find the document boundaries. Different
+to the pull parser implemented in parse_fh() or parse_file(), the push
+parser is not able to find out about the documents end itself. Thus
+the calling program needs to indicate explicitly when the parsing is
+done.
+
+In XML::LibXML this is done by a single function:
+
+  parse_chunk()
+
+parse_chunk() tries to parse a given chunk of data, which isn't
+nessecarily well balanced data. The function takes two parameters:
 
 =over 4
 
-=item * pushing the data into the parser
+=item 1. the chunk of data as a single string
 
-=item * finish the parse
+=item 2. (optional) a termination flag
 
 =back
 
-The user has no chance to access the document while still pushing the
-data to the parser. The resulting document will be returned when the
-parser is told to finish the parsing process.
+If the termination flag is set to a true value (e.g. 1), the parsing
+will be stopped and the resulting document will be returned.
+
+the following example may clearify this a bit:
+
+  my $parser = XML::LibXML->new;
+
+  for my $string ( "<", "foo", ' bar="hello worls"', "/>") {
+       $parser->parse_chunk( $string );
+  }
+  my $doc = $parser->parse_chunk("", 1); # terminate the parsing
+
+Internally the push parser uses two functions, push() and finish_push().
+they are not very usefull expect one likes to write a repairing parser.
+How to do this is described in the following part.
+
+Of course XML::LibXML's push parser is available as a SAX parser as
+well. To make use of the SAX capabilities one must any the SAX as the
+parsers SAX handler; otherwise parse_chunk() will work in the default
+mode.
 
 =over 4
 
 restore broken or non well formed (XML) documents as the following
 example shows:
 
-  $parser->push( "<foo>", "bar" );
-  eval { $doc = $parser->finish_push(); };      # will complain
+  eval {
+      $parser->push( "<foo>", "bar" );
+      $doc = $parser->finish_push();    # will report broken XML
+  };
   if ( $@ ) {
      # ...
   }
 This can be anoing if the closing tag misses by accident. The
 following code will restore the document:
 
-  $parser->push( "<foo>", "bar" );
-  eval { $doc = $parser->finish_push(1); };      # will not complain
-
+  eval {
+      $parser->push( "<foo>", "bar" );
+      $doc = $parser->finish_push(1);   # will return the data parsed
+                                        # until an error happend
+  };
   warn $doc->toString(); # returns "<foo>bar</foo>"
 
 of course finish_push() will return nothing if there was no data pushed to
         xmlParseChunk(ctxt, chunk, len, 0);
         LibXML_cleanup_callbacks();
         LibXML_cleanup_parser();
+    
         sv_2mortal(LibXML_error); 
+        LibXML_croak_error();
 
         RETVAL = 1;
     OUTPUT:
     CODE:
         PmmNODE( SvPROXYNODE( pctxt ) ) = NULL;
         LibXML_init_parser(self); 
+
         xmlParseChunk(ctxt, "", 0, 1); /* finish the parse */
         LibXML_cleanup_callbacks();
         LibXML_cleanup_parser();
+
         sv_2mortal(LibXML_error);
-        if ( ctxt->node != NULL && restore == 0 ) {
-            xmlFreeParserCtxt(ctxt);            
-            LibXML_croak_error();
+    
+        if ( SvCUR( LibXML_error ) > 0 && restore == 0 ) { 
+            xmlFreeDoc( ctxt->myDoc );
+            xmlFreeParserCtxt(ctxt); 
+            croak("%s",SvPV(LibXML_error, len));
         }
 
         doc = ctxt->myDoc;
+        ctxt->myDoc = NULL;
         xmlFreeParserCtxt(ctxt);
         if ( doc == NULL ){
             croak( "no document found!" );
         }
     CODE:
         PmmNODE( SvPROXYNODE( pctxt ) ) = NULL;
-        LibXML_init_parser(self); 
+        LibXML_init_parser(self);
+
         xmlParseChunk(ctxt, "", 0, 1); /* finish the parse */
-
         LibXML_cleanup_callbacks();
         LibXML_cleanup_parser();    
-        sv_2mortal(LibXML_error); 
+        sv_2mortal(LibXML_error);
 
         PmmSAXCloseContext(ctxt);
         xmlFreeParserCtxt(ctxt);
-        XSRETURN_UNDEF;
 
 SV*
 import_GDOME( dummy, sv_gdome, deep=1 )
 Changes
+LICENSE
 LibXML.pm
 LibXML.xs
 Makefile.PL
-XML::LibXML
+
+                $result = $self->{HANDLER}->end_document();XML::LibXML
 ===========
 
 This module implements much of the DOM Level 2 API as an 
 
 Copyright 2001-2002 AxKit.com Ltd, All rights reserved.
 
+
+DISCLAIMER
+==========
+
+THIS PROGRAM IS DISTRIBUTED IN THE HOPE THAT IT WILL BE USEFUL, BUT
+WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED WARRANTY OF
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
 
 #endif
 
+#include "perl-libxml-sax.h"
+
 #ifdef __cplusplus
 }
 #endif
             xs_warn( "NODE DELETATION\n" );
             libnode = (xmlParserCtxtPtr)PmmNODE( node );
             if ( libnode != NULL ) {
-                free( libnode->_private );
-                libnode->_private = NULL;
+                if (libnode->_private != NULL ) {
+                    if ( libnode->_private != (void*)node ) {
+                        PmmSAXCloseContext( libnode );
+                    }
+                    else {
+                        xmlFree( libnode->_private );
+                    }
+                    libnode->_private = NULL;
+                }
                 PmmNODE( node )   = NULL;
                 xmlFreeParserCtxt(libnode);
             }

perl-libxml-sax.c

 {
     PmmSAXVector * vec = (PmmSAXVectorPtr) ctxt->_private;
     dTHX;
-    
+
     if ( vec->handler != NULL ) {
         SvREFCNT_dec( vec->handler );
+        vec->handler = NULL;
     }
 
     xmlFree( ctxt->sax );
     ctxt->sax = NULL;
 
     SvREFCNT_dec( vec->parser );
+    vec->parser = NULL;
 
     xmlFreeDoc( vec->ns_stack_root );
-    
+    vec->ns_stack_root;
     xmlFree( vec );
+    ctxt->_private = NULL;
 }
 
 
     XPUSHs(rv);
     PUTBACK;
 
-    perl_call_method( "start_prefix_mapping", 0 );
+    perl_call_method( "start_prefix_mapping", G_SCALAR | G_EVAL );
     sv_2mortal(rv);
     FREETMPS ;
     LEAVE ;
     XPUSHs(rv);
     PUTBACK;
 
-    perl_call_method( "end_prefix_mapping", 0 );
+    perl_call_method( "end_prefix_mapping", G_SCALAR | G_EVAL );
 
     sv_2mortal(rv);
 
             value = *ta; ta++;
 
             if ( name != NULL && xmlStrlen( name ) ) {
+                localname = xmlSplitQName(NULL, name, &prefix);
 
                 hv_store(atV, "Name", 4,
                          _C2Sv(name, NULL), NameHash);
                     
                 }
                 else if (xmlStrncmp("xmlns:", name, 6 ) == 0 ) {
-                    localname = xmlSplitQName(NULL, name, &prefix);                        
                     PmmAddNamespace( sax,
                                      localname,
                                      value,
                              _C2Sv(NSDEFAULTURI,NULL),
                              NsURIHash);
                 }
-                else if ( ns = PmmGetNsMapping( sax->ns_stack, prefix ) ) {
-                    localname = xmlSplitQName(NULL, name, &prefix);        
-                        
+                else if ( prefix != NULL
+                          && (ns = PmmGetNsMapping( sax->ns_stack, prefix ) ) ) {
                     hv_store(atV, "NamespaceURI", 12,
                              _C2Sv(ns->href, NULL), NsURIHash);
                     hv_store(atV, "Prefix", 6,
         XPUSHs(sv_2mortal(newRV_noinc((SV*)empty)));
         PUTBACK;
         
-        count = perl_call_method( "start_document", 0 );
+        count = perl_call_method( "start_document", G_SCALAR | G_EVAL );
         
         SPAGAIN;
 
 
         PUTBACK;
         
-        count = perl_call_method( "xml_decl", 0 );
+        count = perl_call_method( "xml_decl", G_SCALAR | G_EVAL );
         sv_2mortal(rv);
 
         FREETMPS ;
     XPUSHs(sax->parser);
     PUTBACK;
 
-    count = perl_call_pv( "XML::LibXML::_SAXParser::end_document", 0 );
+    count = perl_call_pv( "XML::LibXML::_SAXParser::end_document", G_SCALAR | G_EVAL );
 
     FREETMPS ;
     LEAVE ;
     XPUSHs(rv);
     PUTBACK;
 
-    count = perl_call_method( "start_element", 0 );
+    count = perl_call_method( "start_element", G_SCALAR | G_EVAL );
     
     sv_2mortal(rv) ;
     FREETMPS ;
     XPUSHs(rv);
     PUTBACK;
 
-    count = perl_call_method( "end_element", 0 );
+    count = perl_call_method( "end_element", G_SCALAR | G_EVAL );
 
     sv_2mortal(rv);
 
         XPUSHs(rv);
         PUTBACK;
 
-        count = perl_call_method( "characters", 0 );
+        count = perl_call_method( "characters", G_SCALAR | G_EVAL );
 
         sv_2mortal(rv);
 
         XPUSHs(rv);
         PUTBACK;
 
-        count = perl_call_method( "comment", 0 );
+        count = perl_call_method( "comment", G_SCALAR | G_EVAL );
 
         sv_2mortal(rv);
 
         PUSHMARK(SP) ;
         XPUSHs(handler);
         PUTBACK;
-        count = perl_call_method( "start_cdata", 0 );
+        count = perl_call_method( "start_cdata", G_SCALAR | G_EVAL );
 
         SPAGAIN;        
         PUSHMARK(SP) ;
         XPUSHs(rv);
         PUTBACK;
 
-        count = perl_call_method( "characters", 0 );
+        count = perl_call_method( "characters", G_SCALAR | G_EVAL );
 
         SPAGAIN;        
         PUSHMARK(SP) ;
         XPUSHs(handler);
         PUTBACK;
 
-        count = perl_call_method( "end_cdata", 0 );
+        count = perl_call_method( "end_cdata", G_SCALAR | G_EVAL );
 
         sv_2mortal(rv);
 
 
         PUTBACK;
 
-        count = perl_call_method( "processing_instruction", 0 );
+        count = perl_call_method( "processing_instruction", G_SCALAR | G_EVAL );
 
         sv_2mortal(rv);
 
 
     PUTBACK;
 
-    perl_call_pv( "XML::LibXML::_SAXParser::warning", 0 );
+    perl_call_pv( "XML::LibXML::_SAXParser::warning", G_SCALAR | G_EVAL );
     
     FREETMPS ;
     LEAVE ;
-    SvREFCNT_dec(svMessage);
+
     return 1;
 }
 
     XPUSHs(sv_2mortal(newSViv(ctxt->input->line)));
     XPUSHs(sv_2mortal(newSViv(ctxt->input->col)));
     PUTBACK;
-    perl_call_pv( "XML::LibXML::_SAXParser::error", 0 );
+    perl_call_pv( "XML::LibXML::_SAXParser::error", G_SCALAR | G_EVAL );
     
     FREETMPS ;
     LEAVE ;
-    SvREFCNT_dec(svMessage);
     return 1;
 }
 
     XPUSHs(sv_2mortal(newSViv(ctxt->input->line)));
     XPUSHs(sv_2mortal(newSViv(ctxt->input->col)));
     PUTBACK;
-    perl_call_pv( "XML::LibXML::_SAXParser::fatal_error", 0 );
+    perl_call_pv( "XML::LibXML::_SAXParser::fatal_error", G_SCALAR | G_EVAL );
     
     FREETMPS ;
     LEAVE ;
-    SvREFCNT_dec(svMessage);
     return 1;
 }
 
 use Test;
 use IO::File;
 
-BEGIN { plan tests => 351};
+BEGIN { plan tests => 457 };
 use XML::LibXML;
 use XML::LibXML::Common qw(:libxml);
 use XML::LibXML::SAX;
                     );
 
 my @goodWFNSStrings = (
-XML_DECL. '<foobar xmlns:bar="foo" bar:foo="bar"/>',
-XML_DECL. '<foobar xmlns="foo" foo="bar"><foo/></foobar>',
-XML_DECL. '<bar:foobar xmlns:bar="foo" foo="bar"><bar:foo/></bar:foobar>',
-XML_DECL. '<bar:foobar xmlns:bar="foo" foo="bar"><foo/></bar:foobar>',
-XML_DECL. '<bar:foobar xmlns:bar="foo" bar:foo="bar"><bar:foo/></bar:foobar>',
+XML_DECL. '<foobar xmlns:bar="foo" bar:foo="bar"/>'."\n",
+XML_DECL. '<foobar xmlns="foo" foo="bar"><foo/></foobar>'."\n",
+XML_DECL. '<bar:foobar xmlns:bar="foo" foo="bar"><foo/></bar:foobar>'."\n",
+XML_DECL. '<bar:foobar xmlns:bar="foo" foo="bar"><bar:foo/></bar:foobar>'."\n",
+XML_DECL. '<bar:foobar xmlns:bar="foo" bar:foo="bar"><bar:foo/></bar:foobar>'."\n",
                       );
 
 my @goodWFDTDStrings = (
 '<ouch><!-----></ouch>',                   # bad either... (is this conform with the spec????)
                     );
 
-my @goodWBStrings = (
-" ",
-"<!--good-->",
-"<![CDATA[>&<]]>",
-"foo<bar/>foo",
-"foo<bar/>",
-"<bar/>foo",
-"&gt;&#160;",
-'<foo bar="&gt;"/>',
-'<foo/>&gt;',
-'<foo/><bar/>',
-'<bar:foobar xmlns:bar="foo" bar:foo="bar"/><foo/>',
-                    );
-
-my @badWBStrings = (
-"",
-"<ouch>",
-"<ouch>bar",
-"bar</ouch>",
-"<ouch/>&foo;", # undefined entity
-"&",            # bad char
-"h�h?",         # bad encoding
-"<!--->",       # bad stays bad ;)
-"<!----->",     # bad stays bad ;)
-);
-
-
     my %goodPushWF = (
 single1 => ['<foobar/>'],
 single2 => ['<foobar>','</foobar>'],
 my $badfile1 = "example/bad.xml";
 my $badfile2 = "does_not_exist.xml";
 
-
 my $parser = XML::LibXML->new();
 
 print "# 1 NON VALIDATING PARSER\n";
 
 $parser->pedantic_parser(0);
 
-
-
-print "# 1.2 WELL BALLANCED STRING PARSING\n";
-
-print "# 1.2.1 DEFAULT VALUES\n";
-{
-    foreach my $str ( @goodWBStrings ) {
-        my $fragment = $parser->parse_xml_chunk($str);
-        ok($fragment);
-    }
-}
-
-eval { my $fail = $parser->parse_xml_chunk(undef); };
-ok($@);
-
-eval { my $fail = $parser->parse_xml_chunk(undef); };
-ok($@);
-
-foreach my $str ( @badWBStrings ) {
-    eval { my $fail = $parser->parse_xml_chunk($str); };  
-    ok($@);
-}
-
-
-print "# 1.3 PARSE A FILE\n";
+print "# 1.2 PARSE A FILE\n";
 
 {
     my $doc = $parser->parse_file($goodfile);
     $XML::LibXML::skipXMLDeclaration = 0;
 }
 
-print "# 1.4 PARSE A HANDLE\n";
+print "# 1.3 PARSE A HANDLE\n";
 
 my $fh = IO::File->new($goodfile);
 ok($fh);
     ok( scalar @cn, 3 );
 }
 
-print "# 1.5 x-include processing\n";
+print "# 1.4 x-include processing\n";
 
 my $goodXInclude = q{
 <x>
     ok($@);
 }
 
-print "# 2 push parser\n";
+print "# 2 PUSH PARSER\n";
 
 {
+    print "# 2.1 PARSING WELLFORMED DOCUMENTS\n";
     foreach my $key ( keys %goodPushWF ) {
         foreach ( @{$goodPushWF{$key}} ) {
-            $parser->push( $_);
+            $parser->parse_chunk( $_ );
         }
 
         my $doc;
-        eval {$doc = $parser->finish_push; };
-        ok($doc && !$@);                    
+        eval {$doc = $parser->parse_chunk("",1); };
+        ok($doc && !$@);      
     }
 
     my @good_strings = ("<foo>", "bar", "</foo>" );
-    my @bad_strings  = ("<foo>", "bar");
+    my %bad_strings  = ( 
+                            predocend1   => ["<A>" ],
+                            predocend2   => ["<A>", "B"],
+                            predocend3   => ["<A>", "<C>"],
+                            predocend4   => ["<A>", "<C/>"],
+                            postdocend1  => ["<A/>", "<C/>"],
+# use with libxml2 2.4.26:  postdocend2  => ["<A/>", "B"],    # libxml2 < 2.4.26 bug
+                            postdocend3  => ["<A/>", "BB"],
+                            badcdata     => ["<A> ","<!","[CDATA[B]","</A>"],
+                            badending1   => ["<A> ","B","</C>"],
+                            badending2   => ["<A> ","</C>","</A>"],
+                       );
 
     my $parser = XML::LibXML->new;
     {
         for ( @good_strings ) {        
-            $parser->push( $_ );
+            $parser->parse_chunk( $_ );
         }
-        my $doc = $parser->finish_push;
+        my $doc = $parser->parse_chunk("",1);
         ok($doc);
     }
 
     {
-        foreach ( @bad_strings ) {
-            $parser->push( $_);
+        print "# 2.2 PARSING BROKEN DOCUMENTS\n";
+        my $doc;
+        foreach my $key ( keys %bad_strings ) {
+            print "# $key\n";
+            $doc = undef;
+            foreach ( @{$bad_strings{$key}} ) {
+               eval { $parser->parse_chunk( $_ );};
+            }
+            if ( $@ ) {
+                ok(1);
+                $parser->parse_chunk("",1); # will cause no harm anymore, but is still needed
+                next;
+            }
+            eval {    
+                $doc = $parser->parse_chunk("",1);
+            };
+            ok( $@ );
         }
 
-        eval { my $doc = $parser->finish_push; };
-        ok( $@ );
     }
 
     {
+        print "# 2.3 RECOVERING PUSH PARSER\n";
         $parser->init_push;
 
-        foreach ( @bad_strings ) {
+        foreach ( "<A>", "B" ) {
             $parser->push( $_);
         }
 
     ok( scalar @cn );
     ok( $cn[0]->nodeType, XML_CDATA_SECTION_NODE );
     ok( $cn[0]->textContent, "&foo<bar" );
+    ok( $cn[0]->toString, '<![CDATA[&foo<bar]]>');
 
     print "# 3.2 NAMESPACE TESTS\n";
 
+    my $i = 0;
     foreach my $str ( @goodWFNSStrings ) {
         my $doc = $generator->parse_string( $str );
         ok( $doc );
+
+        # skip the nested node tests until there is a xmlNormalizeNs().
+        ok(1),next if $i > 2;
+
+        ok( $doc->toString(), $str );
+        $i++
     }
 
     print "# DATA CONSISTENCE\n";    
         ok(0);
     }
 
+    my $root = $doc->documentElement;
+
+    # bad thing: i have to do some NS normalizing.
+    # libxml2 will only do some fixing. this will lead to multiple 
+    # declarations, if a node with a new namespace is added.
+
+    # my $vstring = q{<foo xmlns:bar="http://foo.bar">bar<bar:bi/></foo>};
+    my $vstring = q{<foo xmlns:bar="http://foo.bar">bar<bar:bi xmlns:bar="http://foo.bar"/></foo>};
+    ok($root->toString, $vstring );
+
     print "# 3.3 INTERNAL SUBSETS\n";
 
     foreach my $str ( @goodWFDTDStrings ) {
     ok($doc);
 
     print "# 3.6 PARSE CHUNK\n";
-    
+
+        
 }
 
 print "# 4 SAXY PUSHER\n";
     }
 }
 
+print "# 5 PARSE WELL BALANCED CHUNKS\n";
+{
+    my $MAX_WF_C = 10;
+    my $MAX_WB_C = 16;
+
+    my %chunks = ( 
+                    wellformed1  => '<A/>',
+                    wellformed2  => '<A></A>',
+                    wellformed3  => '<A B="C"/>',
+                    wellformed4  => '<A>D</A>',
+                    wellformed5  => '<A><![CDATA[D]]></A>',
+                    wellformed6  => '<A><!--D--></A>',
+                    wellformed7  => '<A><K/></A>',
+                    wellformed8  => '<A xmlns="E"/>',
+                    wellformed9  => '<F:A xmlns:F="G" F:A="B">D</F:A>',
+                    wellformed10 => '<!--D-->',                    
+                    wellbalance1 => '<A/><A/>',
+                    wellbalance2 => '<A></A><A></A>',
+                    wellbalance3 => '<A B="C"/><A B="H"/>',
+                    wellbalance4 => '<A>D</A><A>I</A>',
+                    wellbalance5 => '<A><K/></A><A><L/></A>',
+                    wellbalance6 => '<A><![CDATA[D]]></A><A><![CDATA[I]]></A>',
+                    wellbalance7 => '<A><!--D--></A><A><!--I--></A>',
+                    wellbalance8 => '<F:A xmlns:F="G" F:A="B">D</F:A><J:A xmlns:J="G" J:A="M">D</J:A>',
+                    wellbalance9 => 'D<A/>',                    
+                    wellbalance10=> 'D<A/>D',
+                    wellbalance11=> 'D<A/><!--D-->',
+                    wellbalance12=> 'D<A/><![CDATA[D]]>',
+                    wellbalance13=> '<![CDATA[D]]><A/>D',
+                    wellbalance14=> '<!--D--><A/>',
+                    wellbalance15=> '<![CDATA[D]]>',
+                    wellbalance16=> 'D',
+                 );
+
+    my @badWBStrings = (
+        "",
+        "<ouch>",
+        "<ouch>bar",
+        "bar</ouch>",
+        "<ouch/>&foo;", # undefined entity
+        "&",            # bad char
+        "h�h?",         # bad encoding
+        "<!--->",       # bad stays bad ;)
+        "<!----->",     # bad stays bad ;)
+    );
+
+
+    my $parser = XML::LibXML->new;
+    
+    print "# 5.1 DOM CHUNK PARSER\n";
+
+    for ( 1..$MAX_WF_C ) {
+        my $frag = $parser->parse_xml_chunk($chunks{'wellformed'.$_});
+        ok($frag);
+        if ( $frag->nodeType == XML_DOCUMENT_FRAG_NODE
+             && $frag->hasChildNodes ) {
+            if ( $frag->firstChild->isSameNode( $frag->lastChild ) ) {
+                if ( $chunks{'wellformed'.$_} =~ /\<A\>\<\/A\>/ ) {
+                    $_--;
+                }
+                ok($frag->toString,$chunks{'wellformed'.$_});                
+                next;
+            }
+        }
+        ok(0);
+    }
+
+    for ( 1..$MAX_WB_C ) {
+        my $frag = $parser->parse_xml_chunk($chunks{'wellbalance'.$_});
+        ok($frag);
+        if ( $frag->nodeType == XML_DOCUMENT_FRAG_NODE
+             && $frag->hasChildNodes ) {
+            if ( $chunks{'wellbalance'.$_} =~ /<A><\/A>/ ) {
+                $_--;
+            }
+            ok($frag->toString,$chunks{'wellbalance'.$_});                
+            next;
+        }
+        ok(0);
+    }
+
+    eval { my $fail = $parser->parse_xml_chunk(undef); };
+    ok($@);
+
+    eval { my $fail = $parser->parse_xml_chunk(""); };
+    ok($@);
+
+    foreach my $str ( @badWBStrings ) {
+        eval { my $fail = $parser->parse_xml_chunk($str); };  
+        ok($@);
+    }
+
+    print "# 5.2 SAX CHUNK PARSER\n";
+
+    my $handler = XML::LibXML::SAX::Builder->new();
+    $parser->set_handler( $handler );
+    for ( 1..$MAX_WF_C ) {
+        my $frag = $parser->parse_xml_chunk($chunks{'wellformed'.$_});
+        ok($frag);
+        if ( $frag->nodeType == XML_DOCUMENT_FRAG_NODE
+             && $frag->hasChildNodes ) {
+            if ( $frag->firstChild->isSameNode( $frag->lastChild ) ) {
+                if ( $chunks{'wellformed'.$_} =~ /\<A\>\<\/A\>/ ) {
+                    $_--;
+                }
+                ok($frag->toString,$chunks{'wellformed'.$_});                
+                next;
+            }
+        }
+        ok(0);
+    }
+
+    for ( 1..$MAX_WB_C ) {
+        my $frag = $parser->parse_xml_chunk($chunks{'wellbalance'.$_});
+        ok($frag);
+        if ( $frag->nodeType == XML_DOCUMENT_FRAG_NODE
+             && $frag->hasChildNodes ) {
+            if ( $chunks{'wellbalance'.$_} =~ /<A><\/A>/ ) {
+                $_--;
+            }
+            ok($frag->toString,$chunks{'wellbalance'.$_});                
+            next;
+        }
+        ok(0);
+    }
+}
+
 sub tsub {
     my $doc = shift;
 
     $e2->appendChild( $th->{d}->importNode( $doc->documentElement() ) );
 
     return $th->{d};
-}
+}
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.