Anonymous avatar Anonymous committed 74ac949

This is an incomplete checkin. The whole system compiles and runs,
but long-running operation is not guaranteed. This is because libxml2 does not
(yet) implement reinitialization of callbacks, therefore the current
implementation will fill the cb-stack with each parse.

The interface also changed slightly:
The interface now allows (currently more theoretically) multiple
parser instances in a single process. Each parser may have its own
set of callbacks. The multiple-parser implementation has a Perl and a C
layer. The C layer is implemented in parser.c.

The libxml2 parser is now initialized each time a file, string or
handle is parsed. Therefore the parse_* functions can be assumed to be
atomic functions.

Modified Files:
LibXML.pm
+ documentation
+ perl-layer for callback/ parser handling
+ extended parsing functions
! Version is now 1.01

LibXML.xs
+ applied T.J. Mathers's patch (included the new version nbr.) :)
+ wrapper functions for c-layer of the parser handling

MANIFEST
+ the new files

PHISHS.CHANGES
=)

typemap
+ perlxmlParserObjectPtr as O_OBJECT

t/07nodelist.t t/13dtd.t
+ adapted to the changed interface

Added Files:
parser.c parser.h
C-Layer for the multiple parser interface

example/cb_example.pl example/test.xml example/test2.xml
example how to use callbacks and xincludes
(the xml files are used by make test!!!)

example/dtd.xml
required by test 13dtd.t

t/16docnodes.t t/17callbacks.t
additional tests

TODO:
cleanup testsuite
fixing callbackstack bug
adding user configurable error callback
testing (and fixing) of the entity loader callback
initializing the parser for separate xincludes
check phish-engliche ;)

Comments (0)

Files changed (15)

 use Carp;
 use XML::LibXML::NodeList;
 
-$VERSION = "1.00";
+$VERSION = "1.01";
 require Exporter;
 require DynaLoader;
 
     my $class = shift;
     my %options = @_;
     my $self = bless \%options, $class;
+    $self->{XML_LIBXML_PARSER_OBJECT} = $self->_init_parser();
     return $self;
 }
 
+# Destructor: hands this instance's parser object to the XS routine
+# _delete_parser and then resets the slot that held it to undef.
+sub XML::LibXML::DESTROY {
+    my ($self) = @_;
+    $self->_delete_parser( $self->{XML_LIBXML_PARSER_OBJECT} );
+    undef $self->{XML_LIBXML_PARSER_OBJECT};
+}
+
+# Get or set the Perl-level "match" input callback.
+#
+# With an argument, stores it in $self->{XML_LIBXML_MATCH_CB} (pass undef to
+# clear the callback); the parse_* methods hand the stored value to the
+# parser object.  Without an argument the stored value is left untouched, so
+# the method also works as a plain getter.
+# Returns the currently stored callback.
+sub match_callback {
+    my $self = shift;
+    $self->{XML_LIBXML_MATCH_CB} = shift if @_;
+    return $self->{XML_LIBXML_MATCH_CB};
+}
+
+# Get or set the Perl-level "read" input callback.
+#
+# With an argument, stores it in $self->{XML_LIBXML_READ_CB} (pass undef to
+# clear the callback); the parse_* methods hand the stored value to the
+# parser object.  Without an argument the stored value is left untouched, so
+# the method also works as a plain getter.
+# Returns the currently stored callback.
+sub read_callback {
+    my $self = shift;
+    $self->{XML_LIBXML_READ_CB} = shift if @_;
+    return $self->{XML_LIBXML_READ_CB};
+}
+
+# Get or set the Perl-level "close" input callback.
+#
+# With an argument, stores it in $self->{XML_LIBXML_CLOSE_CB} (pass undef to
+# clear the callback); the parse_* methods hand the stored value to the
+# parser object.  Without an argument the stored value is left untouched, so
+# the method also works as a plain getter.
+# Returns the currently stored callback.
+sub close_callback {
+    my $self = shift;
+    $self->{XML_LIBXML_CLOSE_CB} = shift if @_;
+    return $self->{XML_LIBXML_CLOSE_CB};
+}
+
+# Get or set the Perl-level "open" input callback.
+#
+# With an argument, stores it in $self->{XML_LIBXML_OPEN_CB} (pass undef to
+# clear the callback); the parse_* methods hand the stored value to the
+# parser object.  Without an argument the stored value is left untouched, so
+# the method also works as a plain getter.
+# Returns the currently stored callback.
+sub open_callback {
+    my $self = shift;
+    $self->{XML_LIBXML_OPEN_CB} = shift if @_;
+    return $self->{XML_LIBXML_OPEN_CB};
+}
+
+
+# Front end for the validation flag: forwards the caller's arguments,
+# preceded by this instance's parser object, to the XS routine _validation
+# and returns whatever that routine returns.
+sub validation {
+    my ( $self, @args ) = @_;
+    my $parser = $self->{XML_LIBXML_PARSER_OBJECT};
+    return $self->_validation( $parser, @args );
+}
+
+# Front end for the entity-expansion flag: forwards the caller's arguments,
+# preceded by this instance's parser object, to the XS routine
+# _expand_entities and returns whatever that routine returns.
+sub expand_entities {
+    my ( $self, @args ) = @_;
+    my $parser = $self->{XML_LIBXML_PARSER_OBJECT};
+    return $self->_expand_entities( $parser, @args );
+}
+
+# Front end for the keep-blanks flag: forwards the caller's arguments,
+# preceded by this instance's parser object, to the XS routine _keep_blanks
+# and returns whatever that routine returns.
+sub keep_blanks {
+    my ( $self, @args ) = @_;
+    my $parser = $self->{XML_LIBXML_PARSER_OBJECT};
+    return $self->_keep_blanks( $parser, @args );
+}
+
+
+# Front end for the pedantic-parser flag: forwards the caller's arguments,
+# preceded by this instance's parser object, to the XS routine
+# _pedantic_parser and returns whatever that routine returns.
+sub pedantic_parser {
+    my ( $self, @args ) = @_;
+    my $parser = $self->{XML_LIBXML_PARSER_OBJECT};
+    return $self->_pedantic_parser( $parser, @args );
+}
+
+# Front end for the load-external-DTD flag: forwards the caller's arguments,
+# preceded by this instance's parser object, to the XS routine _load_ext_dtd
+# and returns whatever that routine returns.
+sub load_ext_dtd {
+    my ( $self, @args ) = @_;
+    my $parser = $self->{XML_LIBXML_PARSER_OBJECT};
+    return $self->_load_ext_dtd( $parser, @args );
+}
+
+# Front end for the complete-attributes flag: forwards the caller's
+# arguments, preceded by this instance's parser object, to the XS routine
+# _complete_attributes and returns whatever that routine returns.
+sub complete_attributes {
+    my ( $self, @args ) = @_;
+    my $parser = $self->{XML_LIBXML_PARSER_OBJECT};
+    return $self->_complete_attributes( $parser, @args );
+}
+
+# Get or set the flag that makes the parse_* methods call process_xinclude
+# on the freshly parsed document.
+#
+# With an argument, stores it in $self->{XML_LIBXML_EXPAND_XINCLUDE}.
+# Without an argument the stored value is left untouched, so querying the
+# flag no longer switches it off as a side effect.
+# Returns the currently stored flag value.
+sub expand_xinclude  {
+    my $self = shift;
+    $self->{XML_LIBXML_EXPAND_XINCLUDE} = shift if @_;
+    return $self->{XML_LIBXML_EXPAND_XINCLUDE};
+}
+
 sub parse_string {
     my $self = shift;
     croak("parse already in progress") if $self->{_State_};
     $self->{_State_} = 1;
     my $result;
     eval {
-        $result = $self->_parse_string(@_);
+        $self->_match_callback( $self->{XML_LIBXML_PARSER_OBJECT},
+                                $self->{XML_LIBXML_MATCH_CB} )
+          if $self->{XML_LIBXML_MATCH_CB};
+        $self->_read_callback( $self->{XML_LIBXML_PARSER_OBJECT},
+                               $self->{XML_LIBXML_READ_CB} )
+          if $self->{XML_LIBXML_READ_CB};
+        $self->_open_callback( $self->{XML_LIBXML_PARSER_OBJECT},
+                               $self->{XML_LIBXML_OPEN_CB} )
+          if $self->{XML_LIBXML_OPEN_CB};
+        $self->_close_callback( $self->{XML_LIBXML_PARSER_OBJECT},
+                                $self->{XML_LIBXML_CLOSE_CB} )
+          if $self->{XML_LIBXML_CLOSE_CB};
+
+        $self->_prepare_parser( $self->{XML_LIBXML_PARSER_OBJECT} );
+
+        $result = $self->_parse_string( @_ );
+        $result->_fix_extra;
+        if ( $self->{XML_LIBXML_EXPAND_XINCLUDE} ) {
+            warn "use xinclude!" ;
+            $result->process_xinclude();
+        }
+        $self->_cleanup_parser_callbacks( $self->{XML_LIBXML_PARSER_OBJECT} );
+
     };
     my $err = $@;
     $self->{_State_} = 0;
     $self->{_State_} = 1;
     my $result;
     eval {
-        $result = $self->_parse_fh(@_);
+        $self->_match_callback( $self->{XML_LIBXML_PARSER_OBJECT},
+                                $self->{XML_LIBXML_MATCH_CB} )
+          if $self->{XML_LIBXML_MATCH_CB};
+        $self->_read_callback( $self->{XML_LIBXML_PARSER_OBJECT},
+                               $self->{XML_LIBXML_READ_CB} )
+          if $self->{XML_LIBXML_READ_CB};
+        $self->_open_callback( $self->{XML_LIBXML_PARSER_OBJECT},
+                               $self->{XML_LIBXML_OPEN_CB} )
+          if $self->{XML_LIBXML_OPEN_CB};
+        $self->_close_callback( $self->{XML_LIBXML_PARSER_OBJECT},
+                                $self->{XML_LIBXML_CLOSE_CB} )
+          if $self->{XML_LIBXML_CLOSE_CB};
+
+        $self->_prepare_parser( $self->{XML_LIBXML_PARSER_OBJECT} );
+
+        $result = $self->_parse_fh( @_ );
+
+        $result->_fix_extra;
+        if ( $self->{XML_LIBXML_EXPAND_XINCLUDE} ) {
+            warn "use xinclude!" ;
+            $result->process_xinclude();
+        }
+        $self->_cleanup_parser_callbacks( $self->{XML_LIBXML_PARSER_OBJECT} );
+
     };
     my $err = $@;
     $self->{_State_} = 0;
     $self->{_State_} = 1;
     my $result;
     eval {
+        $self->_match_callback( $self->{XML_LIBXML_PARSER_OBJECT},
+                                $self->{XML_LIBXML_MATCH_CB} )
+          if $self->{XML_LIBXML_MATCH_CB};
+        $self->_read_callback( $self->{XML_LIBXML_PARSER_OBJECT},
+                               $self->{XML_LIBXML_READ_CB} )
+          if $self->{XML_LIBXML_READ_CB};
+        $self->_open_callback( $self->{XML_LIBXML_PARSER_OBJECT},
+                               $self->{XML_LIBXML_OPEN_CB} )
+          if $self->{XML_LIBXML_OPEN_CB};
+        $self->_close_callback( $self->{XML_LIBXML_PARSER_OBJECT},
+                                $self->{XML_LIBXML_CLOSE_CB} )
+          if $self->{XML_LIBXML_CLOSE_CB};
+
+        $self->_prepare_parser( $self->{XML_LIBXML_PARSER_OBJECT} );
+
         $result = $self->_parse_file(@_);
+        $result->_fix_extra;
+        if ( $self->{XML_LIBXML_EXPAND_XINCLUDE} ) {
+            # warn "use xinclude!" ;
+            $result->process_xinclude();
+        }
+        $self->_cleanup_parser_callbacks( $self->{XML_LIBXML_PARSER_OBJECT} );
+
     };
     my $err = $@;
     $self->{_State_} = 0;
     return $result;
 }
 
+# Parse an HTML document held in a string.
+#
+# Registers whichever input callbacks are set on this instance with the
+# parser object, prepares the parser, and delegates to the XS routine
+# _parse_html_string with the caller's arguments.  The parser callbacks are
+# cleaned up afterwards even when parsing dies; in that case the original
+# error is re-thrown once cleanup is done.
+# Returns the value produced by _parse_html_string.
+sub parse_html_string {
+    my $self   = shift;
+    my $parser = $self->{XML_LIBXML_PARSER_OBJECT};
+
+    # Registration order (match, read, open, close) mirrors the other
+    # parse_* methods; unset callbacks are skipped.
+    for my $kind (qw(MATCH READ OPEN CLOSE)) {
+        my $callback = $self->{"XML_LIBXML_${kind}_CB"} or next;
+        my $setter   = '_' . lc($kind) . '_callback';
+        $self->$setter( $parser, $callback );
+    }
+
+    $self->_prepare_parser($parser);
+
+    # Trap failures so the callbacks never stay registered after an error.
+    my $retval = eval { $self->_parse_html_string(@_) };
+    my $err    = $@;    # saved first: the cleanup call may clobber $@
+
+    $self->_cleanup_parser_callbacks($parser);
+
+    die $err if $err;
+    return $retval;
+}
+
+# Parse an HTML document read from a filehandle.
+#
+# Registers whichever input callbacks are set on this instance with the
+# parser object, prepares the parser, and delegates to the XS routine
+# _parse_html_fh with the caller's arguments.  The parser callbacks are
+# cleaned up afterwards even when parsing dies; in that case the original
+# error is re-thrown once cleanup is done.
+# Returns the value produced by _parse_html_fh.
+sub parse_html_fh {
+    my $self   = shift;
+    my $parser = $self->{XML_LIBXML_PARSER_OBJECT};
+
+    # Registration order (match, read, open, close) mirrors the other
+    # parse_* methods; unset callbacks are skipped.
+    for my $kind (qw(MATCH READ OPEN CLOSE)) {
+        my $callback = $self->{"XML_LIBXML_${kind}_CB"} or next;
+        my $setter   = '_' . lc($kind) . '_callback';
+        $self->$setter( $parser, $callback );
+    }
+
+    $self->_prepare_parser($parser);
+
+    # Trap failures so the callbacks never stay registered after an error.
+    my $retval = eval { $self->_parse_html_fh(@_) };
+    my $err    = $@;    # saved first: the cleanup call may clobber $@
+
+    $self->_cleanup_parser_callbacks($parser);
+
+    die $err if $err;
+    return $retval;
+}
+
+# Parse an HTML document from a file.
+#
+# Registers whichever input callbacks are set on this instance with the
+# parser object, prepares the parser, and delegates to the XS routine
+# _parse_html_file with the caller's arguments.  The parser callbacks are
+# cleaned up afterwards even when parsing dies; in that case the original
+# error is re-thrown once cleanup is done.
+# Returns the value produced by _parse_html_file.
+sub parse_html_file {
+    my $self   = shift;
+    my $parser = $self->{XML_LIBXML_PARSER_OBJECT};
+
+    # Registration order (match, read, open, close) mirrors the other
+    # parse_* methods; unset callbacks are skipped.
+    for my $kind (qw(MATCH READ OPEN CLOSE)) {
+        my $callback = $self->{"XML_LIBXML_${kind}_CB"} or next;
+        my $setter   = '_' . lc($kind) . '_callback';
+        $self->$setter( $parser, $callback );
+    }
+
+    $self->_prepare_parser($parser);
+
+    # Trap failures so the callbacks never stay registered after an error.
+    my $retval = eval { $self->_parse_html_file(@_) };
+    my $err    = $@;    # saved first: the cleanup call may clobber $@
+
+    $self->_cleanup_parser_callbacks($parser);
+
+    die $err if $err;
+    return $retval;
+}
+
 sub XML_ELEMENT_NODE(){1;}
 sub XML_ATTRIBUTE_NODE(){2;}
 sub XML_TEXT_NODE(){3;}
 
 =head2 validation
 
-  XML::LibXML->validation(1);
+  $parser->validation(1);
 
 Turn validation on (or off). Defaults to off.
 
 =head2 expand_entities
 
-  XML::LibXML->expand_entities(0);
+  $parser->expand_entities(0);
 
 Turn entity expansion on or off, enabled by default. If entity expansion
 is off, any external parsed entities in the document are left as entities.
 
 =head2 keep_blanks
 
-  XML::LibXML->keep_blanks(0);
+  $parser->keep_blanks(0);
 
 Allows you to turn off XML::LibXML's default behaviour of maintaining
 whitespace in the document.
 
 =head2 pedantic_parser
 
-  XML::LibXML->pedantic_parser(1);
+  $parser->pedantic_parser(1);
 
 You can make XML::LibXML more pedantic if you want to.
 
 =head2 load_ext_dtd
 
-  XML::LibXML->load_ext_dtd(1);
+  $parser->load_ext_dtd(1);
 
 Load external DTD subsets while parsing.
 
+=head2 complete_attributes
+
+  $parser->complete_attributes(1);
+
+Complete each element's attribute list with the attributes defaulted from the DTDs.
+By default, this option is enabled.
+
+=head2 expand_xinclude
+
+  $parser->expand_xinclude(1);
+
+Expands XInclude tags immediately while parsing the document. This flag
+ensures that the parser callbacks are used while parsing the included
+document.
+
 =head2 match_callback
 
-  XML::LibXML->match_callback($subref);
+  $parser->match_callback($subref);
 
 Sets a "match" callback. See L<"Input Callbacks"> below.
 
 =head2 open_callback
 
-  XML::LibXML->open_callback($subref);
+  $parser->open_callback($subref);
 
 Sets an open callback. See L<"Input Callbacks"> below.
 
 =head2 read_callback
 
-  XML::LibXML->read_callback($subref);
+  $parser->read_callback($subref);
 
 Sets a read callback. See L<"Input Callbacks"> below.
 
 =head2 close_callback
 
-  XML::LibXML->close_callback($subref);
+  $parser->close_callback($subref);
 
 Sets a close callback. See L<"Input Callbacks"> below.
 
 
 =head2 C<$doc-E<gt>is_valid($dtd)>
 
-Same as the above, but allows you to pass in a DTD created from 
+Same as the above, but allows you to pass in a DTD created from
 L<"XML::LibXML::Dtd">.
 
 =head2 C<$doc-E<gt>process_xinclude>
 
-Process any xinclude tags in the file.
+Process any xinclude tags in the file. (currently using B<only> libxml2's
+default callbacks)
 
 =head1 XML::LibXML::Dtd
 
 then you can revert to the default way of handling input. This allows, for
 example, to only handle certain URI schemes.
 
+Callbacks are only used on files, but not on strings or filehandles. This is
+because LibXML requires the match event to find out which callback set
+shall be used for the current input stream. LibXML can decide this only
+before the stream is opened. For LibXML, strings and filehandles are already
+opened streams.
+
 The following callbacks are defined:
 
 =head2 match(uri)
 
 =head2 read(handle, bytes)
 
-Read a certain number of bytes from the resource.
+Read a certain number of bytes from the resource. This callback is
+called even if the entire document has already been read.
 
 =head2 close(handle)
 
 This is a purely fictitious example that uses a MyScheme::Handler object
 that responds to methods similar to an IO::Handle.
 
-  XML::LibXML->match_callback(\&match_uri);
+  $parser->match_callback(\&match_uri);
   
-  XML::LibXML->open_callback(\&open_uri);
+  $parser->open_callback(\&open_uri);
   
-  XML::LibXML->read_callback(\&read_uri);
+  $parser->read_callback(\&read_uri);
   
-  XML::LibXML->close_callback(\&close_uri);
+  $parser->close_callback(\&close_uri);
   
   sub match_uri {
     my $uri = shift;
     close($handler);
   }
 
+A more realistic example can be found in the F<example> directory.
+
 =head1 Encoding
 
 All data will be stored UTF-8 encoded. Nevertheless the input and
 #include <libxml/xinclude.h>
 #include <libxml/valid.h>
 
+#include "parser.h"
 #include "dom.h"
 #include "xpath.h"
 
 struct _ProxyObject {
     void * object;
     SV * extra;
+    /* ProxyObject * next; */
 };
 
 static SV * LibXML_match_cb = NULL;
 static SV * LibXML_close_cb = NULL;
 static SV * LibXML_error = NULL;
 
+/* static ProxyObject * LibXML_nodelist_head = NULL; */
+/* static ProxyObject * LibXML_nodelist_sentinel = NULL */
+
 ProxyObject *
 make_proxy_node (xmlNodePtr node)
 {
     ProxyObject * proxy;
-    
+    /* ProxyObject * list = LibXML_nodelist; */
+ 
     proxy = (ProxyObject*)New(0, proxy, 1, ProxyObject);
     if (proxy != NULL) {
         proxy->object = (void*)node;
         proxy->extra = NULL;
+        /* proxy->next = NULL; */
+        /* append the node to the end of the list */
+        /* if ( list == NULL ) { */
+        /*    LibXML_nodelist = proxy; */
+        /*}*/
+        /*else {*/
+        /*    while ( list->next != NULL ) {*/
+        /*        list = list->next;*/
+        /*    }*/
+        /*    list->next = proxy;*/
+        /*} */
     }
     return proxy;
 }
 
-void
-LibXML_free_all_callbacks(void)
-{
-    if (LibXML_match_cb) {
-        SvREFCNT_dec(LibXML_match_cb);
-    }
-    
-    if (LibXML_read_cb) {
-        SvREFCNT_dec(LibXML_read_cb);
-    }
-    
-    if (LibXML_open_cb) {
-        SvREFCNT_dec(LibXML_open_cb);
-    }
-    
-    if (LibXML_close_cb) {
-        SvREFCNT_dec(LibXML_close_cb);
-    }
-
-}
-
 xmlParserInputPtr
 LibXML_load_external_entity(
         const char * URL, 
 LibXML_input_open(char const * filename)
 {
     SV * results;
-    
+
     if (LibXML_open_cb && SvTRUE(LibXML_open_cb)) {
         int count;
 
         }
 
         results = POPs;
-        
+
         SvREFCNT_inc(results);
-        
+
         PUTBACK;
         FREETMPS;
         LEAVE;
 }
 
 void
-LibXML_update_callbacks()
-{
-    xmlInputMatchCallback mc;
-    xmlInputOpenCallback oc;
-    xmlInputReadCallback rc;
-    xmlInputCloseCallback cc;
-
-    mc = LibXML_match_cb ? (xmlInputMatchCallback)LibXML_input_match : NULL;
-    oc = LibXML_open_cb ? (xmlInputOpenCallback)LibXML_input_open : NULL;
-    rc = LibXML_read_cb ? (xmlInputReadCallback)LibXML_input_read : NULL;
-    cc = LibXML_close_cb ? (xmlInputCloseCallback)LibXML_input_close : NULL;
-
- /* warn("update_callbacks: mc: %d, oc: %d, rc: %d, cc: %d\n", mc, oc, rc, cc); */
-
-    xmlRegisterInputCallbacks(mc, oc, rc, cc);
-}
-
-void
 LibXML_error_handler(void * ctxt, const char * msg, ...)
 {
     va_list args;
     sv_vsetpvfn(sv, msg, strlen(msg), &args, NULL, 0, NULL);
     va_end(args);
     
-    sv_catsv(LibXML_error, sv);
+    sv_catsv(LibXML_error, sv); /* remember the last error */
     SvREFCNT_dec(sv);
 }
 
 xmlDocPtr
 LibXML_parse_stream(SV * self, SV * ioref)
 {
-    xmlDocPtr doc;
+    xmlDocPtr doc = NULL;
     xmlParserCtxtPtr ctxt;
-    int well_formed;
-    int valid;
+    int well_formed = 0;
+    int valid = 0;
     char buffer[1024];
     int read_length;
     int ret = -1;
             croak("Could not create push parser context: %s", strerror(errno));
         }
         ctxt->_private = (void*)self;
-
         while(read_length = LibXML_read_perl(ioref, buffer, 1024)) {
             xmlParseChunk(ctxt, buffer, read_length, 0);
         }
         ret = xmlParseChunk(ctxt, buffer, 0, 1);
-        
-        doc = ctxt->myDoc;
-        well_formed = ctxt->wellFormed;
-        valid = ctxt->valid;
 
-        xmlFreeParserCtxt(ctxt);
+        /* jsut being paranoid */
+        if ( ret == 0 ) {
+            doc = ctxt->myDoc;
+            well_formed = ctxt->wellFormed;
+            xmlFreeParserCtxt(ctxt);
+        }
     }
     
     if (!well_formed || (xmlDoValidityCheckingDefaultValue && !valid)) {
         xmlFreeDoc(doc);
         return NULL;
     }
-    
     /* this should be done by libxml2 !? */
     if (doc->encoding == NULL) {
         doc->encoding = xmlStrdup("utf-8");
 xmlDocPtr
 LibXML_parse_html_stream(SV * self, SV * ioref)
 {
-    xmlDocPtr doc;
+    xmlDocPtr doc = NULL;
     htmlParserCtxtPtr ctxt;
-    int well_formed;
+    int well_formed = 0;
     char buffer[1024];
     int read_length;
     int ret = -1;
         ctxt->_private = (void*)self;
 
         while(read_length = LibXML_read_perl(ioref, buffer, 1024)) {
-            htmlParseChunk(ctxt, buffer, read_length, 0);
+            ret = htmlParseChunk(ctxt, buffer, read_length, 0);
+            if ( ret != 0 ) {
+                break;
+            }   
         }
         ret = htmlParseChunk(ctxt, buffer, 0, 1);
-        
-        doc = ctxt->myDoc;
-        well_formed = ctxt->wellFormed;
-
-        htmlFreeParserCtxt(ctxt);
+        if ( ret == 0 ) {
+            doc = ctxt->myDoc;
+            well_formed = ctxt->wellFormed;
+            htmlFreeParserCtxt(ctxt);
+        }
     }
     
     if (!well_formed) {
     return doc;
 }
 
+/* ex XML::LibXML::BOOT()! */
+
+/*
+ * Create a parser object and wire it to this module's C-level handlers:
+ * LibXML_error_handler as error callback, LibXML_load_external_entity as
+ * external entity loader, and Perl's stderr as error output handle.
+ *
+ * Returns the new parser object, or NULL when perlxmlInitParserObject left
+ * the handle unset.
+ * NOTE(review): assumes perlxmlInitParserObject allocates the object and
+ * leaves the handle untouched (or NULL) on failure -- confirm in parser.c.
+ */
+perlxmlParserObjectPtr
+make_parser() 
+{
+    /* start from NULL so a failed init is detectable instead of garbage */
+    perlxmlParserObjectPtr parser = NULL;
+
+    perlxmlInitParserObject( &parser );
+    if ( parser == NULL ) {
+        return NULL;
+    }
+
+    perlxmlSetErrorCallback( parser,
+                            (xmlGenericErrorFunc)LibXML_error_handler);
+    perlxmlSetExtEntityLoader( parser,
+                              (xmlExternalEntityLoader)LibXML_load_external_entity);
+
+    perlxmlSetErrorOutHandler( parser, PerlIO_stderr() );
+    return parser;
+}
+
+/* ex LibXML_free_all_callbacks */
+/*
+ * Release a parser object through perlxmlDestroyParserObject.
+ * A NULL handle is ignored.
+ */
+void
+delete_parser( perlxmlParserObjectPtr realparser ) 
+{
+    if ( realparser == NULL ) {
+        return;
+    }
+
+    /* the handle is a by-value copy, so only the callee's view matters */
+    perlxmlDestroyParserObject( &realparser );
+}
+
 MODULE = XML::LibXML         PACKAGE = XML::LibXML
 
 PROTOTYPES: DISABLE
 BOOT:
     LIBXML_TEST_VERSION
     xmlInitParser();
-    xmlSubstituteEntitiesDefaultValue = 1;
-    xmlKeepBlanksDefaultValue = 1;
-    xmlSetExternalEntityLoader((xmlExternalEntityLoader)LibXML_load_external_entity);
-    xmlSetGenericErrorFunc(PerlIO_stderr(), (xmlGenericErrorFunc)LibXML_error_handler);
-    xmlGetWarningsDefaultValue = 0;
-    xmlLoadExtDtdDefaultValue = 1;
 
 void
 END()
     CODE:
-        LibXML_free_all_callbacks();
         xmlCleanupParser();
 
+perlxmlParserObjectPtr
+_init_parser( self ) 
+        SV * self
+    PREINIT:
+        /* class name for the returned object; presumably consumed by the
+         * O_OBJECT typemap entry for perlxmlParserObjectPtr -- TODO confirm */
+        const char * CLASS = "XML::LibXML::Parser";
+    CODE:
+        /* XS entry point behind the Perl constructor: builds a fresh,
+         * fully wired parser object via make_parser() and returns it.
+         * self is accepted but not used here. */
+        RETVAL = make_parser();
+    OUTPUT:
+        RETVAL
+
+void
+_delete_parser( self, parser ) 
+        SV * self
+        perlxmlParserObjectPtr parser
+    CODE:
+        /* XS entry point used by the Perl-level DESTROY: releases the
+         * given parser object via delete_parser(), which ignores NULL.
+         * self is accepted but not used here. */
+        delete_parser( parser );        
+
+
+
+void
+_prepare_parser( self, parser )
+        SV * self
+        perlxmlParserObjectPtr parser
+    CODE:
+        /* Called by the Perl parse_* methods right before parsing; hands
+         * the parser object to perlxmlInitLibParser.
+         * NOTE(review): presumably applies the object's settings to
+         * libxml2 -- confirm in parser.c.  self is not used here. */
+        perlxmlInitLibParser( parser );
+
+
+void
+_cleanup_parser_callbacks( self, parser )
+        SV * self
+        perlxmlParserObjectPtr parser
+    CODE:
+    /* Called by the Perl parse_* methods after parsing: runs
+     * perlxmlCleanupLibParser on the parser object and forgets the four
+     * file-static Perl callback SVs.  Does nothing for a NULL parser. */
+    if ( parser != NULL ) {
+        perlxmlCleanupLibParser( parser );
+        /* NOTE(review): the pointers are reset without SvREFCNT_dec --
+         * confirm SET_CB does not take a reference that leaks here. */
+        LibXML_match_cb = NULL;
+        LibXML_read_cb = NULL;
+        LibXML_open_cb = NULL;
+        LibXML_close_cb = NULL;
+    }
+
+
 SV *
-match_callback(self, ...)
+_match_callback(self,parser, ...)
         SV * self
+        perlxmlParserObjectPtr parser
     CODE:
-        if (items > 1) {
-            SET_CB(LibXML_match_cb, ST(1));
-            LibXML_update_callbacks();
+        if (items > 2) {
+            SET_CB(LibXML_match_cb, ST(2));
+            /* LibXML_update_callbacks(); */
+            perlxmlSetMatchCallback( parser,
+                                     (xmlInputMatchCallback)LibXML_input_match);
         }
         else {
             RETVAL = LibXML_match_cb ? sv_2mortal(LibXML_match_cb) : &PL_sv_undef;
         RETVAL
 
 SV *
-open_callback(self, ...)
+_open_callback(self,parser, ...)
         SV * self
+        perlxmlParserObjectPtr parser
     CODE:
-        if (items > 1) {
-            SET_CB(LibXML_open_cb, ST(1));
-            LibXML_update_callbacks();
+        if (items > 2) {
+            SET_CB(LibXML_open_cb, ST(2));
+            /* LibXML_update_callbacks(); */
+            perlxmlSetOpenCallback( parser,
+                                    (xmlInputOpenCallback)LibXML_input_open);
+
         }
         else {
             RETVAL = LibXML_open_cb ? sv_2mortal(LibXML_open_cb) : &PL_sv_undef;
         RETVAL
 
 SV *
-read_callback(self, ...)
+_read_callback(self, parser, ...)
         SV * self
+        perlxmlParserObjectPtr parser
     CODE:
-        if (items > 1) {
-            SET_CB(LibXML_read_cb, ST(1));
-            LibXML_update_callbacks();
+        if (items > 2) {
+            SET_CB(LibXML_read_cb, ST(2));
+            /* LibXML_update_callbacks(); */
+            perlxmlSetReadCallback( parser,
+                                    (xmlInputReadCallback)LibXML_input_read);
         }
         else {
             RETVAL = LibXML_read_cb ? sv_2mortal(LibXML_read_cb) : &PL_sv_undef;
         RETVAL
 
 SV *
-close_callback(self, ...)
+_close_callback(self,parser, ...)
         SV * self
+        perlxmlParserObjectPtr parser
     CODE:
-        if (items > 1) {
-            SET_CB(LibXML_close_cb, ST(1));
-            LibXML_update_callbacks();
+        if (items > 2) {
+            SET_CB(LibXML_close_cb, ST(2));
+            /* LibXML_update_callbacks(); */
+            perlxmlSetCloseCallback( parser,
+                                    (xmlInputCloseCallback)LibXML_input_close);
         }
         else {
             RETVAL = LibXML_close_cb ? sv_2mortal(LibXML_close_cb) : &PL_sv_undef;
         RETVAL
 
 int
-validation(self, ...)
+_validation(self,parser, ...)
         SV * self
+        perlxmlParserObjectPtr parser
     CODE:
-        RETVAL = xmlDoValidityCheckingDefaultValue;
-        if (items > 1) {
-            xmlDoValidityCheckingDefaultValue = SvTRUE(ST(1)) ? 1 : 0;
+        /* init retval with the current default value */
+        RETVAL = xmlDoValidityCheckingDefaultValue; 
+        if ( parser != NULL ) {
+            RETVAL = parser->do_validation;
+            if (items > 2) {
+                parser->do_validation = SvTRUE(ST(2)) ? 1 : 0;
+            }
         }
     OUTPUT:
         RETVAL
 
 int
-expand_entities(self, ...)
+_expand_entities(self,parser, ...)
         SV * self
+        perlxmlParserObjectPtr parser
     CODE:
         RETVAL = xmlSubstituteEntitiesDefaultValue;
-        if (items > 1) {
-            xmlSubstituteEntitiesDefaultValue = SvTRUE(ST(1)) ? 1 : 0;
+        if ( parser != NULL ) {
+            RETVAL = parser->substitute_entities;
+            if (items > 2) {
+                parser->substitute_entities = SvTRUE(ST(2)) ? 1 : 0;
+            }
         }
     OUTPUT:
         RETVAL
 
 int
-keep_blanks(self, ...)
+_keep_blanks(self,parser, ...)
         SV * self
+        perlxmlParserObjectPtr parser
     CODE:
         RETVAL = xmlKeepBlanksDefaultValue;
-        if (items > 1) {
-            xmlKeepBlanksDefaultValue = SvTRUE(ST(1)) ? 1 : 0;
+        if ( parser != NULL ) {
+            RETVAL = parser->keep_blanks;
+            if (items > 2) {
+                parser->keep_blanks = SvTRUE(ST(2)) ? 1 : 0;
+            }
         }
     OUTPUT:
         RETVAL
 
 int
-pedantic_parser(self, ...)
+_pedantic_parser(self,parser, ...)
         SV * self
+        perlxmlParserObjectPtr parser
     CODE:
         RETVAL = xmlPedanticParserDefaultValue;
-        if (items > 1) {
-            xmlPedanticParserDefaultValue = SvTRUE(ST(1)) ? 1 : 0;
+        if ( parser != NULL ) {
+            RETVAL = parser->be_pedantic;
+            if (items > 2)  {
+                parser->be_pedantic = SvTRUE(ST(2)) ? 1 : 0;
+            }
         }
     OUTPUT:
         RETVAL
 
 int
-load_ext_dtd(self, ...)
+_load_ext_dtd(self,parser, ...)
         SV * self
+        perlxmlParserObjectPtr parser
     CODE:
-        RETVAL = xmlLoadExtDtdDefaultValue;
-        if (items > 1) {
-            xmlLoadExtDtdDefaultValue = SvTRUE(ST(1)) ? 1 : 0;
+        /* Get or set bit 1 (load external DTD) of the parser's
+         * load_ext_entities flags.  With a third argument the bit is set or
+         * cleared according to its truth value.  Returns the state of the
+         * bit after any update; without a parser object, the state of the
+         * same bit in libxml2's global default is returned. */
+        RETVAL = ( xmlLoadExtDtdDefaultValue == (xmlLoadExtDtdDefaultValue | 1 ));
+        if ( parser != NULL ) {
+            if (items > 2) {
+                if (SvTRUE(ST(2))) {
+                    parser->load_ext_entities |= 1;
+                }
+                else {
+                    /* clear the bit; XOR would switch it back on when it
+                     * is already off */
+                    parser->load_ext_entities &= ~1;
+                }
+            }
+            RETVAL = ( parser->load_ext_entities == (parser->load_ext_entities | 1 ));
         }
     OUTPUT:
         RETVAL
 
+int
+_complete_attributes(self,parser, ...)
+        SV * self
+        perlxmlParserObjectPtr parser
+    CODE:
+        /* Get or set the XML_COMPLETE_ATTRS bit of the parser's
+         * load_ext_entities flags.  With a third argument the bit is set or
+         * cleared according to its truth value.  Returns the state of the
+         * bit after any update; without a parser object, the state of the
+         * same bit in libxml2's global default is returned. */
+        RETVAL = ( xmlLoadExtDtdDefaultValue == (xmlLoadExtDtdDefaultValue | XML_COMPLETE_ATTRS));
+        if ( parser != NULL ) {
+            if (items > 2) {
+                if (SvTRUE(ST(2))) {
+                    parser->load_ext_entities |= XML_COMPLETE_ATTRS;
+                }
+                else {
+                    /* clear the bit; XOR would switch it back on when it
+                     * is already off */
+                    parser->load_ext_entities &= ~XML_COMPLETE_ATTRS;
+                }
+            }
+            RETVAL = ( parser->load_ext_entities == (parser->load_ext_entities | XML_COMPLETE_ATTRS ));
+        }
+    OUTPUT:
+        RETVAL
+
+
 char *
 get_last_error(CLASS)
-        char * CLASS
-    PREINIT:
+        char * CLASS 
+    PREINIT: 
         STRLEN len;
     CODE:
         RETVAL = NULL;
     OUTPUT:
         RETVAL
 
+
 SV*
 _parse_string(self, string)
         SV * self
         if (ctxt == NULL) {
             croak("Couldn't create memory parser context: %s", strerror(errno));
         }
+
+        # warn( "context created\n");
+
         ctxt->_private = (void*)self;
-        
+
         LibXML_error = newSVpv("", 0);
-        
+
+        # warn( "context initialized \n");        
         ret = xmlParseDocument(ctxt);
-        
+
+        # warn( "document parsed \n");
+
         well_formed = ctxt->wellFormed;
         valid = ctxt->valid;
 
         real_dom = ctxt->myDoc;
         xmlFreeParserCtxt(ctxt);
-        
         sv_2mortal(LibXML_error);
-        
         if (!well_formed || (xmlDoValidityCheckingDefaultValue && !valid)) {
             xmlFreeDoc(real_dom);
             RETVAL = &PL_sv_undef;    
         ProxyObject* proxy;
     CODE:
         LibXML_error = newSVpv("", 0);
-        
         real_dom = LibXML_parse_stream(self, fh);
-        
+
         sv_2mortal(LibXML_error);
         
         if (real_dom == NULL) {
     PREINIT:
         xmlParserCtxtPtr ctxt;
         char * CLASS = "XML::LibXML::Document";
-        int well_formed;
-        int valid;
+        int well_formed = 0;
+        int valid = 0;
         STRLEN len;
-        xmlDocPtr real_dom;
-        ProxyObject * proxy;
+        xmlDocPtr real_dom = NULL;
+        ProxyObject * proxy = NULL;
     CODE:
         ctxt = xmlCreateFileParserCtxt(filename);
+
         if (ctxt == NULL) {
             croak("Could not create file parser context for file '%s' : %s", filename, strerror(errno));
         }
         ctxt->_private = (void*)self;
-        
         LibXML_error = newSVpv("", 0);
-        
+
         xmlParseDocument(ctxt);
         well_formed = ctxt->wellFormed;
         valid = ctxt->valid;
         RETVAL
 
 SV*
-parse_html_string(self, string)
+_parse_html_string(self, string)
         SV * self
         SV * string
     PREINIT:
             real_dom->URL = xmlStrdup(SvPV(newURI, n_a));
             SvREFCNT_dec(newURI);
             proxy = make_proxy_node( (xmlNodePtr)real_dom ); 
-
-            RETVAL = sv_newmortal();
+             RETVAL = sv_newmortal();
             sv_setref_pv( RETVAL, (char *)CLASS, (void*)proxy );
             proxy->extra = RETVAL;
             SvREFCNT_inc(RETVAL);
         RETVAL
 
 SV*
-parse_html_fh(self, fh)
+_parse_html_fh(self, fh)
         SV * self
         SV * fh
     PREINIT:
         ProxyObject* proxy;
     CODE:
         LibXML_error = newSVpv("", 0);
-        
         real_dom = LibXML_parse_html_stream(self, fh);
         
         sv_2mortal(LibXML_error);
             real_dom->URL = xmlStrdup(SvPV(newURI, n_a));
             SvREFCNT_dec(newURI);
             proxy = make_proxy_node( (xmlNodePtr)real_dom ); 
-
-            RETVAL = sv_newmortal();
+             RETVAL = sv_newmortal();
             sv_setref_pv( RETVAL, (char *)CLASS, (void*)proxy );
             proxy->extra = RETVAL;
             SvREFCNT_inc(RETVAL);
         RETVAL
         
 SV*
-parse_html_file(self, filename)
+_parse_html_file(self, filename)
         SV * self
         const char * filename
     PREINIT:
         ProxyObject * proxy;
     CODE:
         LibXML_error = newSVpv("", 0);
-        
         real_dom = htmlParseFile(filename, NULL);
 
         sv_2mortal(LibXML_error);
         }
         else {
             proxy = make_proxy_node( (xmlNodePtr)real_dom ); 
-
-            RETVAL = sv_newmortal();
+             RETVAL = sv_newmortal();
             sv_setref_pv( RETVAL, (char *)CLASS, (void*)proxy );
             proxy->extra = RETVAL;
             SvREFCNT_inc(RETVAL);
 MODULE = XML::LibXML         PACKAGE = XML::LibXML::Document
 
 void
+_fix_extra(node_sv)
+        SV * node_sv
+    PREINIT:
+        ProxyObject* node;
+    CODE:
+        /* Called by the Perl parse_* methods on a freshly parsed document:
+         * unwraps the ProxyObject pointer stored in the blessed reference
+         * and points its extra slot back at the Perl-level SV. */
+        node = (ProxyObject *)SvIV((SV*)SvRV(node_sv));
+        /* the reference count of node_sv is not incremented here */
+        node->extra = node_sv;
+
+void
 DESTROY(self)
         ProxyObject* self
     CODE:
-        /* warn("destroy DOC\n"); */
         if ( self->object != NULL ) {
-            xmlFreeDoc((xmlDocPtr)self->object);
-            #warn( "REAL DOCUMENT DROP SUCCEEDS" );
+            if ( self->extra != NULL && SvREFCNT( self->extra ) > 1 ) {
+                SvREFCNT_dec( self->extra );
+#                warn( "TWO Document nodes" );
+            } else {
+                xmlFreeDoc((xmlDocPtr)self->object);
+#                warn( "REAL DOCUMENT DROP SUCCEEDS" );
+            }
         }        
         self->object = NULL;
         Safefree( self );
             domSetDocumentElement( real_dom, elem );
             proxy->extra = dom;
             SvREFCNT_inc(dom);
-            SvREFCNT_dec( oldsv );  
+            SvREFCNT_dec( oldsv );
         }
 
 ProxyObject *
 
 char*
 getEncoding( self )
-         ProxyObject* self
+        ProxyObject* self
     CODE:
         if( self != NULL && self->object!=NULL) {
             RETVAL = xmlStrdup( ((xmlDocPtr)self->object)->encoding );
     OUTPUT:
         RETVAL
 
+void
+setEncoding( self, encoding )
+        ProxyObject* self
+        char *encoding
+    PREINIT:
+        xmlDocPtr real_dom;
+    CODE:
+        /* Replace the document's encoding string with a private copy of
+         * `encoding`. The previous string is released first; overwriting
+         * the pointer alone would leak it on every call. Nothing happens
+         * when the proxy or the wrapped document is missing. */
+        if( self != NULL && self->object!=NULL) {
+            real_dom = (xmlDocPtr)self->object;
+            if ( real_dom->encoding != NULL ) {
+                xmlFree( (xmlChar*)real_dom->encoding );
+            }
+            real_dom->encoding = xmlStrdup( (const xmlChar*)encoding );
+        }
+
 char*
 getVersion( self ) 
          ProxyObject* self
         RETVAL
 
 void
-DESTROY( self )
-        xmlDtdPtr self
+DESTROY( node )
+        ProxyObject * node
+    PREINIT:
+        xmlDtdPtr real_node;
     CODE:
-        xmlFreeDtd(self);
+        real_node = (xmlDtdPtr)node->object;
+        if ( node->extra == NULL )
+            xmlFreeDtd(real_node);
 
 MODULE = XML::LibXML         PACKAGE = XML::LibXML::Node
 
     PREINIT:
         xmlNodePtr real_node;
     CODE:
-        /* warn("destroy NODE\n"); */
+        /* XXX should destroy node->extra if refcnt == 0 */
         if (node == NULL) {
            XSRETURN_UNDEF;
         }
             if( real_node->type == XML_DOCUMENT_FRAG_NODE ) {
                 warn( "NODE DESTROY: NODE ISA DOCUMENT_FRAGMENT!" );
             }
-            
+
             if ( SvREFCNT( node->extra ) > 0 ){
-                /* warn("dec REFCNT extra : %d\n", SvREFCNT(node->extra)); */
                 SvREFCNT_dec(node->extra);
             }
             if ( real_node->type != XML_DOCUMENT_NODE ) {
         }
         else if ( real_node == NULL ) {
             Safefree(node);
-        }     
- 
+        }
+
 int 
 getType( node ) 
         xmlNodePtr node
             CLASS = domNodeTypeName( ret );
             RETVAL = make_proxy_node(ret);
             if( elem->extra != NULL ) {
-                RETVAL->extra = elem->extra ;
-                SvREFCNT_inc(elem->extra);                
+                RETVAL->extra = elem->extra;
+                SvREFCNT_inc(elem->extra);
             }
         }
     OUTPUT:
             CLASS = domNodeTypeName( ret );
             RETVAL = make_proxy_node(ret);
             if( elem->extra != NULL ) {
-                RETVAL->extra = elem->extra ;
+                RETVAL->extra = elem->extra;
                 SvREFCNT_inc(elem->extra);
             }
         }
                 element = sv_newmortal();
                 cls = domNodeTypeName( tnode );
 
-                proxy = make_proxy_node(tnode);
-                if ( node->extra != NULL && ((xmlNodePtr)node->object)->type != XML_DOCUMENT_NODE ) {
-                    proxy->extra = node->extra;
-                    SvREFCNT_inc(node->extra);
+                if (tnode->type == XML_NAMESPACE_DECL) {
+                    element = sv_setref_pv( element, (char *)cls, (void*)tnode );
+                } else {
+                    proxy = make_proxy_node(tnode);
+                    if ( node->extra != NULL ) {
+                        proxy->extra = node->extra;
+                        SvREFCNT_inc(node->extra);
+                    }
+                    element = sv_setref_pv( element, (char *)cls, (void*)proxy );
                 }
-        
-                element = sv_setref_pv( element, (char *)cls, (void*)proxy );
-                cls = domNodeTypeName( tnode );
                 XPUSHs( element );
             }
             
                         cls = domNodeTypeName( tnode );
         
                         proxy = make_proxy_node(tnode);
-                        if ( node->extra != NULL && ((xmlNodePtr)node->object)->type != XML_DOCUMENT_NODE ) {
+                        if ( node->extra != NULL
+                             && ((xmlNodePtr)node->object)->type != XML_DOCUMENT_NODE ) {
                             proxy->extra = node->extra;
                             SvREFCNT_inc(node->extra);
                         }
         }
         if ( wantarray == G_SCALAR ) {
             XPUSHs( newSViv(len) );
-        }        
+        }
 
 SV*
 toString( self )
     OUTPUT:
         RETVAL
 
+IV
+getPointer( self )
+        xmlNodePtr self
+    CODE:
+        /* Return the address of the underlying libxml2 node as an integer,
+         * so that two wrappers can be compared for identity. IV and
+         * PTR2IV keep the full pointer width; casting to int would
+         * truncate the address where pointers are wider than int. */
+        RETVAL = PTR2IV(self);
+    OUTPUT:
+        RETVAL
+
 SV*
 getLocalName( node )
         xmlNodePtr node
             XPUSHs( newSViv( len ) );
         }
 
+void
+getNamespace ( node, prefix )
+        xmlNodePtr node
+        char * prefix
+    PREINIT:
+        xmlNsPtr cur = NULL;
+        const char * CLASS = "XML::LibXML::Namespace";
+        SV * wrapper;
+        int matched;
+    PPCODE:
+        /* Walk the namespace declarations made on this node itself
+         * (node->nsDef) and push the first one whose prefix equals
+         * `prefix`. A declaration without a prefix matches only the
+         * empty string. Nothing is pushed when no declaration matches. */
+        for ( cur = node->nsDef; cur != NULL; cur = cur->next ) {
+            matched = ( cur->prefix != NULL )
+                ? ( strcmp(prefix, cur->prefix) == 0 )
+                : ( strlen(prefix) == 0 );
+            if ( matched ) {
+                wrapper = sv_newmortal();
+                XPUSHs( sv_setref_pv( wrapper, (char *)CLASS, (void*)cur ) );
+                break;
+            }
+        }
+
 char *
 string_value ( node )
         xmlNodePtr node
 
 ProxyObject *
 getAttributeNode( elemobj, name )
-        ProxyObject* elemobj 
+        ProxyObject * elemobj
         char * name
     PREINIT:
         const char * CLASS = "XML::LibXML::Attr";
-        xmlNodePtr elem = (xmlNodePtr) elemobj->object;
+        xmlNodePtr elem;
         xmlAttrPtr attrnode = NULL;
     CODE:
+        elem = (xmlNodePtr) elemobj->object;
         RETVAL = NULL;
         attrnode = xmlHasProp( elem, name );
         if ( attrnode != NULL ) {
         char * name
     PREINIT:
         const char * CLASS = "XML::LibXML::Attr";
-        xmlNodePtr elem = (xmlNodePtr) elemobj->object;
+        xmlNodePtr elem;
         xmlAttrPtr attrnode = NULL;
     CODE:
+        elem = (xmlNodePtr) elemobj->object;
         RETVAL = NULL;
         attrnode = domHasNsProp( elem, name, nsURI );
         if ( attrnode != NULL ) {
 DESTROY(self)
         ProxyObject* self
     CODE:
+        /* XXX free self->extra if refcnt self->extra == 0 */
         if ( (xmlNodePtr)self->object != NULL 
               && ((xmlNodePtr)self->object)->parent == NULL ) {
             ((xmlNodePtr)self->object)->doc =NULL;
 void
 DESTROY(self)
         ProxyObject* self
+    PREINIT:
+        xmlNodePtr object;
     CODE:
-        /* warn("destroy FRAGMENT\n"); */
-        if ( (xmlNodePtr)self->object != NULL ) {
+        object = (xmlNodePtr)self->object;
+        if ( object != NULL && object->doc == NULL ) {
             # domSetOwnerDocument( (xmlNodePtr)self->object, NULL ); 
             # if( ((xmlNodePtr)self->object)->children !=NULL){
             #     warn("CLDNODES EXIST");
     ALIAS:
         XML::LibXML::Namespace::name = 1
     CODE:
-        RETVAL = newSVpv("xmlns:", 0);
-        sv_catpv(RETVAL, (char*)self->prefix);
+        if (self->prefix != NULL && strlen(self->prefix) > 0) {
+            RETVAL = newSVpv("xmlns:", 0);
+            sv_catpv(RETVAL, (char*)self->prefix);
+        } else {
+            RETVAL = newSVpv("xmlns", 0);
+        }
     OUTPUT:
         RETVAL
         
     OUTPUT:
         RETVAL
 
+IV
+getPointer( self )
+        xmlNsPtr self
+    CODE:
+        /* Return the address of the underlying libxml2 namespace record as
+         * an integer. IV and PTR2IV keep the full pointer width; casting
+         * to int would truncate the address where pointers are wider than
+         * int. */
+        RETVAL = PTR2IV(self);
+    OUTPUT:
+        RETVAL
 README
 dom.c
 dom.h
+parser.c
+parser.h
 xpath.c
 xpath.h
 typemap
 example/bad.xml
 example/libxml.xml
 example/xml2pod.pl
+example/cb_example.pl
 example/test.html
 example/test.xhtml
 example/test.dtd
 example/article.xml
 example/article_bad.xml
-example/article_internal.xml
-example/article_internal_bad.xml
 t/01basic.t
 t/02parsestring.t
 t/03parsefile.t
 t/13dtd.t
 t/14sax.t
 t/15nodelist.t
+t/16docnodes.t
+t/17callbacks.t
 lib/XML/LibXML/Attr.pod
 lib/XML/LibXML/Comment.pod
 lib/XML/LibXML/Dtd.pod
 t/06nodetypes.t
    + document_fragment tests 
 
-todo:
-    make shure appendChild or similar functions do not allow
-    wrong node type to be appended
-    make the node iterator function XS to avoid GC overhead 
-    for temporary iterator nodes.
+VERSION 1.01
+
+   + multiple parser layer (it looks like overkill but it is not!)
+   + on the fly XInclude expanding while parsing
+

example/cb_example.pl

+use XML::LibXML;
+use IO::File;    # open_uri() hands IO::File handles back to the parser
+
+# first instantiate the parser
+my $parser = XML::LibXML->new();
+
+# initialize the callbacks
+$parser->match_callback( \&match_uri );
+$parser->read_callback( \&read_uri );
+$parser->open_callback( \&open_uri );
+$parser->close_callback( \&close_uri );
+
+# include XIncludes on the fly
+$parser->expand_xinclude( 1 );
+
+# parse the file "test.xml" in the current directory
+my $dom = $parser->parse_file("test.xml");
+
+print $dom->toString() , "\n";
+
+# the callbacks follow
+# these callbacks are used for both the original parse AND the XInclude
+sub match_uri {
+    my ($candidate) = @_;
+    # anything containing "://" carries a URL scheme; we handle only files
+    return 0 if $candidate =~ m{://};
+    return 1;
+}
+
+sub open_uri {
+    my $uri = shift;
+
+    # Open $uri for reading and return the handle; it is what read_uri and
+    # close_uri receive later. Returns 0 when the file cannot be opened.
+    require IO::File;
+    my $handler = IO::File->new;
+    return 0 unless $handler->open( $uri, 'r' );
+
+    return $handler;
+}
+
+sub read_uri {
+    my $handler = shift;
+    my $length  = shift;
+    my $buffer = undef;
+    # Read up to $length bytes into the buffer that is handed back to the
+    # parser; without a handle the result stays undef.
+    if ( $handler ) {
+        $handler->read( $buffer , $length );
+    }
+    return $buffer;
+}
+
+sub close_uri {
+    my ($handler) = @_;
+    # a false value means open_uri never produced a handle: nothing to close
+    $handler->close() if $handler;
+    return 1;
+}
+
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>This is a valid document !</doc>
+<x xmlns:xinclude="http://www.w3.org/2001/XInclude">
+<xml>
+test
+<xinclude:include href="test2.xml"/>
+</xml>
+</x>

example/test2.xml

+<xsl>..</xsl>
+/* parser.c
+ * $Id$
+ * Author: Christian Glahn (2001) 
+ *
+ * This module holds the C implementation of the multiple parser
+ * layer. I think this module is required, so that the Perl
+ * implementation stays clear of added C features.
+ *
+ * TODO:
+ * add all parser flags to the parser object
+ */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdio.h> /* for the globals we don't have in libxml */
+#include <libxml/parser.h>
+#include <libxml/parserInternals.h>
+#include <libxml/tree.h>
+#include <libxml/xmlIO.h>
+#include <libxml/xmlmemory.h>
+#include <libxml/xpath.h>
+#include <libxml/xmlerror.h>
+
+#ifdef __cplusplus
+}
+#endif
+
+/* Per-parser settings. perlxmlInitLibParser() copies them into libxml2's
+ * process-wide state before a parse.
+ */
+struct _perlxmlParserObject 
+{
+    /* general callbacks; the four input callbacks are registered together
+     * through xmlRegisterInputCallbacks() when at least one is set */
+    xmlInputMatchCallback match_cb;
+    xmlInputReadCallback read_cb;
+    xmlInputOpenCallback open_cb;
+    xmlInputCloseCallback close_cb;
+
+    xmlGenericErrorFunc error_cb;             /* passed to xmlSetGenericErrorFunc() */
+    xmlExternalEntityLoader entity_loader_cb; /* passed to xmlSetExternalEntityLoader() */
+
+    /* then the pseudo sax handler */
+    xmlSAXHandlerPtr SAX_handler; /* reserved for the time when real SAX
+                                   * functionality is implemented; currently
+                                   * only handed to xmlParseBalancedChunkMemory()
+                                   */
+
+    void * error_fh; /* e.g. standard out; context argument for error_cb */
+
+    /* library parser flags; each one is copied to the libxml2 global
+     * default of the corresponding name before a parse */
+    int substitute_entities; 
+    int keep_blanks;
+    int get_warnings;
+    int load_ext_entities;
+    int do_validation;
+    int be_pedantic;
+};
+
+/* we have to redefine the header stuff to avoid the include */
+typedef struct _perlxmlParserObject perlxmlParserObject;
+typedef perlxmlParserObject *perlxmlParserObjectPtr;
+
+/**
+ * perlxmlInitParserObject
+ *
+ * @objectPtr: address of the caller's parser object pointer. A NULL
+ *             address turns the call into a no-op.
+ *
+ * Allocates a perlxmlParserObject with xmlMalloc() and stores its address
+ * in *objectPtr; when the allocation fails, *objectPtr receives NULL.
+ * Whatever *objectPtr held before is overwritten without being freed, so
+ * callers should pass a pointer that currently holds NULL.
+ *
+ * A new object starts without callbacks, SAX handler or error handle. The
+ * flags default to substitute_entities = 1, keep_blanks = 1 and
+ * load_ext_entities = 5; all other flags are 0.
+ **/
+void
+perlxmlInitParserObject( perlxmlParserObjectPtr * objectPtr )
+{
+    perlxmlParserObjectPtr fresh;
+
+    if ( objectPtr == NULL ) {
+        return;
+    }
+
+    fresh = (perlxmlParserObjectPtr)xmlMalloc( sizeof( perlxmlParserObject ) );
+    if ( fresh != NULL ) {
+        /* no callbacks, no SAX handler, no error handle */
+        fresh->match_cb         = NULL;
+        fresh->read_cb          = NULL;
+        fresh->open_cb          = NULL;
+        fresh->close_cb         = NULL;
+        fresh->error_cb         = NULL;
+        fresh->entity_loader_cb = NULL;
+        fresh->SAX_handler      = NULL;
+        fresh->error_fh         = NULL;
+
+        /* default parser flags */
+        fresh->substitute_entities = 1;
+        fresh->keep_blanks         = 1;
+        fresh->get_warnings        = 0;
+        fresh->load_ext_entities   = 5;
+        fresh->do_validation       = 0;
+        fresh->be_pedantic         = 0;
+    }
+
+    *objectPtr = fresh;
+}
+
+/**
+ * perlxmlDestroyParserObject
+ *
+ * @objectPtr: address of the caller's parser object pointer.
+ *
+ * Releases the parser object with xmlFree() and sets *objectPtr to NULL.
+ * The callbacks, the SAX handler and the error handle are not owned by
+ * the object: their slots are only reset, never freed.
+ *
+ * The call is a no-op when objectPtr is NULL or when *objectPtr is
+ * already NULL, so destroying an object twice is harmless.
+ **/
+void
+perlxmlDestroyParserObject( perlxmlParserObjectPtr * objectPtr )
+{
+    /* both levels have to be checked: *objectPtr is dereferenced below */
+    if ( objectPtr != NULL && *objectPtr != NULL ) {
+        (*objectPtr)->match_cb         = NULL;
+        (*objectPtr)->read_cb          = NULL;
+        (*objectPtr)->open_cb          = NULL;
+        (*objectPtr)->close_cb         = NULL;
+        (*objectPtr)->error_cb         = NULL;
+        (*objectPtr)->entity_loader_cb = NULL;
+
+        (*objectPtr)->SAX_handler      = NULL;
+        (*objectPtr)->error_fh         = NULL;
+
+        (*objectPtr)->substitute_entities = 0;
+        (*objectPtr)->keep_blanks         = 0;
+        (*objectPtr)->get_warnings        = 0;
+        (*objectPtr)->load_ext_entities   = 0;
+        (*objectPtr)->do_validation       = 0;
+        (*objectPtr)->be_pedantic         = 0;
+
+        xmlFree( *objectPtr );
+        *objectPtr = NULL;
+    }
+}
+
+/* the following functions are simply wrappers for the libxml2 functions */
+
+/* External entity loader that was active before perlxmlInitLibParser()
+ * installed a parser specific one. perlxmlCleanupLibParser() puts it back.
+ * NULL means that nothing was replaced.
+ */
+static xmlExternalEntityLoader perlxmlSavedEntityLoader = NULL;
+
+/**
+ * perlxmlInitLibParser
+ *
+ * @parser: the parser object whose settings should become active; NULL
+ *          is ignored.
+ *
+ * Copies the settings of the parser object into libxml2's process-wide
+ * state: input callbacks, entity loader, error function and the default
+ * flags. Has to be paired with perlxmlCleanupLibParser().
+ **/
+void
+perlxmlInitLibParser ( perlxmlParserObjectPtr parser ) 
+{
+    if ( parser != NULL ) {
+/*         xmlInitParser(); */
+        if ( parser->match_cb != NULL 
+             || parser->open_cb != NULL
+             || parser->read_cb != NULL
+             || parser->close_cb != NULL ) {
+
+            /* The id returned by the registration is not kept: nothing
+             * in this file unregisters the callbacks again.
+             */
+            xmlRegisterInputCallbacks(
+                                      parser->match_cb,
+                                      parser->open_cb,
+                                      parser->read_cb,
+                                      parser->close_cb
+                                      );
+        }
+
+        /* Only replace the entity loader if the parser object really has
+         * one. Installing NULL would leave libxml2 without any loader.
+         */
+        if ( parser->entity_loader_cb != NULL ) {
+            perlxmlSavedEntityLoader = xmlGetExternalEntityLoader();
+            xmlSetExternalEntityLoader( parser->entity_loader_cb );
+        }
+        xmlSetGenericErrorFunc(parser->error_fh, parser->error_cb );
+
+        xmlSubstituteEntitiesDefaultValue = parser->substitute_entities;
+        xmlKeepBlanksDefaultValue = parser->keep_blanks;
+        xmlGetWarningsDefaultValue = parser->get_warnings;
+        xmlLoadExtDtdDefaultValue = parser->load_ext_entities;
+        xmlPedanticParserDefaultValue = parser->be_pedantic;
+        xmlDoValidityCheckingDefaultValue = parser->do_validation;
+    }
+}
+
+/**
+ * perlxmlCleanupLibParser
+ *
+ * @parser: the parser object that was passed to perlxmlInitLibParser();
+ *          NULL is ignored.
+ *
+ * Resets the libxml2 default flags and the error function, and restores
+ * the entity loader that perlxmlInitLibParser() replaced (if any).
+ **/
+void
+perlxmlCleanupLibParser ( perlxmlParserObjectPtr parser ) 
+{
+    if ( parser != NULL ) {
+        xmlSubstituteEntitiesDefaultValue = 1;
+        xmlKeepBlanksDefaultValue = 1;
+        if ( perlxmlSavedEntityLoader != NULL ) {
+            xmlSetExternalEntityLoader( perlxmlSavedEntityLoader );
+            perlxmlSavedEntityLoader = NULL;
+        }
+        xmlSetGenericErrorFunc( NULL, NULL );
+        xmlGetWarningsDefaultValue = 0;
+        xmlLoadExtDtdDefaultValue = 5;
+
+        xmlPedanticParserDefaultValue = 0;
+        xmlDoValidityCheckingDefaultValue = 0;
+
+        /* here we should be able to unregister our callbacks.
+         * since we know the id, this function should expect this id
+         * to remove this handler set.
+         * another option would be a callback pop, that pops the last
+         * callback function off the callback stack
+         */
+
+/*         xmlCleanupParser(); */
+    }
+}
+
+/* Parse the file `filename` with the settings of parserObject. Returns the
+ * result of xmlParseFile(), or NULL when either argument is NULL.
+ */
+xmlDocPtr
+perlxmlParseFile( perlxmlParserObjectPtr parserObject,
+                  xmlChar * filename ) 
+{
+    xmlDocPtr document = NULL;
+
+    if ( parserObject == NULL || filename == NULL ) {
+        return NULL;
+    }
+
+    perlxmlInitLibParser( parserObject );
+    document = xmlParseFile( filename );
+    perlxmlCleanupLibParser( parserObject );
+
+    return document;
+}
+
+/* Parse `size` bytes starting at `buffer` with the settings of
+ * parserObject. Returns the result of xmlParseMemory(), or NULL when an
+ * argument is NULL or size is 0.
+ */
+xmlDocPtr
+perlxmlParseMemory( perlxmlParserObjectPtr parserObject, 
+                    const char *buffer,
+                    int size )
+{
+    xmlDocPtr document = NULL;
+
+    if ( parserObject == NULL || buffer == NULL || size == 0 ) {
+        return NULL;
+    }
+
+    perlxmlInitLibParser( parserObject );
+    document = xmlParseMemory( buffer, size );
+    perlxmlCleanupLibParser( parserObject );
+
+    return document;
+}
+
+/* Parse the string `cur` with the settings of parserObject. Returns the
+ * result of xmlParseDoc(), or NULL when either argument is NULL.
+ */
+xmlDocPtr
+perlxmlParseDoc( perlxmlParserObjectPtr parserObject,
+                 xmlChar * cur )
+{
+    xmlDocPtr document = NULL;
+
+    if ( parserObject == NULL || cur == NULL ) {
+        return NULL;
+    }
+
+    perlxmlInitLibParser( parserObject );
+    document = xmlParseDoc( cur );
+    perlxmlCleanupLibParser( parserObject );
+
+    return document;
+}
+
+/**
+ * Name: perlxmlParseBalancedChunkMemory
+ * Synopsis: xmlNodePtr perlxmlParseBalancedChunkMemory( perlxmlParserObjectPtr parser,xmlDocPtr doc, xmlChar *string )
+ * @parser: the parser object
+ * @doc: the document the string should belong to
+ * @string: the string to parse
+ *
+ * Parses a well balanced chunk of XML and returns the first node of the
+ * resulting sibling list, which can be added to any other node.
+ * (Sure, this should return a document fragment, but it does not yet.)
+ *
+ * Returns NULL when an argument is NULL. When the parse reports an error,
+ * all nodes handed back by libxml2 are freed and NULL is returned.
+ *
+ * The code is pretty heavy, but it is worth it, e.g. for reading a chunk
+ * of well balanced XML from a database field; in most cases this should
+ * be faster than building the DOM by hand.
+ **/
+xmlNodePtr
+perlxmlParseBalancedChunkMemory( perlxmlParserObjectPtr parserObject, 
+                                 xmlDocPtr document,
+                                 const xmlChar * string ){
+    xmlNodePtr nodes = NULL;
+    xmlNodePtr following = NULL;
+    int status = -1;
+
+    if ( parserObject == NULL || document == NULL || string == NULL ) {
+        return NULL;
+    }
+
+    perlxmlInitLibParser( parserObject );
+
+    status = xmlParseBalancedChunkMemory( document,
+                                          parserObject->SAX_handler,
+                                          NULL,
+                                          0,
+                                          string,
+                                          &nodes );
+
+    if ( status != 0 ) {
+        /* not well balanced: never hand out a broken node list, free
+         * every sibling instead; `nodes` is NULL after the loop */
+        for ( ; nodes != NULL; nodes = following ) {
+            following = nodes->next;
+            xmlFreeNode( nodes );
+        }
+    }
+
+    perlxmlCleanupLibParser( parserObject );
+
+    return nodes;
+}
+
+void 
+perlxmlSetErrorCallback( perlxmlParserObjectPtr parserObject, 
+                         xmlGenericErrorFunc error_callback )
+{
+    if ( parserObject != NULL ) {
+        parserObject->error_cb = error_callback;
+    }
+}
+
+/* Store the external entity loader in the parser object; a NULL object is
+ * ignored. */
+void
+perlxmlSetExtEntityLoader( perlxmlParserObjectPtr parserObject,
+                           xmlExternalEntityLoader entity_loader )
+{
+    if ( parserObject == NULL ) {
+        return;
+    }
+    parserObject->entity_loader_cb = entity_loader;
+}
+
+/* Store the input open callback in the parser object; a NULL object is
+ * ignored. */
+void
+perlxmlSetOpenCallback( perlxmlParserObjectPtr parserObject,
+                        xmlInputOpenCallback open_callback)
+{
+    if ( parserObject == NULL ) {
+        return;
+    }
+    parserObject->open_cb = open_callback;
+}
+
+/* Store the input close callback in the parser object; a NULL object is
+ * ignored. */
+void
+perlxmlSetCloseCallback( perlxmlParserObjectPtr parserObject,
+                         xmlInputCloseCallback close_callback)
+{
+    if ( parserObject == NULL ) {
+        return;
+    }
+    parserObject->close_cb = close_callback;
+}
+
+/* Store the input match callback in the parser object; a NULL object is
+ * ignored. */
+void
+perlxmlSetMatchCallback( perlxmlParserObjectPtr parserObject,
+                         xmlInputMatchCallback match_callback )
+{
+    if ( parserObject == NULL ) {
+        return;
+    }
+    parserObject->match_cb = match_callback;
+}
+
+/* Store the input read callback in the parser object; a NULL object is
+ * ignored. */
+void
+perlxmlSetReadCallback( perlxmlParserObjectPtr parserObject,
+                        xmlInputReadCallback read_callback )
+{
+    if ( parserObject == NULL ) {
+        return;
+    }
+    parserObject->read_cb = read_callback;
+}
+
+void
+perlxmlSetErrorOutHandler( perlxmlParserObjectPtr parserObject,
+                           void * error_fh )
+{
+    if ( parserObject != NULL ) {
+        parserObject->error_fh = error_fh;
+    }
+}
+
+/* need the html functions too */
+/* parser.h
+ * $Id$
+ * Author: Christian Glahn (2001) 
+ *
+ * This header declares the C part of the multiple parser
+ * layer. I think this module is required, so that the Perl
+ * implementation stays clear of added C features.
+ *
+ * TODO:
+ * add all parser flags
+ */
+
+#ifndef __LIBXML_PARSER_H__
+#define __LIBXML_PARSER_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <libxml/parser.h>
+#include <libxml/xmlIO.h>
+#include <libxml/xpath.h>
+#include <libxml/xmlerror.h>
+
+#ifdef __cplusplus
+}
+#endif
+
+/* Per-parser settings: input callbacks, error handling, entity loader and
+ * the parser flags of one parser instance.
+ */
+struct _perlxmlParserObject 
+{
+    /* general callbacks */
+    xmlInputMatchCallback match_cb;
+    xmlInputReadCallback read_cb;
+    xmlInputOpenCallback open_cb;
+    xmlInputCloseCallback close_cb;
+
+    xmlGenericErrorFunc error_cb;
+    xmlExternalEntityLoader entity_loader_cb;
+
+    /* then the pseudo sax handler */
+    xmlSAXHandlerPtr SAX_handler; /* reserved for the time when real SAX
+                                   * functionality is implemented
+                                   */
+
+    void * error_fh; /* set through perlxmlSetErrorOutHandler() */
+
+    /* library parser flags */
+    int substitute_entities; 
+    int keep_blanks;
+    int get_warnings;
+    int load_ext_entities;
+    int do_validation;
+    int be_pedantic;
+};
+
+/* object type and the pointer type used by all perlxml* functions */
+typedef struct _perlxmlParserObject perlxmlParserObject;
+typedef perlxmlParserObject *perlxmlParserObjectPtr;
+
+/* create a new parser object and leave its address in *objectPtr */
+void
+perlxmlInitParserObject( perlxmlParserObjectPtr * objectPtr );
+
+/* free the parser object and reset *objectPtr to NULL */
+void
+perlxmlDestroyParserObject( perlxmlParserObjectPtr * objectPtr );
+
+/* The following two functions apply the settings of a parser object to
+ * the library parser before a parse and reset the library defaults
+ * afterwards. The parse wrappers below call them as a pair. */
+void
+perlxmlInitLibParser ( perlxmlParserObjectPtr parser );
+
+void
+perlxmlCleanupLibParser ( perlxmlParserObjectPtr parser );
+
+/* The following functions are simple wrappers for the libxml2 functions
+ * of the same name (without the "perl" prefix). Each one returns NULL
+ * when the parser object or the input is NULL. */
+
+xmlDocPtr
+perlxmlParseFile( perlxmlParserObjectPtr object,
+                  xmlChar * filename );
+
+/* additionally returns NULL when size is 0 */
+xmlDocPtr
+perlxmlParseMemory( perlxmlParserObjectPtr object, 
+                    const char * buffer,
+                    int size );
+
+xmlDocPtr
+perlxmlParseDoc( perlxmlParserObjectPtr object,
+                 xmlChar * cur );
+
+/* Parse a well balanced chunk and return the first node of the resulting
+ * sibling list (NULL on error). The return type matches the definition in
+ * parser.c, which returns an xmlNodePtr and not a node set. */
+xmlNodePtr
+perlxmlParseBalancedChunkMemory( perlxmlParserObjectPtr object, 
+                                 xmlDocPtr document,
+                                 const xmlChar * string );
+                                 
+                               
+
+/* The setters below only store the given callback in the parser object;
+ * a NULL parser object is ignored. */
+void 
+perlxmlSetErrorCallback( perlxmlParserObjectPtr parser, 
+                         xmlGenericErrorFunc error_callback );
+
+void
+perlxmlSetExtEntityLoader( perlxmlParserObjectPtr parser,
+                           xmlExternalEntityLoader entity_loader );
+
+void
+perlxmlSetOpenCallback( perlxmlParserObjectPtr parser,
+                        xmlInputOpenCallback open_callback);
+
+void
+perlxmlSetCloseCallback( perlxmlParserObjectPtr parser,
+                         xmlInputCloseCallback close_callback);
+
+void
+perlxmlSetMatchCallback( perlxmlParserObjectPtr parser,
+                         xmlInputMatchCallback match_callback );
+
+/* store the input read callback in the parser object */
+void
+perlxmlSetReadCallback( perlxmlParserObjectPtr parser,
+                        xmlInputReadCallback read_callback );
+
+/* store the handle that is passed as context to the error callback */
+void
+perlxmlSetErrorOutHandler( perlxmlParserObjectPtr parserObject,
+                           void * error_fh );
+
+#endif
     }	
   
     # we need to create a new document since dromeds is in ASCII ...
-    my $doc = XML::LibXML::Document->new( '1.0' );
+    my $doc = XML::LibXML::Document->new( '1.0','iso-8859-1' );
     my $elem2 = $doc->createElement( $camel );
     $doc->setDocumentElement( $elem2 ); 
 
 ok($@);
 }
 
-{
-# validate a document with a <!DOCTYPE> declaration
-XML::LibXML->validation(1);
-my $xml = XML::LibXML->new->parse_file('example/article_internal.xml');
-ok($xml);
-}
+# this test fails under XML-LibXML-1.00 with a segfault because the
+# underlying DTD element in the C libxml library was freed twice
 
-{
-# validate an invalid document with <!DOCTYPE declaration
-XML::LibXML->validation(1);
-eval {
-my $xml = XML::LibXML->new->parse_file('example/article_internal_bad.xml');
-ok(0);
-};
-ok($@);
-}
+my $parser = XML::LibXML->new();
+my $doc = $parser->parse_file('example/dtd.xml');
+my @a = $doc->getChildnodes;
+ok(scalar(@a),2);
+undef @a;
+undef $doc;
+ 
+ok(1);
+use strict;
+use warnings;
+
+use XML::LibXML;
+use Test;
+
+# this test fails under XML-LibXML-1.00 with a segfault after the
+# second parsing.  it was fixed by putting in code in getChildNodes
+# to handle the special case where the node was the document node
+
+BEGIN { plan tests => 9 }
+
+  my $input = <<EOD;
+<doc>
+   <clean>   </clean>
+   <dirty>   A   B   </dirty>
+   <mixed>
+      A
+      <clean>   </clean>
+      B
+      <dirty>   A   B   </dirty>
+      C
+   </mixed>
+</doc>
+EOD
+
+# parse the same input repeatedly with a fresh parser each time; the
+# document node must always report exactly one child
+for (0 .. 2) {
+  my $parser = XML::LibXML->new();
+  my $doc = $parser->parse_string($input);
+  my @children = $doc->getChildnodes;
+  ok(scalar(@children),1);
+}
+
+# ask one document repeatedly for its first and last child
+my $parser = XML::LibXML->new();
+my $doc = $parser->parse_string($input);
+for (0 .. 2) {
+  my $first = $doc->getFirstChild;
+  ok(ref($first),'XML::LibXML::Element');
+}
+
+for (0 .. 2) {
+  my $last = $doc->getLastChild;
+  ok(ref($last),'XML::LibXML::Element');
+}
+
+use strict;
+use warnings;
+
+use Test;
+use IO::File;
+BEGIN { plan tests => 12 }
+my $loaded;
+END { ok(0) unless $loaded }
+use XML::LibXML;
+$loaded = 1;
+ok(1);
+
+# The most recently opened handle. It lives at file scope, as it did when
+# it was a global, so the handle stays alive between the callbacks.
+# NOTE(review): presumably the parser keeps no reference of its own to the
+# value returned by the open callback -- confirm before narrowing the scope.
+my $file;
+
+my $parser = XML::LibXML->new();
+ok($parser);
+
+$parser->match_callback( \&match );
+$parser->read_callback( \&read );
+$parser->open_callback( \&open );
+$parser->close_callback( \&close );
+
+$parser->expand_xinclude( 1 );
+
+my $dom = $parser->parse_file("example/test.xml");
+
+ok($dom);
+
+my $root = $dom->getDocumentElement();
+
+# the included document contributes the <xsl> element
+my @nodes = $root->findnodes( 'xml/xsl' );
+ok( scalar @nodes );
+
+# warn $dom->toString() , "\n";
+
+# every URI is handled by these callbacks
+sub match {
+#    warn "match!\n";
+    ok(1);
+    return 1;
+}
+
+# close the handle, if open() produced one
+sub close {
+#    warn "close!\n";
+    ok(1);
+    if ( $_[0] ) {
+        $_[0]->close();
+    }
+    return 1;
+}
+
+# open the file named by the URI for reading; returns the handle, or 0
+# on failure
+sub open {
+    $file = IO::File->new;
+    if ( $file->open( $_[0], 'r' ) ){
+#        warn "open!\n";
+        ok(1);
+    }
+    else {
+#        warn "cannot open $_[0] $!\n";
+        $file = 0;
+    }   
+   
+    return $file;
+}
+
+# read up to $_[1] bytes from the handle $_[0]; returns the data read
+sub read {
+#    warn "read!";
+    my $rv = undef;
+    my $n = 0;
+    if ( $_[0] ) {
+#        warn "read $_[1] bytes!\n";
+        $n = $_[0]->read( $rv , $_[1] );
+        ok(1) if defined $n && $n > 0;
+    }
+    return $rv;
+}
 TYPEMAP
-const char *        T_PV
-xmlDocPtr           PROXY_OBJECT
-xmlNodePtr          PROXY_OBJECT
-xmlNsPtr            O_OBJECT
-xmlParserCtxtPtr    O_OBJECT
-xmlDtdPtr           PROXY_OBJECT
-xmlNodeSetPtr       O_OBJECT
-ProxyObject *       O_OBJECT
+const char *                T_PV
+xmlDocPtr                   PROXY_OBJECT
+xmlNodePtr                  PROXY_OBJECT
+xmlNsPtr                    O_OBJECT
+xmlParserCtxtPtr            O_OBJECT
+xmlDtdPtr                   PROXY_OBJECT
+xmlNodeSetPtr               O_OBJECT
+ProxyObject *               O_OBJECT
+perlxmlParserObjectPtr      O_OBJECT
 
 
 INPUT
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.