Commits

Anonymous committed 95e7453

Modified Files:
Changes MANIFEST
+ version notes

LibXML.pm LibXML.xs
+ rudimentary native SAX interface
+ push parser

perl-libxml-mm.c perl-libxml-mm.h
+ new functions: parser context helper

Added Files:
perl-libxml-sax.c perl-libxml-sax.h
these files contain the sax caller interface, so the XS file will not
get too polluted.

  • Participants
  • Parent commits 3e31947

Comments (0)

Files changed (8)

 Revision history for Perl extension XML::LibXML
 
+1.50
+   - push parser interface
+   - basic native libxml2 SAX interface
+   - XML::LibXML::Document::process_xincludes reintroduced
+   - code cleanup
+   - more documentation
+
 1.49
    - memory management has been completely rewritten.
         now the module should not cause that many memory leaks 
 package XML::LibXML;
 
 use strict;
-use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
+use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS
+            $skipDTD $skipXMLDeclaration $setTagCompression);
 use Carp;
 use XML::LibXML::NodeList;
 use IO::Handle; # for FH reads called as methods
 
 @ISA = qw(DynaLoader Exporter);
 
-$XML::LibXML::skipDTD            = 0;
-$XML::LibXML::skipXMLDeclaration = 0;
-$XML::LibXML::setTagCompression  = 0;
+$skipDTD            = 0;
+$skipXMLDeclaration = 0;
+$setTagCompression  = 0;
 
 bootstrap XML::LibXML $VERSION;
 
     return $self->{XML_LIBXML_BASE_URI};
 }
 
+# Register a SAX handler on this parser.  The handler is wrapped as
+# { HANDLER => $handler } and stored under the SAX key; the parse
+# methods check whether that key is defined to pick the SAX code path.
+sub set_sax_handler {
+    my ( $self, $handler ) = @_;
+    $self->{SAX} = { HANDLER => $handler };
+}
+
+# Post-process a freshly parsed document.
+#
+#   $result - the parsed document object
+#   $uri    - optional base URI; applied with setBaseURI() when defined
+#
+# When XML_LIBXML_EXPAND_XINCLUDE is set to 1 on the parser, XIncludes
+# are processed with the parser marked busy (_State_ = 1) for the
+# duration.  Returns the document, or croaks with the XInclude error.
+sub _auto_expand {
+    my ( $self, $result, $uri ) = @_;
+
+    $result->setBaseURI( $uri ) if defined $uri;
+
+    my $expand = $self->{XML_LIBXML_EXPAND_XINCLUDE};
+    return $result unless defined $expand and $expand == 1;
+
+    $self->{_State_} = 1;
+    eval { $self->processXIncludes($result); };
+    my $err = $@;
+    $self->{_State_} = 0;
+
+    croak $err if $err;
+    return $result;
+}
+
 sub parse_string {
     my $self = shift;
     croak("parse already in progress") if $self->{_State_};
     $self->{_State_} = 1;
     my $result;
 
-    eval {
-        $result = $self->_parse_string( @_ );
-    };
+    if ( defined $self->{SAX} ) {
+        my $string = shift;
+        eval { $self->_parse_sax_string($string); };
 
-    my $err = $@;
-    $self->{_State_} = 0;
-    if ($err) {
-        croak $err;
+        my $err = $@;
+        $self->{_State_} = 0;
+        if ($err) {
+            croak $err;
+        }
     }
+    else {
+        eval { $result = $self->_parse_string( @_ ); };
 
-    my $uri = $self->{XML_LIBXML_BASE_URI};
-    $result->setBaseURI( $uri ) if defined $uri;
+        my $err = $@;
+        $self->{_State_} = 0;
+        if ($err) {
+            croak $err;
+        }
 
-    if ( defined $self->{XML_LIBXML_EXPAND_XINCLUDE}
-         and  $self->{XML_LIBXML_EXPAND_XINCLUDE} == 1 ) {
-         $self->{_State_} = 1;
-         eval { $self->processXIncludes($result); };
-         my $err = $@;
-         $self->{_State_} = 0;
-         if ($err) {
-             $result = undef;
-             croak $err;
-         }
-     }
+        $result = $self->_auto_expand( $result, $self->{XML_LIBXML_BASE_URI} );
+    }
 
     return $result;
 }
     croak("parse already in progress") if $self->{_State_};
     $self->{_State_} = 1;
     my $result;
-    eval {
-        $result = $self->_parse_fh( @_ );
-    };
-    my $err = $@;
-    $self->{_State_} = 0;
-    if ($err) {
-        croak $err;
+    if ( defined $self->{SAX} ) {
+        eval { $self->_parse_sax_fh( @_ );  };
+        my $err = $@;
+        $self->{_State_} = 0;
+        if ($err) {
+            croak $err;
+        }
     }
+    else {
+        eval { $result = $self->_parse_fh( @_ ); };
+        my $err = $@;
+        $self->{_State_} = 0;
+        if ($err) {
+            croak $err;
+        }
 
-    my $uri = $self->{XML_LIBXML_BASE_URI} ;
-    $result->setBaseURI( $uri ) if defined $uri;
-
-    if ( defined $self->{XML_LIBXML_EXPAND_XINCLUDE}
-         and  $self->{XML_LIBXML_EXPAND_XINCLUDE} == 1 ) {
-         $self->{_State_} = 1;
-         eval { $self->processXIncludes($result); };
-         my $err = $@;
-         $self->{_State_} = 0;
-         if ($err) {
-            $result = undef;
-            croak $err;
-         }
-     }
+        $result = $self->_auto_expand( $result,, $self->{XML_LIBXML_BASE_URI} );
+    }
 
     return $result;
 }
     croak("parse already in progress") if $self->{_State_};
     $self->{_State_} = 1;
     my $result;
-    eval {
-        $result = $self->_parse_file(@_);
-    };
-    my $err = $@;
-    $self->{_State_} = 0;
-    if ($err) {
-        croak $err;
+    if ( defined $self->{SAX} ) {
+        eval { $self->_parse_sax_file( @_ );  };
+        my $err = $@;
+        $self->{_State_} = 0;
+        if ($err) {
+            croak $err;
+        }
     }
+    else {
+        eval { $result = $self->_parse_file(@_); };
+        my $err = $@;
+        $self->{_State_} = 0;
+        if ($err) {
+            croak $err;
+        }
 
-    # files will not get a base dir, since they are based by their
-    # filename.
-
-    if ( defined $self->{XML_LIBXML_EXPAND_XINCLUDE}
-         and  $self->{XML_LIBXML_EXPAND_XINCLUDE} == 1 ) {
-
-         $self->{_State_} = 1;
-         eval { $self->processXIncludes($result); };
-         my $err = $@;
-         $self->{_State_} = 0;
-         if ($err) {
-             $result = undef;
-             croak $err;
-         }
-     }
+        $result = $self->_auto_expand( $result );
+    }
 
     return $result;
 }
     }
 
     $self->{_State_} = 1;
+    if ( defined $self->{SAX} ) {
+        eval { $result = $self->_parse_sax_xml_chunk( @_ ); };
+    }
+    else {
+        eval { $result = $self->_parse_xml_chunk( @_ ); };
+    }
 
-    eval {
-        $result = $self->_parse_xml_chunk( @_ );
-    };
     my $err = $@;
     $self->{_State_} = 0;
     if ($err) {
     return $self->_processXIncludes($doc || " ");
 }
 
+# Start a new push-parse session.
+#
+# Any parser context left over from an earlier session is dropped first
+# (the original code deleted the misspelled key COMTEXT, so the old
+# context stayed in place until it was overwritten).  A fresh context is
+# then created through _start_push(), in SAX mode when a SAX handler has
+# been registered.  Returns the new context.
+sub init_push {
+    my $self = shift;
+
+    delete $self->{CONTEXT};
+
+    my $with_sax = defined $self->{SAX} ? 1 : 0;
+    $self->{CONTEXT} = $self->_start_push($with_sax);
+}
+
+
+# Feed one or more chunks of data to the push parser.
+#
+# A parser context is created on demand (in SAX mode when a SAX handler
+# is registered) if init_push() has not been called.  Each remaining
+# argument is handed to _push() in order.  The arguments are iterated
+# through @_ directly so _push() sees the caller's own scalars.
+sub push {
+    my $self = shift;
+
+    unless ( defined $self->{CONTEXT} ) {
+        my $with_sax = defined $self->{SAX} ? 1 : 0;
+        $self->{CONTEXT} = $self->_start_push($with_sax);
+    }
+
+    $self->_push( $self->{CONTEXT}, $_ ) for @_;
+}
+
+# Finish a push-parse session and return its result.
+#
+#   $restore - optional; passed on to _end_push() (defaults to 0)
+#
+# Returns undef when no data was pushed (no context exists).  In SAX
+# mode the session is closed through _end_sax_push(), otherwise through
+# _end_push().  The context is always discarded, and any error raised
+# while finishing is re-thrown with croak.
+sub finish_push {
+    my $self    = shift;
+    my $restore = shift || 0;
+    return undef unless defined $self->{CONTEXT};
+
+    my $retval;
+    eval {
+        $retval = defined $self->{SAX}
+            ? $self->_end_sax_push( $self->{CONTEXT} )
+            : $self->_end_push( $self->{CONTEXT}, $restore );
+    };
+
+    # Save $@ right away: deleting the context below may run a
+    # destructor, and $@ must not be inspected after that.
+    my $err = $@;
+    delete $self->{CONTEXT};
+
+    croak( $err ) if $err;
+    return $retval;
+}
+
 sub __read {
     read($_[0], $_[1], $_[2]);
 }
     return $retval;
 }
 
+# Expand the XIncludes of this document using a freshly constructed
+# XML::LibXML parser; returns whatever processXIncludes() returns.
+sub process_xinclude {
+    my ($doc) = @_;
+    my $parser = XML::LibXML->new;
+    return $parser->processXIncludes($doc);
+}
+
 1;
 
 package XML::LibXML::DocumentFragment;
 
 1;
 
+package XML::LibXML::_SAXParser;
+
+# Pseudo class: the native SAX layer calls these subs by their fully
+# qualified names.  Every callback receives the parser as its first
+# argument and simply reports the event with warn().
+
+# Document start.
+sub start_document {
+    warn "start DOCUMENT!";
+}
+
+# XML declaration: version and encoding as seen by the parser.
+sub xml_decl {
+    my $parser   = shift;
+    my $version  = shift;
+    my $encoding = shift;
+    warn "xml declaration $version, $encoding\n";
+}
+
+# Document end.
+sub end_document {
+    warn "end DOCUMENT!";
+}
+
+# Element start: the element name followed by a flat name/value
+# attribute list.
+sub start_element {
+    my $parser = shift;
+    my $name   = shift;
+    my %attrs  = @_;
+
+    warn "start ELEMENT $name!";
+
+    my @pairs = map { $_ . "=\"" . $attrs{$_} . "\"" } keys %attrs;
+    warn "   " . join( " ", @pairs );
+}
+
+# Element end.
+sub end_element {
+    my $name = $_[1];
+    warn "end ELEMENT $name!";
+}
+
+# Character data.
+sub characters {
+    my $data = $_[1];
+    warn "characters '$data'\n";
+}
+
+# Comment text.
+sub comment {
+    my $data = $_[1];
+    warn "comment '$data'\n";
+}
+
+# CDATA section content.
+sub cdata_block {
+    my $data = $_[1];
+    warn "cdata '$data'\n";
+}
+
+# Processing instruction: target and data.
+sub processing_instruction {
+    my $target = $_[1];
+    my $data   = $_[2];
+    warn "processing instruction '$target' -> '$data'\n";
+}
+
+1;
 __END__
 
 =head1 NAME
 If expand_xincludes is set to 1, the method is only required to process
 XIncludes appended to the DOM after its original parsing.
 
+=head1 PUSH PARSER
+
+XML::LibXML also supports a push parser interface. This allows one to
+parse large documents without actually loading the entire document
+into memory.
+
+The interface is divided into two parts:
+
+=over 4
+
+=item * pushing the data into the parser
+
+=item * finishing the parse
+
+=back
+
+The user has no chance to access the document while still pushing the
+data to the parser. The resulting document will be returned when the
+parser is told to finish the parsing process.
+
+=over 4
+
+=item $parser->push( @data )
+
+This function pushes the data stored inside the array to libxml2's
+parser. Each entry in @data must be a normal scalar!
+
+=item $parser->finish_push( $restore );
+
+This function returns the result of the parsing process. If this
+function is called without a parameter it will complain about
+documents that are not well-formed. If $restore is 1, the push parser
+can be used to restore broken or non-well-formed (XML) documents, as
+the following examples show:
+
+  $parser->push( "<foo>", "bar" );
+  eval { $doc = $parser->finish_push; };      # will complain
+  if ( $@ ) {
+     # ...
+  }
+
+This can be annoying if the closing tag is missing by accident. The
+following code will restore the document:
+
+  $parser->push( "<foo>", "bar" );
+  eval { $doc = $parser->finish_push(1); };      # will not complain
+
+  warn $doc->toString(); # returns "<foo>bar</foo>"
+
+Of course finish_push() will return undef if no data was pushed to
+the parser before.
+
+=back
+
 =head1 SERIALIZATION
 
 The opposite of parsing is serialization. In XML::LibXML this can be
 
 /* XML::LibXML stuff */
 #include "perl-libxml-mm.h"
+#include "perl-libxml-sax.h"
 
 #include "dom.h"
 #include "xpath.h"
     return doc;
 }
 
+/*
+ * LibXML_parse_sax_stream
+ *
+ * Reads a document from the Perl IO reference `ioref` and runs it
+ * through libxml2's push parser with the Perl SAX handler table
+ * installed, so the document is reported as SAX events.
+ *
+ *   self      - parser SV; stored in ctxt->_private, which the SAX
+ *               callbacks pass to the Perl side as first argument
+ *   ioref     - IO reference read through LibXML_read_perl()
+ *   directory - base directory, or NULL to use the current directory
+ *
+ * Croaks with "Empty Stream" when nothing can be read and when the
+ * parser context cannot be created.
+ */
+void
+LibXML_parse_sax_stream(SV * self, SV * ioref, char * directory)
+{
+    xmlParserCtxtPtr ctxt;
+    xmlSAXHandlerPtr handler;
+    char buffer[1024];
+    int read_length;
+    char current_dir[512];
+
+    if (directory == NULL) {
+        if (getcwd(current_dir, 512) != 0) {
+            directory = current_dir;
+        }
+        else {
+            warn("couldn't get current directory: %s\n", strerror(errno));
+        }
+    }
+
+    /* the first four bytes go to the context constructor */
+    read_length = LibXML_read_perl(ioref, buffer, 4);
+    if (read_length <= 0) {
+        croak( "Empty Stream" );
+    }
+
+    handler = PSaxGetHandler();
+    ctxt = xmlCreatePushParserCtxt(handler,
+                                   NULL,
+                                   buffer,
+                                   read_length,
+                                   NULL);
+    if (ctxt == NULL) {
+        xmlFree(handler);
+        croak("Could not create push parser context: %s", strerror(errno));
+    }
+    /* xmlCreatePushParserCtxt() keeps its own copy of the handler
+     * table, so the template has to be released here. */
+    xmlFree(handler);
+
+    ctxt->directory = directory;
+    ctxt->_private = (void*)self;
+
+    /* stop on end of input as well as on a negative (error) length */
+    while ((read_length = LibXML_read_perl(ioref, buffer, 1024)) > 0) {
+        xmlParseChunk(ctxt, buffer, read_length, 0);
+    }
+    xmlParseChunk(ctxt, buffer, 0, 1);
+
+    /* directory may point at the stack buffer above; libxml2 must not
+     * try to free it */
+    ctxt->directory = NULL;
+
+    xmlFree(ctxt->sax);
+    ctxt->sax = NULL;
+    xmlFreeParserCtxt(ctxt);
+}
+
 xmlDocPtr
 LibXML_parse_html_stream(SV * self, SV * ioref)
 {
         char * directory
     PREINIT:
         xmlParserCtxtPtr ctxt;
-        char * CLASS = "XML::LibXML::Document";
         STRLEN len;
         char * ptr;
         int well_formed;
     OUTPUT:
         RETVAL
 
+int
+_parse_sax_string(self, string)
+        SV * self
+        SV * string
+    PREINIT:
+        xmlParserCtxtPtr ctxt;
+        STRLEN len;
+        char * ptr;
+    INIT:
+        /* an empty string cannot be parsed; croak does not return */
+        ptr = SvPV(string, len);
+        if (len == 0) {
+            croak("Empty string");
+        }
+    CODE:
+        /* Parse an in-memory string, reporting it as SAX events.
+         * RETVAL is the result of xmlParseDocument(). */
+        ctxt = xmlCreateMemoryParserCtxt(ptr, len);
+        if (ctxt == NULL) {
+            croak("Couldn't create memory parser context: %s", strerror(errno));
+        }
+
+        /* the SAX callbacks hand this SV to Perl as their first argument */
+        ctxt->_private = (void*)self;
+
+        /* NOTE(review): this overwrites the handler table that
+         * xmlCreateMemoryParserCtxt() installed without releasing it
+         * first - confirm whether that table leaks. */
+        ctxt->sax = PSaxGetHandler();
+
+        LibXML_init_parser(self);
+        RETVAL = xmlParseDocument(ctxt);
+
+        /* release our handler table, then the context itself */
+        xmlFree( ctxt->sax );
+        ctxt->sax = NULL;
+        xmlFreeParserCtxt(ctxt);
+
+        LibXML_cleanup_callbacks();
+        LibXML_cleanup_parser(); 
+
+    OUTPUT:
+        RETVAL
+
 SV*
 _parse_fh(self, fh, directory = NULL)
         SV * self
         SV * fh
         char * directory
     PREINIT:
-        char * CLASS = "XML::LibXML::Document";
         STRLEN len;
         xmlDocPtr real_dom;
     CODE:
     OUTPUT:
         RETVAL
 
+void
+_parse_sax_fh(self, fh, directory = NULL)
+        SV * self
+        SV * fh
+        char * directory
+    CODE:
+        /* Parse a file handle through the SAX interface.  The stray
+         * Perl statement "warn $@;" that followed CODE: was removed; it
+         * is not valid C and would be emitted into the generated code. */
+
+        /* fresh, empty error buffer for this parse */
+        LibXML_error = NEWSV(0, 512);
+        sv_setpvn(LibXML_error, "", 0);
+
+        LibXML_init_parser(self);
+        LibXML_parse_sax_stream(self, fh, directory);
+
+        /* let perl reclaim the error buffer */
+        sv_2mortal(LibXML_error);
+
+        LibXML_cleanup_callbacks();
+        LibXML_cleanup_parser();
+
 SV*
 _parse_file(self, filename)
         SV * self
         const char * filename
     PREINIT:
         xmlParserCtxtPtr ctxt;
-        char * CLASS = "XML::LibXML::Document";
         int well_formed = 0;
         int valid = 0;
         STRLEN len;
     OUTPUT:
         RETVAL
 
+void
+_parse_sax_file(self, filename)
+        SV * self
+        const char * filename
+    PREINIT:
+        xmlParserCtxtPtr ctxt;
+    CODE:
+        /* Parse the named file, reporting it as SAX events. */
+        LibXML_init_parser(self);
+        ctxt = xmlCreateFileParserCtxt(filename);
+
+        /* NOTE(review): on this error path the parser set up by
+         * LibXML_init_parser() is not cleaned up before croaking -
+         * confirm whether that matters. */
+        if (ctxt == NULL) {
+            croak("Could not create file parser context for file '%s' : %s", filename, strerror(errno));
+        }
+
+        /* install the Perl SAX callbacks; they pass ctxt->_private to
+         * the Perl side as first argument */
+        ctxt->sax = PSaxGetHandler();
+        ctxt->_private = (void*)self;
+
+        /* fresh, empty error buffer for this parse */
+        LibXML_error = NEWSV(0, 512);
+        sv_setpvn(LibXML_error, "", 0);
+
+        xmlParseDocument(ctxt);
+
+        /* release our handler table, then the context itself */
+        xmlFree(ctxt->sax);
+        ctxt->sax = NULL;
+        xmlFreeParserCtxt(ctxt);
+
+        sv_2mortal(LibXML_error);
+
+        LibXML_cleanup_callbacks();
+        LibXML_cleanup_parser();
+
 SV*
 parse_html_string(self, string)
         SV * self
         SV * string
     PREINIT:
         htmlParserCtxtPtr ctxt;
-        char * CLASS = "XML::LibXML::Document";
         STRLEN len;
         char * ptr;
         int well_formed;
         SV * self
         SV * fh
     PREINIT:
-        char * CLASS = "XML::LibXML::Document";
         STRLEN len;
         xmlDocPtr real_dom;
     CODE:
         SV * self
         const char * filename
     PREINIT:
-        char * CLASS = "XML::LibXML::Document";
         STRLEN len;
         xmlDocPtr real_dom;
     CODE:
         SV * svchunk
         char * encoding
     PREINIT:
-        char * CLASS = "XML::LibXML::DocumentFragment";
         xmlChar *chunk;
         xmlNodePtr rv = NULL;
         xmlNodePtr fragment= NULL;
     OUTPUT:
         RETVAL
 
+void
+_parse_sax_xml_chunk( self, svchunk, encoding="UTF-8" )
+        SV * self
+        SV * svchunk
+        char * encoding
+    PREINIT:
+        xmlChar *chunk;
+        char * ptr;
+        STRLEN len;
+        int retCode              = -1;
+        xmlNodePtr nodes         = NULL;
+        xmlSAXHandlerPtr handler = NULL;
+    INIT:
+        if ( encoding == NULL ) encoding = "UTF-8";
+        /* ptr is only needed to obtain the length for the empty check */
+        ptr = SvPV(svchunk, len);
+        if (len == 0) {
+            croak("Empty string");
+        }
+    CODE:
+        /* Parse a balanced XML chunk, reporting it as SAX events. */
+
+        /* encode the chunk to UTF8 */
+        chunk = Sv2C(svchunk, (const xmlChar*)encoding);
+
+        if ( chunk != NULL ) {
+            LibXML_error = sv_2mortal(newSVpv("", 0));
+
+            LibXML_init_parser(self);
+            handler = PSaxGetHandler();
+
+            /* NOTE(review): no parser context is visible here, so
+             * ctxt->_private is never set to self for this parse -
+             * confirm what the callbacks receive as parser argument. */
+            retCode = xmlParseBalancedChunkMemory( NULL, 
+                                                   handler,
+                                                   NULL,
+                                                   0,
+                                                   chunk,
+                                                   &nodes );       
+            xmlFree( handler );            
+            LibXML_cleanup_callbacks();
+            LibXML_cleanup_parser();    
+            xmlFree( chunk );
+
+            if ( retCode == -1 ) {
+                /* pass the collected message as data, never as the
+                 * format string: it may contain '%' characters */
+                croak("%s", SvPV(LibXML_error, len)); 
+            }
+        }
 
 int
 _processXIncludes( self, dom )
     OUTPUT:
         RETVAL
 
+SV*
+_start_push( self, with_sax=0 ) 
+        SV * self
+        int with_sax
+    PREINIT:
+        xmlParserCtxtPtr ctxt = NULL;
+        xmlSAXHandlerPtr handler = NULL;
+    CODE:
+        /* Create an empty push parser context and return it wrapped as
+         * an XML::LibXML::ParserContext object (see PmmContextSv).
+         * with_sax == 1 installs the Perl SAX callbacks; otherwise
+         * libxml2's default handler is used. */
+        LibXML_error = NEWSV(0, 512);
+        sv_setpvn(LibXML_error, "", 0);
+
+        if ( with_sax == 1 ) {
+            handler = PSaxGetHandler();
+            ctxt = xmlCreatePushParserCtxt( handler,
+                                            NULL,
+                                            NULL,
+                                            0,
+                                            NULL );
+            /* the context keeps its own copy of the handler table, so
+             * the template must be released here */
+            xmlFree( handler );
+        }
+        else {
+            ctxt = xmlCreatePushParserCtxt( NULL, NULL, NULL, 0, NULL );
+        }
+
+        sv_2mortal(LibXML_error);
+
+        RETVAL = PmmContextSv( ctxt );
+    OUTPUT:
+        RETVAL
+
+int
+_push( self, pctxt, data ) 
+        SV * self
+        SV * pctxt
+        SV * data
+    PREINIT:
+        xmlParserCtxtPtr ctxt = NULL;
+        STRLEN len = 0;
+        char * chunk = NULL;
+    INIT:
+        /* croak does not return, so nothing follows it */
+        ctxt = PmmSvContext( pctxt );
+        if ( ctxt == NULL ) {
+            croak( "parser context already freed" );
+        }
+        if ( data == &PL_sv_undef ) {
+            XSRETURN_UNDEF;
+        }
+    CODE:
+        /* Feed one chunk of data to the push parser context.  Returns 1
+         * after the chunk was handed to libxml2, undef for undef or
+         * empty input. */
+        chunk = SvPV( data, len );
+        if ( len == 0 ) {
+            xs_warn( "empty string" );
+            XSRETURN_UNDEF;
+        }
+
+        LibXML_error = NEWSV(0, 512);
+        sv_setpvn(LibXML_error, "", 0);
+
+        LibXML_init_parser(self);
+
+        /* the SAX callbacks hand this SV to Perl as their first argument */
+        ctxt->_private = (void*)self;
+        xmlParseChunk(ctxt, chunk, len, 0);
+
+        LibXML_cleanup_callbacks();
+        LibXML_cleanup_parser();    
+
+        sv_2mortal(LibXML_error);
+
+        RETVAL = 1;
+    OUTPUT:
+        RETVAL
+
+SV*
+_end_push( self, pctxt, restore ) 
+        SV * self
+        SV * pctxt
+        int restore
+    PREINIT:
+        xmlParserCtxtPtr ctxt = NULL;
+        xmlDocPtr doc = NULL;
+    INIT:
+        ctxt = PmmSvContext( pctxt );
+        if ( ctxt == NULL ) {
+            croak( "parser context already freed" );
+        }
+    CODE:
+        /* Finish a DOM push parse and return the resulting document.
+         * With restore == 0 an unclosed element (ctxt->node still set)
+         * is reported as "document is not wellformed". */
+
+        /* Detach the context from its proxy: it is freed below, and the
+         * ParserContext destructor must not free it a second time. */
+        PmmNODE( SvPROXYNODE( pctxt ) ) = NULL;
+
+        LibXML_error = NEWSV(0, 512);
+        sv_setpvn(LibXML_error, "", 0);
+
+        LibXML_init_parser(self);
+        xmlParseChunk(ctxt, "", 0, 1); /* finish the parse */
+        LibXML_cleanup_callbacks();
+        LibXML_cleanup_parser();    
+
+        sv_2mortal(LibXML_error);
+
+        doc = ctxt->myDoc;
+
+        if ( ctxt->node != NULL && restore == 0 ) {
+            /* xmlFreeParserCtxt() leaves ctxt->myDoc alone, so the
+             * partial document has to be released explicitly */
+            ctxt->myDoc = NULL;
+            xmlFreeParserCtxt(ctxt);
+            if ( doc != NULL ) {
+                xmlFreeDoc(doc);
+            }
+            croak( "document is not wellformed" );
+        }
+
+        xmlFreeParserCtxt(ctxt);
+        if ( doc == NULL ){
+            croak( "no document found!" );
+        }
+        RETVAL = PmmNodeToSv( (xmlNodePtr) doc, NULL );
+    OUTPUT:
+        RETVAL
+
+void
+_end_sax_push( self, pctxt ) 
+        SV * self
+        SV * pctxt
+    PREINIT:
+        xmlParserCtxtPtr ctxt = NULL;
+    INIT:
+        ctxt = PmmSvContext( pctxt );
+        if ( ctxt == NULL ) {
+            croak( "parser context already freed" );
+        }
+    CODE:
+        /* Finish a SAX push parse.  Always returns undef. */
+
+        /* Detach the context from its proxy: it is freed below, and the
+         * ParserContext destructor must not free it a second time. */
+        PmmNODE( SvPROXYNODE( pctxt ) ) = NULL;
+
+        LibXML_error = NEWSV(0, 512);
+        sv_setpvn(LibXML_error, "", 0);
+
+        LibXML_init_parser(self);
+        xmlParseChunk(ctxt, "", 0, 1); /* finish the parse */
+        LibXML_cleanup_callbacks();
+        LibXML_cleanup_parser();    
+
+        sv_2mortal(LibXML_error);
+
+        /* release the handler table, then the context itself */
+        xmlFree(ctxt->sax);
+        ctxt->sax = NULL;
+        xmlFreeParserCtxt(ctxt);
+        XSRETURN_UNDEF;
+
+MODULE = XML::LibXML         PACKAGE = XML::LibXML::ParserContext
+
+void
+DESTROY( self ) 
+        SV * self
+    CODE:
+        /* Drop this object's reference on the context proxy.
+         * PmmContextREFCNT_dec() frees the proxy, and the parser
+         * context if one is still attached, once the count reaches
+         * zero. */
+        PmmContextREFCNT_dec( SvPROXYNODE( self ) );
+
+
 MODULE = XML::LibXML         PACKAGE = XML::LibXML::Document
 
 SV *
         xmlNsPtr ns = NULL;
         SV * element;
         int len=0;
-        const char * CLASS = "XML::LibXML::Attr";
         int wantarray = GIMME_V;
     PPCODE:
         real_node = PmmSvNode(node);
 xpath.h
 perl-libxml-mm.c
 perl-libxml-mm.h
+perl-libxml-sax.c
+perl-libxml-sax.h
 typemap
 example/dromeds.xml
 example/bad.xml
     return(0);
 }
 
+/*
+ * PmmNewContext
+ *
+ * Returns the proxy for a parser context.  A proxy already stored in
+ * node->_private is reused; otherwise a new one is allocated with an
+ * owner of NULL and a count of 0, and registered in node->_private.
+ * Returns NULL when the allocation fails.
+ */
+ProxyNodePtr
+PmmNewContext(xmlParserCtxtPtr node)
+{
+    ProxyNodePtr proxy = (ProxyNodePtr)node->_private;
+
+    if ( proxy != NULL ) {
+        return proxy;
+    }
+
+    proxy = (ProxyNodePtr)malloc(sizeof(ProxyNode));
+    if ( proxy == NULL ) {
+        return NULL;
+    }
+
+    proxy->node    = (xmlNodePtr)node;
+    proxy->owner   = NULL;
+    proxy->count   = 0;
+    node->_private = (void*) proxy;
+
+    return proxy;
+}
+ 
+/*
+ * PmmContextREFCNT_dec
+ *
+ * Decrements the reference count of a parser context proxy.  When the
+ * count drops to zero or below, the attached parser context (if any)
+ * is freed together with the proxy.
+ *
+ * Returns the count as it was before the decrement, or 0 for a NULL
+ * proxy.
+ */
+int
+PmmContextREFCNT_dec( ProxyNodePtr node ) 
+{ 
+    xmlParserCtxtPtr libnode;
+    int retval;
+
+    if ( node == NULL ) {
+        return 0;
+    }
+
+    retval = PmmREFCNT(node)--;
+    if ( PmmREFCNT(node) > 0 ) {
+        return retval;
+    }
+
+    xs_warn( "NODE DELETATION\n" );
+    libnode = (xmlParserCtxtPtr)PmmNODE( node );
+    if ( libnode != NULL ) {
+        /* unlink both directions before the context goes away */
+        libnode->_private = NULL;
+        PmmNODE( node ) = NULL;
+        xmlFreeParserCtxt(libnode);
+    }
+    free( node );
+
+    return retval;
+}
+
+/*
+ * PmmContextSv
+ *
+ * Wraps a parser context in a Perl object blessed into
+ * XML::LibXML::ParserContext and takes one reference on its proxy.
+ *
+ * Returns &PL_sv_undef when ctxt is NULL or when no proxy could be
+ * allocated for it.
+ */
+SV*
+PmmContextSv( xmlParserCtxtPtr ctxt )
+{
+    ProxyNodePtr dfProxy= NULL;
+    SV * retval = &PL_sv_undef;
+    const char * CLASS = "XML::LibXML::ParserContext";
+
+    if ( ctxt == NULL ) {
+        xs_warn( "no node found!" );
+        return retval;
+    }
+
+    dfProxy = PmmNewContext(ctxt);
+    if ( dfProxy == NULL ) {
+        /* PmmNewContext() returns NULL when malloc fails; the proxy
+         * must not be wrapped or reference counted in that case */
+        xs_warn( "no proxy allocated!" );
+        return retval;
+    }
+
+    retval = NEWSV(0,0);
+    sv_setref_pv( retval, CLASS, (void*)dfProxy );
+    PmmREFCNT_inc(dfProxy);            
+
+    return retval;
+}
+
+/*
+ * PmmSvContext
+ *
+ * Extracts the parser context from a Perl scalar.  Returns NULL unless
+ * the scalar is an XML::LibXML::ParserContext object with a proxy; the
+ * result is also NULL when the proxy no longer holds a context.
+ */
+xmlParserCtxtPtr
+PmmSvContext( SV * scalar ) 
+{
+    if ( scalar == NULL || scalar == &PL_sv_undef ) {
+        return NULL;
+    }
+    if ( !sv_isa( scalar, "XML::LibXML::ParserContext" ) ) {
+        return NULL;
+    }
+    if ( SvPROXYNODE(scalar) == NULL ) {
+        return NULL;
+    }
+
+    return (xmlParserCtxtPtr)PmmNODE( SvPROXYNODE(scalar) );
+}
+
 /** 
  * encodeString returns an UTF-8 encoded String
  * while the encodig has the name of the encoding of string
 void
 PmmFixOwner(ProxyNodePtr node, ProxyNodePtr newOwner );
 
+/* Drops one reference on a parser context proxy; returns the count as
+ * it was before the decrement. */
+int
+PmmContextREFCNT_dec( ProxyNodePtr node );
+
+/* Wraps a parser context in an XML::LibXML::ParserContext object. */
+SV*
+PmmContextSv( xmlParserCtxtPtr ctxt );
+
+/* Returns the parser context held by an XML::LibXML::ParserContext
+ * object, or NULL if the scalar does not hold one. */
+xmlParserCtxtPtr
+PmmSvContext( SV * perlctxt );
+
 /**
  * NAME domNodeTypeName
  * TYPE function

perl-libxml-sax.c

+/**
+ * perl-libxml-sax.c
+ * $Id$
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "EXTERN.h"
+#include "perl.h"
+
+
+#include <stdlib.h>
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+#include <libxml/entities.h>
+#include <libxml/xmlerror.h>
+
+#ifdef __cplusplus
+}
+#endif
+
+/*
+ * PSaxStartDocument
+ *
+ * startDocument callback.  Calls the Perl subs
+ * XML::LibXML::_SAXParser::start_document( $parser ) and then
+ * XML::LibXML::_SAXParser::xml_decl( $parser, $version, $encoding ).
+ * A missing version or encoding is passed as undef, so an encoding
+ * can never end up in the version slot.  Always returns 1.
+ */
+int
+PSaxStartDocument(void * ctx)
+{
+    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
+    SV * version  = &PL_sv_undef;
+    SV * encoding = &PL_sv_undef;
+    dSP;
+
+    ENTER;
+    SAVETMPS;
+
+    PUSHMARK(SP) ;
+    XPUSHs(sv_mortalcopy((SV*)ctxt->_private));
+    PUTBACK;
+
+    /* G_DISCARD: the return value is not used and must not be left
+     * behind on the Perl stack */
+    perl_call_pv( "XML::LibXML::_SAXParser::start_document", G_DISCARD );
+
+    SPAGAIN;
+
+    if ( ctxt->version != NULL )
+        version = sv_2mortal(newSVpv((char*)ctxt->version, 0));
+
+    if ( ctxt->encoding != NULL )
+        encoding = sv_2mortal(newSVpv((char*)ctxt->encoding, 0));
+
+    PUSHMARK(SP) ;
+    XPUSHs(sv_mortalcopy((SV*)ctxt->_private));
+    XPUSHs(version);
+    XPUSHs(encoding);
+    PUTBACK;
+
+    perl_call_pv( "XML::LibXML::_SAXParser::xml_decl", G_DISCARD );
+
+    FREETMPS ;
+    LEAVE ;
+
+    return 1;
+}
+
+/*
+ * PSaxEndDocument
+ *
+ * endDocument callback.  Calls the Perl sub
+ * XML::LibXML::_SAXParser::end_document( $parser ).  Always returns 1.
+ */
+int
+PSaxEndDocument(void * ctx)
+{
+    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
+    dSP;
+
+    ENTER;
+    SAVETMPS;
+
+    PUSHMARK(SP) ;
+    XPUSHs(sv_mortalcopy((SV*)ctxt->_private));
+    PUTBACK;
+
+    /* G_DISCARD: the return value is not used and must not be left
+     * behind on the Perl stack */
+    perl_call_pv( "XML::LibXML::_SAXParser::end_document", G_DISCARD );
+
+    FREETMPS ;
+    LEAVE ;
+
+    return 1;
+}
+
+/*
+ * PSaxStartElement
+ *
+ * startElement callback.  Calls the Perl sub
+ * XML::LibXML::_SAXParser::start_element( $parser, $name, @attr ),
+ * where @attr holds every string of the NULL terminated `attr` array
+ * in order.  Always returns 1.
+ */
+int
+PSaxStartElement(void *ctx, const xmlChar * name, const xmlChar** attr) {
+    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
+    const xmlChar ** ta;
+    dSP;
+
+    ENTER;
+    SAVETMPS;
+
+    PUSHMARK(SP) ;
+    XPUSHs(sv_mortalcopy((SV*)ctxt->_private));
+    XPUSHs(sv_2mortal(newSVpv((char*)name, 0)));
+
+    if ( attr != NULL ) {
+        for ( ta = attr; *ta != NULL; ta++ ) {
+            XPUSHs(sv_2mortal(newSVpv((char*)*ta, 0)));
+        }
+    }
+
+    PUTBACK;
+
+    /* G_DISCARD: the return value is not used and must not be left
+     * behind on the Perl stack */
+    perl_call_pv( "XML::LibXML::_SAXParser::start_element", G_DISCARD );
+
+    FREETMPS ;
+    LEAVE ;
+
+    return 1;
+}
+
+/*
+ * PSaxEndElement
+ *
+ * endElement callback.  Calls the Perl sub
+ * XML::LibXML::_SAXParser::end_element( $parser, $name ).
+ * Always returns 1.
+ */
+int
+PSaxEndElement(void *ctx, const xmlChar * name) {
+    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
+    dSP;
+
+    ENTER;
+    SAVETMPS;
+
+    PUSHMARK(SP) ;
+    XPUSHs(sv_mortalcopy((SV*)ctxt->_private));
+    XPUSHs(sv_2mortal(newSVpv((char*)name, 0)));
+    PUTBACK;
+
+    /* G_DISCARD: the return value is not used and must not be left
+     * behind on the Perl stack */
+    perl_call_pv( "XML::LibXML::_SAXParser::end_element", G_DISCARD );
+
+    FREETMPS ;
+    LEAVE ;
+
+    return 1;
+}
+
+/*
+ * PSaxCharacters
+ *
+ * characters callback.  Calls the Perl sub
+ * XML::LibXML::_SAXParser::characters( $parser, $data ) with the `len`
+ * bytes starting at `ch`.  Nothing is called when `ch` is NULL.
+ * Always returns 1.
+ */
+int
+PSaxCharacters(void *ctx, const xmlChar * ch, int len) {
+    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
+
+    if ( ch != NULL ) {
+        dSP;
+
+        ENTER;
+        SAVETMPS;
+
+        PUSHMARK(SP) ;
+        XPUSHs(sv_mortalcopy((SV*)ctxt->_private));
+        /* build the SV straight from the buffer with its explicit
+         * length; no intermediate copy is needed */
+        XPUSHs(sv_2mortal(newSVpvn((const char*)ch, (STRLEN)len)));
+        PUTBACK;
+
+        /* G_DISCARD: the return value is not used and must not be left
+         * behind on the Perl stack */
+        perl_call_pv( "XML::LibXML::_SAXParser::characters", G_DISCARD );
+
+        FREETMPS ;
+        LEAVE ;
+    }
+
+    return 1;
+}
+
+/*
+ * PSaxComment
+ *
+ * comment callback.  Calls the Perl sub
+ * XML::LibXML::_SAXParser::comment( $parser, $data ).  Nothing is
+ * called when `ch` is NULL.  Always returns 1.
+ */
+int
+PSaxComment(void *ctx, const xmlChar * ch) {
+    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
+
+    if ( ch != NULL ) {
+        dSP;
+
+        ENTER;
+        SAVETMPS;
+
+        PUSHMARK(SP) ;
+        XPUSHs(sv_mortalcopy((SV*)ctxt->_private));
+        /* newSVpv() copies the string itself; no intermediate copy is
+         * needed */
+        XPUSHs(sv_2mortal(newSVpv((const char*)ch, 0)));
+        PUTBACK;
+
+        /* G_DISCARD: the return value is not used and must not be left
+         * behind on the Perl stack */
+        perl_call_pv( "XML::LibXML::_SAXParser::comment", G_DISCARD );
+
+        FREETMPS ;
+        LEAVE ;
+    }
+
+    return 1;
+}
+
+/*
+ * PSaxCDATABlock
+ *
+ * cdataBlock callback.  Calls the Perl sub
+ * XML::LibXML::_SAXParser::cdata_block( $parser, $data ) with the
+ * `len` bytes starting at `ch`.  Nothing is called when `ch` is NULL.
+ * Always returns 1.
+ */
+int
+PSaxCDATABlock(void *ctx, const xmlChar * ch, int len) {
+    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
+
+    if ( ch != NULL ) {
+        dSP;
+
+        ENTER;
+        SAVETMPS;
+
+        PUSHMARK(SP) ;
+        XPUSHs(sv_mortalcopy((SV*)ctxt->_private));
+        /* build the SV straight from the buffer with its explicit
+         * length; no intermediate copy is needed */
+        XPUSHs(sv_2mortal(newSVpvn((const char*)ch, (STRLEN)len)));
+        PUTBACK;
+
+        /* G_DISCARD: the return value is not used and must not be left
+         * behind on the Perl stack */
+        perl_call_pv( "XML::LibXML::_SAXParser::cdata_block", G_DISCARD );
+
+        FREETMPS ;
+        LEAVE ;
+    }
+
+    return 1;
+}
+
+/*
+ * PSaxProcessingInstruction
+ *
+ * processingInstruction callback.  Calls the Perl sub
+ * XML::LibXML::_SAXParser::processing_instruction( $parser, $target,
+ * $data ).  Always returns 1.
+ */
+int
+PSaxProcessingInstruction( void * ctx, const xmlChar * target, const xmlChar * data )
+{
+    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
+    dSP;
+
+    ENTER;
+    SAVETMPS;
+
+    PUSHMARK(SP) ;
+    XPUSHs(sv_mortalcopy((SV*)ctxt->_private));
+    XPUSHs(sv_2mortal(newSVpv((char*)target, 0)));
+    XPUSHs(sv_2mortal(newSVpv((char*)data, 0)));
+    PUTBACK;
+
+    /* G_DISCARD: the return value is not used and must not be left
+     * behind on the Perl stack */
+    perl_call_pv( "XML::LibXML::_SAXParser::processing_instruction", G_DISCARD );
+
+    FREETMPS ;
+    LEAVE ;
+
+    return 1;
+}
+
+/*
+ * PSaxGetHandler
+ *
+ * Allocates a SAX handler table with xmlMalloc() and fills in the
+ * callbacks defined in this file; every other slot is zeroed.  The
+ * caller owns the returned table and releases it with xmlFree().
+ * Croaks when the table cannot be allocated.
+ */
+xmlSAXHandlerPtr
+PSaxGetHandler()
+{
+    xmlSAXHandlerPtr retval = (xmlSAXHandlerPtr)xmlMalloc(sizeof(xmlSAXHandler));
+
+    if ( retval == NULL ) {
+        croak( "Could not allocate SAX handler" );
+    }
+    memset(retval, 0, sizeof(xmlSAXHandler));
+
+    retval->startDocument = (startDocumentSAXFunc)&PSaxStartDocument;
+    retval->endDocument   = (endDocumentSAXFunc)&PSaxEndDocument;
+
+    retval->startElement  = (startElementSAXFunc)&PSaxStartElement;
+    retval->endElement    = (endElementSAXFunc)&PSaxEndElement;
+
+    retval->characters    = (charactersSAXFunc)&PSaxCharacters;
+    retval->comment       = (commentSAXFunc)&PSaxComment;
+    retval->cdataBlock    = (cdataBlockSAXFunc)&PSaxCDATABlock;
+
+    retval->processingInstruction = (processingInstructionSAXFunc)&PSaxProcessingInstruction;
+
+    /* warning functions should be internal */
+    retval->warning    = &xmlParserWarning;
+    retval->error      = &xmlParserError;
+    retval->fatalError = &xmlParserError;
+
+    return retval;
+}

perl-libxml-sax.h

+/**
+ * perl-libxml-sax.h
+ * $Id$
+ */
+
+#ifndef __PERL_LIBXML_SAX_H__
+#define __PERL_LIBXML_SAX_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <libxml/tree.h>
+
+#ifdef __cplusplus
+}
+#endif
+
+/* Returns a newly allocated SAX handler table wired to the Perl
+ * callback bridge; callers in this code base release it with
+ * xmlFree(). */
+xmlSAXHandlerPtr
+PSaxGetHandler();
+
+#endif