Anonymous avatar Anonymous committed 2bbfa54

Modified Files:
Changes
o version notes

LibXML.xs
+ parse_sgml* functions
[fix] namespace handling for attributes. They will be 100% spec-conformant
as soon as my patch has been applied to the libxml2 distribution

perl-libxml-mm.c perl-libxml-mm.h
+ memory management

xpath.c
+ namespaces of the document element will always be registered

example/libxml.xml
+ more docs

lib/XML/LibXML/SAX/Parser.pm
[fix] ignores dtd nodes until they are supported.

t/04node.t t/06elements.t
+ more test cases

Comments (0)

Files changed (10)

 manpage!
 
 1.53
+   - enabled SGML parsing
+   - fixed namespace setting in XPath functions:
+     the namespaces of the document element will always be added now
    - implemented libxml2 dom recovering
    - introduced transparent XML::GDOME import. (EXPERIMENTAL)
    - calling external entity handlers work again
 #include <libxml/parserInternals.h>
 #include <libxml/HTMLparser.h>
 #include <libxml/HTMLtree.h>
+#include <libxml/DOCBparser.h>
 #include <libxml/tree.h>
 #include <libxml/xpath.h>
 #include <libxml/xmlIO.h>
     return doc;
 }
 
+
+/*
+ * LibXML_parse_sgml_stream
+ *
+ * Reads data from the Perl handle `ioref` through LibXML_read_perl() and
+ * feeds it chunk by chunk to the libxml2 DocBook (SGML) push parser.
+ *
+ *   self  - stored in the parser context's _private slot
+ *   ioref - handle handed to LibXML_read_perl()
+ *   enc   - encoding name, converted with Sv2C(); the result is passed to
+ *           xmlParseCharEncoding()
+ *
+ * Returns the parsed document when the final docbParseChunk() call
+ * reports success and the context is flagged well formed; returns NULL
+ * otherwise.  Croaks with "Empty Stream" if the first read yields no data
+ * and with a descriptive message if no parser context can be created.
+ *
+ * The parser context is released on every return path; a document that
+ * was built but rejected is freed here as well.
+ */
+xmlDocPtr
+LibXML_parse_sgml_stream(SV * self, SV * ioref, SV * enc )
+{
+    xmlDocPtr doc = NULL;
+    docbParserCtxtPtr ctxt;
+    int well_formed = 0;
+    char buffer[1024];
+    int read_length;
+    int ret = -1;
+
+    /* NOTE(review): if Sv2C() returns an allocated copy, it is never
+     * released in this function -- confirm ownership against Sv2C(). */
+    const xmlChar * encoding = Sv2C( enc, NULL );
+
+    /* The context is created from a short initial read (4 bytes);
+     * presumably this is what the parser uses for encoding detection. */
+    read_length = LibXML_read_perl(ioref, buffer, 4);
+    if (read_length <= 0) {
+        croak( "Empty Stream" );
+    }
+
+    ctxt = docbCreatePushParserCtxt(NULL, NULL, buffer, read_length,
+                                    NULL,
+                                    xmlParseCharEncoding( (const char*)encoding ));
+    if (ctxt == NULL) {
+        croak("Could not create sgml push parser context: %s",
+              strerror(errno));
+    }
+
+    ctxt->_private = (void*)self;
+
+    /* Stop on end of input and on a failed read alike; a non-positive
+     * length must never be passed on to the parser as chunk size. */
+    while ((read_length = LibXML_read_perl(ioref, buffer, sizeof(buffer))) > 0) {
+        ret = docbParseChunk(ctxt, buffer, read_length, 0);
+        if ( ret != 0 ) {
+            break;
+        }
+    }
+
+    /* terminate the parse; this result decides about success */
+    ret = docbParseChunk(ctxt, buffer, 0, 1);
+
+    /* Take the document out of the context before freeing the context,
+     * so that neither is leaked when parsing failed. */
+    doc = ctxt->myDoc;
+    ctxt->myDoc = NULL;
+    well_formed = ( ret == 0 ) && ctxt->wellFormed;
+    docbFreeParserCtxt(ctxt);
+
+    if (!well_formed) {
+        if (doc != NULL) {
+            xmlFreeDoc(doc);
+        }
+        return NULL;
+    }
+
+    return doc;
+}
+
 MODULE = XML::LibXML         PACKAGE = XML::LibXML
 
 PROTOTYPES: DISABLE
         }
     OUTPUT:
         RETVAL
-        
+       
 SV*
 parse_html_file(self, filename)
         SV * self
         RETVAL
 
 SV*
+parse_sgml_fh(self, fh, encoding)
+        SV * self
+        SV * fh
+        SV * encoding
+    PREINIT:
+        STRLEN len;
+        xmlDocPtr real_dom;
+        HV* real_obj = (HV *)SvRV(self);
+        SV** item    = NULL;
+    CODE:
+        /* Parse SGML from the handle `fh` via LibXML_parse_sgml_stream()
+         * and wrap the resulting document in a Perl object.  Croaks with
+         * the collected error text if no document was produced or any
+         * error message was recorded while parsing.
+         */
+        LibXML_error = NEWSV(0, 512);
+        sv_setpvn(LibXML_error, "", 0);
+
+        LibXML_init_parser(self);
+        real_dom = LibXML_parse_sgml_stream(self, fh, encoding);
+        LibXML_cleanup_callbacks();
+        LibXML_cleanup_parser();
+
+        /* mortal: the error buffer is released even when we croak */
+        sv_2mortal(LibXML_error);
+
+        if (!real_dom || ((*SvPV(LibXML_error, len)) != '\0')) {
+            /* a document built despite recorded errors is not handed
+             * out, so it has to be freed here */
+            if ( real_dom != NULL ) {
+                xmlFreeDoc( real_dom );
+            }
+            /* never use parser output as the format string itself */
+            croak("%s", SvPV(LibXML_error, len));
+        }
+        else {
+            STRLEN n_a;
+            /* NOTE(review): a pointer is passed for a %d conversion
+             * here; confirm this is intended on 64 bit platforms. */
+            SV * newURI = newSVpvf("unknown-%12.12d", real_dom);
+            real_dom->URL = xmlStrdup((const xmlChar*)SvPV(newURI, n_a));
+            SvREFCNT_dec(newURI);
+            item = hv_fetch( real_obj, "XML_LIBXML_GDOME", 16, 0 );
+
+            /* the XML_LIBXML_GDOME flag of the parser object selects
+             * the wrapper used for the document */
+            if ( item != NULL && SvTRUE(*item) ) {
+                RETVAL = PmmNodeToGdomeSv( (xmlNodePtr)real_dom );
+            }
+            else {
+                RETVAL = PmmNodeToSv((xmlNodePtr)real_dom, NULL);
+            }
+        }
+    OUTPUT:
+        RETVAL
+
+SV*
+parse_sgml_string(self, string, encoding)
+        SV * self
+        SV * string
+        SV * encoding
+    PREINIT:
+        STRLEN len;
+        char * ptr;
+        xmlDocPtr real_dom;
+        HV* real_obj = (HV *)SvRV(self);
+        SV** item    = NULL;
+    CODE:
+        /* Parse an in-memory SGML string with docbParseDoc() and wrap
+         * the resulting document in a Perl object.  Croaks on an empty
+         * string, or with the collected error text when parsing produced
+         * no document; returns undef when there is neither a document
+         * nor an error message.
+         */
+        ptr = SvPV(string, len);
+        if (len == 0) {
+            croak("Empty string");
+        }
+
+        LibXML_error = NEWSV(0, 512);
+        sv_setpvn(LibXML_error, "", 0);
+
+        LibXML_init_parser(self);
+        real_dom = (xmlDocPtr) docbParseDoc((xmlChar*)ptr,
+                                            Sv2C(encoding, NULL));
+
+        LibXML_cleanup_callbacks();
+        LibXML_cleanup_parser();
+
+        /* mortal: the error buffer is released even when we croak */
+        sv_2mortal(LibXML_error);
+
+        if (!real_dom) {
+            if ( SvCUR( LibXML_error ) > 0 ) {
+                /* never use parser output as the format string itself */
+                croak("%s", SvPV(LibXML_error, len));
+            }
+            XSRETURN_UNDEF;
+        }
+        else {
+            STRLEN n_a;
+            /* NOTE(review): a pointer is passed for a %d conversion
+             * here; confirm this is intended on 64 bit platforms. */
+            SV * newURI = newSVpvf("unknown-%12.12d", real_dom);
+            real_dom->URL = xmlStrdup((const xmlChar*)SvPV(newURI, n_a));
+            SvREFCNT_dec(newURI);
+
+            /* the XML_LIBXML_GDOME flag of the parser object selects
+             * the wrapper used for the document */
+            item = hv_fetch( real_obj, "XML_LIBXML_GDOME", 16, 0 );
+            if ( item != NULL && SvTRUE(*item) ) {
+                RETVAL = PmmNodeToGdomeSv( (xmlNodePtr)real_dom );
+            }
+            else {
+                RETVAL = PmmNodeToSv((xmlNodePtr)real_dom, NULL);
+            }
+        }
+    OUTPUT:
+        RETVAL
+
+SV*
+parse_sgml_file(self, fn, encoding)
+        SV * self
+        SV * fn
+        SV * encoding
+    PREINIT:
+        const char * filename = (const char*)Sv2C( fn, NULL );
+        STRLEN len;
+        xmlDocPtr real_dom;
+        HV* real_obj = (HV *)SvRV(self);
+        SV** item    = NULL;
+    CODE:
+        /* Parse the SGML file named by `fn` with docbParseFile() and
+         * wrap the resulting document in a Perl object.  Croaks with the
+         * collected error text when parsing produced no document;
+         * returns undef when there is neither a document nor an error
+         * message.
+         */
+        LibXML_error = NEWSV(0, 512);
+        sv_setpvn(LibXML_error, "", 0);
+
+        LibXML_init_parser(self);
+        real_dom = (xmlDocPtr) docbParseFile(filename,
+                                             Sv2C(encoding, NULL));
+        LibXML_cleanup_callbacks();
+        LibXML_cleanup_parser();
+
+        /* mortal: the error buffer is released even when we croak */
+        sv_2mortal(LibXML_error);
+
+        if (!real_dom) {
+            if ( SvCUR( LibXML_error ) > 0 ) {
+                /* never use parser output as the format string itself */
+                croak("%s", SvPV(LibXML_error, len));
+            }
+            XSRETURN_UNDEF;
+        }
+        else {
+            /* the XML_LIBXML_GDOME flag of the parser object selects
+             * the wrapper used for the document */
+            item = hv_fetch( real_obj, "XML_LIBXML_GDOME", 16, 0 );
+
+            if ( item != NULL && SvTRUE(*item) ) {
+                RETVAL = PmmNodeToGdomeSv( (xmlNodePtr)real_dom );
+            }
+            else {
+                RETVAL = PmmNodeToSv((xmlNodePtr)real_dom, NULL);
+            }
+        }
+    OUTPUT:
+        RETVAL
+
+
+void
+parse_sax_sgml_file(self, fn, enc )
+        SV * self
+        SV * fn
+        SV * enc
+    PREINIT:
+        const char * filename = Sv2C(fn, NULL);  
+        const char * encoding = Sv2C(enc, NULL);
+        xmlParserCtxtPtr ctxt;
+        STRLEN len;
+    CODE:
+        /* Parse the SGML file named by `fn` with the DocBook parser,
+         * using the SAX handler returned by PSaxGetHandler() instead of
+         * building a document that is returned to the caller.
+         *
+         * NOTE(review): `len` is declared but not used in this function.
+         */
+        LibXML_init_parser(self);
+        ctxt = (xmlParserCtxtPtr) docbCreateFileParserCtxt(filename, encoding);
+
+        /* NOTE(review): this croak happens after LibXML_init_parser()
+         * but before the LibXML_cleanup_*() calls below -- confirm that
+         * skipping the cleanup is harmless. */
+        if (ctxt == NULL) {
+            croak("Could not create file parser context for file '%s' : %s", filename, strerror(errno));
+        }
+
+        /* install the SAX handler and bind the context to `self` */
+        ctxt->sax = PSaxGetHandler();
+        PmmSAXInitContext( ctxt, self );
+        
+        LibXML_error = NEWSV(0, 512);
+        sv_setpvn(LibXML_error, "", 0);
+
+        /* NOTE(review): the return value of docbParseDocument() and the
+         * content of LibXML_error are not inspected afterwards, so parse
+         * errors are not reported from here. */
+        docbParseDocument(ctxt);
+
+        /* the handler is freed and detached by hand before the context
+         * itself is closed and freed */
+        xmlFree(ctxt->sax);
+        ctxt->sax = NULL;
+        PmmSAXCloseContext(ctxt);
+        xmlFreeParserCtxt(ctxt);
+                
+        sv_2mortal(LibXML_error);
+        
+        LibXML_cleanup_callbacks();
+        LibXML_cleanup_parser();
+
+
+SV*
 _parse_xml_chunk( self, svchunk, encoding="UTF-8" )
         SV * self
         SV * svchunk
                 }
             }
             else {
-                warn( "bad chunk" );
+                xs_warn( "bad chunk" );
                 croak(SvPV(LibXML_error, len));
                 XSRETURN_UNDEF;
             }
         RETVAL
 
 void
+removeChildNodes( pparen )
+        SV * pparen
+    PREINIT:
+        xmlNodePtr paren, elem, next, fragment;
+        ProxyNodePtr docfrag;
+    INIT:
+        if ( pparen == NULL
+             || pparen == &PL_sv_undef ) {
+            XSRETURN_UNDEF;
+        }
+        paren = PmmSvNode( pparen );
+        if ( paren == NULL ) {
+            croak( "In Scalar there was no node" );
+        }
+    CODE:
+        /* Detach all children of the node and move them below a fresh
+         * document fragment instead of freeing them, so that nodes still
+         * referenced from Perl stay valid.
+         */
+        docfrag  = PmmNewFragment( paren->doc );
+        fragment = PmmNODE( docfrag );
+        elem = paren->children;
+        while ( elem ) {
+            /* xmlUnlinkNode() clears elem->next, so the successor has
+             * to be remembered before the node is unlinked; otherwise
+             * only the first child would be processed. */
+            next = elem->next;
+            xmlUnlinkNode( elem );
+            if ( elem->type != XML_ATTRIBUTE_NODE ) {
+                /* this following piece is the function of domAppendChild()
+                 * but in this special case we can avoid most of the logic of
+                 * that function.
+                 */
+                if ( fragment->children != NULL ) {
+                    xs_warn("unlink node!\n");
+                    domAddNodeToList( elem, fragment->last, NULL );
+                }
+                else {
+                    fragment->children = elem;
+                    fragment->last     = elem;
+                    elem->parent= fragment;
+                }
+            }
+            PmmFixOwnerNode( elem, docfrag );
+            elem = next;
+        }
+
+        paren->children = paren->last = NULL;
+        /* NOTE(review): presumably this releases the fragment when no
+         * proxy took ownership of it -- confirm against PmmREFCNT_dec(). */
+        if ( PmmREFCNT(docfrag) <= 0 ) {
+            xs_warn( "have not references left" );
+            PmmREFCNT_dec( docfrag );
+        }
+
+void
 unbindNode( proxyelem )
         SV* proxyelem
     PREINIT:
             XSRETURN_UNDEF;
         }
     CODE:
-        if ( elem->type != XML_ATTRIBUTE_NODE ) {
-            if ( elem->doc != NULL ) 
-                dfProxy = PmmNewFragment(elem->doc);
-            else 
-                dfProxy = PmmNewFragment(NULL);
-        }
-
-        xmlUnlinkNode( elem );
-        if ( elem->type != XML_ATTRIBUTE_NODE )            
-            domAppendChild( PmmNODE(dfProxy), elem );
-        if ( elem->_private != NULL ) {
+        if ( elem->type != XML_DOCUMENT_NODE
+             || elem->type != XML_DOCUMENT_FRAG_NODE ) {
+            if ( elem->type != XML_ATTRIBUTE_NODE ) {
+                if ( elem->doc != NULL ) 
+                    dfProxy = PmmNewFragment(elem->doc);
+                else 
+                    dfProxy = PmmNewFragment(NULL);
+            }
+            xmlUnlinkNode( elem );
+            if ( elem->type != XML_ATTRIBUTE_NODE ) {
+                domAppendChild( PmmNODE(dfProxy), elem );
+            }
             PmmFixOwner( SvPROXYNODE(proxyelem), dfProxy );
         }
 
             }
             ns = ns->next;
         }
-    
+
+SV *
+getNamespace( pnode )
+        SV * pnode
+    ALIAS:
+        localNamespace = 1
+        localNS        = 2
+    PREINIT:
+        xmlNodePtr node;
+        xmlNsPtr ns = NULL;
+        xmlNsPtr newns = NULL;
+        const char * class = "XML::LibXML::Namespace";
+    CODE:
+        /* Return a copy of the node's own namespace, blessed into
+         * XML::LibXML::Namespace.  Returns undef if the scalar holds no
+         * node, the node has no namespace, or the copy fails; RETVAL
+         * must never reach OUTPUT uninitialised.
+         */
+        node = PmmSvNode(pnode);
+        if ( node == NULL ) {
+            XSRETURN_UNDEF;
+        }
+
+        ns = node->ns;
+        if ( ns == NULL ) {
+            XSRETURN_UNDEF;
+        }
+
+        newns = xmlCopyNamespace(ns);
+        if ( newns == NULL ) {
+            XSRETURN_UNDEF;
+        }
+
+        RETVAL = NEWSV(0,0);
+        RETVAL = sv_setref_pv( RETVAL,
+                               (const char *)class,
+                               (void*)newns
+                              );
+    OUTPUT:
+        RETVAL
         
 MODULE = XML::LibXML         PACKAGE = XML::LibXML::Element
 
         }
 
         nsPrefix = nodeSv2C(namespacePrefix, node);
-        if ( ns = xmlSearchNsByHref(node->doc, node, nsURI) )
-            RETVAL = 1;
+        if ( ns = xmlSearchNsByHref(node->doc, node, nsURI) ) {
+            if ( ns->prefix == nsPrefix               /* both are NULL then */
+                 || xmlStrEqual( ns->prefix, nsPrefix ) ) {            
+                RETVAL = 1;
+            }
+            else if ( ns = xmlNewNs( node, nsURI, nsPrefix ) ) {
+                RETVAL = 1;
+            }
+            else {
+                RETVAL = 0;
+            }
+        }
         else if ( ns = xmlNewNs( node, nsURI, nsPrefix ) )
             RETVAL = 1;
         else
             xmlFree(nsURI);
             XSRETURN_UNDEF;
         }
-        
-        ret = xmlGetNsProp( node, name, nsURI );
+        if ( nsURI && xmlStrlen(nsURI) ) {     
+            ret = xmlGetNsProp( node, name, nsURI );
+        }
+        else {
+            ret = xmlGetProp( node, name );
+        }
+
         xmlFree( name );
-        xmlFree( nsURI );
+        if ( nsURI ) {
+            xmlFree( nsURI );
+        }
         if ( ret ) {
             RETVAL = nodeC2Sv( ret, node );
             xmlFree( ret );
         xmlChar * name  = NULL;
         xmlChar * value = NULL;
         const xmlChar * pchar = NULL;
-        xmlNsPtr ns;
+        xmlNsPtr ns         = NULL;
+        xmlChar * localname = NULL;
+        xmlChar * prefix    = NULL;
+    INIT:
+        name  = nodeSv2C( attr_name, node );        
+        if ( !name && !xmlStrlen( name ) ) {
+            if ( nsURI ) {
+                xmlFree( nsURI);
+            }
+            croak( "no name" );
+            XSRETURN_UNDEF;
+        }
+        localname = xmlSplitQName2(name, &prefix); 
+        if ( localname ) {
+            xmlFree( name ); 
+            name = localname;
+        }
     CODE:
+        value = nodeSv2C( attr_value, node ); 
+
         if ( nsURI && xmlStrlen(nsURI) ) {
+            xs_warn( "found uri" );
+
             ns = xmlSearchNsByHref( node->doc, node, nsURI );
             if ( !ns ) {
                 /* create new ns */
-                xmlChar * localname = NULL;
-                xmlChar * prefix = NULL;
-
-                name  = nodeSv2C( attr_name, node );
-                if ( ! name ) {
-                    xmlFree(nsURI);
-                    XSRETURN_UNDEF;
+                 if ( prefix && xmlStrlen( prefix ) ) {
+                    ns = xmlNewNs(node, nsURI , prefix );
+                 }
+                 else {
+                    ns = NULL;
+                 }
+            }
+            else if ( !ns->prefix ) {
+                if ( ns->next && ns->next->prefix ) {
+                    ns = ns->next;
                 }
-
-                pchar = xmlStrchr(name, ':');
-                if ( pchar ) {
-                    localname = xmlSplitQName2(name, &prefix); 
+                else if ( prefix && xmlStrlen( prefix ) ) {
+                    ns = xmlNewNs(node, nsURI , prefix );
                 }
                 else {
-                    localname = xmlStrdup( name );
-                }
-            
-                xmlFree( name );
-                name = localname;
-            
-                ns = xmlNewNs(node, nsURI , prefix );
-                if ( prefix ) {
-                    xmlFree( prefix );
+                    ns = NULL;
                 }
             }
-            else {
-                xmlChar * localname = NULL;
-                xmlChar * prefix = NULL;
-
-                name  = nodeSv2C( attr_name, node );
-                if (!name) {
-                    xmlFree(nsURI);
-                    XSRETURN_UNDEF;
-                }
-
-                pchar = xmlStrchr(name, ':');
-                if ( pchar ) {
-                    localname = xmlSplitQName2(name, &prefix); 
-                }
-                else {
-                    localname = xmlStrdup( name );
-                }
-
-                if ( prefix ) {
-                    xmlFree(prefix);
-                }
-                xmlFree(name);
-                name = localname;
-            }
-            xmlFree(nsURI);
-
-            value = nodeSv2C( attr_value, node );
-         
-            xmlSetNsProp( node, ns, name, value );
+        }
+
+        if ( nsURI && xmlStrlen(nsURI) && !ns ) {
+            xs_warn( "bad ns attribute!" );
         }
         else {
-            name  = nodeSv2C( attr_name, node );
-            if (!name) {
-                xmlFree(nsURI);
-                XSRETURN_UNDEF;
-            }
-            value = nodeSv2C( attr_value, node ); 
-            xmlSetProp( node, name, value );            
+            /* warn( "set attribute %s->%s", name, value ); */
+            xmlSetNsProp( node, ns, name, value );            
+        }
+        
+        if ( prefix ) {
+            xmlFree( prefix );
+        }
+        if ( nsURI ) {
+            xmlFree( nsURI );
         }
         xmlFree( name );
         xmlFree( value );
             xattr = xmlHasNsProp( node, name, nsURI );
         }
         else {
-            xattr = xmlHasProp( node, name );
+            xattr = xmlHasNsProp( node, name, NULL );
         }
         if ( xattr ) {
             xmlUnlinkNode((xmlNodePtr)xattr);

example/libxml.xml

     </package>
 
     <package name="XML::LibXML::Node">
-        <short>"virtual" Base Class DOM-Nodes</short>
+        <short>abstract Base Class DOM-Nodes</short>
         <description>
             <p>
                 LibXML::Node defines functions that are common to all
                 </p>
             </method>
 
+            <method name="removeChildNodes" synopsis="$node->removeChildNodes();">
+                <p>
+                    This function is not specified for any DOM level:
+                    It removes all child nodes from a node in a single
+                    step.  Unlike the libxml2 function itself
+                    (xmlFreeNodeList), this function will not
+                    immediately remove the nodes from memory. This
+                    saves one from getting memory violations if there are 
+                    nodes still referenced from the Perl level. 
+                </p>
+            </method>
+
         </description>
         <also>
             <item name="XML::LibXML"/>

lib/XML/LibXML/SAX/Parser.pm

         # ignore!
         # i may want to handle this one day, dunno yet
     }
+    else if ($node->type == XML_DTD_NODE ) {
+        # ignore!
+        # i will support DTDs, but had no time yet.
+    }
     else {
         warn("unsupported node type: $node_type");
     }
     return(0);
 }
 
+/*
+ * PmmFixOwnerNode
+ *
+ * Re-owns `node` to `parent`.  If the node carries a proxy in its
+ * _private slot, that proxy is passed to PmmFixOwner(); otherwise the
+ * node's child list is passed to PmmFixOwnerList().  Does nothing when
+ * either argument is NULL.
+ */
+void
+PmmFixOwnerNode( xmlNodePtr node, ProxyNodePtr parent )
+{
+    if ( node == NULL || parent == NULL ) {
+        return;
+    }
+
+    if ( node->_private == NULL ) {
+        /* no proxy on this node: descend into its children */
+        PmmFixOwnerList( node->children, parent );
+        return;
+    }
+
+    PmmFixOwner( node->_private, parent );
+}
+
 ProxyNodePtr
 PmmNewContext(xmlParserCtxtPtr node)
 {
 void
 PmmFixOwner(ProxyNodePtr node, ProxyNodePtr newOwner );
 
+void
+PmmFixOwnerNode(xmlNodePtr node, ProxyNodePtr newOwner );
+
 int
 PmmContextREFCNT_dec( ProxyNodePtr node );
 
 
 use Test;
 
-BEGIN { plan tests => 118 };
+BEGIN { plan tests => 121 };
 use XML::LibXML;
 
 my $xmlstring = q{<foo>bar<foobar/><bar foo="foobar"/><!--foo--><![CDATA[&foo bar]]></foo>};
 
     }
 
-    print "# 2.2 Invalid Operations\n";    
+    print "# 2.2 Invalid Operations\n";
+
+
+    print "# 2.3 DOM extensions \n";
+    {
+        my $str = "<foo><bar/>com</foo>";
+        my $doc = XML::LibXML->new->parse_string( $str );
+        my $elem= $doc->documentElement;
+        ok( $elem );
+        ok( $elem->hasChildNodes );
+        $elem->removeChildNodes;
+        ok( $elem->hasChildNodes,0 );
+        $elem->toString;
+    }    
 }
 
 print "# 3   Standalone With NameSpaces\n\n"; 
 
 use Test;
 
-BEGIN { plan tests => 55 };
+BEGIN { plan tests => 58 };
 use XML::LibXML;
 
 my $foo       = "foo";
     $elem->removeAttributeNS( $nsURI.".x", $foo);
     ok( !$elem->hasAttributeNS($nsURI.".x", $foo) );
 
+    # warn $elem->toString;
+    print "# set attribute ".$prefix . ":". $attname1."\n";
+
     $elem->setAttributeNS( $nsURI, $prefix . ":". $attname1, $attvalue2 );
+    # warn $elem->toString;
+
 
     $elem->removeAttributeNS("",$attname1);
+    # warn $elem->toString;
+
     ok( $elem->hasAttribute($attname1) );
     ok( $elem->hasAttributeNS($nsURI,$attname1) );
+    # warn $elem->toString;
 } 
 
 print "# 2. unbound node\n";
     # warn $elem->toString() , "\n";
 }
 
-print "# 3. Namespace switching\n";
+print "# 3. Namespace handling\n";
+print "# 3.1 Namespace switching\n";
 {
     my $elem = XML::LibXML::Element->new($foo);
     ok($elem);
     # warn $elem->toString() , "\n";
 } 
 
+print "# 3.2 default Namespace and Attributes\n";
+{
+    my $doc  = XML::LibXML::Document->new();
+    my $elem = $doc->createElementNS( "foo", "root" );
+    $doc->setDocumentElement( $elem );
+
+    $elem->setNamespace( "foo", "bar" );
+
+    $elem->setAttributeNS( "foo", "x:attr",  "test" );
+    $elem->setAttributeNS( undef, "attr2",  "test" );
+
+    ok( $elem->getAttributeNS( "foo", "attr" ), "test" );
+    ok( $elem->getAttributeNS( "", "attr2" ), "test" );
+
+    # warn $doc->toString;
+    # actually this doesn't work correctly with libxml2 <= 2.4.23
+    $elem->setAttributeNS( "foo", "attr2",  "bar" );
+    ok( $elem->getAttributeNS( "foo", "attr2" ), "bar" );
+    # warn $doc->toString;
+}
+
 print "# 4. Text Append and Normalization\n";
 
 {
 /usr/bin/perl \
                  -Iblib/arch \
                  -Iblib/lib \
-                 -I/usr/local/lib/perl5/5.6.1/i686-linux \
-                 -I/usr/local/lib/perl5/5.6.1 \
                  -e 'use Test::Harness qw(&runtests $verbose); $verbose=1; runtests @ARGV;' \
                   t/$1*.t
     
         /* get the namespace information */
         if (refNode->type == XML_DOCUMENT_NODE) {
-            ctxt->namespaces = xmlGetNsList(refNode->doc, refNode->children);
+            ctxt->namespaces = xmlGetNsList( refNode->doc,
+                                             xmlDocGetRootElement( refNode->doc ) );
         }
         else {
             ctxt->namespaces = xmlGetNsList(refNode->doc, refNode);
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.