Commits

Anonymous committed 39b7cf8

Modified Files:
LibXML.xs LibXML.pm
+ canonization support for XML::LibXML::Nodes.

example/libxml.dkb
+ documentation for the new functions

Added Files:
t/24c14n.t
+ canonization tests

  • Participants
  • Parent commits eda7882

Comments (0)

Files changed (4)

     $doc->adoptNode( $self );
 }
 
+# Return the canonized (C14N) string form of this node by delegating to the
+# XS routine _toStringC14N().
+#   $comments - optional flag handed to _toStringC14N(); defaults to 0
+#   $xpath    - optional XPath expression, handed through unchanged
+sub toStringC14N {
+    my ( $self, $comments, $xpath ) = @_;
+
+    # An undefined comment flag is normalised to 0 before the XS call.
+    $comments = 0 if !defined $comments;
+
+    return $self->_toStringC14N( $comments, $xpath );
+}
+
 1;
 
 #-------------------------------------------------------------------------#
 package XML::LibXML::Document;
 
 use vars qw(@ISA);
-@ISA = 'XML::LibXML::Node';
+@ISA = ('XML::LibXML::Node');
 
 sub setDocumentElement {
     my $doc = shift;
  */
 #include <libxml/xmlversion.h>
 
+#define DEBUG_C14N
+
 /* libxml2 stuff */
 #include <libxml/xmlmemory.h>
 #include <libxml/parser.h>
  * therefore there is the need to ship as well the GDOME core headers.
  */
 #ifdef XML_LIBXML_GDOME_SUPPORT
+
 #include <libgdome/gdome.h>
 #include <libgdome/gdome-libxml-util.h>
+
 #endif
 
 /* XML::LibXML stuff */
 void
 unbindNode( self )
         xmlNodePtr self
+    ALIAS:
+        XML::LibXML::Node::unlink = 1
+        XML::LibXML::Node::unlinkNode = 2
     PREINIT:
         ProxyNodePtr dfProxy  = NULL;
         ProxyNodePtr docfrag     = NULL;
     OUTPUT:
         RETVAL
 
+
+# Serialize the document owning `self` in canonical XML (C14N) form.
+#
+#   self     - node to canonize; croaks when it is not part of a document
+#   comments - handed to xmlC14NDocDumpMemory() as its comment flag
+#   xpath    - optional XPath expression selecting the visible node set;
+#              an undefined or empty value selects the default (see CODE)
+#
+# Returns the canonized string. Croaks when the XPath context cannot be
+# created, the expression fails to evaluate, the expression does not yield
+# a node set, or libxml2 produces no output.
+SV *
+_toStringC14N(self, comments, xpath)
+        xmlNodePtr self
+        int comments
+        SV * xpath
+    PREINIT:
+        xmlChar *result               = NULL;
+        xmlChar *nodepath             = NULL;
+        xmlXPathContextPtr child_ctxt = NULL;
+        xmlXPathObjectPtr child_xpath = NULL;
+        xmlNodeSetPtr nodelist        = NULL;
+    INIT:
+        /* due to how c14n is implemented, the nodeset it receives must
+          include child nodes; ie, child nodes aren't assumed to be rendered.
+          so we use an xpath expression to find all of the child nodes. */
+        
+        if ( self->doc == NULL ) {
+            croak("Node passed to toStringC14N must be part of a document");
+        }
+    CODE:
+        /* SvOK() instead of comparing the pointer against PL_sv_undef:
+           a caller may pass an ordinary scalar that merely holds undef,
+           and that scalar is not the PL_sv_undef singleton. */
+        if ( xpath != NULL && SvOK( xpath ) ) {
+            nodepath = Sv2C( xpath, NULL );
+        }
+
+        /* an empty expression is treated like a missing one */
+        if ( nodepath != NULL && xmlStrlen( nodepath ) == 0 ) {
+            xmlFree( nodepath );
+            nodepath = NULL;
+        }
+
+        /* default for non-document nodes: every descendant element of self.
+           For document nodes nodepath stays NULL, so nodelist stays NULL
+           and the whole document is handed to the C14N routine. */
+        if ( nodepath == NULL 
+             && self->type != XML_DOCUMENT_NODE 
+             && self->type != XML_HTML_DOCUMENT_NODE 
+             && self->type != XML_DOCB_DOCUMENT_NODE
+           ) {
+            nodepath = xmlStrdup( (const xmlChar *) ".//*" );
+        }
+
+        if ( nodepath != NULL ) {
+            child_ctxt = xmlXPathNewContext(self->doc);
+            if (!child_ctxt) {
+                xmlFree( nodepath );
+                croak("Failed to create xpath context");
+            }
+    
+            /* the expression is evaluated relative to self */
+            child_ctxt->node = self;
+            /* get the namespace information */
+            if (self->type == XML_DOCUMENT_NODE) {
+                child_ctxt->namespaces = xmlGetNsList( self->doc,
+                                                       xmlDocGetRootElement( self->doc ) );
+            }
+            else {
+                child_ctxt->namespaces = xmlGetNsList(self->doc, self);
+            }
+            /* count the entries of the NULL-terminated namespace array */
+            child_ctxt->nsNr = 0;
+            if (child_ctxt->namespaces != NULL) {
+                while (child_ctxt->namespaces[child_ctxt->nsNr] != NULL) {
+                    child_ctxt->nsNr++;
+                }
+            }
+
+            child_xpath = xmlXPathEval(nodepath, child_ctxt);
+            if (child_xpath == NULL) {
+                /* release everything acquired so far before croaking */
+                if (child_ctxt->namespaces != NULL) {
+                    xmlFree( child_ctxt->namespaces );
+                }
+                xmlXPathFreeContext(child_ctxt);
+                xmlFree( nodepath );
+                croak("2 Failed to compile xpath expression");
+            }
+
+            nodelist = child_xpath->nodesetval;        
+            if ( nodelist == NULL ) {
+                /* the result carries no node set at all */
+                xmlFree( nodepath );
+                xmlXPathFreeObject(child_xpath);
+                if (child_ctxt->namespaces != NULL) {
+                    xmlFree( child_ctxt->namespaces );
+                }
+                xmlXPathFreeContext(child_ctxt);
+                croak( "cannot canonize empty nodeset!" );
+            }
+        }
+        /* LibXML_init_error(); */
+        
+        /* nodelist == NULL: libxml2 documents this as "include all
+           document nodes" */
+        xmlC14NDocDumpMemory( self->doc,
+                              nodelist,
+                              0, NULL,
+                              comments,
+                              &result );
+
+        /* nodelist belongs to child_xpath, so it is released here too */
+        if ( child_xpath ) {
+            xmlXPathFreeObject(child_xpath);
+        }
+        if ( child_ctxt ) {
+            if (child_ctxt->namespaces != NULL) {
+                xmlFree( child_ctxt->namespaces );
+            }
+            xmlXPathFreeContext(child_ctxt);
+        }
+        if ( nodepath != NULL ) {
+            xmlFree( nodepath );
+        }
+
+        /* sv_2mortal( LibXML_error ); */
+        /* LibXML_croak_error(); */
+
+        if (result == NULL) {
+             croak("Failed to convert doc to string in doc->toStringC14N");
+        } else {
+            RETVAL = C2Sv( result, NULL );
+            xmlFree(result);
+        }
+    OUTPUT:
+        RETVAL
+
+
 SV*
 string_value ( self, useDomEncoding = &PL_sv_undef )
         xmlNodePtr self

File example/libxml.dkb

       <para>For further information, please check the following documentation:</para>
       <variablelist>
         <varlistentry>
-          <term>XML::LibXML-Parser</term>
+          <term>XML::LibXML::Parser</term>
           <listitem>
             <para>Parsing XML Files with XML::LibXML</para>
           </listitem>
         </varlistentry>
         <varlistentry>
-          <term>XML::LibXML-DOM</term>
+          <term>XML::LibXML::DOM</term>
           <listitem>
             <para>XML::LibXML DOM Implementation</para>
           </listitem>
   <chapter>
     <title>Parsing XML Files with XML::LibXML</title>
     <titleabbrev>XML::LibXML-Parser</titleabbrev>
-    <para></para>
+    <para>TO BE DONE</para>
     <sect1>
       <title>Parsing</title>
-      <para></para>
+      <para>TO BE DONE</para>
       <sect2>
         <title>Input Callbacks</title>
-        <para></para>
+        <para>TO BE DONE</para>
       </sect2>
       <sect2>
         <title>DOM Parser</title>
-        <para></para>
+        <para>TO BE DONE</para>
       </sect2>
       <sect2>
         <title>Push Parser</title>
       </sect2>
       <sect2>
         <title>SAX Parser</title>
-        <para></para>
+        <para>TO BE DONE</para>
       </sect2>
     </sect1>
     <sect1>
       <title>Serialization</title>
-      <para></para>
+      <para>TO BE DONE</para>
     </sect1>
     <sect1>
       <title>Options</title>
-      <para></para>
+      <para>TO BE DONE</para>
     </sect1>
     <sect1>
       <title>Error Reporting</title>
-      <para></para>
+      <para>TO BE DONE</para>
     </sect1>
   </chapter>
   <chapter>
         </listitem>
       </varlistentry>
       <varlistentry>
+        <term>toStringC14N</term>
+        <listitem>
+          <para><funcsynopsis><funcsynopsisinfo>$c14nstr = $doc-&#62;toStringC14N($comment_flag,$xpath); </funcsynopsisinfo></funcsynopsis>A
+          variation of toString that returns the canonized form of the given
+          document.</para>
+        </listitem>
+      </varlistentry>
+      <varlistentry>
         <term>toFile</term>
         <listitem>
           <funcsynopsis>
           encoding of the document) rather than UTF8.</para>
         </listitem>
       </varlistentry>
+      <varlistentry>
+        <term>toStringC14N</term>
+        <listitem>
+          <funcsynopsis>
+            <funcsynopsisinfo>$c14nstring = $node-&#62;toStringC14N($with_comments, $xpath_expression);</funcsynopsisinfo>
+          </funcsynopsis>
+          <para>The function is similar to toString(). Instead of simply
+          serializing the document tree, it transforms it as specified
+          in the XML-C14N Specification. Such a transformation is known as
+          canonization. </para>
+          <para>If $with_comments is 0 or not defined, the result-document
+          will not contain any comments that exist in the original document.
+          To include comments into the canonized document, $with_comments has
+          to be set to 1.</para>
+          <para>The parameter $xpath_expression defines the nodeset of nodes
+          that should be visible in the resulting document. This can be used
+          to filter out some nodes. Note that only the nodes that are part of
+          the nodeset will be included in the result-document. Their
+          child-nodes will not exist in the resulting document unless they
+          are part of the nodeset defined by the xpath expression.</para>
+          <para>If $xpath_expression is omitted or empty, toStringC14N() will
+          include all nodes in the given sub-tree.</para>
+          <para>No serializing flags will be recognized by this function!</para>
+        </listitem>
+      </varlistentry>
     </variablelist>
     <variablelist>
       <varlistentry>
     </variablelist>
     <variablelist>
       <varlistentry>
-        <term>removeChildNodes</term>
+        <term>removeChildNode</term>
         <listitem>
           <funcsynopsis>
             <funcsynopsisinfo>$node-&#62;removeChildNodes();</funcsynopsisinfo>
+# $Id$
+
+##
+# these testcases are for xml canonization interfaces.
+#
+
+use Test;
+use strict;
+
+BEGIN { plan tests => 13 };
+use XML::LibXML;
+use XML::LibXML::Common qw(:libxml);
+
+my $parser = XML::LibXML->new;
+
+# Without arguments comments are dropped; a true first argument keeps them.
+{
+    my $doc = $parser->parse_string( "<a><b/> <c/> <!-- d --> </a>" );
+
+    my $c14n_res = $doc->toStringC14N();
+    ok( $c14n_res, "<a><b></b> <c></c>  </a>" );
+
+    # Reuse the variable: a second "my" in the same scope would mask the
+    # first declaration and raise a warning.
+    $c14n_res = $doc->toStringC14N(1);
+    ok( $c14n_res, "<a><b></b> <c></c> <!-- d --> </a>" );
+}
+
+# CDATA content is emitted as escaped character data, with and without
+# comments.
+{
+    my $doc = $parser->parse_string( '<a><b/><![CDATA[ >e&f<]]><!-- d --> </a>' );
+    
+    my $c14n_res = $doc->toStringC14N();
+    ok( $c14n_res, '<a><b></b> &gt;e&amp;f&lt; </a>' );
+
+    # Reuse the variable: a second "my" in the same scope would mask the
+    # first declaration and raise a warning.
+    $c14n_res = $doc->toStringC14N(1);
+    ok( $c14n_res, '<a><b></b> &gt;e&amp;f&lt;<!-- d --> </a>' );
+}
+
+# A plain attribute is kept and the empty-element tag is expanded.
+{
+    my $dom       = $parser->parse_string( '<a a="foo"/>' );
+    my $canonical = $dom->toStringC14N(0);
+
+    ok( $canonical, '<a a="foo"></a>' );
+}
+
+# A namespace declaration that is in use stays on the element.
+{
+    my $dom       = $parser->parse_string( '<b:a xmlns:b="http://foo"/>' );
+    my $canonical = $dom->toStringC14N(0);
+
+    ok( $canonical, '<b:a xmlns:b="http://foo"></b:a>' );
+}
+
+
+# ----------------------------------------------------------------- #
+# The C14N says: remove unused namespaces, libxml2 just orders them
+# ----------------------------------------------------------------- #
+{
+    my $dom       = $parser->parse_string( '<b:a xmlns:b="http://foo" xmlns:a="xml://bar"/>' );
+    my $canonical = $dom->toStringC14N(0);
+
+    # the unused declaration is kept; the declarations only get sorted
+    ok( $canonical, '<b:a xmlns:a="xml://bar" xmlns:b="http://foo"></b:a>' );
+
+    # would be correct, but will not work.
+    # ok( $canonical, '<b:a xmlns:b="http://foo"></b:a>' );
+}
+
+# ----------------------------------------------------------------- #
+# The C14N says: remove redundant namespaces
+# ----------------------------------------------------------------- #
+{
+    my $dom       = $parser->parse_string( '<b:a xmlns:b="http://foo"><b:b xmlns:b="http://foo"/></b:a>' );
+    my $canonical = $dom->toStringC14N(0);
+
+    # the repeated declaration on the child element is dropped
+    ok( $canonical, '<b:a xmlns:b="http://foo"><b:b></b:b></b:a>' );
+}
+
+# A default namespace declaration is preserved.
+{
+    my $dom       = $parser->parse_string( '<a xmlns="xml://foo"/>' );
+    my $canonical = $dom->toStringC14N(0);
+
+    ok( $canonical, '<a xmlns="xml://foo"></a>' );
+}
+
+# The XML declaration is expected to be absent from the canonized output.
+{
+    my $dom = $parser->parse_string( <<EOX );
+<?xml version="1.0" encoding="iso-8859-1"?>
+<a><b/></a>
+EOX
+    my $canonical = $dom->toStringC14N(0);
+
+    ok( $canonical, '<a><b></b></a>' );
+}
+
+print "# canonize with xpath expressions\n";
+# Only the nodes selected by the expression appear in the output.
+{
+    my $dom = $parser->parse_string( <<EOX );
+<?xml version="1.0" encoding="iso-8859-1"?>
+<a><b><c/><d/></b></a>
+EOX
+    my $canonical = $dom->toStringC14N(0, "//d" );
+
+    ok( $canonical, '<d></d>' );
+}
+
+# Canonize starting from element nodes rather than the document node.
+{
+    my $dom = $parser->parse_string( <<EOX );
+<?xml version="1.0" encoding="iso-8859-1"?>
+<a><b><c/><d><e/></d></b></a>
+EOX
+
+    # explicit expression: only <d> itself is selected, not its child <e>
+    my $start     = $dom->documentElement;
+    my $canonical = $start->toStringC14N(0, "//d" );
+    ok( $canonical, '<d></d>' );
+
+    # no expression: the descendants of the first child are expected
+    $start     = $dom->documentElement->firstChild;
+    $canonical = $start->toStringC14N(0);
+    ok( $canonical, '<c></c><d><e></e></d>' );
+}