Commits

ph...@9ae0c189-cd1f-4510-a509-f4891f5cf20d  committed 80028c5

Modified Files:
Changes
version notes

README
+ installation note for HPUX

LibXML.xs LibXML.pm perl-libxml-sax.c
[fix] namespace handling in _find and _findnodes
+ getNamespaces()
+ improved SAX handling (becomes less experimental)

t/04node.t t/08findnodes.t
+ tests about the bugs reported by Petr Pajas

example/libxml.xml
+ getNamespaces() docs

lib/XML/LibXML/SAX/Builder.pm
+ can generate now DocumentFragments

  • Participants
  • Parent commits e59e8c7

Comments (0)

Files changed (9)

      (very special thanks to randy kobes for the patch)
    - fixed findnodes() and find() to return empty arrays in array context
      if the statement was legal but produced no result.
+   - fixed namespace handling in xpath functions
    - fixed local namespace handling in DOM functions
    - pretty formating to all serializing functions
      *NOTE* the XML::LibXML::Node::toString interface changed
      check the XML::LibXML::Node man page 
    - made xpath functions verbose to perl (one can wrap evals now)
+   - improved native SAX interface
+   - improved XML::LibXML::SAX::Builder
+   - added getNamespaces to the node interface
    - better libxml2 version testing
    - more documentation
 
 # this is infact not a node!
 sub prefix { return "xmlns"; }
 
+sub getNamespaces { return (); }
+
 sub nodeName {
     my $self = shift;
     my $nsP  = $self->name;
     return length($nsP) ? "xmlns:$nsP" : "xmlns";
 }
 
+sub getNodeName { my $self = shift; return $self->nodeName; }
+
 sub isEqualNode {
     my ( $self, $ref ) = @_;
     if ( ref($ref) eq "XML::LibXML::Namespace" ) {
 package XML::LibXML::_SAXParser;
 
 # this is pseudo class!!!
-use Carp;
+
+use XML::SAX::Exception;
+
+# NOTE: there is not end_document ON PURPOSE!
 
 sub start_document {
     my $parser = shift;
-    $parser->{SAX}->{State} = 1;
     $parser->{HANDLER}->start_document({});
 }
 
-sub end_document {
-    my $parser = shift;
-    $parser->{SAX}->{State} = 0;
-}
-
 sub xml_decl {
     my ( $parser, $version, $encoding ) = @_;
 
     if ( defined $attrs ) {
         $parser->{HANDLER}->start_element( { %$elem, Attributes=>$attrs} )
     }
-
 }
 
 sub end_element {
     my (  $parser, $name ) = @_;
     my $elem = pop @{$parser->{SAX}->{ELSTACK}};
     if ( $elem->{Name} ne $name ) {
-        croak( "cought error where parser should work ($elem->{Name} != $name" );
+        my $error = XML::SAX::Execption::Parse->new( Message => "cought error where parser should catch ('$elem->{Name}' ne '$name' )" );
+        $parser->{HANDLER}->error( $error );
+        return;
     }
     $parser->{HANDLER}->end_element( $elem );
 }
 
 sub characters {
     my ( $parser, $data ) = @_;
-    $parser->{HANDLER}->characters( {Data => $data} );
+    $parser->{HANDLER}->characters( $data );
 }
 
 sub comment {
     my ( $parser, $data ) = @_;
-    $parser->{HANDLER}->comment( {Data => $data} );
+    $parser->{HANDLER}->comment( $data );
 }
 
 sub cdata_block {
     my ( $parser, $data ) = @_;
     $parser->{HANDLER}->start_cdata();
-    $parser->{HANDLER}->characters( {Data => $data} );
+    $parser->{HANDLER}->characters( $data );
     $parser->{HANDLER}->end_cdata();
 }
 
 sub processing_instruction {
-    my ( $parser, $target, $data ) = @_;
-    $parser->{HANDLER}->processing_instruction( {Target => $target,
-                                                 Data   => $data} );
+    my ( $parser, $target ) = @_;
+    $parser->{HANDLER}->processing_instruction( $target );
 }
 
 # these functions will use SAX exceptions as soon i know how things really work
 sub warning {
-    my ( $parser, $message ) = @_;
-    eval { die( $message ); };
+    my ( $parser, $message, $line, $col ) = @_;
+    my $error = XML::SAX::Exception::Parse->new( LineNumber   => $line,
+                                                 ColumnNumber => $col,
+                                                 Message      => $message, );
+    $parser->{HANDLER}->warning( $error );
 }
 
 sub error {
-    my ( $parser, $message ) = @_;
-    eval { die( $message ); };
+    my ( $parser, $message, $line, $col ) = @_;
+
+    my $error = XML::SAX::Exception::Parse->new( LineNumber   => $line,
+                                                 ColumnNumber => $col,
+                                                 Message      => $message, );
+    $parser->{HANDLER}->error( $error );
 }
 
 sub fatal_error {
-    my ( $parser, $message ) = @_;
-    die( $message );
+    my ( $parser, $message, $line, $col ) = @_;
+    my $error = XML::SAX::Exception::Parse->new( LineNumber   => $line,
+                                                 ColumnNumber => $col,
+                                                 Message      => $message, );
+    $parser->{HANDLER}->fatal_error( $error );
 }
 
 1;
 
                                 /* let's be paranoid */
                                 if (tnode->type == XML_NAMESPACE_DECL) {
-                                    element = sv_newmortal();
-                                    cls = PmmNodeTypeName( tnode );
-                                    element = sv_setref_pv( element,
-                                                            (const char *)cls,
-                                                            (void *)xmlCopyNamespace((xmlNsPtr)tnode)
+                                     xmlNsPtr newns = xmlCopyNamespace((xmlNsPtr)tnode);
+                                    if ( newns != NULL ) {
+                                        element = NEWSV(0,0);
+                                        cls = PmmNodeTypeName( tnode );
+                                        element = sv_setref_pv( element,
+                                                                (const char *)cls,
+                                                                (void *)newns
                                                           );
+                                    }
+                                    else {
+                                        continue;
+                                    }
                                 }
                                 else {
                                     element = PmmNodeToSv(tnode, owner);
                     element = NULL;
                     tnode = nodelist->nodeTab[i];
                     if (tnode->type == XML_NAMESPACE_DECL) {
-                        element = sv_newmortal();
-                        cls = PmmNodeTypeName( tnode );
-                        element = sv_setref_pv( element,
-                                                (const char *)cls,
-                                                (void *)xmlCopyNamespace((xmlNsPtr)tnode)
-                                              );
+                        xmlNsPtr newns = xmlCopyNamespace((xmlNsPtr)tnode);
+                        if ( newns != NULL ) {
+                            element = NEWSV(0,0);
+                            cls = PmmNodeTypeName( tnode );
+                            element = sv_setref_pv( element,
+                                                    (const char *)cls,
+                                                    newns
+                                                  );
+                        }
+                        else {
+                            continue;
+                        }
                     }
                     else {
                         element = PmmNodeToSv(tnode, owner);
                 croak(SvPV(LibXML_error, len));
             }
         }
+
+void
+getNamespaces( pnode )
+        SV * pnode
+    ALIAS:  
+        namespaces = 1
+    PREINIT:
+        xmlNodePtr node;
+        xmlNsPtr ns = NULL;
+        xmlNsPtr newns = NULL;
+        SV* element;
+        const char * class = "XML::LibXML::Namespace";
+    PPCODE:
+        node = PmmSvNode(pnode);
+        ns = node->nsDef;
+        while ( ns != NULL ) {
+            newns = xmlCopyNamespace((xmlNsPtr)ns);
+            if ( newns != NULL ) {
+                element = NEWSV(0,0);
+                element = sv_setref_pv( element,
+                                        (const char *)class,
+                                        (void*)newns
+                                      );
+                XPUSHs( sv_2mortal(element) );
+            }
+            ns = ns->next;
+        }
+    
         
 MODULE = XML::LibXML         PACKAGE = XML::LibXML::Element
 
         SV * string
     ALIAS:
         appendTextNode = 1
+        XML::LibXML::DocumentFragment::appendText = 2
+        XML::LibXML::DocumentFragment::appendTextNode = 3
     PREINIT:
         xmlNodePtr node   = PmmSvNode( self );
         xmlChar * content = nodeSv2C( string, node );
 will ask '/usr/brand-new/bin/xml2-config' about your real 
 libxml2 configuration.
 
+NOTES FOR HPUX
+==============
+
+XML::LibXML requires libxml2 2.4.20 or later. That means the current
+binary libxml2 package for HPUX cannot be used with XML::LibXML. For
+some reasons the HPUX cc will not compile libxml2 correct, which will
+force you to recompile perl with gcc (if you havn't already done that).
+
+Additionally I got these Note from Rozi Kovesdi:
+
+> Here is my report if someone else runs into the same problem:
+>
+> Finally I am done with installing all the libraries and XML Perl
+> modules
+>
+> The combination that worked best for me was:
+> gcc
+> GNU make
+>
+> Most important - before trying to install Perl modules that depend on
+> libxml2:
+>
+> must set SHLIB_PATH  to include  the path to  libxml2 shared library
+> 
+> assuming that you used the default:
+>
+> export  SHLIB=/usr/local/lib
+>
+> also, make sure that the config  files have execute permission:
+>
+> /usr/local/bin/xml2-config
+> /usr/local/bin/xslt-config
+> 
+> they did not have +x after they were installed by 'make install'
+> and it took me a while to realize that this was my problem
+> 
+> or one can use:
+> 
+> perl Makefile.PL LIBS='-L/path/to/lib' INC='-I/path/to/include'
+
+
 Contact
 =======
 

File example/libxml.xml

                 </p>
             </method>
 
-            <method name="normalize" synopsis="$node->normalize">
+            <method name="normalize" synopsis="$node->normalize;">
                  <p>
                    This function normalizes adjacent textnodes. This
                    function is not as strict as libxml2's
                  </p>
             </method>
 
+            <method name="getNamespaces" synopsis="@nslist = $node->getNamespaces;">
+                <p>
+                    If a node has any namespaces defined, this
+                    function will return these namespaces. Note, that
+                    this will not return all namespaces that are in
+                    scope, but only the ones declares explicitly for
+                    that node.
+                </p>
+                <p>
+                    Although getNamespaces is available for all nodes,
+                    it makes only sense if used with element nodes.
+                </p>
+            </method>
+
         </description>
         <also>
             <item name="XML::LibXML"/>

File lib/XML/LibXML/SAX/Builder.pm

 sub end_document {
     my ($self, $doc) = @_;
     my $dom = $self->{DOM};
+    $dom = $self->{Parent} unless defined $dom; # this is for parsing document chunks
     delete $self->{Parent};
     delete $self->{DOM};
     return $dom;
 sub start_element {
     my ($self, $el) = @_;
     my $node;
+
+    unless ( defined $self->{DOM} or defined $self->{Parent} ) {
+        $self->{Parent} = XML::LibXML::DocumentFragment->new();
+    }
+
     if ($el->{NamespaceURI}) {
-        $node = $self->{DOM}->createElementNS($el->{NamespaceURI}, $el->{Name});
+        if ( defined $self->{DOM} ) {
+            $node = $self->{DOM}->createElementNS($el->{NamespaceURI},
+                                                  $el->{Name});
+        }
+        else {
+            $node = XML::LibXML::Element->new( $el->{Name} );
+            $node->setNamespace( $el->{NamespaceURI},$el->{Prefix} , 1 );
+        }
     }
     else {
-        $node = $self->{DOM}->createElement($el->{Name});
+        if ( defined $self->{DOM} ) {
+            $node = $self->{DOM}->createElement($el->{Name});
+        }
+        else {
+            $node = XML::LibXML::Element->new( $el->{Name} );
+        }
     }
 
     # append
 
 sub characters {
     my ($self, $chars) = @_;
+    if ( not defined $self->{DOM} and not defined $self->{Parent} ) {
+        $self->{Parent} = XML::LibXML::DocumentFragment->new();
+    }
     return unless $self->{Parent};
     $self->{Parent}->appendText($chars->{Data});
 }
 
 sub comment {
     my ($self, $chars) = @_;
-    my $comment = $self->{DOM}->createComment( $chars->{Data} );
+    my $comment;
+    if ( not defined $self->{DOM} and not defined $self->{Parent} ) {
+        $self->{Parent} = XML::LibXML::DocumentFragment->new();
+    }
+
+    if ( defined $self->{DOM} ) {
+        $comment = $self->{DOM}->createComment( $chars->{Data} );
+    }
+    else {
+        $comment = XML::LibXML::Comment->new( $chars->{Data} );
+    }
+
     if ( defined $self->{Parent} ) {
         $self->{Parent}->appendChild($comment);
     }
 
 sub processing_instruction {
     my ( $self,  $pi ) = @_;
-    my $PI = $self->{DOM}->createPI( $pi->{Target}, $pi->{Data} );
+    my $PI;
+    return unless  defined $self->{DOM};
+    $PI = $self->{DOM}->createPI( $pi->{Target}, $pi->{Data} );
+
     if ( defined $self->{Parent} ) {
         $self->{Parent}->appendChild( $PI );
     }
     }
 }
 
+sub warning {
+    my $self = shift;
+    my $error = shift;
+    # fill $@ but do not die seriously
+    eval { $error->throw; };
+}
+
+sub error {
+    my $self = shift;
+    my $error = shift;
+    delete $self->{Parent};
+    delete $self->{DOM};
+    $error->throw;
+}
+
+sub fatal_error {
+    my $self = shift;
+    my $error = shift;
+    delete $self->{Parent};
+    delete $self->{DOM};
+    $error->throw;
+}
+
 1;
 
 __END__

File perl-libxml-sax.c

 typedef struct {
     SV * parser;
     xmlNodePtr ns_stack;
+    xmlSAXLocator * locator;
 } PmmSAXVector;
 
 typedef PmmSAXVector* PmmSAXVectorPtr;
 
     vec = (PmmSAXVector*) xmlMalloc( sizeof(PmmSAXVector) );
     vec->ns_stack = xmlNewNode( NULL, "stack" );
+    vec->locator = NULL;
     SvREFCNT_inc( parser );
     vec->parser = parser;
     ctxt->_private = (void*)vec;
     return retval;
 }
 
+HV * 
+PmmGenCharDataSV( pTHX_ PmmSAXVectorPtr sax, const xmlChar * data ) {
+    HV * retval = newHV();
+
+    if ( data != NULL && xmlStrlen( data ) ) {
+        hv_store(retval, "Data", 4,
+                 C2Sv(data, NULL), DataHash);
+    }
+
+    return retval;
+}
+
+HV * 
+PmmGenPISV( pTHX_ PmmSAXVectorPtr sax, const xmlChar * target, const xmlChar * data ) {
+    HV * retval = newHV();
+
+    if ( target != NULL && xmlStrlen( target ) ) {
+        hv_store(retval, "Target", 6,
+                 C2Sv(data, NULL), TargetHash);
+
+        if ( data != NULL && xmlStrlen( data ) ) {
+            hv_store(retval, "Data", 4,
+                     C2Sv(data, NULL), DataHash);
+        }
+        else {
+            hv_store(retval, "Data", 4,
+                     C2Sv("", NULL), DataHash);
+        }
+    }
+
+    return retval;
+}
 int
 PSaxStartDocument(void * ctx)
 {
     XPUSHs(sax->parser);
 
     if ( ctxt->version != NULL ) {
-        XPUSHs(C2Sv(ctxt->version, NULL));
+        XPUSHs(sv_2mortal(C2Sv(ctxt->version, NULL)));
     }
 
     if ( ctxt->encoding != NULL ) {
-        XPUSHs(C2Sv(ctxt->encoding, NULL));
+        XPUSHs(sv_2mortal(C2Sv(ctxt->encoding, NULL)));
     }
 
     PUTBACK;
 
     dTHX;
     dSP;
-    
+
     ENTER;
     SAVETMPS;
 
  
     dTHX;
     dSP;
+
     
     ENTER;
     SAVETMPS;
 
     PUSHMARK(SP) ;
     XPUSHs(sax->parser);
-    XPUSHs(C2Sv(name, NULL));
+    XPUSHs(sv_2mortal(C2Sv(name, NULL)));
     PUTBACK;
 
     count = perl_call_pv( "XML::LibXML::_SAXParser::end_element", 0 );
 
         dTHX;
         dSP;
-    
+
         ENTER;
         SAVETMPS;
 
         PUSHMARK(SP) ;
         XPUSHs(sax->parser);
-        XPUSHs(C2Sv(data, NULL));
+        XPUSHs(newRV_noinc((SV*)PmmGenCharDataSV(aTHX_ sax,data)));
         PUTBACK;
 
         count = perl_call_pv( "XML::LibXML::_SAXParser::characters", 0 );
 
         dTHX;
         dSP;
-    
+
         ENTER;
         SAVETMPS;
 
         PUSHMARK(SP) ;
         XPUSHs(sax->parser);
-        XPUSHs(C2Sv(data, NULL));
+        XPUSHs(newRV_noinc((SV*)PmmGenCharDataSV(aTHX_ sax,data)));
         PUTBACK;
 
         count = perl_call_pv( "XML::LibXML::_SAXParser::comment", 0 );
 
         dTHX;
         dSP;
-    
+
         ENTER;
         SAVETMPS;
 
         PUSHMARK(SP) ;
         XPUSHs(sax->parser);
-        XPUSHs(C2Sv(data, NULL));
+        XPUSHs(newRV_noinc((SV*)PmmGenCharDataSV(aTHX_ sax,data)));
         PUTBACK;
 
         count = perl_call_pv( "XML::LibXML::_SAXParser::cdata_block", 0 );
 
     PUSHMARK(SP) ;
     XPUSHs(sax->parser);
-    XPUSHs(C2Sv(target, NULL));
-    XPUSHs(C2Sv(data, NULL));
+    XPUSHs(newRV_noinc((SV*)PmmGenPISV(aTHX_ sax, target, data)));
+
     PUTBACK;
 
     count = perl_call_pv( "XML::LibXML::_SAXParser::processing_instruction", 0 );
     XPUSHs(sax->parser);
 
     XPUSHs(svMessage);
+    XPUSHs(sv_2mortal(newSViv(ctxt->input->line)));
+    XPUSHs(sv_2mortal(newSViv(ctxt->input->col)));
+
     PUTBACK;
 
     perl_call_pv( "XML::LibXML::_SAXParser::warning", 0 );
     dTHX;
     dSP;
 
-
     svMessage = NEWSV(0,512);
 
     va_start(args, msg);
     XPUSHs(sax->parser);
 
     XPUSHs(svMessage);
+    XPUSHs(sv_2mortal(newSViv(ctxt->input->line)));
+    XPUSHs(sv_2mortal(newSViv(ctxt->input->col)));
     PUTBACK;
     perl_call_pv( "XML::LibXML::_SAXParser::error", 0 );
     
     XPUSHs(sax->parser);
 
     XPUSHs(svMessage);
+    XPUSHs(sv_2mortal(newSViv(ctxt->input->line)));
+    XPUSHs(sv_2mortal(newSViv(ctxt->input->col)));
     PUTBACK;
     perl_call_pv( "XML::LibXML::_SAXParser::fatal_error", 0 );
     
     memset(retval, 0, sizeof(xmlSAXHandler));
 
     retval->startDocument = (startDocumentSAXFunc)&PSaxStartDocument;
-    retval->endDocument   = (endDocumentSAXFunc)&PSaxEndDocument;
+    retval->endDocument   = NULL; /* (endDocumentSAXFunc)&PSaxEndDocument; */
 
     retval->startElement  = (startElementSAXFunc)&PSaxStartElement;
     retval->endElement    = (endElementSAXFunc)&PSaxEndElement;
 
     retval->characters    = (charactersSAXFunc)&PSaxCharacters;
+    retval->ignorableWhitespace = (ignorableWhitespaceSAXFunc)&PSaxCharacters;
+
     retval->comment       = (commentSAXFunc)&PSaxComment;
     retval->cdataBlock    = (cdataBlockSAXFunc)&PSaxCDATABlock;
 
 
 use Test;
 
-BEGIN { plan tests => 117 };
+BEGIN { plan tests => 118 };
 use XML::LibXML;
 
 my $xmlstring = q{<foo>bar<foobar/><bar foo="foobar"/><!--foo--><![CDATA[&foo bar]]></foo>};
 
     ok( $elem->lookupNamespacePrefix( $URI ), $pre);
     ok( $elem->lookupNamespaceURI( $pre ), $URI);
+
+    my @ns = $elem->getNamespaces;
+    ok( scalar(@ns) ,1 );
 }
 
 print "# 4.   Document swtiching\n";

File t/08findnodes.t

 use Test;
-BEGIN { plan tests=>15 }
+BEGIN { plan tests=>18 }
 END {ok(0) unless $loaded;}
 use XML::LibXML;
 $loaded = 1;
 eval { my $literal = $root->findvalue( "/-" ); };
 ok( $@ );
 
+eval { my @nodes = $root->findnodes( "/-" ); };
+ok( $@ );
+
+# this query should result an empty array!
+my @nodes = $root->findnodes( "/humpty/dumpty" );
+ok( scalar(@nodes), 0 );
+
+
+my $docstring = q{
+<foo xmlns="http://kungfoo" xmlns:bar="http://foo"/>
+};
+ $doc = $parser->parse_string( $docstring );
+ $root = $doc->documentElement;
+
+my @ns = $root->findnodes('namespace::*');
+ok(scalar(@ns), 2 );
+
+
 
 sub finddoc {
     my $doc = shift;