Commits

ph...@9ae0c189-cd1f-4510-a509-f4891f5cf20d  committed 7051277

Modified Files:
Makefile.PL
+ remove redundant linker flags

LibXML.pm
+ iterator fix
lib/XML/LibXML/NodeList.pm
+ iterator class

example/libxml.xml
+ documentation fixes

Added Files:
lib/XML/LibXML/Iterator.pm
iterator for linear tree walking

t/22iterator.t
iterator tests

  • Participants
  • Parent commits 60df103

Comments (0)

Files changed (6)

 
 sub iterator {
     my $self = shift;
-    my $funcref = shift;
-    my $child = undef;
+    require XML::LibXML::Iterator;
+    return XML::LibXML::Iterator->new( $self );
+}
 
-    my $rv = $funcref->( $self );
-    foreach $child ( $self->childNodes() ){
-        $rv = $child->iterator( $funcref );
-    }
-    return $rv;
-}
 
 sub findnodes {
     my ($node, $xpath) = @_;
 libxml2 not found
 Try setting LIBS and INC values on the command line
 Or get libxml2 from 
-  http://www.libxml.org/
+  http://xmlsoft.org/
 If you install via RPMs, make sure you also install the -devel
 RPMs, as this is where the headers (.h files) are.
 DEATH
 WriteMakefile(
     'NAME'	=> 'XML::LibXML',
     'VERSION_FROM' => 'LibXML.pm', # finds $VERSION
-    'AUTHOR'    => 'Matt Sergeant',
+    'AUTHOR'    => 'Christian Glahn',
     'ABSTRACT'  => 'Interface to Gnome libxml2 xml parsing and DOM library',
-    'PREREQ_PM' => { 'XML::SAX' => 0,
+    'PREREQ_PM' => { 'XML::SAX' => '0.11',
                      'XML::NamespaceSupport' => '1.07',
                      'XML::LibXML::Common' => 0,
                    },
     close($cfile);
     my $quote = $is_Win32 ? '"' : "'";
     xsystem("$^X Makefile.PL " . join(' ', map { "${quote}$_=$config{$_}${quote}" } keys %config));
+
+    # I am not sure if OTHERLDFLAGS is really required - at least the
+    # libraries to include do not belong here!
+    # I would assume a user sets OTHERLDFLAGS in the %config if they are
+    # really required. If so, we don't have to pass them here ...
     xsystem("$Config{make} test ${quote}OTHERLDFLAGS=${opt}${quote}");
 } # end try_link0
 
         my $libs = $is_Win32 ? " $lib.lib  " : "-l$lib";
 #        my $libs = "-l$lib";
         if ($is_Win32) {
-            $result = try_link(<<"SRC", $libs);
+            $result = try_link(<<"SRC", undef);
 #include <windows.h>
 #include <winsock.h>
 blank() { return 0; }
 int t() { ${func}(); return 0; }
 SRC
             unless ($result) {
-                $result = try_link(<<"SRC", $libs);
+                $result = try_link(<<"SRC", undef);
 #include <windows.h>
 #include <winsock.h>
 blank() { return 0; }
             }
         }
         else {
-            $result = try_link(<<"SRC", $libs);
+            $result = try_link(<<"SRC", undef);
 blank() { return 0; }
 int t() { ${func}(); return 0; }
 SRC

File example/libxml.xml

             </method>
 
             <method name="lookupNamespaceURI"
-                    synopsis="$nsnode = $node->lookupNamespaceURI( $URI );">
+                    synopsis="$URI = $node->lookupNamespaceURI( $prefix );">
                 <p>
-                    Find a namespace by its URI starting at the
+                    Find a namespace URI by its prefix starting at the
                     current node.
                 </p>
             </method>
 
             <method name="lookupNamespacePrefix"
-                    synopsis="$nsnode = $node->lookupNamespacePrefix( $prefix );">
+                    synopsis="$prefix = $node->lookupNamespacePrefix( $URI );">
                 <p>
-                    Find a namespace by its prefix starting at the
+                    Find a namespace prefix by its URI starting at the
                     current node.
                 </p>
                 <p>
-                    <st>NOTE</st> Only the namespace URIs are
-                    unique. The prefix is only document related.
+                    <st>NOTE</st> Only the namespace URIs are meant to be
+                    unique. The prefix is only document related. Also, a
+                    document might have more than a single prefix defined
+                    for a namespace.
                 </p>
             </method>
 
             <method name="iterator"
-                    synopsis="$node->iterator( \&amp;nodehandler );">
+                    synopsis="$iter = $node->iterator;">
                 <p>
-                    This is little helper function, that lets one
-                    define a function, that will be processed on the
-                    current node and all its children. The function
-                    will recieve as its only parameter the node to
-                    proceed. The function uses inorder proceeding to
-                    traverse the subtree. Therefore you can't reach
-                    the childnodes anymore, if the nodehandler removes
-                    childnodes.
+                   This function returns a new iterator object with
+                   the current element as its first element. This
+                   iterator can be used for linear tree walking.
                 </p>
 
                 <example><![CDATA[
- $node->iterator( sub { print $_[0]->nodeName(),"\n"; } );
+ $node->iterator->iterate( sub { shift;print $_[0]->nodeName(),"\n"; } );
                 ]]></example>
 
                 <p>
-                    The example will print all node names in the current subtree.
+                    The example will print all node names in the
+                    current subtree.
                 </p>
+
                 <p>
-                    The <st>iterator</st> function will return the
-                    return value of the nodehandler while processing
-                    the last child of the current node.
+                   Check the <em>XML::LibXML::Iterator</em> man page
+                   for more details.
+                </p>
+
+                <p>
+                    <st>NOTE:</st> The function has changed with
+                    version 1.53. Earlier versions did not return an
+                    iterator object, but ran the iterate() function
+                    directly.
                 </p>
             </method>
 
                     trick for you.  But this is only done if the
                     String is <em>well-balanced</em>.
                 </p>
+   		<p>
+ 		    <st>Note that appendWellBalancedChunk() is only left for
+		    compatibility reasons</st>. Implicitly it uses 
+		</p>
+<example><![CDATA[
+  my $fragment = $parser->parse_xml_chunk( $chunk );
+  $node->appendChild( $fragment );
+]]></example>
+ 		<p>
+		    This form is more explicit and makes it easier to 
+		    control the flow of a script. 
+		</p>
             </method>
 
             <method name="appendText" 

File lib/XML/LibXML/Iterator.pm

+# $Id$
+#
+
+package XML::LibXML::Iterator;
+use strict;
+
+# Operator overloads so that an iterator can be driven with ++, -- and <>.
+#   ++ / --  step forward / backward and evaluate to the iterator itself
+#   <>       scalar context: the node returned by next() (undef at the end)
+#            list context:   every node returned by next() until it fails
+use overload
+  '++' => sub { $_[0]->next; $_[0]; },
+  '--' => sub { $_[0]->previous; $_[0]; },
+  '<>' => sub {
+      my $iter = $_[0];
+      if ( wantarray ) {
+          my @rv = ();
+          # Capture the node handed back by each step. A bare "push @rv;"
+          # adds nothing, which made the list form always return ().
+          while ( defined( my $node = $iter->next ) ) {
+              push @rv, $node;
+          }
+          return @rv;
+      }
+      return $iter->next;
+  },
+;
+
+# Constructor: new( $first_node )
+# Builds an iterator that starts on $first_node and uses the default
+# stepping function. Returns undef when no start node is given.
+sub new {
+    my ( $class, $node ) = @_;
+
+    if ( not defined $node ) {
+        return undef;
+    }
+
+    my $self = bless { FIRST => $node }, $class;
+
+    # position the iterator on the start node (sets CURRENT and INDEX)
+    $self->first;
+    $self->{ITERATOR} = \&default_iterator;
+
+    return $self;
+}
+
+# iterator_function( $code_ref )
+# Installs the callback used by next()/previous() to find the adjacent
+# node and rewinds the iterator to its first node. Called without an
+# argument it restores the default stepping function. A defined argument
+# that is not a code reference is rejected and leaves the iterator
+# untouched.
+sub iterator_function {
+    my ( $self, $func ) = @_;
+
+    if ( defined $func ) {
+        return unless ref( $func ) eq "CODE";
+    }
+
+    $self->first;
+    $self->{ITERATOR} = defined $func ? $func : \&default_iterator;
+}
+
+# Accessor: the node the iterator is currently positioned on.
+sub current {
+    my $self = shift;
+    return $self->{CURRENT};
+}
+# Accessor: the step counter kept by first()/next()/previous().
+sub index {
+    my $self = shift;
+    return $self->{INDEX};
+}
+
+# next()
+# Asks the iterator function for the node following the current one
+# (direction 1). On success the iterator moves onto that node and the
+# index is incremented; otherwise the position is left unchanged.
+# Returns the node found, or undef.
+sub next {
+    my ($self) = @_;
+
+    my $step  = $self->{ITERATOR};
+    my $found = $step->( $self, 1 );
+
+    return $found unless defined $found;
+
+    $self->{CURRENT} = $found;
+    $self->{INDEX}++;
+    return $found;
+}
+
+# previous()
+# Asks the iterator function for the node preceding the current one
+# (direction -1). On success the iterator moves onto that node and the
+# index is decremented; otherwise the position is left unchanged.
+# Returns the node found, or undef.
+sub previous {
+    my ($self) = @_;
+
+    my $step  = $self->{ITERATOR};
+    my $found = $step->( $self, -1 );
+
+    return $found unless defined $found;
+
+    $self->{CURRENT} = $found;
+    $self->{INDEX}--;
+    return $found;
+}
+
+
+# first()
+# Rewinds the iterator: the start node becomes the current node again and
+# the index is reset to 0. Returns the (new) current node.
+sub first {
+    my ($self) = @_;
+
+    @{$self}{ 'CURRENT', 'INDEX' } = ( $self->{FIRST}, 0 );
+
+    return $self->current;
+}
+
+# last()
+# Steps forward until next() reports no further node, then returns the
+# node the iterator ended up on.
+sub last {
+    my ($self) = @_;
+
+    1 while $self->next;
+
+    return $self->current;
+}
+
+# iterate( $code_ref )
+# Rewinds the iterator and calls $code_ref->( $iterator, $node ) for the
+# first node and for every node next() yields after it. The callback is
+# invoked before the iterator advances to the following node.
+# Returns the value of the last callback invocation; returns nothing
+# when the argument is not a code reference.
+sub iterate {
+    my ( $self, $function ) = @_;
+
+    return if !defined $function or ref( $function ) ne 'CODE';
+
+    my $rv;
+    for ( my $node = $self->first; $node; $node = $self->next ) {
+        $rv = $function->( $self, $node );
+    }
+
+    return $rv;
+}
+
+# default_iterator( $iterator, $direction )
+# Default stepping function: walks the tree in document order.
+#   $direction >= 0  returns the node following CURRENT: its first child
+#                    if it has one, else its next sibling, else the next
+#                    sibling of the closest ancestor that has one.
+#   $direction <  0  returns the node preceding CURRENT: the deepest last
+#                    descendant of its previous sibling, or its parent
+#                    when there is no previous sibling.
+# Returns undef when there is no such node. The walk is not restricted to
+# the subtree below FIRST; the FIRST node only serves as the stop marker
+# tested at the top of each branch.
+sub default_iterator {
+    my $self = shift;
+    my $dir  = shift;
+    my $node = undef;
+
+
+    if ( $dir < 0 ) {
+        # already back on the start node: nothing in front of it
+        return undef if $self->{CURRENT}->isSameNode( $self->{FIRST} )
+          and $self->{INDEX} <= 0;
+
+        $node = $self->{CURRENT}->previousSibling;
+        if ( not defined $node ) {
+            $node = $self->{CURRENT}->parentNode;
+        }
+        else {
+            # The forward walk visits a sibling's whole subtree before
+            # reaching CURRENT, so the predecessor is that subtree's
+            # deepest last descendant, not merely its last child.
+            while ( $node->hasChildNodes ) {
+                $node = $node->lastChild;
+            }
+        }
+    }
+    else {
+        return undef if $self->{CURRENT}->isSameNode( $self->{FIRST} )
+          and $self->{INDEX} > 0;
+
+        if ( $self->{CURRENT}->hasChildNodes ) {
+            $node = $self->{CURRENT}->firstChild;
+        }
+        else {
+            $node = $self->{CURRENT}->nextSibling;
+
+            # No sibling left on this level: climb until some ancestor
+            # has a next sibling. Checking only the direct parent ends the
+            # walk early for nodes nested more than one level deep.
+            my $ancestor = $self->{CURRENT}->parentNode;
+            while ( not defined $node and defined $ancestor ) {
+                $node     = $ancestor->nextSibling;
+                $ancestor = $ancestor->parentNode;
+            }
+        }
+    }
+
+    return $node;
+}
+
+1;
+__END__
+
+=head1 NAME
+
+XML::LibXML::Iterator - Simple Tree Iteration Class for XML::LibXML
+
+=head1 SYNOPSIS
+
+  use XML::LibXML;
+  use XML::LibXML::Iterator;
+
+  my $doc = XML::LibXML->new->parse_string( $somedata );
+  my $iter= XML::LibXML::Iterator->new( $doc );
+
+  $iter->iterator_function( \&iterate );
+
+  # more control on the flow
+  while ( $iter->next ) {
+      # do something
+  }
+
+  # operate on the entire tree
+  $iter->iterate( \&operate );
+
+=head1 DESCRIPTION
+
+An iterator allows one to operate on a document tree as if it were a
+linear sequence of nodes.
+
+=head2 Functions
+
+=over 4
+
+=item new($first_node)
+
+=item first()
+
+=item next()
+
+=item previous()
+
+=item last()
+
+=item current()
+
+=item index()
+
+=item iterator_function($function_ref);
+
+=item iterate($function_ref);
+
+=back
+
+XML::LibXML::Iterator knows two types of callback. One is known as the
+iterator function, the other is used by iterate(). The first function
+will be called for each call of next() or previous(). It is used to
+find out about the next node recognized by the iterator.
+
+The iterator function has to take two parameters: As the first
+parameter it will receive the iterator object, as second the direction
+of the iteration will be passed. The direction is either 1 (for next())
+or -1 (for previous()).
+
+The iterator's iterate() function will take a function reference that
+takes as well two parameters. The first parameter is again the
+iterator object. The second parameter is the node to operate on. The
+iterate function can do any operation on the node as
+preferred. Appending new nodes or removing the current node will not
+confuse the iteration process: The iterator preloads the next object
+before calling the iteration function. Thus the Iterator will not find
+nodes appended by the iteration function.

File lib/XML/LibXML/NodeList.pm

 	scalar @$self;
 }
 
-sub get_node { # uses array index starting at 1, not 0
+sub get_node {
+    # uses array index starting at 1, not 0
+    # this is mainly because of XPath.
 	my $self = CORE::shift;
 	my ($pos) = @_;
 	$self->[$pos - 1];
 			);
 }
 
+# iterator()
+# Returns a new XML::LibXML::NodeList::Iterator over this nodelist.
+sub iterator {
+    my ($self) = @_;
+    my $iter = XML::LibXML::NodeList::Iterator->new( $self );
+    return $iter;
+}
+
+1;
+
+package XML::LibXML::NodeList::Iterator;
+
+use strict;
+
+# Operator overloads for the nodelist iterator.
+#   ++ / --  step forward / backward and evaluate to the iterator itself
+#   <>       scalar context: the node returned by next() (undef at the end)
+#            list context:   every node returned by next() until it fails
+use overload
+  '++' => sub { $_[0]->next;     $_[0]; },
+  '--' => sub { $_[0]->previous; $_[0] },
+  '<>'  =>  sub {
+      my $iter = $_[0];
+      if ( wantarray ) {
+          my @rv = ();
+          # next() hands the node back as its return value; the loop
+          # condition never assigns $_, so pushing $_ collected the wrong
+          # value. Capture the returned node explicitly instead.
+          while ( defined( my $node = $iter->next ) ) {
+              push @rv, $node;
+          }
+          return @rv;
+      }
+      return $iter->next;
+  },
+;
+
+# Constructor: new( $nodelist )
+# The iterator is a blessed two-element array: slot 0 holds the nodelist,
+# slot 1 the current position (starting at 0).
+# Returns undef when no list is given.
+sub new {
+    my ( $class, $list ) = @_;
+
+    return undef unless defined $list;
+
+    my @state = ( $list, 0 );
+    return bless [@state], $class;
+}
+
+# first(): move to position 0 and return the node stored there.
+sub first {
+    my $self = shift;
+    $self->[1] = 0;
+    return $self->[0]->[0];
+}
+# last(): move to the final position (element count - 1) and return the
+# final node of the list.
+sub last {
+    my $self  = shift;
+    my $nodes = $self->[0];
+    $self->[1] = scalar( @{$nodes} ) - 1;
+    return $nodes->[-1];
+}
+# current(): the node at the current position.
+sub current {
+    my $self = shift;
+    my ( $nodes, $pos ) = ( $self->[0], $self->[1] );
+    return $nodes->[$pos];
+}
+# index(): the current (0-based) position within the list.
+sub index {
+    my $self = shift;
+    return $self->[1];
+}
+
+# next()
+# Advances by one position and returns the node found there.
+# Returns undef, without moving, when the iterator already sits on the
+# final element (or the list is empty).
+sub next {
+    my $self = shift;
+
+    return undef unless $self->[1] + 1 < scalar @{ $self->[0] };
+
+    $self->[1]++;
+    return $self->[0]->[ $self->[1] ];
+}
+
+# previous()
+# Steps back by one position and returns the node found there.
+# Returns undef, without moving, when the position is already 0 or less.
+sub previous {
+    my $self = shift;
+
+    return undef unless $self->[1] > 0;
+
+    $self->[1]--;
+    return $self->[0]->[ $self->[1] ];
+}
+
+# iterate( $code_ref )
+# Rewinds to just before the first element and calls
+# $code_ref->( $iterator, $node ) for every node delivered by the
+# overloaded <> operator. Returns the value of the last callback
+# invocation; returns nothing when the argument is not a code reference.
+sub iterate {
+    my ( $self, $funcref ) = @_;
+
+    return if !defined $funcref || ref( $funcref ) ne 'CODE';
+
+    # position -1 makes the first step land on element 0
+    $self->[1] = -1;
+
+    my $rv;
+    while ( <$self> ) {
+        $rv = $funcref->( $self, $_ );
+    }
+
+    return $rv;
+}
 1;
 __END__
 
 Given a nodelist, prepends the list of nodes in $nodelist to the front of
 the current list.
 
+=head2 iterator()
+
+Will return a new nodelist iterator for the current nodelist. A
+nodelist iterator is useful if more complex nodelist processing is
+needed.
+
 =cut

File t/22iterator.t

+use Test;
+
+BEGIN { plan tests => 32; }  # total number of ok() calls this script is expected to make
+
+use XML::LibXML;
+use XML::LibXML::Iterator;
+use XML::LibXML::NodeList;
+
+my $doc = XML::LibXML->new->parse_string( <<EOF );
+<test>
+    text
+    <foo/>
+    <bar><kungfoo/></bar>
+    <foo/>
+    text
+</test>
+EOF
+
+my $iter = XML::LibXML::Iterator->new( $doc->documentElement );  # tree iterator starting at the document element
+
+do {  # forward walk: one ok() for the start node plus one per successful next()
+    ok(1);
+}while ( $iter->next );
+
+do {  # backward walk: one ok() for the node reached above plus one per successful previous()
+    ok(1);
+}while ( $iter->previous );
+
+
+$iter->iterate( sub { ok(1) } );  # one ok() per node the callback is invoked for
+
+$iter->first;  # rewind to the start node before checking its name
+ok( $iter->current->nodeName, "test" );
+
+my $n = $iter->last;  # step forward until next() fails; $n itself is not used afterwards
+ok( $iter->current->nodeName, "text" );
+
+my $nodelist = $doc->findnodes( '//foo' );
+
+my $nliter = XML::LibXML::NodeList::Iterator->new( $nodelist );  # nodelist iterator, initially at position 0
+
+
+
+while ( $nliter->next ) {  # next() advances before returning, so the node at position 0 is not counted here
+    ok(1);
+}
+
+$nliter->iterate( sub {ok(1)} );  # iterate() rewinds first, so the callback sees every node in the list