Commits

Anonymous committed cefb8a3

added basic support for xmlRegexp module

  • Participants
  • Parent commits ac5df55

Comments (0)

Files changed (7)

 
 sub getChildrenByLocalName {
     my ( $node, $name ) = @_;
-    my @nodes;
-    if ($name eq '*') {
-      @nodes = grep { $_->nodeType == XML_ELEMENT_NODE() }
-	$node->childNodes();
-    } else {
-      @nodes = grep { $_->nodeType == XML_ELEMENT_NODE() and
-		      $_->localName eq $name } $node->childNodes();
-    }
+    # my @nodes;
+    # if ($name eq '*') {
+    #   @nodes = grep { $_->nodeType == XML_ELEMENT_NODE() }
+    # 	$node->childNodes();
+    # } else {
+    #   @nodes = grep { $_->nodeType == XML_ELEMENT_NODE() and
+    # 		      $_->localName eq $name } $node->childNodes();
+    # }
+    # return wantarray ? @nodes : XML::LibXML::NodeList->new_from_ref(\@nodes, 1);
+    my @nodes = $node->_getChildrenByTagNameNS('*',$name);
     return wantarray ? @nodes : XML::LibXML::NodeList->new_from_ref(\@nodes, 1);
 }
 
 1;
 
 #-------------------------------------------------------------------------#
+# XML::LibXML::RegExp Interface                                          #
+#-------------------------------------------------------------------------#
+
+package XML::LibXML::RegExp;
+
+sub CLONE_SKIP { 1 }
+
+sub new {
+  my $class = shift;
+  my ($regexp)=@_;
+  unless (UNIVERSAL::can($class,'_compile')) {
+    croak("Cannot create XML::LibXML::RegExp - ".
+	  "your libxml2 is compiled without regexp support!");
+  }
+  return $class->_compile($regexp);
+}
+
+1;
+
+#-------------------------------------------------------------------------#
 # XML::LibXML::XPathExpression Interface                                  #
 #-------------------------------------------------------------------------#
 
 #include <libxml/pattern.h>
 #endif
 
+#ifdef LIBXML_REGEXP_ENABLED
+#include <libxml/xmlregexp.h>
+#endif
+
 #if LIBXML_VERSION >= 20510
 #define HAVE_SCHEMAS
 #include <libxml/relaxng.h>
 
 #endif /* LIBXML_PATTERN_ENABLED */
 
+#ifdef LIBXML_REGEXP_ENABLED
+
+MODULE = XML::LibXML       PACKAGE = XML::LibXML::RegExp
+
+xmlRegexpPtr
+_compile(CLASS, pregexp)
+        SV * pregexp
+    PREINIT:
+        xmlChar * regexp = Sv2C(pregexp, NULL);
+	PREINIT_SAVED_ERROR
+    CODE:
+        if ( regexp == NULL )
+	   XSRETURN_UNDEF;
+	INIT_ERROR_HANDLER;
+	RETVAL = xmlRegexpCompile(regexp);
+        xmlFree( regexp );
+        CLEANUP_ERROR_HANDLER;
+        REPORT_ERROR(0);
+        if ( RETVAL == NULL ) {
+	  croak("Compilation of regexp failed");
+	}
+    OUTPUT:
+	RETVAL
+
+int
+matches(self, pvalue)
+        xmlRegexpPtr self
+	SV* pvalue
+    CODE:
+        xmlChar * value = Sv2C(pvalue, NULL);
+        if ( value == NULL )
+	   XSRETURN_UNDEF;
+	RETVAL = xmlRegexpExec(self,value);
+        xmlFree( value );
+    OUTPUT:
+        RETVAL
+
+int
+isDeterministic(self)
+        xmlRegexpPtr self
+    CODE:
+	RETVAL = xmlRegexpIsDeterminist(self);
+    OUTPUT:
+        RETVAL
+
+void
+DESTROY( self )
+        xmlRegexpPtr self
+    CODE:
+        xs_warn( "DESTROY REGEXP OBJECT" );
+   	xmlRegFreeRegexp(self);
+
+#endif /* LIBXML_REGEXP_ENABLED */
+
+
 MODULE = XML::LibXML       PACKAGE = XML::LibXML::XPathExpression
 
 xmlXPathCompExprPtr
 lib/XML/LibXML/PI.pod
 lib/XML/LibXML/Reader.pm
 lib/XML/LibXML/Reader.pod
+lib/XML/LibXML/RegExp.pod
 lib/XML/LibXML/RelaxNG.pod
 lib/XML/LibXML/SAX/Builder.pm
 lib/XML/LibXML/SAX/Builder.pod
 test/relaxng/demo.xml
 test/relaxng/demo2.rng
 test/relaxng/demo3.rng
+test/relaxng/demo4.rng
 test/relaxng/invaliddemo.xml
 test/relaxng/schema.rng
 test/schema/badschema.xsd
 test/schema/demo.xml
 test/schema/invaliddemo.xml
 test/schema/schema.xsd
+testschema.xsd
 test/textReader/countries.xml
 test/xinclude/entity.txt
 test/xinclude/test.xml
 test/xinclude/xinclude.xml
+TODO
 typemap
 t/01basic.t
 t/02parse.t
 t/40reader.t
 t/41xinclude.t
 t/42common.t
+t/43options.t
+t/44extent.t
+t/45regex.t
 t/60struct_error.t
 t/61error.t
 t/80registryleak.t
 - add a flag to disable touching the I/O callbacks (as requested by
   thread users on xml@gnome.org)
 
-- clear psvi before validation or keep track of whether a document has been validated or not
-
 - apply user-data patch (changes the proxy node data structure)

File docs/libxml.dbk

 	      <funcsynopsisinfo>$bool = $pattern->matchesNode($node);</funcsynopsisinfo>
 	    </funcsynopsis>
 	    <para>Given a XML::LibXML::Node object, returns a true value if 
-	    the node matches the compiled pattern expression.</para>
+	    the node is matched by the compiled pattern expression.</para>
 	  </listitem>
 	</varlistentry>
       </variablelist>
       <para><xref linkend="XML-LibXML-Reader"/> for other methods involving compiled patterns.</para>
     </sect1>      
   </chapter>
+  <chapter id="XML-LibXML-RegExp">
+    <title>XML::LibXML::RegExp - interface to libxml2 regular expressions</title>
+    <titleabbrev>XML::LibXML::RegExp</titleabbrev>
+    <sect1>
+      <title>Synopsis</title>
+      <programlisting>use XML::LibXML;
+my $compiled_re = new XML::LibXML::RegExp('[0-9]{5}(-[0-9]{4})?');
+if ($compiled_re->isDeterministic()) { ... }
+if ($compiled_re->matches($string)) { ... }
+</programlisting>
+    </sect1>
+    <sect1>
+      <title>Description</title>
+      <para>This is a perl interface to libxml2's implementation of regular expressions, which are used e.g. for validation of XML Schema simple types (pattern facet).</para>
+      <variablelist>
+	<varlistentry>
+	  <term>new()</term>
+	  <listitem>
+	    <funcsynopsis>
+	      <funcsynopsisinfo>$compiled_re = XML::LibXML::RegExp-&gt;new( $regexp_str );</funcsynopsisinfo>
+	    </funcsynopsis>
+	    <para>The constructor takes a string containing a regular expression 
+	    and returns a compiled regexp object.
+	    </para>
+	  </listitem>
+	</varlistentry>
+	<varlistentry>
+	  <term>matches($string)</term>
+	  <listitem>
+	    <funcsynopsis>
+	      <funcsynopsisinfo>$bool = $compiled_re->matches($string);</funcsynopsisinfo>
+	    </funcsynopsis>
+	    <para>Given a string value, returns a true value if 
+	    the value is matched by the compiled regular expression.</para>
+	  </listitem>
+	</varlistentry>
+	<varlistentry>
+	  <term>isDeterministic()</term>
+	  <listitem>
+	    <funcsynopsis>
+	      <funcsynopsisinfo>$bool = $compiled_re->isDeterministic();</funcsynopsisinfo>
+	    </funcsynopsis>
+	    <para>Returns a true value if the regular expression is deterministic; returns false otherwise. (See the definition of determinism in the <ulink url="http://www.w3.org/TR/REC-xml/#determinism">XML spec</ulink>)</para>
+	  </listitem>
+	</varlistentry>
+      </variablelist>
+    </sect1>
+  </chapter>
   <chapter id="XML-LibXML-Error">
     <title>Structured Errors</title>
     <titleabbrev>XML::LibXML::Error</titleabbrev> 

File lib/XML/LibXML/Error.pm

       my ($xE,$prev) = @_;
       my $terr;
       $terr=XML::LibXML::Error->new($xE);
-      unless ( defined $terr->{file} and length $terr->{file} ) {
+      #unless ( defined $terr->{file} and length $terr->{file} ) {
 	# this would make it easier to recognize parsed strings
 	# but it breaks old implementations
 	# [CG] $terr->{file} = 'string()';
-      }
+      #}
       #warn "Saving the error ",$terr->dump;
       $terr->{_prev} = ref($prev) ? $prev :
 	defined($prev) && length($prev) ? XML::LibXML::Error->new($prev) : undef;
       my ($saved_error) = @_;
       #print "CALLBACK ERROR: $saved_error\n";
       if ( defined $saved_error ) {
-	use Data::Dumper;
-	# print "reporting error ",Dumper($saved_error);
 	die $saved_error;
       }
     }
 xmlParserCtxtPtr            O_PARSER_OBJECT
 xmlRelaxNGPtr               O_OBJECT
 xmlPatternPtr               O_PATTERN_OBJECT
+xmlRegexpPtr                O_REGEXP_OBJECT
 xmlSchemaPtr                O_OBJECT
 xmlNodeSetPtr               O_OBJECT
 perlxmlParserObjectPtr      O_OBJECT
             XSRETURN_UNDEF;
     }
 
+O_REGEXP_OBJECT
+    if( sv_isobject($arg) && (SvTYPE(SvRV($arg)) == SVt_PVMG) && sv_isa($arg,\"XML::LibXML::RegExp\"))
+            $var = INT2PTR($type,SvIV((SV*)SvRV( $arg )));
+    else{
+            warn( \"${Package}::$func_name() -- $var is not a XML::LibXML::RegExp\" );
+            XSRETURN_UNDEF;
+    }
+
+
 OUTPUT
 
 # The Perl object is blessed into 'CLASS', which should be a
 O_PATTERN_OBJECT
         sv_setref_pv( $arg, (char *)\"XML::LibXML::Pattern\", (void*)$var );
 
+O_REGEXP_OBJECT
+        sv_setref_pv( $arg, (char *)\"XML::LibXML::RegExp\", (void*)$var );
+
 O_XPATH_OBJECT
         sv_setref_pv( $arg, (char *)\"XML::LibXML::XPathExpression\", (void*)$var );