Commits

Shlomi Fish committed 9f0e1f5

Start implementing Role::XSLT.

I'm a bit uneasy about the fact that it consumes the Role::RelaxNG.

For why we changed the sub BUILD to after BUILD see:

http://www.perlmonks.org/?node_id=837397 .

  • Participants
  • Parent commits 129252a

Comments (0)

Files changed (7)

XML-GrammarBase/MANIFEST

 inc/Test/Run/Builder.pm
 lib/XML/GrammarBase.pm
 lib/XML/GrammarBase/Role/RelaxNG.pm
+lib/XML/GrammarBase/Role/XSLT.pm
 Makefile.PL
 MANIFEST
 META.yml
 t/data/fiction-xml.rng
 t/data/fiction-xml-invalid-test.xml
 t/data/fiction-xml-test.xml
+t/data/fiction-xml-test-html-xslt-output.xhtml
+t/data/fiction-xml-to-html.xslt
+t/lib/Test/XML/Ordered.pm
 t/pod-coverage.t
 t/pod.t
 t/rng.t
+t/xslt.t
 META.json

XML-GrammarBase/lib/XML/GrammarBase/Role/RelaxNG.pm

 has 'rng_schema_basename' => (isa => 'Str', is => 'rw');
 has '_rng' => (isa => 'XML::LibXML::RelaxNG', is => 'rw');
 
-sub BUILD
-{
+sub BUILD {}
+
+after 'BUILD' => sub {
     my ($self) = @_;
 
     my $data_dir = $self->data_dir() ||
         );
 
     $self->_rng($rngschema);
-}
+};
 
 sub _undefize
 {

XML-GrammarBase/lib/XML/GrammarBase/Role/XSLT.pm

+package XML::GrammarBase::Role::XSLT;
+
+use strict;
+use warnings;
+
+
+=head1 NAME
+
+XML::GrammarBase::Role::XSLT - role for an XSLT converter.
+
+=head1 VERSION
+
+Version 0.0.1
+
+=cut
+
+use Any::Moose 'Role';
+
+use XML::LibXML;
+use XML::LibXSLT;
+
+our $VERSION = '0.0.1';
+
+with ('XML::GrammarBase::Role::RelaxNG');
+
+has 'xslt_transform_basename' => (isa => 'Str', is => 'rw');
+has '_stylesheet' => (isa => "XML::LibXSLT::StylesheetWrapper", is => 'rw');
+has '_xml_parser' => (isa => "XML::LibXML", is => 'rw');
+
+sub BUILD {}
+
+after 'BUILD' => sub {
+    my ($self) = @_;
+
+    my $data_dir = $self->data_dir() ||
+        dist_dir( $self->module_base() );
+
+    $self->data_dir($data_dir);
+
+    $self->_xml_parser(XML::LibXML->new());
+
+    my $xslt = XML::LibXSLT->new();
+
+    my $style_doc = $self->_xml_parser()->parse_file(
+        File::Spec->catfile(
+            $self->data_dir(),
+            $self->xslt_transform_basename(),
+        ),
+    );
+
+    $self->_stylesheet($xslt->parse_stylesheet($style_doc));
+
+    return;
+};
+
+sub _undefize
+{
+    my $v = shift;
+
+    return defined($v) ? $v : "(undef)";
+}
+
+sub _calc_and_ret_dom_without_validate
+{
+    my $self = shift;
+    my $args = shift;
+
+    my $source = $args->{source};
+
+    return
+          exists($source->{'dom'})
+        ? $source->{'dom'}
+        : exists($source->{'string_ref'})
+        ? $self->_xml_parser()->parse_string(${$source->{'string_ref'}})
+        : $self->_xml_parser()->parse_file($source->{'file'})
+        ;
+}
+
+sub _get_dom_from_source
+{
+    my $self = shift;
+    my $args = shift;
+
+    my $source_dom = $self->_calc_and_ret_dom_without_validate($args);
+
+    my $ret_code;
+
+    eval
+    {
+        $ret_code = $self->_rng()->validate($source_dom);
+    };
+
+    if (defined($ret_code) && ($ret_code == 0))
+    {
+        # It's OK.
+    }
+    else
+    {
+        confess "RelaxNG validation failed [\$ret_code == "
+            . _undefize($ret_code) . " ; $@]"
+            ;
+    }
+
+    return $source_dom;
+}
+
+sub perform_xslt_translation
+{
+    my ($self, $args) = @_;
+
+    my $source_dom = $self->_get_dom_from_source($args);
+
+    my $stylesheet = $self->_stylesheet();
+
+    my $results = $stylesheet->transform($source_dom);
+
+    my $medium = $args->{output};
+
+    if ($medium eq "string")
+    {
+        return $stylesheet->output_string($results);
+    }
+    elsif ($medium eq "dom")
+    {
+        return $results;
+    }
+    else
+    {
+        confess "Unknown medium";
+    }
+}
+
+=head1 SYNOPSIS
+
+    package XML::Grammar::MyGrammar::RelaxNG::Validate;
+
+    use Any::Moose;
+
+    with ('XML::GrammarBase::Role::RelaxNG');
+
+    has '+module_base' => (default => 'XML::Grammar::MyGrammar');
+    has '+rng_schema_basename' => (default => 'my-grammar.rng');
+
+    package main;
+
+    my $rnger = XML::Grammar::MyGrammar::RelaxNG::Validate->new(
+        data_dir => "/path/to/data-dir",
+    );
+
+    # Throws an exception on failure.
+    $rnger->rng_validate_file("/different-path-to-xml-file.xml");
+
+=head1 SLOTS
+
+=head2 module_base
+
+The basename of the module - used for dist dir.
+
+=head2 data_dir
+
+The data directory where the XML assets can be found (the RELAX NG schema, etc.)
+
+=head2 rng_schema_basename
+
+The Relax NG Schema basename.
+
+=head1 METHODS
+
+=head2 $self->rng_validate_dom($source_dom)
+
+Validates the DOM ( $source_dom ) using the RELAX-NG schema.
+
+=head2 $self->rng_validate_file($file_path)
+
+Validates the file in $file_path using the RELAX-NG schema.
+
+=head2 $self->rng_validate_string($xml_string)
+
+Validates the XML in the $xml_string using the RELAX-NG schema.
+
+=head2 $converter->perform_xslt_translation
+
+=over 4
+
+=item * my $final_source = $converter->perform_xslt_translation({source => {file => $filename}, output => "string" })
+
+=item * my $final_source = $converter->perform_xslt_translation({source => {string_ref => \$buffer}, output => "string" })
+
+=item * my $final_dom = $converter->perform_xslt_translation({source => {file => $filename}, output => "dom" })
+
+=item * my $final_dom = $converter->perform_xslt_translation({source => {dom => $libxml_dom}, output => "dom" })
+
+=back
+
+Does the actual conversion. The C<'source'> argument points to a hash-ref with
+keys and values for the source. If C<'file'> is specified there it points to the
+filename to translate (currently the only available source). If
+C<'string_ref'> is specified it points to a reference to a string, with the
+contents of the source XML. If C<'dom'> is specified then it points to an XML
+DOM as parsed or constructed by XML::LibXML.
+
+The C<'output'> key specifies the return value. A value of C<'string'> returns
+the XML as a string, and a value of C<'dom'> returns the XML as an
+L<XML::LibXML> DOM object.
+
+=cut
+
+=head2 BUILD
+
+L<Any::Moose> constructor. For internal use.
+
+=head1 AUTHOR
+
+Shlomi Fish, C<< <shlomif at cpan.org> >>
+
+=head1 BUGS
+
+Please report any bugs or feature requests to C<bug-xml-grammarbase at rt.cpan.org>, or through
+the web interface at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=XML-GrammarBase>.  I will be notified, and then you'll
+automatically be notified of progress on your bug as I make changes.
+
+
+
+
+=head1 SUPPORT
+
+You can find documentation for this module with the perldoc command.
+
+    perldoc XML::GrammarBase
+
+You can also look for information at:
+
+=over 4
+
+=item * RT: CPAN's request tracker
+
+L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=XML-GrammarBase>
+
+=item * AnnoCPAN: Annotated CPAN documentation
+
+L<http://annocpan.org/dist/XML-GrammarBase>
+
+=item * CPAN Ratings
+
+L<http://cpanratings.perl.org/d/XML-GrammarBase>
+
+=item * Search CPAN
+
+L<http://search.cpan.org/dist/XML-GrammarBase/>
+
+=back
+
+
+=head1 ACKNOWLEDGEMENTS
+
+
+=head1 COPYRIGHT & LICENSE
+
+Copyright 2009 Shlomi Fish.
+
+This program is distributed under the MIT (X11) License:
+L<http://www.opensource.org/licenses/mit-license.php>
+
+Permission is hereby granted, free of charge, to any person
+obtaining a copy of this software and associated documentation
+files (the "Software"), to deal in the Software without
+restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+=cut
+
+1; # End of XML::GrammarBase::RelaxNG::Validate
+

XML-GrammarBase/t/data/fiction-xml-test-html-xslt-output.xhtml

+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xmlns:fic="http://web-cpan.berlios.de/modules/XML-Grammar-Fortune/fiction-xml-0.2/">
+  <head>
+    <title>David vs. Goliath - Part I</title>
+  </head>
+  <body>
+    <div class="fiction story" xml:id="index">
+      <h1>David vs. Goliath - Part I</h1>
+      <div class="fiction section" xml:id="top">
+        <h2>The Top Section</h2>
+        <p>
+    King David and Goliath were standing by each other.
+    </p>
+        <p>
+    David said unto Goliath: "I will shoot you. I <b>swear</b> I will"
+    </p>
+        <div class="fiction section" xml:id="goliath">
+          <h3>Goliath's Response</h3>
+          <p>
+    Goliath was not amused.
+    </p>
+          <p>
+    He said to David: "Oh, really. <i>David</i>, the red-headed!".
+    </p>
+          <p>
+    David started listing Goliath's disadvantages:
+    </p>
+        </div>
+      </div>
+    </div>
+  </body>
+</html>

XML-GrammarBase/t/data/fiction-xml-to-html.xslt

+<?xml version="1.0" encoding="UTF-8"?>
+<xsl:stylesheet version = '1.0'
+    xmlns="http://www.w3.org/1999/xhtml"
+    xmlns:xsl='http://www.w3.org/1999/XSL/Transform'
+    xmlns:fic="http://web-cpan.berlios.de/modules/XML-Grammar-Fortune/fiction-xml-0.2/"
+     >
+
+<xsl:output method="xml" version="1.0" encoding="UTF-8" indent="yes"
+ doctype-public="-//W3C//DTD XHTML 1.1//EN"
+ doctype-system="http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"
+ />
+
+<xsl:template match="/">
+        <xsl:apply-templates select="//fic:body" />
+</xsl:template>
+
+<xsl:template match="fic:body">
+    <html>
+        <head>
+            <title>
+                <xsl:value-of select="fic:title" />
+            </title>
+        </head>
+        <body>
+            <div class="fiction story">
+                <xsl:attribute name="xml:id">
+                    <xsl:value-of select="@xml:id" />
+                </xsl:attribute>
+                <!-- TODO : duplicate code between here and fic:section.
+                    Abstract into a common functionality!
+                -->
+                <xsl:element name="h{count(ancestor-or-self::fic:section|ancestor-or-self::fic:body)}">
+                    <xsl:value-of select="fic:title" />
+                </xsl:element>
+
+                <xsl:apply-templates select="fic:section" />
+            </div>
+        </body>
+    </html>
+</xsl:template>
+
+<xsl:template match="fic:section">
+    <div class="fiction section">
+        <xsl:if test="@xml:id">
+            <xsl:attribute name="xml:id">
+                <xsl:value-of select="@xml:id" />
+            </xsl:attribute>
+        </xsl:if>
+        <xsl:element name="h{count(ancestor-or-self::fic:section|ancestor-or-self::fic:body)}">
+            <xsl:value-of select="fic:title" />
+        </xsl:element>
+        <xsl:apply-templates select="fic:section|fic:p" />
+    </div>
+</xsl:template>
+
+<xsl:template match="fic:p">
+    <p>
+        <xsl:apply-templates/>
+    </p>
+</xsl:template>
+
+<xsl:template match="fic:b">
+    <b>
+        <xsl:apply-templates/>
+    </b>
+</xsl:template>
+
+<xsl:template match="fic:i">
+    <i>
+        <xsl:apply-templates/>
+    </i>
+</xsl:template>
+
+</xsl:stylesheet>

XML-GrammarBase/t/lib/Test/XML/Ordered.pm

+package Test::XML::Ordered;
+
+use strict;
+use warnings;
+
+use XML::LibXML::Reader;
+
+use Test::More;
+
+use base 'Exporter';
+
+use vars '@EXPORT_OK';
+
+@EXPORT_OK = (qw(is_xml_ordered));
+
+sub new
+{
+    my $class = shift;
+    my $self = {};
+
+    bless $self, $class;
+
+    $self->_init(@_);
+
+    return $self;
+}
+
+sub _got
+{
+    return shift->{got_reader};
+}
+
+sub _expected
+{
+    return shift->{expected_reader};
+}
+
+sub _init
+{
+    my ($self, $args) = @_;
+
+    $self->{got_reader} =
+        XML::LibXML::Reader->new(@{$args->{got_params}});
+    $self->{expected_reader} =
+        XML::LibXML::Reader->new(@{$args->{expected_params}});
+
+    $self->{diag_message} = $args->{diag_message};
+
+    $self->{got_end} = 0;
+    $self->{expected_end} = 0;
+
+    return;
+}
+
+sub _got_end
+{
+    return shift->{got_end};
+}
+
+sub _expected_end
+{
+    return shift->{expected_end};
+}
+
+sub _read_got
+{
+    my $self = shift;
+
+    if ($self->_got->read() <= 0)
+    {
+        $self->{got_end} = 1;
+    }
+
+    return;
+}
+
+sub _read_expected
+{
+    my $self = shift;
+
+    if ($self->_expected->read() <= 0)
+    {
+        $self->{expected_end} = 1;
+    }
+
+    return;
+}
+
+sub _next_elem
+{
+    my $self = shift;
+
+    $self->_read_got();
+    $self->_read_expected();
+
+    return;
+}
+
+sub _ns
+{
+    my $elem = shift;
+    my $ns = $elem->namespaceURI();
+
+    return defined($ns) ? $ns : "";
+}
+
+sub _compare_loop
+{
+    my $self = shift;
+
+    my $calc_prob = sub {
+        my $args = shift;
+
+        if (!exists($args->{param}))
+        {
+            die "No 'param' specified.";
+        }
+        return
+        {
+            verdict => 0,
+            param => $args->{param},
+        }
+    };
+
+    NODE_LOOP:
+    while ((!$self->_got_end()) && (!$self->_expected_end()))
+    {
+        my $type = $self->_got->nodeType();
+        my $exp_type = $self->_expected->nodeType();
+
+        if ($type == XML_READER_TYPE_SIGNIFICANT_WHITESPACE())
+        {
+            $self->_read_got();
+            redo NODE_LOOP;
+        }
+        elsif ($exp_type == XML_READER_TYPE_SIGNIFICANT_WHITESPACE())
+        {
+            $self->_read_expected();
+            redo NODE_LOOP;
+        }
+        elsif ($type != $exp_type)
+        {
+            return $calc_prob->({param => "nodeType"});
+        }
+        elsif ($type == XML_READER_TYPE_TEXT())
+        {
+            my $got_text = $self->_got->value();
+            my $expected_text = $self->_expected->value();
+
+            foreach my $t ($got_text, $expected_text)
+            {
+                $t =~ s{\A\s+}{}ms;
+                $t =~ s{\s+\z}{}ms;
+                $t =~ s{\s+}{ }ms;
+            }
+            if ($got_text ne $expected_text)
+            {
+                return $calc_prob->({param => "text"});
+            }
+        }
+        elsif ($type == XML_READER_TYPE_ELEMENT())
+        {
+            if ($self->_got->name() ne $self->_expected->name())
+            {
+                return $calc_prob->({param => "element_name"});
+            }
+            if (_ns($self->_got) ne _ns($self->_expected))
+            {
+                return $calc_prob->({param => "mismatch_ns"});
+            }
+        }
+    }
+    continue
+    {
+        $self->_next_elem();
+    }
+
+    return { verdict => 1};
+}
+
+sub _get_diag_message
+{
+    my ($self, $status_struct) = @_;
+
+    if ($status_struct->{param} eq "nodeType")
+    {
+        return
+            "Different Node Type!\n"
+            . "Got: " . $self->_got->nodeType() . " at line " . $self->_got->lineNumber()
+            . "\n"
+            . "Expected: " . $self->_expected->nodeType() . " at line " . $self->_expected->lineNumber()
+            ;
+    }
+    elsif ($status_struct->{param} eq "text")
+    {
+        return
+            "Texts differ: Got at " . $self->_got->lineNumber(). " with value <<@{[$self->_got->value()]}>> ; Expected at ". $self->_expected->lineNumber() . " with value <<@{[$self->_expected->value()]}>>.";
+    }
+    elsif ($status_struct->{param} eq "element_name")
+    {
+        return
+            "Got name: " . $self->_got->name(). " at " . $self->_got->lineNumber() .
+            " ; " .
+            "Expected name: " . $self->_expected->name() . " at " .$self->_expected->lineNumber();
+    }
+    elsif ($status_struct->{param} eq "mismatch_ns")
+    {
+        return
+            "Got Namespace: " . _ns($self->_got). " at " . $self->_got->lineNumber() .
+            " ; " .
+            "Expected Namespace: " . _ns($self->_expected) . " at " .$self->_expected->lineNumber();
+    }
+
+    else
+    {
+        die "Unknown param";
+    }
+}
+
+sub compare
+{
+    local $Test::Builder::Level = $Test::Builder::Level+1;
+
+    my $self = shift;
+
+    $self->_next_elem();
+
+    my $status_struct = $self->_compare_loop();
+    my $verdict = $status_struct->{verdict};
+
+    if (!$verdict)
+    {
+        diag($self->_get_diag_message($status_struct));
+    }
+
+    return ok($verdict, $self->{diag_message});
+}
+
+sub is_xml_ordered
+{
+    local $Test::Builder::Level = $Test::Builder::Level+1;
+
+    my ($got_params, $expected_params, $message) = @_;
+
+    my $comparator =
+        Test::XML::Ordered->new(
+            {
+                got_params => $got_params,
+                expected_params => $expected_params,
+                diag_message => $message,
+            }
+        );
+
+    return $comparator->compare();
+}
+
+1;
+

XML-GrammarBase/t/xslt.t

+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+use Test::More tests => 1;
+
+package MyGrammar::XSLT;
+
+use Any::Moose;
+
+use File::Spec;
+
+with ('XML::GrammarBase::Role::XSLT');
+
+has '+module_base' => (default => 'XML::GrammarBase');
+has '+data_dir' => (default => File::Spec->catdir(File::Spec->curdir(), "t", "data"));
+has '+xslt_transform_basename' => (default => 'fiction-xml-to-html.xslt');
+has '+rng_schema_basename' => (default => 'fiction-xml.rng');
+
+package main;
+
+use lib "./t/lib";
+
+use Test::XML::Ordered qw(is_xml_ordered);
+
+
+sub _utf8_slurp
+{
+    my $filename = shift;
+
+    open my $in, '<', $filename
+        or die "Cannot open '$filename' for slurping - $!";
+
+    binmode $in, ':encoding(utf8)';
+
+    local $/;
+    my $contents = <$in>;
+
+    close($in);
+
+    return $contents;
+}
+
+# TEST:$c=0;
+sub test_file
+{
+    my $args = shift;
+
+    my $input_fn = $args->{input_fn};
+    my $output_fn = $args->{output_fn};
+
+    my $xslt = MyGrammar::XSLT->new();
+
+    {
+        my $final_source = $xslt->perform_xslt_translation(
+            {
+                source => {file => $input_fn, },
+                output => "string",
+            }
+        );
+
+        my $xml_source = _utf8_slurp($output_fn);
+
+        # TEST:$c++;
+        is_xml_ordered(
+            [ string => $final_source, ],
+            [ string => $xml_source, ],
+            "'$input_fn' generated good output on source/input_filename - output - string"
+        );
+    }
+}
+
+# TEST:$test_file=$c;
+
+# TEST*$test_file
+test_file(
+    {
+        input_fn => File::Spec->catfile(
+            File::Spec->curdir(), "t", "data", "fiction-xml-test.xml",
+        ),
+        output_fn => File::Spec->catfile(
+            File::Spec->curdir(), "t", "data", "fiction-xml-test-html-xslt-output.xhtml",
+        ),
+    }
+);