Commits

Toby Inkster committed 9575ecf

Spin GRDDL support off from Swignition.

  • Participants

Comments (0)

Files changed (21)

+############################################################################
+## Changes #################################################################
+############################################################################
+
+XML-GRDDL
+=========
+
+Created:      2010-07-14
+Home page:    <http://search.cpan.org/dist/XML-GRDDL/>
+Bug tracker:  <http://rt.cpan.org/Dist/Display.html?Queue=XML-GRDDL>
+Maintainer:   Toby Inkster <mailto:tobyink@cpan.org>
+
+0.001 [2010-07-14] # Original version
+
+#############################################################
+
+@prefix :        <http://usefulinc.com/ns/doap#> .
+@prefix dcs:     <http://ontologi.es/doap-changeset#> .
+@prefix dc:      <http://purl.org/dc/terms/> .
+@prefix foaf:    <http://xmlns.com/foaf/0.1/> .
+@prefix my:      <http://purl.org/NET/cpan-uri/dist/XML-GRDDL/> .
+@prefix rdfs:    <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix toby:    <http://tobyinkster.co.uk/#> .
+@prefix xsd:     <http://www.w3.org/2001/XMLSchema#> .
+
+#############################################################
+
+<>
+
+	dc:title         "Changes" ;
+	dc:description   "Revision history for Perl extension XML::GRDDL."@en ;
+	dc:subject       my:project ;
+	dc:creator       toby:i .
+
+#############################################################
+
+my:v_0-001
+
+	a               :Version ;
+	dc:issued       "2010-07-14"^^xsd:date ;
+	:revision       "0.001"^^xsd:string ;
+	:file-release   <http://backpan.cpan.org/authors/id/T/TO/TOBYINK/XML-GRDDL-0.001.tar.gz> ;
+	rdfs:label      "Original version"@en ;
+	rdfs:comment    "Forked from Swignition."@en ;
+	rdfs:seeAlso    <http://buzzword.org.uk/swignition/#swignition> .
+
+#############################################################
+
+my:project
+
+	a               :Project ;
+	:name           "XML-GRDDL" ;
+	:shortdesc      "transform XML and XHTML to RDF"@en ;
+	:programming-language  "Perl" ;
+	:homepage       <http://search.cpan.org/dist/XML-GRDDL/> ;
+	:download-page  <http://search.cpan.org/dist/XML-GRDDL/> ;
+	:bug-database   <http://rt.cpan.org/Dist/Display.html?Queue=XML-GRDDL> ;
+	:repository     [ a :SVNRepository ; :browse <http://goddamn.co.uk/viewvc/perlmods/XML-GRDDL/> ] ;
+	:maintainer     toby:i ;
+	:developer      toby:i ;
+	:documenter     toby:i ;
+	:tester         toby:i ;
+	:created        "2010-07-14"^^xsd:date ;
+	:license        <http://dev.perl.org/licenses/> ;
+	:release        my:v_0-001 .
+
+#############################################################
+
+toby:i
+
+	a               foaf:Person ;
+	foaf:name       "Toby Inkster" ;
+	foaf:homepage   <http://tobyinkster.co.uk/> ;
+	foaf:page       <http://search.cpan.org/~tobyink/> ;
+	foaf:mbox       <mailto:tobyink@cpan.org> ;
+	<http://www.w3.org/2002/07/owl#sameAs> <http://purl.org/NET/cpan-uri/person/tobyink> .
+
+#############################################################
+<?xml version="1.0" encoding="utf-8"?>
+<rdf:RDF
+   xmlns:dc="http://purl.org/dc/terms/"
+   xmlns:dcs="http://ontologi.es/doap-changeset#"
+   xmlns:foaf="http://xmlns.com/foaf/0.1/"
+   xmlns:my="http://purl.org/NET/cpan-uri/dist/XML-GRDDL/"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
+   xmlns:toby="http://tobyinkster.co.uk/#"
+   xmlns:xsd="http://www.w3.org/2001/XMLSchema#"
+   xmlns="http://usefulinc.com/ns/doap#">
+  <rdf:Description rdf:about="file:///home/tai/src/perlmods/XML-GRDDL/Changes.ttl">
+    <dc:creator rdf:resource="http://tobyinkster.co.uk/#i"/>
+    <dc:description xml:lang="en">Revision history for Perl extension XML::GRDDL.</dc:description>
+    <dc:subject rdf:resource="http://purl.org/NET/cpan-uri/dist/XML-GRDDL/project"/>
+    <dc:title>Changes</dc:title>
+  </rdf:Description>
+  <Version rdf:about="http://purl.org/NET/cpan-uri/dist/XML-GRDDL/v_0-001">
+    <dc:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2010-07-14</dc:issued>
+    <file-release rdf:resource="http://backpan.cpan.org/authors/id/T/TO/TOBYINK/XML-GRDDL-0.001.tar.gz"/>
+    <revision rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0.001</revision>
+    <rdfs:comment xml:lang="en">Forked from Swignition.</rdfs:comment>
+    <rdfs:label xml:lang="en">Original version</rdfs:label>
+    <rdfs:seeAlso rdf:resource="http://buzzword.org.uk/swignition/#swignition"/>
+  </Version>
+  <Project rdf:about="http://purl.org/NET/cpan-uri/dist/XML-GRDDL/project">
+    <bug-database rdf:resource="http://rt.cpan.org/Dist/Display.html?Queue=XML-GRDDL"/>
+    <created rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2010-07-14</created>
+    <developer rdf:resource="http://tobyinkster.co.uk/#i"/>
+    <documenter rdf:resource="http://tobyinkster.co.uk/#i"/>
+    <download-page rdf:resource="http://search.cpan.org/dist/XML-GRDDL/"/>
+    <homepage rdf:resource="http://search.cpan.org/dist/XML-GRDDL/"/>
+    <license rdf:resource="http://dev.perl.org/licenses/"/>
+    <maintainer rdf:resource="http://tobyinkster.co.uk/#i"/>
+    <name>XML-GRDDL</name>
+    <programming-language>Perl</programming-language>
+    <release rdf:resource="http://purl.org/NET/cpan-uri/dist/XML-GRDDL/v_0-001"/>
+    <repository>
+      <SVNRepository>
+        <browse rdf:resource="http://goddamn.co.uk/viewvc/perlmods/XML-GRDDL/"/>
+      </SVNRepository>
+    </repository>
+    <shortdesc xml:lang="en">transform XML and XHTML to RDF</shortdesc>
+    <tester rdf:resource="http://tobyinkster.co.uk/#i"/>
+  </Project>
+  <foaf:Person rdf:about="http://tobyinkster.co.uk/#i">
+    <ns1:sameAs xmlns:ns1="http://www.w3.org/2002/07/owl#" rdf:resource="http://purl.org/NET/cpan-uri/person/tobyink"/>
+    <foaf:homepage rdf:resource="http://tobyinkster.co.uk/"/>
+    <foaf:mbox rdf:resource="mailto:tobyink@cpan.org"/>
+    <foaf:name>Toby Inkster</foaf:name>
+    <foaf:page rdf:resource="http://search.cpan.org/~tobyink/"/>
+  </foaf:Person>
+</rdf:RDF>
+Changes
+Changes.ttl
+Changes.xml
+Makefile.PL
+MANIFEST
+MANIFEST.SKIP
+README
+META.yml
+SIGNATURE
+
+t/00sig.t
+t/01basic.t
+
+inc/Module/AutoInstall.pm
+inc/Module/Install/AutoInstall.pm
+inc/Module/Install/Base.pm
+inc/Module/Install/Can.pm
+inc/Module/Install/DOAPChangeSets.pm
+inc/Module/Install/Fetch.pm
+inc/Module/Install/Include.pm
+inc/Module/Install/Makefile.pm
+inc/Module/Install/Metadata.pm
+inc/Module/Install.pm
+inc/Module/Install/ReadmeFromPod.pm
+inc/Module/Install/Win32.pm
+inc/Module/Install/WriteAll.pm
+inc/Test/Signature.pm
+
+lib/XML/GRDDL/External.pm
+lib/XML/GRDDL/Namespace.pm
+lib/XML/GRDDL.pm
+lib/XML/GRDDL/Profile.pm
+lib/XML/GRDDL/Transformation.pm
+lib/XML/GRDDL/Transformation/RDF_EASE/Functional.pm
+lib/XML/GRDDL/Transformation/RDF_EASE.pm
+lib/XML/GRDDL/Transformation/RDF_EASE/Selector.pm
+lib/XML/GRDDL/Transformation/XSLT_1.pm

File MANIFEST.SKIP

+^Makefile$
+^blib/
+^pm_to_blib
+^blibdirs
+\.svn
+^example.*\.pl$
+^[^/]+\.(tar\.gz|tar\.bz2|tgz|tbz2|tbz|zip|tar)$
+^MYMETA\.yml$
+---
+abstract: 'transform XML and XHTML to RDF'
+author:
+  - 'Toby Inkster <tobyink@cpan.org>'
+build_requires:
+  ExtUtils::MakeMaker: 6.42
+  Test::More: 0.61
+configure_requires:
+  ExtUtils::MakeMaker: 6.42
+distribution_type: module
+generated_by: 'Module::Install version 0.95'
+license: perl
+meta-spec:
+  url: http://module-build.sourceforge.net/META-spec-v1.4.html
+  version: 1.4
+name: XML-GRDDL
+no_index:
+  directory:
+    - inc
+    - t
+requires:
+  CSS: 0
+  CSS::Parse::PRDGrammar: 0
+  Carp: 1.00
+  Data::UUID: 0
+  Exporter: 0
+  RDF::RDFa::Parser: 1.09_10
+  RDF::Trine: 0.124
+  Scalar::Util: 0
+  URI: 1.30
+  URI::Escape: 0
+  XML::LibXML: 0
+  XML::LibXSLT: 0
+  common::sense: 0
+  perl: 5.8.0
+resources:
+  bugtracker: http://rt.cpan.org/Dist/Display.html?Queue=XML-GRDDL
+  homepage: http://search.cpan.org/dist/XML-GRDDL/
+  license: http://dev.perl.org/licenses/
+  repository: http://goddamn.co.uk/viewvc/perlmods/XML-GRDDL/
+version: 0.001
+use strict;
+use warnings;
+
+use inc::Module::Install;
+
+# Distribution name; the path of the main module is derived from it by
+# turning dashes into directory separators (XML-GRDDL => lib/XML/GRDDL.pm).
+my $dist = 'XML-GRDDL';
+my $fn   = "lib/$dist.pm"; $fn =~ s#-#/#g;
+
+name                $dist;
+perl_version_from   $fn;
+version_from        $fn;
+abstract_from       $fn;
+readme_from         $fn;
+author              'Toby Inkster <tobyink@cpan.org>';
+license             'perl';
+
+test_requires       'Test::More' => '0.61';
+
+# Each prerequisite is listed exactly once: a second "requires" line for
+# the same module replaces the version given by the first one.
+requires            'Carp'               => '1.00';
+requires            'common::sense'      => 0;
+requires            'CSS'                => 0;
+requires            'CSS::Parse::PRDGrammar' => 0;
+requires            'Data::UUID'         => 0;
+requires            'Exporter'           => 0;
+requires            'LWP::UserAgent'     => 0;
+requires            'RDF::RDFa::Parser'  => '1.09_10';
+requires            'RDF::Trine'         => '0.124';
+requires            'Scalar::Util'       => 0;
+requires            'XML::LibXML'        => '1.60';
+requires            'URI'                => '1.30';
+requires            'URI::Escape'        => 0;
+requires            'XML::LibXSLT'       => 0;
+
+# install_script 'fingerw';
+
+resources(
+	'homepage'   => "http://search.cpan.org/dist/$dist/",
+	'repository' => "http://goddamn.co.uk/viewvc/perlmods/$dist/",
+	'bugtracker' => "http://rt.cpan.org/Dist/Display.html?Queue=$dist",
+	);
+	
+write_doap_changes;
+write_doap_changes_xml;
+
+include 'Test::Signature';
+auto_install;
+WriteAll(
+	'meta' => 1,
+	'sign' => 1,
+	);
+NAME
+    XML::GRDDL - transform XML and XHTML to RDF
+
+SYNOPSIS
+     # Low-Level Interface
+     #
+     my $grddl = XML::GRDDL->new;
+     my @transformations = $grddl->discover($xmldoc, $baseuri);
+     foreach my $t (@transformations)
+     {
+       # $t is an XML::GRDDL::Transformation
+       my ($output, $mediatype) = $t->transform($xmldoc);
+     }
+
+     # High-Level Interface
+     #
+     my $grddl = XML::GRDDL->new;
+     my $model = $grddl->data($xmldoc, $baseuri);
+     # $model is an RDF::Trine::Model
+
+DESCRIPTION
+    GRDDL is a W3C Recommendation for extracting RDF data from arbitrary XML
+    and XHTML via a transformation, typically written in XSLT. See
+    <http://www.w3.org/TR/grddl/> for more details.
+
+    This module implements GRDDL in Perl. It offers both a low level
+    interface, allowing you to generate a list of transformations associated
+    with the document being processed, and thus the ability to selectively
+    run the transformation; and a high-level interface where a single RDF
+    model is returned representing the union of the RDF graphs generated by
+    applying all available transformations.
+
+  Constructor
+    "XML::GRDDL->new"
+        The constructor accepts no parameters and returns an XML::GRDDL
+        object.
+
+  Methods
+    "$grddl->discover($xml, $base, %options)"
+        Processes the document to discover the transformations associated
+        with it. $xml is the raw XML source of the document, or an
+        XML::LibXML::Document object. ($xml cannot be "tag soup" HTML,
+        though you should be able to use HTML::HTML5::Parser to parse tag
+        soup into an XML::LibXML::Document.) $base is the base URI for
+        resolving relative references.
+
+        Returns a list of XML::GRDDL::Transformation objects.
+
+        Options include:
+
+        *   force_rel - boolean; interpret XHTML rel="transformation" even
+            in the absence of the GRDDL profile.
+
+        *   strings - boolean; return a list of plain strings instead of
+            blessed objects.
+
+    "$grddl->data($xml, $base, %options)"
+        Processes the document, discovers the transformations associated
+        with it, applies the transformations and merges the results into a
+        single RDF model. $xml and $base are as per "discover".
+
+        Returns an RDF::Trine::Model containing the data. Statement contexts
+        (a.k.a. named graphs / quads) are used to distinguish between data
+        from the result of each transformation.
+
+        Options include:
+
+        *   force_rel - boolean; interpret XHTML rel="transformation" even
+            in the absence of the GRDDL profile.
+
+        *   metadata - boolean; include provenance information in the
+            default graph (a.k.a. nil context).
+
+    "$grddl->ua( [$ua] )"
+        Get/set the user agent used for HTTP requests. $ua, if supplied,
+        must be an LWP::UserAgent.
+
+FEATURES
+    XML::GRDDL supports transformations written in XSLT 1.0, and in
+    RDF-EASE.
+
+    XML::GRDDL is a good HTTP citizen: Referer headers are included in
+    requests, and appropriate Accept headers supplied. To be an even better
+    citizen, I recommend changing the User-Agent header to advertise the
+    name of the application:
+
+     $grddl->ua->default_header(user_agent => 'MyApp/1.0 ');
+
+    Provenance information for GRDDL transformations is returned using the
+    GRDDL vocabulary at <http://www.w3.org/2003/g/data-view#>.
+
+    Certain XHTML profiles and XML namespaces known not to contain any
+    transformations, or to contain useless transformations are skipped. See
+    XML::GRDDL::Namespace and XML::GRDDL::Profile for details. In particular
+    profiles for RDFa and many Microformats are skipped, as
+    RDF::RDFa::Parser and HTML::Microformats will typically yield far
+    superior results.
+
+BUGS
+    Please report any bugs to <http://rt.cpan.org/>.
+
+    Known limitations:
+
+    *   Recursive GRDDL doesn't work yet.
+
+        That is, the profile documents and namespace documents linked to
+        from your primary document cannot themselves rely on GRDDL.
+
+SEE ALSO
+    XML::GRDDL::Transformation, XML::GRDDL::Namespace, XML::GRDDL::Profile,
+    XML::GRDDL::Transformation::RDF_EASE::Functional.
+
+    HTML::HTML5::Parser, RDF::RDFa::Parser, HTML::Microformats.
+
+    <http://www.w3.org/TR/grddl/>.
+
+    <http://www.perlrdf.org/>.
+
+    This module is derived from Swignition
+    <http://buzzword.org.uk/swignition/>.
+
+AUTHOR
+    Toby Inkster <tobyink@cpan.org>.
+
+COPYRIGHT
+    Copyright 2008-2010 Toby Inkster
+
+    This library is free software; you can redistribute it and/or modify it
+    under the same terms as Perl itself.
+

File XML-GRDDL-0.001.tar.gz

Binary file added.
+use strict;
+use warnings;
+
+use lib "lib";
+use lib "../RDF-RDFa-Parser/lib";
+use XML::GRDDL;
+use LWP::Simple;
+use Data::Dumper;
+use RDF::TrineShortcuts;
+
+# Demonstration script: runs the high-level XML::GRDDL interface over a
+# few locally hosted test documents and prints each resulting model as
+# N-Quads.
+my $grddl = XML::GRDDL->new();
+my @urls  = qw(http://localhost/test/grddl/document.html http://localhost/test/grddl/document2.html http://localhost/test/grddl/ease.html);
+
+foreach my $url (@urls)
+{
+	print "#### URL: $url\n";
+
+	# LWP::Simple::get returns undef on failure; skip the document rather
+	# than handing undef to the XML parser.
+	my $content = get($url);
+	unless (defined $content)
+	{
+		warn "Could not fetch $url; skipping\n";
+		next;
+	}
+
+	my $r = $grddl->data($content, $url, force_rel=>1, metadata=>1);
+	print rdf_string($r, 'nquads');
+}

File lib/XML/GRDDL.pm

+package XML::GRDDL;
+
+use 5.008;
+use common::sense;
+use constant GRDDL_NS  => 'http://www.w3.org/2003/g/data-view#';
+use constant XHTML_NS  => 'http://www.w3.org/1999/xhtml';
+
+use Carp;
+use Data::UUID;
+use RDF::RDFa::Parser '1.09_10';
+use RDF::Trine qw[iri statement];
+use Scalar::Util qw[blessed];
+use URI;
+use URI::Escape qw[uri_escape];
+use XML::GRDDL::Namespace;
+use XML::GRDDL::Profile;
+use XML::GRDDL::Transformation;
+use XML::LibXML;
+
+our $VERSION = '0.001';
+
+# Constructor. Returns a new XML::GRDDL object holding an empty fetch
+# cache and no user agent (one is created on demand by ua()).
+sub new
+{
+	my $class = shift;
+	my $self  = {
+		cache => {},
+		ua    => undef,
+		};
+	return bless $self, $class;
+}
+
+# Get/set the user agent used for HTTP requests.
+#
+# With an argument: the argument must be an LWP::UserAgent (otherwise
+# this croaks); it becomes the current agent and the previously stored
+# agent is returned. The argument is validated before it is stored, so
+# a rejected value never replaces the agent already in place.
+#
+# Without an argument: returns the current agent, creating a default
+# LWP::UserAgent (identifying itself as this package and version) if
+# none has been set.
+sub ua
+{
+	my $self = shift;
+
+	if (@_)
+	{
+		my $new_ua = shift;
+		croak "Set UA to something that is not an LWP::UserAgent!"
+			unless blessed($new_ua) && $new_ua->isa('LWP::UserAgent');
+
+		my $old_ua = $self->{'ua'};
+		$self->{'ua'} = $new_ua;
+		return $old_ua;
+	}
+
+	unless (blessed($self->{'ua'}) && $self->{'ua'}->isa('LWP::UserAgent'))
+	{
+		# Loaded here because nothing else in this file loads it.
+		require LWP::UserAgent;
+		$self->{'ua'} = LWP::UserAgent->new(agent=>sprintf('%s/%s ', __PACKAGE__, $VERSION));
+	}
+	return $self->{'ua'};
+}
+
+# High-level interface: discovers the transformations for a document,
+# runs each of them and merges the results into one RDF::Trine::Model.
+#
+#   $document  raw XML source, or an XML::LibXML::Document
+#   $uri       base URI of the document
+#   %options   passed on to discover(); 'metadata' additionally asks for
+#              provenance statements to be added
+#
+# Each transformation's output is added under its own freshly generated
+# urn:uuid: context.
+sub data
+{
+	my ($self, $document, $uri, %options) = @_;
+	
+	if (!blessed($document) || !$document->isa('XML::LibXML::Document'))
+	{
+		$document = XML::LibXML->new->parse_string($document);
+	}
+	
+	# Transformation objects are needed below, so the 'strings' option is
+	# switched off for the duration of the discover() call.
+	my @transformations = do {
+		local $options{strings} = 0;
+		$self->discover($document, $uri, %options);
+	};
+
+	my $model    = RDF::Trine::Model->temporary_model;
+	my $rdf_type = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';
+
+	TRANSFORMATION: foreach my $t (@transformations)
+	{
+		my $result = $t->model($document);
+		next TRANSFORMATION unless $result;
+
+		my $context  = iri('urn:uuid:'.Data::UUID->new->create_str);
+		my $rootnode = iri('urn:uuid:'.Data::UUID->new->create_str);
+		my $property = iri('http://ontologi.es/grddl?transformation='.uri_escape($t->uri).'#result');
+		$model->add_hashref($result->as_hashref, $context);
+
+		next TRANSFORMATION unless $options{metadata};
+
+		# Provenance statements, expressed in the GRDDL vocabulary.
+		my @triples = (
+			[ iri($uri),    iri($rdf_type),                         iri(GRDDL_NS.'InformationResource') ],
+			[ iri($uri),    iri(GRDDL_NS.'rootNode'),               $rootnode ],
+			[ $rootnode,    iri($rdf_type),                         iri(GRDDL_NS.'RootNode') ],
+			[ iri($uri),    iri(GRDDL_NS.'result'),                 $context ],
+			[ $context,     iri($rdf_type),                         iri(GRDDL_NS.'RDFGraph') ],
+			[ iri($t->uri), iri($rdf_type),                         iri(GRDDL_NS.'Transformation') ],
+			[ iri($t->uri), iri(GRDDL_NS.'transformationProperty'), $property ],
+			[ $property,    iri($rdf_type),                         iri(GRDDL_NS.'TransformationProperty') ],
+			[ $rootnode,    $property,                              $context ],
+			);
+		foreach my $triple (@triples)
+		{
+			$model->add_statement(statement(@$triple));
+		}
+	}
+	
+	return $model;
+}
+
+# Low-level interface: lists the transformations associated with a
+# document, gathered in turn from the rel attribute, the GRDDL
+# transformation attribute, the XHTML profiles and the root namespace.
+#
+#   $document  raw XML source, or an XML::LibXML::Document
+#   $uri       base URI of the document
+#   %options   'strings' => return plain URI strings rather than
+#              XML::GRDDL::Transformation objects; all options are also
+#              passed on to the individual discovery routines
+sub discover
+{
+	my ($self, $document, $uri, %options) = @_;
+
+	if (!blessed($document) || !$document->isa('XML::LibXML::Document'))
+	{
+		$document = XML::LibXML->new->parse_string($document);
+	}
+
+	my @found = (
+		$self->_discover_from_rel_attribute($document, $uri, %options),
+		$self->_discover_from_transformation_attribute($document, $uri, %options),
+		$self->_discover_from_profiles($document, $uri, %options),
+		$self->_discover_from_namespace($document, $uri, %options),
+		);
+
+	return @found if $options{'strings'};
+	return map { XML::GRDDL::Transformation->new($_, $uri, $self) } @found;
+}
+
+# Finds transformations stated about the document URI with the GRDDL
+# 'transformation' property in the graph _rdf_model() builds for the
+# document. This only happens when the XHTML head carries the GRDDL
+# profile, or when the 'force_rel' option is set.
+sub _discover_from_rel_attribute
+{
+	my ($self, $document, $uri, %options) = @_;
+	my @transformations;
+
+	my $xpc = XML::LibXML::XPathContext->new;
+	$xpc->registerNs(xhtml => XHTML_NS);
+
+	my $enabled = $options{'force_rel'};
+	if (!$enabled)
+	{
+		foreach my $head ($xpc->findnodes('/xhtml:html/xhtml:head[@profile]', $document))
+		{
+			next unless $head->getAttribute('profile') =~ m!(^|\s) http://www\.w3\.org/2003/g/data-view\#? (\s|$)!x;
+			$enabled = 1;
+			last;
+		}
+	}
+
+	return @transformations unless $enabled;
+
+	# The media type handed to the parser depends on whether the root
+	# element is in the XHTML namespace.
+	my $media_type = ($document->documentElement->namespaceURI eq XHTML_NS)
+		? 'application/xhtml+xml'
+		: 'application/xml';
+	my $graph   = $self->_rdf_model($document, $uri, $media_type);
+	my $matches = $graph->get_statements(iri($uri), iri(GRDDL_NS.'transformation'), undef);
+	while (my $st = $matches->next)
+	{
+		push @transformations, $st->object->uri
+			if $st->object->is_resource;
+	}
+
+	return @transformations;
+}
+
+# Reads the whitespace-separated GRDDL 'transformation' attribute of the
+# root element and returns each entry resolved against $uri.
+sub _discover_from_transformation_attribute
+{
+	my ($self, $document, $uri, %options) = @_;
+
+	# Right now just doing this on root element. Supposed to also check others??
+	my $attr = $document->documentElement->getAttributeNS(GRDDL_NS, 'transformation');
+
+	# Tokens without a single letter, digit or dot are discarded.
+	return map  { $self->_resolve_relative_ref($_, $uri) }
+	       grep { /[a-z0-9\.]/i }
+	       split /\s+/, $attr;
+}
+
+# Collects the profile URIs listed in the XHTML head/@profile attribute
+# (resolved against $uri) and returns the transformations reported by
+# an XML::GRDDL::Profile object for each of them.
+sub _discover_from_profiles
+{
+	my ($self, $document, $uri, %options) = @_;
+
+	my $xpc = XML::LibXML::XPathContext->new;
+	$xpc->registerNs(xhtml => XHTML_NS);
+
+	my @profiles;
+	foreach my $head ($xpc->findnodes('/xhtml:html/xhtml:head[@profile]', $document))
+	{
+		# Tokens without a single letter, digit or dot are discarded.
+		push @profiles,
+			map  { $self->_resolve_relative_ref($_, $uri) }
+			grep { /[a-z0-9\.]/i }
+			split /\s+/, $head->getAttribute('profile');
+	}
+
+	return map { XML::GRDDL::Profile->new($_, $uri, $self)->transformations } @profiles;
+}
+
+# Returns the transformations reported by an XML::GRDDL::Namespace
+# object for the namespace of the document's root element.
+#
+# A root element that is in no namespace yields an empty list: there is
+# no namespace document to consult, and without this guard an undefined
+# URI would be passed on to be fetched and parsed.
+sub _discover_from_namespace
+{
+	my ($self, $document, $uri, %options) = @_;
+	
+	my $ns = $document->documentElement->namespaceURI;
+	return unless defined $ns && length $ns;
+	
+	my $ns_obj = XML::GRDDL::Namespace->new($ns, $uri, $self);
+	return $ns_obj->transformations;
+}
+
+# Retrieves $document (a URL) through the user agent, sending %headers
+# with the request. Responses are remembered per URL in
+# $self->{'cache'}; the headers are not part of the cache key.
+sub _fetch
+{
+	my ($self, $document, %headers) = @_;
+	unless ($self->{'cache'}->{$document})
+	{
+		$self->{'cache'}->{$document} = $self->ua->get($document, %headers);
+	}
+	return $self->{'cache'}->{$document};
+}
+
+# Parses $document (of media type $type, with base $uri) into an RDF
+# model and returns it.
+#
+# Results are kept per $uri in $self->{'cached-rdf'}. A true $nocache
+# forces a fresh parse whose result is returned without being stored.
+#
+# XHTML, HTML, Atom and SVG media types go through RDF::RDFa::Parser.
+# Anything else goes to the RDF::Trine parser registered for the media
+# type, falling back to the RDF/XML parser.
+sub _rdf_model
+{
+	my ($self, $document, $uri, $type, $nocache) = @_;
+
+	return $self->{'cached-rdf'}->{$uri}
+		if !$nocache && $self->{'cached-rdf'}->{$uri};
+
+	my %rdfa_type = map { $_ => 1 } (
+		'application/xhtml+xml',
+		'text/html',
+		'application/atom+xml',
+		'image/svg+xml',
+		);
+
+	my $graph;
+	if ($rdfa_type{$type})
+	{
+		my $config = RDF::RDFa::Parser::Config->new($type, '1.0',
+			keyword_bundles=>'rdfa grddl');
+		my $parser = RDF::RDFa::Parser->new($document, $uri, $config);
+		$graph = $parser->graph;
+	}
+	else
+	{
+		# The RDF::Trine parsers are given the serialised document.
+		$document = $document->toString
+			if blessed($document);
+
+		my $pclass = $RDF::Trine::Parser::media_types{ $type };
+		$graph     = RDF::Trine::Model->temporary_model;
+		my $parser = ($pclass && $pclass->can('new'))
+		           ? $pclass->new
+		           : RDF::Trine::Parser::RDFXML->new;
+		$parser->parse_into_model($uri, $document, $graph);
+	}
+
+	return $graph if $nocache;
+
+	$self->{'cached-rdf'}->{$uri} = $graph;
+	return $self->{'cached-rdf'}->{$uri};
+}
+
+# Resolves $ref against $base and returns the absolute URI as a string.
+# $ref is returned untouched when there is no base URI, or when $ref
+# already starts with a URI scheme.
+sub _resolve_relative_ref
+{
+	my ($self, $ref, $base) = @_;
+
+	# keep relative unless we have a base URI
+	return $ref if !$base;
+
+	# already an absolute reference
+	return $ref if $ref =~ /^([a-z][a-z0-9\+\.\-]*)\:/i;
+
+	my $absolute = URI->new_abs($ref, $base)->canonical->as_string;
+
+	# fix edge case of 'http://example.com/../../../' by repeatedly
+	# trimming trailing dot segments
+	$absolute = $1
+		while $absolute =~ m!^(http://.*)(\.\./|\.)+(\.\.|\.)?$!i;
+
+	return $absolute;
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+XML::GRDDL - transform XML and XHTML to RDF
+
+=head1 SYNOPSIS
+
+ # Low-Level Interface
+ #
+ my $grddl = XML::GRDDL->new;
+ my @transformations = $grddl->discover($xmldoc, $baseuri);
+ foreach my $t (@transformations)
+ {
+   # $t is an XML::GRDDL::Transformation
+   my ($output, $mediatype) = $t->transform($xmldoc);
+ }
+
+ # High-Level Interface
+ #
+ my $grddl = XML::GRDDL->new;
+ my $model = $grddl->data($xmldoc, $baseuri);
+ # $model is an RDF::Trine::Model
+
+=head1 DESCRIPTION
+
+GRDDL is a W3C Recommendation for extracting RDF data from arbitrary
+XML and XHTML via a transformation, typically written in XSLT. See
+L<http://www.w3.org/TR/grddl/> for more details.
+
+This module implements GRDDL in Perl. It offers both a low level interface,
+allowing you to generate a list of transformations associated with the
+document being processed, and thus the ability to selectively run the
+transformation; and a high-level interface where a single RDF model
+is returned representing the union of the RDF graphs generated by
+applying all available transformations.
+
+=head2 Constructor
+
+=over 4
+
+=item C<< XML::GRDDL->new >>
+
+The constructor accepts no parameters and returns an XML::GRDDL
+object.
+
+=back
+
+=head2 Methods
+
+=over 4
+
+=item C<< $grddl->discover($xml, $base, %options) >>
+
+Processes the document to discover the transformations associated
+with it. $xml is the raw XML source of the document, or an
+XML::LibXML::Document object. ($xml cannot be "tag soup" HTML,
+though you should be able to use L<HTML::HTML5::Parser> to
+parse tag soup into an XML::LibXML::Document.) $base is the
+base URI for resolving relative references.
+
+Returns a list of L<XML::GRDDL::Transformation> objects.
+
+Options include:
+
+=over 4
+
+=item * B<force_rel> - boolean; interpret XHTML rel="transformation" even in the absence of the GRDDL profile.
+
+=item * B<strings> - boolean; return a list of plain strings instead of blessed objects.
+
+=back
+
+=item C<< $grddl->data($xml, $base, %options) >>
+
+Processes the document, discovers the transformations associated
+with it, applies the transformations and merges the results into a
+single RDF model. $xml and $base are as per C<discover>.
+
+Returns an RDF::Trine::Model containing the data. Statement contexts
+(a.k.a. named graphs / quads) are used to distinguish between data
+from the result of each transformation.
+
+Options include:
+
+=over 4
+
+=item * B<force_rel> - boolean; interpret XHTML rel="transformation" even in the absence of the GRDDL profile.
+
+=item * B<metadata> - boolean; include provenance information in the default graph (a.k.a. nil context).
+
+=back
+
+=item C<< $grddl->ua( [$ua] ) >>
+
+Get/set the user agent used for HTTP requests. $ua, if supplied, must be
+an LWP::UserAgent.
+
+=back
+
+=head1 FEATURES
+
+XML::GRDDL supports transformations written in XSLT 1.0, and in RDF-EASE.
+
+XML::GRDDL is a good HTTP citizen: Referer headers are included in requests,
+and appropriate Accept headers supplied. To be an even better citizen, I
+recommend changing the User-Agent header to advertise the name of the
+application:
+
+ $grddl->ua->default_header(user_agent => 'MyApp/1.0 ');
+
+Provenance information for GRDDL transformations is returned using the
+GRDDL vocabulary at L<http://www.w3.org/2003/g/data-view#>.
+
+Certain XHTML profiles and XML namespaces known not to contain any
+transformations, or to contain useless transformations are skipped. See
+L<XML::GRDDL::Namespace> and L<XML::GRDDL::Profile> for details. In
+particular profiles for RDFa and many Microformats are skipped, as
+L<RDF::RDFa::Parser> and L<HTML::Microformats> will typically yield
+far superior results.
+
+=head1 BUGS
+
+Please report any bugs to L<http://rt.cpan.org/>.
+
+Known limitations:
+
+=over 4
+
+=item * Recursive GRDDL doesn't work yet.
+
+That is, the profile documents and namespace documents linked to from
+your primary document cannot themselves rely on GRDDL.
+
+=back
+
+=head1 SEE ALSO
+
+L<XML::GRDDL::Transformation>,
+L<XML::GRDDL::Namespace>,
+L<XML::GRDDL::Profile>,
+L<XML::GRDDL::Transformation::RDF_EASE::Functional>.
+
+L<HTML::HTML5::Parser>,
+L<RDF::RDFa::Parser>,
+L<HTML::Microformats>.
+
+L<http://www.w3.org/TR/grddl/>.
+
+L<http://www.perlrdf.org/>.
+
+This module is derived from Swignition L<http://buzzword.org.uk/swignition/>.
+
+=head1 AUTHOR
+
+Toby Inkster E<lt>tobyink@cpan.orgE<gt>.
+
+=head1 COPYRIGHT
+
+Copyright 2008-2010 Toby Inkster
+
+This library is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.

File lib/XML/GRDDL/External.pm

+package XML::GRDDL::External;
+
+use 5.008;
+use common::sense;
+
+use XML::GRDDL;
+
+our $VERSION = '0.001';
+
+# Constructor shared by the classes built on this one.
+#
+#   $profile       URI of the external document
+#   $referer       URI of the document that referred to it
+#   $grddl_object  the XML::GRDDL object driving the process
+#
+# The URI is compared with every entry of $class->ignore. An entry may
+# be a plain string (exact match), a Regexp (pattern match) or a code
+# reference (called with the URI, true meaning "ignore"). On the first
+# match the search stops and the object is blessed into this base class
+# rather than into $class.
+sub new
+{
+	my ($class, $profile, $referer, $grddl_object) = @_;
+
+	# certain profiles known not to be GRDDLable
+	my $ignore = 0;
+	foreach my $ignorant ($class->ignore)
+	{
+		my $type    = ref $ignorant;
+		my $matched =
+			  $type eq 'Regexp' ? ($profile =~ $ignorant)
+			: $type eq 'CODE'   ? $ignorant->($profile)
+			: !$type            ? ($ignorant eq $profile)
+			:                     0;
+		next unless $matched;
+
+		# One match is enough; stop looking.
+		$ignore = 1;
+		last;
+	}
+	return __PACKAGE__->_new_ignored($profile, $referer, $grddl_object)
+		if $ignore; # i.e. do not bless it as a child class.
+	
+	my $self = bless {
+		'uri'     => $profile,
+		'referer' => $referer,
+		'grddl'   => $grddl_object,
+		}, $class;
+	return $self;
+}
+
+# Builds the object returned by new() for an ignored URI. It stores the
+# same three fields (uri, referer, grddl) and is blessed into whatever
+# class it is called on.
+sub _new_ignored
+{
+	my ($class, @fields) = @_;
+	my %self;
+	@self{qw(uri referer grddl)} = @fields[0 .. 2];
+	return bless \%self, $class;
+}
+
+# List of URIs to be ignored; this base implementation has none.
+sub ignore
+{
+	return ();
+}
+
+# List of transformations; this base implementation has none.
+sub transformations
+{
+	return ();
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+XML::GRDDL::External - base class for externally loaded documents
+
+=head1 DESCRIPTION
+
+This is the superclass of L<XML::GRDDL::Transformation>, L<XML::GRDDL::Profile>,
+and L<XML::GRDDL::Namespace>. Doesn't do much on its own.
+
+=head1 SEE ALSO
+
+L<XML::GRDDL>,
+L<XML::GRDDL::Transformation>, L<XML::GRDDL::Profile>, L<XML::GRDDL::Namespace>.
+
+=head1 AUTHOR
+
+Toby Inkster E<lt>tobyink@cpan.orgE<gt>.
+
+=head1 COPYRIGHT
+
+Copyright 2008-2010 Toby Inkster
+
+This library is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.

File lib/XML/GRDDL/Namespace.pm

+package XML::GRDDL::Namespace;
+
+use 5.008;
+use common::sense;
+use base qw[XML::GRDDL::External];
+
+use RDF::Trine qw[iri];
+use Scalar::Util qw[blessed];
+
+our $VERSION = '0.001';
+
+# Namespace URIs that are hard-coded to be skipped.
+our @ignore = (
+	'http://www.w3.org/1999/xhtml',
+	'http://www.w3.org/2003/g/data-view',
+	'http://www.w3.org/2003/g/data-view#',
+	'http://www.w3.org/2005/Atom',
+	);
+
+# Returns the current contents of @ignore.
+sub ignore
+{
+	return @ignore;
+}
+
+# Fetches the namespace document and returns the URIs that it gives as
+# the GRDDL 'namespaceTransformation' of this namespace URI.
+#
+# Returns an empty list when the document could not be fetched, when
+# it yields no statements, or when it names no such transformation.
+sub transformations
+{
+	my ($self) = @_;
+	my $response = $self->{grddl}->_fetch(
+		$self->{uri},
+		Referer  => $self->{referer},
+		Accept   => 'application/xhtml+xml, text/html, application/rdf+xml, text/turtle, application/xml;q=0.1, text/xml;q=0.1, */*;q=0.01',
+		);
+
+	# The body of an unsuccessful response is not the namespace
+	# document, so it is not handed to the RDF parsers.
+	return unless $response->is_success;
+
+	my $model = $self->{grddl}->_rdf_model($response->decoded_content, $response->base, $response->content_type);
+
+	return
+		unless blessed($model)
+		&& $model->can('count_statements')
+		&& $model->count_statements;
+
+	my @transformations;
+	my $iter = $model->get_statements(
+		iri($self->{uri}),
+		iri(XML::GRDDL::GRDDL_NS.'namespaceTransformation'),
+		undef);
+	while (my $st = $iter->next)
+	{
+		# Only resource objects can name a transformation.
+		next unless $st->object->is_resource;
+		push @transformations, $st->object->uri;
+	}
+	
+	return @transformations;
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+XML::GRDDL::Namespace - represents a namespace URI
+
+=head1 DESCRIPTION
+
+This module is used internally by XML::GRDDL and you probably don't want to mess with it.
+
+C<< @XML::GRDDL::Namespace::ignore >> is an array of strings and regular expressions
+for matching namespace URIs that should be ignored. You can fiddle with it, but it voids
+your warranty.
+
+The ignore list currently consists of the XHTML namespace, Atom namespace and the GRDDL
+namespace itself.
+
+Namespace documents may be written in any format supported by
+RDF::RDFa::Parser or RDF::Trine::Parser, including RDF/XML, Turtle
+and XHTML+RDFa.
+
+=head1 SEE ALSO
+
+L<XML::GRDDL>.
+
+=head1 AUTHOR
+
+Toby Inkster E<lt>tobyink@cpan.orgE<gt>.
+
+=head1 COPYRIGHT
+
+Copyright 2008-2010 Toby Inkster
+
+This library is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.

File lib/XML/GRDDL/Profile.pm

+package XML::GRDDL::Profile;
+
+use 5.008;
+use common::sense;
+use base qw[XML::GRDDL::External];
+
+use RDF::Trine qw[iri];
+use Scalar::Util qw[blessed];
+
+our $VERSION = '0.001';
+
+# Hard-coded list of profile URIs to skip.
+# Each entry is either a plain string or a compiled regular expression (qr{}).
+# How entries are compared against a candidate URI is decided by whatever
+# code consumes ignore(); that code is not visible in this file.
+our @ignore = (
+	'http://www.w3.org/1999/xhtml/vocab',
+	'http://www.w3.org/1999/xhtml/vocab#',
+	'http://www.w3.org/2003/g/data-view',
+	'http://www.w3.org/2003/g/data-view#',
+	qr{^http://purl\.org/uF/},
+	qr{^http://microformats\.org/profile/},
+	qr{^http://ufs\.cc/x/},
+	);
+
+# Class method: returns the package-level @ignore list (strings and compiled
+# regular expressions). The invocant is accepted but not consulted.
+sub ignore
+{
+	shift;    # discard the invocant
+	return @ignore;
+}
+
+# Fetch the profile document and list the transformations it declares.
+#
+# The document at $self->{uri} is retrieved through the owning GRDDL object,
+# parsed into an RDF model, and searched for statements whose subject is the
+# profile URI and whose predicate is grddl:profileTransformation.
+#
+# Returns the URIs of all resource-valued objects of those statements, or
+# nothing (bare return) when no usable, non-empty model was obtained.
+sub transformations
+{
+	my $self  = shift;
+	my $grddl = $self->{grddl};
+
+	my $response = $grddl->_fetch(
+		$self->{uri},
+		Referer  => $self->{referer},
+		Accept   => 'application/xhtml+xml, text/html, application/rdf+xml, text/turtle, application/xml;q=0.1, text/xml;q=0.1, */*;q=0.01',
+		);
+
+	my $model = $grddl->_rdf_model(
+		$response->decoded_content,
+		$response->base,
+		$response->content_type,
+		);
+
+	# Give up unless we were handed a model object that actually holds data.
+	return unless blessed($model);
+	return unless $model->can('count_statements');
+	return unless $model->count_statements;
+
+	my $subject   = iri($self->{uri});
+	my $predicate = iri(XML::GRDDL::GRDDL_NS.'profileTransformation');
+	my $matches   = $model->get_statements($subject, $predicate, undef);
+
+	my @found;
+	while (my $statement = $matches->next)
+	{
+		# Literal or blank-node objects cannot name a transformation.
+		next unless $statement->object->is_resource;
+		push @found, $statement->object->uri;
+	}
+
+	return @found;
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+XML::GRDDL::Profile - represents a profile URI
+
+=head1 DESCRIPTION
+
+This module is used internally by XML::GRDDL and you probably don't want to mess with it.
+
+C<< @XML::GRDDL::Profile::ignore >> is an array of strings and regular expressions
+for matching profile URIs that should be ignored. You can fiddle with it, but it voids
+your warranty.
+
+The ignore list currently consists of the RDFa profile, the GRDDL profile itself, and regular
+expressions matching profiles that start 'http://purl.org/uF/', 'http://microformats.org/profile/'
+and 'http://ufs.cc/x/'.
+
+Profile documents may be written in any format supported by
+RDF::RDFa::Parser or RDF::Trine::Parser, including RDF/XML, Turtle
+and XHTML+RDFa.
+
+=head1 SEE ALSO
+
+L<XML::GRDDL>.
+
+=head1 AUTHOR
+
+Toby Inkster E<lt>tobyink@cpan.orgE<gt>.
+
+=head1 COPYRIGHT
+
+Copyright 2008-2010 Toby Inkster
+
+This library is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.

File lib/XML/GRDDL/Transformation.pm

+package XML::GRDDL::Transformation;
+
+use 5.008;
+use common::sense;
+use base qw[XML::GRDDL::External];
+
+use XML::GRDDL::Transformation::XSLT_1;
+use XML::GRDDL::Transformation::RDF_EASE;
+
+our $VERSION = '0.001';
+
+# Constructor. Builds the object via the parent class, fetches the
+# transformation document straight away, keeps the HTTP response in
+# $self->{'response'}, and then re-blesses the object into a concrete
+# subclass chosen from what was fetched:
+#
+#   * XSLT_1   - the Content-Type mentions "xsl"/"xslt", or the body contains
+#                the XSLT namespace URI;
+#   * RDF_EASE - the Content-Type is text/css or text/x-rdf+css.
+#
+# Anything else is returned as a plain instance of the invoking class.
+sub new
+{
+	my $class = shift;
+	my $self  = $class->SUPER::new(@_);
+
+	my $response = $self->{grddl}->_fetch(
+		$self->{uri},
+		Referer  => $self->{referer},
+		Accept   => 'application/xslt+xml, text/xslt, text/xsl, text/x-rdf+css, text/css',
+		);
+	$self->{'response'} = $response;
+
+	my $content_type = $response->header('content-type');
+
+	# The body is only inspected when the header did not already say XSLT.
+	my $looks_like_xslt = ($content_type =~ m#xslt?#i)
+		|| ($response->content =~ m#http://www.w3.org/1999/XSL/Transform#);
+
+	return bless $self, 'XML::GRDDL::Transformation::XSLT_1'
+		if $looks_like_xslt;
+
+	return bless $self, 'XML::GRDDL::Transformation::RDF_EASE'
+		if $content_type =~ /text\/(css|x\-rdf\+css)/i;
+
+	return $self;
+}
+
+# Accessor: the URI of this transformation, as stored in $self->{uri}.
+sub uri
+{
+	return $_[0]{uri};
+}
+
+# Fallback used when the object was not re-blessed into a concrete subclass:
+# emits a warning naming the transformation URI and returns nothing. The
+# input document (second argument) is ignored.
+sub transform
+{
+	my $self = $_[0];
+	warn "Cannot perform transformation: ".$self->{uri};
+	return;
+}
+
+# Run transform() on the input and parse its output as RDF.
+#
+# transform() is called in list context and is expected to yield a
+# (serialisation, media type) pair. When the serialisation is false (nothing
+# was produced) nothing is returned; otherwise the result of the GRDDL
+# object's _rdf_model() is returned, with $self->{referer} as the second
+# argument and a trailing flag of 1.
+sub model
+{
+	my $self = shift;
+	my ($serialised, $media_type) = $self->transform(shift);
+
+	return unless $serialised;
+
+	return $self->{grddl}->_rdf_model($serialised, $self->{referer}, $media_type, 1);
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+XML::GRDDL::Transformation - represents a transformation
+
+=head1 DESCRIPTION
+
+The interface is a little weird.
+
+=head2 Constructor
+
+=over 4
+
+=item C<< XML::GRDDL::Transformation->new($turi, $duri, [$grddl]) >>
+
+Constructs a new transformation object.
+
+$turi is the URI of the transformation itself; $duri is the document URI, used
+for sending an HTTP Referer header, and for resolving relative URIs found in
+the document; $grddl is an XML::GRDDL object used as a cache between
+requests, and used for its C<ua> method.
+
+=back
+
+=head2 Methods
+
+=over 4
+
+=item C<< $transformation->uri >>
+
+Returns the URI of the transformation.
+
+=item C<< $transformation->transform($xml) >>
+
+Transforms some XML, either a well-formed XML string, or an
+XML::LibXML::Document. Returns a string.
+
+If called in list context returns a string, media type pair.
+
+=item C<< $transformation->model($xml) >>
+
+Transforms some XML and then parses the result as RDF. Returns an
+RDF::Trine::Model.
+
+The intermediate RDF format can be any format supported by
+RDF::RDFa::Parser or RDF::Trine::Parser, including RDF/XML, Turtle
+and XHTML+RDFa.
+
+=back
+
+=head1 SEE ALSO
+
+L<XML::GRDDL>.
+
+L<XML::GRDDL::Transformation::XSLT_1>,
+L<XML::GRDDL::Transformation::RDF_EASE>.
+
+=head1 AUTHOR
+
+Toby Inkster E<lt>tobyink@cpan.orgE<gt>.
+
+=head1 COPYRIGHT
+
+Copyright 2008-2010 Toby Inkster
+
+This library is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.

File lib/XML/GRDDL/Transformation/RDF_EASE.pm

+package XML::GRDDL::Transformation::RDF_EASE;
+
+use 5.008;
+use common::sense;
+use base qw[XML::GRDDL::Transformation];
+
+use Scalar::Util qw[blessed];
+use XML::GRDDL::Transformation::RDF_EASE::Functional qw[:standard];
+
+our $VERSION = '0.001';
+
+# Apply this RDF-EASE sheet (the body of the fetched response) to the input.
+#
+# $input may be an XML string or an XML::LibXML::Document; a document is
+# serialised with toString first. Returns the rdfease_to_rdfa() result; in
+# list context the media type 'application/xhtml+xml' is returned alongside it.
+sub transform
+{
+	my $self  = shift;
+	my $input = shift;
+
+	$input = $input->toString
+		if blessed($input) && $input->isa('XML::LibXML::Document');
+
+	my $rdfa = rdfease_to_rdfa($self->{'response'}->decoded_content, $input);
+
+	return wantarray ? ($rdfa, 'application/xhtml+xml') : $rdfa;
+}
+
+# Apply this RDF-EASE sheet to the input and parse the resulting markup.
+#
+# Accepts the same input forms as transform(). The rdfease_to_rdfa() output
+# is handed to the GRDDL object's _rdf_model() as 'application/xhtml+xml',
+# with $self->{referer} as the second argument and a trailing flag of 1;
+# whatever _rdf_model() returns is returned.
+sub model
+{
+	my ($self, $input) = @_;
+
+	my $is_dom = blessed($input) && $input->isa('XML::LibXML::Document');
+	$input = $input->toString if $is_dom;
+
+	my $rdfa = rdfease_to_rdfa($self->{'response'}->decoded_content, $input);
+
+	return $self->{grddl}->_rdf_model($rdfa, $self->{referer}, 'application/xhtml+xml', 1);
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+XML::GRDDL::Transformation::RDF_EASE - represents an RDF-EASE transformation
+
+=head1 DESCRIPTION
+
+Implements RDF-EASE transformations.
+
+=head1 SEE ALSO
+
+L<XML::GRDDL>, L<XML::GRDDL::Transformation>.
+
+A standalone RDF-EASE implementation can be found in
+L<XML::GRDDL::Transformation::RDF_EASE::Functional>.
+
+=head1 AUTHOR
+
+Toby Inkster E<lt>tobyink@cpan.orgE<gt>.
+
+=head1 COPYRIGHT
+
+Copyright 2008-2010 Toby Inkster
+
+This library is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.

File lib/XML/GRDDL/Transformation/RDF_EASE/Functional.pm

+package XML::GRDDL::Transformation::RDF_EASE::Functional;
+
+use 5.008;
+use base qw[Exporter];
+use common::sense;
+
+use CSS;
+use CSS::Parse::PRDGrammar;
+use XML::GRDDL::Transformation::RDF_EASE::Selector;
+use Exporter;
+use XML::LibXML;
+
+our $VERSION = '0.001';
+
+# Nothing is exported by default. The ':standard' tag exports only
+# rdfease_to_rdfa; ':extended' adds the lower-level helpers.
+our @EXPORT_OK = qw(&rdfease_to_rdfa &parse_sheet &parse_value &bnode_for_element &rule_matches_node);
+our %EXPORT_TAGS = (
+		'standard' => [qw(&rdfease_to_rdfa)],
+		'extended' => [qw(&rdfease_to_rdfa &parse_sheet &parse_value &bnode_for_element &rule_matches_node)]
+	);
+
+# Package-level working state shared by the functions below. All four
+# variables are re-initialised at the start of every rdfease_to_rdfa() call.
+#   $_RDFEASE_MatcherCacher   - rule_matches_node() cache: {'Lists'} holds
+#                               XPath results per rule, {'Answers'} holds
+#                               match results per rule and element path
+#   $_RDFEASE_BlankNodes      - blank node label per element node path
+#   $_RDFEASE_BlankNode_Count - counter used to number new blank node labels
+#   $_RDFEASE_Protocols_Seen  - set of strings found before the first ':' in
+#                               emitted values; process_tree() declares a
+#                               namespace prefix for each one
+our $_RDFEASE_MatcherCacher   = {};
+our $_RDFEASE_BlankNodes      = {};
+our $_RDFEASE_BlankNode_Count = 0;
+our $_RDFEASE_Protocols_Seen  = {};
+	
+# Apply an RDF-EASE sheet to a well-formed XML/XHTML string.
+#
+#   $ease  - the RDF-EASE (CSS syntax) sheet, as a string
+#   $html  - the document to annotate, as a string
+#   $asDOM - optional; when true the annotated XML::LibXML document object is
+#            returned instead of a string
+#
+# Returns the annotated document: the DOM when $asDOM is true, otherwise the
+# serialised document element.
+sub rdfease_to_rdfa
+{
+	my ($ease, $html, $asDOM) = @_;
+	$asDOM ||= 0;
+
+	# Start every run with clean shared state.
+	$_RDFEASE_MatcherCacher   = {};
+	$_RDFEASE_BlankNodes      = {};
+	$_RDFEASE_Protocols_Seen  = {};
+	$_RDFEASE_BlankNode_Count = 0;
+
+	# Parse the input into a DOM tree.
+	my $DOMTree = XML::LibXML->new()->parse_string($html);
+
+	# RDF-EASE Algorithm, step 2: choose a 'kwijibo' string that does not
+	# occur anywhere in the input (compared case-insensitively).
+	my $kwijibo = 'RDFEASE';
+	$kwijibo = 'RDFEASE'.int(rand(900000)+100000)
+		while $html =~ /$kwijibo/i;
+
+	# RDF-EASE Algorithm, steps 1, 3, 4 and 5: parse the sheet.
+	my $sheet = parse_sheet($ease);
+
+	# RDF-EASE Algorithm, step 6 (and the rest of step 2): annotate the tree.
+	# Step 7 is deliberately not performed, because RDFa is the wanted output.
+	process_tree($DOMTree, $sheet, $kwijibo);
+
+	return $DOMTree if $asDOM;
+	return $DOMTree->documentElement->toString;
+}
+
+# Annotate a whole document: process the document element (which recurses
+# into its descendants), then declare one namespace prefix on the document
+# element for every entry recorded in $_RDFEASE_Protocols_Seen. Each prefix
+# is the kwijibo string followed by the recorded name, bound to "<name>:".
+sub process_tree
+{
+	my ($DOM, $EASE, $kwijibo) = @_;
+
+	process_element($DOM->documentElement, $DOM, $EASE, $kwijibo);
+
+	for my $scheme (keys %$_RDFEASE_Protocols_Seen)
+	{
+		my $attribute = 'xmlns:'.$kwijibo.$scheme;
+		$DOM->documentElement->setAttribute($attribute, $scheme.':');
+	}
+}
+
+# Apply every matching rule of the parsed RDF-EASE sheet to one element and
+# then, in most cases, recurse into its element children.
+#
+#   $elem    - the element being processed ('E' in the comments below)
+#   $DOM     - the owning document
+#   $CSS     - parsed sheet as returned by parse_sheet()
+#              ({ data => [...rules...], prefixes => {...} })
+#   $kwijibo - the per-run unique string used to build CURIE prefixes and
+#              blank node labels
+#
+# Rule output is first staged in temporary 'x-rdf-*' attributes and merged
+# into the real attributes once all rules have been applied:
+#   * about/content/datatype: the staged value is used only when the element
+#     does not already carry that attribute;
+#   * typeof/rel/rev/property/role: the staged value is appended to any
+#     existing value.
+#
+# Fixes relative to the earlier implementation:
+#   * attr("name") / attr('name') no longer keeps the closing quote as part
+#     of the attribute name;
+#   * nearest-ancestor() selectors are tokenised before conversion to XPath
+#     and evaluated with the 'xhtml' prefix registered, as parse_sheet() and
+#     rule_matches_node() already do;
+#   * the ancestor test compares node paths on step boundaries, so
+#     ".../div[1]" is no longer taken for an ancestor of ".../div[10]/p".
+sub process_element
+{
+	my $elem    = shift; # 'E'
+	my $DOM     = shift;
+	my $CSS     = shift;
+	my $kwijibo = shift;
+
+	# For each rule set rs in RuleList
+	foreach my $rule_block (@{$CSS->{'data'}})
+	{
+		# If the selector of rule set rs does not match element E, move on to the
+		# next rule set in RuleList
+		next unless rule_matches_node($rule_block, $elem, $DOM);
+
+		# Each property value pair (p, v) within rs should be handled as follows
+		foreach my $rule (@{$rule_block->{'properties'}})
+		{
+			# Skip non "-rdf-" rules.
+			my $prop;
+			if ($rule->{'property'} =~ /^\-rdf\-(.*)$/i)
+				{ $prop = lc($1); }
+			else
+				{ next; }
+
+			my @vals = parse_value($rule->{'value'}, $CSS->{'prefixes'});
+
+			if ($prop =~ /^(typeof|rel|rev|property|role)$/)
+			{
+				# 'reset' discards whatever earlier rules staged.
+				if (grep {/^reset$/i} @vals)
+					{ $elem->setAttribute('x-rdf-'.$prop, undef); }
+					
+				my $new = $elem->getAttribute('x-rdf-'.$prop);
+				$new .= ' ' if ($new);
+				foreach my $v (@vals)
+				{
+					next if ($v eq 'reset');
+					# Remember the part before the first colon so that
+					# process_tree() can declare a prefix for it.
+					$_RDFEASE_Protocols_Seen->{$1} = 1
+						if ($v =~ /^([^:]+)/);
+					$new .= "$kwijibo$v ";
+				}
+				$new =~ s/ $//;
+				$elem->setAttribute('x-rdf-'.$prop, $new);
+			}
+			elsif ($prop eq 'about')
+			{
+				my $v = $vals[0];
+
+				if (lc($v) eq 'reset')
+					{ $elem->removeAttribute('x-rdf-about'); }
+				elsif (lc($v) eq 'document')
+				{
+					$elem->setAttribute('x-rdf-about', '')
+						unless (defined $elem->getAttribute($prop));
+				}
+				elsif ($v =~ /^NEAR:\s+(.+)$/)
+				{
+					my $near_selector = $1;
+
+					# Convert the selector the same way parse_sheet() does
+					# (tokenise first), and evaluate it with the 'xhtml'
+					# prefix registered because the generated XPath uses
+					# xhtml:* name tests.
+					my $tokens = XML::GRDDL::Transformation::RDF_EASE::Selector::get_tokens($near_selector);
+					my $xpath  = XML::GRDDL::Transformation::RDF_EASE::Selector::to_xpath(@{ $tokens });
+					my $xpc    = XML::LibXML::XPathContext->new;
+					$xpc->registerNs(xhtml => 'http://www.w3.org/1999/xhtml');
+					my @matched = $xpc->findnodes($xpath, $DOM);
+
+					# Pick the deepest match that is E itself or an ancestor
+					# of E, judged by node path.
+					my $elem_path = $elem->nodePath;
+					my $best_match;
+					foreach my $matching_node (@matched)
+					{
+						my $match_path = $matching_node->nodePath;
+
+						# Equal paths, or a prefix that ends exactly on a '/'
+						# step boundary.
+						next unless (($elem_path eq $match_path)
+						||  (index($elem_path, $match_path.'/') == 0));
+
+						$best_match = $matching_node
+							if ((!$best_match)
+							||  (length($match_path) > length($best_match->nodePath)));
+					}
+					if ($best_match)
+					{
+						$elem->setAttribute('x-rdf-about', '['.bnode_for_element($best_match, $kwijibo).']');
+					}
+				}
+			}
+			elsif ($prop eq 'content')
+			{
+				# Non-greedy capture so that an optional closing quote is not
+				# swallowed into the attribute name.
+				if ($rule->{'value'} =~ /^\s*attr\([\'\"]?(.+?)[\'\"]?\)\s*$/i)
+					{ $elem->setAttribute('x-rdf-content', $elem->getAttribute($1)); }
+			}
+			elsif ($prop eq 'datatype')
+			{
+				my $v = $vals[0];
+				
+				if (lc($v) eq 'reset')
+					{ $elem->removeAttribute('x-rdf-datatype'); }
+				elsif (lc($v) eq 'string')
+					{ $elem->setAttribute('x-rdf-datatype', ''); }
+				elsif ($v =~ /\:/)
+				{
+					$_RDFEASE_Protocols_Seen->{$1} = 1
+						if ($v =~ /^([^:]+)/);
+					$elem->setAttribute('x-rdf-datatype', "$kwijibo$v");
+				}
+			}
+		}
+	}
+	
+	# Merge staged single-valued attributes: existing attributes win.
+	foreach my $prop (qw(about content datatype))
+	{
+		if (defined $elem->getAttribute('x-rdf-'.$prop))
+		{
+			$elem->setAttribute($prop, $elem->getAttribute('x-rdf-'.$prop))
+				if (!defined $elem->getAttribute($prop));
+			$elem->removeAttribute('x-rdf-'.$prop);
+		}
+	}
+	# Merge staged list-valued attributes: staged values are appended.
+	foreach my $prop (qw(typeof rel rev property role))
+	{
+		if ($elem->getAttribute('x-rdf-'.$prop))
+		{
+			if ($elem->getAttribute($prop))
+			{
+				$elem->setAttribute($prop,
+					$elem->getAttribute($prop).' '.
+					$elem->getAttribute('x-rdf-'.$prop));
+			}
+			else
+			{
+				$elem->setAttribute($prop,
+					$elem->getAttribute('x-rdf-'.$prop));
+			}
+			$elem->removeAttribute('x-rdf-'.$prop);
+		}
+	}
+
+	# Decide whether to descend. An element with a non-empty 'property' is
+	# not descended into when it has no datatype or an XMLLiteral datatype,
+	# unless it also has a 'content' attribute.
+	my $recurse = 1;
+	if (length $elem->getAttribute('property'))
+	{
+		$recurse = 0 if (!defined $elem->getAttribute('datatype'));
+		$recurse = 0 if ($elem->getAttribute('datatype') =~ /XMLLiteral\s*$/);
+		$recurse = 1 if (defined $elem->getAttribute('content'));
+	}
+	
+	if ($recurse)
+	{
+		foreach my $child ($elem->getChildrenByTagName('*'))
+		{
+			process_element($child, $DOM, $CSS, $kwijibo);
+		}
+	}
+}
+
+# Return the blank node identifier ("_:<kwijibo>_Node<N>") for an element.
+#
+# Labels are remembered per element node path in $_RDFEASE_BlankNodes, so
+# repeated calls for the same path give the same identifier; a new label
+# takes the next value of $_RDFEASE_BlankNode_Count.
+sub bnode_for_element
+{
+	my ($elem, $kwijibo) = @_;
+	my $path = $elem->nodePath;
+
+	if (!defined $_RDFEASE_BlankNodes->{$path})
+	{
+		my $serial = ++$_RDFEASE_BlankNode_Count;
+		$_RDFEASE_BlankNodes->{$path} = sprintf('%s_Node%s', $kwijibo, $serial);
+	}
+
+	return '_:'.$_RDFEASE_BlankNodes->{$path};
+}
+
+# Does the rule's XPath select this element? Returns 1 or 0.
+#
+#   $rule - rule record carrying an 'xpath' entry
+#   $elem - the element to test
+#   $dom  - context passed to findnodes() when the XPath is first evaluated
+#
+# Two caches live in $_RDFEASE_MatcherCacher: 'Lists' keeps the node list
+# selected by each XPath (evaluated once, with the 'xhtml' prefix bound to
+# the XHTML namespace), and 'Answers' keeps the verdict for each
+# (XPath, element node path) pair.
+sub rule_matches_node
+{
+	my ($rule, $elem, $dom) = @_;
+
+	my $rulepath = $rule->{'xpath'};
+	my $elempath = $elem->nodePath;
+
+	my $answers = ($_RDFEASE_MatcherCacher->{'Answers'}->{$rulepath} ||= {});
+	return $answers->{$elempath}
+		if defined $answers->{$elempath};
+
+	my $lists = ($_RDFEASE_MatcherCacher->{'Lists'} ||= {});
+	if (!defined $lists->{$rulepath})
+	{
+		my $xpc = XML::LibXML::XPathContext->new;
+		$xpc->registerNs(xhtml => 'http://www.w3.org/1999/xhtml');
+		$lists->{$rulepath} = $xpc->findnodes($rulepath, $dom);
+	}
+
+	my $rv = 0;
+	foreach my $candidate ($lists->{$rulepath}->get_nodelist)
+	{
+		next unless $candidate->isSameNode($elem);
+		$rv = 1;
+		last;
+	}
+
+	$answers->{$elempath} = $rv;
+
+	return $rv;
+}
+
+# Split an RDF-EASE property value into a list of interpreted items.
+#
+#   $vals - the raw property value string
+#   $pfxs - hash reference mapping CURIE prefixes to URI strings
+#
+# Items are read from the front of the string, one after another:
+#   * the keywords reset, document and string are returned as they stand;
+#   * url(...) yields its (optionally quoted) argument;
+#   * nearest-ancestor(...) yields "NEAR: " followed by its argument;
+#   * prefix:suffix (optionally quoted) yields the mapped prefix URI with the
+#     suffix appended.
+# If any part of the string cannot be read this way, an empty list is
+# returned for the whole value.
+sub parse_value
+{
+	my ($vals, $pfxs) = @_;
+	my @rv;
+
+	# NOTE: unlike the patterns below, this one is not compiled with /x, so
+	# the spaces in it are literal.
+	return @rv
+		if ($vals =~ /^ \s* normal \s* $/i); 
+
+	# Recognised forms, tried in this order. The last capture group of every
+	# pattern is the unread remainder of the string.
+	my @forms = (
+		[ plain => qr/^ \s* (reset|document|string) \s* (.*) $/x ],
+		[ plain => qr/^ \s* url\(\s*\'([^\']*)\'\s*\) \s* (.*) $/ix ],
+		[ plain => qr/^ \s* url\(\s*\"([^\"]*)\"\s*\) \s* (.*) $/ix ],
+		[ plain => qr/^ \s* url\(\s*([^\"\'\)]*)\s*\) \s* (.*) $/ix ],
+		[ near  => qr/^ \s* nearest\-ancestor\(\s*\'([^\']*)\'\s*\) \s* (.*) $/ix ],
+		[ near  => qr/^ \s* nearest\-ancestor\(\s*\"([^\"]*)\"\s*\) \s* (.*) $/ix ],
+		[ near  => qr/^ \s* nearest\-ancestor\(\s*([^\"\'\)]*)\s*\) \s* (.*) $/ix ],
+		[ curie => qr/^ \s* \'([^\'\:]*)\:([^\']*)\' \s* (.*) $/ix ],
+		[ curie => qr/^ \s* \"([^\"\:]*)\:([^\"]*)\" \s* (.*) $/ix ],
+		[ curie => qr/^ \s* ([^\"\'\:\s]*)\:([^\"\'\s]*) \s* (.*) $/ix ],
+	);
+
+	ITEM: while (length $vals)
+	{
+		foreach my $form (@forms)
+		{
+			my ($kind, $pattern) = @{ $form };
+			my @captured = ($vals =~ $pattern)
+				or next;
+
+			$vals = pop @captured;
+
+			if ($kind eq 'near')
+				{ push @rv, "NEAR: $captured[0]"; }
+			elsif ($kind eq 'curie')
+				{ push @rv, $pfxs->{$captured[0]}.$captured[1]; }
+			else
+				{ push @rv, $captured[0]; }
+
+			next ITEM;
+		}
+
+		# Nothing recognisable at the front: the whole value is rejected.
+		my @nothing;
+		return @nothing;
+	}
+
+	return @rv;
+}
+
+# Parse an RDF-EASE sheet (CSS syntax) into the structure consumed by
+# process_element().
+#
+#   $css - the sheet, as a string
+#
+# Returns a hash reference with two entries:
+#   prefixes - hash reference of prefix => URI. It starts from the built-in
+#              table below and is extended by leading @prefix / @namespace
+#              at-rules and by the properties of rule blocks whose selector
+#              is '_'.
+#   data     - array reference of rule records, one per selector, sorted with
+#              css21_cascade_order. Each record carries: properties,
+#              selector, order (running number), tokens, specificity, xpath.
+#
+# NOTE(review): the shape of what CSS->new(...)->read_string() returns
+# (iterated below as a list of blocks with 'selectors' and 'properties') is
+# taken on trust from the CSS module; confirm against that module's version.
+sub parse_sheet
+{
+	my $css = shift;
+
+	my @data;
+	# $prefixes: built-in prefix table; $i: running rule order counter.
+	my ($prefixes, $i) = ({
+		'dc'    => 'http://purl.org/dc/terms/',
+		'foaf'  => 'http://xmlns.com/foaf/0.1/',
+		'owl'   => 'http://www.w3.org/2002/07/owl#',
+		'rdf'   => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
+		'rdfs'  => 'http://www.w3.org/2000/01/rdf-schema#',
+		'sioc'  => 'http://rdfs.org/sioc/ns#',
+		'skos'  => 'http://www.w3.org/2004/02/skos/core#',
+		'xsd'   => 'http://www.w3.org/2001/XMLSchema#'
+	}, 0);
+
+	# Handle at-rules in advance, as CSS::Parse::Heavy doesn't support them.
+	# Each pass strips one leading "@name ... ;" from the front of the sheet.
+	while ($css =~ /^\s*(\@\S+)\s+([^\;]+)\s*\;\s*(.*)$/s)
+	{
+		$css = $3;
+
+		my $atrule = $1;
+		my $stuff  = $2;
+		
+		# Only @prefix and @namespace are acted upon; other at-rules are
+		# simply dropped.
+		if ($atrule =~ /^\@(prefix|namespace)$/i)
+		{
+			if ($stuff =~ /^\s*([A-Za-z0-9\._-]+)\s+(.+)\s*$/)
+			{
+				my $pfx = $1;
+				my $uri = $2;
+				
+				# Unwrap url(...) and then one layer of quotes, if present.
+				if ($uri =~ /^url\((.*)\)$/)
+					{ $uri = $1; }
+				if ($uri =~ /^\"(.*)\"$/)
+					{ $uri = $1; }
+				elsif ($uri =~ /^\'(.*)\'$/)
+					{ $uri = $1; }
+					
+				$prefixes->{ $pfx } = $uri;
+			}
+		}
+	}
+
+	# Patch CSS::Parse::Heavy because it doesn't support CSS properties that
+	# start with a dash.
+	# This edits the grammar text held in a package variable of
+	# CSS::Parse::PRDGrammar; once patched, the pattern no longer matches, so
+	# later calls leave the grammar alone.
+	$CSS::Parse::PRDGrammar::GRAMMAR =~ s#macro_nmstart:\s+/\[a-zA-Z\]/
+	                                     #macro_nmstart:   /[a-zA-Z_-]/
+	                                     #x;	
+
+	# Actually parse the CSS, using CSS::Parse::Heavy.
+	my $parser = CSS->new( { 'parser' => 'CSS::Parse::Heavy' } )->read_string($css);
+
+	foreach my $block (@$parser)
+	{
+		foreach my $selector (@{ $block->{selectors} })
+		{
+			# A block whose selector is '_' declares prefixes: each property
+			# name is a prefix and its value (optionally url(...)) the URI.
+			if ($selector->{name} eq '_')
+			{
+				foreach my $property (@{ $block->{properties} })
+				{
+					my $prefix = $property->{options}->{property};
+					my $url    = $property->{options}->{value};
+					
+					$url = $1 if ($url =~ /url\([\'\"]?([^\'\"]+)[\'\"]?\)/i);
+					$prefixes->{$prefix} = $url;
+				}
+				next;
+			}
+			
+			# One rule record per selector; a block with several selectors
+			# therefore yields several records with the same properties.
+			my $x = {};
+			foreach my $property (@{ $block->{properties} })
+			{
+				push @{ $x->{properties} }, $property->{options};
+			}
+			$x->{selector}    = $selector->{name};
+			$x->{order}       = ++$i;
+			$x->{tokens}      = XML::GRDDL::Transformation::RDF_EASE::Selector::get_tokens($x->{selector});
+			$x->{specificity} = XML::GRDDL::Transformation::RDF_EASE::Selector::specificity(@{ $x->{tokens} });
+			$x->{xpath}       = XML::GRDDL::Transformation::RDF_EASE::Selector::to_xpath(@{ $x->{tokens} });
+			push @data, $x;
+		}
+	}
+	
+	# Lowest specificity first; ties keep source order (see css21_cascade_order).
+	my @sorted = sort css21_cascade_order @data;
+	
+	return {
+		prefixes => $prefixes,
+		data     => \@sorted
+	};
+}
+
+# sort() comparator for rule records: ascending 'specificity', with ties
+# broken by ascending 'order'. Reads the package variables $a and $b set
+# by sort().
+sub css21_cascade_order
+{
+	return ($a->{specificity} <=> $b->{specificity})
+	    || ($a->{order}       <=> $b->{order});
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+XML::GRDDL::Transformation::RDF_EASE::Functional - stand-alone RDF-EASE module
+
+=head1 DESCRIPTION
+
+This module exports one function:
+
+=over 4
+
+=item C<< rdfease_to_rdfa( $css, $xhtml, $as_dom ) >>
+
+Takes an RDF-EASE (CSS) transformation and an XHTML document (well-formed
+string) and returns the resulting XHTML+RDFa document, which can then be
+fed to L<RDF::RDFa::Parser>.
+
+If $as_dom is true, returns an XML::LibXML::Document; otherwise, a string.
+
+=back
+
+=head1 SEE ALSO
+
+L<XML::GRDDL>, L<XML::GRDDL::Transformation::RDF_EASE>.
+
+L<RDF::RDFa::Parser>.
+
+=head1 AUTHOR
+
+Toby Inkster E<lt>tobyink@cpan.orgE<gt>.
+
+=head1 COPYRIGHT
+
+Copyright 2008-2010 Toby Inkster
+
+This library is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.

File lib/XML/GRDDL/Transformation/RDF_EASE/Selector.pm

+package XML::GRDDL::Transformation::RDF_EASE::Selector;
+
+use 5.008;
+use common::sense;
+
+our $VERSION = '0.001';
+
+# Compute a single sortable specificity number for a tokenised selector.
+# The arguments are the selector's tokens. The id count is weighted by
+# 1,000,000, the class/attribute count by 1,000 and the element count by 1.
+sub specificity
+{
+	my $ids   = specificity_count_ids(@_);
+	my $attrs = specificity_count_attrs(@_);
+	my $elems = specificity_count_elems(@_);
+
+	return $ids * 1000000 + $attrs * 1000 + $elems;
+}
+
+# Split a CSS selector into tokens: simple selectors (e.g. "div.note") and
+# the combinators between them ('>', '+', or ' ' for a descendant).
+#
+#   $selector - the selector string; undef is treated as the empty string
+#
+# Returns an array reference of tokens, in source order.
+#
+# Whitespace at the very start or end of the selector is removed first. It
+# separates nothing, and would otherwise be emitted as a stray descendant
+# combinator and produce XPath such as "//xhtml:div//".
+sub get_tokens
+{
+	my $selector = shift;
+	my @rv;
+	
+	$selector = '' unless defined $selector;
+	$selector =~ s/^\s+//;
+	$selector =~ s/\s+$//;
+	
+	while (length $selector)
+	{
+		# Child or adjacent-sibling combinator, with optional whitespace.
+		if ($selector =~ /^ \s* ([\>\+]) \s* (.*) $/x)
+		{
+			push @rv, $1;
+			$selector = $2;
+		}
+		# Bare whitespace: descendant combinator.
+		elsif ($selector =~ /^ (\s+) (.*) $/x)
+		{
+			push @rv, ' ';
+			$selector = $2;
+		}
+		# A simple selector followed by something else.
+		elsif ($selector =~ /^ ([^\s\>\+]+) ([\s\>\+].*) $/x)
+		{
+			push @rv, $1;
+			$selector = $2;
+		}
+		# Whatever is left is the final simple selector.
+		else
+		{
+			push @rv, $selector;
+			$selector = '';
+		}
+	}
+	
+	return \@rv;
+}
+
+# Number of tokens that contain a '#' (i.e. mention an id).
+sub specificity_count_ids
+{
+	my $count = 0;
+	foreach my $token (@_)
+	{
+		$count++ if $token =~ /\#/;
+	}
+	return $count;
+}
+
+# Number of tokens that contain a '.' or a '[' (i.e. mention a class or an
+# attribute selector).
+sub specificity_count_attrs
+{
+	my $count = 0;
+	foreach my $token (@_)
+	{
+		$count++ if $token =~ /[\.[]/;
+	}
+	return $count;
+}
+
+# Number of tokens that begin with an ASCII letter (i.e. start with an
+# element name).
+sub specificity_count_elems
+{
+	my $count = 0;
+	foreach my $token (@_)
+	{
+		$count++ if $token =~ /^[a-z]/i;
+	}
+	return $count;
+}
+
+# Convert a tokenised selector (the tokens are the arguments) into an XPath
+# expression that searches the whole document: '//' followed by the
+# to_partial_xpath() translation of the tokens.
+sub to_xpath
+{
+	my @tokens = @_;
+	return '//'.to_partial_xpath(\@tokens);
+}
+
+# Break one simple-selector token into its pieces.
+#
+#   $str - a token such as "div.note#main[lang]" or "p:first-child"
+#
+# Returns the list of pieces in source order: an optional leading element
+# name (or '*'), then any number of "[...]" attribute selectors,
+# ":name(arg)" or ":name" pseudo-classes, ".class" and "#id" pieces.
+#
+# The loop always terminates. A pseudo-class without parentheses (such as
+# ":first-child") is recognised as a piece of its own, and any remainder that
+# matches none of the known forms is returned as one final piece instead of
+# being retried forever.
+sub token_to_pieces
+{
+	my $str = shift;
+	my @rv;
+	
+	# Optional leading element name or universal selector.
+	if ($str =~ /^ ([a-z0-9\*]+) (.*) $/ix)
+	{
+		push @rv, $1;
+		$str = $2;
+	}
+	
+	while (length $str)
+	{
+		if ($str =~ /^ (\[[^\]]*\]) (.*) $/ix)
+		{
+			push @rv, $1;
+			$str = $2;
+		}
+		elsif ($str =~ /^ (\:[a-z-]+\([a-z_-]*\)) (.*) $/ix)
+		{
+			push @rv, $1;
+			$str = $2;
+		}
+		# Pseudo-class without an argument list, e.g. ":first-child".
+		elsif ($str =~ /^ (\:[a-z-]+) (.*) $/ix)
+		{
+			push @rv, $1;
+			$str = $2;
+		}
+		elsif ($str =~ /^ (\.[a-z0-9_-]+) (.*) $/ix)
+		{
+			push @rv, $1;
+			$str = $2;
+		}
+		elsif ($str =~ /^ (\#[a-z0-9_-]+) (.*) $/ix)
+		{
+			push @rv, $1;
+			$str = $2;
+		}
+		# Unrecognised remainder: hand it back whole and stop.
+		else
+		{
+			push @rv, $str;
+			last;
+		}
+	}
+	
+	return @rv;
+}
+
+# Translate a list of selector tokens into an XPath fragment, one token per
+# call, recursing on the remaining tokens.
+#
+#   $toks - array reference of tokens (as produced by get_tokens)
+#   $self - true when the previous token was '+', in which case this token's
+#           step is written on the self:: axis
+#
+# Returns the XPath fragment, or '' when there are no tokens left. Element
+# names are emitted with the 'xhtml:' prefix, so the caller must evaluate the
+# result with that prefix bound.
+#
+# Fixes relative to the earlier implementation:
+#   * a token starting with '[' now gets the implicit '*' element name, like
+#     tokens starting with '.', '#' or ':' already did;
+#   * attribute names and values are captured so that the closing quote of a
+#     quoted value is no longer included in the value;
+#   * ':lang(xx)' is matched in the form token_to_pieces() actually produces
+#     (no surrounding brackets); the old bracketed test was unreachable.
+sub to_partial_xpath
+{
+	my $toks   = shift;
+	my @tokens = @{$toks};
+	my $self   = shift;
+	my $next   = 0;
+	my $t      = shift @tokens || return '';
+	my $rv     = '';
+	
+	# Make $t always start with the tag name.
+	$t = "*$t"
+		if ($t =~ /^[\.\#\:\[]/);
+	
+	if ($t eq '>')
+		{ $rv = '/'; }
+	elsif ($t eq ' ')
+		{ $rv = '//'; }
+	elsif ($t eq '+')
+		{ $rv = '/following-sibling::*[1]/'; $next = 1; }
+	else
+	{
+		my @bits = token_to_pieces($t);
+		foreach my $bit (@bits)
+		{
+			# .class
+			if ($bit =~ /^ \. (.*) $/ix)
+				{ $rv .= "[contains(concat(\" \",\@class,\" \"),concat(\" \",\"$1\",\" \"))]"; }
+			# #id
+			elsif ($bit =~ /^ \# (.*) $/ix)
+				{ $rv .= "[\@id=\"$1\"]"; }
+			# [attr~=value] : whitespace-separated word match
+			elsif ($bit =~ /^ \[ \s* ([^\s\~\|\=\]]+) \s* \~\= \s* [\"\']?(.*?)[\"\']? \s* \] $/ix)
+				{ $rv .= "[contains(concat(\" \",\@$1,\" \"),concat(\" \",\"$2\",\" \"))]"; }
+			# [attr|=value] : exact value or value followed by '-'
+			elsif ($bit =~ /^ \[ \s* ([^\s\~\|\=\]]+) \s* \|\= \s* [\"\']?(.*?)[\"\']? \s* \] $/ix)
+				{ $rv .= "[\@$1=\"$2\" or starts-with(\@$1,concat(\"$2\",\"-\"))]"; }
+			# [attr=value]
+			elsif ($bit =~ /^ \[ \s* ([^\s\~\|\=\]]+) \s*   \= \s* [\"\']?(.*?)[\"\']? \s* \] $/ix)
+				{ $rv .= "[\@$1=\"$2\"]"; }
+			# [attr]
+			elsif ($bit =~ /^ \[ \s* (.*) \s* \] $/ix)
+				{ $rv .= "[\@$1]"; }
+			elsif (lc($bit) eq ':first-child')
+				{ $rv = "*[1]/self::$rv"; }
+			# :lang(xx), tested against the 'lang' attribute
+			elsif ($bit =~ /^ \:lang\( (.*) \) $/ix)
+				{ $rv .= "[\@lang=\"$1\" or starts-with(\@lang,concat(\"$1\",\"-\"))]"; }
+			# anything else is taken to be an element name
+			else
+				{ $rv .= "xhtml:${bit}"; }
+		}
+	}
+		
+	return 'self::'.$rv.to_partial_xpath(\@tokens, $next)
+		if ($self);
+	return $rv.to_partial_xpath(\@tokens, $next);
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+XML::GRDDL::Transformation::RDF_EASE::Selector - CSS 2.1 selector utility functions
+
+=head1 DESCRIPTION
+
+Utility functions for dealing with CSS 2.1 selectors.
+
+Currently nothing here is suitable for external use. Hopefully as this is cleaned up,
+it might be able to export some useful functions.
+
+=head1 SEE ALSO
+
+L<XML::GRDDL::Transformation::RDF_EASE::Functional>.
+
+=head1 AUTHOR
+
+Toby Inkster E<lt>tobyink@cpan.orgE<gt>.
+
+=head1 COPYRIGHT
+
+Copyright 2008-2010 Toby Inkster
+
+This library is free software; you