Commits

Toby Inkster  committed c8d8151

splitting out some stuff from HTML-HTML5-Sanity.

  • Participants

Comments (0)

Files changed (12)

+############################################################################
+## Changes #################################################################
+############################################################################
+
+XML-LibXML-Debugging
+====================
+
+Created:      2010-04-26
+Home page:    <http://search.cpan.org/dist/XML-LibXML-Debugging/>
+Bug tracker:  <http://rt.cpan.org/Dist/Display.html?Queue=XML-LibXML-Debugging>
+Maintainer:   Toby Inkster <mailto:tobyink@cpan.org>
+
+0.04 [2010-04-28] # Fork from HTML-HTML5-Sanity.
+ - Split the to_hashref and to_clarkml functions out into a separate
+   distribution.
+ - Make functions into object methods.
+
+#############################################################
+
+@prefix :        <http://usefulinc.com/ns/doap#> .
+@prefix dcs:     <http://ontologi.es/doap-changeset#> .
+@prefix dc:      <http://purl.org/dc/terms/> .
+@prefix foaf:    <http://xmlns.com/foaf/0.1/> .
+@prefix my:      <http://purl.org/NET/cpan-uri/dist/XML-LibXML-Debugging/> .
+@prefix rdfs:    <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix toby:    <http://tobyinkster.co.uk/#> .
+@prefix xsd:     <http://www.w3.org/2001/XMLSchema#> .
+
+#############################################################
+
+<>
+
+	dc:title         "Changes" ;
+	dc:description   "Revision history for Perl extension XML::LibXML::Debugging."@en ;
+	dc:subject       my:project ;
+	dc:creator       toby:i .
+
+#############################################################
+
+my:v_0-04
+
+	a               :Version ;
+	dc:issued       "2010-04-28"^^xsd:date ;
+	:revision       "0.04"^^xsd:string ;
+	:file-release   <http://backpan.cpan.org/authors/id/T/TO/TOBYINK/XML-LibXML-Debugging-0.04.tar.gz> ;
+	rdfs:label      "Fork from HTML-HTML5-Sanity."@en ;
+	dcs:changeset
+	[
+		a dcs:ChangeSet ;
+		dcs:versus <http://purl.org/NET/cpan-uri/dist/HTML-HTML5-Sanity/v_0-03> ;
+		dcs:item [ rdfs:label "Split the to_hashref and to_clarkml functions out into a separate distribution."@en ] ;
+		dcs:item [ rdfs:label "Make functions into object methods."@en ] 
+	] .
+
+#############################################################
+
+my:project
+
+	a               :Project ;
+	:name           "XML-LibXML-Debugging" ;
+	:shortdesc      "get debugging information from XML::LibXML nodes"@en ;
+	:programming-language  "Perl" ;
+	:homepage       <http://search.cpan.org/dist/XML-LibXML-Debugging/> ;
+	:download-page  <http://search.cpan.org/dist/XML-LibXML-Debugging/> ;
+	rdfs:seeAlso    <http://purl.org/NET/cpan-uri/dist/HTML-HTML5-Sanity/project> ;
+	:bug-database   <http://rt.cpan.org/Dist/Display.html?Queue=XML-LibXML-Debugging> ;
+	:repository     [ a :SVNRepository ; :browse <http://goddamn.co.uk/viewvc/perlmods/XML-LibXML-Debugging/> ] ;
+	:maintainer     toby:i ;
+	:developer      toby:i ;
+	:documenter     toby:i ;
+	:tester         toby:i ;
+	:created        "2010-04-26"^^xsd:date ;
+	:license        <http://dev.perl.org/licenses/> ;
+	:release        my:v_0-04 .
+
+#############################################################
+
+toby:i
+
+	a               foaf:Person ;
+	foaf:name       "Toby Inkster" ;
+	foaf:homepage   <http://tobyinkster.co.uk/> ;
+	foaf:page       <http://search.cpan.org/~tobyink/> ;
+	foaf:mbox       <mailto:tobyink@cpan.org> ;
+	<http://www.w3.org/2002/07/owl#sameAs> <http://purl.org/NET/cpan-uri/person/tobyink> .
+
+#############################################################
+<?xml version="1.0" encoding="utf-8"?>
+<rdf:RDF
+   xmlns:dc="http://purl.org/dc/terms/"
+   xmlns:dcs="http://ontologi.es/doap-changeset#"
+   xmlns:foaf="http://xmlns.com/foaf/0.1/"
+   xmlns:my="http://purl.org/NET/cpan-uri/dist/XML-LibXML-Debugging/"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
+   xmlns:toby="http://tobyinkster.co.uk/#"
+   xmlns:xsd="http://www.w3.org/2001/XMLSchema#"
+   xmlns="http://usefulinc.com/ns/doap#">
+  <rdf:Description rdf:about="file:///home/tai/src/perlmods/XML-LibXML-Debugging/Changes.ttl">
+    <dc:creator rdf:resource="http://tobyinkster.co.uk/#i"/>
+    <dc:description xml:lang="en">Revision history for Perl extension XML::LibXML::Debugging.</dc:description>
+    <dc:subject rdf:resource="http://purl.org/NET/cpan-uri/dist/XML-LibXML-Debugging/project"/>
+    <dc:title>Changes</dc:title>
+  </rdf:Description>
+  <Version rdf:about="http://purl.org/NET/cpan-uri/dist/XML-LibXML-Debugging/v_0-04">
+    <dcs:changeset>
+      <dcs:ChangeSet>
+        <dcs:item>
+          <rdf:Description>
+            <rdfs:label xml:lang="en">Split the to_hashref and to_clarkml functions out into a separate distribution.</rdfs:label>
+          </rdf:Description>
+        </dcs:item>
+        <dcs:item>
+          <rdf:Description>
+            <rdfs:label xml:lang="en">Make functions into object methods.</rdfs:label>
+          </rdf:Description>
+        </dcs:item>
+        <dcs:versus rdf:resource="http://purl.org/NET/cpan-uri/dist/HTML-HTML5-Sanity/v_0-03"/>
+      </dcs:ChangeSet>
+    </dcs:changeset>
+    <dc:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2010-04-28</dc:issued>
+    <file-release rdf:resource="http://backpan.cpan.org/authors/id/T/TO/TOBYINK/XML-LibXML-Debugging-0.04.tar.gz"/>
+    <revision rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0.04</revision>
+    <rdfs:label xml:lang="en">Fork from HTML-HTML5-Sanity.</rdfs:label>
+  </Version>
+  <Project rdf:about="http://purl.org/NET/cpan-uri/dist/XML-LibXML-Debugging/project">
+    <bug-database rdf:resource="http://rt.cpan.org/Dist/Display.html?Queue=XML-LibXML-Debugging"/>
+    <created rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2010-04-26</created>
+    <developer rdf:resource="http://tobyinkster.co.uk/#i"/>
+    <documenter rdf:resource="http://tobyinkster.co.uk/#i"/>
+    <download-page rdf:resource="http://search.cpan.org/dist/XML-LibXML-Debugging/"/>
+    <homepage rdf:resource="http://search.cpan.org/dist/XML-LibXML-Debugging/"/>
+    <license rdf:resource="http://dev.perl.org/licenses/"/>
+    <maintainer rdf:resource="http://tobyinkster.co.uk/#i"/>
+    <name>XML-LibXML-Debugging</name>
+    <programming-language>Perl</programming-language>
+    <release rdf:resource="http://purl.org/NET/cpan-uri/dist/XML-LibXML-Debugging/v_0-04"/>
+    <repository>
+      <SVNRepository>
+        <browse rdf:resource="http://goddamn.co.uk/viewvc/perlmods/XML-LibXML-Debugging/"/>
+      </SVNRepository>
+    </repository>
+    <shortdesc xml:lang="en">get debugging information from XML::LibXML nodes</shortdesc>
+    <tester rdf:resource="http://tobyinkster.co.uk/#i"/>
+    <rdfs:seeAlso rdf:resource="http://purl.org/NET/cpan-uri/dist/HTML-HTML5-Sanity/project"/>
+  </Project>
+  <foaf:Person rdf:about="http://tobyinkster.co.uk/#i">
+    <ns1:sameAs xmlns:ns1="http://www.w3.org/2002/07/owl#" rdf:resource="http://purl.org/NET/cpan-uri/person/tobyink"/>
+    <foaf:homepage rdf:resource="http://tobyinkster.co.uk/"/>
+    <foaf:mbox rdf:resource="mailto:tobyink@cpan.org"/>
+    <foaf:name>Toby Inkster</foaf:name>
+    <foaf:page rdf:resource="http://search.cpan.org/~tobyink/"/>
+  </foaf:Person>
+</rdf:RDF>
+Changes.ttl
+Changes.xml
+Changes
+Makefile.PL
+MANIFEST
+MANIFEST.SKIP
+README
+META.yml
+SIGNATURE
+
+lib/XML/LibXML/Debugging.pm
+
+t/00sig.t
+t/01basics.t
+
+inc/Module/AutoInstall.pm
+inc/Module/Install/AutoInstall.pm
+inc/Module/Install/Base.pm
+inc/Module/Install/Can.pm
+inc/Module/Install/Fetch.pm
+inc/Module/Install/Include.pm
+inc/Module/Install/Makefile.pm
+inc/Module/Install/Metadata.pm
+inc/Module/Install.pm
+inc/Module/Install/Win32.pm
+inc/Module/Install/WriteAll.pm
+inc/Test/Signature.pm
+

File MANIFEST.SKIP

+^XML-LibXML-Debugging-.\...\.tar\.gz$
+^Makefile$
+^blib/
+^pm_to_blib
+^blibdirs
+\.svn
+^example.*\.pl$
+---
+abstract: 'get debugging information from XML::LibXML nodes'
+author:
+  - 'Toby Inkster <tobyink@cpan.org>'
+build_requires:
+  ExtUtils::MakeMaker: 6.42
+  Test::More: 0.61
+configure_requires:
+  ExtUtils::MakeMaker: 6.42
+distribution_type: module
+generated_by: 'Module::Install version 0.91'
+license: perl
+meta-spec:
+  url: http://module-build.sourceforge.net/META-spec-v1.4.html
+  version: 1.4
+name: XML-LibXML-Debugging
+no_index:
+  directory:
+    - inc
+    - t
+requires:
+  HTML::Entities: 3.27
+  Locale::Country: 1.06
+  XML::LibXML: 0
+  perl: 5.8.0
+resources:
+  bugtracker: http://rt.cpan.org/Dist/Display.html?Queue=XML-LibXML-Debugging
+  homepage: http://search.cpan.org/dist/XML-LibXML-Debugging/
+  license: http://dev.perl.org/licenses/
+  repository: http://goddamn.co.uk/viewvc/perlmods/XML-LibXML-Debugging/
+version: 0.04
+# Module::Install build script for the XML-LibXML-Debugging distribution.
+use strict;
+use warnings;
+
+use inc::Module::Install;
+
+# The main module path is derived from the distribution name:
+# "XML-LibXML-Debugging" becomes "lib/XML/LibXML/Debugging.pm".
+my $dist = 'XML-LibXML-Debugging';
+my $fn   = "lib/$dist.pm"; $fn =~ s#-#/#g;
+
+# Version, abstract, minimum perl and README text are all taken from the
+# main module so they only need maintaining in one place.
+name                $dist;
+perl_version_from   $fn;
+version_from        $fn;
+abstract_from       $fn;
+readme_from         $fn;
+author              'Toby Inkster <tobyink@cpan.org>';
+license             'perl';
+
+test_requires       'Test::More'        => '0.61';
+
+# Runtime prerequisites: only the modules that lib/XML/LibXML/Debugging.pm
+# actually loads (HTML::Entities for encode_entities_numeric, XML::LibXML
+# for the node classes being extended).
+requires            'HTML::Entities'    => '3.27';
+requires            'XML::LibXML'       => 0;
+
+resources(
+	'homepage'   => "http://search.cpan.org/dist/$dist/",
+	'repository' => "http://goddamn.co.uk/viewvc/perlmods/$dist/",
+	'bugtracker' => "http://rt.cpan.org/Dist/Display.html?Queue=$dist",
+	);
+	
+write_doap_changes;
+write_doap_changes_xml;
+
+include 'Test::Signature';
+auto_install;
+WriteAll(
+	'meta' => 1,
+	'sign' => 1,
+	);
+NAME
+    XML::LibXML::Debugging - get debugging information from XML::LibXML
+    nodes
+
+VERSION
+    0.04
+
+SYNOPSIS
+      use XML::LibXML::Debugging;
+
+      my $parser = XML::LibXML->new;
+      my $doc    = $parser->parse_file('input.xml');
+      print $doc->toClarkML;
+
+DESCRIPTION
+    This module adds a couple of additional methods to XML::LibXML::Node
+    objects which are mostly aimed at helping figure out what's going on
+    with the DOM's namespaces and structure. "toClarkML" produces a string
+    of XML-like markup with explicit namespaces. The following XML:
+
+      <foo xmlns="http://example.com/1"
+           xmlns:bar="http://example.com/2"
+                     bar:baz="quux" />
+
+    Might be represented as:
+
+      <{http://example.com/1}foo
+           {http://www.w3.org/2000/xmlns/}XMLNS="http://example.com/1"
+           {http://www.w3.org/2000/xmlns/}bar="http://example.com/2"
+                     {http://example.com/2}baz="quux" />
+
+    Another method "toDebuggingHash" returns a hashref suitable for dumping
+    using Data::Dumper.
+
+BUGS
+    Please report any bugs to <http://rt.cpan.org/>.
+
+SEE ALSO
+    XML::LibXML.
+
+AUTHOR
+    Toby Inkster <tobyink@cpan.org>.
+
+COPYRIGHT AND LICENSE
+    Copyright (C) 2009-2010 by Toby Inkster
+
+    This library is free software; you can redistribute it and/or modify it
+    under the same terms as Perl itself, either Perl version 5.8 or, at your
+    option, any later version of Perl 5 you may have available.
+
+# Example: parse a deliberately messy HTML document, run it through
+# fix_document, and print the Clark-notation dump of the DOM before and
+# after so the namespace changes can be compared.
+#
+# NOTE(review): this script still calls HTML::HTML5::Sanity functions rather
+# than the toClarkML method provided by XML::LibXML::Debugging -- confirm
+# whether it should be ported to the new method-based API.
+use strict;
+use warnings;
+
+use lib "lib/";
+use lib "../HTML-HTML5-Parser/lib/";
+
+use HTML::HTML5::Parser;
+use HTML::HTML5::Sanity;
+
+use Data::Dumper;
+$Data::Dumper::Useqq     = 1;
+$Data::Dumper::Quotekeys = 0;
+
+my $parser = HTML::HTML5::Parser->new;
+
+# Single-quoted heredoc: the markup is literal and must never be subject to
+# variable interpolation.
+my $dom    = $parser->parse_string(<<'HTML');
+<html
+	foo="bar"
+	xmlns:quux="http://example.com/"
+	quux:xyzzy="xyzzy"
+	baz:baz="baz"
+	>
+<body xml:lang="en">
+	<!--Hello-->
+	<foo xmlns="http://example.net/">
+		<bar />
+	</foo>
+	<foot:x xmlns:foot="http://example.net/">
+		<bar />
+	</foot:x>
+</body>
+</html>
+HTML
+
+my $sane = fix_document($dom);
+
+print "========\n";
+print HTML::HTML5::Sanity::document_to_clarkml($dom)."\n";
+print "--------\n";
+print HTML::HTML5::Sanity::document_to_clarkml($sane)."\n";
+print "========\n";

File lib/XML/LibXML/Debugging.pm

+# This package only carries the distribution's version number; the
+# debugging methods themselves are defined further down this file, directly
+# inside the XML::LibXML::* node packages.
+package XML::LibXML::Debugging;
+
+use 5.008;
+# These pragmas are lexically scoped to the file, so they also cover the
+# other packages declared below.
+use strict;
+use warnings;
+# Loads XML::LibXML at compile time and makes this package inherit from it.
+use base qw(XML::LibXML);
+
+our $VERSION = '0.04';
+
+1;
+
+package XML::LibXML::Document;
+
+# Describe the document as a hashref suitable for dumping.
+#
+# Returns { type => 'Document', root => ... } where 'root' is the debugging
+# hash of the document element, or undef when the document has no document
+# element (for example a freshly created, still empty document).
+sub toDebuggingHash
+{
+	my $n = shift;
+	
+	# documentElement is undef for an empty document; calling a method on
+	# it would die, so guard explicitly.
+	my $root      = $n->documentElement;
+	my $root_hash = defined $root ? $root->toDebuggingHash : undef;
+	
+	return {
+		'type'   => 'Document',
+		'root'   => $root_hash,
+		};
+}
+
+# Serialise the document as Clark-notation markup.
+#
+# Returns whatever the document element's toClarkML returns, or the empty
+# string when the document has no document element.
+sub toClarkML
+{
+	my $n = shift;
+	
+	# An empty document has no root to delegate to; calling a method on
+	# undef would die.
+	my $root = $n->documentElement;
+	return '' unless defined $root;
+	
+	return $root->toClarkML;
+}
+
+1;
+
+package XML::LibXML::Element;
+
+use HTML::Entities qw(encode_entities_numeric);
+
+# Describe this element as a nested hashref suitable for dumping.
+#
+# The result holds the element's qualified name, prefix, local name (under
+# the key 'suffix') and namespace URI, plus two arrayrefs:
+#   attributes - the debugging hash of each item returned by ->attributes
+#   children   - plain strings for text and CDATA children, debugging
+#                hashes for comment and element children; children of any
+#                other node type are left out.
+sub toDebuggingHash
+{
+	my $elem = shift;
+	
+	my (@attributes, @children);
+	
+	for my $attr ($elem->attributes)
+	{
+		my $described = $attr->toDebuggingHash;
+		push @attributes, $described if $described;
+	}
+	
+	for my $child ($elem->childNodes)
+	{
+		my $kind = $child->nodeType;
+		
+		if ($kind == XML_TEXT_NODE
+		||  $kind == XML_CDATA_SECTION_NODE)
+		{
+			# Character data is recorded as a bare string.
+			push @children, $child->nodeValue;
+		}
+		elsif ($kind == XML_COMMENT_NODE
+		||     $kind == XML_ELEMENT_NODE)
+		{
+			# Comments and elements describe themselves.
+			push @children, $child->toDebuggingHash;
+		}
+	}
+	
+	return {
+		'type'       => 'Element',
+		'qname'      => $elem->nodeName,
+		'prefix'     => $elem->prefix,
+		'suffix'     => $elem->localname,
+		'nsuri'      => $elem->namespaceURI,
+		'attributes' => \@attributes,
+		'children'   => \@children,
+		};
+}
+
+# Serialise this element, its attributes and its descendants as
+# Clark-notation markup, i.e. with names written as "{namespace-uri}local".
+#
+# Attributes are serialised through their own toClarkML. Text and CDATA
+# children are entity-encoded with encode_entities_numeric, comments are
+# written as <!--...-->, element children recurse, and children of any
+# other node type are omitted. An element without child nodes is written
+# in self-closing form.
+sub toClarkML
+{
+	my $n = shift;
+	
+	# The Clark name is needed for both the start and the end tag, so it is
+	# worked out once.
+	my $nsuri = $n->namespaceURI;
+	my $name  = defined $nsuri
+		? sprintf("{%s}%s", $nsuri, $n->localname)
+		: sprintf("%s", $n->localname);
+	
+	my $rv = "<" . $name;
+	
+	foreach my $attr ($n->attributes)
+	{
+		my $x = $attr->toClarkML;
+		$rv .= " $x" if $x;
+	}
+	
+	# hasChildNodes answers the question directly, without building a
+	# child list just to test whether it is empty.
+	if (! $n->hasChildNodes)
+	{
+		return $rv . "/>";
+	}
+	
+	$rv .= ">";
+	
+	foreach my $kid ($n->childNodes)
+	{
+		my $type = $kid->nodeType;
+		
+		if ($type == XML_TEXT_NODE
+		||  $type == XML_CDATA_SECTION_NODE)
+		{
+			$rv .= encode_entities_numeric($kid->nodeValue);
+		}
+		elsif ($type == XML_COMMENT_NODE)
+		{
+			$rv .= "<!--" . $kid->nodeValue . "-->";
+		}
+		elsif ($type == XML_ELEMENT_NODE)
+		{
+			$rv .= $kid->toClarkML;
+		}
+	}
+	
+	$rv .= "</" . $name . ">";
+	
+	return $rv;
+}
+
+1;
+
+package XML::LibXML::Comment;
+
+# Describe this comment node as a hashref: 'type' is always 'Comment' and
+# 'comment' holds the node's value.
+sub toDebuggingHash
+{
+	my ($comment) = @_;
+	
+	my %described = (
+		'type'    => 'Comment',
+		'comment' => $comment->nodeValue,
+		);
+	
+	return \%described;
+}
+
+# Serialise this comment node as "<!--" . value . "-->". The value is
+# written as-is, without any escaping.
+sub toClarkML
+{
+	my ($comment) = @_;
+	
+	my $text = $comment->nodeValue;
+	return "<!--${text}-->";
+}
+
+1;
+
+package XML::LibXML::Attr;
+
+use XML::LibXML qw(:all);
+
+# Describe this attribute (or namespace declaration) as a hashref.
+#
+# Both shapes carry the keys type, qname, prefix, suffix, nsuri and value.
+# Nodes whose nodeType is XML_NAMESPACE_DECL are tagged
+# 'Attribute (XMLNS)' and read through getLocalName / getNamespaceURI /
+# getData; everything else is tagged 'Attribute' and read through
+# localname / namespaceURI / nodeValue.
+sub toDebuggingHash
+{
+	my $attr = shift;
+	
+	my %described;
+	
+	if ($attr->nodeType != XML_NAMESPACE_DECL)
+	{
+		# Ordinary attribute.
+		%described = (
+			'type'    => 'Attribute',
+			'qname'   => $attr->nodeName,
+			'prefix'  => $attr->prefix,
+			'suffix'  => $attr->localname,
+			'nsuri'   => $attr->namespaceURI,
+			'value'   => $attr->nodeValue,
+			);
+	}
+	else
+	{
+		# Namespace declaration.
+		%described = (
+			'type'    => 'Attribute (XMLNS)',
+			'qname'   => $attr->nodeName,
+			'prefix'  => $attr->prefix,
+			'suffix'  => $attr->getLocalName,
+			'nsuri'   => $attr->getNamespaceURI,
+			'value'   => $attr->getData,
+			);
+	}
+	
+	return \%described;
+}
+
+# Serialise this attribute (or namespace declaration) as name="value" with
+# the name in Clark notation.
+#
+# Namespace declarations are written as {ns-uri}prefix="declared-uri", or
+# {ns-uri}XMLNS="declared-uri" when getLocalName is undefined. Ordinary
+# attributes are written as {ns-uri}local="value", or local="value" when
+# they have no namespace URI.
+#
+# Values are entity-encoded with HTML::Entities::encode_entities_numeric,
+# the same encoding the element serialiser applies to text nodes, so that a
+# quote, ampersand or angle bracket in a value cannot break the markup.
+sub toClarkML
+{
+	my $n = shift;
+
+	if ($n->nodeType == XML_NAMESPACE_DECL)
+	{
+		my $value = HTML::Entities::encode_entities_numeric($n->getData);
+		
+		if (defined $n->getLocalName)
+		{
+			return sprintf("{%s}%s=\"%s\"",
+				$n->getNamespaceURI, $n->getLocalName, $value);
+		}
+		return sprintf("{%s}XMLNS=\"%s\"",
+			$n->getNamespaceURI, $value);
+	}
+	
+	my $value = HTML::Entities::encode_entities_numeric($n->nodeValue);
+	
+	if (defined $n->namespaceURI)
+	{
+		return sprintf("{%s}%s=\"%s\"",
+			$n->namespaceURI, $n->localname, $value);
+	}
+	else
+	{
+		return sprintf("%s=\"%s\"",
+			$n->localname, $value);
+	}
+}
+
+1;
+
+package XML::LibXML::Node;
+
+# Generic version defined on the base node package: always returns the
+# empty string, whatever it is called with.
+sub toClarkML
+{
+	my $nothing = '';
+	return $nothing;
+}
+
+# Generic version defined on the base node package: always returns a fresh
+# hashref whose only key is 'type', set to 'Node'.
+sub toDebuggingHash
+{
+	my %described = ('type' => 'Node');
+	return \%described;
+}
+
+1;
+
+package XML::LibXML::Namespace;
+
+# Namespace nodes reuse the attribute serialiser: every argument (the node
+# itself first) is passed straight to XML::LibXML::Attr::toClarkML as a
+# plain function call, and its result is returned.
+sub toClarkML
+{
+	my @args = @_;
+	return XML::LibXML::Attr::toClarkML(@args);
+}
+
+# Namespace nodes reuse the attribute describer: every argument (the node
+# itself first) is passed straight to XML::LibXML::Attr::toDebuggingHash as
+# a plain function call, and its result is returned.
+sub toDebuggingHash
+{
+	my @args = @_;
+	return XML::LibXML::Attr::toDebuggingHash(@args);
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+XML::LibXML::Debugging - get debugging information from XML::LibXML nodes
+
+=head1 VERSION
+
+0.04
+
+=head1 SYNOPSIS
+
+  use XML::LibXML::Debugging;
+
+  my $parser = XML::LibXML->new;
+  my $doc    = $parser->parse_file('input.xml');
+  print $doc->toClarkML;
+
+=head1 DESCRIPTION
+
+This module adds a couple of additional methods to XML::LibXML::Node
+objects which are mostly aimed at helping figure out what's going on
+with the DOM's namespaces and structure. C<toClarkML> produces a
+string of XML-like markup with explicit namespaces. The following XML:
+
+  <foo xmlns="http://example.com/1"
+       xmlns:bar="http://example.com/2"
+       bar:baz="quux" />
+
+Might be represented as:
+
+  <{http://example.com/1}foo
+       {http://www.w3.org/2000/xmlns/}XMLNS="http://example.com/1"
+       {http://www.w3.org/2000/xmlns/}bar="http://example.com/2"
+       {http://example.com/2}baz="quux" />
+
+Another method C<toDebuggingHash> returns a hashref suitable for
+dumping using Data::Dumper.
+
+=head1 BUGS
+
+Please report any bugs to L<http://rt.cpan.org/>.
+
+=head1 SEE ALSO
+
+L<XML::LibXML>.
+
+=head1 AUTHOR
+
+Toby Inkster E<lt>tobyink@cpan.orgE<gt>.
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (C) 2009-2010 by Toby Inkster
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself, either Perl version 5.8 or,
+at your option, any later version of Perl 5 you may have available.
+
+=cut
+# Signature test: a single planned test, provided by Test::Signature's
+# signature_ok().
+# 'inc' is added to @INC before Test::Signature is loaded.
+use lib 'inc';
+use Test::More tests => 1;
+use Test::Signature;
+signature_ok();

File t/01basics.t

+# Basic checks: the module loads, and toClarkML / toDebuggingHash produce
+# the expected output for a one-element XHTML document.
+use strict;
+use warnings;
+
+use Test::More tests => 3;
+BEGIN { use_ok('XML::LibXML::Debugging') };
+
+use XML::LibXML;
+
+# Fixture: <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-gb-oed"/>
+my $doc  = XML::LibXML::Document->new;
+my $root = $doc->createElementNS('http://www.w3.org/1999/xhtml', 'html');
+$doc->setDocumentElement($root);
+$root->setAttribute('xml:lang', 'en-gb-oed');
+
+is(
+	$doc->toClarkML,
+	'<{http://www.w3.org/1999/xhtml}html {http://www.w3.org/XML/1998/namespace}lang="en-gb-oed" {http://www.w3.org/2000/xmlns/}XMLNS="http://www.w3.org/1999/xhtml"/>',
+	"toClarkML seems to be working.");
+
+# Expected structure for the same document. The default namespace
+# declaration is expected as an 'Attribute (XMLNS)' entry with an undefined
+# suffix.
+my $correct = {
+          'root' => {
+                      'nsuri' => 'http://www.w3.org/1999/xhtml',
+                      'suffix' => 'html',
+                      'qname' => 'html',
+                      'children' => [],
+                      'type' => 'Element',
+                      'attributes' => [
+                                        {
+                                          'value' => 'en-gb-oed',
+                                          'nsuri' => 'http://www.w3.org/XML/1998/namespace',
+                                          'suffix' => 'lang',
+                                          'qname' => 'xml:lang',
+                                          'type' => 'Attribute',
+                                          'prefix' => 'xml'
+                                        },
+                                        {
+                                          'value' => 'http://www.w3.org/1999/xhtml',
+                                          'nsuri' => 'http://www.w3.org/2000/xmlns/',
+                                          'suffix' => undef,
+                                          'qname' => 'xmlns',
+                                          'type' => 'Attribute (XMLNS)',
+                                          'prefix' => 'xmlns'
+                                        }
+                                      ],
+                      'prefix' => undef
+                    },
+          'type' => 'Document'
+        };
+
+is_deeply($doc->toDebuggingHash, $correct, "toDebuggingHash seems to work.");