Commits

Toby Inkster committed 0f2e389

replace old regexps; misc minor changes; bump version number; new test case

Comments (0)

Files changed (5)

lib/RDF/Prefixes.pm

 package RDF::Prefixes;
 
 use 5.010;
-use common::sense;
+use strict;
 use constant ARRAY_INDEX_USED      => 0;
 use constant ARRAY_INDEX_SUGGESTED => 1;
 use constant ARRAY_INDEX_OPTIONS   => 2;
 use overload '%{}' => \&to_hashref;
 use overload '""'  => \&to_string;
 
-use Carp qw[];
+BEGIN {
+	eval 'use Carp; 1'
+	or eval 'sub carp { warn "$_[0]\n" }'
+}
 
 BEGIN {
 	$RDF::Prefixes::AUTHORITY = 'cpan:TOBYINK';
-	$RDF::Prefixes::VERSION   = '0.002';
+	$RDF::Prefixes::VERSION   = '0.003';
 }
 
-our ($r_nameStartChar, $r_nameChar, $r_prefix);
-BEGIN {
-	$r_nameStartChar = qr'[A-Za-z\x{00C0}-\x{00D6}\x{00D8}-\x{00F6}\x{00F8}-\x{02FF}\x{0370}-\x{037D}\x{037F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{00010000}-\x{000EFFFF}]';
-	$r_nameChar      = qr'${r_nameStartChar}|[-_0-9\x{b7}\x{0300}-\x{036f}\x{203F}-\x{2040}]';
-	$r_prefix        = qr'${r_nameStartChar}${r_nameChar}*';
-}
+# These are the rules from Turtle (W3C WD, dated 09 Aug 2011).
+# XML 1.0 5e's syntax for XML names (i.e. element names,
+# attribute names, etc) appears to be pretty similar except
+# that it allows names to start with a colon or full-stop.
+# (The former would violate XML namespaces, but is allowed by
+# XML itself - apparently.)
+#
+# So anyway, we go with Turtle as that is the more restrictive
+# syntax, thus any valid Turtle names should automatically be
+# valid XML names.
+#
+
+my $PN_CHARS_BASE = qr<(?:
+	[A-Z]
+	| [a-z]
+	| [\x{00C0}-\x{00D6}]
+	| [\x{00D8}-\x{00F6}]
+	| [\x{00F8}-\x{02FF}]
+	| [\x{0370}-\x{037D}]
+	| [\x{037F}-\x{1FFF}]
+	| [\x{200C}-\x{200D}]
+	| [\x{2070}-\x{218F}]
+	| [\x{2C00}-\x{2FEF}]
+	| [\x{3001}-\x{D7FF}]
+	| [\x{F900}-\x{FDCF}]
+	| [\x{FDF0}-\x{FFFD}]
+	| [\x{10000}-\x{EFFFF}]
+)>x;
+
+my $PN_CHARS_U = qr<(?:
+	$PN_CHARS_BASE
+	| [_]
+)>x;
+
+my $PN_CHARS = qr<(?:
+	$PN_CHARS_U
+	| [0-9-]
+	| [\x{00B7}]
+	| [\x{0300}-\x{036F}]
+	| [\x{203F}-\x{2040}]
+)>x;
+
+my $PN_PREFIX = qr<
+	$PN_CHARS_BASE
+	(?:
+		(?: $PN_CHARS | [.] )*
+		$PN_CHARS
+	)?
+>x;
+
+my $PN_LOCAL = qr<
+	(?: $PN_CHARS_U )   # change from Turtle: disallow digits here
+	(?:
+		(?: $PN_CHARS | [.] )*
+		$PN_CHARS
+	)?
+>x;
 
 sub new
 {
 		
 	foreach my $s (reverse sort keys %$suggested)
 	{
-		if ($s =~ /^[a-z0-9][a-z0-9_\.]*$/i)
+		if ($s =~ m< ^ $PN_PREFIX $ >ix)
 		{
 			$self->[ARRAY_INDEX_SUGGESTED]{ $suggested->{$s} } = $s;
 		}
 		else
 		{
-			Carp::carp "Ignored suggestion $s => " . $suggested->{$s};
+			carp "Ignored suggestion $s => " . $suggested->{$s};
 		}
 	}
 	
-	return bless $self, $class;
+	bless $self, $class;
 }
 
 sub get_prefix
 
 sub preview_prefix
 {
-	return _practical_prefix(@_);
+	shift->_practical_prefix(@_);
+}
+
+sub _valid_qname
+{
+	my ($self, $p, $l) = @_;
+	return undef unless defined $p && defined $l;
+	return undef unless $l =~ m< ^ $PN_LOCAL $ >x;
+	
+	join q(:) => ($p, $l);
 }
 
 sub get_qname
 	my ($p, $s) = $self->_split_qname($url);
 	return undef unless defined $p and defined $s;
 	
-	return $self->get_prefix($p) . ':' . $s;
+	return $self->_valid_qname($self->get_prefix($p), $s);
 }
 
 sub preview_qname
 	my ($p, $s) = $self->_split_qname($url);
 	return undef unless defined $p and defined $s;
 	
-	return $self->preview_prefix($p) . ':' .  $s;
+	return $self->_valid_qname($self->preview_prefix($p), $s);
 }
 
 sub get_curie
 	my $rv;
 	foreach my $prefix (sort keys %$self)
 	{
-		$rv .= sprintf("\@prefix %s: <%s> .\n",
-			$prefix,
+		$rv .= sprintf("\@prefix %-6s <%s> .\n",
+			$prefix.':',
 			$self->{$prefix});
 	}
 	return $rv;
 {
 	my ($self, $uri) = @_;
         
-	my $nameStartChar  = qr<([A-Za-z_]|[\x{C0}-\x{D6}]|[\x{D8}-\x{D8}]|[\x{F8}-\x{F8}]|[\x{200C}-\x{200C}]|[\x{37F}-\x{1FFF}][\x{200C}-\x{200C}]|[\x{2070}-\x{2070}]|[\x{2C00}-\x{2C00}]|[\x{3001}-\x{3001}]|[\x{F900}-\x{F900}]|[\x{FDF0}-\x{FDF0}]|[\x{10000}-\x{10000}])>;
-	my $nameChar       = qr<$nameStartChar|-|[.]|[0-9]|\x{B7}|[\x{0300}-\x{036F}]|[\x{203F}-\x{2040}]>;
-	my $lnre           = qr<((${nameStartChar})($nameChar)*)>;
-	
-   if ($uri =~ m/${lnre}$/)
+   if ($uri =~ m< ($PN_LOCAL) $ >x)
 	{
 		my $ln  = $1;
 		my $ns  = substr($uri, 0, length($uri)-length($ln));
 	return;
 }
 
+my $looks_like_version = qr< ^ [0-9\.-]+ $ >x;
+my $too_generic        = [qw< terms ns vocab vocabulary rdf rdfs owl schema xsd >];
+
 sub _perfect_prefix
 {
 	my ($self, $url) = @_;
 	
 	return $chosen if length $chosen;
 
-	my @words = split /[^A-Za-z0-9\._-]/, $url;
+	my @words = map { lc; } ($url =~ m< ((?:$PN_CHARS|\.)+) >xg);
 	WORD: while (defined(my $w = pop @words))
 	{
-		next unless length $w;
-		next if $w =~ /^[0-9\.-]+$/; # looks like a date or version number
-		next if $w =~ /^(terms|ns|vocab|vocabulary|rdf|rdfs|owl|schema)$/i; # too generic
-		next unless $w =~ /^[a-z0-9][a-z0-9_\.]*$/i;
+		next WORD if (
+			   length $w < 1
+			or $w ~~ $looks_like_version
+			or $w ~~ $too_generic
+			or $w !~ m< ^ $PN_PREFIX $ >x
+		);
 		
 		$chosen = $w;
 		last WORD;
 	}
 	
-	$chosen =~ s/\.(owl|rdf|rdfx|rdfs|nt|ttl|turtle|xml|org|com|net)$//i;
+	$chosen =~ s< [.] (owl|rdf|rdfx|rdfs|nt|ttl|turtle|xml|org|com|net) $ >()x;
 	$chosen = 'ex' if $chosen eq 'example';
 	return undef unless length $chosen;	
 	return lc $chosen;
 serialises data using namespaces.
 
 It generates pretty prefixes, reducing "http://purl.org/dc/terms/"
-to "dc" rather than something anonymous like like "ns01", and provides
+to "dc" rather than something too generic like like "ns01", and provides
 a context for keeping track of namespaces already used, so that when
 "http://purl.org/dc/elements/1.1/" is encountered, it won't stomp on
 the previous definition of "dc".
 =item C<< get_qname($uri) >>
 
 Gets a QName for a URI. e.g.
-C<< get_prefix('http://purl.org/dc/terms/title') >> might return 'dc:title'.
+C<< get_qname('http://purl.org/dc/terms/title') >> might return 'dc:title'.
 
 Some URIs cannot be converted to QNames. In these cases, undef is returned.
 
 
 =head1 COPYRIGHT
 
-Copyright 2010-2011 Toby Inkster
+Copyright 2010-2012 Toby Inkster
 
 This library is free software; you can redistribute it and/or modify it
 under the same terms as Perl itself.
 		dcs:item   [ rdfs:label "Module::Package::RDF" ; a dcs:Packaging ] ;
 		dcs:item   [ rdfs:label "use 5.010" ; a dcs:Update ]
 		] .
+
+_:sm a foaf:Person ;
+	foaf:name "Saašha Metsärantala" ;
+	foaf:mbox_sha1sum "5864440b7beefff0c8811ec0a4ef7bba0804e5fc" .
+
+my:project :release my:v_0-003 .
+my:v_0-003
+	a               :Version ;
+	dc:issued       "2011-11-29"^^xsd:date ;
+	:revision       "0.003"^^xsd:string ;
+	:file-release   <http://backpan.cpan.org/authors/id/T/TO/TOBYINK/RDF-Prefixes-0.003.tar.gz> ;
+	dcs:changeset [
+		dcs:versus my:v_0-002 ;
+		dcs:item   [ rdfs:label "Copyright 2012."@en ; a dcs:Update ];
+		dcs:item   [ rdfs:label "Add test case covering warning message."@en ; a dcs:Packaging ];
+		dcs:item   [ rdfs:label "Remove unneeded dependencies."@en ];
+		dcs:item   [ rdfs:label "Fix typo."@en; a dcs:Documentation; dcs:thanks _:sm ];
+		dcs:item   [ rdfs:label "Review regular expressions for QNames."@en; dcs:thanks _:sm ];
+		dcs:item   [ rdfs:label "Disallow other vocabularies squatting on 'xsd' prefix."@en; dcs:thanks _:sm ];
+		dcs:item   [ rdfs:label "Remove some redundant regular expressions."@en; dcs:thanks _:sm ]
+		] .

meta/makefile.ttl

 	:perl_version_from _:main ;
 	:version_from _:main ;
 	:readme_from _:main ;
-	:test_requires "Test::More 0.61" ;
-	:requires "common::sense" , "Carp" .
+	:test_requires "Test::More 0.61", "Test::Warn".
 
 _:main <http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#fileName> "lib/RDF/Prefixes.pm" .
 
 	'get_curie returned something sensible!');
 
 is("$context", <<'TURTLE', "output seems OK");
-@prefix foaf: <http://xmlns.com/foaf/0.1/> .
+@prefix foaf:  <http://xmlns.com/foaf/0.1/> .
 @prefix foaf2: <http://xmlns.com/foaf/1.0/> .
-@prefix foo: <http://example.com/foo.rdf#> .
-@prefix foo2: <http://example.com/foo.rdf#0> .
+@prefix foo:   <http://example.com/foo.rdf#> .
+@prefix foo2:  <http://example.com/foo.rdf#0> .
 TURTLE
+use Test::More tests => 1;
+use Test::Warn;
+use RDF::Prefixes;
+
+warning_like {
+	RDF::Prefixes->new({
+		'-x' => 'http://www.example.com/',
+	})
+} qr{^Ignored suggestion -x\b};
+