1. Toby Inkster
  2. p5-syntax-highlight-rdf

Commits

Toby Inkster  committed 487aa2c

add XML and JSON highlighting

  • Participants
  • Parent commits e44888f
  • Branches default

Comments (0)

Files changed (9)

File examples/json_to_html.pl

View file
+use v5.10;
+use strict;
+use warnings;
+
+use Syntax::Highlight::RDF;
+
+my $hl = "Syntax::Highlight::RDF"->highlighter("JSON");
+
+print $hl->highlight(\*DATA);
+
+__DATA__
+{
+	"http://example.org/about": 
+	{
+		"http://purl.org/dc/elements/1.1/title":
+		[
+			{ "type": "literal" , "value": "Anna's Homepage" }
+		]
+	}
+}

File examples/to_html.pl

-use v5.10;
-use strict;
-use warnings;
-
-use Syntax::Highlight::RDF;
-
-my $hl = "Syntax::Highlight::RDF"->new;
-
-print $hl->highlightText(\*DATA, "http://www.example.net/");
-
-__DATA__
-@base <http://www.example.org/> .
-@prefix foo: <http://example.com/foo#> .
-@prefix quux: <quux#>.
-
-<xyz>
-   foo:bar 123;
-   foo:baz "Yeah\"Baby\"Yeah";
-   foo:bum quux:quuux.
-

File examples/tokens.pl

-use v5.10;
-use strict;
-use warnings;
-
-use Syntax::Highlight::RDF;
-
-my $data = do { local $/ = <DATA> };
-my $hl   = "Syntax::Highlight::RDF"->new;
-
-$hl->tokenize(\$data);
-$hl->_fixup("http://www.example.net/");
-
-for my $tok (@{$hl->_tokens})
-{
-	say $tok->tok;
-}
-
-__DATA__
-@base <http://www.example.org/> .
-@prefix foo: <http://example.com/foo#> .
-@prefix quux: <quux#>.
-
-<xyz>
-   foo:bar 123;
-   foo:baz "Yeah\"Baby\"Yeah";
-   foo:bum quux:quuux.
-

File examples/turtle_to_html.pl

View file
+use v5.10;
+use strict;
+use warnings;
+
+use Syntax::Highlight::RDF;
+
+my $hl   = "Syntax::Highlight::RDF"->highlighter("Turtle");
+
+print $hl->highlight(\*DATA, "http://www.example.net/");
+
+__DATA__
+@base <http://www.example.org/> .
+@prefix foo: <http://example.com/foo#> .
+@prefix quux: <quux#>.
+
+<xyz>
+   foo:bar 123;
+   foo:baz "Yeah\"Baby\"Yeah";
+   foo:bum quux:quuux.
+

File examples/turtle_tokens.pl

View file
+use v5.10;
+use strict;
+use warnings;
+
+use Syntax::Highlight::RDF;
+
+my $data = do { local $/ = <DATA> };
+my $hl   = "Syntax::Highlight::RDF"->highlighter("Turtle");
+
+$hl->tokenize(\$data);
+$hl->_fixup("http://www.example.net/");
+
+for my $tok (@{$hl->_tokens})
+{
+	say $tok->tok;
+}
+
+__DATA__
+@base <http://www.example.org/> .
+@prefix foo: <http://example.com/foo#> .
+@prefix quux: <quux#>.
+
+<xyz>
+   foo:bar 123;
+   foo:baz "Yeah\"Baby\"Yeah";
+   foo:bum quux:quuux.
+

File examples/xml_to_html.pl

View file
+use v5.10;
+use strict;
+use warnings;
+
+use Syntax::Highlight::RDF;
+
+my $hl = "Syntax::Highlight::RDF"->highlighter("XML");
+
+print $hl->highlight(\*DATA);
+
+__DATA__
+<?xml version="1.0"?>
+<!DOCTYPE rdf:RDF PUBLIC "-//DUBLIN CORE//DCMES DTD 2002/07/31//EN"
+    "http://dublincore.org/documents/2002/07/31/dcmes-xml/dcmes-xml-dtd.dtd">
+<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+         xmlns:dc="http://purl.org/dc/elements/1.1/">
+  <rdf:Description rdf:about="http://www.ilrt.bristol.ac.uk/people/cmdjb/">
+    <dc:title>Dave Beckett's Home Page</dc:title>
+    <dc:creator>Dave Beckett</dc:creator>
+    <dc:publisher>ILRT, University of Bristol</dc:publisher>
+    <dc:date>2002-07-31</dc:date>
+  </rdf:Description>
+</rdf:RDF>

File lib/Syntax/Highlight/JSON2.pm

View file
+use 5.008;
+use strict;
+use warnings;
+
+{
+	package Syntax::Highlight::JSON2;
+
+	our $AUTHORITY = 'cpan:TOBYINK';
+	our $VERSION   = '0.001';
+	
+	use MooX::Struct -retain, -rw,
+		Feature                   => [],
+		Token                     => [-extends => [qw<Feature>], qw($spelling!)],
+		Brace                     => [-extends => [qw<Token>]],
+		Bracket                   => [-extends => [qw<Token>]],
+		String                    => [-extends => [qw<Token>]],
+		Number                    => [-extends => [qw<Token>]],
+		Number_Double             => [-extends => [qw<Number>]],
+		Number_Decimal            => [-extends => [qw<Number>]],
+		Number_Integer            => [-extends => [qw<Number>]],
+		Punctuation               => [-extends => [qw<Token>]],
+		Keyword                   => [-extends => [qw<Token>]],
+		Boolean                   => [-extends => [qw<Keyword>]],
+		Whitespace                => [-extends => [qw<Token>]],
+		Unknown                   => [-extends => [qw<Token>]],
+	;
+
+	use Throwable::Factory
+		Tokenization              => [qw( $remaining -caller )],
+		NotImplemented            => [qw( -notimplemented )],
+		WTF                       => [],
+		WrongInvocant             => [qw( -caller )],
+	;
+
+	{
+		use HTML::HTML5::Entities qw/encode_entities/;
+		
+		no strict 'refs';
+		*{Feature    . "::tok"}        = sub { sprintf "%s~", $_[0]->TYPE };
+		*{Token      . "::tok"}        = sub { sprintf "%s[%s]", $_[0]->TYPE, $_[0]->spelling };
+		*{Whitespace . "::tok"}        = sub { $_[0]->TYPE };
+		*{Feature    . "::TO_STRING"}  = sub { "" };
+		*{Token      . "::TO_STRING"}  = sub { $_[0]->spelling };
+		*{Token      . "::TO_HTML"}    = sub {
+			sprintf "<span class=\"json_%s\">%s</span>", lc $_[0]->TYPE, encode_entities($_[0]->spelling)
+		};
+		*{Whitespace . "::TO_HTML"}  = sub { $_[0]->spelling };
+	}
+
+	use Moo;
+
+	has _tokens     => (is => 'rw');
+	has _remaining  => (is => 'rw');
+	
+	use IO::Detect qw( as_filehandle );
+		
+	sub _peek
+	{
+		my $self = shift;
+		my ($regexp) = @_;
+		$regexp = qr{^(\Q$regexp\E)} unless ref $regexp;
+		
+		if (my @m = (${$self->_remaining} =~ $regexp))
+		{
+			return \@m;
+		}
+		
+		return;
+	}
+
+	sub _pull_token
+	{
+		my $self = shift;
+		my ($spelling, $class, %more) = @_;
+		defined $spelling or WTF->throw("Tried to pull undef token!");
+		substr(${$self->_remaining}, 0, length $spelling, "");
+		push @{$self->_tokens}, $class->new(spelling => $spelling, %more);
+	}
+
+	sub _pull_whitespace
+	{
+		my $self = shift;
+		$self->_pull_token($1, Whitespace)
+			if ${$self->_remaining} =~ m/^(\s*)/sm;
+	}
+	
+	sub _pull_string
+	{
+		my $self = shift;
+		# Extract string with escaped characters
+		${$self->_remaining} =~ m#^("((?:[^\x00-\x1F\\"]|\\(?:["\\/bfnrt]|u[[:xdigit:]]{4})){0,32766})*")#
+			? $self->_pull_token($1, String)
+			: $self->_pull_token('"', Unknown);
+	}
+	
+	sub tokenize
+	{
+		my $self = shift;
+		ref $self or WrongInvocant->throw("this is an object method!");
+		
+		my ($text_ref) = @_;
+		$self->_remaining(
+			ref($text_ref) eq 'SCALAR'
+				? $text_ref
+				: do { local $/; my $h = as_filehandle($text_ref); \(my $t = <$h>) }
+		);
+		$self->_tokens([]);
+		
+		# Declare this ahead of time for use in the big elsif!
+		my $matches;
+		
+		while (length ${ $self->_remaining })
+		{
+			$self->_pull_whitespace if $self->_peek(qr{^\s+});
+			
+			if ($matches = $self->_peek(qr!^([\,\:])!))
+			{
+				$self->_pull_token($matches->[0], Punctuation);
+			}
+			elsif ($matches = $self->_peek(qr!^([\[\]])!))
+			{
+				$self->_pull_token($matches->[0], Bracket);
+			}
+			elsif ($matches = $self->_peek(qr!^( \{ | \} )!x))
+			{
+				$self->_pull_token($matches->[0], Brace);
+			}
+			elsif ($self->_peek("null"))
+			{
+				$self->_pull_token("null", Keyword);
+			}
+			elsif ($matches = $self->_peek(qr!^(true|false)!))
+			{
+				$self->_pull_token($matches->[0], Boolean);
+			}
+			elsif ($self->_peek('"'))
+			{
+				$self->_pull_string;
+			}
+			elsif ($matches = $self->_peek(qr!^([-]?(?:0|[1-9][0-9]*)(?:\.[0-9]*)?(?:[eE][+-]?[0-9]+)?)!))
+			{
+				my $n = $matches->[0];
+				if ($n =~ /e/i)    { $self->_pull_token($n, Number_Double) }
+				elsif ($n =~ /\./) { $self->_pull_token($n, Number_Decimal) }
+				else               { $self->_pull_token($n, Number_Integer) }
+			}
+			elsif ($matches = $self->_peek(qr/^([^\s\r\n]+)[\s\r\n]/ms))
+			{
+				$self->_pull_token($matches->[0], Unknown);
+			}
+			elsif ($matches = $self->_peek(qr/^([^\s\r\n]+)$/ms))
+			{
+				$self->_pull_token($matches->[0], Unknown);
+			}
+			else
+			{
+				Tokenization->throw(
+					"Could not tokenise string!",
+					remaining => ${ $self->_remaining },
+				);
+			}
+			
+			$self->_pull_whitespace if $self->_peek(qr{^\s+});
+		}
+		
+		return $self->_tokens;
+	}
+
+	sub highlight
+	{
+		my $self = shift;
+		ref $self or WrongInvocant->throw("this is an object method!");
+		$self->tokenize(@_);
+		return join "", map $_->TO_HTML, @{$self->_tokens};
+	}
+}
+
+1;
+

File lib/Syntax/Highlight/RDF.pm

View file
 	};
 	
 	$hint =~ m{json}i and do {
-		require Syntax::Highlight::JSON;
-		return "Syntax::Highlight::JSON"->new;
+		require Syntax::Highlight::JSON2;
+		return "Syntax::Highlight::JSON2"->new;
 	};
 	
 	$hint =~ m{(ttl|turtle)}i       and return "$class\::Turtle"->new;
 
 =item *
 
-SPARQL Query 1.1
+SPARQL Query 1.1 (but not property paths yet)
 
 =item *
 
-SPARQL Update 1.1
+SPARQL Update 1.1 (but not property paths yet)
 
 =item *
 

File lib/Syntax/Highlight/XML.pm

View file
+use 5.008;
+use strict;
+use warnings;
+
+{
+	package Syntax::Highlight::XML;
+
+	our $AUTHORITY = 'cpan:TOBYINK';
+	our $VERSION   = '0.001';
+	
+	use MooX::Struct -retain, -rw,
+		Feature                   => [],
+		Token                     => [-extends => [qw<Feature>], qw($spelling!)],
+		Name                      => [-extends => [qw<Token>]],
+		Pointy                    => [-extends => [qw<Token>]],
+		Equals                    => [-extends => [qw<Token>]],
+		AttributeName             => [-extends => [qw<Name>]],
+		AttributeValue            => [-extends => [qw<Token>]],
+		Data                      => [-extends => [qw<Token>]],
+		Data_Whitespace           => [-extends => [qw<Data>]],
+		TagName                   => [-extends => [qw<Name>]],
+		Slash                     => [-extends => [qw<Token>]],
+		Whitespace                => [-extends => [qw<Token>]],
+		Structure_Start           => [-extends => [qw<Feature>], qw($end)],
+		Structure_End             => [-extends => [qw<Feature>], q($start) => [weak_ref => 1]],
+		Attribute_Start           => [-extends => [qw<Structure_Start>], qw($name $value)],
+		Attribute_End             => [-extends => [qw<Structure_End>]],
+		Tag_Start                 => [-extends => [qw<Structure_Start>], qw($name +is_opening +is_closing +is_pi +is_doctype)],
+		Tag_End                   => [-extends => [qw<Structure_End>]],
+	;
+
+	use Throwable::Factory
+		Tokenization              => [qw( $remaining -caller )],
+		NotImplemented            => [qw( -notimplemented )],
+		WTF                       => [],
+		WrongInvocant             => [qw( -caller )],
+	;
+
+	{
+		use HTML::HTML5::Entities qw/encode_entities/;
+		
+		no strict 'refs';
+		*{Feature    . "::tok"}        = sub { sprintf "%s~", $_[0]->TYPE };
+		*{Token      . "::tok"}        = sub { sprintf "%s[%s]", $_[0]->TYPE, $_[0]->spelling };
+		*{Whitespace . "::tok"}        = sub { $_[0]->TYPE };
+		*{Data_Whitespace . "::tok"}   = sub { $_[0]->TYPE };
+		*{Feature    . "::TO_STRING"}  = sub { "" };
+		*{Token      . "::TO_STRING"}  = sub { $_[0]->spelling };
+		*{Token      . "::TO_HTML"}    = sub {
+			sprintf "<span class=\"xml_%s\">%s</span>", lc $_[0]->TYPE, encode_entities($_[0]->spelling)
+		};
+		*{Whitespace . "::TO_HTML"}  = sub { $_[0]->spelling };
+		*{Data_Whitespace . "::TO_HTML"} = sub { $_[0]->spelling };
+		*{Structure_Start . "::TO_HTML"} = sub {
+			my @attrs = sprintf 'class="xml_%s"', lc $_[0]->TYPE;
+			sprintf "<span %s>", join " ", @attrs;
+		};
+		*{Structure_End . "::TO_HTML"} = sub {
+			"</span>"
+		};
+		*{Tag_Start . "::TO_HTML"} = sub {
+			my @classes = sprintf 'xml_%s', lc $_[0]->TYPE;
+			push @classes, "xml_tag_is_pi"        if $_[0]->is_pi;
+			push @classes, "xml_tag_is_doctype"   if $_[0]->is_doctype;
+			push @classes, "xml_tag_is_opening"   if $_[0]->is_opening;
+			push @classes, "xml_tag_is_closing"   if $_[0]->is_closing;
+			push @classes, "xml_tag_self_closing" if $_[0]->is_opening && $_[0]->is_closing;
+			my @attrs = sprintf 'data-xml-name="%s"', $_[0]->name->spelling;
+			sprintf '<span class="%s" %s>', join(" ", @classes), join(" ", @attrs);
+		};
+		*{Attribute_Start . "::TO_HTML"} = sub {
+			my @classes = sprintf 'xml_%s', lc $_[0]->TYPE;
+			push @classes, "xml_attribute_is_xmlns" if $_[0]->name->spelling =~ /^xmlns\b/;
+			push @classes, "xml_attribute_is_core"  if $_[0]->name->spelling =~ /^xml\b/;
+			my @attrs = sprintf 'data-xml-name="%s"', $_[0]->name->spelling;
+			sprintf '<span class="%s" %s>', join(" ", @classes), join(" ", @attrs);
+		};
+	}
+
+	use Moo;
+
+	has _tokens     => (is => 'rw');
+	has _remaining  => (is => 'rw');
+	
+	use IO::Detect qw( as_filehandle );
+		
+	sub _peek
+	{
+		my $self = shift;
+		my ($regexp) = @_;
+		$regexp = qr{^(\Q$regexp\E)} unless ref $regexp;
+		
+		if (my @m = (${$self->_remaining} =~ $regexp))
+		{
+			return \@m;
+		}
+		
+		return;
+	}
+
+	sub _pull_token
+	{
+		my $self = shift;
+		my ($spelling, $class, %more) = @_;
+		defined $spelling or WTF->throw("Tried to pull undef token!");
+		substr(${$self->_remaining}, 0, length $spelling, "");
+		push @{$self->_tokens}, $class->new(spelling => $spelling, %more);
+		return $self->_tokens->[-1];
+	}
+	
+	sub _pull_data
+	{
+		my $self = shift;
+		my $data = (${$self->_remaining} =~ /^(.*?)</ms) ? $1 : ${$self->_remaining};
+		$self->_pull_token($data, $data =~ /\S/ms ? Data : Data_Whitespace);
+	}
+
+	sub _pull_attribute_value
+	{
+		my $self = shift;
+				
+		${$self->_remaining} =~ /^(".*?")/m and return $self->_pull_token($1, AttributeValue);
+		${$self->_remaining} =~ /^('.*?')/m and return $self->_pull_token($1, AttributeValue);
+		
+		Tokenization->throw(
+			"Called _pull_attribute_value when remaining string doesn't look like an attribute value",
+			remaining => ${$self->_remaining},
+		);
+	}
+
+	sub tokenize
+	{
+		my $self = shift;
+		ref $self or WrongInvocant->throw("this is an object method!");
+		
+		my ($text_ref) = @_;
+		$self->_remaining(
+			ref($text_ref) eq 'SCALAR'
+				? $text_ref
+				: do { local $/; my $h = as_filehandle($text_ref); \(my $t = <$h>) }
+		);
+		$self->_tokens([]);
+
+		my ($TAG, $ATTR);
+		
+		# Declare this ahead of time for use in the big elsif!
+		my $matches;
+		
+		while (length ${ $self->_remaining })
+		{
+			if ($matches = $self->_peek(qr#^(<[?!]?)#))
+			{
+				push @{$self->_tokens}, ($TAG = Tag_Start->new);
+				$self->_pull_token($matches->[0], Pointy);
+				if ($matches->[0] =~ /\!/)
+				{
+					$TAG->is_doctype(1);
+				}
+				elsif ($matches->[0] =~ /\?/)
+				{
+					$TAG->is_pi(1);
+				}
+			}
+			elsif ($TAG and $matches = $self->_peek(qr#^(\??>)#))
+			{
+				my $end = Tag_End->new(start => $TAG);
+				$TAG->end($end);
+				$self->_pull_token($matches->[0], Pointy);
+				push @{$self->_tokens}, $end;
+				undef $TAG;
+			}
+			elsif ($TAG and $matches = $self->_peek(qr#^(\s+)#s))
+			{
+				$self->_pull_token($matches->[0], Whitespace);
+			}
+			elsif ($TAG and !$TAG->name and $matches = $self->_peek(qr#^((?:\w+[:])?\w+)#))
+			{
+				$TAG->name( $self->_pull_token($matches->[0], TagName) );
+				if (!$TAG->is_closing)
+				{
+					$TAG->is_opening(1);
+				}
+			}
+			elsif ($TAG and $TAG->name and $matches = $self->_peek(qr#^((?:\w+[:])?\w+)#))
+			{
+				if ($TAG->is_pi or $TAG->is_doctype)
+				{
+					$self->_pull_token($matches->[0], AttributeName);
+				}
+				else
+				{
+					push @{$self->_tokens}, ($ATTR = Attribute_Start->new);
+					$ATTR->name( $self->_pull_token($matches->[0], AttributeName) );
+				}
+			}
+			elsif ($TAG and $self->_peek("="))
+			{
+				$self->_pull_token("=", Equals);
+			}
+			elsif ($TAG and $self->_peek("/"))
+			{
+				$self->_pull_token("/", Slash);
+				if (!$TAG->name)
+				{
+					$TAG->is_closing(1);
+				}
+			}
+			elsif ($TAG and $self->_peek(qr{^["']}))
+			{
+				$self->_pull_attribute_value;
+				if ($ATTR) # doctype?? pi??
+				{
+					my $end = Attribute_End->new(start => $ATTR);
+					$ATTR->end($end);
+					push @{$self->_tokens}, $end;
+				}
+			}
+			else
+			{
+				$self->_pull_data;
+			}
+		}
+		
+		return $self->_tokens;
+	}
+	
+	sub _fixup { 1 };
+
+	sub highlight
+	{
+		my $self = shift;
+		ref $self or WrongInvocant->throw("this is an object method!");
+		$self->tokenize(@_);
+		$self->_fixup;
+		return join "", map $_->TO_HTML, @{$self->_tokens};
+	}
+}
+
+1;
+