Commits

Michele Bini committed 27068d9

Modifications for filtering headers

Comments (0)

Files changed (1)

 #! D:\perl\bin\perl.exe
 
+push @ARGV, qw(
+ --filter h1:title-head
+ --filter h2:chapter-head
+ --filter h3:section-head
+	      );
+
+# push @ARGV, '--filter-class' => 'title-head,chapter-head,section-head';
+
 ###############################################################################
 ###############################################################################
 #
 # To add a table of contents, based on its headings, to an HTML page.
 #
-#			Version 2
-#
 #	by Andrew Hardwick, http://duramecho.com, 2002/3/22
 #
 #	Released under GNU Public Licence.
 #
+# Modified by Michele Bini in March 2012 to support filtering
+# headings by level and/or class name.
+#
+# Copyright (c) 2002 Andrew Hardwick, http://duramecho.com
+# Copyright (c) 2012 Michele Bini <michele.bini@gmail.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the version 3 of the GNU General Public License
+# as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+##
 ###############################################################################
 ###############################################################################
 #
 #  markers as well so that, if you need the table of contents updated, this
 #  program can tell where you want the table of contents to be).
 #
+# Filtering headings:
+#
+# You can filter headings to include in the table of contents by using
+# the filter option:
+#
+# ./tocgen file.html --filter h2:classname # Only include h2 headings
+# with class 'classname'
+#
+# Wildcards are supported:
+#
+# ./tocgen file.html --filter h1:* --filter h2:classname # Only include
+# h2 headings with class 'classname', and h1 headings of any class
+#
+# You can also filter by classes the headings belong to, ignoring the
+# heading level:
+#
+# ./tocgen file.html --filter-class class-a,class-b
+#
 ###############################################################################
 #
 # Known Deficiencies
 ###############################################################################
 
 # Include libraries
-use Cwd;		# To find current directory
-use strict;		# Disenable automatic variables
+use Getopt::Long 'GetOptions';
+use Cwd;	 # To find current directory
+use strict;	 # Disenable automatic variables
 
 # Global Variables
 my @HeadingCount;
 my @HeadingLevels;
 my @HeadingLabels;
 
+# Convert a shell-style glob into a regular-expression fragment.
+#
+# Parameters
+#   $_[0]  glob text, e.g. "h2:chapter-*"
+# Returns
+#   the pattern as a plain string (not a compiled regexp) in which
+#     \x   (a backslash followed by any character) is passed through as is,
+#     *    becomes "any run of characters from [-'a-z0-9_]",
+#     ?    becomes "exactly one character from [-'a-z0-9_]",
+#     . +  are backslash-escaped so that they only match themselves.
+#   Every other character is copied unchanged.
+sub glob2rgx {
+  my ($glob) = @_;
+  my $letterrgx = "[-'a-z0-9_]";
+  my $namergx = $letterrgx."*";
+  # Single left-to-right pass: an escaped "\*" is consumed by the first
+  # alternative before the wildcard alternatives can see its "*".
+  $glob =~ s{(\\.)|([*])|([?])|([.+])}
+            {defined($1) ? $1 : defined($2) ? $namergx : defined($3) ? $letterrgx : "\\$4"}ge;
+  return $glob;
+}
+# die "glob2rgx" . glob2rgx("bl*ue?"); # , "[a-z]*", "[a-z]");
+
+# Filters
+#
+# Command-line options (each may be given more than once):
+#   --filter GLOB         keep headings whose "hN:class" key matches GLOB
+#   --filter-class A,B    keep headings whose class is one of A, B, ...
+#                         (--filter-classes is accepted as an alias)
+#   --filter-rgx REGEXP   like --filter, but REGEXP is used verbatim
+#
+# Both variables stay undef when no such option was given; filter() takes
+# that to mean "no filtering".
+my $filter_rgx;      # array ref of pattern strings
+my $filter_classes;  # hash ref: lower-cased class name => 1
+die "Usage: $0 file.html [--filter hN:glob] [--filter-class a,b] [--filter-rgx regexp]\n"
+  unless GetOptions
+  "filter=s"  => sub {
+    # Anchor the converted glob so that it has to match the whole key
+    push @{ $filter_rgx ||= [] }, '^(?:' . glob2rgx($_[1]) . ')$'
+  },
+  "filter-class|filter-classes=s"   => sub {
+    # filter() lower-cases the class before the lookup, so store lower case
+    ( $filter_classes ||= { } )->{ lc($_) } = 1 for split(/,/, $_[1])
+  },
+  # "=s" is required: without it the option is a flag and $_[1] is just 1
+  "filter-rgx=s"   => sub {
+    push @{ $filter_rgx ||= [] }, $_[1]
+  };
+
+# Decide whether a heading belongs in the table of contents.
+#
+# Parameters
+#   $_[0]  heading level digit (the N of <HN>)
+#   $_[1]  raw attribute text of the opening tag (between "<HN" and ">")
+# Returns
+#   1 if no filter option was given, if one of the heading's classes is a
+#   key of %$filter_classes, or if "hN:class" matches one of the patterns
+#   in @$filter_rgx (case-insensitively); 0 otherwise.
+sub filter {
+  return 1 unless defined($filter_classes) || defined($filter_rgx);
+  # Copy the arguments before any pattern match below: the caller passes
+  # $1 and $2, and @_ aliases them.
+  my ($level, $attrs) = @_;
+  $attrs = "" unless defined $attrs;
+  # Accept class="a b", class='a b' and unquoted class=a; the look-behind
+  # keeps attributes such as data-class= from being taken for class=.
+  my $value = "";
+  if ($attrs =~ /(?<![-\w])class\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"';>]+))/i) {
+    $value = defined($1) ? $1 : defined($2) ? $2 : $3;
+  }
+  # A heading without any class is still tested, under the empty name
+  my @classes = split ' ', lc $value;
+  @classes = ("") unless @classes;
+  for my $classname (@classes) {
+    return 1 if defined($filter_classes) && $filter_classes->{$classname};
+    next unless defined $filter_rgx;
+    my $key = "h$level:$classname";
+    for my $rgx (@$filter_rgx) {
+      return 1 if $key =~ /$rgx/i;
+    }
+  }
+  return 0;
+}
+
 ###############################################################################
 # Main routine
 ###############################################################################
 
 {	my ($c,$d);
-	# Get data from file
-	my $From=cwd().'/'.$ARGV[0];
-	open(FILETOPROCESS,'<'.$From)||
-			die("Cannot open $From to read.");
-	my $Html;
-	read FILETOPROCESS,$Html,-s $From;
-	close FILETOPROCESS;
-	# Remove any old anchors
-	$Html=~s/<!--TableOfContentsAnchor:Begin-->
-			.*?
-			<!--TableOfContentsAnchor:End-->
-			//gsx;
-	# Find all headings & mark them with anchors
-	$Html=~s/<H(\d)(.*?)>(.*?)<\/H\1>/
-			'<H'.$1.$2.'><!--TableOfContentsAnchor:Begin--><A NAME="'.
-			MarkHeading($3,$1).
-			'"><\/A><!--TableOfContentsAnchor:End-->'.
-			$3.'<\/H'.$1.'>'/gise;
-	# Avoid jumping down more than one level at a time by adding null headings
-	for($c=0;$c<scalar(@HeadingTexts)-1;$c++)
-	{	if($HeadingLevels[$c]<$HeadingLevels[$c+1]-1)
-		{	splice(@HeadingLevels,$c+1,0,$HeadingLevels[$c]+1);
-			splice(@HeadingTexts,$c+1,0,'');
-			splice(@HeadingLabels,$c+1,0,'');}}
-	# Start HTML table of contents
-	my $Toc="<!--TableOfContents:Begin-->\n<UL>\n";
-	my $PreviousHeadingLevel=1;
-	# Add in contents lines
-	for($c=0;$c<scalar(@HeadingTexts);$c++)
-	{	# Indent/outdent contents line
-		for($d=$PreviousHeadingLevel;$d<$HeadingLevels[$c];$d++)
-		{	$Toc=~s/^(.*)<\/LI>(.*?)$/$1<UL>$2/s;}
-		for($d=$PreviousHeadingLevel;$d>$HeadingLevels[$c];$d--)
-		{	$Toc.="<\/UL><\/LI>\n";}
-		# Write a contents line
-		$Toc.='<LI><A HREF="#'.$HeadingLabels[$c].'">'.
-				$HeadingTexts[$c]."<\/A><\/LI>\n";
-		$PreviousHeadingLevel=$HeadingLevels[$c];}
-	# Outdent fully
-	for($d=$PreviousHeadingLevel;$d>1;$d--)
-		{	$Toc=~s/^(.*)<\/LI>(.*?)$/$1<\/UL>$2/s;}
-	# Remove null links
-	$Toc=~s/<A HREF=\"#\"><\/A>//g;
-	# Finish off table of contents
-	$Toc.="<\/UL>\n<!--TableOfContents:End-->\n";
-	# Replace old table of contents with new, or put at top if no old one
-	if(!($Html=~s/<!--TableOfContents:Begin-->\n
-			.*?
-			<!--TableOfContents:End-->\n
-			/$Toc/sx))
-	{	$Html=~s/(<BODY.*?>)/$1\n$Toc/is;}
-	# Write data back to the file
-	open(FILETOPROCESS,'>'.$From)||
-			die("Cannot open $From to write.");
-	print FILETOPROCESS $Html;
-	close FILETOPROCESS;}
+# Get data from file
+my $From=cwd().'/'.$ARGV[0];
+open(FILETOPROCESS,'<'.$From)||
+die("Cannot open $From to read.");
+my $Html;
+read FILETOPROCESS,$Html,-s $From;
+close FILETOPROCESS;
+# Remove any old anchors
+$Html=~s/<!--TableOfContentsAnchor:Begin-->
+.*?
+<!--TableOfContentsAnchor:End-->
+//gsx;
+# Find all headings & mark them with anchors
+$Html=~s/<H(\d)(.*?)>(.*?)<\/H\1>/
+filter($1, $2) ? (
+'<H'.$1.$2.'><!--TableOfContentsAnchor:Begin--><A NAME="'.
+MarkHeading($3,$1).
+'"><\/A><!--TableOfContentsAnchor:End-->'.
+$3.'<\/H'.$1.'>' ) : "$&"
+/gise;
+
+# Avoid jumping down more than one level at a time by adding null headings
+for($c=0;$c<scalar(@HeadingTexts)-1;$c++)
+{	if($HeadingLevels[$c]<$HeadingLevels[$c+1]-1)
+{	splice(@HeadingLevels,$c+1,0,$HeadingLevels[$c]+1);
+splice(@HeadingTexts,$c+1,0,'');
+splice(@HeadingLabels,$c+1,0,'');}}
+
+# Start HTML table of contents
+my $Toc="<!--TableOfContents:Begin-->\n<UL>\n";
+my $PreviousHeadingLevel=1;
+
+# Add in contents lines
+for($c=0;$c<scalar(@HeadingTexts);$c++)
+{	# Indent/outdent contents line
+for($d=$PreviousHeadingLevel;$d<$HeadingLevels[$c];$d++)
+{	$Toc=~s/^(.*)<\/LI>(.*?)$/$1<UL>$2/s;}
+for($d=$PreviousHeadingLevel;$d>$HeadingLevels[$c];$d--)
+{	$Toc.="<\/UL><\/LI>\n";}
+
+# Write a contents line
+$Toc.='<LI><A HREF="#'.$HeadingLabels[$c].'">'.
+$HeadingTexts[$c]."<\/A><\/LI>\n";
+$PreviousHeadingLevel=$HeadingLevels[$c];}
+
+# Outdent fully
+for($d=$PreviousHeadingLevel;$d>1;$d--)
+{	$Toc=~s/^(.*)<\/LI>(.*?)$/$1<\/UL>$2/s;}
+
+# Remove null links
+$Toc=~s/<A HREF=\"#\"><\/A>//g;
+
+# Finish off table of contents
+$Toc.="<\/UL>\n<!--TableOfContents:End-->\n";
+
+# Replace old table of contents with new, or put at top if no old one
+if(!($Html=~s/<!--TableOfContents:Begin-->\n
+.*?
+<!--TableOfContents:End-->\n
+/$Toc/sx))
+{	$Html=~s/(<BODY.*?>)/$1\n$Toc/is;}
+# Write data back to the file
+open(FILETOPROCESS,'>'.$From)||
+die("Cannot open $From to write.");
+print FILETOPROCESS $Html;
+close FILETOPROCESS;}
 
 ###############################################################################
 # MarkHeading
 ###############################################################################
-# This works out a heading number for a heading, adds the heading to a 
+# This works out a heading number for a heading, adds the heading to a
 #  list and creates an anchor in the heading
 ###############################################################################
 # Parameters
 
 sub MarkHeading
 {	my ($HeadingText,$HeadingLevel)=@_;
-	# Paranoia
-	$HeadingLevel=1 if($HeadingLevel<1);
-	$HeadingLevel=9 if($HeadingLevel>9);
-	# Work out heading number (add at current level & trim after)
-	$HeadingCount[$HeadingLevel-1]++;
-	@HeadingCount[$HeadingLevel..8]=(0)x9;
-	my $HeadingNumber=join('.',@HeadingCount[0..($HeadingLevel-1)]);
-	# Create a label
-	my $Label='Section_'.$HeadingNumber;
-	# Remove any HTML tags from the heading text
-	$HeadingText=~s/<.*?>//gis;
-	# Store the results
-	push(@HeadingTexts,$HeadingText);
-	push(@HeadingLevels,$HeadingLevel);
-	push(@HeadingLabels,$Label);
-	# Display progress on screen
-	print '-'x$HeadingLevel." $HeadingText\n";
-	return $Label;}
+# Paranoia
+$HeadingLevel=1 if($HeadingLevel<1);
+$HeadingLevel=9 if($HeadingLevel>9);
+# Work out heading number (add at current level & trim after)
+$HeadingCount[$HeadingLevel-1]++;
+@HeadingCount[$HeadingLevel..8]=(0)x9;
+my $HeadingNumber=join('.',@HeadingCount[0..($HeadingLevel-1)]);
+# Create a label
+my $Label='Section_'.$HeadingNumber;
+# Remove any HTML tags from the heading text
+$HeadingText=~s/<.*?>//gis;
+# Store the results
+push(@HeadingTexts,$HeadingText);
+push(@HeadingLevels,$HeadingLevel);
+push(@HeadingLabels,$Label);
+# Display progress on screen
+print '-'x$HeadingLevel." $HeadingText\n";
+return $Label;}
 
 ###############################################################################