Commits

cvs2hg  committed 4631278

fixup commit for tag 'inital'

  • Participants
  • Parent commits 87c2eca
  • Tags inital

Comments (0)

Files changed (1)

File adrian/website/HTMLindex.pl

-#! /usr/local/bin/perl
-#
-# Adrian Aichner, Teradyne GmbH. Munich, Sat, May. 06 1995.
-#
-# $Id$
-#
-
-use strict;
-use Getopt::Long;
-use APA::Usage;
-use CGI;
-
-my %options = (
-	       "list" => "Include all HTML Lists in the index.",
-	       "headline" => "Include all HTML Headings in the index.",
-	       "help" => "Print this help message only.",
-	       "prefix=s" => "Prefix every line added by STRING.",
-	       "sort" => "Sort the index lexically.",
-	       "where=s" => "REGEXP specifying where to insert index.",
-	      );
-
-use vars qw(
-	    $opt_list 
-	    $opt_headline 
-	    $opt_help 
-	    $opt_prefix 
-	    $opt_sort 
-	    $opt_where
-	   );
-
-# Words modifying the sense of following words. 
-my(@modifiers) = 
-  (
-   "all",
-   "little",
-   "m?any",
-   "much",
-   "not?",
-  );
-
-# Word-endings of `interesting' words to use as hyperlink data.
-my(@word_ends) =
-  (
-   "[ae]nts?",
-   "[bnrv]als?",
-   "[cd]es?",
-   "[dlrw]ays?",
-   "[glm]e[rs]?",
-   "[lmn]ess",
-   "[lmn]it(y|ies)",
-   "[lmr]ows?",
-   "ails?",
-   "an",
-   "ances?",
-   "are",
-   "ates?",
-   "burg",
-   "e[lmr]s?",
-   "ed",
-   "ell",
-   "erp",
-   "ests?",
-   "ich",
-   "ings?",
-   "is",
-   "ite",
-   "izes?",
-   "o[br]es?",
-   "olm",
-   "ould",
-   "ous",
-   "outs?",
-   "oven",
-   "sions?",
-   "sis",
-   "tal",
-   "tions?",
-   "ups?",
-  );
-
-# Common words to use as hyperlink-data.
-my(@words) =
-  (
-   "after",
-   "before",
-   "are",
-   "from",
-   "to",
-   "have",
-   "here",
-   "on",
-   "over",
-   "under",
-   "we",
-   "you",
-   "will",
-   "with(out)?",
-  );
-
-main();
-
-sub main {
-    my ($cgi) = new CGI({});
-    my (@cmd_line) = ($0, @ARGV);
-    my(@index, $list_index);
-    $list_index = 1;
-    my($modifiers) = join "|", @modifiers;
-    my($word_ends) = join "|", @word_ends;
-    my($words) = join "|", @words;
-    my($data_pat) =
-	"\\b((($modifiers)\\s+)?\\b[-\\w]+($word_ends)|($words))\\b";
-    my($heading, $index, $quoted_data, $name, $data, $hstart, $hend, $lws);
-    my($index_done) = 0;
-    if (! GetOptions(keys(%options)) || $opt_help) {
-	Usage(\%options, "Create an indexed version of the HTML input.");
-	exit 1;
-    }
-    $opt_where = "<ADDRESS>" unless $opt_where;
-    while (<>) {
-	#
-	# Perform line-oriented actions below.
-	#
-	if (m|<P>This\s+<A\s+NAME=\"index\">index</A>\s+was\s+generated\s+by|
-	    .. m|<HR>|) {
-	    next;
-	}
-	if (m|<P><A HREF=\"\#index\"(\s+NAME[^>]*)?>Goto</A> Index</P>|) {
-	    next;
-	}
-	if ($opt_headline && m|^(\s*).*(<H[1-6][^>]*>)(.*)(</H[1-6]>)|i) {
-	    if ($index_done) {
-		warn "Index has already been written before \"$opt_where\".\n";
-	    }
-	    $lws = $1;
-	    $hstart = $2;
-	    $heading = $3;
-	    $hend = $4;
-	    # Remove HTML-markup (needs more work to handle HTML
-	    # ATTRIBUTES).
-	    $_ = $heading;
-	    $_ =~ s/<[^>]+>//g;
-	    # Match a word in Heading somewhat meaningful as a
-	    # hyper-link.  Else use the whole headline data.
-	    if (m|$data_pat|i) {
-		$data = $1;
-	    }
-	    else {
-		$data = $heading;
-	    }
-	    # Build hopefully unique link-target by using first letter
-	    # of each word in HEADING.
-	    # Numbers are made part of the link-target, while
-	    # white-space is deleted.
-	    $name = $_;
-	    $name =~ s/([A-Za-z])[A-Za-z]*/$1/g;
-	    $name =~ s/\s+//g;
-	    # Incorporate the link-target into the current line, which
-	    # is a HEADING.  Create an index-line, containing a
-	    # hyper-link to this HEADING.
-	    $index = "$_";
-	    # Quote regular expression meta characters in headlines!
-	    $quoted_data = quotemeta $data;
-	    $index =~ s|$quoted_data|<A HREF="#$name">$data</A>|;
-	    # Push the index-line onto the index-stack to be dumped
-	    # before $opt_where.
-	    push @index, $index;
-	    # Re-assemble HEADING.
-	    $heading = $_;
-	    $_ = "$opt_prefix$lws$hstart$heading$hend";
-	    $_ .= "\n$opt_prefix$lws<P><A HREF=\"\#index\">Goto</A> Index</P>\n";
-	    s|$quoted_data|<A NAME="$name">$data</A>|;
-	}
-	if ($opt_list && /(<(D|O|U)L>)/i) {
-	    my($list_tag) = $1;
-	    if ($index_done) {
-		warn "Index has already been written before \"$opt_where\".\n";
-	    }
-	    s|$list_tag|<P><A HREF=\"\#index\" NAME="list$list_index">Goto</A> Index</P>\n$list_tag|;
-	    # push the index-line onto the index-stack to be dumped
-	    # before $opt_where.
-	    push @index, "<A HREF=\"\#list$list_index\">List</A> $list_index";
-	    # Increment list enumerator.
-	    $list_index++;
-	}
-	if (m|$opt_where|i) {
-	    if ($index_done) {
-		warn "Index has already been written before \"$opt_where\".\n";
-	    }
-	    print "$opt_prefix<P>This <A NAME=\"index\">index</A>";
-	    print " was generated by ";
-	    print $cgi->escapeHTML("@cmd_line") . "</P>\n$opt_prefix<UL>\n";
-	    print "$opt_prefix<LI><A HREF=\"#\">Goto</A> Top</LI>\n";
-	    if ($opt_sort) {
-		@index = sort byText @index;
-	    }
-	    for (@index) {
-		print "$opt_prefix<LI>$_</LI>\n";
-	    }
-	    print "$opt_prefix</UL>\n$opt_prefix<HR>\n";
-	    $index_done = 1;
-	}
-	print;
-	if (eof) {
-	    close (ARGV);
-	}
-    }
-}
-
-sub byText {
-    my ($a_key) = $a;
-    my ($b_key) = $b;
-    # Good enough for now, but not smart enough to remove all HTML
-    # tags correctly.
-    $a_key =~ s/<[^>]+>//;
-    $b_key =~ s/<[^>]+>//;
-    $a_key cmp $b_key;
-}