Source

xemacsweb / html2content.pl

Diff from to

File html2content.pl

 #! /usr/bin/perl -w
+# Author:
+# John S. Jacobs Anderson, jacobs@xemacs.org
+# Contributors:
+# Adrian Aichner (APA), aichner@ecf.teradyne.com, Teradyne GmbH, 2000-09-05.
 
+# APA: The "strict" pragma turns many potential user errors into fatal errors.
+use strict;
 use File::Find;
 
-find( \&changer , ( "content/Releases" ));
+if (scalar @ARGV) {
+  for (@ARGV) {
+    find( \&changer , ( "$_" ));
+  }
+} else {
+  die( "usage: html2content.pl DIR [DIR ...]
+
+Generate genpage .content files from .html files in DIR[s]\n" );
+}
 
 sub changer {
   my $file = $_;
-  if ( $file =~ '.html' ) {
+  if ( $file =~ '.html\Z' ) {
     my $contents;
     my $author;
     my $title;
       $contents = <IN>;
       close( IN );
     }
-    if ( $contents =~ m["author".*?content="(.*?)"]s ) { $author = $1;  }
-    else { die( "no author in $file!\n") }
-
-    if ( $contents =~ m[<title>(.*?)</title>] ) { $title = $1; }
-    else { die( "no title in $file!\n" ) }
-
-    if ( $contents =~ 
-	 m[<!-- content cell -->.*?<td align="left" valign="top">(.*)</td><!-- /content cell -->]s) {
+    if ( $contents =~ m["author".*?content="(.*?)"]s ) {
+      $author = $1;
+    } else {
+      $author = "unknown, please claim authorship!";
+      warn( "no author in $file!\n");
+    }
+    if ( $contents =~ m[<title>(.*?)</title>]si ) {
+      $title = $1;
+    } else {
+      die( "no title in $file!\n" );
+    }
+    if ( $contents =~
+         m[<!-- content cell -->.*?<td align="left" valign="top">(.*)</td><!-- /content cell -->]s) {
       $main = $1;
+    } elsif ($contents =~ m|<body[^>]*>(.*)</body>|si) {
+      $main = $1;
+    } else {
+      die( "no main in $file!\n" );
     }
-    else { die( "no main in $file!\n" ) }
-
-    open( OUT , ">$newfile" );
-    print OUT <<EOF;
+    if (-e "$newfile") {
+      warn( "\"$newfile\" in dir \"$File::Find::dir\" exists already, won't overwrite!\n" );
+    } else {
+      open( OUT , ">$newfile" );
+      print OUT <<EOF;
 %title%
 $title
-
 %author%
 $author
-
 %main%
 $main
 EOF
-
+    }
   }
 }