Source

xemacsweb / html2content.pl

#! /usr/bin/perl -w
# Author:
# John S. Jacobs Anderson, jacobs@xemacs.org
# Contributors:
# Adrian Aichner (APA), aichner@ecf.teradyne.com, Teradyne GmbH, 2000-09-05.

# APA: The "strict" pragma warns us about many potential user errors.
use strict;
use File::Find;

if (scalar @ARGV) {
  for (@ARGV) {
    find( \&changer , ( "$_" ));
  }
} else {
  die( "usage: html2content.pl DIR [DIR ...]

Generate genpage .content files from .html files in DIR[s]\n" );
}

sub changer {
  my $file = $_;
  if ( $file =~ '.html\Z' ) {
    my $contents;
    my $author;
    my $title;
    my $main;
    my $newfile =  $file;
    $newfile =~ s/\.html/\.content/ ;
    {
      local $/;
      undef $/;
      open( IN , $file );
      $contents = <IN>;
      close( IN );
    }
    if ( $contents =~ m["author".*?content="(.*?)"]s ) {
      $author = $1;
    } else {
      $author = "unknown, please claim authorship!";
      warn( "no author in $file!\n");
    }
    if ( $contents =~ m[<title>(.*?)</title>]si ) {
      $title = $1;
    } else {
      die( "no title in $file!\n" );
    }
    if ( $contents =~
         m[<!-- content cell -->.*?<td align="left" valign="top">(.*)</td><!-- /content cell -->]s) {
      $main = $1;
    } elsif ($contents =~ m|<body[^>]*>(.*)</body>|si) {
      $main = $1;
    } else {
      die( "no main in $file!\n" );
    }
    if (-e "$newfile") {
      warn( "\"$newfile\" in dir \"$File::Find::dir\" exists already, won't overwrite!\n" );
    } else {
      open( OUT , ">$newfile" );
      print OUT <<EOF;
%title%
$title
%author%
$author
%main%
$main
EOF
    }
  }
}