Source

xemacsweb / html2content.pl

Full commit
adriana 00f8a58 
adriana 61e9e52 



adriana 00f8a58 
adriana 61e9e52 

adriana 00f8a58 

adriana 61e9e52 








adriana 00f8a58 


adriana 61e9e52 
adriana 00f8a58 












adriana 61e9e52 












adriana 00f8a58 
adriana 61e9e52 



adriana 00f8a58 
adriana 61e9e52 




adriana 00f8a58 






adriana 61e9e52 
adriana 00f8a58 
#! /usr/bin/perl -w
# Author:
# John S. Jacobs Anderson, jacobs@xemacs.org
# Contributors:
# Adrian Aichner (APA), aichner@ecf.teradyne.com, Teradyne GmbH, 2000-09-05.

# APA: The "strict" pragma warns us about many potential user errors.
use strict;
use File::Find;

if (scalar @ARGV) {
  for (@ARGV) {
    find( \&changer , ( "$_" ));
  }
} else {
  die( "usage: html2content.pl DIR [DIR ...]

Generate genpage .content files from .html files in DIR[s]\n" );
}

sub changer {
  my $file = $_;
  if ( $file =~ '.html\Z' ) {
    my $contents;
    my $author;
    my $title;
    my $main;
    my $newfile =  $file;
    $newfile =~ s/\.html/\.content/ ;
    {
      local $/;
      undef $/;
      open( IN , $file );
      $contents = <IN>;
      close( IN );
    }
    if ( $contents =~ m["author".*?content="(.*?)"]s ) {
      $author = $1;
    } else {
      $author = "unknown, please claim authorship!";
      warn( "no author in $file!\n");
    }
    if ( $contents =~ m[<title>(.*?)</title>]si ) {
      $title = $1;
    } else {
      die( "no title in $file!\n" );
    }
    if ( $contents =~
         m[<!-- content cell -->.*?<td align="left" valign="top">(.*)</td><!-- /content cell -->]s) {
      $main = $1;
    } elsif ($contents =~ m|<body[^>]*>(.*)</body>|si) {
      $main = $1;
    } else {
      die( "no main in $file!\n" );
    }
    if (-e "$newfile") {
      warn( "\"$newfile\" in dir \"$File::Find::dir\" exists already, won't overwrite!\n" );
    } else {
      open( OUT , ">$newfile" );
      print OUT <<EOF;
%title%
$title
%author%
$author
%main%
$main
EOF
    }
  }
}