Source

shlomi-fish-homepage / t2 / humour / fortunes / process-fortunes.pl

Full commit
#!/usr/bin/perl

use strict;
use warnings;

package XML::Grammar::Fortune::FromText;

use IO::All;

use XML::Writer;

use List::MoreUtils (qw(any));

use base 'Class::Accessor';

__PACKAGE__->mk_accessors(
    qw(
        _id
        _xml_writer
        _unix_fortune_files_paths
    ));

sub new
{
    my $class = shift;
    my $self = {};
    bless $self, $class;

    $self->_init(@_);

    return $self;
}

sub _init
{
    my $self = shift;
    my $args = shift;

    $self->_unix_fortune_files_paths($args->{unix_fortune_files_paths});

    $self->_id(0);

    return 0;
}

sub process_all_input
{
    my $self = shift;

    foreach my $path (@{$self->_unix_fortune_files_paths()})
    {
        $self->_process_fortune_file($path);
    }
}

sub _calc_irc_title
{
    my ($self, $args) = @_;

    my $lines = $args->{lines_ref};
    my $last_word = $args->{last_word};

    if (defined($last_word) && $last_word =~ m{\A[\*<]})
    {
        return;
    }
    else
    {
        my $last_line = pop(@$lines);
        $last_line =~ m{\A *(?:-- *)?(.*)};
        return $1;
    }
}

sub _process_fortune_file
{
    my $self = shift;

    my $fort_file = shift;

    my $xml_outfile = "$fort_file-non-format.xml";

    open my $xml_out, ">", $xml_outfile;
    binmode $xml_out, ":utf8";

    my $writer = XML::Writer->new(OUTPUT => $xml_out);

    $self->_xml_writer($writer);

    $writer->xmlDecl("utf-8");

    $writer->startTag("collection");
    $writer->emptyTag("head");
    $writer->startTag("list");

    open my $fh, "<", $fort_file;
    binmode $fh, ":utf8";

    my $line = <$fh>;
    chomp($line);
    while (defined($line))
    {
        my @fortune_lines;
        push @fortune_lines, $line;
        GET_FORTUNE_LINES:
        while (defined($line = <$fh>))
        {
            chomp($line);
            if ($line eq "%")
            {
                last GET_FORTUNE_LINES;
            }
            push @fortune_lines, $line;
        }

        # OK we got the fortune lines - no process them:
        $self->_process_single_fortune(
            {
                path => $fort_file,
                lines => \@fortune_lines,
            }
        );
    }
    continue
    {
        if (defined($line))
        {
            $line = <$fh>;
            chomp($line);
        }
    }
    close($fh);

    $writer->endTag(); # list.
    $writer->endTag(); # collection.

    $writer->end();

    close($xml_out);

    system("xmllint", "--format", "--output", "$fort_file.xml", $xml_outfile);
    unlink($xml_outfile);
}

sub _get_next_id
{
    my $self = shift;

    return "PLOC-IDENT" . $self->_id($self->_id()+1);
}

sub _calc_default_title
{
    my ($self, $title) = @_;

    return
        (defined($title)
            ? $title
            : "QUACKPROLOKOG==UNKNOWN-TITLE"
        );
}

sub _process_single_fortune
{
    my $self = shift;
    my $args = shift;

    my $lines = $args->{lines};

    my $writer = $self->_xml_writer();

    $writer->startTag("fortune", "id" => $self->_get_next_id());

    my $out_meta = sub {
        my $args = shift;

        $writer->startTag("meta");
        $writer->startTag("title");
        $writer->characters($args->{text});
        $writer->endTag("title");
        $writer->endTag("meta");
    };

    if (any { $_ =~ m{\A +<[^>]+>} } @$lines)
    {
        my $title;
        if ($lines->[-1] =~ m{\A *([^ ]+)})
        {
            $title = $self->_calc_irc_title(
                {
                    lines_ref => $lines,
                    last_word => $1
                }
            );
        }
        $out_meta->(
            {
                text => $self->_calc_default_title($title),
            }
        );

        $writer->startTag("irc");
        $writer->startTag("body");
        # This is an IRC conversation.
        my $line;
        my ($who, $text, $type);

        my $start_new_elem = sub {
            my $new_type = shift;
            my ($nw, $nt) = ($1, $2);
            if (defined($who))
            {
                $writer->startTag($type, "who" => $who);
                $writer->characters($text);
                $writer->endTag(); # $type
            }
            ($who, $text, $type) = ($nw, $nt, $new_type);
        };

        $line = shift(@$lines);

        while (@$lines || defined($line))
        {
            if ($line =~ m{\A\s+<([^>]+)>\s+(.+)})
            {
                $start_new_elem->("saying");
            }
            elsif ($line =~ m{\A\s+\*\s+(\S+)\s+(.+)})
            {
                $start_new_elem->("me_is");
            }
            elsif ($line =~ m{\A\s+-->\s+(\S+)\s+(.+)})
            {
                $start_new_elem->("joins");
            }
            elsif ($line =~ m{\A\s+<--\s+(\S+)\s+(.+)})
            {
                $start_new_elem->("leaves");
            }
            else
            {
                $line =~ m{\A\s+(.*)};
                $text .= " $1";
            }
        }
        continue
        {
            $line = shift(@$lines);
        }

        $start_new_elem->("dont_care");

        $writer->endTag("body");

        $writer->startTag("info");
        $writer->endTag("info");
        $writer->endTag("irc");
    }
    else
    {
        $out_meta->({text => $self->_calc_default_title(undef)});
        # It's not an IRC conversation: process it as raw.
        $writer->startTag("raw");
        $writer->startTag("body");
        $writer->startTag("text");
        $writer->cdata(join("\n", @$lines, ""));
        $writer->endTag("text");
        $writer->endTag("body");
        $writer->startTag("info");
        $writer->endTag("info");
        $writer->endTag("raw");
    }

    $writer->endTag(); # fortune
}

package main;

use IO::All;

sub read_fortune_files_list
{
    my @lines = io("Makefile")->getlines();

    my @f;
    GET_FORTUNE_FILES_LOOP:
    while (my $l = shift(@lines))
    {
        if ($l =~ m{\AFILES *=})
        {
            my $first_time = 1;
            $l = shift(@lines);
            while ($first_time || $l =~ m{[a-z]})
            {
                $first_time = 0;
                $l =~ m{([a-z_\-]+)};
                push @f, $1;
            }
            continue
            {
                $l = shift(@lines);
            }
            last GET_FORTUNE_FILES_LOOP;
        }
    }

    return \@f;
}

my $fortunes_list = read_fortune_files_list();

# print join(" ", @$fortunes_list), "\n";

my $converter = XML::Grammar::Fortune::FromText->new(
    {
        unix_fortune_files_paths => $fortunes_list,
    }
);

binmode STDOUT, ":utf8";
$converter->process_all_input();