# shlomi-fish-homepage / bin / from-docbook-to-fiction-xml-txt.pl

  #!/usr/bin/perl

  # Reads a DocBook 5 article (first positional argument), walks its
  # db:section tree recursively and writes the article title, the section
  # titles and the text of the paragraphs to the file given with
  # -o / --output, encoded as UTF-8.
  #
  # Usage: from-docbook-to-fiction-xml-txt.pl -o OUTPUT INPUT
  #
  # NOTE(review): several output literals below are empty (qq{}) or hold only
  # newlines, and in the code visible here $id, $main_id_text and
  # _esc_for_attr() are computed/defined but never used.  That looks like
  # markup went missing from these strings at some point - confirm against the
  # upstream copy before relying on the output format.  The literals are
  # deliberately left untouched here.

  use strict;
  use warnings;

  use XML::LibXML;
  use Getopt::Long;

  # Escape the XML special characters &, < and > in a text string and strip
  # leading and trailing spaces/tabs from every line of it.
  #
  # Takes the raw text; returns the escaped and trimmed copy.
  sub _esc
  {
      my $s = shift;

      # "&" has to be handled first, so that the ampersands of the entities
      # inserted below are not escaped a second time.
      $s =~ s{&}{&amp;}g;
      $s =~ s{<}{&lt;}g;
      $s =~ s{>}{&gt;}g;

      # /m makes ^ and $ match at every line, so each line gets trimmed.
      $s =~ s{^[ \t]+}{}gms;
      $s =~ s{[ \t]+$}{}gms;

      return $s;
  }

  # Same as _esc(), and additionally escapes every double quote so the result
  # can be placed inside a double-quoted attribute value.
  sub _esc_for_attr
  {
      my $s = shift;

      my $ret = _esc($s);
      $ret =~ s{"}{&quot;}g;

      return $ret;
  }

  my $xml_uri = q{http://www.w3.org/XML/1998/namespace};

  # XPath context shared by all the queries below (including _out_section()).
  my $xpc = XML::LibXML::XPathContext->new();
  #$xpc->registerNs('x', q{http://www.w3.org/1999/xhtml});
  $xpc->registerNs('db', q{http://docbook.org/ns/docbook});
  $xpc->registerNs('xlink', q{http://www.w3.org/1999/xlink});
  $xpc->registerNs('xml', $xml_uri);

  my $parser = XML::LibXML->new();
  $parser->load_ext_dtd(0);

  my $output_file;

  GetOptions(
      "o|output=s" => \$output_file,
  ) or die "Invalid command line options!\n";

  my $input_file = shift(@ARGV);

  if (!defined($input_file))
  {
      die "No input file was specified on the command line!\n";
  }

  # Three-argument open() with an undefined filename opens an anonymous
  # temporary file, so without this check the output would silently vanish.
  if (!defined($output_file))
  {
      die "No output file was specified (use -o or --output)!\n";
  }

  my $doc = $parser->parse_file($input_file);

  my ($main_title) = $xpc->findnodes(q{/db:article/db:info/db:title}, $doc);

  if (!defined($main_title))
  {
      die "'$input_file' has no /db:article/db:info/db:title element!\n";
  }

  my $main_title_text = $main_title->textContent();

  my ($main_article) = $xpc->findnodes(q{/db:article}, $doc);
  my $main_id_text = $main_article->getAttributeNS($xml_uri, "id");

  my @sections = $xpc->findnodes(q{/db:article/db:section}, $doc);

  # Render one db:section element, and recursively its nested db:section
  # children, as text.
  #
  # Takes the section element; returns the rendered string: the escaped title,
  # the escaped text of the direct db:para children separated by blank lines,
  # then the rendering of the direct sub-sections.
  # Dies if the section has no db:info/db:title child.
  sub _out_section
  {
      my $sect_elem = shift;

      my $id = $sect_elem->getAttributeNS($xml_uri, "id");

      my ($title_elem) = $xpc->findnodes(q{./db:info/db:title}, $sect_elem);

      if (!defined($title_elem))
      {
          die "Section "
              . (defined($id) ? "'$id'" : "(without an xml:id)")
              . " has no db:info/db:title element!\n";
      }

      my $title_text = $title_elem->textContent();

      my @paras = $xpc->findnodes(q{./db:para}, $sect_elem);
      my @subs = $xpc->findnodes(q{./db:section}, $sect_elem);

      return qq{\n\n}
          . qq{} . _esc($title_text) . qq{\n\n}
          . join("\n\n", map { _esc($_->textContent()) } @paras)
          . join("\n\n", map { _out_section($_) } @subs)
          . qq{\n\n}
          ;
  }

  my $total = qq{\n\n}
      . qq{} . _esc($main_title_text) . qq{\n\n}
      . join("\n\n", map { _out_section($_) } @sections)
      . qq{\n\n}
      ;

  open my $out_fh, ">", $output_file
      or die "Could not open '$output_file' for output! ($!)";
  binmode $out_fh, ":encoding(utf-8)";
  print {$out_fh} $total;

  # Buffered write errors only surface at close time, so check it.
  close($out_fh)
      or die "Could not close '$output_file' after writing! ($!)";