Commits

Anonymous committed 43541f0

Handling of entities at the beginning of opening lines in paragraphs.

  • Participants
  • Parent commits 3d14ea4

Comments (0)

Files changed (8)

File perl/modules/XML-Grammar-Fiction/Changes

 Revision history for XML-Grammar-Fiction.
 
+    * Fix the handling of entities at the start of the opening line of
+    paragraphs in the Fiction-Text. See
+    t/fiction/data/proto-text/paras-with-entities-at-start-of-line.txt .
+
 0.5.0       Sat May 21 23:20:43 IDT 2011
     * Add support for the <programlisting> tag.
     * Fix the handling of the entities ("&amp;" "&#32;" etc.) in Fiction-Text.

File perl/modules/XML-Grammar-Fiction/MANIFEST

 t/fiction/data/proto-text-invalid/wrong-close-tag.txt
 t/fiction/data/proto-text-invalid/wrong-closing-inner-tag.txt
 t/fiction/data/proto-text/nested-s.txt
+t/fiction/data/proto-text/paras-with-entities-at-start-of-line.txt
 t/fiction/data/proto-text/scenes-with-titles.txt
 t/fiction/data/proto-text/sections-a-href.txt
 t/fiction/data/proto-text/sections-and-paras.txt
 t/fiction/data/proto-text/with-ul-ol-li.txt
 t/fiction/data/xml/dialogue-with-several-paragraphs.xml
 t/fiction/data/xml/nested-s.xml
+t/fiction/data/xml/paras-with-entities-at-start-of-line.xml
 t/fiction/data/xml/scenes-with-titles.xml
 t/fiction/data/xml/sections-a-href.xml
 t/fiction/data/xml/sections-and-paras.xml

File perl/modules/XML-Grammar-Fiction/lib/XML/Grammar/Fiction/FromProto/Parser/QnD.pm

 our $VERSION = '0.5.0';
 
 sub _non_tag_text_unit_consume_regex {
-    return qr{(?:[\<\&]|^\n?$)}ms;
+    return qr{(?:[\<]|^\n?$)}ms;
 }
 
 sub _generate_non_tag_text_event

File perl/modules/XML-Grammar-Fiction/lib/XML/Grammar/Fiction/FromProto/Parser/XmlIterator.pm

 {
     my $self = shift;
 
-    my $text = $self->consume_up_to($self->_non_tag_text_unit_consume_regex);
+    my $orig_text = $self->consume_up_to($self->_non_tag_text_unit_consume_regex);
+
+    
+    my $text = '';
+
+    # Incrementally scan $orig_text for entities, appending decoded output to $text.
+    pos($orig_text) = 0;
+
+    while ($orig_text =~ m{\G(.*?)(\&|\z)}msg)
+    {
+        my ($before, $indicator) = ($1, $2);
+
+        $text .= $before;
+
+        if ($indicator eq '&')
+        {
+            if ($orig_text =~ m{\G(\#?\w+;)}cg)
+            {
+                $text .= HTML::Entities::decode_entities("&$1");
+            }
+            else
+            {
+                Carp::confess(
+                    sprintf(
+                        "Cannot match entity '%s' at line %d",
+                        substr($orig_text, pos($orig_text)-1, 10),
+                        $self->line_num(),
+                    )
+                );
+            }
+        }
+    }
+
+    $text =~ s{(\&#?\w+;)}{HTML::Entities::decode_entities($1)}eg;
+
+    if ($text =~ m{\&})
+    {
+    }
 
     my $l = $self->curr_line_ref();
 
 
     my $l = $self->curr_line_ref();
 
-    if ($$l =~ m{\G(\&|<(?:/)?)}cg)
+    if ($$l =~ m{\G(<(?:/)?)}cg)
     {
         return $1;
     }
 
         pos($$l) = $orig_pos;
 
-        if ($$l =~ m{\G\&})
-        {
-            if ($$l !~ m/\G(\&#?\w+;)/g)
-            {
-                Carp::confess("Cannot match entity (e.g: \"&quot;\") at line " .
-                    $self->line_num()
-                );
-            }
-
-            my $entity = $1;
-
-            $self->_enqueue_event(
-                XML::Grammar::Fiction::Event->new(
-                    {
-                        type => "elem",
-                        elem => $self->_new_text(
-                            [HTML::Entities::decode_entities($entity)]
-                        ),
-                    },
-                )
-            );
-
-            return;
-        }
-
         $self->_enqueue_event(
             XML::Grammar::Fiction::Event->new(
                 {'type' => ($self->_is_closing_tag($tag_start) ? "close" : "open")}

File perl/modules/XML-Grammar-Fiction/lib/XML/Grammar/Screenplay/FromProto/Parser/QnD.pm

 
 sub _non_tag_text_unit_consume_regex
 {
-    return qr{(?:[\<\[\]\&]|^\n?$)}ms;
+    return qr{(?:[\<\[\]]|^\n?$)}ms;
 }
 
 around '_parse_non_tag_text_unit' => sub {
 
     my $l = $self->curr_line_ref();
 
-    if ($$l =~ m{\G([<\[\]\&])})
+    if ($$l =~ m{\G([<\[\]])})
     {
         return $1;
     }

File perl/modules/XML-Grammar-Fiction/t/fiction/data/proto-text/paras-with-entities-at-start-of-line.txt

+<body id="index">
+
+<title>David vs. Goliath - Part I</title>
+
+<s id="top">
+
+<title>The Top Section</title>
+
+&amp;&#32;David and Goliath were standing by each other.
+&lt;foo&gt;
+
+<s id="goliath">
+
+<title>Goliath's Response</title>
+
+Goliath was not amused.
+
+He said to David: "Oh, really".
+
+</s>
+
+</s>
+
+</body>
+

File perl/modules/XML-Grammar-Fiction/t/fiction/data/xml/paras-with-entities-at-start-of-line.xml

+<?xml version='1.0' encoding='utf-8'?>
+<document xmlns="http://web-cpan.berlios.de/modules/XML-Grammar-Fortune/fiction-xml-0.2/" version="0.2">
+    <head>
+    </head>
+    <body xml:id="index">
+    <title>David vs. Goliath - Part I</title>        
+    <section xml:id="top">
+    <title>The Top Section</title>
+    <p>
+        &amp;&#32;David and Goliath were standing by each other.    
+        &lt;foo&gt;
+    </p>
+    <section xml:id="goliath">
+    <title>Goliath's Response</title>
+    <p>
+    Goliath was not amused.
+    </p>
+    <p>
+    He said to David: "Oh, really".
+    </p>
+    </section>
+    </section>
+</body>
+</document>

File perl/modules/XML-Grammar-Fiction/t/fiction/proto-text-to-xml-using-custom-parser.t

 use strict;
 use warnings;
 
-use Test::More tests => 15;
+use Test::More tests => 17;
 
 use Test::XML;
 
     with-ul-ol-li
     with-blockquote
     with-programlisting
+    paras-with-entities-at-start-of-line
     ));
 
-# TEST:$num_texts=7
+# TEST:$num_texts=8
 
 my $grammar = XML::Grammar::Fiction::FromProto->new({
         parser_class => "XML::Grammar::Fiction::FromProto::Parser::QnD",