Commits

Shlomi Fish  committed ff77743

Fix https://rt.cpan.org/Ticket/Display.html?id=58848 .

Improper handling of UTF-8 characters.

  • Participants
  • Parent commits bebd2b2

Comments (0)

Files changed (5)

             - t/60error_prev_chain.t
             - example/JBR-ALLENtrees.htm
         - Thanks to SCOP.
+    - Fix https://rt.cpan.org/Ticket/Display.html?id=58848 :
+        - "Malformed UTF-8 character (fatal) at" exception thrown on invalid
+        UTF-8.
+        - Thanks to David E. Wheeler (DWHEELER) for the report.
 
 1.76            Thu Jun 30 20:58:46 IDT 2011
     - Cleaned up t/28new_callbacks_multiple.t - convert to a Counter
 example/article_internal_bad.xml
 example/bad.dtd
 example/bad.xml
+example/thedieline.rss
 example/catalog.xml
 example/cb_example.pl
 example/complex/complex.dtd
 t/17callbacks.t
 t/18docfree.t
 t/19encoding.t
+t/19die_on_invalid_utf8_rt_58848.t
 t/20extras.t
 t/21catalog.t
 t/23rawfunctions.t

File example/thedieline.rss

+<?xml version="1.0" encoding="utf-8"?>
+<feed xmlns="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:thr="http://purl.org/syndication/thread/1.0">
+    <title>TheDieline.com: Package Design</title>
+    <link rel="self" type="application/atom+xml" href="http://www.thedieline.com/blog/atom.xml" />
+    <link rel="hub" href="http://hubbub.api.typepad.com/" />
+    <link rel="alternate" type="text/html" href="http://www.thedieline.com/blog/" />
+    <id>tag:typepad.com,2003:weblog-611821</id>
+    <updated>2011-06-15T11:03:00-07:00</updated>
+    <subtitle>The World's #1 Package Design Website</subtitle>
+    <generator uri="http://www.typepad.com/">TypePad</generator>
+<entry>
+        <title>Ginja d&#39; Óbidos</title>
+        <link rel="alternate" type="text/html" href="http://feeds.feedblitz.com/~/15124587/1ir2jk/thedieline~Ginja-d-%c3%93bidos.html" />
+        <link rel="replies" type="text/html" href="http://feeds.feedblitz.com/~/15124587/1ir2jk/thedieline~Ginja-d-%c3%93bidos.html" thr:count="2" thr:updated="2010-06-25T13:08:23-07:00" />
+        <id>tag:typepad.com,2003:post-6a00d8345250f069e20133f1a9824b970b</id>
+        <published>2010-06-25T10:00:00-07:00</published>
+        <updated>2010-06-24T22:16:07-07:00</updated>
+        <author>
+            <name>The Dieline</name>
+        </author>
+        <category scheme="http://www.sixapart.com/ns/types#category" term="Industry: Wine &amp; Spirits" />
+        <category scheme="http://www.sixapart.com/ns/types#category" term="Substrate: Glass Bottle" />
+        
+        
+<content  type="html" xml:lang="en-US" xml:base="http://www.thedieline.com/blog/">&lt;font  color=&quot;#000000&quot; &gt;&lt;p&gt;&lt;Img align=&quot;left&quot; border=&quot;0&quot; height=&quot;1&quot; width=&quot;1&quot; style=&quot;border:0;float:left;margin:0;&quot; vspace=&quot;0&quot; hspace=&quot;0&quot; src=&quot;http://feeds.feedblitz.com/~/i/15124587/1ir2jk/thedieline&quot;&gt;&lt;div xmlns=&quot;http://www.w3.org/1999/xhtml&quot;&gt;&lt;p&gt;&lt;a href=&quot;http://feeds.feedblitz.com/~/t/0/1ir2jk/thedieline/~http://www.thedieline.com/.a/6a00d8345250f069e20133f1a97caa970b-popup&quot; onclick=&quot;window.open( this.href, &amp;#39;_blank&amp;#39;, &amp;#39;width=640,height=480,scrollbars=no,resizable=no,toolbar=no,directories=no,location=no,menubar=no,status=no,left=0,top=0&amp;#39; ); return false&quot; style=&quot;display: inline;&quot;&gt;&lt;img alt=&quot;1&quot; class=&quot;asset asset-image at-xid-6a00d8345250f069e20133f1a97caa970b &quot; src=&quot;http://www.thedieline.com/.a/6a00d8345250f069e20133f1a97caa970b-550wi&quot; style=&quot;width: 540px; &quot; &gt;&lt;/a&gt; &lt;br&gt;Lisbon based &lt;a href=&quot;http://feeds.feedblitz.com/~/t/0/1ir2jk/thedieline/~http://&quot;&gt;&lt;/a&gt;&lt;a href=&quot;http://feeds.feedblitz.com/~/t/0/1ir2jk/thedieline/~http://www.ntgj.org/&quot; target=&quot;_blank&quot;&gt;NT.GJ&lt;/a&gt; designed this cherry liqueur concept which features actual cherries within...&lt;/div&gt;&lt;p&gt;&lt;a href=&quot;http://feeds.feedblitz.com/~/15124587/1ir2jk/thedieline&quot;&gt;CLICK HERE to read the rest of the post...&lt;/a&gt; &lt;!-- _!fbztxtlnk!_ http://feeds.feedblitz.com/~/15124587/1ir2jk/thedieline --&gt;&amp;raquo;&lt;/p&gt;&lt;/font&gt;&lt;p&gt;&lt;div style=&quot;clear:both;&quot;&gt;&lt;em&gt;(Want to see more packaging? Visit &lt;a href=&quot;http://www.TheDieline.com&quot;&gt;TheDieline.com&lt;/a&gt;!)&lt;/em&gt;&lt;p&gt;&lt;/div&gt;&lt;/p&gt;
+&lt;div style=&quot;clear:both;&quot;&gt;&lt;a title=&quot;Tweet with Bit.ly&quot; href=&quot;http://bit.ly/?v=3&amp;ref=feedblitz&amp;u=http%3a%2f%2fwww.thedieline.com%2fblog%2f2010%2f06%2fginja-d-%25c3%25b3bidos.html&amp;t=Ginja+d%26%2339%3b+�&ldquo;bidos&quot;&gt;&lt;img height=&quot;16&quot; border=&quot;0&quot; src=&quot;http://assets.feedblitz.com/images/icons/bitly.jpg&quot;&gt;&lt;/a&gt;  &lt;a title=&quot;Add to Delicious&quot; href=&quot;http://delicious.com/post?url=http%3a%2f%2fwww.thedieline.com%2fblog%2f2010%2f06%2fginja-d-%25c3%25b3bidos.html&amp;title=Ginja+d%26%2339%3b+�&ldquo;bidos&quot;&gt;&lt;img height=&quot;16&quot; border=&quot;0&quot; src=&quot;http://assets.feedblitz.com/images/icons/delicious.jpg&quot;&gt;&lt;/a&gt;  &lt;a title=&quot;Digg This&quot; href=&quot;http://digg.com/submit?phase=2&amp;url=http%3a%2f%2fwww.thedieline.com%2fblog%2f2010%2f06%2fginja-d-%25c3%25b3bidos.html&amp;title=Ginja+d%26%2339%3b+�&ldquo;bidos&quot;&gt;&lt;img height=&quot;16&quot; border=&quot;0&quot; src=&quot;http://assets.feedblitz.com/images/icons/digg.jpg&quot;&gt;&lt;/a&gt;  &lt;a title=&quot;Add to FaceBook&quot; href=&quot;http://facebook.com/share.php?u=http%3a%2f%2fwww.thedieline.com%2fblog%2f2010%2f06%2fginja-d-%25c3%25b3bidos.html&amp;t=Ginja+d%26%2339%3b+�&ldquo;bidos&quot;&gt;&lt;img height=&quot;16&quot; border=&quot;0&quot; src=&quot;http://assets.feedblitz.com/images/icons/facebook.jpg&quot;&gt;&lt;/a&gt;  &lt;a title=&quot;Add to Google Bookmarks&quot; href=&quot;http://google.com/bookmarks/mark?op=edit&amp;bkmk=http%3a%2f%2fwww.thedieline.com%2fblog%2f2010%2f06%2fginja-d-%25c3%25b3bidos.html&amp;title=Ginja+d%26%2339%3b+�&ldquo;bidos&quot;&gt;&lt;img height=&quot;16&quot; border=&quot;0&quot; src=&quot;http://assets.feedblitz.com/images/icons/google.jpg&quot;&gt;&lt;/a&gt;  &lt;a title=&quot;Stumble This&quot; 
href=&quot;http://stumbleupon.com/submit?url=http%3a%2f%2fwww.thedieline.com%2fblog%2f2010%2f06%2fginja-d-%25c3%25b3bidos.html&amp;title=Ginja+d%26%2339%3b+�&ldquo;bidos&quot;&gt;&lt;img height=&quot;16&quot; border=&quot;0&quot; src=&quot;http://assets.feedblitz.com/images/icons/stumble.jpg&quot;&gt;&lt;/a&gt;  &lt;a title=&quot;Tweet This&quot; href=&quot;http://twitter.com/home?status=Ginja+d%26%2339%3b+�&ldquo;bidos+http%3a%2f%2fwww.thedieline.com%2fblog%2f2010%2f06%2fginja-d-%25c3%25b3bidos.html&quot;&gt;&lt;img height=&quot;16&quot; border=&quot;0&quot; src=&quot;http://assets.feedblitz.com/images/icons/twitter.jpg&quot;&gt;&lt;/a&gt;  &lt;a title=&quot;Subscribe by RSS&quot; href=&quot;http://feeds.feedblitz.com/thedieline&quot;&gt;&lt;img height=&quot;16&quot; border=&quot;0&quot; src=&quot;http://assets.feedblitz.com/images/rss.gif&quot;&gt;&lt;/a&gt;  &lt;a title=&quot;View Comments&quot; href=&quot;http://www.thedieline.com/blog/2010/06/ginja-d-%C3%B3bidos.html&quot;&gt;&lt;img height=16 border=0 src=&quot;http://assets.feedblitz.com/images/comment.png&quot;&gt;&lt;/a&gt; &lt;/div&gt;</content></entry></feed>
+
+
+

File lib/XML/LibXML/Error.pm

 #
 package XML::LibXML::Error;
 
+use Encode ();
 use strict;
 use vars qw(@error_domains $VERSION $WARNINGS);
 use Carp;
       # warnings.  This has the pleasing benefit of making the test suite
       # run warning-free.
       no warnings 'utf8';
-      my $context = $self->{context};
+      my $context = Encode::encode('utf8', $self->{context}, Encode::FB_DEFAULT);
       $msg.=$context."\n";
       $context = substr($context,0,$self->{column});
       $context=~s/[^\t]/ /g;

File t/19die_on_invalid_utf8_rt_58848.t

+# Regression test for RT #58848: parsing a file containing invalid UTF-8 must raise an XML::LibXML::Error instead of dying with "Malformed UTF-8 character (fatal)".
+# Ticket: https://rt.cpan.org/Ticket/Display.html?id=58848
+
+use strict;
+use warnings;
+
+use Test::More tests => 2;
+
+use XML::LibXML;
+
+{
+    eval {
+        XML::LibXML->new->parse_file('example/thedieline.rss');  # relative path; presumably the test is run from the distribution root - confirm
+    };
+
+    my $err = $@;  # copy $@ immediately so a later eval cannot clobber it
+
+    # TEST
+    isa_ok($err, 'XML::LibXML::Error', "Exception is XML::LibXML::Error");  # the failure must surface as an error object
+
+    # TEST
+    like ("$err", qr{parser error : Input is not proper UTF-8},  # "$err" interpolates the exception into a string; the message must name the bad-UTF-8 parser error
+        'Parser error.',
+    );
+}
+
+
+=head1 COPYRIGHT & LICENSE
+
+Copyright 2011 by Shlomi Fish
+
+This program is distributed under the MIT (X11) License:
+L<http://www.opensource.org/licenses/mit-license.php>
+
+Permission is hereby granted, free of charge, to any person
+obtaining a copy of this software and associated documentation
+files (the "Software"), to deal in the Software without
+restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+=cut