Commits

Nick Wellnhofer committed f353c23

Fix some text methods to work with UTF-8

insertData, deleteData, replaceData used xmlStrsub and xmlStrlen instead
of xmlUTF8Strsub and xmlUTF8Strlen.

  • Participants
  • Parent commits 5262469

Comments (0)

Files changed (3)

 Revision history for Perl extension XML::LibXML
 
 Not yet released
+    - Make insertData, deleteData, replaceData work correctly with UTF-8
+      strings.
     - Fix substringData
         - https://rt.cpan.org/Ticket/Display.html?id=88730
 
             if ( encstring != NULL && xmlStrlen( encstring ) > 0 ) {
                 data = domGetNodeValue(self);
                 if ( data != NULL && xmlStrlen( data ) > 0 ) {
-                    if ( xmlStrlen( data ) < offset ) {
+                    if ( xmlUTF8Strlen( data ) < offset ) {
                         data = xmlStrcat( data, encstring );
                         domSetNodeValue( self, data );
                     }
                     else {
-                        dl = xmlStrlen( data ) - offset;
+                        dl = xmlUTF8Strlen( data ) - offset;
 
                         if ( offset > 0 )
-                            new   = xmlStrsub(data, 0, offset );
-
-                        after = xmlStrsub(data, offset, dl );
+                            new   = xmlUTF8Strsub(data, 0, offset );
+
+                        after = xmlUTF8Strsub(data, offset, dl );
 
                         if ( new != NULL ) {
                             new = xmlStrcat(new, encstring );
     CODE:
         if ( length > 0 && offset >= 0 ) {
             data = domGetNodeValue(self);
-            len = xmlStrlen( data );
+            len = xmlUTF8Strlen( data );
             if ( data != NULL
                  && len > 0
                  && len > offset ) {
                 dl1 = offset + length;
                 if ( offset > 0 )
-                    new = xmlStrsub( data, 0, offset );
+                    new = xmlUTF8Strsub( data, 0, offset );
 
                 if ( len > dl1 ) {
                     dl2 = len - dl1;
-                    after = xmlStrsub( data, dl1, dl2 );
+                    after = xmlUTF8Strsub( data, dl1, dl2 );
                     if ( new != NULL ) {
                         new = xmlStrcat( new, after );
                         xmlFree(after);
 
             if ( encstring != NULL && xmlStrlen( encstring ) > 0 ) {
                 data = domGetNodeValue(self);
-                len = xmlStrlen( data );
+                len = xmlUTF8Strlen( data );
 
                 if ( data != NULL
                      && len > 0
 
                     dl1 = offset + length;
                     if ( dl1 < len ) {
-                        dl2 = xmlStrlen( data ) - dl1;
+                        dl2 = xmlUTF8Strlen( data ) - dl1;
                         if ( offset > 0 ) {
-                            new = xmlStrsub(data, 0, offset );
+                            new = xmlUTF8Strsub(data, 0, offset );
                             new = xmlStrcat(new, encstring );
                         }
                         else {
                             new   = xmlStrdup( encstring );
                         }
 
-                        after = xmlStrsub(data, dl1, dl2 );
+                        after = xmlUTF8Strsub(data, dl1, dl2 );
                         new = xmlStrcat(new, after );
 
                         domSetNodeValue( self, new );
                     else {
                         /* replace until end! */
                         if ( offset > 0 ) {
-                            new = xmlStrsub(data, 0, offset );
+                            new = xmlUTF8Strsub(data, 0, offset );
                             new = xmlStrcat(new, encstring );
                         }
                         else {
 use strict;
 use warnings;
 
-use Test::More tests => 37;
+use Test::More tests => 58;
 
 use XML::LibXML;
 
 }
 
 {
+    # UTF-8 tests
+
+    my $test_str  = "te\xDFt";
+    # Latin1 strings still fail.
+    utf8::upgrade($test_str);
+
+    # 1. creation
+    my $textnode = $doc->createTextNode($test_str);
+    # TEST
+    ok( $textnode, 'UTF-8 creation 1');
+    # TEST
+    is( $textnode->nodeValue(), $test_str,  'UTF-8 creation 2',);
+    my $foo_str = "\x{0444}oo\x{0431}ar";
+    $textnode = $doc->createTextNode($foo_str);
+    # TEST
+    ok( $textnode, 'UTF-8 creation 3');
+    # TEST
+    is( $textnode->nodeValue(), $foo_str,  'UTF-8 creation 4',);
+
+    # 2. substring
+    my $tnstr = $textnode->substringData( 1,2 );
+    # TEST
+    is( $tnstr , "oo", 'UTF-8 substring 1');
+    $tnstr = $textnode->substringData( 0,3 );
+    # TEST
+    is( $tnstr , "\x{0444}oo", 'UTF-8 substring 2');
+
+    # 3. Expansion
+    $textnode->appendData( $foo_str );
+    # TEST
+    is( $textnode->nodeValue(), $foo_str . $foo_str, 'UTF-8 expansion 1');
+
+    my $ins_str = "\x{0424}OO";
+    $textnode->insertData( 6, $ins_str );
+    # TEST
+    is( $textnode->nodeValue(), $foo_str.$ins_str.$foo_str,
+        'UTF-8 expansion 2' );
+
+    $textnode->setData( $foo_str );
+    $textnode->insertData( 6, $ins_str );
+    # TEST
+    is( $textnode->nodeValue(), $foo_str.$ins_str, 'UTF-8 expansion 3');
+
+    # 4. Removal
+    $textnode->setData( $foo_str );
+    $textnode->deleteData( 1,3 );
+    # TEST
+    is( $textnode->nodeValue(), "\x{0444}ar", 'UTF-8 Removal 1');
+    $textnode->setData( $foo_str );
+    $textnode->deleteData( 1,10 );
+    # TEST
+    is( $textnode->nodeValue(), "\x{0444}", 'UTF-8 Removal 2');
+    $textnode->setData( $foo_str );
+    $textnode->deleteData( 6,100 );
+    # TEST
+    is( $textnode->nodeValue(), $foo_str, 'UTF-8 Removal 3');
+
+    # 5. Replacement
+    my $phish_str = "ph\x{2160}sh";
+    $textnode->setData( $test_str );
+    $textnode->replaceData( 1,2, $phish_str );
+    # TEST
+    is( $textnode->nodeValue(), "t".$phish_str."t", 'UTF-8 Replacement 1');
+    $textnode->setData( $test_str );
+    $textnode->replaceData( 1,4, $phish_str );
+    # TEST
+    is( $textnode->nodeValue(), "t".$phish_str, 'UTF-8 Replacement 2');
+    $textnode->setData( $test_str );
+    $textnode->replaceData( 1,0, $phish_str );
+    # TEST
+    is( $textnode->nodeValue(), "t".$phish_str."e\xDFt",
+        'UTF-8 Replacement 3');
+
+    # 6. XML::LibXML features
+    $textnode->setData( $test_str );
+
+    my $new_str = "n\x{1D522}w";
+    $textnode->replaceDataString( "e\xDF", $new_str );
+    # TEST
+    is( $textnode->nodeValue(), "t".$new_str."t",
+        'UTF-8 replaceDataString() 1');
+
+    $textnode->replaceDataRegEx( 'n(.)w', '$1s' );
+    # TEST
+    is( $textnode->nodeValue(), "t\x{1D522}st", 'UTF-8 replaceDataRegEx() 2');
+
+    $textnode->setData( "blue $phish_str, white $phish_str, no $phish_str" );
+    $textnode->replaceDataRegEx( $phish_str, $test_str );
+    # TEST
+    is( $textnode->nodeValue(),
+        "blue $test_str, white $phish_str, no $phish_str",
+        'UTF-8 replaceDataRegEx 3',);
+
+    # replace them all!
+    $textnode->replaceDataRegEx( $phish_str, $test_str, 'g' );
+    # TEST
+    is( $textnode->nodeValue(),
+        "blue $test_str, white $test_str, no $test_str",
+        'UTF-8 replaceDataRegEx g',);
+
+    # check if deleteDataString works
+    my $hit_str = "hi\x{1D54B}";
+    my $pit_str = "\x{2119}it";
+    $textnode->setData( "$hit_str$pit_str$hit_str" );
+    $textnode->deleteDataString( $hit_str );
+    # TEST
+    is( $textnode->nodeValue(), "$pit_str$hit_str", 'UTF-8 deleteDataString 1' );
+
+    # check if deleteDataString all works
+    $textnode->setData( "$hit_str$pit_str$hit_str" );
+    $textnode->deleteDataString( $hit_str, 1 );
+    # TEST
+    is( $textnode->nodeValue(), $pit_str, 'UTF-8 deleteDataString 2' );
+}
+
+{
     # standalone test
     my $node = XML::LibXML::Text->new("foo");
     # TEST