Commits

Kirill Simonov committed fb17481

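Fixed emission of an invalid BOM for UTF-16 output: the emitter now encodes the character U+FEFF in the target encoding instead of encoding the two characters U+00FF and U+00FE.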

  • Participants
  • Parent commits 6b916f2

Comments (0)

Files changed (12)

lib/yaml/emitter.py

     def write_stream_start(self):
         # Write BOM if needed.
         if self.encoding and self.encoding.startswith('utf-16'):
-            self.stream.write(u'\xFF\xFE'.encode(self.encoding))
+            self.stream.write(u'\uFEFF'.encode(self.encoding))
 
     def write_stream_end(self):
         self.flush_stream()

lib3/yaml/emitter.py

     def write_stream_start(self):
         # Write BOM if needed.
         if self.encoding and self.encoding.startswith('utf-16'):
-            self.stream.write('\xFF\xFE'.encode(self.encoding))
+            self.stream.write('\uFEFF'.encode(self.encoding))
 
     def write_stream_end(self):
         self.flush_stream()

tests/data/utf16be.code

+"UTF-16-BE"

tests/data/utf16be.data

Binary file added.

tests/data/utf16le.code

+"UTF-16-LE"

tests/data/utf16le.data

Binary file added.

tests/data/utf8-implicit.code

+"implicit UTF-8"

tests/data/utf8-implicit.data

+--- implicit UTF-8

tests/data/utf8.code

+"UTF-8"

tests/data/utf8.data

+--- UTF-8

tests/lib/test_representer.py

 def test_representer_types(code_filename, verbose=False):
     test_constructor._make_objects()
     for allow_unicode in [False, True]:
-        native1 = test_constructor._load_code(open(code_filename, 'rb').read())
-        native2 = None
-        try:
-            output = yaml.dump(native1, Dumper=test_constructor.MyDumper,
-                        allow_unicode=allow_unicode)
-            native2 = yaml.load(output, Loader=test_constructor.MyLoader)
+        for encoding in ['utf-8', 'utf-16-be', 'utf-16-le']:
+            native1 = test_constructor._load_code(open(code_filename, 'rb').read())
+            native2 = None
             try:
-                if native1 == native2:
-                    continue
-            except TypeError:
-                pass
-            value1 = test_constructor._serialize_value(native1)
-            value2 = test_constructor._serialize_value(native2)
-            if verbose:
-                print "SERIALIZED NATIVE1:"
-                print value1
-                print "SERIALIZED NATIVE2:"
-                print value2
-            assert value1 == value2, (native1, native2)
-        finally:
-            if verbose:
-                print "NATIVE1:"
-                pprint.pprint(native1)
-                print "NATIVE2:"
-                pprint.pprint(native2)
-                print "OUTPUT:"
-                print output
+                output = yaml.dump(native1, Dumper=test_constructor.MyDumper,
+                            allow_unicode=allow_unicode, encoding=encoding)
+                native2 = yaml.load(output, Loader=test_constructor.MyLoader)
+                try:
+                    if native1 == native2:
+                        continue
+                except TypeError:
+                    pass
+                value1 = test_constructor._serialize_value(native1)
+                value2 = test_constructor._serialize_value(native2)
+                if verbose:
+                    print "SERIALIZED NATIVE1:"
+                    print value1
+                    print "SERIALIZED NATIVE2:"
+                    print value2
+                assert value1 == value2, (native1, native2)
+            finally:
+                if verbose:
+                    print "NATIVE1:"
+                    pprint.pprint(native1)
+                    print "NATIVE2:"
+                    pprint.pprint(native2)
+                    print "OUTPUT:"
+                    print output
 
 test_representer_types.unittest = ['.code']
 

tests/lib3/test_representer.py

 def test_representer_types(code_filename, verbose=False):
     test_constructor._make_objects()
     for allow_unicode in [False, True]:
-        native1 = test_constructor._load_code(open(code_filename, 'rb').read())
-        native2 = None
-        try:
-            output = yaml.dump(native1, Dumper=test_constructor.MyDumper,
-                        allow_unicode=allow_unicode)
-            native2 = yaml.load(output, Loader=test_constructor.MyLoader)
+        for encoding in ['utf-8', 'utf-16-be', 'utf-16-le']:
+            native1 = test_constructor._load_code(open(code_filename, 'rb').read())
+            native2 = None
             try:
-                if native1 == native2:
-                    continue
-            except TypeError:
-                pass
-            value1 = test_constructor._serialize_value(native1)
-            value2 = test_constructor._serialize_value(native2)
-            if verbose:
-                print("SERIALIZED NATIVE1:")
-                print(value1)
-                print("SERIALIZED NATIVE2:")
-                print(value2)
-            assert value1 == value2, (native1, native2)
-        finally:
-            if verbose:
-                print("NATIVE1:")
-                pprint.pprint(native1)
-                print("NATIVE2:")
-                pprint.pprint(native2)
-                print("OUTPUT:")
-                print(output)
+                output = yaml.dump(native1, Dumper=test_constructor.MyDumper,
+                            allow_unicode=allow_unicode, encoding=encoding)
+                native2 = yaml.load(output, Loader=test_constructor.MyLoader)
+                try:
+                    if native1 == native2:
+                        continue
+                except TypeError:
+                    pass
+                value1 = test_constructor._serialize_value(native1)
+                value2 = test_constructor._serialize_value(native2)
+                if verbose:
+                    print("SERIALIZED NATIVE1:")
+                    print(value1)
+                    print("SERIALIZED NATIVE2:")
+                    print(value2)
+                assert value1 == value2, (native1, native2)
+            finally:
+                if verbose:
+                    print("NATIVE1:")
+                    pprint.pprint(native1)
+                    print("NATIVE2:")
+                    pprint.pprint(native2)
+                    print("OUTPUT:")
+                    print(output)
 
 test_representer_types.unittest = ['.code']