1. nathanrice
  2. pypy

Commits

Amaury Forgeot d'Arc  committed 693b081

Implement CPython issue5057: do not const-fold a unicode.__getitem__
operation which returns a non-BMP character, since this produces .pyc
files which depend on the unicode width

  • Participants
  • Parent commits 1119c38
  • Branches merge-2.7.2

Comments (0)

Files changed (2)

File pypy/interpreter/astcompiler/optimize.py

View file
 from pypy.tool import stdlib_opcode as ops
 from pypy.interpreter.error import OperationError
 from pypy.rlib.unroll import unrolling_iterable
+from pypy.rlib.runicode import MAXUNICODE
 
 
 def optimize_ast(space, tree, compile_info):
                 w_idx = subs.slice.as_constant()
                 if w_idx is not None:
                     try:
-                        return ast.Const(self.space.getitem(w_obj, w_idx), subs.lineno, subs.col_offset)
+                        w_const = self.space.getitem(w_obj, w_idx)
                     except OperationError:
-                        # Let exceptions propgate at runtime.
-                        pass
+                        # Let exceptions propagate at runtime.
+                        return subs
+
+                    # CPython issue5057: if w_obj is unicode, there might
+                    # be differences between wide and narrow builds in
+                    # cases like u'\U00012345'[0].
+                    # Wide builds will return a non-BMP char, whereas
+                    # narrow builds will return a surrogate.  In both
+                    # cases, skip the optimization in order to
+                    # produce compatible pycs.
+                    if (self.space.isinstance_w(w_obj, self.space.w_unicode)
+                        and 
+                        self.space.isinstance_w(w_const, self.space.w_unicode)):
+                        unistr = self.space.unicode_w(w_const)
+                        if len(unistr) == 1:
+                            ch = ord(unistr[0])
+                        else:
+                            ch = 0
+                        if (ch > 0xFFFF or
+                            (MAXUNICODE == 0xFFFF and 0xD800 <= ch <= 0xDFFF)):
+                            return subs
+
+                    return ast.Const(w_const, subs.lineno, subs.col_offset)
+
         return subs

File pypy/interpreter/astcompiler/test/test_compiler.py

View file
         # Just checking this doesn't crash out
         self.count_instructions(source)
 
+    def test_const_fold_unicode_subscr(self):
+        source = """def f():
+        return u"abc"[0]
+        """
+        counts = self.count_instructions(source)
+        assert counts == {ops.LOAD_CONST: 1, ops.RETURN_VALUE: 1}
+
+        # getitem outside of the BMP should not be optimized
+        source = """def f():
+        return u"\U00012345"[0]
+        """
+        counts = self.count_instructions(source)
+        assert counts == {ops.LOAD_CONST: 2, ops.BINARY_SUBSCR: 1,
+                          ops.RETURN_VALUE: 1}
+
+        # getslice is not yet optimized.
+        # Still, check a case which yields the empty string.
+        source = """def f():
+        return u"abc"[:0]
+        """
+        counts = self.count_instructions(source)
+        assert counts == {ops.LOAD_CONST: 2, ops.SLICE+2: 1,
+                          ops.RETURN_VALUE: 1}
+
     def test_remove_dead_code(self):
         source = """def f(x):
             return 5