Amaury Forgeot d'Arc committed a2b4945

Issue1262: str.__repr__ now passes all printable characters through unescaped (PEP 3138).
Patch by arielby.

Comments (0)

Files changed (3)

pypy/objspace/std/test/test_unicodeobject.py

         raises(UnicodeError, b"\xc2".decode, "utf-8")
         assert b'\xe1\x80'.decode('utf-8', 'replace') == "\ufffd"
 
-    def test_repr_bug(self):
-        # we need to implement PEP 3138 for this to work
-        # http://www.python.org/dev/peps/pep-3138/
+    def test_repr_printable(self):
+        # PEP 3138: __repr__ respects printable characters.
         x = '\u027d'
         y = "'\u027d'"
         assert (repr(x) == y)

pypy/objspace/std/unicodeobject.py

 from pypy.rlib.objectmodel import compute_hash, specialize
 from pypy.rlib.objectmodel import compute_unique_id
 from pypy.rlib.rstring import UnicodeBuilder
-from pypy.rlib.runicode import unicode_encode_unicode_escape
+from pypy.rlib.runicode import (
+    unicode_encode_unicode_escape, unicode_escape_nonprintable)
 from pypy.module.unicodedata import unicodedb
 from pypy.tool.sourcetools import func_with_new_name
 from pypy.rlib import jit
 def repr__Unicode(space, w_unicode):
     chars = w_unicode._value
     size = len(chars)
-    s = unicode_encode_unicode_escape(chars, size, "strict", quotes=True)
+    s = unicode_escape_nonprintable(chars, size, "strict", quotes=True)
     return space.wrap(s)
 
 def mod__Unicode_ANY(space, w_format, w_values):

pypy/rlib/runicode.py

 
     return builder.build(), pos
 
-def make_unicode_escape_function():
+def make_unicode_escape_function(for_repr=False):
     # Python3 has two similar escape functions: one implements
     # encode('unicode_escape') and outputs bytes, the other implements
     # unicode.__repr__ and outputs unicode.  They cannot share RPython code,
     # so we generate them from the template below.
-    # Python2 does not really need this, but it reduces diffs between branches.
+
+    if for_repr:
+        STRING_BUILDER = UnicodeBuilder
+        STR = unicode
+        CHR = UNICHR
+    else:
+        STRING_BUILDER = StringBuilder
+        STR = str
+        CHR = chr
+
     def unicode_escape(s, size, errors, errorhandler=None, quotes=False):
         # errorhandler is not used: this function cannot cause Unicode errors
-        result = StringBuilder(size)
+        result = STRING_BUILDER(size)
 
         if quotes:
             if s.find(u'\'') != -1 and s.find(u'\"') == -1:
                 quote = ord('\"')
-                result.append('"')
+                result.append(STR('"'))
             else:
                 quote = ord('\'')
-                result.append('\'')
+                result.append(STR('\''))
         else:
             quote = 0
 
             if size == 0:
-                return ''
+                return STR('')
 
         pos = 0
         while pos < size:
 
             # Escape quotes
             if quotes and (oc == quote or ch == '\\'):
-                result.append('\\')
-                result.append(chr(oc))
+                result.append(STR('\\'))
+                result.append(CHR(oc))
                 pos += 1
                 continue
 
 
                 if 0xDC00 <= oc2 <= 0xDFFF:
                     ucs = (((oc & 0x03FF) << 10) | (oc2 & 0x03FF)) + 0x00010000
-                    raw_unicode_escape_helper(result, ucs)
+                    char_escape_helper(result, ucs)
                     pos += 1
                     continue
                 # Fall through: isolated surrogates are copied as-is
 
             # Map special whitespace to '\t', '\n', '\r'
             if ch == '\t':
-                result.append('\\t')
+                result.append(STR('\\t'))
             elif ch == '\n':
-                result.append('\\n')
+                result.append(STR('\\n'))
             elif ch == '\r':
-                result.append('\\r')
+                result.append(STR('\\r'))
             elif ch == '\\':
-                result.append('\\\\')
+                result.append(STR('\\\\'))
 
             # Map non-printable or non-ascii to '\xhh' or '\uhhhh'
-            elif oc < 32 or oc >= 0x7F:
-                raw_unicode_escape_helper(result, oc)
+            elif for_repr and not unicodedb.isprintable(oc):
+                char_escape_helper(result, oc)
+            elif not for_repr and (oc < 32 or oc >= 0x7F):
+                char_escape_helper(result, oc)
 
             # Copy everything else as-is
             else:
-                result.append(chr(oc))
+                result.append(CHR(oc))
             pos += 1
 
         if quotes:
-            result.append(chr(quote))
+            result.append(CHR(quote))
         return result.build()
 
     def char_escape_helper(result, char):
         num = hex(char)
+        if STR is unicode:
+            num = num.decode('ascii')
         if char >= 0x10000:
-            result.append("\\U")
+            result.append(STR("\\U"))
             zeros = 8
         elif char >= 0x100:
-            result.append("\\u")
+            result.append(STR("\\u"))
             zeros = 4
         else:
-            result.append("\\x")
+            result.append(STR("\\x"))
             zeros = 2
         lnum = len(num)
         nb = zeros + 2 - lnum # num starts with '0x'
         if nb > 0:
-            result.append_multiple_char('0', nb)
+            result.append_multiple_char(STR('0'), nb)
         result.append_slice(num, 2, lnum)
 
     return unicode_escape, char_escape_helper
 
 (unicode_encode_unicode_escape, raw_unicode_escape_helper
  ) = make_unicode_escape_function()
+(unicode_escape_nonprintable, _
+ ) = make_unicode_escape_function(for_repr=True)
 
 # ____________________________________________________________
 # Raw unicode escape
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.