Commits

Yury Zaytsev  committed 3085368

Fix segfaults when encoding is NULL in CPyExt Unicode functions

The affected functions are PyUnicode_SetDefaultEncoding, PyUnicode_Decode,
and PyUnicode_FromEncodedObject. For each of them, the behavior on a NULL
encoding (whether to raise an exception or to fall back to the default
encoding) tracks CPython 2.7 semantics.

  • Participants
  • Parent commits b028e2c

Comments (0)

Files changed (2)

File pypy/module/cpyext/test/test_unicodeobject.py

         invalid = rffi.str2charp('invalid')
         utf_8 = rffi.str2charp('utf-8')
         prev_encoding = rffi.str2charp(space.unwrap(w_default_encoding))
+        self.raises(space, api, TypeError, api.PyUnicode_SetDefaultEncoding, lltype.nullptr(rffi.CCHARP.TO))
         assert api.PyUnicode_SetDefaultEncoding(invalid) == -1
         assert api.PyErr_Occurred() is space.w_LookupError
         api.PyErr_Clear()
         rffi.free_charp(b_text)
         rffi.free_charp(b_encoding)
 
+    def test_decode_null_encoding(self, space, api):
+        null_charp = lltype.nullptr(rffi.CCHARP.TO)
+        u_text = u'abcdefg'
+        s_text = space.str_w(api.PyUnicode_AsEncodedString(space.wrap(u_text), null_charp, null_charp))
+        b_text = rffi.str2charp(s_text)
+        assert space.unwrap(api.PyUnicode_Decode(b_text, len(s_text), null_charp, null_charp)) == u_text
+        self.raises(space, api, TypeError, api.PyUnicode_FromEncodedObject, space.wrap(u_text), null_charp, None)
+        rffi.free_charp(b_text)
+
     def test_leak(self):
         size = 50
         raw_buf, gc_buf = rffi.alloc_buffer(size)

File pypy/module/cpyext/unicodeobject.py

 def PyUnicode_SetDefaultEncoding(space, encoding):
     """Sets the currently active default encoding. Returns 0 on
     success, -1 in case of an error."""
+    if not encoding:
+        PyErr_BadArgument(space)
     w_encoding = space.wrap(rffi.charp2str(encoding))
     setdefaultencoding(space, w_encoding)
     default_encoding[0] = '\x00'
     in the unicode() built-in function.  The codec to be used is looked up
     using the Python codec registry.  Return NULL if an exception was raised by
     the codec."""
+    if not encoding:
+        # This tracks CPython 2.7; in CPython 3.4, 'utf-8' is hardcoded instead.
+        encoding = PyUnicode_GetDefaultEncoding(space)
+    w_encoding = space.wrap(rffi.charp2str(encoding))
     w_str = space.wrap(rffi.charpsize2str(s, size))
-    w_encoding = space.wrap(rffi.charp2str(encoding))
     if errors:
         w_errors = space.wrap(rffi.charp2str(errors))
     else:
 
     All other objects, including Unicode objects, cause a TypeError to be
     set."""
+    if not encoding:
+        raise OperationError(space.w_TypeError,
+                             space.wrap("decoding Unicode is not supported"))
     w_encoding = space.wrap(rffi.charp2str(encoding))
     if errors:
         w_errors = space.wrap(rffi.charp2str(errors))