Commits

Anonymous committed 264e45b

Correct the handling of 0-termination of PyUnicode_AsWideChar()
and its usage in PyLocale_strcoll().

Clarify the documentation on this.

Thanks to Andreas Degert for pointing this out.

  • Participants
  • Parent commits 2b9c2ed
  • Branches legacy-trunk

Comments (0)

Files changed (4)

File Doc/api/concrete.tex

                                              wchar_t *w,
                                              int size}
   Copies the Unicode object contents into the \ctype{wchar_t} buffer
-  \var{w}.  At most \var{size} \ctype{wchar_t} characters are copied.
-  Returns the number of \ctype{wchar_t} characters copied or -1 in
-  case of an error.
+  \var{w}.  At most \var{size} \ctype{wchar_t} characters are copied;
+  the trailing 0-termination character is copied as well only if it
+  fits within \var{size}.  Returns the number of \ctype{wchar_t}
+  characters copied (excluding a possibly trailing 0-termination
+  character) or -1 in case of an error.  Note that the resulting
+  \ctype{wchar_t} string may or may not be 0-terminated.  It is the
+  responsibility of the caller to make sure that the \ctype{wchar_t}
+  string is 0-terminated if the application requires this.
 \end{cfuncdesc}
 
 

File Include/unicodeobject.h

     int size                    /* size of buffer */
     );
 
-/* Copies the Unicode Object contents into the whcar_t buffer w.  At
+/* Copies the Unicode Object contents into the wchar_t buffer w.  At
    most size wchar_t characters are copied.
 
-   Returns the number of wchar_t characters copied or -1 in case of an
+   Note that the resulting wchar_t string may or may not be
+   0-terminated.  It is the responsibility of the caller to make sure
+   that the wchar_t string is 0-terminated in case this is required by
+   the application.
+
+   Returns the number of wchar_t characters copied (excluding a
+   possibly trailing 0-termination character) or -1 in case of an
    error. */
 
 PyAPI_FUNC(int) PyUnicode_AsWideChar(

File Modules/_localemodule.c

     }
     /* Convert the unicode strings to wchar[]. */
     len1 = PyUnicode_GET_SIZE(os1) + 1;
-    len2 = PyUnicode_GET_SIZE(os2) + 1;
     ws1 = PyMem_MALLOC(len1 * sizeof(wchar_t));
     if (!ws1) {
         PyErr_NoMemory();
     }
     if (PyUnicode_AsWideChar((PyUnicodeObject*)os1, ws1, len1) == -1)
         goto done;
+    ws1[len1 - 1] = 0;
+    len2 = PyUnicode_GET_SIZE(os2) + 1;
     ws2 = PyMem_MALLOC(len2 * sizeof(wchar_t));
     if (!ws2) {
         PyErr_NoMemory();
     }
     if (PyUnicode_AsWideChar((PyUnicodeObject*)os2, ws2, len2) == -1)
         goto done;
+    ws2[len2 - 1] = 0;
     /* Collate the strings. */
     result = PyInt_FromLong(wcscoll(ws1, ws2));
   done:

File Objects/unicodeobject.c

 	PyErr_BadInternalCall();
 	return -1;
     }
+
+    /* If possible, try to copy the 0-termination as well */
     if (size > PyUnicode_GET_SIZE(unicode))
-	size = PyUnicode_GET_SIZE(unicode);
+	size = PyUnicode_GET_SIZE(unicode) + 1;
+
 #ifdef HAVE_USABLE_WCHAR_T
     memcpy(w, unicode->str, size * sizeof(wchar_t));
 #else
     }
 #endif
 
+    if (size > PyUnicode_GET_SIZE(unicode))
+        return PyUnicode_GET_SIZE(unicode);
+    else
     return size;
 }