Commits

Martin von Löwis committed ec5d228

Issue #6097: Escape UTF-8 surrogates resulting from mbstowcs conversion
of the command line.

Comments (0)

Files changed (2)

 Core and Builtins
 -----------------
 
+- Issue #6097: Escape UTF-8 surrogates resulting from mbstowcs conversion
+  of the command line.
+
 - Issue #6012: Add cleanup support to O& argument parsing.
 
 - Issue #6089: Fixed str.format with certain invalid field specifiers
 		if (!res)
 			goto oom;
 		count = mbstowcs(res, arg, argsize+1);
-		if (count != (size_t)-1)
-			return res;
+		if (count != (size_t)-1) {
+			wchar_t *tmp;
+			/* Only use the result if it contains no
+			   surrogate characters. */
+			for (tmp = res; *tmp != 0 &&
+				     (*tmp < 0xd800 || *tmp > 0xdfff); tmp++)
+				;
+			if (*tmp == 0)
+				return res;
+		}
 		PyMem_Free(res);
 	}
 	/* Conversion failed. Fall back to escaping with surrogateescape. */
 			memset(&mbs, 0, sizeof mbs);
 			continue;
 		}
+		if (*out >= 0xd800 && *out <= 0xdfff) {
+			/* Surrogate character.  Escape the original
+			   byte sequence with surrogateescape. */
+			argsize -= converted;
+			while (converted--)
+				*out++ = 0xdc00 + *in++;
+			continue;
+		}
 		/* successfully converted some bytes */
 		in += converted;
 		argsize -= converted;