Commits

Martin von Löwis committed 61d0640

Backported r55839 and r61350
Issue #4469: Prevent expandtabs() on string and unicode
objects from causing a segfault when a large width is passed
on 32-bit platforms.

Comments (0)

Files changed (5)

Lib/test/test_str.py

+
 import unittest
+import sys
 from test import test_support, string_tests
 
 
         string_tests.MixinStrUnicodeUserStringTest.test_formatting(self)
         self.assertRaises(OverflowError, '%c'.__mod__, 0x1234)
 
+    def test_expandtabs_overflows_gracefully(self):
+        # This test only applies to 32-bit platforms because expandtabs takes its
+        # tab size as a C int, not a 64-bit C long.  If expandtabs is changed to
+        # take a 64-bit long, this test should apply to all platforms.
+        if sys.maxint > (1 << 32):
+            return
+        self.assertRaises(OverflowError, 't\tt\t'.expandtabs, sys.maxint)
+
+
 def test_main():
     test_support.run_unittest(StrTest)
 

Lib/test/test_unicode.py

         self.assertEqual(repr(s1()), '\\n')
         self.assertEqual(repr(s2()), '\\n')
 
+    def test_expandtabs_overflows_gracefully(self):
+        # This test only applies to 32-bit platforms because expandtabs takes its
+        # tab size as a C int, not a 64-bit C long.  If expandtabs is changed to
+        # take a 64-bit long, this test should apply to all platforms.
+        if sys.maxint > (1 << 32):
+            return
+        self.assertRaises(OverflowError, u't\tt\t'.expandtabs, sys.maxint)
+
 def test_main():
     test_support.run_unittest(UnicodeTest)
 
 Core and builtins
 -----------------
 
+- Issue #4469: Prevent expandtabs() on string and unicode
+  objects from causing a segfault when a large width is passed
+  on 32-bit platforms.
+
 - Issue #4317: Fixed a crash in the imageop.rgb2rgb8() function.
 
 - Issue #4230: Fix a crash when a class has a custom __getattr__ and an

Objects/stringobject.c

 static PyObject*
 string_expandtabs(PyStringObject *self, PyObject *args)
 {
-    const char *e, *p;
+    const char *e, *p, *qe;
     char *q;
-    int i, j;
+    int i, j, incr;
     PyObject *u;
     int tabsize = 8;
 
 	return NULL;
 
     /* First pass: determine size of output string */
-    i = j = 0;
-    e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
+    i = 0; /* chars up to and including most recent \n or \r */
+    j = 0; /* chars since most recent \n or \r (use in tab calculations) */
+    e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
     for (p = PyString_AS_STRING(self); p < e; p++)
         if (*p == '\t') {
-	    if (tabsize > 0)
-		j += tabsize - (j % tabsize);
+	    if (tabsize > 0) {
+		incr = tabsize - (j % tabsize);
+		if (j > INT_MAX - incr)
+		    goto overflow1;
+		j += incr;
+            }
 	}
         else {
+	    if (j > INT_MAX - 1)
+		goto overflow1;
             j++;
             if (*p == '\n' || *p == '\r') {
+		if (i > INT_MAX - j)
+		    goto overflow1;
                 i += j;
                 j = 0;
             }
         }
 
+    if (i > INT_MAX - j)
+	goto overflow1;
+
     /* Second pass: create output string and fill it */
     u = PyString_FromStringAndSize(NULL, i + j);
     if (!u)
         return NULL;
 
-    j = 0;
-    q = PyString_AS_STRING(u);
+    j = 0; /* same as in first pass */
+    q = PyString_AS_STRING(u); /* next output char */
+    qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
 
     for (p = PyString_AS_STRING(self); p < e; p++)
         if (*p == '\t') {
 	    if (tabsize > 0) {
 		i = tabsize - (j % tabsize);
 		j += i;
-		while (i--)
+		while (i--) {
+		    if (q >= qe)
+			goto overflow2;
 		    *q++ = ' ';
+		}
 	    }
 	}
 	else {
+	    if (q >= qe)
+		goto overflow2;
+	    *q++ = *p;
             j++;
-	    *q++ = *p;
             if (*p == '\n' || *p == '\r')
                 j = 0;
         }
 
     return u;
+
+  overflow2:
+    Py_DECREF(u);
+  overflow1:
+    PyErr_SetString(PyExc_OverflowError, "new string is too long");
+    return NULL;
 }
 
 static PyObject *

Objects/unicodeobject.c

     Py_UNICODE *e;
     Py_UNICODE *p;
     Py_UNICODE *q;
-    int i, j;
+    Py_UNICODE *qe;
+    int i, j, incr;
     PyUnicodeObject *u;
     int tabsize = 8;
 
 	return NULL;
 
     /* First pass: determine size of output string */
-    i = j = 0;
-    e = self->str + self->length;
+    i = 0; /* chars up to and including most recent \n or \r */
+    j = 0; /* chars since most recent \n or \r (use in tab calculations) */
+    e = self->str + self->length; /* end of input */
     for (p = self->str; p < e; p++)
         if (*p == '\t') {
-	    if (tabsize > 0)
-		j += tabsize - (j % tabsize);
+	    if (tabsize > 0) {
+		incr = tabsize - (j % tabsize); /* cannot overflow */
+		if (j > INT_MAX - incr)
+		    goto overflow1;
+		j += incr;
+            }
 	}
         else {
+	    if (j > INT_MAX - 1)
+		goto overflow1;
             j++;
             if (*p == '\n' || *p == '\r') {
+		if (i > INT_MAX - j)
+		    goto overflow1;
                 i += j;
                 j = 0;
             }
         }
 
+    if (i > INT_MAX - j)
+	goto overflow1;
+
     /* Second pass: create output string and fill it */
     u = _PyUnicode_New(i + j);
     if (!u)
         return NULL;
 
-    j = 0;
-    q = u->str;
+    j = 0; /* same as in first pass */
+    q = u->str; /* next output char */
+    qe = u->str + u->length; /* end of output */
 
     for (p = self->str; p < e; p++)
         if (*p == '\t') {
 	    if (tabsize > 0) {
 		i = tabsize - (j % tabsize);
 		j += i;
-		while (i--)
+		while (i--) {
+		    if (q >= qe)
+			goto overflow2;
 		    *q++ = ' ';
+                }
 	    }
 	}
 	else {
+	    if (q >= qe)
+		goto overflow2;
+	    *q++ = *p;
             j++;
-	    *q++ = *p;
             if (*p == '\n' || *p == '\r')
                 j = 0;
         }
 
     return (PyObject*) u;
+
+  overflow2:
+    Py_DECREF(u);
+  overflow1:
+    PyErr_SetString(PyExc_OverflowError, "new string is too long");
+    return NULL;
 }
 
 PyDoc_STRVAR(find__doc__,