1. python_mirrors
  2. features/pep-3151

Commits

Victor Stinner  committed 5f11621

Ensure that newly created strings use the most efficient store in debug mode

  • Participants
  • Parent commits 0551743
  • Branches default

Comments (0)

Files changed (1)

File Objects/unicodeobject.c

View file
 #endif
 
 /* _PyUnicode_CHECK(op): in Py_DEBUG builds this expands to a call to
    _PyUnicode_CheckConsistency(); otherwise it expands to PyUnicode_Check(op).
    The changed line passes 0 as the new second argument (check_content),
    so the macro does not request the per-character "best kind" scan. */
 #ifdef Py_DEBUG
-#  define _PyUnicode_CHECK(op) _PyUnicode_CheckConsistency(op)
+#  define _PyUnicode_CHECK(op) _PyUnicode_CheckConsistency(op, 0)
 #else
 #  define _PyUnicode_CHECK(op) PyUnicode_Check(op)
 #endif
 
 #ifdef Py_DEBUG
 static int
-_PyUnicode_CheckConsistency(void *op)
+/* FIXME: use PyObject* type for op */
+_PyUnicode_CheckConsistency(void *op, int check_content)
 {
     PyASCIIObject *ascii;
     unsigned int kind;
         if (ascii->wstr == NULL)
             assert(compact->wstr_length == 0);
     }
-    return 1;
-}
-#else
-static int
-_PyUnicode_CheckConsistency(void *op)
-{
+    /* check that the best kind is used */
+    if (check_content && kind != PyUnicode_WCHAR_KIND)
+    {
+        Py_ssize_t i;
+        Py_UCS4 maxchar = 0;
+        void *data = PyUnicode_DATA(ascii);
+        for (i=0; i < ascii->length; i++)
+        {
+            Py_UCS4 ch = PyUnicode_READ(kind, data, i);
+            if (ch > maxchar)
+                maxchar = ch;
+        }
+        if (kind == PyUnicode_1BYTE_KIND) {
+            if (ascii->state.ascii == 0)
+                assert(maxchar >= 128);
+            else
+                assert(maxchar < 128);
+        }
+        else if (kind == PyUnicode_2BYTE_KIND)
+            assert(maxchar >= 0x100);
+        else
+            assert(maxchar >= 0x10000);
+    }
     return 1;
 }
 #endif
         _PyUnicode_LENGTH(unicode) = length;
         PyUnicode_WRITE(PyUnicode_KIND(unicode), data, length, 0);
         if (share_wstr || _PyUnicode_WSTR(unicode) == NULL) {
-            _PyUnicode_CheckConsistency(unicode);
+            assert(_PyUnicode_CheckConsistency(unicode, 0));
             return 0;
         }
     }
     _PyUnicode_WSTR(unicode) = wstr;
     _PyUnicode_WSTR(unicode)[length] = 0;
     _PyUnicode_WSTR_LENGTH(unicode) = length;
-    _PyUnicode_CheckConsistency(unicode);
+    assert(_PyUnicode_CheckConsistency(unicode, 0));
     return 0;
 }
 
             _PyUnicode_WSTR(unicode) = NULL;
         }
     }
+    assert(_PyUnicode_CheckConsistency(unicode, 0));
     return obj;
 }
 
         PyUnicode_4BYTE_DATA(unicode)[_PyUnicode_LENGTH(unicode)] = '\0';
     }
     _PyUnicode_STATE(unicode).ready = 1;
+    assert(_PyUnicode_CheckConsistency(unicode, 1));
     return 0;
 }
 
         *p_unicode = resize_compact(unicode, length);
         if (*p_unicode == NULL)
             return -1;
-        _PyUnicode_CheckConsistency(*p_unicode);
+        assert(_PyUnicode_CheckConsistency(*p_unicode, 0));
         return 0;
     }
     return resize_inplace((PyUnicodeObject*)unicode, length);
         if (!unicode)
             return NULL;
         PyUnicode_1BYTE_DATA(unicode)[0] = ch;
+        assert(_PyUnicode_CheckConsistency(unicode, 1));
         unicode_latin1[ch] = unicode;
     }
     Py_INCREF(unicode);
         assert(0 && "Impossible state");
     }
 
+    assert(_PyUnicode_CheckConsistency(unicode, 1));
     return (PyObject *)unicode;
 }
 
     if (!res)
         return NULL;
     memcpy(PyUnicode_1BYTE_DATA(res), u, size);
+    assert(_PyUnicode_CheckConsistency(res, 1));
     return res;
 }
 
     else
         for (i = 0; i < size; i++)
             PyUnicode_1BYTE_DATA(res)[i] = (Py_UCS1)u[i];
+    assert(_PyUnicode_CheckConsistency(res, 1));
     return res;
 }
 
         for (i = 0; i < size; i++)
             PyUnicode_WRITE(kind, data, i, u[i]);
     }
+    assert(_PyUnicode_CheckConsistency(res, 1));
     return res;
 }
 
         assert(0);
         break;
     }
+    assert(_PyUnicode_CheckConsistency(copy, 1));
     return copy;
 }
 
         PyObject_Free(callresults);
     if (numberresults)
         PyObject_Free(numberresults);
+    assert(_PyUnicode_CheckConsistency(string, 1));
     return (PyObject *)string;
   fail:
     if (callresults) {
     if (v == NULL)
         return NULL;
     PyUnicode_WRITE(PyUnicode_KIND(v), PyUnicode_DATA(v), 0, ordinal);
+    assert(_PyUnicode_CheckConsistency(v, 1));
     return v;
 }
 
         return NULL;
     }
 #endif
+    assert(_PyUnicode_CheckConsistency(unicode, 1));
     return unicode;
 
   onError:
     v = PyCodec_Decode(unicode, encoding, errors);
     if (v == NULL)
         goto onError;
+    assert(_PyUnicode_CheckConsistency(v, 1));
     return v;
 
   onError:
         Py_DECREF(v);
         goto onError;
     }
+    assert(_PyUnicode_CheckConsistency(v, 1));
     return v;
 
   onError:
         return NULL;
     }
 #endif
+    assert(_PyUnicode_CheckConsistency(unicode, 1));
     return (PyObject *)unicode;
 
   onError:
         return NULL;
     }
 #endif
+    assert(_PyUnicode_CheckConsistency(unicode, 1));
     return (PyObject *)unicode;
 
   onError:
         return NULL;
     }
 #endif
+    assert(_PyUnicode_CheckConsistency(unicode, 1));
     return (PyObject *)unicode;
 
   onError:
         return NULL;
     }
 #endif
+    assert(_PyUnicode_CheckConsistency(unicode, 1));
     return (PyObject *)unicode;
 
   onError:
         return NULL;
     }
 #endif
+    assert(_PyUnicode_CheckConsistency(v, 1));
     return (PyObject *)v;
 
   ucnhashError:
         return NULL;
     }
 #endif
+    assert(_PyUnicode_CheckConsistency(v, 1));
     return (PyObject *)v;
 
   onError:
         return NULL;
     }
 #endif
+    assert(_PyUnicode_CheckConsistency(v, 1));
     return (PyObject *)v;
 
   onError:
         return NULL;
     }
 #endif
+    assert(_PyUnicode_CheckConsistency(v, 1));
     return (PyObject *)v;
 
   onError:
         return NULL;
     }
 #endif
+    assert(_PyUnicode_CheckConsistency(v, 1));
     return (PyObject *)v;
 }
 
         return NULL;
     }
 #endif
+    assert(_PyUnicode_CheckConsistency(v, 1));
     return (PyObject *)v;
 
   onError:
         return NULL;
     }
 #endif
+    assert(_PyUnicode_CheckConsistency(result, 1));
     return result;
 }
 /* --- Decimal Encoder ---------------------------------------------------- */
         }
 
         Py_DECREF(u);
+        assert(_PyUnicode_CheckConsistency(v, 1));
         return v;
     }
 }
   Done:
     Py_DECREF(fseq);
     Py_XDECREF(sep);
+    assert(_PyUnicode_CheckConsistency(res, 1));
     return res;
 
   onError:
         return NULL;
     }
 
-    return (PyUnicodeObject*)u;
+    assert(_PyUnicode_CheckConsistency(u, 1));
+    return u;
 }
 #undef FILL
 
         PyMem_FREE(buf1);
     if (release2)
         PyMem_FREE(buf2);
+    assert(_PyUnicode_CheckConsistency(u, 1));
     return u;
 
   nothing:
         goto onError;
     Py_DECREF(u);
     Py_DECREF(v);
+    assert(_PyUnicode_CheckConsistency(w, 1));
     return w;
 
   onError:
         if (!(PyUnicode_IS_ASCII(left) && !PyUnicode_IS_ASCII(right)))
         {
             unicode_append_inplace(p_left, right);
+            if (p_left != NULL)
+                assert(_PyUnicode_CheckConsistency(*p_left, 1));
             return;
         }
     }
     if (res == NULL)
         goto error;
     Py_DECREF(left);
+    assert(_PyUnicode_CheckConsistency(res, 1));
     *p_left = res;
     return;
 
         return NULL;
     }
 #endif
+    assert(_PyUnicode_CheckConsistency(u, 1));
     return (PyObject*) u;
 
   overflow:
         }
     }
 
+    assert(_PyUnicode_CheckConsistency(u, 1));
     return (PyObject*) u;
 }
 
         }
     }
     /* Closing quote already added at the beginning */
+    assert(_PyUnicode_CheckConsistency(unicode, 1));
     return repr;
 }
 
         PyUnicode_WRITE(kind, data, fill, '0');
     }
 
+    assert(_PyUnicode_CheckConsistency(u, 1));
     return (PyObject*) u;
 }
 
 /* Implementation of the __format__ method for str objects.

    Parses `args` with the "U:__format__" format string to obtain a single
    format_spec object, then formats `self` with _PyUnicode_FormatAdvanced()
    using the range [0, PyUnicode_GET_LENGTH(format_spec)) of the spec.

    Returns the object produced by _PyUnicode_FormatAdvanced(), or NULL when
    argument parsing fails. */
 static PyObject *
 unicode__format__(PyObject* self, PyObject* args)
 {
-    PyObject *format_spec;
+    PyObject *format_spec, *out;
 
     if (!PyArg_ParseTuple(args, "U:__format__", &format_spec))
         return NULL;
 
     /* The result is stored in `out` (instead of being returned directly) so
        that a non-NULL result can be passed through the consistency check
        with check_content = 1 before it is handed back to the caller. */
-    return _PyUnicode_FormatAdvanced(self, format_spec, 0,
+    out = _PyUnicode_FormatAdvanced(self, format_spec, 0,
                                      PyUnicode_GET_LENGTH(format_spec));
+    if (out != NULL)
+        assert(_PyUnicode_CheckConsistency(out, 1));
+    return out;
 }
 
 PyDoc_STRVAR(p_format__doc__,
             Py_UCS4 ch = PyUnicode_READ(src_kind, src_data, cur);
             PyUnicode_WRITE(dest_kind, dest_data, i, ch);
         }
+        assert(_PyUnicode_CheckConsistency(result, 1));
         return result;
     } else {
         PyErr_SetString(PyExc_TypeError, "string indices must be integers");
         Py_DECREF(args);
     }
     Py_DECREF(uformat);
+    assert(_PyUnicode_CheckConsistency(result, 1));
     return (PyObject *)result;
 
   onError:
     Py_MEMCPY(data, PyUnicode_DATA(unicode),
               PyUnicode_KIND_SIZE(kind, length + 1));
     Py_DECREF(unicode);
+    assert(_PyUnicode_CheckConsistency(self, 1));
     return (PyObject *)self;
 
 onError:
 
     /* Init the implementation */
     unicode_empty = PyUnicode_New(0, 0);
+    assert(_PyUnicode_CheckConsistency(unicode_empty, 1));
     if (!unicode_empty)
         Py_FatalError("Can't create empty string");