1. python_mirrors
  2. features/faster-format

Commits

Victor Stinner  committed 21a65c1

_PyUnicodeWriter_WriteStr()

Don't copy the string if the result is just that one string: the writer shares it as a read-only buffer instead.

  • Participants
  • Parent commits 86643c5
  • Branches default

Comments (0)

Files changed (5)

File Include/unicodeobject.h

View file
 
 #ifndef Py_LIMITED_API
 typedef struct {
-    int overallocate;
-    Py_ssize_t pos;
     PyObject *buffer;
     void *data;
     enum PyUnicode_Kind kind;
     Py_UCS4 maxchar;
     Py_ssize_t size;
-    Py_ssize_t initlen;
+    Py_ssize_t pos;
+    /* minimum length of the buffer when overallocation is enabled,
+       see _PyUnicodeWriter_Init() */
+    Py_ssize_t min_length;
+    struct {
+        unsigned char overallocate:1;
+        /* If readonly is 1, buffer is a shared string (cannot be modified)
+           and size is set to 0. */
+        unsigned char readonly:1;
+    } flags;
 } _PyUnicodeWriter ;
 
-PyAPI_FUNC(int)
-_PyUnicodeWriter_Init(_PyUnicodeWriter *writer,
-                      Py_ssize_t length, int maxchar);
+/* Initialize a Unicode writer.
+
+   min_length is used by _PyUnicodeWriter_Prepare() as the minimum length of
+   the buffer when overallocation is enabled (overallocate=1) */
+PyAPI_FUNC(void)
+_PyUnicodeWriter_Init(_PyUnicodeWriter *writer, Py_ssize_t min_length);
 
 /* Prepare the buffer to write 'length' characters
    with the specified maximum character.
     (((MAXCHAR) <= (WRITER)->maxchar                                  \
       && (LENGTH) <= (WRITER)->size - (WRITER)->pos)                  \
      ? 0                                                              \
-     : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR)))
+     : (((LENGTH) == 0)                                               \
+        ? 0                                                           \
+        : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))
 
 /* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
    instead. */
 _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
                                  Py_ssize_t length, Py_UCS4 maxchar);
 
+PyAPI_FUNC(int)
+_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str);
+
 PyAPI_FUNC(PyObject *)
 _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
 

File Objects/longobject.c

View file
     if (!PyArg_ParseTuple(args, "U:__format__", &format_spec))
         return NULL;
 
-    if (_PyUnicodeWriter_Init(&writer, 0, 0) == -1)
-        return NULL;
+    _PyUnicodeWriter_Init(&writer, 0);
     ret = _PyLong_FormatAdvancedWriter(
         self,
         format_spec, 0, PyUnicode_GET_LENGTH(format_spec),

File Objects/stringlib/unicode_format.h

View file
     PyObject *result = NULL;
     PyObject *format_spec_object = NULL;
     PyObject *(*formatter)(PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL;
-    Py_ssize_t len;
     int err;
 
     /* If we know the type exactly, skip the lookup of __format__ and just
     }
     if (result == NULL)
         goto done;
-    if (PyUnicode_READY(result) == -1)
+
+    if (_PyUnicodeWriter_WriteStr(writer, result) == -1)
         goto done;
+    ok = 1;
 
-    len = PyUnicode_GET_LENGTH(result);
-    if (_PyUnicodeWriter_Prepare(writer,
-                               len, PyUnicode_MAX_CHAR_VALUE(result)) == -1)
-        goto done;
-    _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
-                                  result, 0, len);
-    writer->pos += len;
-    ok = 1;
 done:
     Py_XDECREF(format_spec_object);
     Py_XDECREF(result);
 
         if (field_present) {
             if (iter.str.start == iter.str.end)
-                writer->overallocate = 0;
+                writer->flags.overallocate = 0;
             if (!output_markup(&field_name, &format_spec,
                                format_spec_needs_expanding, conversion, writer,
                                args, kwargs, recursion_depth, auto_number))
              int recursion_depth, AutoNumber *auto_number)
 {
     _PyUnicodeWriter writer;
-    Py_ssize_t initlen;
-    Py_UCS4 maxchar;
+    Py_ssize_t minlen;
 
     /* check the recursion level */
     if (recursion_depth <= 0) {
         return NULL;
     }
 
-    initlen = PyUnicode_GET_LENGTH(input->str) + 100;
-    maxchar = PyUnicode_MAX_CHAR_VALUE(input->str);
-    if (_PyUnicodeWriter_Init(&writer, initlen, maxchar) == -1)
-        return NULL;
+    minlen = PyUnicode_GET_LENGTH(input->str) + 100;
+    _PyUnicodeWriter_Init(&writer, minlen);
 
     if (!do_markup(input, args, kwargs, &writer, recursion_depth,
                    auto_number)) {

File Objects/unicodeobject.c

View file
     assert(0 <= from_start);
     assert(0 <= to_start);
     assert(PyUnicode_Check(from));
-    assert(PyUnicode_Check(to));
     assert(PyUnicode_IS_READY(from));
-    assert(PyUnicode_IS_READY(to));
     assert(from_start + how_many <= PyUnicode_GET_LENGTH(from));
-    assert(to_start + how_many <= PyUnicode_GET_LENGTH(to));
 
     if (how_many == 0)
         return 0;
 
+    assert(PyUnicode_Check(to));
+    assert(PyUnicode_IS_READY(to));
+    assert(to_start + how_many <= PyUnicode_GET_LENGTH(to));
+
     from_kind = PyUnicode_KIND(from);
     from_data = PyUnicode_DATA(from);
     to_kind = PyUnicode_KIND(to);
     writer->kind = PyUnicode_KIND(writer->buffer);
 }
 
-int
-_PyUnicodeWriter_Init(_PyUnicodeWriter *writer,
-                      Py_ssize_t length, int maxchar)
-{
-    writer->overallocate = 1;
-    writer->pos = 0;
-    writer->buffer = NULL;
-    writer->data = NULL;
+void
+_PyUnicodeWriter_Init(_PyUnicodeWriter *writer, Py_ssize_t min_length)
+{
+    memset(writer, 0, sizeof(*writer));
+#ifdef Py_DEBUG
     writer->kind = 5;    /* invalid kind */
-    writer->maxchar = maxchar;
-    writer->size = 0;
-    writer->initlen = length;
-    return 0;
+#endif
+    writer->min_length = Py_MAX(min_length, 100);
+    writer->flags.overallocate = 1;
 }
 
 int
     newlen = writer->pos + length;
 
     if (writer->buffer == NULL) {
-        if (writer->overallocate) {
+        if (writer->flags.overallocate) {
             /* overallocate 25% to limit the number of resizes */
             if (newlen <= (PY_SSIZE_T_MAX - newlen / 4))
                 newlen += newlen / 4;
-            if (writer->initlen > newlen)
-                newlen = writer->initlen;
-            if (newlen < 100)
-                newlen = 100;
-            else if (writer->initlen > newlen)
-                newlen = writer->initlen;
+            if (newlen < writer->min_length)
+                newlen = writer->min_length;
         }
         writer->buffer = PyUnicode_New(newlen, maxchar);
         if (writer->buffer == NULL)
     }
 
     if (newlen > writer->size) {
-        if (writer->overallocate) {
+        if (writer->flags.overallocate) {
             /* overallocate 25% to limit the number of resizes */
             if (newlen <= (PY_SSIZE_T_MAX - newlen / 4))
                 newlen += newlen / 4;
                 newlen = 100;
         }
 
-        if (maxchar > writer->maxchar) {
+        if (maxchar > writer->maxchar || writer->flags.readonly) {
             /* resize + widen */
             newbuffer = PyUnicode_New(newlen, maxchar);
             if (newbuffer == NULL)
                 return -1;
-            PyUnicode_CopyCharacters(newbuffer, 0,
-                                     writer->buffer, 0, writer->pos);
+            _PyUnicode_FastCopyCharacters(newbuffer, 0,
+                                          writer->buffer, 0, writer->pos);
             Py_DECREF(writer->buffer);
+            writer->flags.readonly = 0;
         }
         else {
             newbuffer = resize_compact(writer->buffer, newlen);
     return 0;
 }
 
+int
+_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str)
+{
+    Py_UCS4 maxchar;
+    Py_ssize_t len;
+
+    if (PyUnicode_READY(str) == -1)
+        return -1;
+    len = PyUnicode_GET_LENGTH(str);
+    if (len == 0)
+        return 0;
+    maxchar = PyUnicode_MAX_CHAR_VALUE(str);
+    if (maxchar > writer->maxchar || len > writer->size - writer->pos) {
+        if (writer->buffer == NULL && !writer->flags.overallocate) {
+            Py_INCREF(str);
+            writer->buffer = str;
+            _PyUnicodeWriter_Update(writer);
+            writer->flags.readonly = 1;
+            writer->size = 0;
+            writer->pos += len;
+            return 0;
+        }
+        if (_PyUnicodeWriter_PrepareInternal(writer, len, maxchar) == -1)
+            return -1;
+    }
+    _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
+                                  str, 0, len);
+    writer->pos += len;
+    return 0;
+}
+
 PyObject *
 _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer)
 {
         Py_INCREF(unicode_empty);
         return unicode_empty;
     }
+    if (writer->flags.readonly) {
+        assert(PyUnicode_GET_LENGTH(writer->buffer) == writer->pos);
+        return writer->buffer;
+    }
     if (PyUnicode_GET_LENGTH(writer->buffer) != writer->pos) {
         PyObject *newbuffer;
         newbuffer = resize_compact(writer->buffer, writer->pos);
 
     if (PyUnicode_READY(self) == -1)
         return NULL;
-    if (_PyUnicodeWriter_Init(&writer, 0, 0) == -1)
-        return NULL;
+    _PyUnicodeWriter_Init(&writer, 0);
     ret = _PyUnicode_FormatAdvancedWriter(self, format_spec, 0,
                                           PyUnicode_GET_LENGTH(format_spec),
                                           &writer);
     fmtcnt = PyUnicode_GET_LENGTH(uformat);
     fmtpos = 0;
 
-    maxchar = PyUnicode_MAX_CHAR_VALUE(uformat);
-    if (_PyUnicodeWriter_Init(&writer, fmtcnt + 100, maxchar) < 0)
-        goto onError;
+    _PyUnicodeWriter_Init(&writer, fmtcnt + 100);
 
     if (PyTuple_Check(args)) {
         arglen = PyTuple_Size(args);
                                 "incomplete format");
                 goto onError;
             }
+            if (fmtcnt == 0)
+                writer.flags.overallocate = 0;
 
             if (c == '%') {
                 if (_PyUnicodeWriter_Prepare(&writer, 1, '%') == -1)
                 continue;
             }
 
-
             v = getnextarg(args, arglen, &argidx);
             if (v == NULL)
                 goto onError;
 
-            if (fmtcnt < 1)
-                writer.overallocate = 0;
-
             sign = 0;
             signchar = '\0';
             fill = ' ';
             case 's':
             case 'r':
             case 'a':
-                if (width == -1) {
+                if (width == -1 && prec == -1) {
                     /* Fast path */
                     if (PyUnicode_CheckExact(v) && c == 's') {
-                        if (PyUnicode_READY(v) == -1)
+                        if (_PyUnicodeWriter_WriteStr(&writer, v) == -1)
                             goto onError;
-                        maxchar = PyUnicode_MAX_CHAR_VALUE(v);
-                        len = PyUnicode_GET_LENGTH(v);
-                        if (_PyUnicodeWriter_Prepare(&writer, len, maxchar) == -1)
-                            goto onError;
-                        _PyUnicode_FastCopyCharacters(writer.buffer, writer.pos,
-                                                      v, 0, len);
-                        writer.pos += len;
                         goto nextarg;
                     }
                     else if (PyLong_CheckExact(v)) {

File Python/formatter_unicode.c

View file
 format_obj(PyObject *obj, _PyUnicodeWriter *writer)
 {
     PyObject *str;
-    Py_ssize_t len;
+    int err;
 
     str = PyObject_Str(obj);
     if (str == NULL)
         return -1;
-    if (PyUnicode_READY(str) == -1) {
-        Py_DECREF(str);
-        return -1;
-    }
-
-    len = PyUnicode_GET_LENGTH(str);
-    if (_PyUnicodeWriter_Prepare(writer,
-                                 len, PyUnicode_MAX_CHAR_VALUE(str)) == -1) {
-        Py_DECREF(str);
-        return -1;
-    }
-    _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
-                                  str, 0, len);
+    err = _PyUnicodeWriter_WriteStr(writer, str);
     Py_DECREF(str);
-    writer->pos += len;
-    return 0;
+    return err;
 }
 
 int