Commits

Victor Stinner committed 08704e6

Use _PyUnicodeWriter for float and complex

Comments (0)

Files changed (7)

Include/complexobject.h

 /* Format the object based on the format_spec, as defined in PEP 3101
    (Advanced String Formatting). */
 #ifndef Py_LIMITED_API
-PyAPI_FUNC(PyObject *) _PyComplex_FormatAdvanced(PyObject *obj,
-                                                 PyObject *format_spec,
-                                                 Py_ssize_t start,
-                                                 Py_ssize_t end);
+PyAPI_FUNC(int) _PyComplex_FormatAdvancedWriter(
+    PyObject *obj,
+    PyObject *format_spec,
+    Py_ssize_t start,
+    Py_ssize_t end,
+    _PyUnicodeWriter *writer);
 #endif
 
 #ifdef __cplusplus

Include/floatobject.h

 
 /* Format the object based on the format_spec, as defined in PEP 3101
    (Advanced String Formatting). */
-PyAPI_FUNC(PyObject *) _PyFloat_FormatAdvanced(PyObject *obj,
-                                               PyObject *format_spec,
-                                               Py_ssize_t start,
-                                               Py_ssize_t end);
+PyAPI_FUNC(int) _PyFloat_FormatAdvancedWriter(
+    PyObject *obj,
+    PyObject *format_spec,
+    Py_ssize_t start,
+    Py_ssize_t end,
+    _PyUnicodeWriter *writer);
 #endif /* Py_LIMITED_API */
 
 #ifdef __cplusplus

Objects/complexobject.c

 complex__format__(PyObject* self, PyObject* args)
 {
     PyObject *format_spec;
+    _PyUnicodeWriter writer;
+    int ret;
 
     if (!PyArg_ParseTuple(args, "U:__format__", &format_spec))
-    return NULL;
-    return _PyComplex_FormatAdvanced(self, format_spec, 0,
-                                     PyUnicode_GET_LENGTH(format_spec));
+        return NULL;
+
+    _PyUnicodeWriter_Init(&writer, 0);
+    ret = _PyComplex_FormatAdvancedWriter(
+        self,
+        format_spec, 0, PyUnicode_GET_LENGTH(format_spec),
+        &writer);
+    if (ret == -1) {
+        _PyUnicodeWriter_Dealloc(&writer);
+        return NULL;
+    }
+    return _PyUnicodeWriter_Finish(&writer);
 }
 
 #if 0

Objects/floatobject.c

 float__format__(PyObject *self, PyObject *args)
 {
     PyObject *format_spec;
+    _PyUnicodeWriter writer;
+    int ret;
 
     if (!PyArg_ParseTuple(args, "U:__format__", &format_spec))
         return NULL;
-    return _PyFloat_FormatAdvanced(self, format_spec, 0,
-                                   PyUnicode_GET_LENGTH(format_spec));
+
+    _PyUnicodeWriter_Init(&writer, 0);
+    ret = _PyFloat_FormatAdvancedWriter(
+        self,
+        format_spec, 0, PyUnicode_GET_LENGTH(format_spec),
+        &writer);
+    if (ret == -1) {
+        _PyUnicodeWriter_Dealloc(&writer);
+        return NULL;
+    }
+    return _PyUnicodeWriter_Finish(&writer);
 }
 
 PyDoc_STRVAR(float__format__doc,

Objects/stringlib/unicode_format.h

     int ok = 0;
     PyObject *result = NULL;
     PyObject *format_spec_object = NULL;
-    PyObject *(*formatter)(PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL;
+    int (*formatter) (PyObject *, PyObject *, Py_ssize_t, Py_ssize_t, _PyUnicodeWriter*) = NULL;
     int err;
 
     /* If we know the type exactly, skip the lookup of __format__ and just
        call the formatter directly. */
-    if (PyUnicode_CheckExact(fieldobj)) {
-        err = _PyUnicode_FormatAdvancedWriter(
-            fieldobj, format_spec->str,
-            format_spec->start, format_spec->end,
-            writer);
-        return (err == 0);
-    }
-    else if (PyLong_CheckExact(fieldobj)) {
-        err = _PyLong_FormatAdvancedWriter(
-            fieldobj, format_spec->str,
-            format_spec->start, format_spec->end,
-            writer);
-        return (err == 0);
-    }
+    if (PyUnicode_CheckExact(fieldobj))
+        formatter = _PyUnicode_FormatAdvancedWriter;
+    else if (PyLong_CheckExact(fieldobj))
+        formatter = _PyLong_FormatAdvancedWriter;
     else if (PyFloat_CheckExact(fieldobj))
-        formatter = _PyFloat_FormatAdvanced;
+        formatter = _PyFloat_FormatAdvancedWriter;
+    else if (PyComplex_CheckExact(fieldobj))
+        formatter = _PyComplex_FormatAdvancedWriter;
 
     if (formatter) {
         /* we know exactly which formatter will be called when __format__ is
            looked up, so call it directly, instead. */
-        result = formatter(fieldobj, format_spec->str,
-                           format_spec->start, format_spec->end);
+        err = formatter(fieldobj, format_spec->str,
+                        format_spec->start, format_spec->end,
+                        writer);
+        return (err == 0);
     }
     else {
         /* We need to create an object out of the pointers we have, because

Python/formatter_unicode.c

 /* Do the padding in the writer's buffer and advance writer->pos to where
    the caller-supplied content goes. Return -1 on error, or 0 on success. */
 static Py_ssize_t
-fill_padding(PyObject *s, Py_ssize_t start, Py_ssize_t nchars,
+fill_padding(_PyUnicodeWriter *writer,
+             Py_ssize_t nchars,
              Py_UCS4 fill_char, Py_ssize_t n_lpadding,
              Py_ssize_t n_rpadding)
 {
-    /* Pad on left. */
-    if (n_lpadding)
-        PyUnicode_Fill(s, start, start + n_lpadding, fill_char);
-
-    /* Pad on right. */
-    if (n_rpadding)
-        PyUnicode_Fill(s, start + nchars + n_lpadding,
-                       start + nchars + n_lpadding + n_rpadding, fill_char);
-
-    /* Pointer to the user content. */
-    return start + n_lpadding;
-}
-
-/* Do the padding, and return a pointer to where the caller-supplied
-   content goes. */
-static Py_ssize_t
-fill_padding_writer(_PyUnicodeWriter *writer,
-                    Py_ssize_t start, Py_ssize_t nchars,
-                    Py_UCS4 fill_char, Py_ssize_t n_lpadding,
-                    Py_ssize_t n_rpadding)
-{
     Py_ssize_t pos, r;
 
     /* Pad on left. */
     if (n_lpadding) {
-        pos = writer->pos + start;
+        pos = writer->pos;
         r = PyUnicode_Fill(writer->buffer, pos, pos + n_lpadding, fill_char);
         if (r == -1)
             return -1;
 
     /* Pad on right. */
     if (n_rpadding) {
-        pos = writer->pos + start + nchars + n_lpadding;
+        pos = writer->pos + nchars + n_lpadding;
         r = PyUnicode_Fill(writer->buffer, pos, pos + n_rpadding, fill_char);
         if (r == -1)
             return -1;
     }
 
     /* Pointer to the user content. */
-    writer->pos += (start + n_lpadding);
+    writer->pos += n_lpadding;
     return 0;
 }
 
    as determined in calc_number_widths().
    Return -1 on error, or 0 on success. */
 static int
-fill_number(PyObject *out, Py_ssize_t pos, const NumberFieldWidths *spec,
-            PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end,
-            PyObject *prefix, Py_ssize_t p_start,
-            Py_UCS4 fill_char,
-            LocaleInfo *locale, int toupper)
-{
-    /* Used to keep track of digits, decimal, and remainder. */
-    Py_ssize_t d_pos = d_start;
-    unsigned int kind = PyUnicode_KIND(out);
-    void *data = PyUnicode_DATA(out);
-    Py_ssize_t r;
-
-    if (spec->n_lpadding) {
-        PyUnicode_Fill(out, pos, pos + spec->n_lpadding, fill_char);
-        pos += spec->n_lpadding;
-    }
-    if (spec->n_sign == 1) {
-        PyUnicode_WRITE(kind, data, pos++, spec->sign);
-    }
-    if (spec->n_prefix) {
-        _PyUnicode_FastCopyCharacters(out, pos,
-                                      prefix, p_start,
-                                      spec->n_prefix);
-        if (toupper) {
-            Py_ssize_t t;
-            for (t = 0; t < spec->n_prefix; t++) {
-                Py_UCS4 c = PyUnicode_READ(kind, data, pos + t);
-                c = Py_TOUPPER(c);
-                assert (c <= 127);
-                PyUnicode_WRITE(kind, data, pos + t, c);
-            }
-        }
-        pos += spec->n_prefix;
-    }
-    if (spec->n_spadding) {
-        PyUnicode_Fill(out, pos, pos + spec->n_spadding, fill_char);
-        pos += spec->n_spadding;
-    }
-
-    /* Only for type 'c' special case, it has no digits. */
-    if (spec->n_digits != 0) {
-        /* Fill the digits with InsertThousandsGrouping. */
-        char *pdigits;
-        if (PyUnicode_READY(digits))
-            return -1;
-        pdigits = PyUnicode_DATA(digits);
-        if (PyUnicode_KIND(digits) < kind) {
-            pdigits = _PyUnicode_AsKind(digits, kind);
-            if (pdigits == NULL)
-                return -1;
-        }
-        r = _PyUnicode_InsertThousandsGrouping(
-                out, pos,
-                spec->n_grouped_digits,
-                pdigits + kind * d_pos,
-                spec->n_digits, spec->n_min_width,
-                locale->grouping, locale->thousands_sep, NULL);
-        if (r == -1)
-            return -1;
-        assert(r == spec->n_grouped_digits);
-        if (PyUnicode_KIND(digits) < kind)
-            PyMem_Free(pdigits);
-        d_pos += spec->n_digits;
-    }
-    if (toupper) {
-        Py_ssize_t t;
-        for (t = 0; t < spec->n_grouped_digits; t++) {
-            Py_UCS4 c = PyUnicode_READ(kind, data, pos + t);
-            c = Py_TOUPPER(c);
-            if (c > 127) {
-                PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
-                return -1;
-            }
-            PyUnicode_WRITE(kind, data, pos + t, c);
-        }
-    }
-    pos += spec->n_grouped_digits;
-
-    if (spec->n_decimal) {
-        _PyUnicode_FastCopyCharacters(out, pos, locale->decimal_point, 0, spec->n_decimal);
-        pos += spec->n_decimal;
-        d_pos += 1;
-    }
-
-    if (spec->n_remainder) {
-        _PyUnicode_FastCopyCharacters(out, pos, digits, d_pos, spec->n_remainder);
-        pos += spec->n_remainder;
-        d_pos += spec->n_remainder;
-    }
-
-    if (spec->n_rpadding) {
-        PyUnicode_Fill(out, pos, pos + spec->n_rpadding, fill_char);
-        pos += spec->n_rpadding;
-    }
-    return 0;
-}
-
-/* Fill in the digit parts of a numbers's string representation,
-   as determined in calc_number_widths().
-   Return -1 on error, or 0 on success. */
-static int
-fill_number_writer(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
+fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
             PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end,
             PyObject *prefix, Py_ssize_t p_start,
             Py_UCS4 fill_char,
         goto done;
 
     /* Write into that space. First the padding. */
-    result = fill_padding_writer(writer, 0, len,
-                                 format->fill_char=='\0'?' ':format->fill_char,
-                                 lpad, rpad);
+    result = fill_padding(writer, len,
+                          format->fill_char=='\0'?' ':format->fill_char,
+                          lpad, rpad);
     if (result == -1)
         goto done;
 
         goto done;
 
     /* Populate the memory. */
-    result = fill_number_writer(writer, &spec,
-                      tmp, inumeric_chars, inumeric_chars + n_digits,
-                      tmp, prefix,
-                      format->fill_char == '\0' ? ' ' : format->fill_char,
-                      &locale, format->type == 'X');
+    result = fill_number(writer, &spec,
+                         tmp, inumeric_chars, inumeric_chars + n_digits,
+                         tmp, prefix,
+                         format->fill_char == '\0' ? ' ' : format->fill_char,
+                         &locale, format->type == 'X');
 
 done:
     Py_XDECREF(tmp);
 }
 
 /* much of this is taken from unicodeobject.c */
-static PyObject *
+static int
 format_float_internal(PyObject *value,
-                      const InternalFormatSpec *format)
+                      const InternalFormatSpec *format,
+                      _PyUnicodeWriter *writer)
 {
     char *buf = NULL;       /* buffer returned from PyOS_double_to_string */
     Py_ssize_t n_digits;
     Py_ssize_t index;
     NumberFieldWidths spec;
     int flags = 0;
-    PyObject *result = NULL;
+    int result = -1;
     Py_UCS4 maxchar = 127;
     Py_UCS4 sign_char = '\0';
     int float_type; /* Used to see if we have a nan, inf, or regular float. */
     PyObject *unicode_tmp = NULL;
-    int err;
 
     /* Locale settings, either from the actual locale or
        from a hard-coded pseudo-locale */
                                  &locale, format, &maxchar);
 
     /* Allocate the memory. */
-    result = PyUnicode_New(n_total, maxchar);
-    if (result == NULL)
+    if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
         goto done;
 
     /* Populate the memory. */
-    err = fill_number(result, 0, &spec,
-                      unicode_tmp, index, index + n_digits,
-                      NULL, 0,
-                      format->fill_char == '\0' ? ' ' : format->fill_char,
-                      &locale, 0);
-    if (err)
-        Py_CLEAR(result);
+    result = fill_number(writer, &spec,
+                         unicode_tmp, index, index + n_digits,
+                         NULL, 0,
+                         format->fill_char == '\0' ? ' ' : format->fill_char,
+                         &locale, 0);
 
 done:
     PyMem_Free(buf);
     Py_DECREF(unicode_tmp);
     free_locale_info(&locale);
-    assert(!result || _PyUnicode_CheckConsistency(result, 1));
     return result;
 }
 
 /*********** complex formatting *****************************************/
 /************************************************************************/
 
-static PyObject *
+static int
 format_complex_internal(PyObject *value,
-                        const InternalFormatSpec *format)
+                        const InternalFormatSpec *format,
+                        _PyUnicodeWriter *writer)
 {
     double re;
     double im;
     NumberFieldWidths re_spec;
     NumberFieldWidths im_spec;
     int flags = 0;
-    PyObject *result = NULL;
+    int result;
     Py_UCS4 maxchar = 127;
-    int rkind;
+    enum PyUnicode_Kind rkind;
     void *rdata;
-    Py_ssize_t index;
     Py_UCS4 re_sign_char = '\0';
     Py_UCS4 im_sign_char = '\0';
     int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
     Py_ssize_t total;
     PyObject *re_unicode_tmp = NULL;
     PyObject *im_unicode_tmp = NULL;
-    int err;
 
     /* Locale settings, either from the actual locale or
        from a hard-coded pseudo-locale */
     if (lpad || rpad)
         maxchar = Py_MAX(maxchar, format->fill_char);
 
-    result = PyUnicode_New(total, maxchar);
-    if (result == NULL)
+    if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
         goto done;
-    rkind = PyUnicode_KIND(result);
-    rdata = PyUnicode_DATA(result);
+    rkind = writer->kind;
+    rdata = writer->data;
 
     /* Populate the memory. First, the padding. */
-    index = fill_padding(result, 0,
-                         n_re_total + n_im_total + 1 + add_parens * 2,
-                         format->fill_char=='\0' ? ' ' : format->fill_char,
-                         lpad, rpad);
+    result = fill_padding(writer,
+                          n_re_total + n_im_total + 1 + add_parens * 2,
+                          format->fill_char=='\0' ? ' ' : format->fill_char,
+                          lpad, rpad);
+    if (result == -1)
+        goto done;
 
-    if (add_parens)
-        PyUnicode_WRITE(rkind, rdata, index++, '(');
+    if (add_parens) {
+        PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
+        writer->pos++;
+    }
 
     if (!skip_re) {
-        err = fill_number(result, index, &re_spec,
-                          re_unicode_tmp, i_re, i_re + n_re_digits,
-                          NULL, 0,
-                          0,
-                          &locale, 0);
-        if (err) {
-            Py_CLEAR(result);
+        result = fill_number(writer, &re_spec,
+                             re_unicode_tmp, i_re, i_re + n_re_digits,
+                             NULL, 0,
+                             0,
+                             &locale, 0);
+        if (result == -1)
             goto done;
-        }
-        index += n_re_total;
     }
-    err = fill_number(result, index, &im_spec,
-                      im_unicode_tmp, i_im, i_im + n_im_digits,
-                      NULL, 0,
-                      0,
-                      &locale, 0);
-    if (err) {
-        Py_CLEAR(result);
+    result = fill_number(writer, &im_spec,
+                         im_unicode_tmp, i_im, i_im + n_im_digits,
+                         NULL, 0,
+                         0,
+                         &locale, 0);
+    if (result == -1)
         goto done;
+    PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
+    writer->pos++;
+
+    if (add_parens) {
+        PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
+        writer->pos++;
     }
-    index += n_im_total;
-    PyUnicode_WRITE(rkind, rdata, index++, 'j');
 
-    if (add_parens)
-        PyUnicode_WRITE(rkind, rdata, index++, ')');
+    writer->pos += rpad;
 
 done:
     PyMem_Free(re_buf);
     Py_XDECREF(re_unicode_tmp);
     Py_XDECREF(im_unicode_tmp);
     free_locale_info(&locale);
-    assert(!result || _PyUnicode_CheckConsistency(result, 1));
     return result;
 }
 
         tmp = PyNumber_Float(obj);
         if (tmp == NULL)
             goto done;
-        str = format_float_internal(tmp, &format);
-        result = format_obj(str, writer);
+        result = format_float_internal(tmp, &format, writer);
         break;
 
     default:
     return result;
 }
 
-PyObject *
-_PyFloat_FormatAdvanced(PyObject *obj,
-                        PyObject *format_spec,
-                        Py_ssize_t start, Py_ssize_t end)
+int
+_PyFloat_FormatAdvancedWriter(PyObject *obj,
+                              PyObject *format_spec,
+                              Py_ssize_t start, Py_ssize_t end,
+                              _PyUnicodeWriter *writer)
 {
-    PyObject *result = NULL;
     InternalFormatSpec format;
 
     /* check for the special case of zero length format spec, make
        it equivalent to str(obj) */
-    if (start == end) {
-        result = PyObject_Str(obj);
-        goto done;
-    }
+    if (start == end)
+        return format_obj(obj, writer);
 
     /* parse the format_spec */
     if (!parse_internal_render_format_spec(format_spec, start, end,
                                            &format, '\0', '>'))
-        goto done;
+        return -1;
 
     /* type conversion? */
     switch (format.type) {
     case 'n':
     case '%':
         /* no conversion, already a float.  do the formatting */
-        result = format_float_internal(obj, &format);
-        break;
+        return format_float_internal(obj, &format, writer);
 
     default:
         /* unknown */
         unknown_presentation_type(format.type, obj->ob_type->tp_name);
-        goto done;
+        return -1;
     }
-
-done:
-    return result;
 }
 
-PyObject *
-_PyComplex_FormatAdvanced(PyObject *obj,
-                          PyObject *format_spec,
-                          Py_ssize_t start, Py_ssize_t end)
+int
+_PyComplex_FormatAdvancedWriter(PyObject *obj,
+                                PyObject *format_spec,
+                                Py_ssize_t start, Py_ssize_t end,
+                                _PyUnicodeWriter *writer)
 {
-    PyObject *result = NULL;
     InternalFormatSpec format;
 
     /* check for the special case of zero length format spec, make
        it equivalent to str(obj) */
-    if (start == end) {
-        result = PyObject_Str(obj);
-        goto done;
-    }
+    if (start == end)
+        return format_obj(obj, writer);
 
     /* parse the format_spec */
     if (!parse_internal_render_format_spec(format_spec, start, end,
                                            &format, '\0', '>'))
-        goto done;
+        return -1;
 
     /* type conversion? */
     switch (format.type) {
     case 'G':
     case 'n':
         /* no conversion, already a complex.  do the formatting */
-        result = format_complex_internal(obj, &format);
-        break;
+        return format_complex_internal(obj, &format, writer);
 
     default:
         /* unknown */
         unknown_presentation_type(format.type, obj->ob_type->tp_name);
-        goto done;
+        return -1;
     }
-
-done:
-    return result;
 }
 
Python/getargs.c

     case 'U': { /* PyUnicode object */
         PyObject **p = va_arg(*p_va, PyObject **);
-        if (PyUnicode_Check(arg))
+        if (PyUnicode_Check(arg)) {
+            if (PyUnicode_READY(arg) == -1)
+                RETURN_ERR_OCCURRED;
             *p = arg;
+        }
         else
             return converterr("str", arg, msgbuf, bufsize);
         break;