1. python_mirrors
  2. sandbox/split-unicodeobject.c

Commits

Victor Stinner  committed 33f965a Draft

reorganize unicodecodecs.c

  • Participants
  • Parent commits dd05a78
  • Branches default

Comments (0)

Files changed (1)

File Objects/unicodecodecs.c

View file
 #include "ucnhash.h"
 #include "unicodeimpl.h"
 
+/* Return the name of the default encoding.
+   The result points to a static string; callers must not free it. */
+const char *
+PyUnicode_GetDefaultEncoding(void)
+{
+    static const char default_encoding[] = "utf-8";
+
+    return default_encoding;
+}
+
+/* --- Codec Helpers ------------------------------------------------------ */
+
 int
 _PyUnicode_Putchar(PyObject **p_unicode, Py_ssize_t *pos,
                 Py_UCS4 ch)
     return 0;
 }
 
-const char *
-PyUnicode_GetDefaultEncoding(void)
+/* Create or adjust a UnicodeEncodeError.
+
+   If *exceptionObject is NULL, a new exception is built by calling
+   PyExc_UnicodeEncodeError with (encoding, unicode, startpos, endpos,
+   reason) and the result of that call is stored in *exceptionObject as is
+   (so it stays NULL when the call fails).
+
+   Otherwise the existing exception is reused: its start, end and reason
+   are overwritten.  If any of the three setters reports failure, the
+   exception is released and *exceptionObject is reset to NULL.
+
+   The function returns nothing; callers detect failure by checking
+   whether *exceptionObject is NULL afterwards. */
+static void
+make_encode_exception(PyObject **exceptionObject,
+                      const char *encoding,
+                      PyObject *unicode,
+                      Py_ssize_t startpos, Py_ssize_t endpos,
+                      const char *reason)
 {
-    return "utf-8";
+    if (*exceptionObject == NULL) {
+        *exceptionObject = PyObject_CallFunction(
+            PyExc_UnicodeEncodeError, "sOnns",
+            encoding, unicode, startpos, endpos, reason);
+    }
+    else {
+        if (PyUnicodeEncodeError_SetStart(*exceptionObject, startpos))
+            goto onError;
+        if (PyUnicodeEncodeError_SetEnd(*exceptionObject, endpos))
+            goto onError;
+        if (PyUnicodeEncodeError_SetReason(*exceptionObject, reason))
+            goto onError;
+        return;  /* all three attributes were updated */
+      onError:
+        Py_DECREF(*exceptionObject);  /* drop the partially updated exception */
+        *exceptionObject = NULL;  /* signals failure to the caller */
+    }
 }
 
+/* Build (or refresh) the UnicodeEncodeError kept in *exceptionObject via
+   make_encode_exception() and, when that succeeded, pass it to
+   PyCodec_StrictErrors().  Nothing more is done when
+   make_encode_exception() leaves *exceptionObject NULL. */
+void
+_PyUnicode_RaiseEncodeException(PyObject **exceptionObject,
+                       const char *encoding,
+                       PyObject *unicode,
+                       Py_ssize_t startpos, Py_ssize_t endpos,
+                       const char *reason)
+{
+    PyObject *exc;
+
+    make_encode_exception(exceptionObject,
+                          encoding, unicode, startpos, endpos, reason);
+    exc = *exceptionObject;
+    if (exc == NULL)
+        return;
+
+    /* the return value is not used here */
+    (void)PyCodec_StrictErrors(exc);
+}
+
+/* error handling callback helper:
+   build arguments, call the callback and check the arguments,
+   put the result into newpos and return the replacement string, which
+   has to be freed by the caller */
+PyObject *
+_PyUnicode_EncodeCallErrorHandler(
+    const char *errors,
+    PyObject **errorHandler,
+    const char *encoding, const char *reason,
+    PyObject *unicode, PyObject **exceptionObject,
+    Py_ssize_t startpos, Py_ssize_t endpos,
+    Py_ssize_t *newpos)
+{
+    static char *argparse = "On;encoding error handler must return (str/bytes, int) tuple";
+    Py_ssize_t len;
+    PyObject *restuple;
+    PyObject *resunicode;
+
+    if (*errorHandler == NULL) {
+        *errorHandler = PyCodec_LookupError(errors);
+        if (*errorHandler == NULL)
+            return NULL;
+    }
+
+    if (PyUnicode_READY(unicode) == -1)
+        return NULL;
+    len = PyUnicode_GET_LENGTH(unicode);
+
+    make_encode_exception(exceptionObject,
+                          encoding, unicode, startpos, endpos, reason);
+    if (*exceptionObject == NULL)
+        return NULL;
+
+    restuple = PyObject_CallFunctionObjArgs(
+        *errorHandler, *exceptionObject, NULL);
+    if (restuple == NULL)
+        return NULL;
+    if (!PyTuple_Check(restuple)) {
+        PyErr_SetString(PyExc_TypeError, &argparse[3]);
+        Py_DECREF(restuple);
+        return NULL;
+    }
+    if (!PyArg_ParseTuple(restuple, argparse,
+                          &resunicode, newpos)) {
+        Py_DECREF(restuple);
+        return NULL;
+    }
+    if (!PyUnicode_Check(resunicode) && !PyBytes_Check(resunicode)) {
+        PyErr_SetString(PyExc_TypeError, &argparse[3]);
+        Py_DECREF(restuple);
+        return NULL;
+    }
+    if (*newpos<0)
+        *newpos = len + *newpos;
+    if (*newpos<0 || *newpos>len) {
+        PyErr_Format(PyExc_IndexError, "position %zd from error handler out of bounds", *newpos);
+        Py_DECREF(restuple);
+        return NULL;
+    }
+    Py_INCREF(resunicode);
+    Py_DECREF(restuple);
+    return resunicode;
+}
+
+/* Create or adjust a UnicodeDecodeError.
+
+   With *exceptionObject == NULL a new exception is created through
+   PyUnicodeDecodeError_Create() and stored in *exceptionObject (whatever
+   that call returns).  Otherwise start, end and reason of the existing
+   exception are updated; if one of the setters fails, the exception is
+   released and *exceptionObject is set to NULL. */
+static void
+make_decode_exception(PyObject **exceptionObject,
+                      const char *encoding,
+                      const char *input, Py_ssize_t length,
+                      Py_ssize_t startpos, Py_ssize_t endpos,
+                      const char *reason)
+{
+    PyObject *exc = *exceptionObject;
+
+    if (exc == NULL) {
+        *exceptionObject = PyUnicodeDecodeError_Create(
+            encoding, input, length, startpos, endpos, reason);
+        return;
+    }
+
+    /* `||` short-circuits, so the setters run in order and stop at the
+       first failure */
+    if (PyUnicodeDecodeError_SetStart(exc, startpos) ||
+        PyUnicodeDecodeError_SetEnd(exc, endpos) ||
+        PyUnicodeDecodeError_SetReason(exc, reason))
+    {
+        Py_DECREF(exc);
+        *exceptionObject = NULL;
+    }
+}
+
+/* error handling callback helper:
+   build arguments, call the callback and check the arguments,
+   if no exception occurred, copy the replacement to the output
+   and adjust various state variables.
+   return 0 on success, -1 on error
+*/
+
+int
+_PyUnicode_DecodeCallErrorHandler(
+    const char *errors, PyObject **errorHandler,
+    const char *encoding, const char *reason,
+    const char **input, const char **inend, Py_ssize_t *startinpos,
+    Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr,
+    PyObject **output, Py_ssize_t *outpos)
+{
+    static char *argparse = "O!n;decoding error handler must return (str, int) tuple";
+
+    PyObject *restuple = NULL;
+    PyObject *repunicode = NULL;
+    Py_ssize_t outsize;
+    Py_ssize_t insize;
+    Py_ssize_t requiredsize;
+    Py_ssize_t newpos;
+    PyObject *inputobj = NULL;
+    int res = -1;
+
+    if (_PyUnicode_KIND(*output) != PyUnicode_WCHAR_KIND)
+        outsize = PyUnicode_GET_LENGTH(*output);
+    else
+        outsize = _PyUnicode_WSTR_LENGTH(*output);
+
+    if (*errorHandler == NULL) {
+        *errorHandler = PyCodec_LookupError(errors);
+        if (*errorHandler == NULL)
+            goto onError;
+    }
+
+    make_decode_exception(exceptionObject,
+        encoding,
+        *input, *inend - *input,
+        *startinpos, *endinpos,
+        reason);
+    if (*exceptionObject == NULL)
+        goto onError;
+
+    restuple = PyObject_CallFunctionObjArgs(*errorHandler, *exceptionObject, NULL);
+    if (restuple == NULL)
+        goto onError;
+    if (!PyTuple_Check(restuple)) {
+        PyErr_SetString(PyExc_TypeError, &argparse[4]);
+        goto onError;
+    }
+    if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type, &repunicode, &newpos))
+        goto onError;
+    if (PyUnicode_READY(repunicode) == -1)
+        goto onError;
+
+    /* Copy back the bytes variables, which might have been modified by the
+       callback */
+    inputobj = PyUnicodeDecodeError_GetObject(*exceptionObject);
+    if (!inputobj)
+        goto onError;
+    if (!PyBytes_Check(inputobj)) {
+        PyErr_Format(PyExc_TypeError, "exception attribute object must be bytes");
+    }
+    *input = PyBytes_AS_STRING(inputobj);
+    insize = PyBytes_GET_SIZE(inputobj);
+    *inend = *input + insize;
+    /* we can DECREF safely, as the exception has another reference,
+       so the object won't go away. */
+    Py_DECREF(inputobj);
+
+    if (newpos<0)
+        newpos = insize+newpos;
+    if (newpos<0 || newpos>insize) {
+        PyErr_Format(PyExc_IndexError, "position %zd from error handler out of bounds", newpos);
+        goto onError;
+    }
+
+    if (_PyUnicode_KIND(*output) != PyUnicode_WCHAR_KIND) {
+        /* need more space? (at least enough for what we
+           have+the replacement+the rest of the string (starting
+           at the new input position), so we won't have to check space
+           when there are no errors in the rest of the string) */
+        Py_ssize_t replen = PyUnicode_GET_LENGTH(repunicode);
+        requiredsize = *outpos + replen + insize-newpos;
+        if (requiredsize > outsize) {
+            if (requiredsize<2*outsize)
+                requiredsize = 2*outsize;
+            if (PyUnicode_Resize(output, requiredsize) < 0)
+                goto onError;
+        }
+        if (_PyUnicode_Widen(output, *outpos,
+                          PyUnicode_MAX_CHAR_VALUE(repunicode)) < 0)
+            goto onError;
+        _PyUnicode_FastCopyCharacters(*output, *outpos, repunicode, 0, replen);
+        *outpos += replen;
+    }
+    else {
+        wchar_t *repwstr;
+        Py_ssize_t repwlen;
+        repwstr = PyUnicode_AsUnicodeAndSize(repunicode, &repwlen);
+        if (repwstr == NULL)
+            goto onError;
+        /* need more space? (at least enough for what we
+           have+the replacement+the rest of the string (starting
+           at the new input position), so we won't have to check space
+           when there are no errors in the rest of the string) */
+        requiredsize = *outpos + repwlen + insize-newpos;
+        if (requiredsize > outsize) {
+            if (requiredsize < 2*outsize)
+                requiredsize = 2*outsize;
+            if (PyUnicode_Resize(output, requiredsize) < 0)
+                goto onError;
+        }
+        wcsncpy(_PyUnicode_WSTR(*output) + *outpos, repwstr, repwlen);
+        *outpos += repwlen;
+    }
+    *endinpos = newpos;
+    *inptr = *input + newpos;
+
+    /* we made it! */
+    res = 0;
+
+  onError:
+    Py_XDECREF(restuple);
+    return res;
+}
+
+/* --- Codec Public API --------------------------------------------------- */
+
 PyObject *
 PyUnicode_FromObject(register PyObject *obj)
 {
     return NULL;
 }
 
-/* create or adjust a UnicodeDecodeError */
-static void
-make_decode_exception(PyObject **exceptionObject,
-                      const char *encoding,
-                      const char *input, Py_ssize_t length,
-                      Py_ssize_t startpos, Py_ssize_t endpos,
-                      const char *reason)
-{
-    if (*exceptionObject == NULL) {
-        *exceptionObject = PyUnicodeDecodeError_Create(
-            encoding, input, length, startpos, endpos, reason);
-    }
-    else {
-        if (PyUnicodeDecodeError_SetStart(*exceptionObject, startpos))
-            goto onError;
-        if (PyUnicodeDecodeError_SetEnd(*exceptionObject, endpos))
-            goto onError;
-        if (PyUnicodeDecodeError_SetReason(*exceptionObject, reason))
-            goto onError;
-    }
-    return;
-
-onError:
-    Py_DECREF(*exceptionObject);
-    *exceptionObject = NULL;
-}
-
-/* error handling callback helper:
-   build arguments, call the callback and check the arguments,
-   if no exception occurred, copy the replacement to the output
-   and adjust various state variables.
-   return 0 on success, -1 on error
-*/
-
-int
-_PyUnicode_DecodeCallErrorHandler(
-    const char *errors, PyObject **errorHandler,
-    const char *encoding, const char *reason,
-    const char **input, const char **inend, Py_ssize_t *startinpos,
-    Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr,
-    PyObject **output, Py_ssize_t *outpos)
-{
-    static char *argparse = "O!n;decoding error handler must return (str, int) tuple";
-
-    PyObject *restuple = NULL;
-    PyObject *repunicode = NULL;
-    Py_ssize_t outsize;
-    Py_ssize_t insize;
-    Py_ssize_t requiredsize;
-    Py_ssize_t newpos;
-    PyObject *inputobj = NULL;
-    int res = -1;
-
-    if (_PyUnicode_KIND(*output) != PyUnicode_WCHAR_KIND)
-        outsize = PyUnicode_GET_LENGTH(*output);
-    else
-        outsize = _PyUnicode_WSTR_LENGTH(*output);
-
-    if (*errorHandler == NULL) {
-        *errorHandler = PyCodec_LookupError(errors);
-        if (*errorHandler == NULL)
-            goto onError;
-    }
-
-    make_decode_exception(exceptionObject,
-        encoding,
-        *input, *inend - *input,
-        *startinpos, *endinpos,
-        reason);
-    if (*exceptionObject == NULL)
-        goto onError;
-
-    restuple = PyObject_CallFunctionObjArgs(*errorHandler, *exceptionObject, NULL);
-    if (restuple == NULL)
-        goto onError;
-    if (!PyTuple_Check(restuple)) {
-        PyErr_SetString(PyExc_TypeError, &argparse[4]);
-        goto onError;
-    }
-    if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type, &repunicode, &newpos))
-        goto onError;
-    if (PyUnicode_READY(repunicode) == -1)
-        goto onError;
-
-    /* Copy back the bytes variables, which might have been modified by the
-       callback */
-    inputobj = PyUnicodeDecodeError_GetObject(*exceptionObject);
-    if (!inputobj)
-        goto onError;
-    if (!PyBytes_Check(inputobj)) {
-        PyErr_Format(PyExc_TypeError, "exception attribute object must be bytes");
-    }
-    *input = PyBytes_AS_STRING(inputobj);
-    insize = PyBytes_GET_SIZE(inputobj);
-    *inend = *input + insize;
-    /* we can DECREF safely, as the exception has another reference,
-       so the object won't go away. */
-    Py_DECREF(inputobj);
-
-    if (newpos<0)
-        newpos = insize+newpos;
-    if (newpos<0 || newpos>insize) {
-        PyErr_Format(PyExc_IndexError, "position %zd from error handler out of bounds", newpos);
-        goto onError;
-    }
-
-    if (_PyUnicode_KIND(*output) != PyUnicode_WCHAR_KIND) {
-        /* need more space? (at least enough for what we
-           have+the replacement+the rest of the string (starting
-           at the new input position), so we won't have to check space
-           when there are no errors in the rest of the string) */
-        Py_ssize_t replen = PyUnicode_GET_LENGTH(repunicode);
-        requiredsize = *outpos + replen + insize-newpos;
-        if (requiredsize > outsize) {
-            if (requiredsize<2*outsize)
-                requiredsize = 2*outsize;
-            if (PyUnicode_Resize(output, requiredsize) < 0)
-                goto onError;
-        }
-        if (_PyUnicode_Widen(output, *outpos,
-                          PyUnicode_MAX_CHAR_VALUE(repunicode)) < 0)
-            goto onError;
-        _PyUnicode_FastCopyCharacters(*output, *outpos, repunicode, 0, replen);
-        *outpos += replen;
-    }
-    else {
-        wchar_t *repwstr;
-        Py_ssize_t repwlen;
-        repwstr = PyUnicode_AsUnicodeAndSize(repunicode, &repwlen);
-        if (repwstr == NULL)
-            goto onError;
-        /* need more space? (at least enough for what we
-           have+the replacement+the rest of the string (starting
-           at the new input position), so we won't have to check space
-           when there are no errors in the rest of the string) */
-        requiredsize = *outpos + repwlen + insize-newpos;
-        if (requiredsize > outsize) {
-            if (requiredsize < 2*outsize)
-                requiredsize = 2*outsize;
-            if (PyUnicode_Resize(output, requiredsize) < 0)
-                goto onError;
-        }
-        wcsncpy(_PyUnicode_WSTR(*output) + *outpos, repwstr, repwlen);
-        *outpos += repwlen;
-    }
-    *endinpos = newpos;
-    *inptr = *input + newpos;
-
-    /* we made it! */
-    res = 0;
-
-  onError:
-    Py_XDECREF(restuple);
-    return res;
-}
-
 /* --- Unicode Escape Codec ----------------------------------------------- */
 
 /* Helper function for PyUnicode_DecodeUnicodeEscape, determines
     return PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, s, size);
 }
 
-/* create or adjust a UnicodeEncodeError */
-static void
-make_encode_exception(PyObject **exceptionObject,
-                      const char *encoding,
-                      PyObject *unicode,
-                      Py_ssize_t startpos, Py_ssize_t endpos,
-                      const char *reason)
-{
-    if (*exceptionObject == NULL) {
-        *exceptionObject = PyObject_CallFunction(
-            PyExc_UnicodeEncodeError, "sOnns",
-            encoding, unicode, startpos, endpos, reason);
-    }
-    else {
-        if (PyUnicodeEncodeError_SetStart(*exceptionObject, startpos))
-            goto onError;
-        if (PyUnicodeEncodeError_SetEnd(*exceptionObject, endpos))
-            goto onError;
-        if (PyUnicodeEncodeError_SetReason(*exceptionObject, reason))
-            goto onError;
-        return;
-      onError:
-        Py_DECREF(*exceptionObject);
-        *exceptionObject = NULL;
-    }
-}
-
-void
-_PyUnicode_RaiseEncodeException(PyObject **exceptionObject,
-                       const char *encoding,
-                       PyObject *unicode,
-                       Py_ssize_t startpos, Py_ssize_t endpos,
-                       const char *reason)
-{
-    make_encode_exception(exceptionObject,
-                          encoding, unicode, startpos, endpos, reason);
-    if (*exceptionObject != NULL)
-        PyCodec_StrictErrors(*exceptionObject);
-}
-
-/* error handling callback helper:
-   build arguments, call the callback and check the arguments,
-   put the result into newpos and return the replacement string, which
-   has to be freed by the caller */
-PyObject *
-_PyUnicode_EncodeCallErrorHandler(
-    const char *errors,
-    PyObject **errorHandler,
-    const char *encoding, const char *reason,
-    PyObject *unicode, PyObject **exceptionObject,
-    Py_ssize_t startpos, Py_ssize_t endpos,
-    Py_ssize_t *newpos)
-{
-    static char *argparse = "On;encoding error handler must return (str/bytes, int) tuple";
-    Py_ssize_t len;
-    PyObject *restuple;
-    PyObject *resunicode;
-
-    if (*errorHandler == NULL) {
-        *errorHandler = PyCodec_LookupError(errors);
-        if (*errorHandler == NULL)
-            return NULL;
-    }
-
-    if (PyUnicode_READY(unicode) == -1)
-        return NULL;
-    len = PyUnicode_GET_LENGTH(unicode);
-
-    make_encode_exception(exceptionObject,
-                          encoding, unicode, startpos, endpos, reason);
-    if (*exceptionObject == NULL)
-        return NULL;
-
-    restuple = PyObject_CallFunctionObjArgs(
-        *errorHandler, *exceptionObject, NULL);
-    if (restuple == NULL)
-        return NULL;
-    if (!PyTuple_Check(restuple)) {
-        PyErr_SetString(PyExc_TypeError, &argparse[3]);
-        Py_DECREF(restuple);
-        return NULL;
-    }
-    if (!PyArg_ParseTuple(restuple, argparse,
-                          &resunicode, newpos)) {
-        Py_DECREF(restuple);
-        return NULL;
-    }
-    if (!PyUnicode_Check(resunicode) && !PyBytes_Check(resunicode)) {
-        PyErr_SetString(PyExc_TypeError, &argparse[3]);
-        Py_DECREF(restuple);
-        return NULL;
-    }
-    if (*newpos<0)
-        *newpos = len + *newpos;
-    if (*newpos<0 || *newpos>len) {
-        PyErr_Format(PyExc_IndexError, "position %zd from error handler out of bounds", *newpos);
-        Py_DECREF(restuple);
-        return NULL;
-    }
-    Py_INCREF(resunicode);
-    Py_DECREF(restuple);
-    return resunicode;
-}
-
 static PyObject *
 unicode_encode_ucs1(PyObject *unicode,
                     const char *errors,
     return _PyUnicode_AsASCIIString(unicode, NULL);
 }
 
-
 /* --- Decimal Encoder ---------------------------------------------------- */
 
 int