Commits

Fredrik Lundh  committed 3d81faf

needforspeed: more stringlib refactoring

  • Participants
  • Parent commits 68e1a02
  • Branches legacy-trunk

Comments (0)

Files changed (4)

File Objects/stringlib/README.txt

 
 the stuff in here is included into relevant places; see the individual
 source files for details.
+
+--------------------------------------------------------------------
+the following defines are used by the different modules:
+
+STRINGLIB_CHAR
+
+    the type used to hold a character (char or Py_UNICODE)
+
+STRINGLIB_EMPTY
+
+    a PyObject representing the empty string
+
+int STRINGLIB_CMP(STRINGLIB_CHAR*, STRINGLIB_CHAR*, Py_ssize_t)
+
+    compares two strings. returns 0 if they match, and non-zero if not.
+
+Py_ssize_t STRINGLIB_LEN(PyObject*)
+
+    returns the length of the given string object (which must be of the
+    right type)
+
+PyObject* STRINGLIB_NEW(STRINGLIB_CHAR*, Py_ssize_t)
+
+    creates a new string object
+
+STRINGLIB_CHAR* STRINGLIB_STR(PyObject*)
+
+    returns the pointer to the character data for the given string
+    object (which must be of the right type)

File Objects/stringlib/find.h

     return pos;
 }
 
+#ifdef STRINGLIB_STR
+
+Py_LOCAL(Py_ssize_t)
+stringlib_find_obj(PyObject* str, PyObject* sub,
+                   Py_ssize_t start, Py_ssize_t end)
+{
+    return stringlib_find(
+        STRINGLIB_STR(str) + start, end - start,
+        STRINGLIB_STR(sub), STRINGLIB_LEN(sub), start
+        );
+}
+
+Py_LOCAL(int)
+stringlib_contains_obj(PyObject* str, PyObject* sub)
+{
+    return stringlib_find(
+        STRINGLIB_STR(str), STRINGLIB_LEN(str),
+        STRINGLIB_STR(sub), STRINGLIB_LEN(sub), 0
+        ) != -1;
+}
+
+Py_LOCAL(Py_ssize_t)
+stringlib_rfind_obj(PyObject* str, PyObject* sub,
+                    Py_ssize_t start, Py_ssize_t end)
+{
+    return stringlib_rfind(
+        STRINGLIB_STR(str) + start, end - start,
+        STRINGLIB_STR(sub), STRINGLIB_LEN(sub), start
+        );
+}
+
+#endif
+
 #endif
 
 /*

File Objects/stringobject.c

 	return NULL;
 }
 
+/* -------------------------------------------------------------------- */
+/* object api */
+
 static Py_ssize_t
 string_getsize(register PyObject *op)
 {
 }
 
 /* -------------------------------------------------------------------- */
-/* stringlib components */
+/* Methods */
 
 #define STRINGLIB_CHAR char
 
+#define STRINGLIB_CMP memcmp
+#define STRINGLIB_LEN PyString_GET_SIZE
 #define STRINGLIB_NEW PyString_FromStringAndSize
-#define STRINGLIB_CMP memcmp
+#define STRINGLIB_STR PyString_AS_STRING
 
 #define STRINGLIB_EMPTY nullstring
 
 #include "stringlib/fastsearch.h"
 
+#include "stringlib/count.h"
 #include "stringlib/find.h"
 #include "stringlib/partition.h"
 
-/* -------------------------------------------------------------------- */
-/* Methods */
 
 static int
 string_print(PyStringObject *op, FILE *fp, int flags)
 }
 
 static int
-string_contains(PyObject *a, PyObject *el)
+string_contains(PyObject *str_obj, PyObject *sub_obj)
 {
-	char *s = PyString_AS_STRING(a);
-	const char *sub = PyString_AS_STRING(el);
-	Py_ssize_t len_sub = PyString_GET_SIZE(el);
-	Py_ssize_t pos;
-
-	if (!PyString_CheckExact(el)) {
+	if (!PyString_CheckExact(sub_obj)) {
 #ifdef Py_USING_UNICODE
-		if (PyUnicode_Check(el))
-			return PyUnicode_Contains(a, el);
+		if (PyUnicode_Check(sub_obj))
+			return PyUnicode_Contains(str_obj, sub_obj);
 #endif
-		if (!PyString_Check(el)) {
+		if (!PyString_Check(sub_obj)) {
 			PyErr_SetString(PyExc_TypeError,
 			    "'in <string>' requires string as left operand");
 			return -1;
 		}
 	}
 
-	if (len_sub == 0)
-		return 1;
-
-	pos = fastsearch(
-		s, PyString_GET_SIZE(a),
-		sub, len_sub, FAST_SEARCH
-		);
-
-	return (pos != -1);
+	return stringlib_contains_obj(str_obj, sub_obj);
 }
 
 static PyObject *
 string_item(PyStringObject *a, register Py_ssize_t i)
 {
+	char pchar;
 	PyObject *v;
-	char *pchar;
 	if (i < 0 || i >= a->ob_size) {
 		PyErr_SetString(PyExc_IndexError, "string index out of range");
 		return NULL;
 	}
-	pchar = a->ob_sval + i;
-	v = (PyObject *)characters[*pchar & UCHAR_MAX];
+	pchar = a->ob_sval[i];
+	v = (PyObject *)characters[pchar & UCHAR_MAX];
 	if (v == NULL)
-		v = PyString_FromStringAndSize(pchar, 1);
+		v = PyString_FromStringAndSize(&pchar, 1);
 	else {
 #ifdef COUNT_ALLOCS
 		one_strings++;
 int
 _PyString_Eq(PyObject *o1, PyObject *o2)
 {
-	PyStringObject *a, *b;
-	a = (PyStringObject*)o1;
-	b = (PyStringObject*)o2;
+	PyStringObject *a = (PyStringObject*) o1;
+	PyStringObject *b = (PyStringObject*) o2;
         return a->ob_size == b->ob_size
           && *a->ob_sval == *b->ob_sval
           && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
 static PyObject *
 string_count(PyStringObject *self, PyObject *args)
 {
-	const char *s = PyString_AS_STRING(self), *sub;
-	Py_ssize_t len = PyString_GET_SIZE(self), n;
-	Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
-	Py_ssize_t m, r;
-	PyObject *subobj;
-
-	if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
-		_PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
+	PyObject *sub_obj;
+	const char *str = PyString_AS_STRING(self), *sub;
+	Py_ssize_t sub_len;
+	Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
+
+	if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
+		_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
 		return NULL;
 
-	if (PyString_Check(subobj)) {
-		sub = PyString_AS_STRING(subobj);
-		n = PyString_GET_SIZE(subobj);
+	if (PyString_Check(sub_obj)) {
+		sub = PyString_AS_STRING(sub_obj);
+		sub_len = PyString_GET_SIZE(sub_obj);
 	}
 #ifdef Py_USING_UNICODE
-	else if (PyUnicode_Check(subobj)) {
+	else if (PyUnicode_Check(sub_obj)) {
 		Py_ssize_t count;
-		count = PyUnicode_Count((PyObject *)self, subobj, i, last);
+		count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
 		if (count == -1)
 			return NULL;
 		else
-		    	return PyInt_FromLong((long) count);
+		    	return PyInt_FromSsize_t(count);
 	}
 #endif
-	else if (PyObject_AsCharBuffer(subobj, &sub, &n))
+	else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
 		return NULL;
 
-	string_adjust_indices(&i, &last, len);
-
-	m = last + 1 - n;
-	if (n == 0)
-		return PyInt_FromSsize_t(m-i);
-
-	r = fastsearch(s + i, last - i, sub, n, FAST_COUNT);
-	if (r < 0)
-		r = 0; /* no match */
-	return PyInt_FromSsize_t(r);
+	string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
+
+	return PyInt_FromSsize_t(
+		stringlib_count(str + start, end - start, sub, sub_len)
+		);
 }
 
 PyDoc_STRVAR(swapcase__doc__,
 }
 
 Py_LOCAL(Py_ssize_t)
-     countchar(char *target, int target_len, char c, Py_ssize_t maxcount)
+countchar(char *target, int target_len, char c, Py_ssize_t maxcount)
 {
 	Py_ssize_t count=0;
 	char *start=target;
 }
 
 
-/* Algorithms for difference cases of string replacement */
+/* Algorithms for different cases of string replacement */
 
 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
 Py_LOCAL(PyStringObject *)

File Objects/unicodeobject.c

 
 #define STRINGLIB_CHAR Py_UNICODE
 
+#define STRINGLIB_LEN PyUnicode_GET_SIZE
 #define STRINGLIB_NEW PyUnicode_FromUnicode
+#define STRINGLIB_STR PyUnicode_AS_UNICODE
 
 Py_LOCAL(int)
 STRINGLIB_CMP(const Py_UNICODE* str, const Py_UNICODE* other, Py_ssize_t len)
     return result;
 }
 
-static Py_ssize_t findstring(PyUnicodeObject *self,
-	       PyUnicodeObject *substring,
-	       Py_ssize_t start,
-	       Py_ssize_t end,
-	       int direction)
-{
-    FIX_START_END(self);
-
-    if (substring->length == 0)
-	return (direction > 0) ? start : end;
-
-    if (direction > 0) {
-        Py_ssize_t pos = fastsearch(
-            PyUnicode_AS_UNICODE(self) + start, end - start,
-            substring->str, substring->length, FAST_SEARCH
-            );
-        if (pos >= 0)
-            return pos + start;
-    } else {
-        end -= substring->length;
-        for (; end >= start; end--)
-            if (Py_UNICODE_MATCH(self, end, substring))
-                return end;
-    }
-    return -1;
-}
-
 Py_ssize_t PyUnicode_Find(PyObject *str,
-                          PyObject *substr,
+                          PyObject *sub,
                           Py_ssize_t start,
                           Py_ssize_t end,
                           int direction)
 {
     Py_ssize_t result;
-    PyUnicodeObject* str_obj;
-    PyUnicodeObject* sub_obj;
-
-    str_obj = (PyUnicodeObject*) PyUnicode_FromObject(str);
-    if (!str_obj)
+
+    str = PyUnicode_FromObject(str);
+    if (!str)
 	return -2;
-    sub_obj = (PyUnicodeObject*) PyUnicode_FromObject(substr);
-    if (!sub_obj) {
-	Py_DECREF(str_obj);
+    sub = PyUnicode_FromObject(sub);
+    if (!sub) {
+	Py_DECREF(str);
 	return -2;
     }
 
-    FIX_START_END(str_obj);
+    FIX_START_END((PyUnicodeObject*) str);
 
     if (direction > 0)
-        result = stringlib_find(
-            str_obj->str + start, end - start, sub_obj->str, sub_obj->length,
-            start
-            );
+        result = stringlib_find_obj(str, sub, start, end);
     else
-        result = stringlib_rfind(
-            str_obj->str + start, end - start, sub_obj->str, sub_obj->length,
-            start
-            );
-
-    Py_DECREF(str_obj);
-    Py_DECREF(sub_obj);
+        result = stringlib_rfind_obj(str, sub, start, end);
+
+    Py_DECREF(str);
+    Py_DECREF(sub);
+
     return result;
 }
 
 int PyUnicode_Contains(PyObject *container,
 		       PyObject *element)
 {
-    PyUnicodeObject *u, *v;
-    Py_ssize_t size;
-    Py_ssize_t pos;
+    PyObject *str, *sub;
+    int result;
 
     /* Coerce the two arguments */
-    v = (PyUnicodeObject *) PyUnicode_FromObject(element);
-    if (!v) {
+    sub = PyUnicode_FromObject(element);
+    if (!sub) {
 	PyErr_SetString(PyExc_TypeError,
 	    "'in <string>' requires string as left operand");
         return -1;
     }
 
-    u = (PyUnicodeObject *) PyUnicode_FromObject(container);
-    if (!u) {
-        Py_DECREF(v);
+    str = PyUnicode_FromObject(container);
+    if (!str) {
+        Py_DECREF(sub);
         return -1;
     }
 
-    size = PyUnicode_GET_SIZE(v);
-    if (!size) {
-        pos = 0;
-        goto done;
-    }
-
-    pos = fastsearch(
-        PyUnicode_AS_UNICODE(u), PyUnicode_GET_SIZE(u),
-        PyUnicode_AS_UNICODE(v), size, FAST_SEARCH
-        );
-
-done:
-    Py_DECREF(u);
-    Py_DECREF(v);
-    return (pos != -1);
+    result = stringlib_contains_obj(str, sub);
+
+    Py_DECREF(str);
+    Py_DECREF(sub);
+
+    return result;
 }
 
 /* Concat to string or Unicode object giving a new Unicode object. */
 static PyObject *
 unicode_find(PyUnicodeObject *self, PyObject *args)
 {
-    PyUnicodeObject *substring;
+    PyObject *substring;
     Py_ssize_t start = 0;
     Py_ssize_t end = PY_SSIZE_T_MAX;
-    PyObject *result;
+    Py_ssize_t result;
 
     if (!PyArg_ParseTuple(args, "O|O&O&:find", &substring,
 		_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
         return NULL;
-    substring = (PyUnicodeObject *)PyUnicode_FromObject(
-						(PyObject *)substring);
-    if (substring == NULL)
+
+    substring = PyUnicode_FromObject(substring);
+    if (!substring)
 	return NULL;
 
-    result = PyInt_FromSsize_t(findstring(self, substring, start, end, 1));
+    FIX_START_END(self);
+
+    result = stringlib_find_obj((PyObject*) self, substring, start, end);
 
     Py_DECREF(substring);
-    return result;
+
+    return PyInt_FromSsize_t(result);
 }
 
 static PyObject *
 unicode_index(PyUnicodeObject *self, PyObject *args)
 {
     Py_ssize_t result;
-    PyUnicodeObject *substring;
+    PyObject *substring;
     Py_ssize_t start = 0;
     Py_ssize_t end = PY_SSIZE_T_MAX;
 
 		_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
         return NULL;
 
-    substring = (PyUnicodeObject *)PyUnicode_FromObject(
-						(PyObject *)substring);
-    if (substring == NULL)
+    substring = PyUnicode_FromObject(substring);
+    if (!substring)
 	return NULL;
 
-    result = findstring(self, substring, start, end, 1);
+    FIX_START_END(self);
+
+    result = stringlib_find_obj((PyObject*) self, substring, start, end);
 
     Py_DECREF(substring);
+
     if (result < 0) {
         PyErr_SetString(PyExc_ValueError, "substring not found");
         return NULL;
     }
+
     return PyInt_FromSsize_t(result);
 }
 
 static PyObject *
 unicode_rfind(PyUnicodeObject *self, PyObject *args)
 {
-    PyUnicodeObject *substring;
+    PyObject *substring;
     Py_ssize_t start = 0;
     Py_ssize_t end = PY_SSIZE_T_MAX;
-    PyObject *result;
+    Py_ssize_t result;
 
     if (!PyArg_ParseTuple(args, "O|O&O&:rfind", &substring,
 		_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
         return NULL;
-    substring = (PyUnicodeObject *)PyUnicode_FromObject(
-						(PyObject *)substring);
-    if (substring == NULL)
+    substring = PyUnicode_FromObject(substring);
+    if (!substring)
 	return NULL;
 
-    result = PyInt_FromSsize_t(findstring(self, substring, start, end, -1));
+    FIX_START_END(self);
+
+    result = stringlib_rfind_obj((PyObject*)self, substring, start, end);
 
     Py_DECREF(substring);
-    return result;
+
+    return PyInt_FromSsize_t(result);
 }
 
 PyDoc_STRVAR(rindex__doc__,
 static PyObject *
 unicode_rindex(PyUnicodeObject *self, PyObject *args)
 {
-    Py_ssize_t result;
-    PyUnicodeObject *substring;
+    PyObject *substring;
     Py_ssize_t start = 0;
     Py_ssize_t end = PY_SSIZE_T_MAX;
+    Py_ssize_t result;
 
     if (!PyArg_ParseTuple(args, "O|O&O&:rindex", &substring,
 		_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
         return NULL;
-    substring = (PyUnicodeObject *)PyUnicode_FromObject(
-						(PyObject *)substring);
-    if (substring == NULL)
+    substring = PyUnicode_FromObject(substring);
+    if (!substring)
 	return NULL;
 
-    result = findstring(self, substring, start, end, -1);
+    FIX_START_END(self);
+
+    result = stringlib_rfind_obj((PyObject*)self, substring, start, end);
 
     Py_DECREF(substring);
+
     if (result < 0) {
         PyErr_SetString(PyExc_ValueError, "substring not found");
         return NULL;