Commits

Amaury Forgeot d'Arc  committed 859f157

cpyext: implement PyUnicode_Split and PyUnicode_Splitlines

  • Participants
  • Parent commits 10ee763

Comments (0)

Files changed (3)

File pypy/module/cpyext/stubs.py

     """Concat two strings giving a new Unicode string."""
     raise NotImplementedError
 
-@cpython_api([PyObject, PyObject, Py_ssize_t], PyObject)
-def PyUnicode_Split(space, s, sep, maxsplit):
-    """Split a string giving a list of Unicode strings.  If sep is NULL, splitting
-    will be done at all whitespace substrings.  Otherwise, splits occur at the given
-    separator.  At most maxsplit splits will be done.  If negative, no limit is
-    set.  Separators are not included in the resulting list.
-
-    This function used an int type for maxsplit. This might require
-    changes in your code for properly supporting 64-bit systems."""
-    raise NotImplementedError
-
-@cpython_api([PyObject, rffi.INT_real], PyObject)
-def PyUnicode_Splitlines(space, s, keepend):
-    """Split a Unicode string at line breaks, returning a list of Unicode strings.
-    CRLF is considered to be one line break.  If keepend is 0, the Line break
-    characters are not included in the resulting strings."""
-    raise NotImplementedError
-
 @cpython_api([PyObject, PyObject, rffi.CCHARP], PyObject)
 def PyUnicode_Translate(space, str, table, errors):
     """Translate a string by applying a character mapping table to it and return the

File pypy/module/cpyext/test/test_unicodeobject.py

         assert api.PyUnicode_Find(w_str, space.wrap(u"c"), 3, 7, -1) == 5
         assert api.PyUnicode_Find(w_str, space.wrap(u"c"), 0, 4, -1) == 2
         assert api.PyUnicode_Find(w_str, space.wrap(u"z"), 0, 4, -1) == -1
+
+    def test_split(self, space, api):
+        w_str = space.wrap(u"a\nb\nc\nd")
+        assert "[u'a', u'b', u'c', u'd']" == space.unwrap(space.repr(
+                api.PyUnicode_Split(w_str, space.wrap('\n'), -1)))
+        assert r"[u'a', u'b', u'c\nd']" == space.unwrap(space.repr(
+                api.PyUnicode_Split(w_str, space.wrap('\n'), 2)))
+        assert "[u'a', u'b', u'c', u'd']" == space.unwrap(space.repr(
+                api.PyUnicode_Splitlines(w_str, 0)))
+        assert r"[u'a\n', u'b\n', u'c\n', u'd']" == space.unwrap(space.repr(
+                api.PyUnicode_Splitlines(w_str, 1)))

File pypy/module/cpyext/unicodeobject.py

         w_pos = space.call_method(w_str, "rfind", w_substr,
                                   space.wrap(start), space.wrap(end))
     return space.int_w(w_pos)
+
+@cpython_api([PyObject, PyObject, Py_ssize_t], PyObject)
+def PyUnicode_Split(space, w_str, w_sep, maxsplit):
+    """Split a string giving a list of Unicode strings.  If sep is
+    NULL, splitting will be done at all whitespace substrings.
+    Otherwise, splits occur at the given separator.  At most maxsplit
+    splits will be done.  If negative, no limit is set.  Separators
+    are not included in the resulting list."""
+    return space.call_method(w_str, "split", w_sep, space.wrap(maxsplit))
+
+@cpython_api([PyObject, rffi.INT_real], PyObject)
+def PyUnicode_Splitlines(space, w_str, keepend):
+    """Split a Unicode string at line breaks, returning a list of
+    Unicode strings.  CRLF is considered to be one line break.  If
+    keepend is 0, the Line break characters are not included in the
+    resulting strings."""
+    return space.call_method(w_str, "splitlines", space.wrap(keepend))