Commits

Jeff Allen committed 4f7b45e

buffer() support in str.__add__ and the strip operations.

Comments (0)

Files changed (7)

Lib/test/string_tests.py

                  'lstrip', unicode('xyz', 'ascii'))
             self.checkequal(unicode('xyzzyhello', 'ascii'), 'xyzzyhelloxyzzy',
                  'rstrip', unicode('xyz', 'ascii'))
-            # XXX
+            # Not a Jython skip: str passes but bytearray fails this check
+            # in both Jython 2.7 and CPython 2.7.5
             #self.checkequal(unicode('hello', 'ascii'), 'hello',
             #     'strip', unicode('xyz', 'ascii'))
 

Lib/test/test_repr.py

 
 
 def test_main():
-    from test import test_support
-    if test_support.is_jython:
-        # XXX: Jython lacks the buffer type
-        del ReprTests.test_buffer
     run_unittest(ReprTests)
     run_unittest(LongReprTest)
 

Lib/test/test_types.py

         self.assertRaises(TypeError, type, 1, 2)
         self.assertRaises(TypeError, type, 1, 2, 3, 4)
 
-    @unittest.skipIf(is_jython, "No buffer on Jython")
     def test_buffers(self):
         self.assertRaises(ValueError, buffer, 'asdf', -1)
         cmp(buffer("abc"), buffer("def")) # used to raise a warning: tp_compare didn't return -1, 0, or 1

src/org/python/core/Py2kBuffer.java

 import org.python.expose.MethodType;
 
 /**
- * Class implementing the Python <code>buffer</code> type. <code>buffer</code> is being superseded
- * in Python 2.7 by <code>memoryview</code>, and is provided here to support legacy Python code. Use
- * <code>memoryview</code> if you can. <code>buffer</code> and <code>memoryview</code> both wrap the
- * same Jython buffer API, the one designed for <code>memoryview</code>, whereas in CPython the C
- * APIs supporting each are different. Because of this, they may be applied to exactly the same
- * underlying object types. Their behaviour differs in detail.
+ * Implementation of the Python <code>buffer</code> type. <code>buffer</code> is being superseded in
+ * Python 2.7 by <code>memoryview</code>, and is provided here to support legacy Python code. Use
+ * <code>memoryview</code> if you can.
+ * <p>
+ * <code>buffer</code> and <code>memoryview</code> both wrap the <em>same</em> Jython buffer API:
+ * that designed for <code>memoryview</code>. In CPython, a new C API (which Jython's resembles) was
+ * introduced with <code>memoryview</code>. Because of this, <code>buffer</code> and
+ * <code>memoryview</code> may be supplied as arguments in the same places, and will accept as
+ * arguments the same (one-dimensional byte-array) types. Their behaviour differs as detailed in the
+ * documentation.
  */
 @ExposedType(name = "buffer", doc = BuiltinDocs.buffer_doc, base = PyObject.class,
         isBaseType = false)
 
     /** The underlying object on which the buffer was created. */
     private final BufferProtocol object;
-    /** The offset (in bytes) into the offered object at which the buffer starts */
+    /** The offset (in bytes) into the offered object at which the buffer starts. */
     private final int offset;
     /** Number of bytes to include in the buffer (or -1 for all available). */
     private final int size;
      * <code>memoryview</code>.) Note that when <code>size=-1</code> is given, the buffer reflects
      * the changing size of the underlying object.
      * 
-     * @param object the object on which this is to be a buffer
-     * @param offset into the array exposed by the object (0 for start)
-     * @param size of the slice or -1 for all of the object
+     * @param object the object on which this is to be a buffer.
+     * @param offset into the array exposed by the object (0 for start).
+     * @param size of the slice or -1 for all of the object.
      */
     public Py2kBuffer(BufferProtocol object, int offset, int size) {
         super(TYPE);
      * Every action on the <code>buffer</code> must obtain a new {@link PyBuffer} reflecting (this
      * buffer's slice of) the contents of the backing object.
      * 
-     * @return a <code>PyBuffer</code> onto the specified slice
+     * @return a <code>PyBuffer</code> onto the specified slice.
      */
     private PyBuffer getBuffer() {
         /*
-         * Ask for the full set of facilities (strides, indirect, etc.) from the object in case they
-         * are necessary for navigation, but only ask for read access. If the object is writable,
-         * the PyBuffer will be writable.
+         * Ask for a simple one-dimensional byte view (not requiring strides, indirect, etc.) from
+         * the object, as we cannot deal with other navigation. Ask for read access. If the object
+         * is writable, the PyBuffer will be writable, but we won't write to it.
          */
-        final int flags = PyBUF.FULL_RO;
-        PyBuffer buf = object.getBuffer(PyBUF.FULL_RO);
+        final int flags = PyBUF.SIMPLE;
+        PyBuffer buf = object.getBuffer(flags);
 
         // This may already be what we need, or this buffer may be a sub-range of the object
         if (offset > 0 || size >= 0) {
         return buf;
     }
 
+    /**
+     * Return a {@link PyObject} bearing the interface {@link BufferProtocol} and equivalent to the
+     * argument, or return <code>null</code>. This is a helper function to those methods that accept
+     * a range of types supporting the buffer API. Normally the return is exactly the argument,
+     * except in the case of a {@link PyUnicode}, which will be converted to a {@link PyString}
+     * according to Py2k semantics, equivalent to a UTF16BE encoding to bytes (for Py2k
+     * compatibility).
+     * 
+     * @param obj the object to access.
+     * @return <code>PyObject</code> supporting {@link BufferProtocol}, if not <code>null</code>.
+     */
+    private static BufferProtocol asBufferableOrNull(PyObject obj) {
+
+        if (obj instanceof PyUnicode) {
+            /*
+             * Jython unicode does not support the buffer protocol (so that you can't take a
+             * memoryview of one). But to be compatible with CPython we allow buffer(unicode) to
+             * export two-byte UTF-16. Fortunately, a buffer is read-only, so we can use a copy.
+             */
+            String bytes = codecs.encode((PyString)obj, "UTF-16BE", "replace");
+            return new PyString(bytes);
+
+        } else if (obj instanceof BufferProtocol) {
+            // That will do directly
+            return (BufferProtocol)obj;
+
+        } else {
+            // We don't know how to give this value the buffer API.
+            return null;
+        }
+    }
+
+    /** Names of arguments in the constructor (for ArgParser). */
     private static String[] paramNames = {"object", "offset", "size"};
 
     @ExposedNew
         int size = ap.getInt(2, -1);
 
         // Get the object as a BufferProtocol if possible
-        BufferProtocol object = null;
-        if (obj instanceof PyUnicode) {
-            /*
-             * Jython unicode does not support the buffer protocol (so that you can't take a
-             * memoryview of one). But to be compatible with CPython we allow buffer(unicode) to
-             * export two-byte UTF-16. Fortunately, a buffer is read-only, so we can use a copy.
-             */
-            String bytes = codecs.encode((PyString)obj, "UTF-16BE", "replace");
-            object = new PyString(bytes);
-
-        } else if (obj instanceof BufferProtocol) {
-                // That will do directly
-                object = (BufferProtocol)obj;
-
-        }
+        BufferProtocol object = asBufferableOrNull(obj);
 
         // Checks
         if (object == null) {
-            throw Py.TypeError("object must support the buffer protocol (or be unicode)");
+            throw Py.TypeError("buffer object expected (or unicode)");
         } else if (offset < 0) {
             throw Py.ValueError("offset must be zero or positive");
         } else if (size < -1) {
     }
 
     /**
-     * Equivalent to the standard Python <code>__add__</code> method, that for a <code>buffer</code>
-     * treats it as a <code>str</code> ({@link PyString}) containing the same bytes.
+     * {@inheritDoc} A <code>buffer</code> implements this as concatenation and returns a
+     * <code>str</code> ({@link PyString}) result.
      */
     @Override
     public PyObject __add__(PyObject other) {
 
     @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.buffer___add___doc)
     final PyObject buffer___add__(PyObject other) {
-        return __str__().__add__(other);
+
+        // The other operand must offer us the buffer interface
+        BufferProtocol bp = asBufferableOrNull(other);
+
+        if (bp == null) {
+            // Allow PyObject._basic_add to pick up the pieces or raise informative error
+            return null;
+        } else {
+            // PyBuffer on the underlying object of this buffer
+            PyBuffer buf = getBuffer();
+            try {
+                // And on the other operand (ask for simple 1D-bytes).
+                PyBuffer otherBuf = bp.getBuffer(PyBUF.SIMPLE);
+                try {
+                    // Concatenate the buffers as strings
+                    return new PyString(buf.toString().concat(otherBuf.toString()));
+                } finally {
+                    // Must always let go of the buffer
+                    otherBuf.release();
+                }
+            } finally {
+                // Must always let go of the buffer
+                buf.release();
+            }
+        }
     }
 
     /**
-     * Equivalent to the standard Python <code>__mul__</code> method, that for a <code>buffer</code>
-     * returns a <code>str</code> containing the same thing <code>n</code> times.
+     * {@inheritDoc} On a <code>buffer</code> it returns a <code>str</code> containing the buffer
+     * contents <code>n</code> times.
      */
     @Override
     public PyObject __mul__(PyObject o) {
     }
 
     /**
-     * Equivalent to the standard Python <code>__rmul__</code> method, that for a
-     * <code>buffer</code> returns a <code>str</code> containing the same thing <code>n</code>
-     * times.
+     * {@inheritDoc} On a <code>buffer</code> it returns a <code>str</code> containing the buffer
+     * contents <code>n</code> times.
      */
     @Override
     public PyObject __rmul__(PyObject o) {
     }
 
     /**
-     * <code>buffer*int</code> represent repetition in Python, and returns a <code>str</code> (
+     * <code>buffer*int</code> represents repetition in Python, and returns a <code>str</code> (
      * <code>bytes</code>) object.
      * 
      * @param count the number of times to repeat this.

src/org/python/core/PyObject.java

     // Generated by make_binops.py (Begin)
 
     /**
-     * Equivalent to the standard Python __add__ method
+     * Equivalent to the standard Python __add__ method.
      * @param     other the object to perform this binary operation with
      *            (the right-hand operand).
      * @return    the result of the add, or null if this operation
-     *            is not defined
+     *            is not defined.
      **/
     public PyObject __add__(PyObject other) {
         return null;
     }
 
     /**
-     * Equivalent to the standard Python __radd__ method
+     * Equivalent to the standard Python __radd__ method.
      * @param     other the object to perform this binary operation with
      *            (the left-hand operand).
      * @return    the result of the add, or null if this operation
     }
 
     /**
-     * Equivalent to the standard Python __iadd__ method
+     * Equivalent to the standard Python __iadd__ method.
      * @param     other the object to perform this binary operation with
      *            (the right-hand operand).
      * @return    the result of the iadd, or null if this operation
     }
 
     /**
-      * Implements the Python expression <code>this + o2</code>
+      * Implements the Python expression <code>this + o2</code>.
       * @param     o2 the object to perform this binary operation with.
       * @return    the result of the add.
       * @exception Py.TypeError if this operation can't be performed
     }
 
     /**
-      * Implements the Python expression <code>this += o2</code>
+      * Implements the Python expression <code>this += o2</code>.
       * @param     o2 the object to perform this inplace binary
       *            operation with.
       * @return    the result of the iadd.
     }
 
     /**
-     * Equivalent to the standard Python __mul__ method
+     * Equivalent to the standard Python __mul__ method.
      * @param     other the object to perform this binary operation with
      *            (the right-hand operand).
      * @return    the result of the mul, or null if this operation
     }
 
     /**
-     * Equivalent to the standard Python __rmul__ method
+     * Equivalent to the standard Python __rmul__ method.
      * @param     other the object to perform this binary operation with
      *            (the left-hand operand).
      * @return    the result of the mul, or null if this operation
     }
 
     /**
-     * Equivalent to the standard Python __imul__ method
+     * Equivalent to the standard Python __imul__ method.
      * @param     other the object to perform this binary operation with
      *            (the right-hand operand).
      * @return    the result of the imul, or null if this operation
-     *            is not defined
+     *            is not defined.
      **/
     public PyObject __imul__(PyObject other) {
         return null;
     }
 
     /**
-      * Implements the Python expression <code>this * o2</code>
+      * Implements the Python expression <code>this * o2</code>.
       * @param     o2 the object to perform this binary operation with.
       * @return    the result of the mul.
       * @exception Py.TypeError if this operation can't be performed
     }
 
     /**
-      * Implements the Python expression <code>this *= o2</code>
+      * Implements the Python expression <code>this *= o2</code>.
       * @param     o2 the object to perform this inplace binary
       *            operation with.
       * @return    the result of the imul.

src/org/python/core/PyString.java

-/// Copyright (c) Corporation for National Research Initiatives
+// Copyright (c) Corporation for National Research Initiatives
 package org.python.core;
 
 import java.lang.ref.Reference;
      * unsigned bytes. The caller specifies its requirements and navigational capabilities in the
      * <code>flags</code> argument (see the constants in interface {@link PyBUF} for an
      * explanation). The method may return the same PyBuffer object to more than one consumer.
-     * 
+     *
      * @param flags consumer requirements
      * @return the requested buffer
      */
         // ignore isBasic, doesn't apply to PyString, just PyUnicode
         return new PyString(str);
     } 
-    
+
+    /**
+     * Return a String equivalent to the argument. This is a helper function to those methods that
+     * accept any byte array type (any object that supports a one-dimensional byte buffer).
+     *
+     * @param obj to coerce to a String
+     * @return coerced value or <code>null</code> if it can't be
+     */
+    private static String asStringOrNull(PyObject obj) {
+        if (obj instanceof PyString) {
+            // str or unicode object: go directly to the String
+            return ((PyString)obj).getString();
+        } else if (obj instanceof BufferProtocol) {
+            // Other object with buffer API: briefly access the buffer
+            PyBuffer buf = ((BufferProtocol)obj).getBuffer(PyBUF.SIMPLE);
+            try {
+                return buf.toString();
+            } finally {
+                buf.release();
+            }
+        } else {
+            return null;
+        }
+    }
+
+    /**
+     * Return a String equivalent to the argument. This is a helper function to those methods that
+     * accept any byte array type (any object that supports a one-dimensional byte buffer).
+     *
+     * @param obj to coerce to a String
+     * @return coerced value
+     * @throws PyException if the coercion fails
+     */
+    private static String asStringOrError(PyObject obj) throws PyException {
+        String ret = asStringOrNull(obj);
+        if (ret != null) {
+            return ret;
+        } else {
+            throw Py.TypeError("expected str, bytearray or buffer compatible object");
+        }
+    }
+
+    /**
+     * Return a String equivalent to the argument according to the calling conventions of the
+     * <code>strip</code> and <code>sep</code> methods of <code>str</code>. Those methods accept
+     * anything bearing the buffer interface as a byte string, but also PyNone (or the argument may
+     * be omitted, showing up here as null) to indicate that the criterion is whitespace. They also
+     * accept a unicode argument, not dealt with here.
+     *
+     * @param obj to coerce to a String or null
+     * @param name of method
+     * @return coerced value or null
+     * @throws PyException if the coercion fails
+     */
+    private static String asStripSepOrError(PyObject obj, String name) throws PyException {
+
+        if (obj == null || obj == Py.None) {
+            return null;
+        } else {
+            String ret = asStringOrNull(obj);
+            if (ret != null) {
+                return ret;
+            } else {
+                throw Py.TypeError(name
+                        + " arg must be None, str, unicode, buffer compatible object");
+            }
+        }
+    }
+
     @Override
     public boolean __contains__(PyObject o) {
         return str___contains__(o);
 
     @ExposedMethod(doc = BuiltinDocs.str___contains___doc)
     final boolean str___contains__(PyObject o) {
-        if (!(o instanceof PyString))
-            throw Py.TypeError("'in <string>' requires string as left operand");
-        PyString other = (PyString) o;
-        return getString().indexOf(other.getString()) >= 0;
+        String other = asStringOrError(o);
+        return getString().indexOf(other) >= 0;
     }
 
     protected PyObject repeat(int count) {
         return repeat(o.asIndex(Py.OverflowError));
     }
 
+    /**
+     * {@inheritDoc} For a <code>str</code> addition means concatenation and returns a
+     * <code>str</code> ({@link PyString}) result, except when a {@link PyUnicode} argument is
+     * given, when a <code>PyUnicode</code> results.
+     */
     @Override
     public PyObject __add__(PyObject other) {
         return str___add__(other);
 
     @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.str___add___doc)
     final PyObject str___add__(PyObject other) {
+
         if (other instanceof PyUnicode) {
+            // Convert self to PyUnicode and escalate the problem
             return decode().__add__(other);
 
-        } else if (other instanceof PyString) {
-            PyString otherStr = (PyString)other;
-            return new PyString(getString().concat(otherStr.getString()));
-
-        } else if (other instanceof PyByteArray) {
-            return new PyString(getString().concat(other.asString()));
-
+        } else {
+            // Some kind of object with the buffer API
+            String otherStr = asStringOrNull(other);
+            if (otherStr == null) {
+                // Allow PyObject._basic_add to pick up the pieces or raise informative error
+                return null;
+            } else {
+                // Concatenate as strings
+                return new PyString(getString().concat(otherStr));
+            }
         }
-        return null;
     }
 
     @ExposedMethod(doc = BuiltinDocs.str___getnewargs___doc)
         return new String(chars);
     }
 
+    /**
+     * Equivalent of Python str.strip() with no argument, meaning strip whitespace. Any whitespace
+     * byte/character will be discarded from either end of this <code>str</code>.
+     *
+     * @return a new String, stripped of the whitespace characters/bytes
+     */
     public String strip() {
-        return str_strip(null);
+        return _strip();
     }
 
-    public String strip(String sep) {
-        return str_strip(sep);
+    /**
+     * Equivalent of Python str.strip(). Any byte/character matching one of those in
+     * <code>stripChars</code> will be discarded from either end of this <code>str</code>. If
+     * <code>stripChars == null</code>, whitespace will be stripped.
+     *
+     * @param stripChars characters to strip from either end of this str/bytes, or null
+     * @return a new String, stripped of the specified characters/bytes
+     */
+    public String strip(String stripChars) {
+        return _strip(stripChars);
     }
 
     @ExposedMethod(defaults = "null", doc = BuiltinDocs.str_strip_doc)
-    final String str_strip(String sep) {
-        char[] chars = getString().toCharArray();
-        int n=chars.length;
-        int start=0;
-        if (sep == null)
-            while (start < n && Character.isWhitespace(chars[start]))
-                start++;
-        else
-            while (start < n && sep.indexOf(chars[start]) >= 0)
-                start++;
-
-        int end=n-1;
-        if (sep == null)
-            while (end >= 0 && Character.isWhitespace(chars[end]))
-                end--;
-        else
-            while (end >= 0 && sep.indexOf(chars[end]) >= 0)
-                end--;
-
-        if (end >= start) {
-            return (end < n-1 || start > 0)
-                ? getString().substring(start, end+1) : getString();
+    final PyObject str_strip(PyObject chars) {
+        if (chars instanceof PyUnicode) {
+            // Promote the problem to a Unicode one
+            return ((PyUnicode)decode()).unicode_strip(chars);
         } else {
-            return "";
+            // It ought to be None, null, or some kind of bytes with the buffer API.
+            String stripChars = asStripSepOrError(chars, "strip");
+            // Strip specified characters or whitespace if stripChars == null
+            return new PyString(_strip(stripChars));
         }
     }
 
+    /**
+     * Implementation of Python str.strip() common to exposed and Java API, when stripping
+     * whitespace. Any whitespace byte/character will be discarded from either end of this
+     * <code>str</code>.
+     * <p>
+     * Implementation note: although a str contains only bytes, this method is also called by
+     * {@link PyUnicode#unicode_strip(PyObject)} when this is a basic-plane string.
+     *
+     * @return a new String, stripped of the whitespace characters/bytes
+     */
+    protected final String _strip() {
+        String s = getString();
+        // Rightmost non-whitespace
+        int right = _stripRight(s);
+        if (right < 0) {
+            // They're all whitespace
+            return "";
+        } else {
+            // Leftmost non-whitespace character: right known not to be a whitespace
+            int left = _stripLeft(s, right);
+            return s.substring(left, right + 1);
+        }
+    }
+
+    /**
+     * Implementation of Python str.strip() common to exposed and Java API. Any byte/character
+     * matching one of those in <code>stripChars</code> will be discarded from either end of this
+     * <code>str</code>. If <code>stripChars == null</code>, whitespace will be stripped.
+     * <p>
+     * Implementation note: although a str contains only bytes, this method is also called by
+     * {@link PyUnicode#unicode_strip(PyObject)} when both arguments are basic-plane strings.
+     *
+     * @param stripChars characters to strip or null
+     * @return a new String, stripped of the specified characters/bytes
+     */
+    protected final String _strip(String stripChars) {
+        if (stripChars == null) {
+            // Divert to the whitespace version
+            return _strip();
+        } else {
+            String s = getString();
+            // Rightmost non-matching character
+            int right = _stripRight(s, stripChars);
+            if (right < 0) {
+                // They all match
+                return "";
+            } else {
+                // Leftmost non-matching character: right is known not to match
+                int left = _stripLeft(s, stripChars, right);
+                return s.substring(left, right + 1);
+            }
+        }
+    }
+
+    /**
+     * Helper for strip, lstrip implementation, when stripping whitespace.
+     *
+     * @param s string to search (only <code>s[0:right]</code> is searched).
+     * @param right rightmost extent of string search
+     * @return index of leftmost non-whitespace character or <code>right</code> if they all are.
+     */
+    private static final int _stripLeft(String s, int right) {
+        for (int left = 0; left < right; left++) {
+            if (!Character.isWhitespace(s.charAt(left))) {
+                return left;
+            }
+        }
+        return right;
+    }
+
+    /**
+     * Helper for strip, lstrip implementation, when stripping specified characters.
+     *
+     * @param s string to search (only <code>s[0:right]</code> is searched).
+     * @param stripChars specifies set of characters to strip
+     * @param right rightmost extent of string search
+     * @return index of leftmost character not in <code>stripChars</code> or <code>right</code> if
+     *         they all are.
+     */
+    private static final int _stripLeft(String s, String stripChars, int right) {
+        for (int left = 0; left < right; left++) {
+            if (stripChars.indexOf(s.charAt(left)) < 0) {
+                return left;
+            }
+        }
+        return right;
+    }
+
+    /**
+     * Helper for strip, rstrip implementation, when stripping whitespace.
+     *
+     * @param s string to search.
+     * @return index of rightmost non-whitespace character or -1 if they all are.
+     */
+    private static final int _stripRight(String s) {
+        for (int right = s.length(); --right >= 0;) {
+            if (!Character.isWhitespace(s.charAt(right))) {
+                return right;
+            }
+        }
+        return -1;
+    }
+
+    /**
+     * Helper for strip, rstrip implementation, when stripping specified characters.
+     *
+     * @param s string to search.
+     * @param stripChars specifies set of characters to strip
+     * @return index of rightmost character not in <code>stripChars</code> or -1 if they all are.
+     */
+    private static final int _stripRight(String s, String stripChars) {
+        for (int right = s.length(); --right >= 0;) {
+            if (stripChars.indexOf(s.charAt(right)) < 0) {
+                return right;
+            }
+        }
+        return -1;
+    }
+
+    /**
+     * Equivalent of Python str.lstrip() with no argument, meaning strip whitespace. Any whitespace
+     * byte/character will be discarded from the left of this <code>str</code>.
+     *
+     * @return a new String, stripped of the whitespace characters/bytes
+     */
     public String lstrip() {
-        return str_lstrip(null);
+        return _lstrip();
     }
-    
+
+    /**
+     * Equivalent of Python str.lstrip(). Any byte/character matching one of those in
+     * <code>sep</code> will be discarded from the left end of this <code>str</code>. If
+     * <code>sep == null</code>, whitespace will be stripped.
+     *
+     * @param sep characters to strip from the left end of this str/bytes, or null
+     * @return a new String, stripped of the specified characters/bytes
+     */
     public String lstrip(String sep) {
-        return str_lstrip(sep);
+        return _lstrip(sep);
     }
 
     @ExposedMethod(defaults = "null", doc = BuiltinDocs.str_lstrip_doc)
-    final String str_lstrip(String sep) {
-        char[] chars = getString().toCharArray();
-        int n=chars.length;
-        int start=0;
-        if (sep == null)
-            while (start < n && Character.isWhitespace(chars[start]))
-                start++;
-        else
-            while (start < n && sep.indexOf(chars[start]) >= 0)
-                start++;
-
-        return (start > 0) ? getString().substring(start, n) : getString();
+    final PyObject str_lstrip(PyObject chars) {
+        if (chars instanceof PyUnicode) {
+            // Promote the problem to a Unicode one
+            return ((PyUnicode)decode()).unicode_lstrip(chars);
+        } else {
+            // It ought to be None, null, or some kind of bytes with the buffer API.
+            String stripChars = asStripSepOrError(chars, "lstrip");
+            // Strip specified characters or whitespace if stripChars == null
+            return new PyString(_lstrip(stripChars));
+        }
     }
 
+    /**
+     * Implementation of Python str.lstrip() common to exposed and Java API, when stripping
+     * whitespace. Any whitespace byte/character will be discarded from the left end of this
+     * <code>str</code>.
+     * <p>
+     * Implementation note: although a str contains only bytes, this method is also called by
+     * {@link PyUnicode#unicode_lstrip(PyObject)} when this is a basic-plane string.
+     *
+     * @return a new String, stripped of the whitespace characters/bytes
+     */
+    protected final String _lstrip() {
+        String s = getString();
+        // Leftmost non-whitespace character: cannot exceed length
+        int left = _stripLeft(s, s.length());
+        return s.substring(left);
+    }
+
+    /**
+     * Implementation of Python str.lstrip() common to exposed and Java API. Any byte/character
+     * matching one of those in <code>stripChars</code> will be discarded from the left end of this
+     * <code>str</code>. If <code>stripChars == null</code>, whitespace will be stripped.
+     * <p>
+     * Implementation note: although a str contains only bytes, this method is also called by
+     * {@link PyUnicode#unicode_lstrip(PyObject)} when both arguments are basic-plane strings.
+     *
+     * @param stripChars characters to strip or null
+     * @return a new String, stripped of the specified characters/bytes
+     */
+    protected final String _lstrip(String stripChars) {
+        if (stripChars == null) {
+            // Divert to the whitespace version
+            return _lstrip();
+        } else {
+            String s = getString();
+            // Leftmost non-matching character: cannot exceed length
+            int left = _stripLeft(s, stripChars, s.length());
+            return s.substring(left);
+        }
+    }
+
+    /**
+     * Equivalent of Python str.rstrip() with no argument, meaning strip whitespace. Any whitespace
+     * byte/character will be discarded from the right end of this <code>str</code>.
+     *
+     * @return a new String, stripped of the whitespace characters/bytes
+     */
+    public String rstrip() {
+        return _rstrip();
+    }
+
+    /**
+     * Equivalent of Python str.rstrip(). Any byte/character matching one of those in
+     * <code>sep</code> will be discarded from the right end of this <code>str</code>. If
+     * <code>sep == null</code>, whitespace will be stripped.
+     *
+     * @param sep characters to strip from the right end of this str/bytes, or null
+     * @return a new String, stripped of the specified characters/bytes
+     */
     public String rstrip(String sep) {
-        return str_rstrip(sep);
+        return _rstrip(sep);
     }
-    
+
     @ExposedMethod(defaults = "null", doc = BuiltinDocs.str_rstrip_doc)
-    final String str_rstrip(String sep) {
-        char[] chars = getString().toCharArray();
-        int n=chars.length;
-        int end=n-1;
-        if (sep == null)
-            while (end >= 0 && Character.isWhitespace(chars[end]))
-                end--;
-        else
-            while (end >= 0 && sep.indexOf(chars[end]) >= 0)
-                end--;
-
-        return (end < n-1) ? getString().substring(0, end+1) : getString();
+    final PyObject str_rstrip(PyObject chars) {
+        if (chars instanceof PyUnicode) {
+            // Promote the problem to a Unicode one
+            return ((PyUnicode)decode()).unicode_rstrip(chars);
+        } else {
+            // It ought to be None, null, or some kind of bytes with the buffer API.
+            String stripChars = asStripSepOrError(chars, "rstrip");
+            // Strip specified characters or whitespace if stripChars == null
+            return new PyString(_rstrip(stripChars));
+        }
     }
 
+    /**
+     * Implementation of Python str.rstrip() common to exposed and Java API, when stripping
+     * whitespace. Any whitespace byte/character will be discarded from the right end of this
+     * <code>str</code>.
+     * <p>
+     * Implementation note: although a str contains only bytes, this method is also called by
+     * {@link PyUnicode#unicode_rstrip(PyObject)} when this is a basic-plane string.
+     *
+     * @return a new String, stripped of the whitespace characters/bytes
+     */
+    protected final String _rstrip() {
+        String s = getString();
+        // Rightmost non-whitespace
+        int right = _stripRight(s);
+        if (right < 0) {
+            // They're all whitespace
+            return "";
+        } else {
+            // Substring up to and including this rightmost non-whitespace
+            return s.substring(0, right + 1);
+        }
+    }
+
+    /**
+     * Implementation of Python str.rstrip() common to exposed and Java API. Any byte/character
+     * matching one of those in <code>stripChars</code> will be discarded from the right end of this
+     * <code>str</code>. If <code>stripChars == null</code>, whitespace will be stripped.
+     * <p>
+     * Implementation note: although a str contains only bytes, this method is also called by
+     * {@link PyUnicode#unicode_rstrip(PyObject)} when both arguments are basic-plane strings.
+     *
+     * @param stripChars characters to strip or null
+     * @return a new String, stripped of the specified characters/bytes
+     */
+    protected final String _rstrip(String stripChars) {
+        if (stripChars == null) {
+            // Divert to the whitespace version
+            return _rstrip();
+        } else {
+            String s = getString();
+            // Rightmost non-matching character
+            int right = _stripRight(s, stripChars);
+            // Substring up to and including this rightmost non-matching character (or "")
+            return s.substring(0, right + 1);
+        }
+    }
 
     public PyList split() {
         return str_split(null, -1);
 
     @ExposedMethod(defaults = {"null", "-1"}, doc = BuiltinDocs.str_split_doc)
     final PyList str_split(String sep, int maxsplit) {
+        
+        // XXX Accept PyObject that may be BufferProtocol or PyUnicode
+        
         if (sep != null) {
             if (sep.length() == 0) {
                 throw Py.ValueError("empty separator");
 
     @ExposedMethod(defaults = {"null", "-1"}, doc = BuiltinDocs.str_rsplit_doc)
     final PyList str_rsplit(String sep, int maxsplit) {
+        
+        // XXX Accept PyObject that may be BufferProtocol or PyUnicode
+        
         if (sep != null) {
             if (sep.length() == 0) {
                 throw Py.ValueError("empty separator");
 
     @ExposedMethod(doc = BuiltinDocs.str_partition_doc)
     final PyTuple str_partition(PyObject sepObj) {
+        
+        // XXX Accept PyObject that may be BufferProtocol or PyUnicode
+        
         String sep;
 
         if (sepObj instanceof PyUnicode) {
 
     @ExposedMethod(doc = BuiltinDocs.str_rpartition_doc)
     final PyTuple str_rpartition(PyObject sepObj) {
+        
+        // XXX Accept PyObject that may be BufferProtocol or PyUnicode
+        
         String sep;
 
         if (sepObj instanceof PyUnicode) {
 
     @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.str_index_doc)
     final int str_index(String sub, PyObject start, PyObject end) {
+        
+        // XXX Accept PyObject that may be BufferProtocol or PyUnicode
+        
         int index = str_find(sub, start, end);
         if (index == -1)
             throw Py.ValueError("substring not found in string.index");
 
     @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.str_rindex_doc)
     final int str_rindex(String sub, PyObject start, PyObject end) {
+        
+        // XXX Accept PyObject that may be BufferProtocol or PyUnicode
+        
         int index = str_rfind(sub, start, end);
         if(index == -1)
             throw Py.ValueError("substring not found in string.rindex");
     
     @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.str_count_doc)
     final int str_count(String sub, PyObject start, PyObject end) {
+        
+        // XXX Accept PyObject that may be BufferProtocol or PyUnicode
+        
         if (sub == null) {
             throw Py.TypeError("count() takes at least 1 argument (0 given)");
         }
 
     @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.str_find_doc)
     final int str_find(String sub, PyObject start, PyObject end) {
+        
+        // XXX Accept PyObject that may be BufferProtocol or PyUnicode
+        
         int[] indices = translateIndices(start, end);
         int index = getString().indexOf(sub, indices[0]);
         if (index < indices[2] || index > indices[1]) {
 
     @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.str_rfind_doc)
     final int str_rfind(String sub, PyObject start, PyObject end) {
+        
+        // XXX Accept PyObject that may be BufferProtocol or PyUnicode
+        
         int[] indices = translateIndices(start, end);
         int index = getString().lastIndexOf(sub, indices[1] - sub.length());
         if (index < indices[2]) {
     }
     
     protected PyString replace(PyString oldPiece, PyString newPiece, int maxsplit) {
+        
+        // XXX Accept PyObjects that may be BufferProtocol or PyUnicode
+        
         int len = getString().length();
         int old_len = oldPiece.getString().length();
         if (len == 0) {
 
     @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.str_startswith_doc)
     final boolean str_startswith(PyObject prefix, PyObject start, PyObject end) {
+        
+        // XXX Accept PyObject that may be BufferProtocol or PyUnicode
+        
         int[] indices = translateIndices(start, end);
         
         if (prefix instanceof PyString) {
 
     @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.str_endswith_doc)
     final boolean str_endswith(PyObject suffix, PyObject start, PyObject end) {
+        
+        // XXX Accept PyObject that may be BufferProtocol or PyUnicode
+        
         int[] indices = translateIndices(start, end);
 
         String substr = getString().substring(indices[0], indices[1]);
     /**
      * Turns the possibly negative Python slice start and end into valid indices
      * into this string.
-     * 
+     *
      * @return a 3 element array of indices into this string describing a
      *         substring from [0] to [1]. [0] <= [1], [0] >= 0 and [1] <=
      *         string.length(). The third element contains the unadjusted 
 
     @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.str_translate_doc)
     final String str_translate(String table, String deletechars) {
+        
+        // XXX Accept PyObjects that may be BufferProtocol
+        
         if (table != null && table.length() != 256)
             throw Py.ValueError(
                 "translation table must be 256 characters long");

src/org/python/core/PyUnicode.java

         }
     }
 
-    @ExposedMethod(defaults = "null", doc = BuiltinDocs.unicode___getslice___doc)
+    @ExposedMethod(defaults = "null", doc = BuiltinDocs.unicode_strip_doc)
     final PyObject unicode_strip(PyObject sepObj) {
+
         PyUnicode sep = coerceStripSepToUnicode(sepObj);
-        if (isBasicPlane() && (sep == null || sep.isBasicPlane())) {
+
+        if (isBasicPlane()) {
+            // this contains only basic plane characters
             if (sep == null) {
-                return new PyUnicode(str_strip(null));
-            } else {
-                return new PyUnicode(str_strip(sep.getString()));
+                // And we're stripping whitespace, so use the PyString implementation
+                return new PyUnicode(_strip());
+            } else if (sep.isBasicPlane()) {
+                // And the strip characters are basic plane too, so use the PyString implementation
+                return new PyUnicode(_strip(sep.getString()));
             }
         }
-        return new PyUnicode(new ReversedIterator(new StripIterator(sep,
-                new ReversedIterator(new StripIterator(sep, newSubsequenceIterator())))));
+
+        // Not basic plane: have to do real Unicode
+        return new PyUnicode(new ReversedIterator(new StripIterator(sep, new ReversedIterator(
+                new StripIterator(sep, newSubsequenceIterator())))));
     }
 
-    @ExposedMethod(defaults = "null", doc = BuiltinDocs.unicode___getslice___doc)
+    @ExposedMethod(defaults = "null", doc = BuiltinDocs.unicode_lstrip_doc)
     final PyObject unicode_lstrip(PyObject sepObj) {
+
         PyUnicode sep = coerceStripSepToUnicode(sepObj);
-        if (isBasicPlane() && (sep == null || sep.isBasicPlane())) {
+
+        if (isBasicPlane()) {
+            // this contains only basic plane characters
             if (sep == null) {
-                return new PyUnicode(str_lstrip(null));
-            } else {
-                return new PyUnicode(str_lstrip(sep.getString()));
+                // And we're stripping whitespace, so use the PyString implementation
+                return new PyUnicode(_lstrip());
+            } else if (sep.isBasicPlane()) {
+                // And the strip characters are basic plane too, so use the PyString implementation
+                return new PyUnicode(_lstrip(sep.getString()));
             }
         }
+
+        // Not basic plane: have to do real Unicode
         return new PyUnicode(new StripIterator(sep, newSubsequenceIterator()));
     }
 
-    @ExposedMethod(defaults = "null", doc = BuiltinDocs.unicode___getslice___doc)
+    @ExposedMethod(defaults = "null", doc = BuiltinDocs.unicode_rstrip_doc)
     final PyObject unicode_rstrip(PyObject sepObj) {
+
         PyUnicode sep = coerceStripSepToUnicode(sepObj);
-        if (isBasicPlane() && (sep == null || sep.isBasicPlane())) {
+
+        if (isBasicPlane()) {
+            // this contains only basic plane characters
             if (sep == null) {
-                return new PyUnicode(str_rstrip(null));
-            } else {
-                return new PyUnicode(str_rstrip(sep.getString()));
+                // And we're stripping whitespace, so use the PyString implementation
+                return new PyUnicode(_rstrip());
+            } else if (sep.isBasicPlane()) {
+                // And the strip characters are basic plane too, so use the PyString implementation
+                return new PyUnicode(_rstrip(sep.getString()));
             }
         }
-        return new PyUnicode(new ReversedIterator(new StripIterator(sep,
-                new ReversedIterator(newSubsequenceIterator()))));
+
+        // Not basic plane: have to do real Unicode
+        return new PyUnicode(new ReversedIterator(new StripIterator(sep, new ReversedIterator(
+                newSubsequenceIterator()))));
     }
 
     @Override