Commits

Anonymous committed 7854826 Draft

Implement codecs.encode/decode
Enables removal of 2 more skips from test_codecs, now 14 skips.

Comments (0)

Files changed (3)

Lib/test/test_codecs.py

     def test_bad_args(self):
         self.assertRaises(TypeError, codecs.utf_16_ex_decode)
 
-@unittest.skipIf(test_support.is_jython, "FIXME: Jython has no _codecs.readbuffer_encode method")
+@unittest.skipIf(test_support.is_jython, "Jython has no _codecs.readbuffer_encode method")
 class ReadBufferTest(unittest.TestCase):
 
     def test_array(self):
         self.assertRaises(TypeError, codecs.readbuffer_encode)
         self.assertRaises(TypeError, codecs.readbuffer_encode, 42)
 
-@unittest.skipIf(test_support.is_jython, "FIXME: Jython has no _codecs.charbuffer_encode method")
+@unittest.skipIf(test_support.is_jython, "Jython has no _codecs.charbuffer_encode method")
 class CharBufferTest(unittest.TestCase):
 
     def test_string(self):
 
 class CodecsModuleTest(unittest.TestCase):
 
-    @unittest.skipIf(test_support.is_jython, "FIXME: _codecs.decode not implemented")
     def test_decode(self):
         self.assertEqual(codecs.decode('\xe4\xf6\xfc', 'latin-1'),
                           u'\xe4\xf6\xfc')
         self.assertEqual(codecs.decode('abc'), u'abc')
         self.assertRaises(UnicodeDecodeError, codecs.decode, '\xff', 'ascii')
 
-    @unittest.skipIf(test_support.is_jython, "FIXME: _codecs.encode not implemented")
     def test_encode(self):
         self.assertEqual(codecs.encode(u'\xe4\xf6\xfc', 'latin-1'),
                           '\xe4\xf6\xfc')
         self.assertRaises(TypeError, codecs.encode)
-        self.assertRaises(LookupError, codecs.encode, "foo", "__spam__")
+        self.assertRaises(LookupError, codecs.encode, u"foo", "__spam__")
         self.assertEqual(codecs.encode(u'abc'), 'abc')
         self.assertRaises(UnicodeEncodeError, codecs.encode, u'\xffff', 'ascii')
 

src/org/python/core/codecs.java

 import java.util.Iterator;
 
 import org.python.core.util.StringUtil;
+import org.python.modules._codecs;
 
 /**
  * This class implements the codec registry and utility methods supporting codecs, such as those
         }
     }
 
+    /**
+     * Decode the bytes <code>v</code> using the codec registered for the <code>encoding</code>.
+     * The <code>encoding</code> defaults to the system default encoding
+     * (see {@link codecs#getDefaultEncoding()}).
+     * The string <code>errors</code> may name a different error handling
+     * policy (built-in or registered with {@link #register_error(String, PyObject)}).
+     * The default error policy is 'strict' meaning that decoding errors raise a
+     * <code>ValueError</code>.
+     * This method is exposed through the _codecs module as
+     * {@link _codecs#decode(PyString, String, String)}.
+     *
+     * @param v bytes to be decoded
+     * @param encoding name of encoding (to look up in codec registry)
+     * @param errors error policy name (e.g. "ignore", "replace")
+     * @return Unicode string decoded from <code>v</code>
+     */
     public static PyObject decode(PyString v, String encoding, String errors) {
         if (encoding == null) {
             encoding = getDefaultEncoding();
         return new PyUnicode(result, true);
     }
 
+    /**
+     * Encode <code>v</code> using the codec registered for the <code>encoding</code>.
+     * The <code>encoding</code> defaults to the system default encoding
+     * (see {@link codecs#getDefaultEncoding()}).
+     * The string <code>errors</code> may name a different error handling
+     * policy (built-in or registered with {@link #register_error(String, PyObject)}).
+     * The default error policy is 'strict' meaning that encoding errors raise a
+     * <code>ValueError</code>.
+     *
+     * @param v unicode string to be encoded
+     * @param encoding name of encoding (to look up in codec registry)
+     * @param errors error policy name (e.g. "ignore")
+     * @return bytes object encoding <code>v</code>
+     */
+    // XXX v should probably be declared PyUnicode (or thing delivering unicode code points)
     public static String encode(PyString v, String encoding, String errors) {
         if (encoding == null) {
             encoding = getDefaultEncoding();

src/org/python/modules/_codecs.java

         codecs.register_error(name, errorHandler);
     }
 
+    /**
+     * Decode <code>bytes</code> using the system default encoding (see
+     * {@link codecs#getDefaultEncoding()}). Decoding errors raise a <code>ValueError</code>.
+     *
+     * @param bytes to be decoded
+     * @return Unicode string decoded from <code>bytes</code>
+     */
+    public static PyObject decode(PyString bytes) {
+        return decode(bytes, null, null);
+    }
+
+    /**
+     * Decode <code>bytes</code> using the codec registered for the <code>encoding</code>. The
+     * <code>encoding</code> defaults to the system default encoding (see
+     * {@link codecs#getDefaultEncoding()}). Decoding errors raise a <code>ValueError</code>.
+     *
+     * @param bytes to be decoded
+     * @param encoding name of encoding (to look up in codec registry)
+     * @return Unicode string decoded from <code>bytes</code>
+     */
+    public static PyObject decode(PyString bytes, String encoding) {
+        return decode(bytes, encoding, null);
+    }
+
+    /**
+     * Decode <code>bytes</code> using the codec registered for the <code>encoding</code>. The
+     * <code>encoding</code> defaults to the system default encoding (see
+     * {@link codecs#getDefaultEncoding()}). The string <code>errors</code> may name a different
+     * error handling policy (built-in or registered with
+     * {@link #register_error(String, PyObject)}). The default error policy is 'strict' meaning
+     * that decoding errors raise a <code>ValueError</code>.
+     *
+     * @param bytes to be decoded
+     * @param encoding name of encoding (to look up in codec registry)
+     * @param errors error policy name (e.g. "ignore")
+     * @return Unicode string decoded from <code>bytes</code>
+     */
+    public static PyObject decode(PyString bytes, String encoding, String errors) {
+        return codecs.decode(bytes, encoding, errors);
+    }
+
+    /**
+     * Encode <code>unicode</code> using the system default encoding (see
+     * {@link codecs#getDefaultEncoding()}). Encoding errors raise a <code>ValueError</code>.
+     *
+     * @param unicode string to be encoded
+     * @return bytes object encoding <code>unicode</code>
+     */
+    public static PyString encode(PyUnicode unicode) {
+        return encode(unicode, null, null);
+    }
+
+    /**
+     * Encode <code>unicode</code> using the codec registered for the <code>encoding</code>. The
+     * <code>encoding</code> defaults to the system default encoding (see
+     * {@link codecs#getDefaultEncoding()}). Encoding errors raise a <code>ValueError</code>.
+     *
+     * @param unicode string to be encoded
+     * @param encoding name of encoding (to look up in codec registry)
+     * @return bytes object encoding <code>unicode</code>
+     */
+    public static PyString encode(PyUnicode unicode, String encoding) {
+        return encode(unicode, encoding, null);
+    }
+
+    /**
+     * Encode <code>unicode</code> using the codec registered for the <code>encoding</code>. The
+     * <code>encoding</code> defaults to the system default encoding (see
+     * {@link codecs#getDefaultEncoding()}). The string <code>errors</code> may name a different
+     * error handling policy (built-in or registered with
+     * {@link #register_error(String, PyObject)}). The default error policy is 'strict' meaning
+     * that encoding errors raise a <code>ValueError</code>.
+     *
+     * @param unicode string to be encoded
+     * @param encoding name of encoding (to look up in codec registry)
+     * @param errors error policy name (e.g. "ignore")
+     * @return bytes object encoding <code>unicode</code>
+     */
+    public static PyString encode(PyUnicode unicode, String encoding, String errors) {
+        return Py.newString(codecs.encode(unicode, encoding, errors));
+    }
+
+    /* --- Some codec support methods -------------------------------------------- */
+
     public static PyObject charmap_build(PyUnicode map) {
         return EncodingMap.buildEncodingMap(map);
     }