Commits

Philip Jenvey committed 8964009

update

  • Participants
  • Parent commits b3e42a0

Comments (0)

Files changed (2)

File multibytecodec

 
 diff -r 54515dec1d20 pypy/module/_multibytecodec/__init__.py
 --- a/pypy/module/_multibytecodec/__init__.py	Fri Jul 29 14:17:43 2011 -0500
-+++ b/pypy/module/_multibytecodec/__init__.py	Fri Jul 29 16:28:46 2011 -0700
++++ b/pypy/module/_multibytecodec/__init__.py	Mon Aug 01 11:17:35 2011 -0700
 @@ -7,13 +7,22 @@
          # for compatibility this name is obscured, and should be called
          # via the _codecs_*.py modules written in lib_pypy.
          'MultibyteStreamWriter':
 diff -r 54515dec1d20 pypy/module/_multibytecodec/c_codecs.py
 --- a/pypy/module/_multibytecodec/c_codecs.py	Fri Jul 29 14:17:43 2011 -0500
-+++ b/pypy/module/_multibytecodec/c_codecs.py	Fri Jul 29 16:28:46 2011 -0700
++++ b/pypy/module/_multibytecodec/c_codecs.py	Mon Aug 01 11:17:35 2011 -0700
 @@ -4,6 +4,8 @@
  from pypy.tool.autopath import pypydir
  
      else:
 diff -r 54515dec1d20 pypy/module/_multibytecodec/interp_multibytecodec.py
 --- a/pypy/module/_multibytecodec/interp_multibytecodec.py	Fri Jul 29 14:17:43 2011 -0500
-+++ b/pypy/module/_multibytecodec/interp_multibytecodec.py	Fri Jul 29 16:28:46 2011 -0700
-@@ -1,9 +1,12 @@
++++ b/pypy/module/_multibytecodec/interp_multibytecodec.py	Mon Aug 01 11:17:35 2011 -0700
+@@ -1,9 +1,13 @@
 +import sys
 +
  from pypy.interpreter.baseobjspace import Wrappable
  from pypy.module._multibytecodec import c_codecs
  from pypy.module._codecs.interp_codecs import CodecState
 +from pypy.objspace.std.unicodetype import unicode_from_object
++from pypy.rpython.lltypesystem import rffi
  
  
  class MultibyteCodec(Wrappable):
-@@ -42,9 +45,11 @@
+@@ -42,9 +46,11 @@
              errors = 'strict'
          state = space.fromcache(CodecState)
          #
          except c_codecs.EncodeDecodeError, e:
              raise OperationError(
                  space.w_UnicodeEncodeError,
-@@ -70,6 +75,249 @@
+@@ -70,6 +76,278 @@
  MultibyteCodec.typedef.acceptable_as_base_class = False
  
  
-+"""
-+class MultibyteStatefulCodec(Wrappable):
-+
-+    def __init__(self, space, codec, errors):
-+        self.codec, self.errors = codec, errors
-+
-+class MultibyteStatefulDecoder(Wrappable):
-+
-+    def __init__(self, )
-+    """
-+
-+
 +class MultibyteIncrementalEncoder(Wrappable):
 +
 +    def __init__(self, space, codec, errors):
 +    def decode(self, space, w_input, final=0):
 +        input = space.str_w(w_input)
 +        inputlen = len(input)
-+        origpending = w_ctx.pendingsize
++        origpending = self.pendingsize
 +        if origpending > 0:
 +            if inputlen > (sys.maxint - origpending):
 +                raise OperationError(space.w_MemoryError, space.w_None)
-+            input = w_ctx.pending + input
++            input = self.pending + input
 +            inputlen += origpending
-+            w_ctx.pendingsize = 0
++            self.pendingsize = 0
 +
 +        codec = self.codec.codec
-+        decodebuf = c_codecs.pypy_cjk_dec_init(codec, input, inputlen)
++        inbuf = rffi.get_nonmovingbuffer(input)
++        remaining = 0
++        try:
++            # XXX:
++            decodebuf = c_codecs.pypy_cjk_dec_init(codec, inbuf, inputlen)
++            try: from nose.tools import set_trace
++            except ImportError: from pdb import set_trace
++            set_trace()
++            decodebuf.state = self.state
++            if not decodebuf:
++                raise MemoryError()
 +
-+        state = space.fromcache(CodecState)
-+        try:
-+            output = c_codecs.encode(w_ctx.codec.codec, input, w_ctx.errors,
-+                                     state.encode_error_handler, w_ctx.codec.name,
-+                                     c_codecs.MBENC_FLUSH if final else 0)
-+        except Exception, e:
-+            if origpending > 0:
-+                # XXX: recover the original pending buffer
-+                # XXX: copy inbuf back to pending?
-+                # this doesn't seem necessary, we didn't overwrite it earlier
-+                pass
-+            w_ctx.pendingsize = origpending
-+            if not isinstance(e, c_codecs.EncodeDecodeError):
-+                raise
-+            raise OperationError(
-+                space.w_UnicodeEncodeError,
-+                space.newtuple([
-+                    space.wrap(w_ctx.codec.name),
-+                    space.wrap(input),
-+                    space.wrap(e.start),
-+                    space.wrap(e.end),
-+                    space.wrap(e.reason)]))
++            state = space.fromcache(CodecState)
++            try:
++                while True:
++                    r = c_codecs.pypy_cjk_dec_chunk(decodebuf)
++                    if r == 0:
++                        break
++                    try:
++                        remaining = c_codecs.pypy_cjk_dec_inbuf_remaining(
++                            decodebuf)
++                        if final and remaining:
++                            c_codecs.multibytecodec_decerror(
++                                decodebuf, r, self.errors, state.decode_error_handler,
++                                self.codec.name, input)
++                    except Exception, e:
++                        if origpending > 0:
++                            # XXX: recover the original pending buffer
++                            # XXX: copy inbuf back to pending?
++                            # this doesn't seem necessary, we didn't overwrite it earlier
++                            pass
++                        self.pendingsize = origpending
++                        if not isinstance(e, c_codecs.EncodeDecodeError):
++                            raise
++                        raise OperationError(
++                            space.w_UnicodeEncodeError,
++                            space.newtuple([
++                                space.wrap(self.codec.name),
++                                space.wrap(input),
++                                space.wrap(e.start),
++                                space.wrap(e.end),
++                                space.wrap(e.reason)]))
 +
++                src = c_codecs.pypy_cjk_dec_outbuf(decodebuf)
++                length = c_codecs.pypy_cjk_dec_outlen(decodebuf)
++                output = rffi.wcharpsize2unicode(src, length)
++            finally:
++                c_codecs.pypy_cjk_dec_free(decodebuf)
++        finally:
++            rffi.free_nonmovingbuffer(input, inbuf)
++
++        if ((remaining + self.pendingsize > 2) or
++            remaining > (sys.maxint - self.pendingsize)):
++            raise OperationError(space.w_UnicodeError,
++                                 space.wrap("pending buffer overflow"))
++        # XXX: this is weird, c code is:
++        #memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings);
++        # which probably explains why they need to reset pending..
++        self.pendingsize = remaining
++        self.pending = input[-remaining:]
++
++        """
 +        outputlen = len(output)
 +        if outputlen < inputlen:
-+            w_ctx.pendingsize = inputlen - outputlen
++            self.pendingsize = inputlen - outputlen
 +            # XXX: MAXENCPENDING
-+            if w_ctx.pendingsize > 2:
-+                w_ctx.pendingsize = 0
++            if self.pendingsize > 2:
++                self.pendingsize = 0
++                try: from nose.tools import set_trace
++                except ImportError: from pdb import set_trace
++                set_trace()
 +                raise OperationError(space.w_UnicodeError,
 +                                     space.wrap("pending buffer overflow"))
-+            w_ctx.pending = input[-w_ctx.pendingsize:]
++            self.pending = input[-self.pendingsize:]
++            """
 +        return space.wrap(output)
 +
 +    def reset(self):
      try:
 diff -r 54515dec1d20 pypy/module/_multibytecodec/test/test_app_codecs.py
 --- a/pypy/module/_multibytecodec/test/test_app_codecs.py	Fri Jul 29 14:17:43 2011 -0500
-+++ b/pypy/module/_multibytecodec/test/test_app_codecs.py	Fri Jul 29 16:28:46 2011 -0700
++++ b/pypy/module/_multibytecodec/test/test_app_codecs.py	Mon Aug 01 11:17:35 2011 -0700
 @@ -106,3 +106,75 @@
          repl = u"\u2014"
          s = u"\uDDA1".encode("gbk", "test.multi_bad_handler")
 +        assert s ==  b'\\xff'
 +        s = encoder.encode(u'\n')
 +        assert s == b'\n'
+diff -r 54515dec1d20 pypy/tool/sourcetools.py
+--- a/pypy/tool/sourcetools.py	Fri Jul 29 14:17:43 2011 -0500
++++ b/pypy/tool/sourcetools.py	Mon Aug 01 11:17:35 2011 -0700
+@@ -172,8 +172,13 @@
+         #print "***** duplicate code ******* "
+         #print source 
+     except KeyError: 
+-        #if DEBUG: 
+-        co = py.code.compile(source, filename, mode, flags) 
++        #if DEBUG:
++        try:
++            co = py.code.compile(source, filename, mode, flags)
++        except:
++            try: from nose.tools import set_trace
++            except ImportError: from pdb import set_trace
++            set_trace()
+         #else: 
+         #    co = compile(source, filename, mode, flags) 
+         compile2_cache[key] = co 
 diff -r 54515dec1d20 pypy/translator/c/src/cjkcodecs/multibytecodec.c
 --- a/pypy/translator/c/src/cjkcodecs/multibytecodec.c	Fri Jul 29 14:17:43 2011 -0500
-+++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.c	Fri Jul 29 16:28:46 2011 -0700
-@@ -30,6 +30,50 @@
++++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.c	Mon Aug 01 11:17:35 2011 -0700
+@@ -30,6 +30,23 @@
    return NULL;
  }
  
 +  free(state);
 +}
 +
-+struct pypy_cjk_dec_s *pypy_cjk_prepare_buffer(const MultibyteCodec *codec,
-+                                               char *inbuf, Py_ssize_t inlen)
-+{
-+  struct pypy_cjk_dec_s *d = malloc(sizeof(struct pypy_cjk_dec_s));
-+  if (!d)
-+    return NULL;
-+  if (codec->decinit != NULL && codec->decinit(&d->state, codec->config) != 0)
-+    goto errorexit;
-+
-+  d->codec = codec;
-+  d->inbuf_start = inbuf;
-+  d->inbuf = inbuf;
-+  d->inbuf_end = inbuf + inlen;
-+  d->outbuf_start = (inlen <= (PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) ?
-+                     malloc(inlen * sizeof(Py_UNICODE)) :
-+                     NULL);
-+  if (!d->outbuf_start)
-+    goto errorexit;
-+  d->outbuf = d->outbuf_start;
-+  d->outbuf_end = d->outbuf_start + inlen;
-+  return d;
-+
-+ errorexit:
-+  free(d);
-+  return NULL;
-+}
-+
  void pypy_cjk_dec_free(struct pypy_cjk_dec_s *d)
  {
    free(d->outbuf_start);
-@@ -169,9 +213,8 @@
+@@ -169,9 +186,8 @@
  
  #define MBENC_RESET     MBENC_MAX<<1
  
        Py_ssize_t r;
 diff -r 54515dec1d20 pypy/translator/c/src/cjkcodecs/multibytecodec.h
 --- a/pypy/translator/c/src/cjkcodecs/multibytecodec.h	Fri Jul 29 14:17:43 2011 -0500
-+++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.h	Fri Jul 29 16:28:46 2011 -0700
++++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.h	Mon Aug 01 11:17:35 2011 -0700
 @@ -115,7 +115,7 @@
  struct pypy_cjk_enc_s *pypy_cjk_enc_init(const MultibyteCodec *codec,
                                           Py_UNICODE *inbuf, Py_ssize_t inlen);
-5063903bba091be7601ff86f1e8c913251d037b9:multibytecodec
+4194b0b2137e347b19e5ac63041aa8e61be2fcfe:multibytecodec