Commits

Philip Jenvey committed edbb3b3

initial commit of patches

Comments (0)

Files changed (6)

+^\.hg
+^\.mq
+syntax: glob
+status
+guards
+# HG changeset patch
+# Parent b5bc2f812d764554599a7f6f9624b48e3170785b
+add missing _multibytecodec stateful classes
+
+diff -r b5bc2f812d76 pypy/module/_multibytecodec/__init__.py
+--- a/pypy/module/_multibytecodec/__init__.py	Sun Jul 24 17:13:53 2011 -0700
++++ b/pypy/module/_multibytecodec/__init__.py	Tue Jul 26 10:14:42 2011 -0700
+@@ -7,13 +7,22 @@
+         # for compatibility this name is obscured, and should be called
+         # via the _codecs_*.py modules written in lib_pypy.
+         '__getcodec': 'interp_multibytecodec.getcodec',
++
++        'MultibyteIncrementalEncoder':
++            'interp_multibytecodec.MultibyteIncrementalEncoder',
++        'MultibyteIncrementalDecoder':
++            'interp_multibytecodec.MultibyteIncrementalDecoder',
++#        'MultibyteStreamReader':
++#            'interp_multibytecodec.MultibyteStreamReader',
++#        'MultibyteStreamWriter':
++#            'interp_multibytecodec.MultibyteStreamWriter',
+     }
+ 
+     appleveldefs = {
+-        'MultibyteIncrementalEncoder':
+-            'app_multibytecodec.MultibyteIncrementalEncoder',
+-        'MultibyteIncrementalDecoder':
+-            'app_multibytecodec.MultibyteIncrementalDecoder',
++#        'MultibyteIncrementalEncoder':
++#            'app_multibytecodec.MultibyteIncrementalEncoder',
++#        'MultibyteIncrementalDecoder':
++#            'app_multibytecodec.MultibyteIncrementalDecoder',
+         'MultibyteStreamReader':
+             'app_multibytecodec.MultibyteStreamReader',
+         'MultibyteStreamWriter':
+diff -r b5bc2f812d76 pypy/module/_multibytecodec/c_codecs.py
+--- a/pypy/module/_multibytecodec/c_codecs.py	Sun Jul 24 17:13:53 2011 -0700
++++ b/pypy/module/_multibytecodec/c_codecs.py	Tue Jul 26 10:14:42 2011 -0700
+@@ -4,6 +4,8 @@
+ from pypy.tool.autopath import pypydir
+ 
+ UNICODE_REPLACEMENT_CHARACTER = u'\uFFFD'
++MBENC_FLUSH = 0x0001 # encode all characters encodable
++MBENC_RESET = MBENC_FLUSH << 1 # reset after an encoding session
+ 
+ 
+ class EncodeDecodeError(Exception):
+@@ -179,7 +181,7 @@
+                                ENCODEBUF_P)
+ pypy_cjk_enc_free = llexternal('pypy_cjk_enc_free', [ENCODEBUF_P],
+                                lltype.Void)
+-pypy_cjk_enc_chunk = llexternal('pypy_cjk_enc_chunk', [ENCODEBUF_P],
++pypy_cjk_enc_chunk = llexternal('pypy_cjk_enc_chunk', [ENCODEBUF_P, rffi.INT],
+                                 rffi.SSIZE_T)
+ pypy_cjk_enc_reset = llexternal('pypy_cjk_enc_reset', [ENCODEBUF_P],
+                                 rffi.SSIZE_T)
+@@ -196,7 +198,8 @@
+                                             rffi.SSIZE_T, rffi.SSIZE_T],
+                                            rffi.SSIZE_T)
+ 
+-def encode(codec, unicodedata, errors="strict", errorcb=None, namecb=None):
++def encode(codec, unicodedata, errors="strict", errorcb=None, namecb=None,
++           flags=0):
+     inleft = len(unicodedata)
+     inbuf = rffi.get_nonmoving_unicodebuffer(unicodedata)
+     try:
+@@ -205,8 +208,9 @@
+             raise MemoryError
+         try:
+             while True:
+-                r = pypy_cjk_enc_chunk(encodebuf)
+-                if r == 0:
++                # XXX: maybe use rffi.CConstant instead
++                r = pypy_cjk_enc_chunk(encodebuf, rffi.cast(rffi.INT, flags))
++                if r == 0 or (r == MBERR_TOOFEW and not flags & MBENC_FLUSH):
+                     break
+                 multibytecodec_encerror(encodebuf, r, errors,
+                                         codec, errorcb, namecb, unicodedata)
+@@ -249,7 +253,9 @@
+         replace = ""
+     elif errors == "replace":
+         try:
+-            replace = encode(codec, u"?")
++            replace = encode(codec, u"?", MBENC_FLUSH | MBENC_RESET)
++            # 0 in code but resizes the buffer if it fails..
++            #replace = encode(codec, u"?", 0)
+         except EncodeDecodeError:
+             replace = "?"
+     else:
+diff -r b5bc2f812d76 pypy/module/_multibytecodec/interp_multibytecodec.py
+--- a/pypy/module/_multibytecodec/interp_multibytecodec.py	Sun Jul 24 17:13:53 2011 -0700
++++ b/pypy/module/_multibytecodec/interp_multibytecodec.py	Tue Jul 26 10:14:42 2011 -0700
+@@ -1,9 +1,12 @@
++import sys
++
+ from pypy.interpreter.baseobjspace import Wrappable
+ from pypy.interpreter.gateway import interp2app, unwrap_spec
+-from pypy.interpreter.typedef import TypeDef
++from pypy.interpreter.typedef import GetSetProperty, TypeDef
+ from pypy.interpreter.error import OperationError
+ from pypy.module._multibytecodec import c_codecs
+ from pypy.module._codecs.interp_codecs import CodecState
++from pypy.objspace.std.unicodetype import unicode_from_object
+ 
+ 
+ class MultibyteCodec(Wrappable):
+@@ -42,9 +45,11 @@
+             errors = 'strict'
+         state = space.fromcache(CodecState)
+         #
++        flags = c_codecs.MBENC_FLUSH | c_codecs.MBENC_RESET
+         try:
+             output = c_codecs.encode(self.codec, input, errors,
+-                                     state.encode_error_handler, self.name)
++                                     state.encode_error_handler, self.name,
++                                     flags)
+         except c_codecs.EncodeDecodeError, e:
+             raise OperationError(
+                 space.w_UnicodeEncodeError,
+@@ -70,6 +75,249 @@
+ MultibyteCodec.typedef.acceptable_as_base_class = False
+ 
+ 
++"""
++class MultibyteStatefulCodec(Wrappable):
++
++    def __init__(self, space, codec, errors):
++        self.codec, self.errors = codec, errors
++
++class MultibyteStatefulDecoder(Wrappable):
++
++    def __init__(self, )
++    """
++
++
++class MultibyteIncrementalEncoder(Wrappable):
++
++    def __init__(self, space, codec, errors):
++        self.codec = codec
++        self.errors = errors
++        self.pending = None
++
++        # should have codec, state, errors (general StatefulCodec --
++        # should I have a class hierarchy?).
++        # should also have pending[MAXENCPENDING (2)] and pendingsize (StatefulEncoder)
++
++        self.pendingsize = 0
++        self.pending = None
++
++    # XXX: I think this and above should probably be str,
++    # >>> 'hi'.encode('utf-8', None)
++    # Traceback (most recent call last):
++    #   File "<stdin>", line 1, in <module>
++    # TypeError: encode() argument 2 must be string, not None
++    @unwrap_spec(errors="str_or_None")
++    def mbiencoder_new(space, w_subtype, errors='strict'):
++        w_codec = space.getattr(w_subtype, space.wrap('codec'))
++        if not isinstance(w_codec, MultibyteCodec):
++            raise OperationError(space.w_TypeError,
++                                 space.wrap("codec is unexpected type"))
++
++        w_obj = space.allocate_instance(MultibyteIncrementalEncoder, w_subtype)
++        w_obj.__init__(space, space.unwrap(w_codec), errors)
++        return w_obj
++
++    """
++    def mbiencoder_init(self, space):
++        pass
++        """
++
++    @unwrap_spec(final=int)
++    def encode(self, space, w_input, final=0):
++        return encoder_encode_stateful(space, self, w_input, final)
++
++    def reset(self):
++        #if (self->codec->decreset != NULL &&
++            #self->codec->decreset(&self->state, self->codec->config) != 0)
++            #return NULL;
++            #self->pendingsize = 0;
++        #try: from nose.tools import set_trace
++        #except ImportError: from pdb import set_trace
++        #set_trace()
++        # XXX: the CPython code calls decreset here, which is a bug
++        self.pendingsize = 0
++
++    def descr_errors(self, space):
++        return space.wrap(self.errors)
++
++    def descr_seterrors(self, space, w_errors):
++        self.errors = space.str_w(w_errors)
++
++
++MultibyteIncrementalEncoder.typedef = TypeDef(
++    'MultibyteIncrementalEncoder',
++    __module__ = '_multibytecodec',
++    __new__ = interp2app(MultibyteIncrementalEncoder.mbiencoder_new.im_func),
++#    __init__ = interp2app(MultibyteIncrementalEncoder.mbiencoder_init),
++    encode = interp2app(MultibyteIncrementalEncoder.encode),
++    reset = interp2app(MultibyteIncrementalEncoder.reset),
++    errors = GetSetProperty(MultibyteIncrementalEncoder.descr_errors,
++                            MultibyteIncrementalEncoder.descr_seterrors,
++                            cls=MultibyteIncrementalEncoder),
++    )
++
++def encoder_encode_stateful(space, w_ctx, w_input, final):
++    if not space.isinstance_w(w_input, space.w_unicode):
++        w_input = unicode_from_object(space, w_input)
++        if not space.isinstance_w(w_input, space.w_unicode):
++            raise OperationError(space.w_TypeError,
++                                 space.wrap("couldn't convert the object to "
++                                            "unicode."))
++
++    input = space.unicode_w(w_input)
++    inputlen = len(input)
++    origpending = w_ctx.pendingsize
++    if origpending > 0:
++        if inputlen > (sys.maxint - origpending):
++            raise OperationError(space.w_MemoryError, space.w_None)
++        input = w_ctx.pending + input
++        inputlen += origpending
++        w_ctx.pendingsize = 0
++
++    state = space.fromcache(CodecState)
++    try:
++        output = c_codecs.encode(w_ctx.codec.codec, input, w_ctx.errors,
++                                 state.encode_error_handler, w_ctx.codec.name,
++                                 c_codecs.MBENC_FLUSH if final else 0)
++    except Exception, e:
++        if origpending > 0:
++            # XXX: recover the original pending buffer
++            # XXX: copy inbuf back to pending?
++            # this doesn't seem necessary, we didn't overwrite it earlier
++            pass
++        w_ctx.pendingsize = origpending
++        if not isinstance(e, c_codecs.EncodeDecodeError):
++            raise
++        raise OperationError(
++            space.w_UnicodeEncodeError,
++            space.newtuple([
++                space.wrap(w_ctx.codec.name),
++                space.wrap(input),
++                space.wrap(e.start),
++                space.wrap(e.end),
++                space.wrap(e.reason)]))
++
++    outputlen = len(output)
++    if outputlen < inputlen:
++        w_ctx.pendingsize = inputlen - outputlen
++        # XXX: MAXENCPENDING
++        if w_ctx.pendingsize > 2:
++            w_ctx.pendingsize = 0
++            raise OperationError(space.w_UnicodeError,
++                                 space.wrap("pending buffer overflow"))
++        w_ctx.pending = input[-w_ctx.pendingsize:]
++    return space.wrap(output)
++
++
++class MultibyteIncrementalDecoder(Wrappable):
++
++    def __init__(self, space, codec, errors):
++        self.errors = errors
++        self.codec = codec
++
++        # not exposed
++        self.pendingsize = 0
++        self.pending = None # XXX:
++
++    @unwrap_spec(errors="str_or_None")
++    def mbidecoder_new(space, w_subtype, errors='strict'):
++        codec = space.getattr(w_subtype, space.wrap('codec'))
++        if not isinstance(codec, MultibyteCodec):
++            raise OperationError(space.w_TypeError,
++                                 space.wrap("codec is unexpected type"))
++
++        w_obj = space.allocate_instance(MultibyteIncrementalDecoder, w_subtype)
++        w_obj.__init__(space, codec, errors)
++        return w_obj
++
++    def mbidecoder_init(self, space):
++        pass
++
++    @unwrap_spec(final=int)
++    def decode(self, space, w_input, final=0):
++        # get a buffer from input
++        #input = space.unicode_w(w_input)
++        
++        # check pending size
++        # prep buf
++        # feed buf
++        # deal w/ pending
++        inputlen = len(input)
++        origpending = w_ctx.pendingsize
++        if origpending > 0:
++            if inputlen > (sys.maxint - origpending):
++                raise OperationError(space.w_MemoryError, space.w_None)
++            input = w_ctx.pending + input
++            inputlen += origpending
++            w_ctx.pendingsize = 0
++
++        pypy_cjk_dec_prepare_buffer(self.decodebuf, inbuf, inputlen)
++
++        state = space.fromcache(CodecState)
++        try:
++            output = c_codecs.encode(w_ctx.codec.codec, input, w_ctx.errors,
++                                     state.encode_error_handler, w_ctx.codec.name,
++                                     c_codecs.MBENC_FLUSH if final else 0)
++        except Exception, e:
++            if origpending > 0:
++                # XXX: recover the original pending buffer
++                # XXX: copy inbuf back to pending?
++                # this doesn't seem necessary, we didn't overwrite it earlier
++                pass
++            w_ctx.pendingsize = origpending
++            if not isinstance(e, c_codecs.EncodeDecodeError):
++                raise
++            raise OperationError(
++                space.w_UnicodeEncodeError,
++                space.newtuple([
++                    space.wrap(w_ctx.codec.name),
++                    space.wrap(input),
++                    space.wrap(e.start),
++                    space.wrap(e.end),
++                    space.wrap(e.reason)]))
++
++        outputlen = len(output)
++        if outputlen < inputlen:
++            w_ctx.pendingsize = inputlen - outputlen
++            # XXX: MAXENCPENDING
++            if w_ctx.pendingsize > 2:
++                w_ctx.pendingsize = 0
++                raise OperationError(space.w_UnicodeError,
++                                     space.wrap("pending buffer overflow"))
++            w_ctx.pending = input[-w_ctx.pendingsize:]
++        return space.wrap(output)
++
++    def reset(self):
++        #if (self->codec->decreset != NULL &&
++            #self->codec->decreset(&self->state, self->codec->config) != 0)
++            #return NULL;
++            #self->pendingsize = 0;
++        #try: from nose.tools import set_trace
++        #except ImportError: from pdb import set_trace
++        #set_trace()
++        # XXX: this calls decreset in the cpython code which is a bug
++        self.pendingsize = 0
++
++    def descr_errors(self, space):
++        return space.wrap(self.errors)
++
++    def descr_seterrors(self, space, w_errors):
++        self.errors = space.str_w(w_errors)
++
++
++MultibyteIncrementalDecoder.typedef = TypeDef(
++    'MultibyteIncrementalDecoder',
++    __module__ = '_multibytecodec',
++    __new__ = interp2app(MultibyteIncrementalDecoder.mbidecoder_new.im_func),
++    __init__ = interp2app(MultibyteIncrementalDecoder.mbidecoder_init),
++    decode = interp2app(MultibyteIncrementalDecoder.decode),
++    reset = interp2app(MultibyteIncrementalDecoder.reset),
++    errors = GetSetProperty(MultibyteIncrementalDecoder.descr_errors,
++                            MultibyteIncrementalDecoder.descr_seterrors,
++                            cls=MultibyteIncrementalDecoder),
++    )
++
++
+ @unwrap_spec(name=str)
+ def getcodec(space, name):
+     try:
+diff -r b5bc2f812d76 pypy/module/_multibytecodec/test/test_app_codecs.py
+--- a/pypy/module/_multibytecodec/test/test_app_codecs.py	Sun Jul 24 17:13:53 2011 -0700
++++ b/pypy/module/_multibytecodec/test/test_app_codecs.py	Tue Jul 26 10:14:42 2011 -0700
+@@ -106,3 +106,67 @@
+         repl = u"\u2014"
+         s = u"\uDDA1".encode("gbk", "test.multi_bad_handler")
+         assert s == '\xA1\xAA'
++
++class AppTestIncrementalEncoder:
++    def setup_class(cls):
++        cls.space = gettestobjspace(usemodules=['_multibytecodec'])
++
++    def test_stateless(self):
++        import codecs
++        # This has no state
++        encoder = codecs.getincrementalencoder("cp949")()
++        s = encoder.encode(u'\ud30c\uc774\uc36c \ub9c8\uc744')
++        assert s == '\xc6\xc4\xc0\xcc\xbd\xe3 \xb8\xb6\xc0\xbb'
++        s = encoder.reset()
++        assert s is None
++        s = encoder.encode(u'\u2606\u223c\u2606', True)
++        assert s == '\xa1\xd9\xa1\xad\xa1\xd9'
++        s = encoder.reset()
++        assert s is None
++        s = encoder.encode(u'', True)
++        assert s == ''
++        s = encoder.encode(u'', False)
++        assert s == ''
++        s = encoder.reset()
++        assert s is None
++
++    def test_stateful(self):
++        import codecs
++        encoder = codecs.getincrementalencoder("jisx0213")()
++        s = encoder.encode(u'\u00e6\u0300')
++        assert s == '\xab\xc4'
++        s = encoder.encode(u'\u00e6')
++        assert s == ''
++        s = encoder.encode(u'\u0300')
++        assert s == '\xab\xc4'
++        s = encoder.encode(u'\u00e6', True)
++        assert s == '\xa9\xdc'
++        s = encoder.reset()
++        assert s is None
++        s = encoder.encode(u'\u0300')
++        assert s == '\xab\xdc'
++        s = encoder.encode(u'\u00e6')
++        assert s == ''
++        s = encoder.encode('', True)
++        assert s == '\xa9\xdc'
++        s = encoder.encode('', True)
++        assert s == ''
++
++    def test_stateful_keep_buffer(self):
++        import codecs
++        encoder = codecs.getincrementalencoder('jisx0213')()
++        s = encoder.encode(u'\u00e6')
++        assert s == ''
++        raises(UnicodeEncodeError, encoder.encode, u'\u0123')
++        s = encoder.encode(u'\u0300\u00e6')
++        assert s == '\xab\xc4'
++        raises(UnicodeEncodeError, encoder.encode, u'\u0123')
++        s = encoder.reset()
++        assert s is None
++        s = encoder.encode(u'\u0300')
++        assert s == '\xab\xdc'
++        s = encoder.encode(u'\u00e6')
++        assert s == ''
++        raises(UnicodeEncodeError, encoder.encode, u'\u0123')
++        s = encoder.encode(u'', True)
++        assert s == '\xa9\xdc'
+\ No newline at end of file
+diff -r b5bc2f812d76 pypy/translator/c/src/cjkcodecs/multibytecodec.c
+--- a/pypy/translator/c/src/cjkcodecs/multibytecodec.c	Sun Jul 24 17:13:53 2011 -0700
++++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.c	Tue Jul 26 10:14:42 2011 -0700
+@@ -169,9 +169,8 @@
+ 
+ #define MBENC_RESET     MBENC_MAX<<1
+ 
+-Py_ssize_t pypy_cjk_enc_chunk(struct pypy_cjk_enc_s *d)
++Py_ssize_t pypy_cjk_enc_chunk(struct pypy_cjk_enc_s *d, int flags)
+ {
+-  int flags = MBENC_FLUSH | MBENC_RESET;   /* XXX always, for now */
+   while (1)
+     {
+       Py_ssize_t r;
+diff -r b5bc2f812d76 pypy/translator/c/src/cjkcodecs/multibytecodec.h
+--- a/pypy/translator/c/src/cjkcodecs/multibytecodec.h	Sun Jul 24 17:13:53 2011 -0700
++++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.h	Tue Jul 26 10:14:42 2011 -0700
+@@ -115,7 +115,7 @@
+ struct pypy_cjk_enc_s *pypy_cjk_enc_init(const MultibyteCodec *codec,
+                                          Py_UNICODE *inbuf, Py_ssize_t inlen);
+ void pypy_cjk_enc_free(struct pypy_cjk_enc_s *);
+-Py_ssize_t pypy_cjk_enc_chunk(struct pypy_cjk_enc_s *);
++Py_ssize_t pypy_cjk_enc_chunk(struct pypy_cjk_enc_s *, int flags);
+ Py_ssize_t pypy_cjk_enc_reset(struct pypy_cjk_enc_s *);
+ char *pypy_cjk_enc_outbuf(struct pypy_cjk_enc_s *);
+ Py_ssize_t pypy_cjk_enc_outlen(struct pypy_cjk_enc_s *);
+multibytecodec
+whitespace #+boring
Empty file added.
+# HG changeset patch
+# Parent 815376b1143be9585bdb8632c8c9cfca2392064c
+whitespace
+
+diff -r 815376b1143b pypy/rlib/jit.py
+--- a/pypy/rlib/jit.py	Mon Jul 25 22:20:15 2011 -0700
++++ b/pypy/rlib/jit.py	Mon Jul 25 22:21:12 2011 -0700
+@@ -114,7 +114,7 @@
+         s_x = annmodel.not_const(s_x)
+         access_directly = 's_access_directly' in kwds_s
+         fresh_virtualizable = 's_fresh_virtualizable' in kwds_s
+-        if  access_directly or fresh_virtualizable:
++        if access_directly or fresh_virtualizable:
+             assert access_directly, "lone fresh_virtualizable hint"
+             if isinstance(s_x, annmodel.SomeInstance):
+                 from pypy.objspace.flow.model import Constant
+diff -r 815376b1143b pypy/translator/translator.py
+--- a/pypy/translator/translator.py	Mon Jul 25 22:20:15 2011 -0700
++++ b/pypy/translator/translator.py	Mon Jul 25 22:21:12 2011 -0700
+@@ -105,7 +105,7 @@
+             raise ValueError("we already have an rtyper")
+         from pypy.rpython.rtyper import RPythonTyper
+         self.rtyper = RPythonTyper(self.annotator,
+-                                   type_system = type_system)
++                                   type_system=type_system)
+         return self.rtyper
+ 
+     def getexceptiontransformer(self):