Commits

wlav committed 384e11f Merge

o) tests for array overloads
o) tests for global function overloads on lazy lookup
o) code that collects all global overloads lazily
o) merge default into branch

  • Parent commits 2cbe44d, 385b313
  • Branches reflex-support


Files changed (31)

File pypy/doc/config/objspace.usemodules._csv.txt

+Implementation in RPython for the core of the 'csv' module
+

File pypy/jit/metainterp/optimizeopt/util.py

 from pypy.rlib.objectmodel import r_dict, compute_identity_hash
 from pypy.rlib.rarithmetic import intmask
 from pypy.rlib.unroll import unrolling_iterable
-from pypy.jit.metainterp import resoperation, history
+from pypy.jit.metainterp import resoperation
 from pypy.rlib.debug import make_sure_not_resized
 from pypy.jit.metainterp.resoperation import rop
+from pypy.rlib.objectmodel import we_are_translated
 
 # ____________________________________________________________
 # Misc. utilities
 def make_dispatcher_method(Class, name_prefix, op_prefix=None, default=None):
     ops = _findall(Class, name_prefix, op_prefix)
     def dispatch(self, op, *args):
-        opnum = op.getopnum()
-        for value, cls, func in ops:
-            if opnum == value:
-                assert isinstance(op, cls)
+        if we_are_translated():
+            opnum = op.getopnum()
+            for value, cls, func in ops:
+                if opnum == value:
+                    assert isinstance(op, cls)
+                    return func(self, op, *args)
+            if default:
+                return default(self, op, *args)
+        else:
+            func = getattr(Class, name_prefix + op.getopname().upper(), None)
+            if func is not None:
                 return func(self, op, *args)
-        if default:
-            return default(self, op, *args)
+            if default:
+                return default(self, op, *args)
     dispatch.func_name = "dispatch_" + name_prefix
     return dispatch
 

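The hunk above splits the generated dispatcher: when translated it keeps the
precomputed (opnum, class, handler) table, while untranslated it resolves the
handler lazily by name via getattr, presumably so that handlers added or
patched after class creation are still found during testing. A minimal,
self-contained sketch of that name-prefix dispatch pattern (all names below
are illustrative, not taken from the PyPy sources):

    class Optimizer(object):
        def optimize_INT_ADD(self, op):
            return "handled %s" % op.getopname()

    class Op(object):
        def __init__(self, opname):
            self.opname = opname
        def getopname(self):
            return self.opname

    def dispatch_by_name(obj, op, name_prefix="optimize_"):
        # Mirrors the untranslated branch: look the handler up lazily by the
        # uppercased op name, falling back to a default when there is none.
        func = getattr(obj.__class__, name_prefix + op.getopname().upper(), None)
        if func is not None:
            return func(obj, op)
        return "no handler for %s" % op.getopname()

    assert dispatch_by_name(Optimizer(), Op("int_add")) == "handled int_add"
    assert dispatch_by_name(Optimizer(), Op("int_mul")) == "no handler for int_mul"
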
File pypy/jit/metainterp/test/test_ajit.py

                 y -= 1
             return res
         def g(x, y):
+            set_param(myjitdriver, 'max_unroll_loops', 5)
             a1 = f(A(x), y)
             a2 = f(A(x), y)
             b1 = f(B(x), y)

File pypy/jit/metainterp/test/test_send.py

 import py
-from pypy.rlib.jit import JitDriver, promote, elidable
+from pypy.rlib.jit import JitDriver, promote, elidable, set_param
 from pypy.jit.codewriter.policy import StopAtXPolicy
 from pypy.jit.metainterp.test.support import LLJitMixin, OOJitMixin
 
             def getvalue(self):
                 return self.y
         def f(x, y):
+            set_param(myjitdriver, 'max_unroll_loops', 5)
             if x & 1:
                 w = W1(x)
             else:
         w2 = W2(20)
 
         def f(x, y):
+            set_param(myjitdriver, 'max_unroll_loops', 5)
             if x & 1:
                 w = w1
             else:

File pypy/module/_cffi_backend/ctypefunc.py

         for i, cf in enumerate(ctype.fields_list):
             if cf.is_bitfield():
                 raise OperationError(space.w_NotImplementedError,
-                    space.wrap("cannot pass as argument a struct "
-                               "with bit fields"))
+                    space.wrap("cannot pass as argument or return value "
+                               "a struct with bit fields"))
             ffi_subtype = self.fb_fill_type(cf.ctype, False)
             if elements:
                 elements[i] = ffi_subtype

File pypy/module/_codecs/interp_codecs.py

          "ascii_encode",
          "latin_1_encode",
          "utf_7_encode",
-         "utf_8_encode",
          "utf_16_encode",
          "utf_16_be_encode",
          "utf_16_le_encode",
          "ascii_decode",
          "latin_1_decode",
          "utf_7_decode",
-         "utf_8_decode",
          "utf_16_decode",
          "utf_16_be_decode",
          "utf_16_le_decode",
     make_encoder_wrapper('mbcs_encode')
     make_decoder_wrapper('mbcs_decode')
 
+# utf-8 functions are not regular, because we have to pass
+# "allow_surrogates=True"
+@unwrap_spec(uni=unicode, errors='str_or_None')
+def utf_8_encode(space, uni, errors="strict"):
+    if errors is None:
+        errors = 'strict'
+    state = space.fromcache(CodecState)
+    result = runicode.unicode_encode_utf_8(
+        uni, len(uni), errors, state.encode_error_handler,
+        allow_surrogates=True)
+    return space.newtuple([space.wrap(result), space.wrap(len(uni))])
+
+@unwrap_spec(string='bufferstr', errors='str_or_None')
+def utf_8_decode(space, string, errors="strict", w_final=False):
+    if errors is None:
+        errors = 'strict'
+    final = space.is_true(w_final)
+    state = space.fromcache(CodecState)
+    result, consumed = runicode.str_decode_utf_8(
+        string, len(string), errors,
+        final, state.decode_error_handler,
+        allow_surrogates=True)
+    return space.newtuple([space.wrap(result), space.wrap(consumed)])
+
 @unwrap_spec(data=str, errors='str_or_None', byteorder=int)
 def utf_16_ex_decode(space, data, errors='strict', byteorder=0, w_final=False):
     if errors is None:

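As the comment in the hunk above notes, the utf-8 wrappers are spelled out by
hand because they must pass allow_surrogates=True down to runicode, whereas
the RPython-level default (see the runicode.py hunk further down) now rejects
lone surrogates. A rough sketch of the difference, assuming the pypy source
tree is importable on a host Python:

    from pypy.rlib import runicode

    u = u"\ud800"                     # a lone high surrogate
    # Default (allow_surrogates=False): the 'strict' handler is expected to raise.
    try:
        runicode.unicode_encode_utf_8(u, len(u), 'strict')
    except UnicodeEncodeError:
        pass
    # What the app-level utf_8_encode now requests: the surrogate passes
    # through and is encoded as a plain three-byte sequence.
    s = runicode.unicode_encode_utf_8(u, len(u), 'strict', allow_surrogates=True)
    assert s == '\xed\xa0\x80'
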
File pypy/module/_csv/interp_reader.py

                 w_line = space.next(self.w_iter)
             except OperationError, e:
                 if e.match(space, space.w_StopIteration):
-                    if field_builder is not None:
-                        raise self.error("newline inside string")
+                    if (field_builder is not None and
+                            state != START_RECORD and state != EAT_CRNL and
+                            (len(field_builder.build()) > 0 or
+                             state == IN_QUOTED_FIELD)):
+                        if dialect.strict:
+                            raise self.error("newline inside string")
+                        else:
+                            self.save_field(field_builder)
+                            break
                 raise
             self.line_num += 1
             line = space.str_w(w_line)

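The new branch above changes what happens when the line iterator is exhausted
in the middle of a field: non-strict dialects now flush the pending field
instead of unconditionally raising "newline inside string", while strict
dialects keep raising. The tests added in the next file exercise exactly this;
for comparison, the same inputs against CPython's standard csv module behave
as follows (illustrative, run on a recent CPython):

    import csv

    assert list(csv.reader(['a,"'])) == [['a', '']]   # EOF inside a quoted field
    try:
        list(csv.reader(['a,"'], strict=True))        # strict mode still errors out
    except csv.Error:
        pass
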
File pypy/module/_csv/test/test_reader.py

 
     def test_dubious_quote(self):
         self._read_test(['12,12,1",'], [['12', '12', '1"', '']])
+
+    def test_read_eof(self):
+        self._read_test(['a,"'], [['a', '']])
+        self._read_test(['"a'], [['a']])
+        self._read_test(['^'], [['\n']], escapechar='^')
+        self._read_test(['a,"'], 'Error', strict=True)
+        self._read_test(['"a'], 'Error', strict=True)
+        self._read_test(['^'], 'Error', escapechar='^', strict=True)

File pypy/module/_ffi/interp_funcptr.py

                                                                w_restype)
     addr = rffi.cast(rffi.VOIDP, addr)
     func = libffi.Func(name, argtypes, restype, addr, flags)
-    return W_FuncPtr(func, argtypes_w, w_restype)
+    try:
+        return W_FuncPtr(func, argtypes_w, w_restype)
+    except OSError:
+        raise OperationError(space.w_SystemError,
+                         space.wrap("internal error building the Func object"))
 
 
 W_FuncPtr.typedef = TypeDef(

File pypy/module/_socket/interp_socket.py

         info is a pair (hostaddr, port).
         """
         try:
-            sock, addr = self.accept(W_RSocket)
+            fd, addr = self.accept()
+            sock = rsocket.make_socket(
+                fd, self.family, self.type, self.proto, W_RSocket)
             return space.newtuple([space.wrap(sock),
                                    addr.as_object(sock.fd, space)])
         except SocketError, e:

File pypy/module/cppyy/capi/__init__.py

 
 C_METHOD = _C_OPAQUE_PTR
 C_INDEX = rffi.LONG
+C_INDEX_ARRAY = rffi.LONGP
 WLAVC_INDEX = rffi.LONG
 
 C_METHPTRGETTER = lltype.FuncType([C_OBJECT], rffi.VOIDP)
     compilation_info=backend.eci)
 def c_method_index_at(cppscope, imethod):
     return _c_method_index_at(cppscope.handle, imethod)
-_c_method_index_from_name = rffi.llexternal(
-    "cppyy_method_index_from_name",
-    [C_SCOPE, rffi.CCHARP], C_INDEX,
+_c_method_indices_from_name = rffi.llexternal(
+    "cppyy_method_indices_from_name",
+    [C_SCOPE, rffi.CCHARP], C_INDEX_ARRAY,
     threadsafe=ts_reflect,
     compilation_info=backend.eci)
-def c_method_index_from_name(cppscope, name):
-    return _c_method_index_from_name(cppscope.handle, name)
+def c_method_indices_from_name(cppscope, name):
+    indices = _c_method_indices_from_name(cppscope.handle, name)
+    if not indices:
+        return []
+    py_indices = []
+    i = 0
+    index = indices[i]
+    while index != -1:
+        i += 1
+        py_indices.append(index)
+        index = indices[i]
+    c_free(rffi.cast(rffi.VOIDP, indices))   # c_free defined below
+    return py_indices
 
 _c_method_name = rffi.llexternal(
     "cppyy_method_name",

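The new c_method_indices_from_name above replaces the single-index lookup with
one that returns every matching overload: the C++ side (see reflexcwrapper.cxx
below) hands back either a NULL pointer or a malloc'ed array of indices
terminated by -1, which the Python wrapper walks and then frees. A pure-Python
model of that convention, with a plain list standing in for the C array (names
below are illustrative):

    def collect_indices(indices, free):
        if not indices:              # NULL: no method with that name
            return []
        py_indices = []
        i = 0
        while indices[i] != -1:      # -1 is the sentinel written by the C++ side
            py_indices.append(indices[i])
            i += 1
        free(indices)                # ownership was transferred to the caller
        return py_indices

    freed = []
    assert collect_indices([3, 7, 12, -1], freed.append) == [3, 7, 12]
    assert collect_indices(None, freed.append) == []
    assert freed == [[3, 7, 12, -1]]
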
File pypy/module/cppyy/include/capi.h

     /* method/function reflection information --------------------------------- */
     int cppyy_num_methods(cppyy_scope_t scope);
     cppyy_index_t cppyy_method_index_at(cppyy_scope_t scope, int imeth);
-    cppyy_index_t cppyy_method_index_from_name(cppyy_scope_t scope, const char* name);
+    cppyy_index_t* cppyy_method_indices_from_name(cppyy_scope_t scope, const char* name);
 
     char* cppyy_method_name(cppyy_scope_t scope, cppyy_index_t idx);
     char* cppyy_method_result_type(cppyy_scope_t scope, cppyy_index_t idx);

File pypy/module/cppyy/interp_cppyy.py

                 self._make_datamember(datamember_name, i)
 
     def find_overload(self, meth_name):
-        # TODO: collect all overloads, not just the non-overloaded version
-        meth_idx = capi.c_method_index_from_name(self, meth_name)
-        if meth_idx == -1:
+        indices = capi.c_method_indices_from_name(self, meth_name)
+        if not indices:
             raise self.missing_attribute_error(meth_name)
-        cppfunction = self._make_cppfunction(meth_name, meth_idx)
-        overload = W_CPPOverload(self.space, self, [cppfunction])
+        cppfunctions = []
+        for meth_idx in indices:
+            f = self._make_cppfunction(meth_name, meth_idx)
+            cppfunctions.append(f)
+        overload = W_CPPOverload(self.space, self, cppfunctions)
         return overload
 
     def find_datamember(self, dm_name):

File pypy/module/cppyy/src/reflexcwrapper.cxx

     return (cppyy_index_t)imeth;
 }
 
-cppyy_index_t cppyy_method_index_from_name(cppyy_scope_t handle, const char* name) {
+cppyy_index_t* cppyy_method_indices_from_name(cppyy_scope_t handle, const char* name) {
+    std::vector<cppyy_index_t> result;
     Reflex::Scope s = scope_from_handle(handle);
     // the following appears dumb, but the internal storage for Reflex is an
     // unsorted std::vector anyway, so there's no gain to be had in using the
         Reflex::Member m = s.FunctionMemberAt(imeth);
         if (m.Name() == name) {
             if (m.IsPublic())
-               return (cppyy_index_t)imeth;
-            return (cppyy_index_t)-1;
+                result.push_back((cppyy_index_t)imeth);
         }
     }
-    return (cppyy_index_t)-1;
+    if (result.empty())
+        return (cppyy_index_t*)0;
+    cppyy_index_t* llresult = (cppyy_index_t*)malloc(sizeof(cppyy_index_t)*result.size()+1);
+    for (int i = 0; i < (int)result.size(); ++i) llresult[i] = result[i];
+    llresult[result.size()] = -1;
+    return llresult;
 }
 
 char* cppyy_method_name(cppyy_scope_t handle, cppyy_index_t method_index) {

File pypy/module/cppyy/test/overloads.cxx

 
 std::string more_overloads2::call(const dd_ol*, int) { return "dd_olptr"; }
 std::string more_overloads2::call(const dd_ol&, int) { return "dd_olref"; }
+
+
+double calc_mean(long n, const float* a)     { return calc_mean<float>(n, a); }
+double calc_mean(long n, const double* a)    { return calc_mean<double>(n, a); }
+double calc_mean(long n, const int* a)       { return calc_mean<int>(n, a); }
+double calc_mean(long n, const short* a)     { return calc_mean<short>(n, a); }
+double calc_mean(long n, const long* a)      { return calc_mean<long>(n, a); }

File pypy/module/cppyy/test/overloads.h

 
 class a_overload {
 public:
-   a_overload();
-   int i1, i2;
+    a_overload();
+    int i1, i2;
 };
 
 namespace ns_a_overload {
-   class a_overload {
-   public:
-      a_overload();
-      int i1, i2;
-   };
+    class a_overload {
+    public:
+        a_overload();
+        int i1, i2;
+    };
 
-   class b_overload {
-   public:
-      int f(const std::vector<int>* v);
-   };
+    class b_overload {
+    public:
+        int f(const std::vector<int>* v);
+    };
 }
 
 namespace ns_b_overload {
-   class a_overload {
-   public:
-      a_overload();
-      int i1, i2;
-   };
+    class a_overload {
+    public:
+        a_overload();
+        int i1, i2;
+    };
 }
 
 class b_overload {
 public:
-   b_overload();
-   int i1, i2;
+    b_overload();
+    int i1, i2;
 };
 
 class c_overload {
 public:
-   c_overload();
-   int get_int(a_overload* a);
-   int get_int(ns_a_overload::a_overload* a);
-   int get_int(ns_b_overload::a_overload* a);
-   int get_int(short* p);
-   int get_int(b_overload* b);
-   int get_int(int* p);
+    c_overload();
+    int get_int(a_overload* a);
+    int get_int(ns_a_overload::a_overload* a);
+    int get_int(ns_b_overload::a_overload* a);
+    int get_int(short* p);
+    int get_int(b_overload* b);
+    int get_int(int* p);
 };
 
 class d_overload {
 public:
-   d_overload();
+    d_overload();
 //   int get_int(void* p) { return *(int*)p; }
-   int get_int(int* p);
-   int get_int(b_overload* b);
-   int get_int(short* p);
-   int get_int(ns_b_overload::a_overload* a);
-   int get_int(ns_a_overload::a_overload* a);
-   int get_int(a_overload* a);
+    int get_int(int* p);
+    int get_int(b_overload* b);
+    int get_int(short* p);
+    int get_int(ns_b_overload::a_overload* a);
+    int get_int(ns_a_overload::a_overload* a);
+    int get_int(a_overload* a);
 };
 
 
 
 class more_overloads {
 public:
-   more_overloads();
-   std::string call(const aa_ol&);
-   std::string call(const bb_ol&, void* n=0);
-   std::string call(const cc_ol&);
-   std::string call(const dd_ol&);
+    more_overloads();
+    std::string call(const aa_ol&);
+    std::string call(const bb_ol&, void* n=0);
+    std::string call(const cc_ol&);
+    std::string call(const dd_ol&);
 
-   std::string call_unknown(const dd_ol&);
+    std::string call_unknown(const dd_ol&);
 
-   std::string call(double);
-   std::string call(int);
-   std::string call1(int);
-   std::string call1(double);
+    std::string call(double);
+    std::string call(int);
+    std::string call1(int);
+    std::string call1(double);
 };
 
 class more_overloads2 {
 public:
-   more_overloads2();
-   std::string call(const bb_ol&);
-   std::string call(const bb_ol*);
+    more_overloads2();
+    std::string call(const bb_ol&);
+    std::string call(const bb_ol*);
 
-   std::string call(const dd_ol*, int);
-   std::string call(const dd_ol&, int);
+    std::string call(const dd_ol*, int);
+    std::string call(const dd_ol&, int);
 };
+
+template<typename T>
+double calc_mean(long n, const T* a) {
+    double sum = 0., sumw = 0.;
+    const T* end = a+n;
+    while (a != end) {
+        sum += *a++;
+        sumw += 1;
+    }
+
+    return sum/sumw;
+}
+
+double calc_mean(long n, const float* a);
+double calc_mean(long n, const double* a);
+double calc_mean(long n, const int* a);
+double calc_mean(long n, const short* a);
+double calc_mean(long n, const long* a);

File pypy/module/cppyy/test/overloads.xml

   <class name="more_overloads" />
   <class name="more_overloads2" />
 
+  <function name="calc_mean" />
+
 </lcgdict>

File pypy/module/cppyy/test/overloads_LinkDef.h

 #pragma link C++ class more_overloads;
 #pragma link C++ class more_overloads2;
 
+#pragma link C++ function calc_mean;
+
 #endif

File pypy/module/cppyy/test/test_overloads.py

 currpath = py.path.local(__file__).dirpath()
 test_dct = str(currpath.join("overloadsDict.so"))
 
-space = gettestobjspace(usemodules=['cppyy'])
+space = gettestobjspace(usemodules=['cppyy', 'array'])
 
 def setup_module(mod):
     if sys.platform == 'win32':
 #        assert more_overloads().call(1.)  == "double"
         assert more_overloads().call1(1)  == "int"
         assert more_overloads().call1(1.) == "double"
+
+    def test07_mean_overloads(self):
+        """Adapted test for array overloading"""
+
+        import cppyy, array
+        cmean = cppyy.gbl.calc_mean
+
+        numbers = [8, 2, 4, 2, 4, 2, 4, 4, 1, 5, 6, 3, 7]
+        mean, median = 4.0, 4.0
+
+        for l in ['f', 'd', 'i', 'h', 'l']:
+            a = array.array(l, numbers)
+            assert(round(cmean(len(a), a) - mean, 8), 0)

File pypy/objspace/std/dictmultiobject.py

 # Iteration
 
 
-class W_DictMultiIterKeysObject(W_Object):
+class W_BaseDictMultiIterObject(W_Object):
     from pypy.objspace.std.dicttype import dictiter_typedef as typedef
 
     _immutable_fields_ = ["iteratorimplementation"]
         w_self.space = space
         w_self.iteratorimplementation = iteratorimplementation
 
+class W_DictMultiIterKeysObject(W_BaseDictMultiIterObject):
+    pass
+
+class W_DictMultiIterValuesObject(W_BaseDictMultiIterObject):
+    pass
+
+class W_DictMultiIterItemsObject(W_BaseDictMultiIterObject):
+    pass
+
 registerimplementation(W_DictMultiIterKeysObject)
-
-class W_DictMultiIterValuesObject(W_Object):
-    from pypy.objspace.std.dicttype import dictiter_typedef as typedef
-
-    _immutable_fields_ = ["iteratorimplementation"]
-
-    ignore_for_isinstance_cache = True
-
-    def __init__(w_self, space, iteratorimplementation):
-        w_self.space = space
-        w_self.iteratorimplementation = iteratorimplementation
-
 registerimplementation(W_DictMultiIterValuesObject)
-
-class W_DictMultiIterItemsObject(W_Object):
-    from pypy.objspace.std.dicttype import dictiter_typedef as typedef
-
-    _immutable_fields_ = ["iteratorimplementation"]
-
-    ignore_for_isinstance_cache = True
-
-    def __init__(w_self, space, iteratorimplementation):
-        w_self.space = space
-        w_self.iteratorimplementation = iteratorimplementation
-
 registerimplementation(W_DictMultiIterItemsObject)
 
 def iter__DictMultiIterKeysObject(space, w_dictiter):

File pypy/objspace/std/ropeunicodeobject.py

             if result is not None:
                 return W_RopeObject(result)
         elif encoding == "utf-8":
-            result = rope.unicode_encode_utf8(node)
+            result = rope.unicode_encode_utf8(node, allow_surrogates=True)
             if result is not None:
                 return W_RopeObject(result)
     return encode_object(space, w_unistr, encoding, errors)

File pypy/objspace/std/unicodeobject.py

 from pypy.rlib.objectmodel import compute_hash, specialize
 from pypy.rlib.objectmodel import compute_unique_id
 from pypy.rlib.rstring import UnicodeBuilder
-from pypy.rlib.runicode import unicode_encode_unicode_escape
+from pypy.rlib.runicode import make_unicode_escape_function
 from pypy.module.unicodedata import unicodedb
 from pypy.tool.sourcetools import func_with_new_name
 from pypy.rlib import jit
                     space.wrap("character mapping must return integer, None or unicode"))
     return W_UnicodeObject(u''.join(result))
 
+_repr_function, _ = make_unicode_escape_function(
+    pass_printable=False, unicode_output=False, quotes=True, prefix='u')
+
 def repr__Unicode(space, w_unicode):
     chars = w_unicode._value
     size = len(chars)
-    s = unicode_encode_unicode_escape(chars, size, "strict", quotes=True)
+    s = _repr_function(chars, size, "strict")
     return space.wrap(s)
 
 def mod__Unicode_ANY(space, w_format, w_values):

File pypy/objspace/std/unicodetype.py

             if encoding == 'ascii':
                 u = space.unicode_w(w_object)
                 eh = encode_error_handler(space)
-                return space.wrap(unicode_encode_ascii(u, len(u), None,
-                                                       errorhandler=eh))
+                return space.wrap(unicode_encode_ascii(
+                        u, len(u), None, errorhandler=eh))
             if encoding == 'utf-8':
                 u = space.unicode_w(w_object)
                 eh = encode_error_handler(space)
-                return space.wrap(unicode_encode_utf_8(u, len(u), None,
-                                                       errorhandler=eh))
+                return space.wrap(unicode_encode_utf_8(
+                        u, len(u), None, errorhandler=eh,
+                        allow_surrogates=True))
         from pypy.module._codecs.interp_codecs import lookup_codec
         w_encoder = space.getitem(lookup_codec(space, encoding), space.wrap(0))
     if errors is None:
             # XXX error handling
             s = space.bufferstr_w(w_obj)
             eh = decode_error_handler(space)
-            return space.wrap(str_decode_ascii(s, len(s), None,
-                                               final=True,
-                                               errorhandler=eh)[0])
+            return space.wrap(str_decode_ascii(
+                    s, len(s), None, final=True, errorhandler=eh)[0])
         if encoding == 'utf-8':
             s = space.bufferstr_w(w_obj)
             eh = decode_error_handler(space)
-            return space.wrap(str_decode_utf_8(s, len(s), None,
-                                               final=True,
-                                               errorhandler=eh)[0])
+            return space.wrap(str_decode_utf_8(
+                    s, len(s), None, final=True, errorhandler=eh,
+                    allow_surrogates=True)[0])
     w_codecs = space.getbuiltinmodule("_codecs")
     w_decode = space.getattr(w_codecs, space.wrap("decode"))
     if errors is None:

File pypy/rlib/rope.py

     if rope.is_bytestring():
         return rope
 
-def unicode_encode_utf8(rope):
+def unicode_encode_utf8(rope, allow_surrogates=False):
     from pypy.rlib.runicode import unicode_encode_utf_8
     if rope.is_ascii():
         return rope
                                 unicode_encode_utf8(rope.right))
     elif isinstance(rope, LiteralUnicodeNode):
         return LiteralStringNode(
-            unicode_encode_utf_8(rope.u, len(rope.u), "strict"))
+            unicode_encode_utf_8(rope.u, len(rope.u), "strict",
+                                 allow_surrogates=allow_surrogates))
     elif isinstance(rope, LiteralStringNode):
         return LiteralStringNode(_str_encode_utf_8(rope.s))
 

File pypy/rlib/rsocket.py

     """
     _mixin_ = True        # for interp_socket.py
     fd = _c.INVALID_SOCKET
-    def __init__(self, family=AF_INET, type=SOCK_STREAM, proto=0):
+    def __init__(self, family=AF_INET, type=SOCK_STREAM, proto=0,
+                 fd=_c.INVALID_SOCKET):
         """Create a new socket."""
-        fd = _c.socket(family, type, proto)
+        if _c.invalid_socket(fd):
+            fd = _c.socket(family, type, proto)
         if _c.invalid_socket(fd):
             raise self.error_handler()
         # PLAT RISCOS
         addrlen_p[0] = rffi.cast(_c.socklen_t, maxlen)
         return addr, addr.addr_p, addrlen_p
 
-    def accept(self, SocketClass=None):
+    def accept(self):
         """Wait for an incoming connection.
-        Return (new socket object, client address)."""
-        if SocketClass is None:
-            SocketClass = RSocket
+        Return (new socket fd, client address)."""
         if self._select(False) == 1:
             raise SocketTimeout
         address, addr_p, addrlen_p = self._addrbuf()
         if _c.invalid_socket(newfd):
             raise self.error_handler()
         address.addrlen = rffi.cast(lltype.Signed, addrlen)
-        sock = make_socket(newfd, self.family, self.type, self.proto,
-                           SocketClass)
-        return (sock, address)
+        return (newfd, address)
 
     def bind(self, address):
         """Bind the socket to a local address."""
             if res != 0:
                 raise self.error_handler()
 
+    def detach(self):
+        fd = self.fd
+        self.fd = _c.INVALID_SOCKET
+        return fd
+
     if _c.WIN32:
         def _connect(self, address):
             """Connect the socket to a remote address."""

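With this change RSocket.accept() no longer builds the wrapper object itself:
it returns the raw file descriptor plus the address, and callers either hand
the fd to the RSocket constructor (as the updated rlib tests below do) or go
through make_socket() (as interp_socket.py now does). A caller-side sketch,
assuming an already bound and listening RSocket named server:

    from pypy.rlib.rsocket import RSocket, AF_INET

    fd, client_addr = server.accept()     # raw fd plus address, no socket object
    conn = RSocket(AF_INET, fd=fd)        # wrap it; interp-level code would use
                                          # rsocket.make_socket(fd, family, type,
                                          # proto, SocketClass) instead
    conn.send('hello')
    conn.close()
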
File pypy/rlib/runicode.py

 ]
 
 def str_decode_utf_8(s, size, errors, final=False,
-                     errorhandler=None):
+                     errorhandler=None, allow_surrogates=False):
     if errorhandler is None:
         errorhandler = raise_unicode_exception_decode
-    return str_decode_utf_8_impl(s, size, errors, final, errorhandler)
+    return str_decode_utf_8_impl(s, size, errors, final, errorhandler,
+                                 allow_surrogates=allow_surrogates)
 
-def str_decode_utf_8_impl(s, size, errors, final, errorhandler):
+def str_decode_utf_8_impl(s, size, errors, final, errorhandler,
+                          allow_surrogates):
     if size == 0:
         return u'', 0
 
             if (ordch2>>6 != 0x2 or    # 0b10
                 (ordch1 == 0xe0 and ordch2 < 0xa0)
                 # surrogates shouldn't be valid UTF-8!
-                # Uncomment the line below to make them invalid.
-                # or (ordch1 == 0xed and ordch2 > 0x9f)
+                or (not allow_surrogates and ordch1 == 0xed and ordch2 > 0x9f)
                 ):
                 r, pos = errorhandler(errors, 'utf-8',
                                       'invalid continuation byte',
     result.append((chr((0x80 | ((ch >> 6) & 0x3f)))))
     result.append((chr((0x80 | (ch & 0x3f)))))
 
-def unicode_encode_utf_8(s, size, errors, errorhandler=None):
+def unicode_encode_utf_8(s, size, errors, errorhandler=None,
+                         allow_surrogates=False):
+    if errorhandler is None:
+        errorhandler = raise_unicode_exception_encode
+    return unicode_encode_utf_8_impl(s, size, errors, errorhandler,
+                                     allow_surrogates=allow_surrogates)
+
+def unicode_encode_utf_8_impl(s, size, errors, errorhandler,
+                              allow_surrogates=False):
     assert(size >= 0)
     result = StringBuilder(size)
-    i = 0
-    while i < size:
-        ch = ord(s[i])
-        i += 1
+    pos = 0
+    while pos < size:
+        ch = ord(s[pos])
+        pos += 1
         if ch < 0x80:
             # Encode ASCII
             result.append(chr(ch))
             # Encode UCS2 Unicode ordinals
             if ch < 0x10000:
                 # Special case: check for high surrogate
-                if 0xD800 <= ch <= 0xDBFF and i != size:
-                    ch2 = ord(s[i])
-                    # Check for low surrogate and combine the two to
-                    # form a UCS4 value
-                    if 0xDC00 <= ch2 <= 0xDFFF:
-                        ch3 = ((ch - 0xD800) << 10 | (ch2 - 0xDC00)) + 0x10000
-                        i += 1
-                        _encodeUCS4(result, ch3)
+                if 0xD800 <= ch <= 0xDFFF:
+                    if pos != size:
+                        ch2 = ord(s[pos])
+                        # Check for low surrogate and combine the two to
+                        # form a UCS4 value
+                        if ch <= 0xDBFF and 0xDC00 <= ch2 <= 0xDFFF:
+                            ch3 = ((ch - 0xD800) << 10 | (ch2 - 0xDC00)) + 0x10000
+                            pos += 1
+                            _encodeUCS4(result, ch3)
+                            continue
+                    if not allow_surrogates:
+                        r, pos = errorhandler(errors, 'utf-8',
+                                              'surrogates not allowed',
+                                              s, pos-1, pos)
+                        for ch in r:
+                            if ord(ch) < 0x80:
+                                result.append(chr(ord(ch)))
+                            else:
+                                errorhandler('strict', 'utf-8',
+                                             'surrogates not allowed',
+                                             s, pos-1, pos)
                         continue
-                # Fall through: handles isolated high surrogates
+                    # else: Fall through and handles isolated high surrogates
                 result.append((chr((0xe0 | (ch >> 12)))))
                 result.append((chr((0x80 | ((ch >> 6) & 0x3f)))))
                 result.append((chr((0x80 | (ch & 0x3f)))))
-                continue
             else:
                 _encodeUCS4(result, ch)
     return result.build()
 
     return builder.build(), pos
 
-def unicode_encode_unicode_escape(s, size, errors, errorhandler=None, quotes=False):
-    # errorhandler is not used: this function cannot cause Unicode errors
-    result = StringBuilder(size)
+def make_unicode_escape_function(pass_printable=False, unicode_output=False,
+                                 quotes=False, prefix=None):
+    # Python3 has two similar escape functions: One to implement
+    # encode('unicode_escape') and which outputs bytes, and unicode.__repr__
+    # which outputs unicode.  They cannot share RPython code, so we generate
+    # them with the template below.
+    # Python2 does not really need this, but it reduces diffs between branches.
 
-    if quotes:
-        if s.find(u'\'') != -1 and s.find(u'\"') == -1:
-            quote = ord('\"')
-            result.append('u"')
+    if unicode_output:
+        STRING_BUILDER = UnicodeBuilder
+        STR = unicode
+        CHR = UNICHR
+    else:
+        STRING_BUILDER = StringBuilder
+        STR = str
+        CHR = chr
+
+    def unicode_escape(s, size, errors, errorhandler=None):
+        # errorhandler is not used: this function cannot cause Unicode errors
+        result = STRING_BUILDER(size)
+
+        if quotes:
+            if prefix:
+                result.append(STR(prefix))
+            if s.find(u'\'') != -1 and s.find(u'\"') == -1:
+                quote = ord('\"')
+                result.append(STR('"'))
+            else:
+                quote = ord('\'')
+                result.append(STR('\''))
         else:
-            quote = ord('\'')
-            result.append('u\'')
-    else:
-        quote = 0
+            quote = 0
 
-        if size == 0:
-            return ''
+            if size == 0:
+                return STR('')
 
-    pos = 0
-    while pos < size:
-        ch = s[pos]
-        oc = ord(ch)
+        pos = 0
+        while pos < size:
+            ch = s[pos]
+            oc = ord(ch)
 
-        # Escape quotes
-        if quotes and (oc == quote or ch == '\\'):
-            result.append('\\')
-            result.append(chr(oc))
-            pos += 1
-            continue
-
-        # The following logic is enabled only if MAXUNICODE == 0xffff, or
-        # for testing on top of a host CPython where sys.maxunicode == 0xffff
-        if ((MAXUNICODE < 65536 or
-                (not we_are_translated() and sys.maxunicode < 65536))
-            and 0xD800 <= oc < 0xDC00 and pos + 1 < size):
-            # Map UTF-16 surrogate pairs to Unicode \UXXXXXXXX escapes
-            pos += 1
-            oc2 = ord(s[pos])
-
-            if 0xDC00 <= oc2 <= 0xDFFF:
-                ucs = (((oc & 0x03FF) << 10) | (oc2 & 0x03FF)) + 0x00010000
-                raw_unicode_escape_helper(result, ucs)
+            # Escape quotes
+            if quotes and (oc == quote or ch == '\\'):
+                result.append(STR('\\'))
+                result.append(CHR(oc))
                 pos += 1
                 continue
-            # Fall through: isolated surrogates are copied as-is
-            pos -= 1
 
-        # Map special whitespace to '\t', \n', '\r'
-        if ch == '\t':
-            result.append('\\t')
-        elif ch == '\n':
-            result.append('\\n')
-        elif ch == '\r':
-            result.append('\\r')
-        elif ch == '\\':
-            result.append('\\\\')
+            # The following logic is enabled only if MAXUNICODE == 0xffff, or
+            # for testing on top of a host Python where sys.maxunicode == 0xffff
+            if ((MAXUNICODE < 65536 or
+                    (not we_are_translated() and sys.maxunicode < 65536))
+                and 0xD800 <= oc < 0xDC00 and pos + 1 < size):
+                # Map UTF-16 surrogate pairs to Unicode \UXXXXXXXX escapes
+                pos += 1
+                oc2 = ord(s[pos])
 
-        # Map non-printable or non-ascii to '\xhh' or '\uhhhh'
-        elif oc < 32 or oc >= 0x7F:
-            raw_unicode_escape_helper(result, oc)
+                if 0xDC00 <= oc2 <= 0xDFFF:
+                    ucs = (((oc & 0x03FF) << 10) | (oc2 & 0x03FF)) + 0x00010000
+                    char_escape_helper(result, ucs)
+                    pos += 1
+                    continue
+                # Fall through: isolated surrogates are copied as-is
+                pos -= 1
 
-        # Copy everything else as-is
+            # Map special whitespace to '\t', \n', '\r'
+            if ch == '\t':
+                result.append(STR('\\t'))
+            elif ch == '\n':
+                result.append(STR('\\n'))
+            elif ch == '\r':
+                result.append(STR('\\r'))
+            elif ch == '\\':
+                result.append(STR('\\\\'))
+
+            # Map non-printable or non-ascii to '\xhh' or '\uhhhh'
+            elif pass_printable and not unicodedb.isprintable(oc):
+                char_escape_helper(result, oc)
+            elif not pass_printable and (oc < 32 or oc >= 0x7F):
+                char_escape_helper(result, oc)
+
+            # Copy everything else as-is
+            else:
+                result.append(CHR(oc))
+            pos += 1
+
+        if quotes:
+            result.append(CHR(quote))
+        return result.build()
+
+    def char_escape_helper(result, char):
+        num = hex(char)
+        if STR is unicode:
+            num = num.decode('ascii')
+        if char >= 0x10000:
+            result.append(STR("\\U"))
+            zeros = 8
+        elif char >= 0x100:
+            result.append(STR("\\u"))
+            zeros = 4
         else:
-            result.append(chr(oc))
-        pos += 1
+            result.append(STR("\\x"))
+            zeros = 2
+        lnum = len(num)
+        nb = zeros + 2 - lnum # num starts with '0x'
+        if nb > 0:
+            result.append_multiple_char(STR('0'), nb)
+        result.append_slice(num, 2, lnum)
 
-    if quotes:
-        result.append(chr(quote))
-    return result.build()
+    return unicode_escape, char_escape_helper
+
+# This function is also used by _codecs/interp_codecs.py
+(unicode_encode_unicode_escape, raw_unicode_escape_helper
+ ) = make_unicode_escape_function()
 
 # ____________________________________________________________
 # Raw unicode escape
 
     return result.build(), pos
 
-def raw_unicode_escape_helper(result, char):
-    num = hex(char)
-    if char >= 0x10000:
-        result.append("\\U")
-        zeros = 8
-    elif char >= 0x100:
-        result.append("\\u")
-        zeros = 4
-    else:
-        result.append("\\x")
-        zeros = 2
-    lnum = len(num)
-    nb = zeros + 2 - lnum # num starts with '0x'
-    if nb > 0:
-        result.append_multiple_char('0', nb)
-    result.append_slice(num, 2, lnum)
-
 def unicode_encode_raw_unicode_escape(s, size, errors, errorhandler=None):
     # errorhandler is not used: this function cannot cause Unicode errors
     if size == 0:

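make_unicode_escape_function() turns the old unicode_encode_unicode_escape
into a template: the module regenerates the byte-string variant under the old
name, and unicodeobject.py builds a quoted, u-prefixed variant for repr. A
small usage sketch, with the expected outputs worked out from the parameters
above (checked on a host Python, so treat the exact results as illustrative):

    from pypy.rlib.runicode import make_unicode_escape_function

    # Default: same behaviour as the old unicode-escape helper.
    escape, escape_char = make_unicode_escape_function()
    assert escape(u"\t\xff", 2, 'strict') == '\\t\\xff'

    # Variant used by repr__Unicode: quoted output with a u prefix.
    repr_escape, _ = make_unicode_escape_function(
        pass_printable=False, unicode_output=False, quotes=True, prefix='u')
    assert repr_escape(u"a'b", 3, 'strict') == 'u"a\'b"'
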
File pypy/rlib/test/test_rpoll.py

     assert events[0][0] == serv.fd
     assert events[0][1] & POLLIN
 
-    servconn, cliaddr = serv.accept()
+    servconn_fd, cliaddr = serv.accept()
+    servconn = RSocket(AF_INET, fd=servconn_fd)
 
     events = poll({serv.fd: POLLIN,
                    cli.fd: POLLOUT}, timeout=500)

File pypy/rlib/test/test_rsocket.py

     lock.acquire()
     thread.start_new_thread(connecting, ())
     print 'waiting for connection'
-    s1, addr2 = sock.accept()
+    fd1, addr2 = sock.accept()
+    s1 = RSocket(fd=fd1)
     print 'connection accepted'
     lock.acquire()
     print 'connecting side knows that the connection was accepted too'
     if errcodesok:
         assert err.value.errno in (errno.EINPROGRESS, errno.EWOULDBLOCK)
 
-    s1, addr2 = sock.accept()
+    fd1, addr2 = sock.accept()
+    s1 = RSocket(fd=fd1)
     s1.setblocking(False)
     assert addr.eq(s2.getpeername())
     assert addr2.get_port() == s2.getsockname().get_port()
 
     clientsock = RSocket(AF_UNIX)
     clientsock.connect(a)
-    s, addr = serversock.accept()
+    fd, addr = serversock.accept()
+    s = RSocket(AF_UNIX, fd=fd)
 
     s.send('X')
     data = clientsock.recv(100)

File pypy/rlib/test/test_runicode.py

         for i in range(10000):
             for encoding in ("utf-7 utf-8 utf-16 utf-16-be utf-16-le "
                              "utf-32 utf-32-be utf-32-le").split():
+                if encoding == 'utf-8' and 0xd800 <= i <= 0xdfff:
+                    # Don't try to encode lone surrogates
+                    continue
                 self.checkdecode(unichr(i), encoding)
 
     def test_random(self):
             self.checkdecode(s, "utf-8")
 
     def test_utf8_surrogate(self):
-        # A surrogate should not be valid utf-8, but python 2.x accepts them.
-        # This test will raise an error with python 3.x
-        self.checkdecode(u"\ud800", "utf-8")
+        # surrogates used to be allowed by python 2.x
+        raises(UnicodeDecodeError, self.checkdecode, u"\ud800", "utf-8")
 
     def test_invalid_start_byte(self):
         """
             self.checkencode(s, "utf-8")
 
     def test_utf8_surrogates(self):
-        # check replacing of two surrogates by single char while encoding
         # make sure that the string itself is not marshalled
         u = u"\ud800"
         for i in range(4):
             u += u"\udc00"
-        self.checkencode(u, "utf-8")
+        if runicode.MAXUNICODE < 65536:
+            # Check replacing of two surrogates by single char while encoding
+            self.checkencode(u, "utf-8")
+        else:
+            # This is not done in wide unicode builds
+            raises(UnicodeEncodeError, self.checkencode, u, "utf-8")
 
     def test_ascii_error(self):
         self.checkencodeerror(u"abc\xFF\xFF\xFFcde", "ascii", 3, 6)

File pypy/rpython/rstr.py

         from pypy.rpython.annlowlevel import hlstr
         value = hlstr(llvalue)
         assert value is not None
-        univalue, _ = self.rstr_decode_utf_8(value, len(value), 'strict',
-                                             False, self.ll_raise_unicode_exception_decode)
+        univalue, _ = self.rstr_decode_utf_8(
+            value, len(value), 'strict', final=False,
+            errorhandler=self.ll_raise_unicode_exception_decode,
+            allow_surrogates=False)
         return self.ll.llunicode(univalue)
 
     def ll_raise_unicode_exception_decode(self, errors, encoding, msg, s,
         self.runicode_encode_utf_8 = None
 
     def ensure_ll_encode_utf8(self):
-        from pypy.rlib.runicode import unicode_encode_utf_8
-        self.runicode_encode_utf_8 = func_with_new_name(unicode_encode_utf_8,
-                                                        'runicode_encode_utf_8')
+        from pypy.rlib.runicode import unicode_encode_utf_8_impl
+        self.runicode_encode_utf_8 = func_with_new_name(
+            unicode_encode_utf_8_impl, 'runicode_encode_utf_8')
 
     def rtype_method_upper(self, hop):
         raise TypeError("Cannot do toupper on unicode string")
         from pypy.rpython.annlowlevel import hlunicode
         s = hlunicode(ll_s)
         assert s is not None
-        bytes = self.runicode_encode_utf_8(s, len(s), 'strict')
+        bytes = self.runicode_encode_utf_8(
+            s, len(s), 'strict',
+            errorhandler=self.ll_raise_unicode_exception_decode,
+            allow_surrogates=False)
         return self.ll.llstr(bytes)
 
+    def ll_raise_unicode_exception_encode(self, errors, encoding, msg, u,
+                                          startingpos, endingpos):
+        raise UnicodeEncodeError(encoding, u, startingpos, endingpos, msg)
+    
 class __extend__(annmodel.SomeString):
     def rtyper_makerepr(self, rtyper):
         return rtyper.type_system.rstr.string_repr

File pypy/translator/c/genc.py

 def get_recent_cpython_executable():
 
     if sys.platform == 'win32':
-        python = sys.executable.replace('\\', '/') + ' '
+        python = sys.executable.replace('\\', '/')
     else:
-        python = sys.executable + ' '
-
+        python = sys.executable
     # Is there a command 'python' that runs python 2.5-2.7?
     # If there is, then we can use it instead of sys.executable
     returncode, stdout, stderr = runsubprocess.run_subprocess(
         "python", "-V")
     if _CPYTHON_RE.match(stdout) or _CPYTHON_RE.match(stderr):
-        python = 'python '
+        python = 'python'
     return python
 
 
         for rule in rules:
             mk.rule(*rule)
 
+        #XXX: this conditional part is not tested at all
         if self.config.translation.gcrootfinder == 'asmgcc':
             trackgcfiles = [cfile[:cfile.rfind('.')] for cfile in mk.cfiles]
             if self.translator.platform.name == 'msvc':
             else:
                 mk.definition('PYPY_MAIN_FUNCTION', "main")
 
-            python = get_recent_cpython_executable()
+            mk.definition('PYTHON', get_recent_cpython_executable())
 
             if self.translator.platform.name == 'msvc':
                 lblofiles = []
                         'cmd /c $(MASM) /nologo /Cx /Cp /Zm /coff /Fo$@ /c $< $(INCLUDEDIRS)')
                 mk.rule('.c.gcmap', '',
                         ['$(CC) /nologo $(ASM_CFLAGS) /c /FAs /Fa$*.s $< $(INCLUDEDIRS)',
-                         'cmd /c ' + python + '$(PYPYDIR)/translator/c/gcc/trackgcroot.py -fmsvc -t $*.s > $@']
+                         'cmd /c $(PYTHON) $(PYPYDIR)/translator/c/gcc/trackgcroot.py -fmsvc -t $*.s > $@']
                         )
                 mk.rule('gcmaptable.c', '$(GCMAPFILES)',
-                        'cmd /c ' + python + '$(PYPYDIR)/translator/c/gcc/trackgcroot.py -fmsvc $(GCMAPFILES) > $@')
+                        'cmd /c $(PYTHON) $(PYPYDIR)/translator/c/gcc/trackgcroot.py -fmsvc $(GCMAPFILES) > $@')
 
             else:
                 mk.definition('OBJECTS', '$(ASMLBLFILES) gcmaptable.s')
                 mk.rule('%.s', '%.c', '$(CC) $(CFLAGS) $(CFLAGSEXTRA) -frandom-seed=$< -o $@ -S $< $(INCLUDEDIRS)')
                 mk.rule('%.lbl.s %.gcmap', '%.s',
-                        [python +
-                             '$(PYPYDIR)/translator/c/gcc/trackgcroot.py '
+                        [
+                             '$(PYTHON) $(PYPYDIR)/translator/c/gcc/trackgcroot.py '
                              '-t $< > $*.gctmp',
                          'mv $*.gctmp $*.gcmap'])
                 mk.rule('gcmaptable.s', '$(GCMAPFILES)',
-                        [python +
-                             '$(PYPYDIR)/translator/c/gcc/trackgcroot.py '
+                        [
+                             '$(PYTHON) $(PYPYDIR)/translator/c/gcc/trackgcroot.py '
                              '$(GCMAPFILES) > $@.tmp',
                          'mv $@.tmp $@'])
                 mk.rule('.PRECIOUS', '%.s', "# don't remove .s files if Ctrl-C'ed")