Commits

Armin Rigo  committed 2a455f2

(cfbolz, arigo)

Prebuild the empty string and single-character strings.
Controlled by two new options, --objspace-std-sharesmallstr
and --objspace-std-withprebuiltchar.

  • Participants
  • Parent commits 15c9aaf

Comments (0)

Files changed (10)

File pypy/config/pypyoption.py

         BoolOption("withstrslice", "use strings optimized for slicing",
                    default=False),
 
+        BoolOption("withprebuiltchar",
+                   "use prebuilt single-character string objects",
+                   default=False),
+
+        BoolOption("sharesmallstr",
+                   "always reuse the prebuilt string objects "
+                   "(the empty string and potentially single-char strings)",
+                   default=False),
+
         BoolOption("withstrdict",
                    "use dictionaries optimized for string keys",
                    default=False),

File pypy/doc/config/objspace.std.sharesmallstr.txt

Empty file added.

File pypy/doc/config/objspace.std.withprebuiltchar.txt

Empty file added.

File pypy/objspace/std/marshal_impl.py

         m.atom_str(TYPE_STRING, s)
 
 def unmarshal_String(space, u, tc):
-    return W_StringObject(u.get_str())
+    return space.wrap(u.get_str())
 register(TYPE_STRING, unmarshal_String)
 
 def unmarshal_interned(space, u, tc):
-    w_ret = W_StringObject(u.get_str())
+    w_ret = space.wrap(u.get_str())
     u.stringtable_w.append(w_ret)
     w_intern = space.builtin.get('intern')
     space.call_function(w_intern, w_ret)
 string_to_buffer = app.interphook('string_to_buffer')
 
 def unmarshal_buffer(space, u, tc):
-    w_s = W_StringObject(u.get_str())
+    w_s = space.wrap(u.get_str())
     return string_to_buffer(space, w_s)
 register(TYPE_UNKNOWN, unmarshal_buffer)
 

File pypy/objspace/std/objspace.py

             else:
                 return self.newint(x)
         if isinstance(x, str):
-            return W_StringObject(x)
+            from pypy.objspace.std.stringtype import wrapstr
+            return wrapstr(self, x)
         if isinstance(x, unicode):
             return W_UnicodeObject([unichr(ord(u)) for u in x]) # xxx
         if isinstance(x, float):
         except ValueError:  # chr(out-of-range)
             raise OperationError(self.w_ValueError,
                                  self.wrap("character code not in range(256)"))
-        return W_StringObject(''.join(chars))
+        return self.wrap(''.join(chars))
 
     def newunicode(self, chars):
         try:

File pypy/objspace/std/stringobject.py

 from pypy.objspace.std.noneobject import W_NoneObject
 from pypy.objspace.std.tupleobject import W_TupleObject
 
-from pypy.objspace.std.stringtype import sliced, joined
+from pypy.objspace.std.stringtype import sliced, joined, wrapstr, wrapchar
 
 
 class W_StringObject(W_Object):
     def unwrap(w_self, space):
         return w_self._value
 
+registerimplementation(W_StringObject)
 
-registerimplementation(W_StringObject)
+W_StringObject.EMPTY = W_StringObject('')
+W_StringObject.PREBUILT = [W_StringObject(chr(i)) for i in range(256)]
+del i
 
 
 def _is_generic(space, w_self, fun): 
             l.append(space.str_w(w_s))
         return space.wrap(self.join(l))
     else:
-        return space.wrap("")
+        return W_StringObject.EMPTY
 
 def str_rjust__String_ANY_ANY(space, w_self, w_arg, w_fillchar):
 
     else:
         u_centered = u_self
 
-    return W_StringObject(u_centered)
+    return wrapstr(space, u_centered)
 
 def str_count__String_String_ANY_ANY(space, w_self, w_arg, w_start, w_end): 
     u_self  = w_self._value
             u_expanded += " " * _tabindent(oldtoken,u_tabsize) + token
             oldtoken = token
             
-    return W_StringObject(u_expanded)        
+    return wrapstr(space, u_expanded)        
  
  
 def str_splitlines__String_ANY(space, w_self, w_keepends):
         exc = space.call_function(space.w_IndexError,
                                   space.wrap("string index out of range"))
         raise OperationError(space.w_IndexError, exc)
-    return W_StringObject(str[ival])
+    return wrapchar(space, str[ival])
 
 def getitem__String_Slice(space, w_str, w_slice):
     w = space.wrap
     length = len(s)
     start, stop, step, sl = w_slice.indices4(space, length)
     if sl == 0:
-        str = ""
+        return W_StringObject.EMPTY
     elif step == 1:
         assert start >= 0 and stop >= 0
         return sliced(space, s, start, stop)
     else:
         str = "".join([s[start + i*step] for i in range(sl)])
-    return W_StringObject(str)
+    return wrapstr(space, str)
 
 def mul_string_times(space, w_str, w_times):
     try:
             raise FailedToImplement
         raise
     if mul <= 0:
-        return space.wrap('')
+        return W_StringObject.EMPTY
     input = w_str._value
     input_len = len(input)
     try:
 def str__String(space, w_str):
     if type(w_str) is W_StringObject:
         return w_str
-    return W_StringObject(w_str._value)
+    return wrapstr(space, w_str._value)
 
 def iter__String(space, w_list):
     from pypy.objspace.std import iterobject
     return space.wrap(ord(u_str))
 
 def getnewargs__String(space, w_str):
-    return space.newtuple([W_StringObject(w_str._value)])
+    return space.newtuple([wrapstr(space, w_str._value)])
 
 def repr__String(space, w_str):
     s = w_str._value

File pypy/objspace/std/stringtype.py

 
 from sys import maxint
 
+def wrapstr(space, s):
+    from pypy.objspace.std.stringobject import W_StringObject
+    if space.config.objspace.std.sharesmallstr:
+        if space.config.objspace.std.withprebuiltchar:
+            # share characters and empty string
+            if len(s) <= 1:
+                if len(s) == 0:
+                    return W_StringObject.EMPTY
+                else:
+                    s = s[0]     # annotator hint: a single char
+                    return wrapchar(space, s)
+        else:
+            # only share the empty string
+            if len(s) == 0:
+                return W_StringObject.EMPTY
+    return W_StringObject(s)
+
+def wrapchar(space, c):
+    from pypy.objspace.std.stringobject import W_StringObject
+    if space.config.objspace.std.withprebuiltchar:
+        return W_StringObject.PREBUILT[ord(c)]
+    else:
+        return W_StringObject(c)
+
 def sliced(space, s, start, stop):
     if space.config.objspace.std.withstrslice:
         from pypy.objspace.std.strsliceobject import W_StringSliceObject
-        from pypy.objspace.std.stringobject import W_StringObject
         # XXX heuristic, should be improved!
         if (stop - start) > len(s) * 0.20 + 40:
             return W_StringSliceObject(s, start, stop)
-        else:
-            return W_StringObject(s[start:stop])
-    else:
-        from pypy.objspace.std.stringobject import W_StringObject
-        return W_StringObject(s[start:stop])
+    return wrapstr(space, s[start:stop])
 
 def joined(space, strlist):
     if space.config.objspace.std.withstrjoin:
         from pypy.objspace.std.strjoinobject import W_StringJoinObject
         return W_StringJoinObject(strlist)
     else:
-        from pypy.objspace.std.stringobject import W_StringObject
-        return W_StringObject("".join(strlist))
+        return wrapstr(space, "".join(strlist))
 
 str_join    = SMM('join', 2,
                   doc='S.join(sequence) -> string\n\nReturn a string which is'

File pypy/objspace/std/strjoinobject.py

 from pypy.objspace.std.stringobject import W_StringObject
 from pypy.objspace.std.unicodeobject import delegate_String2Unicode
 
-from pypy.objspace.std.stringtype import joined
+from pypy.objspace.std.stringtype import joined, wrapstr
 
 class W_StringJoinObject(W_Object):
     from pypy.objspace.std.stringtype import str_typedef as typedef
 registerimplementation(W_StringJoinObject)
 
 def delegate_join2str(space, w_strjoin):
-    return W_StringObject(w_strjoin.force())
+    return wrapstr(space, w_strjoin.force())
 
 def delegate_join2unicode(space, w_strjoin):
-    w_str = W_StringObject(w_strjoin.force())
+    w_str = wrapstr(space, w_strjoin.force())
     return delegate_String2Unicode(space, w_str)
 
 def len__StringJoin(space, w_self):

File pypy/objspace/std/strsliceobject.py

 from pypy.objspace.std import slicetype
 from pypy.objspace.std.inttype import wrapint
 
+from pypy.objspace.std.stringtype import wrapstr, wrapchar
+
 
 class W_StringSliceObject(W_Object):
     from pypy.objspace.std.stringtype import str_typedef as typedef
 
 
 def delegate_slice2str(space, w_strslice):
-    return W_StringObject(w_strslice.force())
+    return wrapstr(space, w_strslice.force())
 
 def delegate_slice2unicode(space, w_strslice):
-    w_str = W_StringObject(w_strslice.force())
+    w_str = wrapstr(space, w_strslice.force())
     return delegate_String2Unicode(space, w_str)
 
 # ____________________________________________________________
         exc = space.call_function(space.w_IndexError,
                                   space.wrap("string index out of range"))
         raise OperationError(space.w_IndexError, exc)
-    return W_StringObject(w_str.str[w_str.start + ival])
+    return wrapchar(space, w_str.str[w_str.start + ival])
 
 def getitem__StringSlice_Slice(space, w_str, w_slice):
     w = space.wrap
     length = w_str.stop - w_str.start
     start, stop, step, sl = w_slice.indices4(space, length)
     if sl == 0:
-        str = ""
+        return W_StringObject.EMPTY
     else:
         s = w_str.str
         start = w_str.start + start
             return W_StringSliceObject(s, start, stop)
         else:
             str = "".join([s[start + i*step] for i in range(sl)])
-    return W_StringObject(str)
+    return wrapstr(space, str)
 
 def len__StringSlice(space, w_str):
     return space.wrap(w_str.stop - w_str.start)

File pypy/objspace/std/test/test_stringobject.py

 from pypy.objspace.std import stringobject
 from pypy.objspace.std.stringobject import W_StringObject
-
+from pypy.conftest import gettestobjspace
 
 
 class TestW_StringObject:
         assert type(s.lstrip()) is str
         assert type(s.center(3)) is str
         assert type(s.splitlines()[0]) is str
+
+
+class AppTestPrebuilt(AppTestStringObject):
+    def setup_class(cls):
+        cls.space = gettestobjspace(**{"objspace.std.withprebuiltchar": True})
+
+class AppTestShare(AppTestStringObject):
+    def setup_class(cls):
+        cls.space = gettestobjspace(**{"objspace.std.sharesmallstr": True})
+
+class AppTestPrebuiltShare(AppTestStringObject):
+    def setup_class(cls):
+        cls.space = gettestobjspace(**{"objspace.std.withprebuiltchar": True,
+                                       "objspace.std.sharesmallstr": True})