Commits

Armin Rigo committed 6a60ef6

Tweak the gc.get_*() functions to give them hopefully reasonable
performance, based on the new ability to set and clear a flag in
the object headers from RPython.

  • Participants
  • Parent commits c3af538

Comments (0)

Files changed (7)

pypy/module/gc/referents.py

     return OperationError(space.w_NotImplementedError,
                           space.wrap("operation not implemented by this GC"))
 
+# ____________________________________________________________
+
+def clear_gcflag_extra(fromlist):
+    pending = fromlist[:]
+    while pending:
+        gcref = pending.pop()
+        if rgc.get_gcflag_extra(gcref):
+            rgc.toggle_gcflag_extra(gcref)
+            pending.extend(rgc.get_rpy_referents(gcref))
+
+def do_get_objects():
+    roots = [gcref for gcref in rgc.get_rpy_roots() if gcref]
+    pending = roots[:]
+    result_w = []
+    while pending:
+        gcref = pending.pop()
+        if not rgc.get_gcflag_extra(gcref):
+            rgc.toggle_gcflag_extra(gcref)
+            w_obj = try_cast_gcref_to_w_root(gcref)
+            if w_obj is not None:
+                result_w.append(w_obj)
+            pending.extend(rgc.get_rpy_referents(gcref))
+    clear_gcflag_extra(roots)
+    return result_w
+
+# ____________________________________________________________
+
+class PathEntry(object):
+    # PathEntries are nodes of a complete tree of all objects, but
+    # built lazily (there is only one branch alive at any time).
+    # Each node has a 'gcref' and the list of referents from this gcref.
+    def __init__(self, prev, gcref, referents):
+        self.prev = prev
+        self.gcref = gcref
+        self.referents = referents
+        self.remaining = len(referents)
+
+    def get_most_recent_w_obj(self):
+        entry = self
+        while entry is not None:
+            if entry.gcref:
+                w_obj = try_cast_gcref_to_w_root(entry.gcref)
+                if w_obj is not None:
+                    return w_obj
+            entry = entry.prev
+        return None
+
+def do_get_referrers(w_arg):
+    result_w = {}
+    gcarg = rgc.cast_instance_to_gcref(w_arg)
+    roots = [gcref for gcref in rgc.get_rpy_roots() if gcref]
+    head = PathEntry(None, rgc.NULL_GCREF, roots)
+    while True:
+        head.remaining -= 1
+        if head.remaining >= 0:
+            gcref = head.referents[head.remaining]
+            if not rgc.get_gcflag_extra(gcref):
+                # not visited so far
+                if gcref == gcarg:
+                    w_obj = head.get_most_recent_w_obj()
+                    if w_obj is not None:
+                        result_w[w_obj] = None    # found!
+                rgc.toggle_gcflag_extra(gcref)
+                head = PathEntry(head, gcref, rgc.get_rpy_referents(gcref))
+        else:
+            # no more referents to visit
+            head = head.prev
+            if head is None:
+                break
+    clear_gcflag_extra(roots)
+    return result_w.keys()
+
+# ____________________________________________________________
+
+def _list_w_obj_referents(gcref, result_w):
+    # Get all W_Root reachable directly from gcref, and add them to
+    # the list 'result_w'.
+    pending = []    # = list of all objects whose gcflag was toggled
+    i = 0
+    gcrefparent = gcref
+    while True:
+        for gcref in rgc.get_rpy_referents(gcrefparent):
+            if rgc.get_gcflag_extra(gcref):
+                continue
+            rgc.toggle_gcflag_extra(gcref)
+            pending.append(gcref)
+
+        while i < len(pending):
+            gcrefparent = pending[i]
+            i += 1
+            w_obj = try_cast_gcref_to_w_root(gcrefparent)
+            if w_obj is not None:
+                result_w.append(w_obj)
+            else:
+                break   # jump back to the start of the outermost loop
+        else:
+            break   # done
+
+    for gcref in pending:
+        rgc.toggle_gcflag_extra(gcref)    # reset the gcflag_extra's
+
+# ____________________________________________________________
+
 def get_rpy_roots(space):
     lst = rgc.get_rpy_roots()
     if lst is None:
         raise missing_operation(space)
     return space.wrap(index)
 
-def _list_w_obj_referents(gcref, result_w):
-    # Get all W_Root reachable directly from gcref, and add them to
-    # the list 'result_w'.  The logic here is not robust against gc
-    # moves, and may return the same object several times.
-    seen = {}     # map {current_addr: obj}
-    pending = [gcref]
-    i = 0
-    while i < len(pending):
-        gcrefparent = pending[i]
-        i += 1
-        for gcref in rgc.get_rpy_referents(gcrefparent):
-            key = rgc.cast_gcref_to_int(gcref)
-            if gcref == seen.get(key, rgc.NULL_GCREF):
-                continue     # already in 'seen'
-            seen[key] = gcref
-            w_obj = try_cast_gcref_to_w_root(gcref)
-            if w_obj is not None:
-                result_w.append(w_obj)
-            else:
-                pending.append(gcref)
-
-def _get_objects_from_rpy(list_of_gcrefs):
-    # given a list of gcrefs that may or may not be W_Roots, build a list
-    # of W_Roots obtained by following references from there.
-    result_w = []   # <- list of W_Roots
-    for gcref in list_of_gcrefs:
-        if gcref:
-            w_obj = try_cast_gcref_to_w_root(gcref)
-            if w_obj is not None:
-                result_w.append(w_obj)
-            else:
-                _list_w_obj_referents(gcref, result_w)
-    return result_w
-
 def get_objects(space):
     """Return a list of all app-level objects."""
-    roots = rgc.get_rpy_roots()
-    pending_w = _get_objects_from_rpy(roots)
-    # continue by following every W_Root.  Note that this will force a hash
-    # on every W_Root, which is kind of bad, but not on every RPython object,
-    # which is really good.
-    result_w = {}
-    while len(pending_w) > 0:
-        previous_w = pending_w
-        pending_w = []
-        for w_obj in previous_w:
-            if w_obj not in result_w:
-                result_w[w_obj] = None
-                gcref = rgc.cast_instance_to_gcref(w_obj)
-                _list_w_obj_referents(gcref, pending_w)
-    return space.newlist(result_w.keys())
+    if not rgc.has_gcflag_extra():
+        raise missing_operation(space)
+    result_w = do_get_objects()
+    rgc.assert_no_more_gcflags()
+    return space.newlist(result_w)
 
 def get_referents(space, args_w):
     """Return a list of objects directly referred to by any of the arguments.
-    Approximative: follow references recursively until it finds
-    app-level objects.  May return several times the same object, too."""
-    result = []
+    """
+    if not rgc.has_gcflag_extra():
+        raise missing_operation(space)
+    result_w = []
     for w_obj in args_w:
         gcref = rgc.cast_instance_to_gcref(w_obj)
-        _list_w_obj_referents(gcref, result)
-    return space.newlist(result)
+        _list_w_obj_referents(gcref, result_w)
+    rgc.assert_no_more_gcflags()
+    return space.newlist(result_w)
 
 def get_referrers(space, args_w):
     """Return the list of objects that directly refer to any of objs."""
-    roots = rgc.get_rpy_roots()
-    pending_w = _get_objects_from_rpy(roots)
-    arguments_w = {}
-    for w_obj in args_w:
-        arguments_w[w_obj] = None
-    # continue by following every W_Root.  Same remark about hashes as
-    # in get_objects().
-    result_w = {}
-    seen_w = {}
-    while len(pending_w) > 0:
-        previous_w = pending_w
-        pending_w = []
-        for w_obj in previous_w:
-            if w_obj not in seen_w:
-                seen_w[w_obj] = None
-                gcref = rgc.cast_instance_to_gcref(w_obj)
-                referents_w = []
-                _list_w_obj_referents(gcref, referents_w)
-                for w_subobj in referents_w:
-                    if w_subobj in arguments_w:
-                        result_w[w_obj] = None
-                pending_w += referents_w
-    return space.newlist(result_w.keys())
+    if not rgc.has_gcflag_extra():
+        raise missing_operation(space)
+    result_w = []
+    for w_arg in args_w:
+        result_w += do_get_referrers(w_arg)
+    rgc.assert_no_more_gcflags()
+    return space.newlist(result_w)
 
 @unwrap_spec(fd=int)
 def _dump_rpy_heap(space, fd):
     "NOT_RPYTHON"
     raise NotImplementedError
 
+def has_gcflag_extra():
+    "NOT_RPYTHON"
+    return True
+has_gcflag_extra._subopnum = 1
+
+_gcflag_extras = []
+
+def get_gcflag_extra(gcref):
+    "NOT_RPYTHON"
+    assert gcref   # not NULL!
+    return gcref in _gcflag_extras    # XXX slow
+get_gcflag_extra._subopnum = 2
+
+def toggle_gcflag_extra(gcref):
+    "NOT_RPYTHON"
+    assert gcref   # not NULL!
+    try:
+        _gcflag_extras.remove(gcref)  # XXX slow
+    except ValueError:
+        _gcflag_extras.append(gcref)
+toggle_gcflag_extra._subopnum = 3
+
+def assert_no_more_gcflags():
+    if not we_are_translated():
+        assert not _gcflag_extras
+
 ARRAY_OF_CHAR = lltype.Array(lltype.Char)
 NULL_GCREF = lltype.nullptr(llmemory.GCREF.TO)
 
         hop.exception_is_here()
         return hop.genop('gc_typeids_z', [], resulttype = hop.r_result)
 
+class Entry(ExtRegistryEntry):
+    _about_ = (has_gcflag_extra, get_gcflag_extra, toggle_gcflag_extra)
+    def compute_result_annotation(self, s_arg=None):
+        from pypy.annotation.model import s_Bool
+        return s_Bool
+    def specialize_call(self, hop):
+        subopnum = self.instance._subopnum
+        vlist = [hop.inputconst(lltype.Signed, subopnum)]
+        vlist += hop.inputargs(*hop.args_r)
+        hop.exception_cannot_occur()
+        return hop.genop('gc_gcflag_extra', vlist, resulttype = hop.r_result)
+
 def lltype_is_gc(TP):
     return getattr(getattr(TP, "TO", None), "_gckind", "?") == 'gc'

pypy/rpython/llinterp.py

     def op_gc_typeids_z(self):
         raise NotImplementedError("gc_typeids_z")
 
+    def op_gc_gcflag_extra(self, subopnum, *args):
+        return self.heap.gcflag_extra(subopnum, *args)
+
     def op_do_malloc_fixedsize_clear(self):
         raise NotImplementedError("do_malloc_fixedsize_clear")
 

pypy/rpython/lltypesystem/lloperation.py

     'gc_is_rpy_instance'  : LLOp(),
     'gc_dump_rpy_heap'    : LLOp(),
     'gc_typeids_z'        : LLOp(),
+    'gc_gcflag_extra'     : LLOp(),
     'gc_add_memory_pressure': LLOp(),
 
     # ------- JIT & GC interaction, only for some GCs ----------

pypy/rpython/memory/gc/minimark.py

 # collection.  See pypy/doc/discussion/finalizer-order.txt
 GCFLAG_FINALIZATION_ORDERING = first_gcflag << 4
 
+# This flag is reserved for RPython.
+GCFLAG_EXTRA        = first_gcflag << 5
+
 # The following flag is set on externally raw_malloc'ed arrays of pointers.
 # They are allocated with some extra space in front of them for a bitfield,
 # one bit per 'card_page_indices' indices.
 # note that GCFLAG_CARDS_SET is the most significant bit of a byte:
 # this is required for the JIT (x86)
 
-#GCFLAG_UNUSED      = first_gcflag << 5     # this flag is free
 TID_MASK            = (first_gcflag << 8) - 1
 
 
     needs_write_barrier = True
     prebuilt_gc_objects_are_static_roots = False
     malloc_zero_filled = True    # xxx experiment with False
-    gcflag_extra = GCFLAG_FINALIZATION_ORDERING
+    gcflag_extra = GCFLAG_EXTRA
 
     # All objects start with a HDR, i.e. with a field 'tid' which contains
     # a word.  This word is divided in two halves: the lower half contains

pypy/rpython/memory/gcwrapper.py

         else:
             return True
 
+    def gcflag_extra(self, subopnum, *args):
+        if subopnum == 1:      # has_gcflag_extra
+            assert len(args) == 0
+            return self.gc.gcflag_extra != 0
+        assert len(args) == 1
+        addr = llmemory.cast_ptr_to_adr(args[0])
+        hdr = self.gc.header(addr)
+        if subopnum == 3:      # toggle_gcflag_extra
+            if hdr.tid & self.gc.gcflag_extra:
+                hdr.tid &= ~self.gc.gcflag_extra
+            else:
+                hdr.tid |= self.gc.gcflag_extra
+        return (hdr.tid & self.gc.gcflag_extra) != 0
+
 # ____________________________________________________________
 
 class LLInterpRootWalker:

pypy/rpython/memory/test/test_gc.py

         res = self.interpret(fn, [])
         assert res == ord('y')
 
+    def test_gcflag_extra(self):
+        class A:
+            pass
+        a1 = A()
+        def fn():
+            a2 = A()
+            if not rgc.has_gcflag_extra():
+                return     # cannot test it then
+            assert rgc.get_gcflag_extra(a1) == False
+            assert rgc.get_gcflag_extra(a2) == False
+            rgc.toggle_gcflag_extra(a1)
+            assert rgc.get_gcflag_extra(a1) == True
+            assert rgc.get_gcflag_extra(a2) == False
+            rgc.toggle_gcflag_extra(a2)
+            assert rgc.get_gcflag_extra(a1) == True
+            assert rgc.get_gcflag_extra(a2) == True
+            rgc.toggle_gcflag_extra(a1)
+            assert rgc.get_gcflag_extra(a1) == False
+            assert rgc.get_gcflag_extra(a2) == True
+            rgc.toggle_gcflag_extra(a2)
+            assert rgc.get_gcflag_extra(a1) == False
+            assert rgc.get_gcflag_extra(a2) == False
+        self.interpret(fn, [])
+
 from pypy.rlib.objectmodel import UnboxedValue
 
 class TaggedBase(object):