1. Pypy
  2. Untitled project
  3. pypy

Commits

Armin Rigo  committed 24b1a13

Remove the semispace, generational, and hybrid GCs, after discussing
on IRC their remaining value. Even the educational value is very low
because of the 3-GCs-in-one approach, and the fact that starting from
a semispace GC is really the wrong approach for PyPy --- semispace GCs
work very badly in this context.

  • Participants
  • Parent commits 2e7d879
  • Branches gc-del

Comments (0)

Files changed (14)

File rpython/memory/gc/base.py

View file
     if config.translation.gctransformer != "framework":
         raise AssertionError("fix this test")
 
-    classes = {"semispace": "semispace.SemiSpaceGC",
-               "generation": "generation.GenerationGC",
-               "hybrid": "hybrid.HybridGC",
-               "minimark" : "minimark.MiniMarkGC",
+    classes = {"minimark" : "minimark.MiniMarkGC",
                }
     try:
         modulename, classname = classes[config.translation.gc].split('.')

File rpython/memory/gc/generation.py

-import sys
-from rpython.memory.gc.semispace import SemiSpaceGC
-from rpython.memory.gc.semispace import GCFLAG_EXTERNAL, GCFLAG_FORWARDED
-from rpython.memory.gc.semispace import GC_HASH_TAKEN_ADDR
-from rpython.memory.gc import env
-from rpython.rtyper.lltypesystem.llmemory import NULL, raw_malloc_usage
-from rpython.rtyper.lltypesystem import lltype, llmemory, llarena
-from rpython.rlib.objectmodel import free_non_gc_object
-from rpython.rlib.debug import ll_assert
-from rpython.rlib.debug import debug_print, debug_start, debug_stop
-from rpython.rlib.rarithmetic import intmask, LONG_BIT
-from rpython.rtyper.lltypesystem.lloperation import llop
-
-WORD = LONG_BIT // 8
-
-# The following flag is never set on young objects, i.e. the ones living
-# in the nursery.  It is initially set on all prebuilt and old objects,
-# and gets cleared by the write_barrier() when we write in them a
-# pointer to a young object.
-GCFLAG_NO_YOUNG_PTRS = SemiSpaceGC.first_unused_gcflag << 0
-
-# The following flag is set on some last-generation objects (== prebuilt
-# objects for GenerationGC, but see also HybridGC).  The flag is set
-# unless the object is already listed in 'last_generation_root_objects'.
-# When a pointer is written inside an object with GCFLAG_NO_HEAP_PTRS
-# set, the write_barrier clears the flag and adds the object to
-# 'last_generation_root_objects'.
-GCFLAG_NO_HEAP_PTRS = SemiSpaceGC.first_unused_gcflag << 1
-
-class GenerationGC(SemiSpaceGC):
-    """A basic generational GC: it's a SemiSpaceGC with an additional
-    nursery for young objects.  A write barrier is used to ensure that
-    old objects that contain pointers to young objects are recorded in
-    a list.
-    """
-    inline_simple_malloc = True
-    inline_simple_malloc_varsize = True
-    needs_write_barrier = True
-    prebuilt_gc_objects_are_static_roots = False
-    first_unused_gcflag = SemiSpaceGC.first_unused_gcflag << 2
-
-    # the following values override the default arguments of __init__ when
-    # translating to a real backend.
-    TRANSLATION_PARAMS = {'space_size': 8*1024*1024,     # 8 MB
-                          'nursery_size': 3*1024*1024,   # 3 MB
-                          'min_nursery_size': 48*1024,
-                          'auto_nursery_size': True}
-
-    nursery_hash_base = -1
-
-    def __init__(self, config,
-                 nursery_size=32*WORD,
-                 min_nursery_size=32*WORD,
-                 auto_nursery_size=False,
-                 space_size=1024*WORD,
-                 max_space_size=sys.maxint//2+1,
-                 **kwds):
-        SemiSpaceGC.__init__(self, config,
-                             space_size = space_size,
-                             max_space_size = max_space_size,
-                             **kwds)
-        assert min_nursery_size <= nursery_size <= space_size // 2
-        self.initial_nursery_size = nursery_size
-        self.auto_nursery_size = auto_nursery_size
-        self.min_nursery_size = min_nursery_size
-
-        # define nursery fields
-        self.reset_nursery()
-        self._setup_wb()
-
-        # compute the constant lower bounds for the attributes
-        # largest_young_fixedsize and largest_young_var_basesize.
-        # It is expected that most (or all) objects have a fixedsize
-        # that is much lower anyway.
-        sz = self.get_young_fixedsize(self.min_nursery_size)
-        self.lb_young_fixedsize = sz
-        sz = self.get_young_var_basesize(self.min_nursery_size)
-        self.lb_young_var_basesize = sz
-
-    def setup(self):
-        self.old_objects_pointing_to_young = self.AddressStack()
-        # ^^^ a list of addresses inside the old objects space; it
-        # may contain static prebuilt objects as well.  More precisely,
-        # it lists exactly the old and static objects whose
-        # GCFLAG_NO_YOUNG_PTRS bit is not set.
-        self.young_objects_with_weakrefs = self.AddressStack()
-
-        self.last_generation_root_objects = self.AddressStack()
-        self.young_objects_with_id = self.AddressDict()
-        SemiSpaceGC.setup(self)
-        self.set_nursery_size(self.initial_nursery_size)
-        # the GC is fully setup now.  The rest can make use of it.
-        if self.auto_nursery_size:
-            newsize = nursery_size_from_env()
-            #if newsize <= 0:
-            #    ---disabled--- just use the default value.
-            #    newsize = env.estimate_best_nursery_size()
-            if newsize > 0:
-                self.set_nursery_size(newsize)
-
-        self.reset_nursery()
-
-    def _teardown(self):
-        self.collect() # should restore last gen objects flags
-        SemiSpaceGC._teardown(self)
-
-    def reset_nursery(self):
-        self.nursery      = NULL
-        self.nursery_top  = NULL
-        self.nursery_free = NULL
-
-    def set_nursery_size(self, newsize):
-        debug_start("gc-set-nursery-size")
-        if newsize < self.min_nursery_size:
-            newsize = self.min_nursery_size
-        if newsize > self.space_size // 2:
-            newsize = self.space_size // 2
-
-        # Compute the new bounds for how large young objects can be
-        # (larger objects are allocated directly old).   XXX adjust
-        self.nursery_size = newsize
-        self.largest_young_fixedsize = self.get_young_fixedsize(newsize)
-        self.largest_young_var_basesize = self.get_young_var_basesize(newsize)
-        scale = 0
-        while (self.min_nursery_size << (scale+1)) <= newsize:
-            scale += 1
-        self.nursery_scale = scale
-        debug_print("nursery_size =", newsize)
-        debug_print("largest_young_fixedsize =",
-                    self.largest_young_fixedsize)
-        debug_print("largest_young_var_basesize =",
-                    self.largest_young_var_basesize)
-        debug_print("nursery_scale =", scale)
-        # we get the following invariant:
-        assert self.nursery_size >= (self.min_nursery_size << scale)
-
-        # Force a full collect to remove the current nursery whose size
-        # no longer matches the bounds that we just computed.  This must
-        # be done after changing the bounds, because it might re-create
-        # a new nursery (e.g. if it invokes finalizers).
-        self.semispace_collect()
-        debug_stop("gc-set-nursery-size")
-
-    @staticmethod
-    def get_young_fixedsize(nursery_size):
-        return nursery_size // 2 - 1
-
-    @staticmethod
-    def get_young_var_basesize(nursery_size):
-        return nursery_size // 4 - 1
-
-    @classmethod
-    def JIT_max_size_of_young_obj(cls):
-        min_nurs_size = cls.TRANSLATION_PARAMS['min_nursery_size']
-        return cls.get_young_fixedsize(min_nurs_size)
-
-    def is_in_nursery(self, addr):
-        ll_assert(llmemory.cast_adr_to_int(addr) & 1 == 0,
-                  "odd-valued (i.e. tagged) pointer unexpected here")
-        return self.nursery <= addr < self.nursery_top
-
-    def appears_to_be_in_nursery(self, addr):
-        # same as is_in_nursery(), but may return True accidentally if
-        # 'addr' is a tagged pointer with just the wrong value.
-        if not self.translated_to_c:
-            if not self.is_valid_gc_object(addr):
-                return False
-        return self.nursery <= addr < self.nursery_top
-
-    def malloc_fixedsize_clear(self, typeid, size,
-                               has_finalizer=False,
-                               is_finalizer_light=False,
-                               contains_weakptr=False):
-        if (has_finalizer or
-            (raw_malloc_usage(size) > self.lb_young_fixedsize and
-             raw_malloc_usage(size) > self.largest_young_fixedsize)):
-            # ^^^ we do two size comparisons; the first one appears redundant,
-            #     but it can be constant-folded if 'size' is a constant; then
-            #     it almost always folds down to False, which kills the
-            #     second comparison as well.
-            ll_assert(not contains_weakptr, "wrong case for mallocing weakref")
-            # "non-simple" case or object too big: don't use the nursery
-            return SemiSpaceGC.malloc_fixedsize_clear(self, typeid, size,
-                                                      has_finalizer,
-                                                      is_finalizer_light,
-                                                      contains_weakptr)
-        size_gc_header = self.gcheaderbuilder.size_gc_header
-        totalsize = size_gc_header + size
-        result = self.nursery_free
-        if raw_malloc_usage(totalsize) > self.nursery_top - result:
-            result = self.collect_nursery()
-        llarena.arena_reserve(result, totalsize)
-        # GCFLAG_NO_YOUNG_PTRS is never set on young objs
-        self.init_gc_object(result, typeid, flags=0)
-        self.nursery_free = result + totalsize
-        if contains_weakptr:
-            self.young_objects_with_weakrefs.append(result + size_gc_header)
-        return llmemory.cast_adr_to_ptr(result+size_gc_header, llmemory.GCREF)
-
-    def malloc_varsize_clear(self, typeid, length, size, itemsize,
-                             offset_to_length):
-        # Only use the nursery if there are not too many items.
-        if not raw_malloc_usage(itemsize):
-            too_many_items = False
-        else:
-            # The following line is usually constant-folded because both
-            # min_nursery_size and itemsize are constants (the latter
-            # due to inlining).
-            maxlength_for_minimal_nursery = (self.min_nursery_size // 4 //
-                                             raw_malloc_usage(itemsize))
-            
-            # The actual maximum length for our nursery depends on how
-            # many times our nursery is bigger than the minimal size.
-            # The computation is done in this roundabout way so that
-            # the only remaining computation is the following
-            # shift.
-            maxlength = maxlength_for_minimal_nursery << self.nursery_scale
-            too_many_items = length > maxlength
-
-        if (too_many_items or
-            (raw_malloc_usage(size) > self.lb_young_var_basesize and
-             raw_malloc_usage(size) > self.largest_young_var_basesize)):
-            # ^^^ we do two size comparisons; the first one appears redundant,
-            #     but it can be constant-folded if 'size' is a constant; then
-            #     it almost always folds down to False, which kills the
-            #     second comparison as well.
-            return SemiSpaceGC.malloc_varsize_clear(self, typeid, length, size,
-                                                    itemsize, offset_to_length)
-        # with the above checks we know now that totalsize cannot be more
-        # than about half of the nursery size; in particular, the + and *
-        # cannot overflow
-        size_gc_header = self.gcheaderbuilder.size_gc_header
-        totalsize = size_gc_header + size + itemsize * length
-        result = self.nursery_free
-        if raw_malloc_usage(totalsize) > self.nursery_top - result:
-            result = self.collect_nursery()
-        llarena.arena_reserve(result, totalsize)
-        # GCFLAG_NO_YOUNG_PTRS is never set on young objs
-        self.init_gc_object(result, typeid, flags=0)
-        (result + size_gc_header + offset_to_length).signed[0] = length
-        self.nursery_free = result + llarena.round_up_for_allocation(totalsize)
-        return llmemory.cast_adr_to_ptr(result+size_gc_header, llmemory.GCREF)
-
-    # override the init_gc_object methods to change the default value of 'flags',
-    # used by objects that are directly created outside the nursery by the SemiSpaceGC.
-    # These objects must have the GCFLAG_NO_YOUNG_PTRS flag set immediately.
-    def init_gc_object(self, addr, typeid, flags=GCFLAG_NO_YOUNG_PTRS):
-        SemiSpaceGC.init_gc_object(self, addr, typeid, flags)
-
-    def init_gc_object_immortal(self, addr, typeid,
-                                flags=GCFLAG_NO_YOUNG_PTRS|GCFLAG_NO_HEAP_PTRS):
-        SemiSpaceGC.init_gc_object_immortal(self, addr, typeid, flags)
-
-    # flags exposed for the HybridGC subclass
-    GCFLAGS_FOR_NEW_YOUNG_OBJECTS = 0   # NO_YOUNG_PTRS never set on young objs
-    GCFLAGS_FOR_NEW_EXTERNAL_OBJECTS = (GCFLAG_EXTERNAL | GCFLAG_FORWARDED |
-                                        GCFLAG_NO_YOUNG_PTRS |
-                                        GC_HASH_TAKEN_ADDR)
-
-    # ____________________________________________________________
-    # Support code for full collections
-
-    def collect(self, gen=1):
-        if gen == 0:
-            self.collect_nursery()
-        else:
-            SemiSpaceGC.collect(self)
-
-    def semispace_collect(self, size_changing=False):
-        self.reset_young_gcflags() # we are doing a full collection anyway
-        self.weakrefs_grow_older()
-        self.ids_grow_older()
-        self.reset_nursery()
-        SemiSpaceGC.semispace_collect(self, size_changing)
-
-    def make_a_copy(self, obj, objsize):
-        tid = self.header(obj).tid
-        # During a full collect, all copied objects might implicitly come
-        # from the nursery.  In case they do, we must add this flag:
-        tid |= GCFLAG_NO_YOUNG_PTRS
-        return self._make_a_copy_with_tid(obj, objsize, tid)
-        # history: this was missing and caused an object to become old but without the
-        # flag set.  Such an object is bogus in the sense that the write_barrier doesn't
-        # work on it.  So it can eventually contain a ptr to a young object but we didn't
-        # know about it.  That ptr was not updated in the next minor collect... boom at
-        # the next usage.
-
-    def reset_young_gcflags(self):
-        # This empties self.old_objects_pointing_to_young, and puts the
-        # GCFLAG_NO_YOUNG_PTRS back on all these objects.  We could put
-        # the flag back more lazily but we expect this list to be short
-        # anyway, and it's much saner to stick to the invariant:
-        # non-young objects all have GCFLAG_NO_YOUNG_PTRS set unless
-        # they are listed in old_objects_pointing_to_young.
-        oldlist = self.old_objects_pointing_to_young
-        while oldlist.non_empty():
-            obj = oldlist.pop()
-            hdr = self.header(obj)
-            hdr.tid |= GCFLAG_NO_YOUNG_PTRS
-
-    def weakrefs_grow_older(self):
-        while self.young_objects_with_weakrefs.non_empty():
-            obj = self.young_objects_with_weakrefs.pop()
-            self.objects_with_weakrefs.append(obj)
-
-    def collect_roots(self):
-        """GenerationGC: collects all roots.
-           HybridGC: collects all roots, excluding the generation 3 ones.
-        """
-        # Warning!  References from static (and possibly gen3) objects
-        # are found by collect_last_generation_roots(), which must be
-        # called *first*!  If it is called after walk_roots(), then the
-        # HybridGC explodes if one of the _collect_root causes an object
-        # to be added to self.last_generation_root_objects.  Indeed, in
-        # this case, the newly added object is traced twice: once by
-        # collect_last_generation_roots() and once because it was added
-        # in self.rawmalloced_objects_to_trace.
-        self.collect_last_generation_roots()
-        self.root_walker.walk_roots(
-            SemiSpaceGC._collect_root,  # stack roots
-            SemiSpaceGC._collect_root,  # static in prebuilt non-gc structures
-            None)   # we don't need the static in prebuilt gc objects
-
-    def collect_last_generation_roots(self):
-        stack = self.last_generation_root_objects
-        self.last_generation_root_objects = self.AddressStack()
-        while stack.non_empty():
-            obj = stack.pop()
-            self.header(obj).tid |= GCFLAG_NO_HEAP_PTRS
-            # ^^^ the flag we just added will be removed immediately if
-            # the object still contains pointers to younger objects
-            self.trace(obj, self._trace_external_obj, obj)
-        stack.delete()
-
-    def _trace_external_obj(self, pointer, obj):
-        addr = pointer.address[0]
-        newaddr = self.copy(addr)
-        pointer.address[0] = newaddr
-        self.write_into_last_generation_obj(obj, newaddr)
-
-    # ____________________________________________________________
-    # Implementation of nursery-only collections
-
-    def collect_nursery(self):
-        if self.nursery_size > self.top_of_space - self.free:
-            # the semispace is running out, do a full collect
-            self.obtain_free_space(self.nursery_size)
-            ll_assert(self.nursery_size <= self.top_of_space - self.free,
-                         "obtain_free_space failed to do its job")
-        if self.nursery:
-            debug_start("gc-minor")
-            debug_print("--- minor collect ---")
-            debug_print("nursery:", self.nursery, "to", self.nursery_top)
-            # a nursery-only collection
-            scan = beginning = self.free
-            self.collect_oldrefs_to_nursery()
-            self.collect_roots_in_nursery()
-            scan = self.scan_objects_just_copied_out_of_nursery(scan)
-            # at this point, all static and old objects have got their
-            # GCFLAG_NO_YOUNG_PTRS set again by trace_and_drag_out_of_nursery
-            if self.young_objects_with_weakrefs.non_empty():
-                self.invalidate_young_weakrefs()
-            if self.young_objects_with_id.length() > 0:
-                self.update_young_objects_with_id()
-            # mark the nursery as free and fill it with zeroes again
-            llarena.arena_reset(self.nursery, self.nursery_size, 2)
-            debug_print("survived (fraction of the size):",
-                        float(scan - beginning) / self.nursery_size)
-            debug_stop("gc-minor")
-            #self.debug_check_consistency()   # -- quite expensive
-        else:
-            # no nursery - this occurs after a full collect, triggered either
-            # just above or by some previous non-nursery-based allocation.
-            # Grab a piece of the current space for the nursery.
-            self.nursery = self.free
-            self.nursery_top = self.nursery + self.nursery_size
-            self.free = self.nursery_top
-        self.nursery_free = self.nursery
-        # at this point we know that the nursery is empty
-        self.change_nursery_hash_base()
-        return self.nursery_free
-
-    def change_nursery_hash_base(self):
-        # The following should be enough to ensure that young objects
-        # tend to always get a different hash.  It also makes sure that
-        # nursery_hash_base is not a multiple of 4, to avoid collisions
-        # with the hash of non-young objects.
-        hash_base = self.nursery_hash_base
-        hash_base += self.nursery_size - 1
-        if (hash_base & 3) == 0:
-            hash_base -= 1
-        self.nursery_hash_base = intmask(hash_base)
-
-    # NB. we can use self.copy() to move objects out of the nursery,
-    # but only if the object was really in the nursery.
-
-    def collect_oldrefs_to_nursery(self):
-        # Follow the old_objects_pointing_to_young list and move the
-        # young objects they point to out of the nursery.
-        count = 0
-        oldlist = self.old_objects_pointing_to_young
-        while oldlist.non_empty():
-            count += 1
-            obj = oldlist.pop()
-            hdr = self.header(obj)
-            hdr.tid |= GCFLAG_NO_YOUNG_PTRS
-            self.trace_and_drag_out_of_nursery(obj)
-        debug_print("collect_oldrefs_to_nursery", count)
-
-    def collect_roots_in_nursery(self):
-        # we don't need to trace prebuilt GcStructs during a minor collect:
-        # if a prebuilt GcStruct contains a pointer to a young object,
-        # then the write_barrier must have ensured that the prebuilt
-        # GcStruct is in the list self.old_objects_pointing_to_young.
-        self.root_walker.walk_roots(
-            GenerationGC._collect_root_in_nursery,  # stack roots
-            GenerationGC._collect_root_in_nursery,  # static in prebuilt non-gc
-            None)                                   # static in prebuilt gc
-
-    def _collect_root_in_nursery(self, root):
-        obj = root.address[0]
-        if self.is_in_nursery(obj):
-            root.address[0] = self.copy(obj)
-
-    def scan_objects_just_copied_out_of_nursery(self, scan):
-        while scan < self.free:
-            curr = scan + self.size_gc_header()
-            self.trace_and_drag_out_of_nursery(curr)
-            scan += self.size_gc_header() + self.get_size_incl_hash(curr)
-        return scan
-
-    def trace_and_drag_out_of_nursery(self, obj):
-        """obj must not be in the nursery.  This copies all the
-        young objects it references out of the nursery.
-        """
-        self.trace(obj, self._trace_drag_out, None)
-
-    def _trace_drag_out(self, pointer, ignored):
-        if self.is_in_nursery(pointer.address[0]):
-            pointer.address[0] = self.copy(pointer.address[0])
-
-    # The code relies on the fact that no weakref can be an old object
-    # weakly pointing to a young object.  Indeed, weakrefs are immutable
-    # so they cannot point to an object that was created after it.
-    def invalidate_young_weakrefs(self):
-        # walk over the list of objects that contain weakrefs and are in the
-        # nursery.  if the object it references survives then update the
-        # weakref; otherwise invalidate the weakref
-        while self.young_objects_with_weakrefs.non_empty():
-            obj = self.young_objects_with_weakrefs.pop()
-            if not self.surviving(obj):
-                continue # weakref itself dies
-            obj = self.get_forwarding_address(obj)
-            offset = self.weakpointer_offset(self.get_type_id(obj))
-            pointing_to = (obj + offset).address[0]
-            if self.is_in_nursery(pointing_to):
-                if self.surviving(pointing_to):
-                    (obj + offset).address[0] = self.get_forwarding_address(
-                        pointing_to)
-                else:
-                    (obj + offset).address[0] = NULL
-                    continue    # no need to remember this weakref any longer
-            self.objects_with_weakrefs.append(obj)
-
-    # for the JIT: a minimal description of the write_barrier() method
-    # (the JIT assumes it is of the shape
-    #  "if addr_struct.int0 & JIT_WB_IF_FLAG: remember_young_pointer()")
-    JIT_WB_IF_FLAG = GCFLAG_NO_YOUNG_PTRS
-
-    def write_barrier(self, newvalue, addr_struct):
-         if self.header(addr_struct).tid & GCFLAG_NO_YOUNG_PTRS:
-            self.remember_young_pointer(addr_struct, newvalue)
-
-    def _setup_wb(self):
-        DEBUG = self.DEBUG
-        # The purpose of attaching remember_young_pointer to the instance
-        # instead of keeping it as a regular method is to help the JIT call it.
-        # Additionally, it makes the code in write_barrier() marginally smaller
-        # (which is important because it is inlined *everywhere*).
-        # For x86, there is also an extra requirement: when the JIT calls
-        # remember_young_pointer(), it assumes that it will not touch the SSE
-        # registers, so it does not save and restore them (that's a *hack*!).
-        def remember_young_pointer(addr_struct, addr):
-            #llop.debug_print(lltype.Void, "\tremember_young_pointer",
-            #                 addr_struct, "<-", addr)
-            if DEBUG:
-                ll_assert(not self.is_in_nursery(addr_struct),
-                          "nursery object with GCFLAG_NO_YOUNG_PTRS")
-            #
-            # What is important in this function is that it *must*
-            # clear the flag GCFLAG_NO_YOUNG_PTRS from 'addr_struct'
-            # if 'addr' is in the nursery.  It is ok if, accidentally,
-            # it also clears the flag in some more rare cases, like
-            # 'addr' being a tagged pointer whose value happens to be
-            # a large integer that fools is_in_nursery().
-            if self.appears_to_be_in_nursery(addr):
-                self.old_objects_pointing_to_young.append(addr_struct)
-                self.header(addr_struct).tid &= ~GCFLAG_NO_YOUNG_PTRS
-            self.write_into_last_generation_obj(addr_struct, addr)
-        remember_young_pointer._dont_inline_ = True
-        self.remember_young_pointer = remember_young_pointer
-
-    def write_into_last_generation_obj(self, addr_struct, addr):
-        objhdr = self.header(addr_struct)
-        if objhdr.tid & GCFLAG_NO_HEAP_PTRS:
-            if (self.is_valid_gc_object(addr) and
-                    not self.is_last_generation(addr)):
-                objhdr.tid &= ~GCFLAG_NO_HEAP_PTRS
-                self.last_generation_root_objects.append(addr_struct)
-    write_into_last_generation_obj._always_inline_ = True
-
-    def assume_young_pointers(self, addr_struct):
-        objhdr = self.header(addr_struct)
-        if objhdr.tid & GCFLAG_NO_YOUNG_PTRS:
-            self.old_objects_pointing_to_young.append(addr_struct)
-            objhdr.tid &= ~GCFLAG_NO_YOUNG_PTRS
-        if objhdr.tid & GCFLAG_NO_HEAP_PTRS:
-            objhdr.tid &= ~GCFLAG_NO_HEAP_PTRS
-            self.last_generation_root_objects.append(addr_struct)
-
-    def writebarrier_before_copy(self, source_addr, dest_addr,
-                                 source_start, dest_start, length):
-        """ This has the same effect as calling writebarrier over
-        each element in dest copied from source, except it might reset
-        one of the following flags a bit too eagerly, which means we'll have
-        a bit more objects to track, but being on the safe side.
-        """
-        source_hdr = self.header(source_addr)
-        dest_hdr = self.header(dest_addr)
-        if dest_hdr.tid & GCFLAG_NO_YOUNG_PTRS == 0:
-            return True
-        # ^^^ a fast path of write-barrier
-        if source_hdr.tid & GCFLAG_NO_YOUNG_PTRS == 0:
-            # there might be an object in source that is in nursery
-            self.old_objects_pointing_to_young.append(dest_addr)
-            dest_hdr.tid &= ~GCFLAG_NO_YOUNG_PTRS
-        if dest_hdr.tid & GCFLAG_NO_HEAP_PTRS:
-            if source_hdr.tid & GCFLAG_NO_HEAP_PTRS == 0:
-            # ^^^ equivalent of addr from source not being in last
-                #     gen
-                dest_hdr.tid &= ~GCFLAG_NO_HEAP_PTRS
-                self.last_generation_root_objects.append(dest_addr)
-        return True
-
-    def is_last_generation(self, obj):
-        # overridden by HybridGC
-        return (self.header(obj).tid & GCFLAG_EXTERNAL) != 0
-
-    def _compute_id(self, obj):
-        if self.is_in_nursery(obj):
-            result = self.young_objects_with_id.get(obj)
-            if not result:
-                result = self._next_id()
-                self.young_objects_with_id.setitem(obj, result)
-            return result
-        else:
-            return SemiSpaceGC._compute_id(self, obj)
-
-    def update_young_objects_with_id(self):
-        self.young_objects_with_id.foreach(self._update_object_id,
-                                           self.objects_with_id)
-        self.young_objects_with_id.clear()
-        # NB. the clear() also makes the dictionary shrink back to its
-        # minimal size, which is actually a good idea: a large, mostly-empty
-        # table is bad for the next call to 'foreach'.
-
-    def ids_grow_older(self):
-        self.young_objects_with_id.foreach(self._id_grow_older, None)
-        self.young_objects_with_id.clear()
-
-    def _id_grow_older(self, obj, id, ignored):
-        self.objects_with_id.setitem(obj, id)
-
-    def _compute_current_nursery_hash(self, obj):
-        return intmask(llmemory.cast_adr_to_int(obj) + self.nursery_hash_base)
-
-    def enumerate_all_roots(self, callback, arg):
-        self.last_generation_root_objects.foreach(callback, arg)
-        SemiSpaceGC.enumerate_all_roots(self, callback, arg)
-    enumerate_all_roots._annspecialcase_ = 'specialize:arg(1)'
-
-    def debug_check_object(self, obj):
-        """Check the invariants about 'obj' that should be true
-        between collections."""
-        SemiSpaceGC.debug_check_object(self, obj)
-        tid = self.header(obj).tid
-        if tid & GCFLAG_NO_YOUNG_PTRS:
-            ll_assert(not self.is_in_nursery(obj),
-                      "nursery object with GCFLAG_NO_YOUNG_PTRS")
-            self.trace(obj, self._debug_no_nursery_pointer, None)
-        elif not self.is_in_nursery(obj):
-            ll_assert(self._d_oopty.contains(obj),
-                      "missing from old_objects_pointing_to_young")
-        if tid & GCFLAG_NO_HEAP_PTRS:
-            ll_assert(self.is_last_generation(obj),
-                      "GCFLAG_NO_HEAP_PTRS on non-3rd-generation object")
-            self.trace(obj, self._debug_no_gen1or2_pointer, None)
-        elif self.is_last_generation(obj):
-            ll_assert(self._d_lgro.contains(obj),
-                      "missing from last_generation_root_objects")
-
-    def _debug_no_nursery_pointer(self, root, ignored):
-        ll_assert(not self.is_in_nursery(root.address[0]),
-                  "GCFLAG_NO_YOUNG_PTRS but found a young pointer")
-    def _debug_no_gen1or2_pointer(self, root, ignored):
-        target = root.address[0]
-        ll_assert(not target or self.is_last_generation(target),
-                  "GCFLAG_NO_HEAP_PTRS but found a pointer to gen1or2")
-
-    def debug_check_consistency(self):
-        if self.DEBUG:
-            self._d_oopty = self.old_objects_pointing_to_young.stack2dict()
-            self._d_lgro = self.last_generation_root_objects.stack2dict()
-            SemiSpaceGC.debug_check_consistency(self)
-            self._d_oopty.delete()
-            self._d_lgro.delete()
-            self.old_objects_pointing_to_young.foreach(
-                self._debug_check_flag_1, None)
-            self.last_generation_root_objects.foreach(
-                self._debug_check_flag_2, None)
-
-    def _debug_check_flag_1(self, obj, ignored):
-        ll_assert(not (self.header(obj).tid & GCFLAG_NO_YOUNG_PTRS),
-                  "unexpected GCFLAG_NO_YOUNG_PTRS")
-    def _debug_check_flag_2(self, obj, ignored):
-        ll_assert(not (self.header(obj).tid & GCFLAG_NO_HEAP_PTRS),
-                  "unexpected GCFLAG_NO_HEAP_PTRS")
-
-    def debug_check_can_copy(self, obj):
-        if self.is_in_nursery(obj):
-            pass    # it's ok to copy an object out of the nursery
-        else:
-            SemiSpaceGC.debug_check_can_copy(self, obj)
-
-
-# ____________________________________________________________
-
-def nursery_size_from_env():
-    return env.read_from_env('PYPY_GENERATIONGC_NURSERY')

File rpython/memory/gc/hybrid.py

-import sys
-from rpython.memory.gc.semispace import SemiSpaceGC
-from rpython.memory.gc.generation import GenerationGC, WORD
-from rpython.memory.gc.semispace import GCFLAG_EXTERNAL, GCFLAG_FORWARDED
-from rpython.memory.gc.semispace import GCFLAG_HASHMASK
-from rpython.memory.gc.generation import GCFLAG_NO_YOUNG_PTRS
-from rpython.memory.gc.generation import GCFLAG_NO_HEAP_PTRS
-from rpython.memory.gc.semispace import GC_HASH_TAKEN_ADDR
-from rpython.memory.gc.semispace import GC_HASH_HASFIELD
-from rpython.rtyper.lltypesystem import lltype, llmemory, llarena
-from rpython.rtyper.lltypesystem.llmemory import raw_malloc_usage
-from rpython.rtyper.lltypesystem.lloperation import llop
-from rpython.rlib.debug import ll_assert, have_debug_prints
-from rpython.rlib.debug import debug_print, debug_start, debug_stop
-from rpython.rlib.rarithmetic import ovfcheck
-from rpython.rtyper.lltypesystem import rffi
-
-#   _______in the semispaces_________      ______external (non-moving)_____
-#  /                                 \    /                                \
-#                                          ___raw_malloc'ed__    _prebuilt_
-#  +----------------------------------+   /                  \  /          \
-#  |    | | | | |    |                |
-#  |    | | | | |    |                |    age < max      age == max
-#  |nur-|o|o|o|o|    |                |      +---+      +---+      +---+
-#  |sery|b|b|b|b|free|     empty      |      |obj|      |obj|      |obj|  
-#  |    |j|j|j|j|    |                |      +---+      +---+      +---+  
-#  |    | | | | |    |                |       +---+      +---+      +---+
-#  +-----------------+----------------+       |obj|      |obj|      |obj|
-#        age <= max                           +---+      +---+      +---+
-#            
-#  |gen1|------------- generation 2 -----------------|-----generation 3-----|
-#
-# Object lists:
-#   * gen2_rawmalloced_objects
-#   * gen3_rawmalloced_objects
-#   * old_objects_pointing_to_young: gen2or3 objs that point to gen1 objs
-#   * last_generation_root_objects: gen3 objs that point to gen1or2 objs
-#
-# How to tell the objects apart:
-#   * external:      tid & GCFLAG_EXTERNAL
-#   * gen1:          is_in_nursery(obj)
-#   * gen3:          (tid & (GCFLAG_EXTERNAL|GCFLAG_AGE_MASK)) ==
-#                           (GCFLAG_EXTERNAL|GCFLAG_AGE_MAX)
-#
-# Some invariants:
-#   * gen3 are either GCFLAG_NO_HEAP_PTRS or in 'last_generation_root_objects'
-#   * between collections, GCFLAG_UNVISITED set exactly for gen2_rawmalloced
-#
-# A malloc_varsize() of large objects returns objects that are external
-# but initially of generation 2.  Old objects from the semispaces are
-# moved to external objects directly as generation 3.
-
-# The "age" of an object is the number of times it survived a full
-# collections, without counting the step that moved it out of the nursery.
-# When a semispace-based object would grow older than MAX_SEMISPACE_AGE,
-# it is instead copied to a nonmoving location.  For example, a value of 3
-# ensures that an object is copied at most 5 times in total: from the
-# nursery to the semispace, then three times between the two spaces,
-# then one last time to a nonmoving location.
-MAX_SEMISPACE_AGE = 3
-
-GCFLAG_UNVISITED = GenerationGC.first_unused_gcflag << 0
-_gcflag_next_bit = GenerationGC.first_unused_gcflag << 1
-GCFLAG_AGE_ONE   = _gcflag_next_bit
-GCFLAG_AGE_MAX   = _gcflag_next_bit * MAX_SEMISPACE_AGE
-GCFLAG_AGE_MASK  = 0
-while GCFLAG_AGE_MASK < GCFLAG_AGE_MAX:
-    GCFLAG_AGE_MASK |= _gcflag_next_bit
-    _gcflag_next_bit <<= 1
-
-# The 3rd generation objects are only collected after the following
-# number of calls to semispace_collect():
-GENERATION3_COLLECT_THRESHOLD = 20
-
-class HybridGC(GenerationGC):
-    """A two-generations semi-space GC like the GenerationGC,
-    except that objects above a certain size are handled separately:
-    they are allocated via raw_malloc/raw_free in a mark-n-sweep fashion.
-    """
-    first_unused_gcflag = _gcflag_next_bit
-    prebuilt_gc_objects_are_static_roots = True
-
-    # the following values override the default arguments of __init__ when
-    # translating to a real backend.
-    TRANSLATION_PARAMS = GenerationGC.TRANSLATION_PARAMS.copy()
-    TRANSLATION_PARAMS['large_object'] = 6*1024    # XXX adjust
-    TRANSLATION_PARAMS['large_object_gcptrs'] = 31*1024    # XXX adjust
-    TRANSLATION_PARAMS['min_nursery_size'] = 128*1024
-    # condition: large_object <= large_object_gcptrs < min_nursery_size/4
-
-    def __init__(self, *args, **kwds):
-        large_object = kwds.pop('large_object', 6*WORD)
-        large_object_gcptrs = kwds.pop('large_object_gcptrs', 8*WORD)
-        self.generation3_collect_threshold = kwds.pop(
-            'generation3_collect_threshold', GENERATION3_COLLECT_THRESHOLD)
-        GenerationGC.__init__(self, *args, **kwds)
-
-        # Objects whose total size is at least 'large_object' bytes are
-        # allocated separately in a mark-n-sweep fashion.  If the object
-        # has GC pointers in its varsized part, we use instead the
-        # higher limit 'large_object_gcptrs'.  The idea is that
-        # separately allocated objects are allocated immediately "old"
-        # and it's not good to have too many pointers from old to young
-        # objects.
-
-        # In this class, we assume that the 'large_object' limit is not
-        # very high, so that all objects that wouldn't easily fit in the
-        # nursery are considered large by this limit.  This is the
-        # meaning of the 'assert' below.
-        self.nonlarge_max = large_object - 1
-        self.nonlarge_gcptrs_max = large_object_gcptrs - 1
-        assert self.nonlarge_gcptrs_max <= self.lb_young_var_basesize
-        assert self.nonlarge_max <= self.nonlarge_gcptrs_max
-
-    def setup(self):
-        self.large_objects_collect_trigger = self.param_space_size
-        self._initial_trigger = self.large_objects_collect_trigger
-        self.rawmalloced_objects_to_trace = self.AddressStack()
-        self.count_semispaceonly_collects = 0
-
-        self.gen2_rawmalloced_objects = self.AddressStack()
-        self.gen3_rawmalloced_objects = self.AddressStack()
-        GenerationGC.setup(self)
-
-    def set_max_heap_size(self, size):
-        raise NotImplementedError
-
-    # NB. to simplify the code, only varsized objects can be considered
-    # 'large'.
-
-    def malloc_varsize_clear(self, typeid, length, size, itemsize,
-                             offset_to_length):
-        size_gc_header = self.gcheaderbuilder.size_gc_header
-        nonvarsize = size_gc_header + size
-
-        # Compute the maximal length that makes the object still
-        # below 'nonlarge_max'.  All the following logic is usually
-        # constant-folded because self.nonlarge_max, size and itemsize
-        # are all constants (the arguments are constant due to
-        # inlining) and self.has_gcptr_in_varsize() is constant-folded.
-        if self.has_gcptr_in_varsize(typeid):
-            nonlarge_max = self.nonlarge_gcptrs_max
-        else:
-            nonlarge_max = self.nonlarge_max
-
-        if not raw_malloc_usage(itemsize):
-            too_many_items = raw_malloc_usage(nonvarsize) > nonlarge_max
-        else:
-            maxlength = nonlarge_max - raw_malloc_usage(nonvarsize)
-            maxlength = maxlength // raw_malloc_usage(itemsize)
-            too_many_items = length > maxlength
-
-        if not too_many_items:
-            # With the above checks we know now that totalsize cannot be more
-            # than 'nonlarge_max'; in particular, the + and * cannot overflow.
-            # Let's try to fit the object in the nursery.
-            totalsize = nonvarsize + itemsize * length
-            result = self.nursery_free
-            if raw_malloc_usage(totalsize) <= self.nursery_top - result:
-                llarena.arena_reserve(result, totalsize)
-                # GCFLAG_NO_YOUNG_PTRS is never set on young objs
-                self.init_gc_object(result, typeid, flags=0)
-                (result + size_gc_header + offset_to_length).signed[0] = length
-                self.nursery_free = result + llarena.round_up_for_allocation(
-                    totalsize)
-                return llmemory.cast_adr_to_ptr(result+size_gc_header,
-                                                llmemory.GCREF)
-        return self.malloc_varsize_slowpath(typeid, length)
-
-    def malloc_varsize_slowpath(self, typeid, length, force_nonmovable=False):
-        # For objects that are too large, or when the nursery is exhausted.
-        # In order to keep malloc_varsize_clear() as compact as possible,
-        # we recompute what we need in this slow path instead of passing
-        # it all as function arguments.
-        size_gc_header = self.gcheaderbuilder.size_gc_header
-        nonvarsize = size_gc_header + self.fixed_size(typeid)
-        itemsize = self.varsize_item_sizes(typeid)
-        offset_to_length = self.varsize_offset_to_length(typeid)
-        try:
-            varsize = ovfcheck(itemsize * length)
-            totalsize = ovfcheck(nonvarsize + varsize)
-        except OverflowError:
-            raise MemoryError()
-        if self.has_gcptr_in_varsize(typeid):
-            nonlarge_max = self.nonlarge_gcptrs_max
-        else:
-            nonlarge_max = self.nonlarge_max
-        if force_nonmovable or raw_malloc_usage(totalsize) > nonlarge_max:
-            result = self.malloc_varsize_marknsweep(totalsize)
-            flags = self.GCFLAGS_FOR_NEW_EXTERNAL_OBJECTS | GCFLAG_UNVISITED
-        else:
-            result = self.malloc_varsize_collecting_nursery(totalsize)
-            flags = self.GCFLAGS_FOR_NEW_YOUNG_OBJECTS
-        self.init_gc_object(result, typeid, flags)
-        (result + size_gc_header + offset_to_length).signed[0] = length
-        return llmemory.cast_adr_to_ptr(result+size_gc_header, llmemory.GCREF)
-
-    malloc_varsize_slowpath._dont_inline_ = True
-
-    def malloc_varsize_nonmovable(self, typeid, length):
-        return self.malloc_varsize_slowpath(typeid, length, True)
-
-    def malloc_nonmovable(self, typeid, length, zero):
-        # helper for testing, same as GCBase.malloc
-        if self.is_varsize(typeid):
-            gcref = self.malloc_varsize_slowpath(typeid, length, True)
-        else:
-            raise NotImplementedError("Not supported")
-        return llmemory.cast_ptr_to_adr(gcref)
-
-    def can_move(self, addr):
-        tid = self.header(addr).tid
-        return not (tid & GCFLAG_EXTERNAL)
-
-    def malloc_varsize_collecting_nursery(self, totalsize):
-        result = self.collect_nursery()
-        ll_assert(raw_malloc_usage(totalsize) <= self.nursery_top - result,
-                  "not enough room in malloc_varsize_collecting_nursery()")
-        llarena.arena_reserve(result, totalsize)
-        self.nursery_free = result + llarena.round_up_for_allocation(
-            totalsize)
-        return result
-
-    def _check_rawsize_alloced(self, size_estimate):
-        self.large_objects_collect_trigger -= size_estimate
-        if self.large_objects_collect_trigger < 0:
-            debug_start("gc-rawsize-collect")
-            debug_print("allocated", (self._initial_trigger -
-                                      self.large_objects_collect_trigger),
-                        "bytes, triggering full collection")
-            self.semispace_collect()
-            debug_stop("gc-rawsize-collect")
-
-    def malloc_varsize_marknsweep(self, totalsize):
-        # In order to free the large objects from time to time, we
-        # arbitrarily force a full collect() if none occurs when we have
-        # allocated self.space_size + rawmalloced bytes of large objects.
-        self._check_rawsize_alloced(raw_malloc_usage(totalsize))
-        result = self.allocate_external_object(totalsize)
-        if not result:
-            raise MemoryError()
-        # The parent classes guarantee zero-filled allocations, so we
-        # need to follow suit.
-        llmemory.raw_memclear(result, totalsize)
-        size_gc_header = self.gcheaderbuilder.size_gc_header
-        self.gen2_rawmalloced_objects.append(result + size_gc_header)
-        return result
-
-    def allocate_external_object(self, totalsize):
-        # XXX maybe we should use arena_malloc() above a certain size?
-        # If so, we'd also use arena_reset() in malloc_varsize_marknsweep().
-        return llmemory.raw_malloc(totalsize)
-
-    def init_gc_object_immortal(self, addr, typeid,
-                                flags=(GCFLAG_NO_YOUNG_PTRS |
-                                       GCFLAG_NO_HEAP_PTRS |
-                                       GCFLAG_AGE_MAX)):
-        GenerationGC.init_gc_object_immortal(self, addr, typeid, flags)
-
-    # ___________________________________________________________________
-    # collect() and semispace_collect() are not synonyms in this GC: the
-    # former is a complete collect, while the latter is only collecting
-    # the semispaces and not always doing the mark-n-sweep pass over the
-    # external objects of 3rd generation.
-
-    def collect(self, gen=2):
-        if gen > 1:
-            self.count_semispaceonly_collects = self.generation3_collect_threshold
-        GenerationGC.collect(self, gen)
-
-    def is_collecting_gen3(self):
-        count = self.count_semispaceonly_collects
-        return count >= self.generation3_collect_threshold
-
-    # ___________________________________________________________________
-    # the following methods are hook into SemiSpaceGC.semispace_collect()
-
-    def starting_full_collect(self):
-        # At the start of a collection, the GCFLAG_UNVISITED bit is set
-        # exactly on the objects in gen2_rawmalloced_objects.  Only
-        # raw_malloc'ed objects can ever have this bit set.
-        self.count_semispaceonly_collects += 1
-        if self.is_collecting_gen3():
-            # set the GCFLAG_UNVISITED on all rawmalloced generation-3 objects
-            # as well, to let them be recorded by visit_external_object()
-            self.gen3_rawmalloced_objects.foreach(self._set_gcflag_unvisited,
-                                                  None)
-        ll_assert(not self.rawmalloced_objects_to_trace.non_empty(),
-                  "rawmalloced_objects_to_trace should be empty at start")
-        self._nonmoving_copy_count = 0
-        self._nonmoving_copy_size = 0
-
-    def _set_gcflag_unvisited(self, obj, ignored):
-        ll_assert(not (self.header(obj).tid & GCFLAG_UNVISITED),
-                  "bogus GCFLAG_UNVISITED on gen3 obj")
-        self.header(obj).tid |= GCFLAG_UNVISITED
-
-    def collect_roots(self):
-        if not self.is_collecting_gen3():
-            GenerationGC.collect_roots(self)
-        else:
-            # as we don't record which prebuilt gc objects point to
-            # rawmalloced generation 3 objects, we have to trace all
-            # the prebuilt gc objects.
-            self.root_walker.walk_roots(
-                SemiSpaceGC._collect_root,  # stack roots
-                SemiSpaceGC._collect_root,  # static in prebuilt non-gc structs
-                SemiSpaceGC._collect_root)  # static in prebuilt gc objects
-
-    def surviving(self, obj):
-        # To use during a collection.  The objects that survive are the
-        # ones with GCFLAG_FORWARDED set and GCFLAG_UNVISITED not set.
-        # This is equivalent to self.is_forwarded() for all objects except
-        # the ones obtained by raw_malloc.
-        flags = self.header(obj).tid & (GCFLAG_FORWARDED|GCFLAG_UNVISITED)
-        return flags == GCFLAG_FORWARDED
-
-    def is_last_generation(self, obj):
-        return ((self.header(obj).tid & (GCFLAG_EXTERNAL|GCFLAG_AGE_MASK)) ==
-                (GCFLAG_EXTERNAL|GCFLAG_AGE_MAX))
-
-    def visit_external_object(self, obj):
-        hdr = self.header(obj)
-        if hdr.tid & GCFLAG_UNVISITED:
-            # This is a not-visited-yet raw_malloced object.
-            hdr.tid &= ~GCFLAG_UNVISITED
-            self.rawmalloced_objects_to_trace.append(obj)
-
-    def make_a_copy(self, obj, objsize):
-        # During a full collect, all copied objects might implicitly come
-        # from the nursery.  If they do, we must add the GCFLAG_NO_YOUNG_PTRS.
-        # If they don't, we count how many times they are copied and when
-        # some threshold is reached we make the copy a non-movable "external"
-        # object.  The threshold is MAX_SEMISPACE_AGE.
-        tid = self.header(obj).tid
-        # XXX the following logic is not doing exactly what is explained
-        # above: any object without GCFLAG_NO_YOUNG_PTRS has its age not
-        # incremented.  This is accidental: it means that objects that
-        # are very often modified to point to young objects don't reach
-        # the 3rd generation.  For now I'll leave it this way because
-        # I'm not sure that it's a bad thing.
-        if not (tid & GCFLAG_NO_YOUNG_PTRS):
-            tid |= GCFLAG_NO_YOUNG_PTRS    # object comes from the nursery
-        elif (tid & GCFLAG_AGE_MASK) < GCFLAG_AGE_MAX:
-            tid += GCFLAG_AGE_ONE
-        else:
-            newobj = self.make_a_nonmoving_copy(obj, objsize)
-            if newobj:
-                return newobj
-            tid &= ~GCFLAG_AGE_MASK
-        # skip GenerationGC.make_a_copy() as we already did the right
-        # thing about GCFLAG_NO_YOUNG_PTRS
-        return self._make_a_copy_with_tid(obj, objsize, tid)
-
-    def make_a_nonmoving_copy(self, obj, objsize):
-        # NB. the object can have a finalizer or be a weakref, but
-        # it's not an issue.
-        totalsize = self.size_gc_header() + objsize
-        tid = self.header(obj).tid
-        if tid & GCFLAG_HASHMASK:
-            totalsize_incl_hash = totalsize + llmemory.sizeof(lltype.Signed)
-        else:
-            totalsize_incl_hash = totalsize
-        newaddr = self.allocate_external_object(totalsize_incl_hash)
-        if not newaddr:
-            return llmemory.NULL   # can't raise MemoryError during a collect()
-        self._nonmoving_copy_count += 1
-        self._nonmoving_copy_size += raw_malloc_usage(totalsize)
-
-        llmemory.raw_memcopy(obj - self.size_gc_header(), newaddr, totalsize)
-        if tid & GCFLAG_HASHMASK:
-            hash = self._get_object_hash(obj, objsize, tid)
-            (newaddr + totalsize).signed[0] = hash
-            tid |= GC_HASH_HASFIELD
-        #
-        # GCFLAG_UNVISITED is not set
-        # GCFLAG_NO_HEAP_PTRS is not set either, conservatively.  It may be
-        # set by the next collection's collect_last_generation_roots().
-        # This old object is immediately put at generation 3.
-        newobj = newaddr + self.size_gc_header()
-        hdr = self.header(newobj)
-        hdr.tid = tid | self.GCFLAGS_FOR_NEW_EXTERNAL_OBJECTS
-        ll_assert(self.is_last_generation(newobj),
-                  "make_a_nonmoving_copy: object too young")
-        self.gen3_rawmalloced_objects.append(newobj)
-        self.last_generation_root_objects.append(newobj)
-        self.rawmalloced_objects_to_trace.append(newobj)   # visit me
-        return newobj
-
-    def scan_copied(self, scan):
-        # Alternate between scanning the regular objects we just moved
-        # and scanning the raw_malloc'ed object we just visited.
-        progress = True
-        while progress:
-            newscan = GenerationGC.scan_copied(self, scan)
-            progress = newscan != scan
-            scan = newscan
-            while self.rawmalloced_objects_to_trace.non_empty():
-                obj = self.rawmalloced_objects_to_trace.pop()
-                self.trace_and_copy(obj)
-                progress = True
-        return scan
-
-    def finished_full_collect(self):
-        ll_assert(not self.rawmalloced_objects_to_trace.non_empty(),
-                  "rawmalloced_objects_to_trace should be empty at end")
-        debug_print("| [hybrid] made nonmoving:         ",
-                    self._nonmoving_copy_size, "bytes in",
-                    self._nonmoving_copy_count, "objs")
-        rawmalloced_trigger = 0
-        # sweep the nonmarked rawmalloced objects
-        if self.is_collecting_gen3():
-            rawmalloced_trigger += self.sweep_rawmalloced_objects(generation=3)
-        rawmalloced_trigger += self.sweep_rawmalloced_objects(generation=2)
-        self.large_objects_collect_trigger = (rawmalloced_trigger +
-                                              self.space_size)
-        if self.is_collecting_gen3():
-            self.count_semispaceonly_collects = 0
-        self._initial_trigger = self.large_objects_collect_trigger
-
-    def sweep_rawmalloced_objects(self, generation):
-        # free all the rawmalloced objects of the specified generation
-        # that have not been marked
-        if generation == 2:
-            objects = self.gen2_rawmalloced_objects
-            # generation 2 sweep: if A points to an object object B that
-            # moves from gen2 to gen3, it's possible that A no longer points
-            # to any gen2 object.  In this case, A remains a bit too long in
-            # last_generation_root_objects, but this will be fixed by the
-            # next collect_last_generation_roots().
-        elif generation == 3:
-            objects = self.gen3_rawmalloced_objects
-            # generation 3 sweep: remove from last_generation_root_objects
-            # all the objects that we are about to free
-            gen3roots = self.last_generation_root_objects
-            newgen3roots = self.AddressStack()
-            while gen3roots.non_empty():
-                obj = gen3roots.pop()
-                if not (self.header(obj).tid & GCFLAG_UNVISITED):
-                    newgen3roots.append(obj)
-            gen3roots.delete()
-            self.last_generation_root_objects = newgen3roots
-        else:
-            ll_assert(False, "bogus 'generation'")
-            return 0 # to please the flowspace
-
-        surviving_objects = self.AddressStack()
-        # Help the flow space
-        alive_count = alive_size = dead_count = dead_size = 0
-        debug = have_debug_prints()
-        while objects.non_empty():
-            obj = objects.pop()
-            tid = self.header(obj).tid
-            if tid & GCFLAG_UNVISITED:
-                if debug:
-                    dead_count+=1
-                    dead_size+=raw_malloc_usage(self.get_size_incl_hash(obj))
-                addr = obj - self.gcheaderbuilder.size_gc_header
-                llmemory.raw_free(addr)
-            else:
-                if debug:
-                    alive_count+=1
-                alive_size+=raw_malloc_usage(self.get_size_incl_hash(obj))
-                if generation == 3:
-                    surviving_objects.append(obj)
-                elif generation == 2:
-                    ll_assert((tid & GCFLAG_AGE_MASK) < GCFLAG_AGE_MAX,
-                              "wrong age for generation 2 object")
-                    tid += GCFLAG_AGE_ONE
-                    if (tid & GCFLAG_AGE_MASK) == GCFLAG_AGE_MAX:
-                        # the object becomes part of generation 3
-                        self.gen3_rawmalloced_objects.append(obj)
-                        # GCFLAG_NO_HEAP_PTRS not set yet, conservatively
-                        self.last_generation_root_objects.append(obj)
-                    else:
-                        # the object stays in generation 2
-                        tid |= GCFLAG_UNVISITED
-                        surviving_objects.append(obj)
-                    self.header(obj).tid = tid
-        objects.delete()
-        if generation == 2:
-            self.gen2_rawmalloced_objects = surviving_objects
-        elif generation == 3:
-            self.gen3_rawmalloced_objects = surviving_objects
-        debug_print("| [hyb] gen", generation,
-                    "nonmoving now alive: ",
-                    alive_size, "bytes in",
-                    alive_count, "objs")
-        debug_print("| [hyb] gen", generation,
-                    "nonmoving freed:     ",
-                    dead_size, "bytes in",
-                    dead_count, "objs")
-        return alive_size
-
-    def id(self, ptr):
-        obj = llmemory.cast_ptr_to_adr(ptr)
-
-        # is it a tagged pointer?
-        if not self.is_valid_gc_object(obj):
-            return llmemory.cast_adr_to_int(obj)
-
-        if self._is_external(obj):
-            # a prebuilt or rawmalloced object
-            if self.is_last_generation(obj):
-                # a generation 3 object may be one that used to live in
-                # the semispace.  So we still need to check if the object had
-                # its id taken before.  If not, we can use its address as its
-                # id as it is not going to move any more.
-                result = self.objects_with_id.get(obj, obj)
-            else:
-                # a generation 2 external object was never non-external in
-                # the past, so it cannot be listed in self.objects_with_id.
-                result = obj
-        else:
-            result = self._compute_id(obj)     # common case
-        return llmemory.cast_adr_to_int(result) * 2 # see comment in base.py
-        # XXX a possible optimization would be to use three dicts, one
-        # for each generation, instead of mixing gen2 and gen3 objects.
-
-    def debug_check_object(self, obj):
-        """Check the invariants about 'obj' that should be true
-        between collections."""
-        GenerationGC.debug_check_object(self, obj)
-        tid = self.header(obj).tid
-        if tid & GCFLAG_UNVISITED:
-            ll_assert(self._d_gen2ro.contains(obj),
-                      "GCFLAG_UNVISITED on non-gen2 object")
-
-    def debug_check_consistency(self):
-        if self.DEBUG:
-            self._d_gen2ro = self.gen2_rawmalloced_objects.stack2dict()
-            GenerationGC.debug_check_consistency(self)
-            self._d_gen2ro.delete()
-            self.gen2_rawmalloced_objects.foreach(self._debug_check_gen2, None)
-            self.gen3_rawmalloced_objects.foreach(self._debug_check_gen3, None)
-
-    def _debug_check_gen2(self, obj, ignored):
-        tid = self.header(obj).tid
-        ll_assert(bool(tid & GCFLAG_EXTERNAL),
-                  "gen2: missing GCFLAG_EXTERNAL")
-        ll_assert(bool(tid & GC_HASH_TAKEN_ADDR),
-                  "gen2: missing GC_HASH_TAKEN_ADDR")
-        ll_assert(bool(tid & GCFLAG_UNVISITED),
-                  "gen2: missing GCFLAG_UNVISITED")
-        ll_assert((tid & GCFLAG_AGE_MASK) < GCFLAG_AGE_MAX,
-                  "gen2: age field too large")
-    def _debug_check_gen3(self, obj, ignored):
-        tid = self.header(obj).tid
-        ll_assert(bool(tid & GCFLAG_EXTERNAL),
-                  "gen3: missing GCFLAG_EXTERNAL")
-        ll_assert(bool(tid & GC_HASH_TAKEN_ADDR),
-                  "gen3: missing GC_HASH_TAKEN_ADDR")
-        ll_assert(not (tid & GCFLAG_UNVISITED),
-                  "gen3: unexpected GCFLAG_UNVISITED")
-        ll_assert((tid & GCFLAG_AGE_MASK) == GCFLAG_AGE_MAX,
-                  "gen3: wrong age field")
-
-    def can_malloc_nonmovable(self):
-        return True

File rpython/memory/gc/semispace.py

-from rpython.rtyper.lltypesystem.llmemory import raw_malloc, raw_free
-from rpython.rtyper.lltypesystem.llmemory import raw_memcopy, raw_memclear
-from rpython.rtyper.lltypesystem.llmemory import NULL, raw_malloc_usage
-from rpython.memory.support import get_address_stack, get_address_deque
-from rpython.memory.support import AddressDict
-from rpython.rtyper.lltypesystem import lltype, llmemory, llarena, rffi, llgroup
-from rpython.rlib.objectmodel import free_non_gc_object
-from rpython.rlib.debug import ll_assert, have_debug_prints
-from rpython.rlib.debug import debug_print, debug_start, debug_stop
-from rpython.rtyper.lltypesystem.lloperation import llop
-from rpython.rlib.rarithmetic import ovfcheck, LONG_BIT
-from rpython.memory.gc.base import MovingGCBase, ARRAY_TYPEID_MAP,\
-     TYPEID_MAP
-
-import sys, os
-
-first_gcflag = 1 << (LONG_BIT//2)
-GCFLAG_FORWARDED = first_gcflag
-# GCFLAG_EXTERNAL is set on objects not living in the semispace:
-# either immortal objects or (for HybridGC) externally raw_malloc'ed
-GCFLAG_EXTERNAL = first_gcflag << 1
-GCFLAG_FINALIZATION_ORDERING = first_gcflag << 2
-
-_GCFLAG_HASH_BASE = first_gcflag << 3
-GCFLAG_HASHMASK = _GCFLAG_HASH_BASE * 0x3   # also consumes 'first_gcflag << 4'
-# the two bits in GCFLAG_HASHMASK can have one of the following values:
-#   - nobody ever asked for the hash of the object
-GC_HASH_NOTTAKEN   = _GCFLAG_HASH_BASE * 0x0
-#   - someone asked, and we gave the address of the object
-GC_HASH_TAKEN_ADDR = _GCFLAG_HASH_BASE * 0x1
-#   - someone asked, and we gave the address plus 'nursery_hash_base'
-GC_HASH_TAKEN_NURS = _GCFLAG_HASH_BASE * 0x2
-#   - we have our own extra field to store the hash
-GC_HASH_HASFIELD   = _GCFLAG_HASH_BASE * 0x3
-
-GCFLAG_EXTRA = first_gcflag << 5    # for RPython abuse only
-
-memoryError = MemoryError()
-
-
-class SemiSpaceGC(MovingGCBase):
-    _alloc_flavor_ = "raw"
-    inline_simple_malloc = True
-    inline_simple_malloc_varsize = True
-    malloc_zero_filled = True
-    first_unused_gcflag = first_gcflag << 6
-    gcflag_extra = GCFLAG_EXTRA
-
-    HDR = lltype.Struct('header', ('tid', lltype.Signed))   # XXX or rffi.INT?
-    typeid_is_in_field = 'tid'
-    withhash_flag_is_in_field = 'tid', _GCFLAG_HASH_BASE * 0x2
-    # ^^^ prebuilt objects either have GC_HASH_TAKEN_ADDR or they
-    #     have GC_HASH_HASFIELD (and then they are one word longer).
-    FORWARDSTUB = lltype.GcStruct('forwarding_stub',
-                                  ('forw', llmemory.Address))
-    FORWARDSTUBPTR = lltype.Ptr(FORWARDSTUB)
-
-    object_minimal_size = llmemory.sizeof(FORWARDSTUB)
-
-    # the following values override the default arguments of __init__ when
-    # translating to a real backend.
-    TRANSLATION_PARAMS = {'space_size': 8*1024*1024} # XXX adjust
-
-    def __init__(self, config, space_size=4096, max_space_size=sys.maxint//2+1,
-                 **kwds):
-        self.param_space_size = space_size
-        self.param_max_space_size = max_space_size
-        MovingGCBase.__init__(self, config, **kwds)
-
-    def setup(self):
-        #self.total_collection_time = 0.0
-        self.total_collection_count = 0
-
-        self.space_size = self.param_space_size
-        self.max_space_size = self.param_max_space_size
-        self.red_zone = 0
-
-        #self.program_start_time = time.time()
-        self.tospace = llarena.arena_malloc(self.space_size, True)
-        ll_assert(bool(self.tospace), "couldn't allocate tospace")
-        self.top_of_space = self.tospace + self.space_size
-        self.fromspace = llarena.arena_malloc(self.space_size, True)
-        ll_assert(bool(self.fromspace), "couldn't allocate fromspace")
-        self.free = self.tospace
-        MovingGCBase.setup(self)
-        self.objects_with_finalizers = self.AddressDeque()
-        self.objects_with_light_finalizers = self.AddressStack()
-        self.objects_with_weakrefs = self.AddressStack()
-
-    def _teardown(self):
-        debug_print("Teardown")
-        llarena.arena_free(self.fromspace)
-        llarena.arena_free(self.tospace)
-
-    # This class only defines the malloc_{fixed,var}size_clear() methods
-    # because the spaces are filled with zeroes in advance.
-
-    def malloc_fixedsize_clear(self, typeid16, size,
-                               has_finalizer=False,
-                               is_finalizer_light=False,
-                               contains_weakptr=False):
-        size_gc_header = self.gcheaderbuilder.size_gc_header
-        totalsize = size_gc_header + size
-        result = self.free
-        if raw_malloc_usage(totalsize) > self.top_of_space - result:
-            result = self.obtain_free_space(totalsize)
-        llarena.arena_reserve(result, totalsize)
-        self.init_gc_object(result, typeid16)
-        self.free = result + totalsize
-        #if is_finalizer_light:
-        #    self.objects_with_light_finalizers.append(result + size_gc_header)
-        #else:
-        if has_finalizer:
-            self.objects_with_finalizers.append(result + size_gc_header)
-        if contains_weakptr:
-            self.objects_with_weakrefs.append(result + size_gc_header)
-        return llmemory.cast_adr_to_ptr(result+size_gc_header, llmemory.GCREF)
-
-    def malloc_varsize_clear(self, typeid16, length, size, itemsize,
-                             offset_to_length):
-        size_gc_header = self.gcheaderbuilder.size_gc_header
-        nonvarsize = size_gc_header + size
-        try:
-            varsize = ovfcheck(itemsize * length)
-            totalsize = ovfcheck(nonvarsize + varsize)
-        except OverflowError:
-            raise memoryError
-        result = self.free
-        if raw_malloc_usage(totalsize) > self.top_of_space - result:
-            result = self.obtain_free_space(totalsize)
-        llarena.arena_reserve(result, totalsize)
-        self.init_gc_object(result, typeid16)
-        (result + size_gc_header + offset_to_length).signed[0] = length
-        self.free = result + llarena.round_up_for_allocation(totalsize)
-        return llmemory.cast_adr_to_ptr(result+size_gc_header, llmemory.GCREF)
-
-    def shrink_array(self, addr, smallerlength):
-        size_gc_header = self.gcheaderbuilder.size_gc_header
-        if self._is_in_the_space(addr - size_gc_header):
-            typeid = self.get_type_id(addr)
-            totalsmallersize = (
-                size_gc_header + self.fixed_size(typeid) +
-                self.varsize_item_sizes(typeid) * smallerlength)
-            llarena.arena_shrink_obj(addr - size_gc_header, totalsmallersize)
-            #
-            offset_to_length = self.varsize_offset_to_length(typeid)
-            (addr + offset_to_length).signed[0] = smallerlength
-            return True
-        else:
-            return False
-
-    def obtain_free_space(self, needed):
-        # a bit of tweaking to maximize the performance and minimize the
-        # amount of code in an inlined version of malloc_fixedsize_clear()
-        if not self.try_obtain_free_space(needed):
-            raise memoryError
-        return self.free
-    obtain_free_space._dont_inline_ = True
-
-    def try_obtain_free_space(self, needed):
-        # XXX for bonus points do big objects differently
-        needed = raw_malloc_usage(needed)
-        if (self.red_zone >= 2 and self.space_size < self.max_space_size and
-            self.double_space_size()):
-            pass    # collect was done during double_space_size()
-        else:
-            self.semispace_collect()
-        missing = needed - (self.top_of_space - self.free)
-        if missing <= 0:
-            return True      # success
-        else:
-            # first check if the object could possibly fit
-            proposed_size = self.space_size
-            while missing > 0:
-                if proposed_size >= self.max_space_size:
-                    return False    # no way
-                missing -= proposed_size
-                proposed_size *= 2
-            # For address space fragmentation reasons, we double the space
-            # size possibly several times, moving the objects at each step,
-            # instead of going directly for the final size.  We assume that
-            # it's a rare case anyway.
-            while self.space_size < proposed_size:
-                if not self.double_space_size():
-                    return False
-            ll_assert(needed <= self.top_of_space - self.free,
-                         "double_space_size() failed to do its job")
-            return True
-
-    def double_space_size(self):
-        self.red_zone = 0
-        old_fromspace = self.fromspace
-        newsize = self.space_size * 2
-        newspace = llarena.arena_malloc(newsize, True)
-        if not newspace:
-            return False    # out of memory
-        llarena.arena_free(old_fromspace)
-        self.fromspace = newspace
-        # now self.tospace contains the existing objects and
-        # self.fromspace is the freshly allocated bigger space
-
-        self.semispace_collect(size_changing=True)
-        self.top_of_space = self.tospace + newsize
-        # now self.tospace is the freshly allocated bigger space,
-        # and self.fromspace is the old smaller space, now empty
-        llarena.arena_free(self.fromspace)
-
-        newspace = llarena.arena_malloc(newsize, True)
-        if not newspace:
-            # Complex failure case: we have in self.tospace a big chunk
-            # of memory, and the two smaller original spaces are already gone.
-            # Unsure if it's worth these efforts, but we can artificially
-            # split self.tospace in two again...
-            self.max_space_size = self.space_size    # don't try to grow again,
-            #              because doing arena_free(self.fromspace) would crash
-            self.fromspace = self.tospace + self.space_size
-            self.top_of_space = self.fromspace
-            ll_assert(self.free <= self.top_of_space,
-                         "unexpected growth of GC space usage during collect")
-            return False     # out of memory
-
-        self.fromspace = newspace
-        self.space_size = newsize
-        return True    # success
-
-    def set_max_heap_size(self, size):
-        # Set the maximum semispace size.
-        # The size is rounded down to the next power of two.  Also, this is
-        # the size of one semispace only, so actual usage can be the double
-        # during a collection.  Finally, note that this will never shrink
-        # an already-allocated heap.
-        if size < 1:
-            size = 1     # actually, the minimum is 8MB in default translations
-        self.max_space_size = sys.maxint//2+1
-        while self.max_space_size > size:
-            self.max_space_size >>= 1
-
-    @classmethod
-    def JIT_minimal_size_in_nursery(cls):
-        return cls.object_minimal_size
-
-    def collect(self, gen=0):
-        self.debug_check_consistency()
-        self.semispace_collect()
-        # the indirection is required by the fact that collect() is referred
-        # to by the gc transformer, and the default argument would crash
-        # (this is also a hook for the HybridGC)
-
-    def semispace_collect(self, size_changing=False):
-        debug_start("gc-collect")
-        debug_print()
-        debug_print(".----------- Full collection ------------------")
-        start_usage = self.free - self.tospace
-        debug_print("| used before collection:          ",
-                    start_usage, "bytes")
-        #start_time = time.time()
-        #llop.debug_print(lltype.Void, 'semispace_collect', int(size_changing))
-
-        # Switch the spaces.  We copy everything over to the empty space
-        # (self.fromspace at the beginning of the collection), and clear the old
-        # one (self.tospace at the beginning).  Their purposes will be reversed
-        # for the next collection.
-        tospace = self.fromspace
-        fromspace = self.tospace
-        self.fromspace = fromspace
-        self.tospace = tospace
-        self.top_of_space = tospace + self.space_size
-        scan = self.free = tospace
-        self.starting_full_collect()
-        self.collect_roots()
-        if self.run_finalizers.non_empty():
-            self.update_run_finalizers()
-        scan = self.scan_copied(scan)
-        if self.objects_with_light_finalizers.non_empty():
-            self.deal_with_objects_with_light_finalizers()
-        if self.objects_with_finalizers.non_empty():
-            scan = self.deal_with_objects_with_finalizers(scan)
-        if self.objects_with_weakrefs.non_empty():
-            self.invalidate_weakrefs()
-        self.update_objects_with_id()
-        self.finished_full_collect()
-        self.debug_check_consistency()
-        if not size_changing:
-            llarena.arena_reset(fromspace, self.space_size, True)
-            self.record_red_zone()
-            self.execute_finalizers()
-        #llop.debug_print(lltype.Void, 'collected', self.space_size, size_changing, self.top_of_space - self.free)
-        if have_debug_prints():
-            #end_time = time.time()
-            #elapsed_time = end_time - start_time
-            #self.total_collection_time += elapsed_time
-            self.total_collection_count += 1
-            #total_program_time = end_time - self.program_start_time
-            end_usage = self.free - self.tospace
-            debug_print("| used after collection:           ",
-                        end_usage, "bytes")
-            debug_print("| freed:                           ",
-                        start_usage - end_usage, "bytes")
-            debug_print("| size of each semispace:          ",
-                        self.space_size, "bytes")
-            debug_print("| fraction of semispace now used:  ",
-                        end_usage * 100.0 / self.space_size, "%")
-            #ct = self.total_collection_time
-            cc = self.total_collection_count
-            debug_print("| number of semispace_collects:    ",
-                        cc)
-            #debug_print("|                         i.e.:    ",
-            #            cc / total_program_time, "per second")
-            #debug_print("| total time in semispace_collect: ",
-            #            ct, "seconds")
-            #debug_print("|                            i.e.: ",
-            #            ct * 100.0 / total_program_time, "%")
-            debug_print("`----------------------------------------------")
-        debug_stop("gc-collect")
-
-    def starting_full_collect(self):
-        pass    # hook for the HybridGC
-
-    def finished_full_collect(self):
-        pass    # hook for the HybridGC
-
-    def record_red_zone(self):
-        # red zone: if the space is more than 80% full, the next collection
-        # should double its size.  If it is more than 66% full twice in a row,
-        # then it should double its size too.  (XXX adjust)
-        # The goal is to avoid many repeated collection that don't free a lot
-        # of memory each, if the size of the live object set is just below the
-        # size of the space.
-        free_after_collection = self.top_of_space - self.free
-        if free_after_collection > self.space_size // 3:
-            self.red_zone = 0
-        else:
-            self.red_zone += 1
-            if free_after_collection < self.space_size // 5:
-                self.red_zone += 1
-
-    def get_size_incl_hash(self, obj):
-        size = self.get_size(obj)
-        hdr = self.header(obj)
-        if (hdr.tid & GCFLAG_HASHMASK) == GC_HASH_HASFIELD:
-            size += llmemory.sizeof(lltype.Signed)
-        return size
-
-    def scan_copied(self, scan):
-        while scan < self.free:
-            curr = scan + self.size_gc_header()
-            self.trace_and_copy(curr)
-            scan += self.size_gc_header() + self.get_size_incl_hash(curr)
-        return scan
-
-    def collect_roots(self):
-        self.root_walker.walk_roots(
-            SemiSpaceGC._collect_root,  # stack roots
-            SemiSpaceGC._collect_root,  # static in prebuilt non-gc structures
-            SemiSpaceGC._collect_root)  # static in prebuilt gc objects
-
-    def _collect_root(self, root):
-        root.address[0] = self.copy(root.address[0])
-
-    def copy(self, obj):
-        if self.DEBUG:
-            self.debug_check_can_copy(obj)
-        if self.is_forwarded(obj):
-            #llop.debug_print(lltype.Void, obj, "already copied to", self.get_forwarding_address(obj))
-            return self.get_forwarding_address(obj)
-        else:
-            objsize = self.get_size(obj)
-            newobj = self.make_a_copy(obj, objsize)
-            #llop.debug_print(lltype.Void, obj, "copied to", newobj,
-            #                 "tid", self.header(obj).tid,
-            #                 "size", totalsize)
-            self.set_forwarding_address(obj, newobj, objsize)
-            return newobj
-
-    def _get_object_hash(self, obj, objsize, tid):
-        # Returns the hash of the object, which must not be GC_HASH_NOTTAKEN.
-        gc_hash = tid & GCFLAG_HASHMASK
-        if gc_hash == GC_HASH_HASFIELD:
-            obj = llarena.getfakearenaaddress(obj)
-            return (obj + objsize).signed[0]
-        elif gc_hash == GC_HASH_TAKEN_ADDR:
-            return llmemory.cast_adr_to_int(obj)
-        elif gc_hash == GC_HASH_TAKEN_NURS:
-            return self._compute_current_nursery_hash(obj)
-        else:
-            assert 0, "gc_hash == GC_HASH_NOTTAKEN"
-
-    def _make_a_copy_with_tid(self, obj, objsize, tid):
-        totalsize = self.size_gc_header() + objsize
-        newaddr = self.free
-        llarena.arena_reserve(newaddr, totalsize)
-        raw_memcopy(obj - self.size_gc_header(), newaddr, totalsize)
-        if tid & GCFLAG_HASHMASK:
-            hash = self._get_object_hash(obj, objsize, tid)
-            llarena.arena_reserve(newaddr + totalsize,
-                                  llmemory.sizeof(lltype.Signed))
-            (newaddr + totalsize).signed[0] = hash
-            tid |= GC_HASH_HASFIELD
-            totalsize += llmemory.sizeof(lltype.Signed)
-        self.free += totalsize
-        newhdr = llmemory.cast_adr_to_ptr(newaddr, lltype.Ptr(self.HDR))
-        newhdr.tid = tid
-        newobj = newaddr + self.size_gc_header()
-        return newobj
-
-    def make_a_copy(self, obj, objsize):
-        tid = self.header(obj).tid
-        return self._make_a_copy_with_tid(obj, objsize, tid)
-
-    def trace_and_copy(self, obj):
-        self.trace(obj, self._trace_copy, None)
-
-    def _trace_copy(self, pointer, ignored):
-        pointer.address[0] = self.copy(pointer.address[0])
-
-    def surviving(self, obj):
-        # To use during a collection.  Check if the object is currently
-        # marked as surviving the collection.  This is equivalent to
-        # self.is_forwarded() for all objects except the nonmoving objects
-        # created by the HybridGC subclass.  In all cases, if an object
-        # survives, self.get_forwarding_address() returns its new address.
-        return self.is_forwarded(obj)
-
-    def is_forwarded(self, obj):
-        return self.header(obj).tid & GCFLAG_FORWARDED != 0
-        # note: all prebuilt objects also have this flag set
-
-    def get_forwarding_address(self, obj):
-        tid = self.header(obj).tid
-        if tid & GCFLAG_EXTERNAL:
-            self.visit_external_object(obj)
-            return obj      # external or prebuilt objects are "forwarded"
-                            # to themselves
-        else:
-            stub = llmemory.cast_adr_to_ptr(obj, self.FORWARDSTUBPTR)
-            return stub.forw
-
-    def visit_external_object(self, obj):
-        pass    # hook for the HybridGC
-
-    def get_possibly_forwarded_type_id(self, obj):
-        tid = self.header(obj).tid
-        if self.is_forwarded(obj) and not (tid & GCFLAG_EXTERNAL):
-            obj = self.get_forwarding_address(obj)
-        return self.get_type_id(obj)
-
-    def set_forwarding_address(self, obj, newobj, objsize):
-        # To mark an object as forwarded, we set the GCFLAG_FORWARDED and
-        # overwrite the object with a FORWARDSTUB.  Doing so is a bit
-        # long-winded on llarena, but it all melts down to two memory
-        # writes after translation to C.
-        size_gc_header = self.size_gc_header()
-        stubsize = llmemory.sizeof(self.FORWARDSTUB)
-        tid = self.header(obj).tid
-        ll_assert(tid & GCFLAG_EXTERNAL == 0,  "unexpected GCFLAG_EXTERNAL")
-        ll_assert(tid & GCFLAG_FORWARDED == 0, "unexpected GCFLAG_FORWARDED")
-        # replace the object at 'obj' with a FORWARDSTUB.
-        hdraddr = obj - size_gc_header
-        llarena.arena_reset(hdraddr, size_gc_header + objsize, False)
-        llarena.arena_reserve(hdraddr, size_gc_header + stubsize)
-        hdr = llmemory.cast_adr_to_ptr(hdraddr, lltype.Ptr(self.HDR))
-        hdr.tid = tid | GCFLAG_FORWARDED
-        stub = llmemory.cast_adr_to_ptr(obj, self.FORWARDSTUBPTR)
-        stub.forw = newobj
-
-    def combine(self, typeid16, flags):
-        return llop.combine_ushort(lltype.Signed, typeid16, flags)
-
-    def get_type_id(self, addr):
-        tid = self.header(addr).tid
-        ll_assert(tid & (GCFLAG_FORWARDED|GCFLAG_EXTERNAL) != GCFLAG_FORWARDED,
-                  "get_type_id on forwarded obj")
-        # Non-prebuilt forwarded objects are overwritten with a FORWARDSTUB.
-        # Although calling get_type_id() on a forwarded object works by itself,
-        # we catch it as an error because it's likely that what is then
-        # done with the typeid is bogus.
-        return llop.extract_ushort(llgroup.HALFWORD, tid)
-
-    def init_gc_object(self, addr, typeid16, flags=0):
-        hdr = llmemory.cast_adr_to_ptr(addr, lltype.Ptr(self.HDR))
-        hdr.tid = self.combine(typeid16, flags)
-
-    def init_gc_object_immortal(self, addr, typeid16, flags=0):
-        hdr = llmemory.cast_adr_to_ptr(addr, lltype.Ptr(self.HDR))
-        flags |= GCFLAG_EXTERNAL | GCFLAG_FORWARDED | GC_HASH_TAKEN_ADDR
-        hdr.tid = self.combine(typeid16, flags)
-        # immortal objects always have GCFLAG_FORWARDED set;
-        # see get_forwarding_address().
-
-    def deal_with_objects_with_light_finalizers(self):
-        """ This is a much simpler version of dealing with finalizers
-        and an optimization - we can reasonably assume that those finalizers
-        don't do anything fancy and *just* call them. Among other things
-        they won't resurrect objects
-        """
-        new_objects = self.AddressStack()
-        while self.objects_with_light_finalizers.non_empty():
-            obj = self.objects_with_light_finalizers.pop()
-            if self.surviving(obj):
-                new_objects.append(self.get_forwarding_address(obj))
-            else:
-                finalizer = self.getfinalizer(self.get_type_id(obj))
-                finalizer(obj)
-        self.objects_with_light_finalizers.delete()
-        self.objects_with_light_finalizers = new_objects
-
-    def deal_with_objects_with_finalizers(self, scan):
-        # walk over list of objects with finalizers
-        # if it is not copied, add it to the list of to-be-called finalizers
-        # and copy it, to me make the finalizer runnable
-        # We try to run the finalizers in a "reasonable" order, like
-        # CPython does.  The details of this algorithm are in
-        # pypy/doc/discussion/finalizer-order.txt.
-        new_with_finalizer = self.AddressDeque()
-        marked = self.AddressDeque()
-        pending = self.AddressStack()
-        self.tmpstack = self.AddressStack()
-        while self.objects_with_finalizers.non_empty():
-            x = self.objects_with_finalizers.popleft()
-            ll_assert(self._finalization_state(x) != 1, 
-                      "bad finalization state 1")
-            if self.surviving(x):
-                new_with_finalizer.append(self.get_forwarding_address(x))
-                continue
-            marked.append(x)
-            pending.append(x)
-            while pending.non_empty():
-                y = pending.pop()
-                state = self._finalization_state(y)
-                if state == 0:
-                    self._bump_finalization_state_from_0_to_1(y)
-                    self.trace(y, self._append_if_nonnull, pending)
-                elif state == 2:
-                    self._recursively_bump_finalization_state_from_2_to_3(y)
-            scan = self._recursively_bump_finalization_state_from_1_to_2(
-                       x, scan)
-
-        while marked.non_empty():
-            x = marked.popleft()
-            state = self._finalization_state(x)
-            ll_assert(state >= 2, "unexpected finalization state < 2")
-            newx = self.get_forwarding_address(x)
-            if state == 2:
-                self.run_finalizers.append(newx)
-                # we must also fix the state from 2 to 3 here, otherwise
-                # we leave the GCFLAG_FINALIZATION_ORDERING bit behind
-                # which will confuse the next collection
-                self._recursively_bump_finalization_state_from_2_to_3(x)
-            else:
-                new_with_finalizer.append(newx)
-
-        self.tmpstack.delete()
-        pending.delete()
-        marked.delete()
-        self.objects_with_finalizers.delete()
-        self.objects_with_finalizers = new_with_finalizer
-        return scan
-
-    def _append_if_nonnull(pointer, stack):
-        stack.append(pointer.address[0])
-    _append_if_nonnull = staticmethod(_append_if_nonnull)
-
-    def _finalization_state(self, obj):
-        if self.surviving(obj):
-            newobj = self.get_forwarding_address(obj)
-            hdr = self.header(newobj)
-            if hdr.tid & GCFLAG_FINALIZATION_ORDERING:
-                return 2
-            else:
-                return 3
-        else:
-            hdr = self.header(obj)
-            if hdr.tid & GCFLAG_FINALIZATION_ORDERING:
-                return 1
-            else:
-                return 0
-
-    def _bump_finalization_state_from_0_to_1(self, obj):
-        ll_assert(self._finalization_state(obj) == 0,
-                  "unexpected finalization state != 0")
-        hdr = self.header(obj)
-        hdr.tid |= GCFLAG_FINALIZATION_ORDERING
-
-    def _recursively_bump_finalization_state_from_2_to_3(self, obj):
-        ll_assert(self._finalization_state(obj) == 2,
-                  "unexpected finalization state != 2")
-        newobj = self.get_forwarding_address(obj)
-        pending = self.tmpstack
-        ll_assert(not pending.non_empty(), "tmpstack not empty")
-        pending.append(newobj)
-        while pending.non_empty():
-            y = pending.pop()
-            hdr = self.header(y)
-            if hdr.tid & GCFLAG_FINALIZATION_ORDERING:     # state 2 ?
-                hdr.tid &= ~GCFLAG_FINALIZATION_ORDERING   # change to state 3
-                self.trace(y, self._append_if_nonnull, pending)
-
-    def _recursively_bump_finalization_state_from_1_to_2(self, obj, scan):
-        # recursively convert objects from state 1 to state 2.
-        # Note that copy() copies all bits, including the
-        # GCFLAG_FINALIZATION_ORDERING.  The mapping between
-        # state numbers and the presence of this bit was designed
-        # for the following to work :-)
-        self.copy(obj)
-        return self.scan_copied(scan)
-
-    def invalidate_weakrefs(self):
-        # walk over list of objects that contain weakrefs
-        # if the object it references survives then update the weakref
-        # otherwise invalidate the weakref
-        new_with_weakref = self.AddressStack()
-        while self.objects_with_weakrefs.non_empty():
-            obj = self.objects_with_weakrefs.pop()
-            if not self.surviving(obj):
-                continue # weakref itself dies
-            obj = self.get_forwarding_address(obj)
-            offset = self.weakpointer_offset(self.get_type_id(obj))
-            pointing_to = (obj + offset).address[0]
-            # XXX I think that pointing_to cannot be NULL here
-            if pointing_to:
-                if self.surviving(pointing_to):
-                    (obj + offset).address[0] = self.get_forwarding_address(
-                        pointing_to)
-                    new_with_weakref.append(obj)
-                else:
-                    (obj + offset).address[0] = NULL
-        self.objects_with_weakrefs.delete()
-        self.objects_with_weakrefs = new_with_weakref
-
-    def update_run_finalizers(self):
-        # we are in an inner collection, caused by a finalizer
-        # the run_finalizers objects need to be copied
-        new_run_finalizer = self.AddressDeque()
-        while self.run_finalizers.non_empty():
-            obj = self.run_finalizers.popleft()
-            new_run_finalizer.append(self.copy(obj))
-        self.run_finalizers.delete()
-        self.run_finalizers = new_run_finalizer
-
-    def _is_external(self, obj):
-        return (self.header(obj).tid & GCFLAG_EXTERNAL) != 0
-
-    def _is_in_the_space(self, obj):
-        return self.tospace <= obj < self.free
-
-    def debug_check_object(self, obj):
-        """Check the invariants about 'obj' that should be true
-        between collections."""
-        tid = self.header(obj).tid
-        if tid & GCFLAG_EXTERNAL:
-            ll_assert(tid & GCFLAG_FORWARDED != 0, "bug: external+!forwarded")
-            ll_assert(not (self.tospace <= obj < self.free),
-                      "external flag but object inside the semispaces")
-        else:
-            ll_assert(not (tid & GCFLAG_FORWARDED), "bug: !external+forwarded")
-            ll_assert(self.tospace <= obj < self.free,
-                      "!external flag but object outside the semispaces")
-        ll_assert(not (tid & GCFLAG_FINALIZATION_ORDERING),
-                  "unexpected GCFLAG_FINALIZATION_ORDERING")
-
-    def debug_check_can_copy(self, obj):
-        ll_assert(not (self.tospace <= obj < self.free),
-                  "copy() on already-copied object")
-
-    STATISTICS_NUMBERS = 0
-
-    def is_in_nursery(self, addr):
-        # overridden in generation.py.
-        return False
-
-    def _compute_current_nursery_hash(self, obj):
-        # overridden in generation.py.
-        raise AssertionError("should not be called")
-
-    def identityhash(self, gcobj):
-        # The following loop should run at most twice.
-        while 1:
-            obj = llmemory.cast_ptr_to_adr(gcobj)
-            hdr = self.header(obj)
-            if hdr.tid & GCFLAG_HASHMASK:
-                break
-            # It's the first time we ask for a hash, and it's not an
-            # external object.  Shrink the top of space by the extra
-            # hash word that will be needed after a collect.
-            shrunk_top = self.top_of_space - llmemory.sizeof(lltype.Signed)
-            if shrunk_top < self.free:
-                # Cannot shrink!  Do a collection, asking for at least
-                # one word of free space, and try again.  May raise
-                # MemoryError.  Obscure: not called directly, but
-                # across an llop, to make sure that there is the
-                # correct push_roots/pop_roots around the call...
-                llop.gc_obtain_free_space(llmemory.Address,
-                                          llmemory.sizeof(lltype.Signed))
-                continue
-            else:
-                # Now we can have side-effects: lower the top of space
-                # and set one of the GC_HASH_TAKEN_xxx flags.
-                self.top_of_space = shrunk_top
-                if self.is_in_nursery(obj):
-                    hdr.tid |= GC_HASH_TAKEN_NURS
-                else:
-                    hdr.tid |= GC_HASH_TAKEN_ADDR
-                break
-        # Now we can return the result
-        objsize = self.get_size(obj)
-        return self._get_object_hash(obj, objsize, hdr.tid)
-
-    def track_heap_parent(self, obj, parent):
-        addr = obj.address[0]
-        parent_idx = llop.get_member_index(lltype.Signed,
-                                           self.get_type_id(parent))
-        idx = llop.get_member_index(lltype.Signed, self.get_type_id(addr))
-        self._ll_typeid_map[parent_idx].links[idx] += 1
-        self.track_heap(addr)
-
-    def track_heap(self, adr):
-        if self._tracked_dict.contains(adr):
-            return
-        self._tracked_dict.add(adr)
-        idx = llop.get_member_index(lltype.Signed, self.get_type_id(adr))
-        self._ll_typeid_map[idx].count += 1
-        totsize = self.get_size(adr) + self.size_gc_header()
-        self._ll_typeid_map[idx].size += llmemory.raw_malloc_usage(totsize)
-        self.trace(adr, self.track_heap_parent, adr)
-
-    @staticmethod
-    def _track_heap_root(obj, self):
-        self.track_heap(obj)
-
-    def heap_stats(self):
-        self._tracked_dict = self.AddressDict()
-        max_tid = self.root_walker.gcdata.max_type_id
-        ll_typeid_map = lltype.malloc(ARRAY_TYPEID_MAP, max_tid, zero=True)
-        for i in range(max_tid):
-            ll_typeid_map[i] = lltype.malloc(TYPEID_MAP, max_tid, zero=True)
-        self._ll_typeid_map = ll_typeid_map
-        self._tracked_dict.add(llmemory.cast_ptr_to_adr(ll_typeid_map))
-        i = 0
-        while i < max_tid:
-            self._tracked_dict.add(llmemory.cast_ptr_to_adr(ll_typeid_map[i]))
-            i += 1
-        self.enumerate_all_roots(SemiSpaceGC._track_heap_root, self)
-        self._ll_typeid_map = lltype.nullptr(ARRAY_TYPEID_MAP)
-        self._tracked_dict.delete()
-        return ll_typeid_map

File rpython/memory/gc/test/test_direct.py

View file
             p = self.stackroots[-50+i]
             assert p[i-1] == chr(i)
 
-class TestSemiSpaceGC(DirectGCTest):
-    from rpython.memory.gc.semispace import SemiSpaceGC as GCClass
-
-    def test_shrink_array(self):
-        S1 = lltype.GcStruct('S1', ('h', lltype.Char),
-                                   ('v', lltype.Array(lltype.Char)))
-        p1 = self.malloc(S1, 2)
-        p1.h = '?'
-        for i in range(2):
-            p1.v[i] = chr(50 + i)
-        addr = llmemory.cast_ptr_to_adr(p1)
-        ok = self.gc.shrink_array(addr, 1)
-        assert ok
-        assert p1.h == '?'
-        assert len(p1.v) == 1
-        for i in range(1):
-            assert p1.v[i] == chr(50 + i)
-
-
-class TestGenerationGC(TestSemiSpaceGC):
-    from rpython.memory.gc.generation import GenerationGC as GCClass
-
-    def test_collect_gen(self):
-        gc = self.gc
-        old_semispace_collect = gc.semispace_collect
-        old_collect_nursery = gc.collect_nursery
-        calls = []
-        def semispace_collect():
-            calls.append('semispace_collect')
-            return old_semispace_collect()
-        def collect_nursery():
-            calls.append('collect_nursery')
-            return old_collect_nursery()
-        gc.collect_nursery = collect_nursery
-        gc.semispace_collect = semispace_collect
-
-        gc.collect()
-        assert calls == ['semispace_collect']
-        calls = []
-
-        gc.collect(0)
-        assert calls == ['collect_nursery']
-        calls = []
-
-        gc.collect(1)
-        assert calls == ['semispace_collect']
-        calls = []
-
-        gc.collect(9)
-        assert calls == ['semispace_collect']
-        calls = []
-
-    def test_assume_young_pointers(self):
-        s0 = lltype.malloc(S, immortal=True)
-        self.consider_constant(s0)
-        s = self.malloc(S)
-        s.x = 1