Commits

Armin Rigo committed 24b1a13

Remove the semispace, generational, and hybrid GCs, after discussing
on IRC their remaining value. Even the educational value is very low
because of the 3-GCs-in-one approach, and the fact that starting from
a semispace GC is really the wrong approach for PyPy --- semispace GCs
work very badly in this context.

Comments (0)

Files changed (14)

rpython/memory/gc/base.py

     if config.translation.gctransformer != "framework":
         raise AssertionError("fix this test")
 
-    classes = {"semispace": "semispace.SemiSpaceGC",
-               "generation": "generation.GenerationGC",
-               "hybrid": "hybrid.HybridGC",
-               "minimark" : "minimark.MiniMarkGC",
+    classes = {"minimark" : "minimark.MiniMarkGC",
                }
     try:
         modulename, classname = classes[config.translation.gc].split('.')

rpython/memory/gc/generation.py

-import sys
-from rpython.memory.gc.semispace import SemiSpaceGC
-from rpython.memory.gc.semispace import GCFLAG_EXTERNAL, GCFLAG_FORWARDED
-from rpython.memory.gc.semispace import GC_HASH_TAKEN_ADDR
-from rpython.memory.gc import env
-from rpython.rtyper.lltypesystem.llmemory import NULL, raw_malloc_usage
-from rpython.rtyper.lltypesystem import lltype, llmemory, llarena
-from rpython.rlib.objectmodel import free_non_gc_object
-from rpython.rlib.debug import ll_assert
-from rpython.rlib.debug import debug_print, debug_start, debug_stop
-from rpython.rlib.rarithmetic import intmask, LONG_BIT
-from rpython.rtyper.lltypesystem.lloperation import llop
-
-WORD = LONG_BIT // 8
-
-# The following flag is never set on young objects, i.e. the ones living
-# in the nursery.  It is initially set on all prebuilt and old objects,
-# and gets cleared by the write_barrier() when we write in them a
-# pointer to a young object.
-GCFLAG_NO_YOUNG_PTRS = SemiSpaceGC.first_unused_gcflag << 0
-
-# The following flag is set on some last-generation objects (== prebuilt
-# objects for GenerationGC, but see also HybridGC).  The flag is set
-# unless the object is already listed in 'last_generation_root_objects'.
-# When a pointer is written inside an object with GCFLAG_NO_HEAP_PTRS
-# set, the write_barrier clears the flag and adds the object to
-# 'last_generation_root_objects'.
-GCFLAG_NO_HEAP_PTRS = SemiSpaceGC.first_unused_gcflag << 1
-
-class GenerationGC(SemiSpaceGC):
-    """A basic generational GC: it's a SemiSpaceGC with an additional
-    nursery for young objects.  A write barrier is used to ensure that
-    old objects that contain pointers to young objects are recorded in
-    a list.
-    """
-    inline_simple_malloc = True
-    inline_simple_malloc_varsize = True
-    needs_write_barrier = True
-    prebuilt_gc_objects_are_static_roots = False
-    first_unused_gcflag = SemiSpaceGC.first_unused_gcflag << 2
-
-    # the following values override the default arguments of __init__ when
-    # translating to a real backend.
-    TRANSLATION_PARAMS = {'space_size': 8*1024*1024,     # 8 MB
-                          'nursery_size': 3*1024*1024,   # 3 MB
-                          'min_nursery_size': 48*1024,
-                          'auto_nursery_size': True}
-
-    nursery_hash_base = -1
-
-    def __init__(self, config,
-                 nursery_size=32*WORD,
-                 min_nursery_size=32*WORD,
-                 auto_nursery_size=False,
-                 space_size=1024*WORD,
-                 max_space_size=sys.maxint//2+1,
-                 **kwds):
-        SemiSpaceGC.__init__(self, config,
-                             space_size = space_size,
-                             max_space_size = max_space_size,
-                             **kwds)
-        assert min_nursery_size <= nursery_size <= space_size // 2
-        self.initial_nursery_size = nursery_size
-        self.auto_nursery_size = auto_nursery_size
-        self.min_nursery_size = min_nursery_size
-
-        # define nursery fields
-        self.reset_nursery()
-        self._setup_wb()
-
-        # compute the constant lower bounds for the attributes
-        # largest_young_fixedsize and largest_young_var_basesize.
-        # It is expected that most (or all) objects have a fixedsize
-        # that is much lower anyway.
-        sz = self.get_young_fixedsize(self.min_nursery_size)
-        self.lb_young_fixedsize = sz
-        sz = self.get_young_var_basesize(self.min_nursery_size)
-        self.lb_young_var_basesize = sz
-
-    def setup(self):
-        self.old_objects_pointing_to_young = self.AddressStack()
-        # ^^^ a list of addresses inside the old objects space; it
-        # may contain static prebuilt objects as well.  More precisely,
-        # it lists exactly the old and static objects whose
-        # GCFLAG_NO_YOUNG_PTRS bit is not set.
-        self.young_objects_with_weakrefs = self.AddressStack()
-
-        self.last_generation_root_objects = self.AddressStack()
-        self.young_objects_with_id = self.AddressDict()
-        SemiSpaceGC.setup(self)
-        self.set_nursery_size(self.initial_nursery_size)
-        # the GC is fully setup now.  The rest can make use of it.
-        if self.auto_nursery_size:
-            newsize = nursery_size_from_env()
-            #if newsize <= 0:
-            #    ---disabled--- just use the default value.
-            #    newsize = env.estimate_best_nursery_size()
-            if newsize > 0:
-                self.set_nursery_size(newsize)
-
-        self.reset_nursery()
-
-    def _teardown(self):
-        self.collect() # should restore last gen objects flags
-        SemiSpaceGC._teardown(self)
-
-    def reset_nursery(self):
-        self.nursery      = NULL
-        self.nursery_top  = NULL
-        self.nursery_free = NULL
-
-    def set_nursery_size(self, newsize):
-        debug_start("gc-set-nursery-size")
-        if newsize < self.min_nursery_size:
-            newsize = self.min_nursery_size
-        if newsize > self.space_size // 2:
-            newsize = self.space_size // 2
-
-        # Compute the new bounds for how large young objects can be
-        # (larger objects are allocated directly old).   XXX adjust
-        self.nursery_size = newsize
-        self.largest_young_fixedsize = self.get_young_fixedsize(newsize)
-        self.largest_young_var_basesize = self.get_young_var_basesize(newsize)
-        scale = 0
-        while (self.min_nursery_size << (scale+1)) <= newsize:
-            scale += 1
-        self.nursery_scale = scale
-        debug_print("nursery_size =", newsize)
-        debug_print("largest_young_fixedsize =",
-                    self.largest_young_fixedsize)
-        debug_print("largest_young_var_basesize =",
-                    self.largest_young_var_basesize)
-        debug_print("nursery_scale =", scale)
-        # we get the following invariant:
-        assert self.nursery_size >= (self.min_nursery_size << scale)
-
-        # Force a full collect to remove the current nursery whose size
-        # no longer matches the bounds that we just computed.  This must
-        # be done after changing the bounds, because it might re-create
-        # a new nursery (e.g. if it invokes finalizers).
-        self.semispace_collect()
-        debug_stop("gc-set-nursery-size")
-
-    @staticmethod
-    def get_young_fixedsize(nursery_size):
-        return nursery_size // 2 - 1
-
-    @staticmethod
-    def get_young_var_basesize(nursery_size):
-        return nursery_size // 4 - 1
-
-    @classmethod
-    def JIT_max_size_of_young_obj(cls):
-        min_nurs_size = cls.TRANSLATION_PARAMS['min_nursery_size']
-        return cls.get_young_fixedsize(min_nurs_size)
-
-    def is_in_nursery(self, addr):
-        ll_assert(llmemory.cast_adr_to_int(addr) & 1 == 0,
-                  "odd-valued (i.e. tagged) pointer unexpected here")
-        return self.nursery <= addr < self.nursery_top
-
-    def appears_to_be_in_nursery(self, addr):
-        # same as is_in_nursery(), but may return True accidentally if
-        # 'addr' is a tagged pointer with just the wrong value.
-        if not self.translated_to_c:
-            if not self.is_valid_gc_object(addr):
-                return False
-        return self.nursery <= addr < self.nursery_top
-
-    def malloc_fixedsize_clear(self, typeid, size,
-                               has_finalizer=False,
-                               is_finalizer_light=False,
-                               contains_weakptr=False):
-        if (has_finalizer or
-            (raw_malloc_usage(size) > self.lb_young_fixedsize and
-             raw_malloc_usage(size) > self.largest_young_fixedsize)):
-            # ^^^ we do two size comparisons; the first one appears redundant,
-            #     but it can be constant-folded if 'size' is a constant; then
-            #     it almost always folds down to False, which kills the
-            #     second comparison as well.
-            ll_assert(not contains_weakptr, "wrong case for mallocing weakref")
-            # "non-simple" case or object too big: don't use the nursery
-            return SemiSpaceGC.malloc_fixedsize_clear(self, typeid, size,
-                                                      has_finalizer,
-                                                      is_finalizer_light,
-                                                      contains_weakptr)
-        size_gc_header = self.gcheaderbuilder.size_gc_header
-        totalsize = size_gc_header + size
-        result = self.nursery_free
-        if raw_malloc_usage(totalsize) > self.nursery_top - result:
-            result = self.collect_nursery()
-        llarena.arena_reserve(result, totalsize)
-        # GCFLAG_NO_YOUNG_PTRS is never set on young objs
-        self.init_gc_object(result, typeid, flags=0)
-        self.nursery_free = result + totalsize
-        if contains_weakptr:
-            self.young_objects_with_weakrefs.append(result + size_gc_header)
-        return llmemory.cast_adr_to_ptr(result+size_gc_header, llmemory.GCREF)
-
-    def malloc_varsize_clear(self, typeid, length, size, itemsize,
-                             offset_to_length):
-        # Only use the nursery if there are not too many items.
-        if not raw_malloc_usage(itemsize):
-            too_many_items = False
-        else:
-            # The following line is usually constant-folded because both
-            # min_nursery_size and itemsize are constants (the latter
-            # due to inlining).
-            maxlength_for_minimal_nursery = (self.min_nursery_size // 4 //
-                                             raw_malloc_usage(itemsize))
-            
-            # The actual maximum length for our nursery depends on how
-            # many times our nursery is bigger than the minimal size.
-            # The computation is done in this roundabout way so that
-            # only the only remaining computation is the following
-            # shift.
-            maxlength = maxlength_for_minimal_nursery << self.nursery_scale
-            too_many_items = length > maxlength
-
-        if (too_many_items or
-            (raw_malloc_usage(size) > self.lb_young_var_basesize and
-             raw_malloc_usage(size) > self.largest_young_var_basesize)):
-            # ^^^ we do two size comparisons; the first one appears redundant,
-            #     but it can be constant-folded if 'size' is a constant; then
-            #     it almost always folds down to False, which kills the
-            #     second comparison as well.
-            return SemiSpaceGC.malloc_varsize_clear(self, typeid, length, size,
-                                                    itemsize, offset_to_length)
-        # with the above checks we know now that totalsize cannot be more
-        # than about half of the nursery size; in particular, the + and *
-        # cannot overflow
-        size_gc_header = self.gcheaderbuilder.size_gc_header
-        totalsize = size_gc_header + size + itemsize * length
-        result = self.nursery_free
-        if raw_malloc_usage(totalsize) > self.nursery_top - result:
-            result = self.collect_nursery()
-        llarena.arena_reserve(result, totalsize)
-        # GCFLAG_NO_YOUNG_PTRS is never set on young objs
-        self.init_gc_object(result, typeid, flags=0)
-        (result + size_gc_header + offset_to_length).signed[0] = length
-        self.nursery_free = result + llarena.round_up_for_allocation(totalsize)
-        return llmemory.cast_adr_to_ptr(result+size_gc_header, llmemory.GCREF)
-
-    # override the init_gc_object methods to change the default value of 'flags',
-    # used by objects that are directly created outside the nursery by the SemiSpaceGC.
-    # These objects must have the GCFLAG_NO_YOUNG_PTRS flag set immediately.
-    def init_gc_object(self, addr, typeid, flags=GCFLAG_NO_YOUNG_PTRS):
-        SemiSpaceGC.init_gc_object(self, addr, typeid, flags)
-
-    def init_gc_object_immortal(self, addr, typeid,
-                                flags=GCFLAG_NO_YOUNG_PTRS|GCFLAG_NO_HEAP_PTRS):
-        SemiSpaceGC.init_gc_object_immortal(self, addr, typeid, flags)
-
-    # flags exposed for the HybridGC subclass
-    GCFLAGS_FOR_NEW_YOUNG_OBJECTS = 0   # NO_YOUNG_PTRS never set on young objs
-    GCFLAGS_FOR_NEW_EXTERNAL_OBJECTS = (GCFLAG_EXTERNAL | GCFLAG_FORWARDED |
-                                        GCFLAG_NO_YOUNG_PTRS |
-                                        GC_HASH_TAKEN_ADDR)
-
-    # ____________________________________________________________
-    # Support code for full collections
-
-    def collect(self, gen=1):
-        if gen == 0:
-            self.collect_nursery()
-        else:
-            SemiSpaceGC.collect(self)
-
-    def semispace_collect(self, size_changing=False):
-        self.reset_young_gcflags() # we are doing a full collection anyway
-        self.weakrefs_grow_older()
-        self.ids_grow_older()
-        self.reset_nursery()
-        SemiSpaceGC.semispace_collect(self, size_changing)
-
-    def make_a_copy(self, obj, objsize):
-        tid = self.header(obj).tid
-        # During a full collect, all copied objects might implicitly come
-        # from the nursery.  In case they do, we must add this flag:
-        tid |= GCFLAG_NO_YOUNG_PTRS
-        return self._make_a_copy_with_tid(obj, objsize, tid)
-        # history: this was missing and caused an object to become old but without the
-        # flag set.  Such an object is bogus in the sense that the write_barrier doesn't
-        # work on it.  So it can eventually contain a ptr to a young object but we didn't
-        # know about it.  That ptr was not updated in the next minor collect... boom at
-        # the next usage.
-
-    def reset_young_gcflags(self):
-        # This empties self.old_objects_pointing_to_young, and puts the
-        # GCFLAG_NO_YOUNG_PTRS back on all these objects.  We could put
-        # the flag back more lazily but we expect this list to be short
-        # anyway, and it's much saner to stick to the invariant:
-        # non-young objects all have GCFLAG_NO_YOUNG_PTRS set unless
-        # they are listed in old_objects_pointing_to_young.
-        oldlist = self.old_objects_pointing_to_young
-        while oldlist.non_empty():
-            obj = oldlist.pop()
-            hdr = self.header(obj)
-            hdr.tid |= GCFLAG_NO_YOUNG_PTRS
-
-    def weakrefs_grow_older(self):
-        while self.young_objects_with_weakrefs.non_empty():
-            obj = self.young_objects_with_weakrefs.pop()
-            self.objects_with_weakrefs.append(obj)
-
-    def collect_roots(self):
-        """GenerationGC: collects all roots.
-           HybridGC: collects all roots, excluding the generation 3 ones.
-        """
-        # Warning!  References from static (and possibly gen3) objects
-        # are found by collect_last_generation_roots(), which must be
-        # called *first*!  If it is called after walk_roots(), then the
-        # HybridGC explodes if one of the _collect_root causes an object
-        # to be added to self.last_generation_root_objects.  Indeed, in
-        # this case, the newly added object is traced twice: once by
-        # collect_last_generation_roots() and once because it was added
-        # in self.rawmalloced_objects_to_trace.
-        self.collect_last_generation_roots()
-        self.root_walker.walk_roots(
-            SemiSpaceGC._collect_root,  # stack roots
-            SemiSpaceGC._collect_root,  # static in prebuilt non-gc structures
-            None)   # we don't need the static in prebuilt gc objects
-
-    def collect_last_generation_roots(self):
-        stack = self.last_generation_root_objects
-        self.last_generation_root_objects = self.AddressStack()
-        while stack.non_empty():
-            obj = stack.pop()
-            self.header(obj).tid |= GCFLAG_NO_HEAP_PTRS
-            # ^^^ the flag we just added will be removed immediately if
-            # the object still contains pointers to younger objects
-            self.trace(obj, self._trace_external_obj, obj)
-        stack.delete()
-
-    def _trace_external_obj(self, pointer, obj):
-        addr = pointer.address[0]
-        newaddr = self.copy(addr)
-        pointer.address[0] = newaddr
-        self.write_into_last_generation_obj(obj, newaddr)
-
-    # ____________________________________________________________
-    # Implementation of nursery-only collections
-
-    def collect_nursery(self):
-        if self.nursery_size > self.top_of_space - self.free:
-            # the semispace is running out, do a full collect
-            self.obtain_free_space(self.nursery_size)
-            ll_assert(self.nursery_size <= self.top_of_space - self.free,
-                         "obtain_free_space failed to do its job")
-        if self.nursery:
-            debug_start("gc-minor")
-            debug_print("--- minor collect ---")
-            debug_print("nursery:", self.nursery, "to", self.nursery_top)
-            # a nursery-only collection
-            scan = beginning = self.free
-            self.collect_oldrefs_to_nursery()
-            self.collect_roots_in_nursery()
-            scan = self.scan_objects_just_copied_out_of_nursery(scan)
-            # at this point, all static and old objects have got their
-            # GCFLAG_NO_YOUNG_PTRS set again by trace_and_drag_out_of_nursery
-            if self.young_objects_with_weakrefs.non_empty():
-                self.invalidate_young_weakrefs()
-            if self.young_objects_with_id.length() > 0:
-                self.update_young_objects_with_id()
-            # mark the nursery as free and fill it with zeroes again
-            llarena.arena_reset(self.nursery, self.nursery_size, 2)
-            debug_print("survived (fraction of the size):",
-                        float(scan - beginning) / self.nursery_size)
-            debug_stop("gc-minor")
-            #self.debug_check_consistency()   # -- quite expensive
-        else:
-            # no nursery - this occurs after a full collect, triggered either
-            # just above or by some previous non-nursery-based allocation.
-            # Grab a piece of the current space for the nursery.
-            self.nursery = self.free
-            self.nursery_top = self.nursery + self.nursery_size
-            self.free = self.nursery_top
-        self.nursery_free = self.nursery
-        # at this point we know that the nursery is empty
-        self.change_nursery_hash_base()
-        return self.nursery_free
-
-    def change_nursery_hash_base(self):
-        # The following should be enough to ensure that young objects
-        # tend to always get a different hash.  It also makes sure that
-        # nursery_hash_base is not a multiple of 4, to avoid collisions
-        # with the hash of non-young objects.
-        hash_base = self.nursery_hash_base
-        hash_base += self.nursery_size - 1
-        if (hash_base & 3) == 0:
-            hash_base -= 1
-        self.nursery_hash_base = intmask(hash_base)
-
-    # NB. we can use self.copy() to move objects out of the nursery,
-    # but only if the object was really in the nursery.
-
-    def collect_oldrefs_to_nursery(self):
-        # Follow the old_objects_pointing_to_young list and move the
-        # young objects they point to out of the nursery.
-        count = 0
-        oldlist = self.old_objects_pointing_to_young
-        while oldlist.non_empty():
-            count += 1
-            obj = oldlist.pop()
-            hdr = self.header(obj)
-            hdr.tid |= GCFLAG_NO_YOUNG_PTRS
-            self.trace_and_drag_out_of_nursery(obj)
-        debug_print("collect_oldrefs_to_nursery", count)
-
-    def collect_roots_in_nursery(self):
-        # we don't need to trace prebuilt GcStructs during a minor collect:
-        # if a prebuilt GcStruct contains a pointer to a young object,
-        # then the write_barrier must have ensured that the prebuilt
-        # GcStruct is in the list self.old_objects_pointing_to_young.
-        self.root_walker.walk_roots(
-            GenerationGC._collect_root_in_nursery,  # stack roots
-            GenerationGC._collect_root_in_nursery,  # static in prebuilt non-gc
-            None)                                   # static in prebuilt gc
-
-    def _collect_root_in_nursery(self, root):
-        obj = root.address[0]
-        if self.is_in_nursery(obj):
-            root.address[0] = self.copy(obj)
-
-    def scan_objects_just_copied_out_of_nursery(self, scan):
-        while scan < self.free:
-            curr = scan + self.size_gc_header()
-            self.trace_and_drag_out_of_nursery(curr)
-            scan += self.size_gc_header() + self.get_size_incl_hash(curr)
-        return scan
-
-    def trace_and_drag_out_of_nursery(self, obj):
-        """obj must not be in the nursery.  This copies all the
-        young objects it references out of the nursery.
-        """
-        self.trace(obj, self._trace_drag_out, None)
-
-    def _trace_drag_out(self, pointer, ignored):
-        if self.is_in_nursery(pointer.address[0]):
-            pointer.address[0] = self.copy(pointer.address[0])
-
-    # The code relies on the fact that no weakref can be an old object
-    # weakly pointing to a young object.  Indeed, weakrefs are immutable
-    # so they cannot point to an object that was created after it.
-    def invalidate_young_weakrefs(self):
-        # walk over the list of objects that contain weakrefs and are in the
-        # nursery.  if the object it references survives then update the
-        # weakref; otherwise invalidate the weakref
-        while self.young_objects_with_weakrefs.non_empty():
-            obj = self.young_objects_with_weakrefs.pop()
-            if not self.surviving(obj):
-                continue # weakref itself dies
-            obj = self.get_forwarding_address(obj)
-            offset = self.weakpointer_offset(self.get_type_id(obj))
-            pointing_to = (obj + offset).address[0]
-            if self.is_in_nursery(pointing_to):
-                if self.surviving(pointing_to):
-                    (obj + offset).address[0] = self.get_forwarding_address(
-                        pointing_to)
-                else:
-                    (obj + offset).address[0] = NULL
-                    continue    # no need to remember this weakref any longer
-            self.objects_with_weakrefs.append(obj)
-
-    # for the JIT: a minimal description of the write_barrier() method
-    # (the JIT assumes it is of the shape
-    #  "if addr_struct.int0 & JIT_WB_IF_FLAG: remember_young_pointer()")
-    JIT_WB_IF_FLAG = GCFLAG_NO_YOUNG_PTRS
-
-    def write_barrier(self, newvalue, addr_struct):
-         if self.header(addr_struct).tid & GCFLAG_NO_YOUNG_PTRS:
-            self.remember_young_pointer(addr_struct, newvalue)
-
-    def _setup_wb(self):
-        DEBUG = self.DEBUG
-        # The purpose of attaching remember_young_pointer to the instance
-        # instead of keeping it as a regular method is to help the JIT call it.
-        # Additionally, it makes the code in write_barrier() marginally smaller
-        # (which is important because it is inlined *everywhere*).
-        # For x86, there is also an extra requirement: when the JIT calls
-        # remember_young_pointer(), it assumes that it will not touch the SSE
-        # registers, so it does not save and restore them (that's a *hack*!).
-        def remember_young_pointer(addr_struct, addr):
-            #llop.debug_print(lltype.Void, "\tremember_young_pointer",
-            #                 addr_struct, "<-", addr)
-            if DEBUG:
-                ll_assert(not self.is_in_nursery(addr_struct),
-                          "nursery object with GCFLAG_NO_YOUNG_PTRS")
-            #
-            # What is important in this function is that it *must*
-            # clear the flag GCFLAG_NO_YOUNG_PTRS from 'addr_struct'
-            # if 'addr' is in the nursery.  It is ok if, accidentally,
-            # it also clears the flag in some more rare cases, like
-            # 'addr' being a tagged pointer whose value happens to be
-            # a large integer that fools is_in_nursery().
-            if self.appears_to_be_in_nursery(addr):
-                self.old_objects_pointing_to_young.append(addr_struct)
-                self.header(addr_struct).tid &= ~GCFLAG_NO_YOUNG_PTRS
-            self.write_into_last_generation_obj(addr_struct, addr)
-        remember_young_pointer._dont_inline_ = True
-        self.remember_young_pointer = remember_young_pointer
-
-    def write_into_last_generation_obj(self, addr_struct, addr):
-        objhdr = self.header(addr_struct)
-        if objhdr.tid & GCFLAG_NO_HEAP_PTRS:
-            if (self.is_valid_gc_object(addr) and
-                    not self.is_last_generation(addr)):
-                objhdr.tid &= ~GCFLAG_NO_HEAP_PTRS
-                self.last_generation_root_objects.append(addr_struct)
-    write_into_last_generation_obj._always_inline_ = True
-
-    def assume_young_pointers(self, addr_struct):
-        objhdr = self.header(addr_struct)
-        if objhdr.tid & GCFLAG_NO_YOUNG_PTRS:
-            self.old_objects_pointing_to_young.append(addr_struct)
-            objhdr.tid &= ~GCFLAG_NO_YOUNG_PTRS
-        if objhdr.tid & GCFLAG_NO_HEAP_PTRS:
-            objhdr.tid &= ~GCFLAG_NO_HEAP_PTRS
-            self.last_generation_root_objects.append(addr_struct)
-
-    def writebarrier_before_copy(self, source_addr, dest_addr,
-                                 source_start, dest_start, length):
-        """ This has the same effect as calling writebarrier over
-        each element in dest copied from source, except it might reset
-        one of the following flags a bit too eagerly, which means we'll have
-        a bit more objects to track, but being on the safe side.
-        """
-        source_hdr = self.header(source_addr)
-        dest_hdr = self.header(dest_addr)
-        if dest_hdr.tid & GCFLAG_NO_YOUNG_PTRS == 0:
-            return True
-        # ^^^ a fast path of write-barrier
-        if source_hdr.tid & GCFLAG_NO_YOUNG_PTRS == 0:
-            # there might be an object in source that is in nursery
-            self.old_objects_pointing_to_young.append(dest_addr)
-            dest_hdr.tid &= ~GCFLAG_NO_YOUNG_PTRS
-        if dest_hdr.tid & GCFLAG_NO_HEAP_PTRS:
-            if source_hdr.tid & GCFLAG_NO_HEAP_PTRS == 0:
-                # ^^^ equivalend of addr from source not being in last
-                #     gen
-                dest_hdr.tid &= ~GCFLAG_NO_HEAP_PTRS
-                self.last_generation_root_objects.append(dest_addr)
-        return True
-
-    def is_last_generation(self, obj):
-        # overridden by HybridGC
-        return (self.header(obj).tid & GCFLAG_EXTERNAL) != 0
-
-    def _compute_id(self, obj):
-        if self.is_in_nursery(obj):
-            result = self.young_objects_with_id.get(obj)
-            if not result:
-                result = self._next_id()
-                self.young_objects_with_id.setitem(obj, result)
-            return result
-        else:
-            return SemiSpaceGC._compute_id(self, obj)
-
-    def update_young_objects_with_id(self):
-        self.young_objects_with_id.foreach(self._update_object_id,
-                                           self.objects_with_id)
-        self.young_objects_with_id.clear()
-        # NB. the clear() also makes the dictionary shrink back to its
-        # minimal size, which is actually a good idea: a large, mostly-empty
-        # table is bad for the next call to 'foreach'.
-
-    def ids_grow_older(self):
-        self.young_objects_with_id.foreach(self._id_grow_older, None)
-        self.young_objects_with_id.clear()
-
-    def _id_grow_older(self, obj, id, ignored):
-        self.objects_with_id.setitem(obj, id)
-
-    def _compute_current_nursery_hash(self, obj):
-        return intmask(llmemory.cast_adr_to_int(obj) + self.nursery_hash_base)
-
-    def enumerate_all_roots(self, callback, arg):
-        self.last_generation_root_objects.foreach(callback, arg)
-        SemiSpaceGC.enumerate_all_roots(self, callback, arg)
-    enumerate_all_roots._annspecialcase_ = 'specialize:arg(1)'
-
-    def debug_check_object(self, obj):
-        """Check the invariants about 'obj' that should be true
-        between collections."""
-        SemiSpaceGC.debug_check_object(self, obj)
-        tid = self.header(obj).tid
-        if tid & GCFLAG_NO_YOUNG_PTRS:
-            ll_assert(not self.is_in_nursery(obj),
-                      "nursery object with GCFLAG_NO_YOUNG_PTRS")
-            self.trace(obj, self._debug_no_nursery_pointer, None)
-        elif not self.is_in_nursery(obj):
-            ll_assert(self._d_oopty.contains(obj),
-                      "missing from old_objects_pointing_to_young")
-        if tid & GCFLAG_NO_HEAP_PTRS:
-            ll_assert(self.is_last_generation(obj),
-                      "GCFLAG_NO_HEAP_PTRS on non-3rd-generation object")
-            self.trace(obj, self._debug_no_gen1or2_pointer, None)
-        elif self.is_last_generation(obj):
-            ll_assert(self._d_lgro.contains(obj),
-                      "missing from last_generation_root_objects")
-
-    def _debug_no_nursery_pointer(self, root, ignored):
-        ll_assert(not self.is_in_nursery(root.address[0]),
-                  "GCFLAG_NO_YOUNG_PTRS but found a young pointer")
-    def _debug_no_gen1or2_pointer(self, root, ignored):
-        target = root.address[0]
-        ll_assert(not target or self.is_last_generation(target),
-                  "GCFLAG_NO_HEAP_PTRS but found a pointer to gen1or2")
-
-    def debug_check_consistency(self):
-        if self.DEBUG:
-            self._d_oopty = self.old_objects_pointing_to_young.stack2dict()
-            self._d_lgro = self.last_generation_root_objects.stack2dict()
-            SemiSpaceGC.debug_check_consistency(self)
-            self._d_oopty.delete()
-            self._d_lgro.delete()
-            self.old_objects_pointing_to_young.foreach(
-                self._debug_check_flag_1, None)
-            self.last_generation_root_objects.foreach(
-                self._debug_check_flag_2, None)
-
-    def _debug_check_flag_1(self, obj, ignored):
-        ll_assert(not (self.header(obj).tid & GCFLAG_NO_YOUNG_PTRS),
-                  "unexpected GCFLAG_NO_YOUNG_PTRS")
-    def _debug_check_flag_2(self, obj, ignored):
-        ll_assert(not (self.header(obj).tid & GCFLAG_NO_HEAP_PTRS),
-                  "unexpected GCFLAG_NO_HEAP_PTRS")
-
-    def debug_check_can_copy(self, obj):
-        if self.is_in_nursery(obj):
-            pass    # it's ok to copy an object out of the nursery
-        else:
-            SemiSpaceGC.debug_check_can_copy(self, obj)
-
-
-# ____________________________________________________________
-
-def nursery_size_from_env():
-    return env.read_from_env('PYPY_GENERATIONGC_NURSERY')

rpython/memory/gc/hybrid.py

-import sys
-from rpython.memory.gc.semispace import SemiSpaceGC
-from rpython.memory.gc.generation import GenerationGC, WORD
-from rpython.memory.gc.semispace import GCFLAG_EXTERNAL, GCFLAG_FORWARDED
-from rpython.memory.gc.semispace import GCFLAG_HASHMASK
-from rpython.memory.gc.generation import GCFLAG_NO_YOUNG_PTRS
-from rpython.memory.gc.generation import GCFLAG_NO_HEAP_PTRS
-from rpython.memory.gc.semispace import GC_HASH_TAKEN_ADDR
-from rpython.memory.gc.semispace import GC_HASH_HASFIELD
-from rpython.rtyper.lltypesystem import lltype, llmemory, llarena
-from rpython.rtyper.lltypesystem.llmemory import raw_malloc_usage
-from rpython.rtyper.lltypesystem.lloperation import llop
-from rpython.rlib.debug import ll_assert, have_debug_prints
-from rpython.rlib.debug import debug_print, debug_start, debug_stop
-from rpython.rlib.rarithmetic import ovfcheck
-from rpython.rtyper.lltypesystem import rffi
-
-#   _______in the semispaces_________      ______external (non-moving)_____
-#  /                                 \    /                                \
-#                                          ___raw_malloc'ed__    _prebuilt_
-#  +----------------------------------+   /                  \  /          \
-#  |    | | | | |    |                |
-#  |    | | | | |    |                |    age < max      age == max
-#  |nur-|o|o|o|o|    |                |      +---+      +---+      +---+
-#  |sery|b|b|b|b|free|     empty      |      |obj|      |obj|      |obj|  
-#  |    |j|j|j|j|    |                |      +---+      +---+      +---+  
-#  |    | | | | |    |                |       +---+      +---+      +---+
-#  +-----------------+----------------+       |obj|      |obj|      |obj|
-#        age <= max                           +---+      +---+      +---+
-#            
-#  |gen1|------------- generation 2 -----------------|-----generation 3-----|
-#
-# Object lists:
-#   * gen2_rawmalloced_objects
-#   * gen3_rawmalloced_objects
-#   * old_objects_pointing_to_young: gen2or3 objs that point to gen1 objs
-#   * last_generation_root_objects: gen3 objs that point to gen1or2 objs
-#
-# How to tell the objects apart:
-#   * external:      tid & GCFLAG_EXTERNAL
-#   * gen1:          is_in_nursery(obj)
-#   * gen3:          (tid & (GCFLAG_EXTERNAL|GCFLAG_AGE_MASK)) ==
-#                           (GCFLAG_EXTERNAL|GCFLAG_AGE_MAX)
-#
-# Some invariants:
-#   * gen3 are either GCFLAG_NO_HEAP_PTRS or in 'last_generation_root_objects'
-#   * between collections, GCFLAG_UNVISITED set exactly for gen2_rawmalloced
-#
-# A malloc_varsize() of large objects returns objects that are external
-# but initially of generation 2.  Old objects from the semispaces are
-# moved to external objects directly as generation 3.
-
-# The "age" of an object is the number of times it survived a full
-# collections, without counting the step that moved it out of the nursery.
-# When a semispace-based object would grow older than MAX_SEMISPACE_AGE,
-# it is instead copied to a nonmoving location.  For example, a value of 3
-# ensures that an object is copied at most 5 times in total: from the
-# nursery to the semispace, then three times between the two spaces,
-# then one last time to a nonmoving location.
-MAX_SEMISPACE_AGE = 3
-
-GCFLAG_UNVISITED = GenerationGC.first_unused_gcflag << 0
-_gcflag_next_bit = GenerationGC.first_unused_gcflag << 1
-GCFLAG_AGE_ONE   = _gcflag_next_bit
-GCFLAG_AGE_MAX   = _gcflag_next_bit * MAX_SEMISPACE_AGE
-GCFLAG_AGE_MASK  = 0
-while GCFLAG_AGE_MASK < GCFLAG_AGE_MAX:
-    GCFLAG_AGE_MASK |= _gcflag_next_bit
-    _gcflag_next_bit <<= 1
-
-# The 3rd generation objects are only collected after the following
-# number of calls to semispace_collect():
-GENERATION3_COLLECT_THRESHOLD = 20
-
-class HybridGC(GenerationGC):
-    """A two-generations semi-space GC like the GenerationGC,
-    except that objects above a certain size are handled separately:
-    they are allocated via raw_malloc/raw_free in a mark-n-sweep fashion.
-    """
-    first_unused_gcflag = _gcflag_next_bit
-    prebuilt_gc_objects_are_static_roots = True
-
-    # the following values override the default arguments of __init__ when
-    # translating to a real backend.
-    TRANSLATION_PARAMS = GenerationGC.TRANSLATION_PARAMS.copy()
-    TRANSLATION_PARAMS['large_object'] = 6*1024    # XXX adjust
-    TRANSLATION_PARAMS['large_object_gcptrs'] = 31*1024    # XXX adjust
-    TRANSLATION_PARAMS['min_nursery_size'] = 128*1024
-    # condition: large_object <= large_object_gcptrs < min_nursery_size/4
-
-    def __init__(self, *args, **kwds):
-        large_object = kwds.pop('large_object', 6*WORD)
-        large_object_gcptrs = kwds.pop('large_object_gcptrs', 8*WORD)
-        self.generation3_collect_threshold = kwds.pop(
-            'generation3_collect_threshold', GENERATION3_COLLECT_THRESHOLD)
-        GenerationGC.__init__(self, *args, **kwds)
-
-        # Objects whose total size is at least 'large_object' bytes are
-        # allocated separately in a mark-n-sweep fashion.  If the object
-        # has GC pointers in its varsized part, we use instead the
-        # higher limit 'large_object_gcptrs'.  The idea is that
-        # separately allocated objects are allocated immediately "old"
-        # and it's not good to have too many pointers from old to young
-        # objects.
-
-        # In this class, we assume that the 'large_object' limit is not
-        # very high, so that all objects that wouldn't easily fit in the
-        # nursery are considered large by this limit.  This is the
-        # meaning of the 'assert' below.
-        self.nonlarge_max = large_object - 1
-        self.nonlarge_gcptrs_max = large_object_gcptrs - 1
-        assert self.nonlarge_gcptrs_max <= self.lb_young_var_basesize
-        assert self.nonlarge_max <= self.nonlarge_gcptrs_max
-
-    def setup(self):
-        self.large_objects_collect_trigger = self.param_space_size
-        self._initial_trigger = self.large_objects_collect_trigger
-        self.rawmalloced_objects_to_trace = self.AddressStack()
-        self.count_semispaceonly_collects = 0
-
-        self.gen2_rawmalloced_objects = self.AddressStack()
-        self.gen3_rawmalloced_objects = self.AddressStack()
-        GenerationGC.setup(self)
-
-    def set_max_heap_size(self, size):
-        raise NotImplementedError
-
-    # NB. to simplify the code, only varsized objects can be considered
-    # 'large'.
-
-    def malloc_varsize_clear(self, typeid, length, size, itemsize,
-                             offset_to_length):
-        size_gc_header = self.gcheaderbuilder.size_gc_header
-        nonvarsize = size_gc_header + size
-
-        # Compute the maximal length that makes the object still
-        # below 'nonlarge_max'.  All the following logic is usually
-        # constant-folded because self.nonlarge_max, size and itemsize
-        # are all constants (the arguments are constant due to
-        # inlining) and self.has_gcptr_in_varsize() is constant-folded.
-        if self.has_gcptr_in_varsize(typeid):
-            nonlarge_max = self.nonlarge_gcptrs_max
-        else:
-            nonlarge_max = self.nonlarge_max
-
-        if not raw_malloc_usage(itemsize):
-            too_many_items = raw_malloc_usage(nonvarsize) > nonlarge_max
-        else:
-            maxlength = nonlarge_max - raw_malloc_usage(nonvarsize)
-            maxlength = maxlength // raw_malloc_usage(itemsize)
-            too_many_items = length > maxlength
-
-        if not too_many_items:
-            # With the above checks we know now that totalsize cannot be more
-            # than 'nonlarge_max'; in particular, the + and * cannot overflow.
-            # Let's try to fit the object in the nursery.
-            totalsize = nonvarsize + itemsize * length
-            result = self.nursery_free
-            if raw_malloc_usage(totalsize) <= self.nursery_top - result:
-                llarena.arena_reserve(result, totalsize)
-                # GCFLAG_NO_YOUNG_PTRS is never set on young objs
-                self.init_gc_object(result, typeid, flags=0)
-                (result + size_gc_header + offset_to_length).signed[0] = length
-                self.nursery_free = result + llarena.round_up_for_allocation(
-                    totalsize)
-                return llmemory.cast_adr_to_ptr(result+size_gc_header,
-                                                llmemory.GCREF)
-        return self.malloc_varsize_slowpath(typeid, length)
-
-    def malloc_varsize_slowpath(self, typeid, length, force_nonmovable=False):
-        # For objects that are too large, or when the nursery is exhausted.
-        # In order to keep malloc_varsize_clear() as compact as possible,
-        # we recompute what we need in this slow path instead of passing
-        # it all as function arguments.
-        size_gc_header = self.gcheaderbuilder.size_gc_header
-        nonvarsize = size_gc_header + self.fixed_size(typeid)
-        itemsize = self.varsize_item_sizes(typeid)
-        offset_to_length = self.varsize_offset_to_length(typeid)
-        try:
-            varsize = ovfcheck(itemsize * length)
-            totalsize = ovfcheck(nonvarsize + varsize)
-        except OverflowError:
-            raise MemoryError()
-        if self.has_gcptr_in_varsize(typeid):
-            nonlarge_max = self.nonlarge_gcptrs_max
-        else:
-            nonlarge_max = self.nonlarge_max
-        if force_nonmovable or raw_malloc_usage(totalsize) > nonlarge_max:
-            result = self.malloc_varsize_marknsweep(totalsize)
-            flags = self.GCFLAGS_FOR_NEW_EXTERNAL_OBJECTS | GCFLAG_UNVISITED
-        else:
-            result = self.malloc_varsize_collecting_nursery(totalsize)
-            flags = self.GCFLAGS_FOR_NEW_YOUNG_OBJECTS
-        self.init_gc_object(result, typeid, flags)
-        (result + size_gc_header + offset_to_length).signed[0] = length
-        return llmemory.cast_adr_to_ptr(result+size_gc_header, llmemory.GCREF)
-
-    malloc_varsize_slowpath._dont_inline_ = True
-
-    def malloc_varsize_nonmovable(self, typeid, length):
-        return self.malloc_varsize_slowpath(typeid, length, True)
-
-    def malloc_nonmovable(self, typeid, length, zero):
-        # helper for testing, same as GCBase.malloc
-        if self.is_varsize(typeid):
-            gcref = self.malloc_varsize_slowpath(typeid, length, True)
-        else:
-            raise NotImplementedError("Not supported")
-        return llmemory.cast_ptr_to_adr(gcref)
-
-    def can_move(self, addr):
-        tid = self.header(addr).tid
-        return not (tid & GCFLAG_EXTERNAL)
-
-    def malloc_varsize_collecting_nursery(self, totalsize):
-        result = self.collect_nursery()
-        ll_assert(raw_malloc_usage(totalsize) <= self.nursery_top - result,
-                  "not enough room in malloc_varsize_collecting_nursery()")
-        llarena.arena_reserve(result, totalsize)
-        self.nursery_free = result + llarena.round_up_for_allocation(
-            totalsize)
-        return result
-
-    def _check_rawsize_alloced(self, size_estimate):
-        self.large_objects_collect_trigger -= size_estimate
-        if self.large_objects_collect_trigger < 0:
-            debug_start("gc-rawsize-collect")
-            debug_print("allocated", (self._initial_trigger -
-                                      self.large_objects_collect_trigger),
-                        "bytes, triggering full collection")
-            self.semispace_collect()
-            debug_stop("gc-rawsize-collect")
-
-    def malloc_varsize_marknsweep(self, totalsize):
-        # In order to free the large objects from time to time, we
-        # arbitrarily force a full collect() if none occurs when we have
-        # allocated self.space_size + rawmalloced bytes of large objects.
-        self._check_rawsize_alloced(raw_malloc_usage(totalsize))
-        result = self.allocate_external_object(totalsize)
-        if not result:
-            raise MemoryError()
-        # The parent classes guarantee zero-filled allocations, so we
-        # need to follow suit.
-        llmemory.raw_memclear(result, totalsize)
-        size_gc_header = self.gcheaderbuilder.size_gc_header
-        self.gen2_rawmalloced_objects.append(result + size_gc_header)
-        return result
-
-    def allocate_external_object(self, totalsize):
-        # XXX maybe we should use arena_malloc() above a certain size?
-        # If so, we'd also use arena_reset() in malloc_varsize_marknsweep().
-        return llmemory.raw_malloc(totalsize)
-
-    def init_gc_object_immortal(self, addr, typeid,
-                                flags=(GCFLAG_NO_YOUNG_PTRS |
-                                       GCFLAG_NO_HEAP_PTRS |
-                                       GCFLAG_AGE_MAX)):
-        GenerationGC.init_gc_object_immortal(self, addr, typeid, flags)
-
-    # ___________________________________________________________________
-    # collect() and semispace_collect() are not synonyms in this GC: the
-    # former is a complete collect, while the latter is only collecting
-    # the semispaces and not always doing the mark-n-sweep pass over the
-    # external objects of 3rd generation.
-
-    def collect(self, gen=2):
-        if gen > 1:
-            self.count_semispaceonly_collects = self.generation3_collect_threshold
-        GenerationGC.collect(self, gen)
-
-    def is_collecting_gen3(self):
-        count = self.count_semispaceonly_collects
-        return count >= self.generation3_collect_threshold
-
-    # ___________________________________________________________________
-    # the following methods are hook into SemiSpaceGC.semispace_collect()
-
-    def starting_full_collect(self):
-        # At the start of a collection, the GCFLAG_UNVISITED bit is set
-        # exactly on the objects in gen2_rawmalloced_objects.  Only
-        # raw_malloc'ed objects can ever have this bit set.
-        self.count_semispaceonly_collects += 1
-        if self.is_collecting_gen3():
-            # set the GCFLAG_UNVISITED on all rawmalloced generation-3 objects
-            # as well, to let them be recorded by visit_external_object()
-            self.gen3_rawmalloced_objects.foreach(self._set_gcflag_unvisited,
-                                                  None)
-        ll_assert(not self.rawmalloced_objects_to_trace.non_empty(),
-                  "rawmalloced_objects_to_trace should be empty at start")
-        self._nonmoving_copy_count = 0
-        self._nonmoving_copy_size = 0
-
-    def _set_gcflag_unvisited(self, obj, ignored):
-        ll_assert(not (self.header(obj).tid & GCFLAG_UNVISITED),
-                  "bogus GCFLAG_UNVISITED on gen3 obj")
-        self.header(obj).tid |= GCFLAG_UNVISITED
-
-    def collect_roots(self):
-        if not self.is_collecting_gen3():
-            GenerationGC.collect_roots(self)
-        else:
-            # as we don't record which prebuilt gc objects point to
-            # rawmalloced generation 3 objects, we have to trace all
-            # the prebuilt gc objects.
-            self.root_walker.walk_roots(
-                SemiSpaceGC._collect_root,  # stack roots
-                SemiSpaceGC._collect_root,  # static in prebuilt non-gc structs
-                SemiSpaceGC._collect_root)  # static in prebuilt gc objects
-
-    def surviving(self, obj):
-        # To use during a collection.  The objects that survive are the
-        # ones with GCFLAG_FORWARDED set and GCFLAG_UNVISITED not set.
-        # This is equivalent to self.is_forwarded() for all objects except
-        # the ones obtained by raw_malloc.
-        flags = self.header(obj).tid & (GCFLAG_FORWARDED|GCFLAG_UNVISITED)
-        return flags == GCFLAG_FORWARDED
-
-    def is_last_generation(self, obj):
-        return ((self.header(obj).tid & (GCFLAG_EXTERNAL|GCFLAG_AGE_MASK)) ==
-                (GCFLAG_EXTERNAL|GCFLAG_AGE_MAX))
-
-    def visit_external_object(self, obj):
-        hdr = self.header(obj)
-        if hdr.tid & GCFLAG_UNVISITED:
-            # This is a not-visited-yet raw_malloced object.
-            hdr.tid &= ~GCFLAG_UNVISITED
-            self.rawmalloced_objects_to_trace.append(obj)
-
-    def make_a_copy(self, obj, objsize):
-        # During a full collect, all copied objects might implicitly come
-        # from the nursery.  If they do, we must add the GCFLAG_NO_YOUNG_PTRS.
-        # If they don't, we count how many times they are copied and when
-        # some threshold is reached we make the copy a non-movable "external"
-        # object.  The threshold is MAX_SEMISPACE_AGE.
-        tid = self.header(obj).tid
-        # XXX the following logic is not doing exactly what is explained
-        # above: any object without GCFLAG_NO_YOUNG_PTRS has its age not
-        # incremented.  This is accidental: it means that objects that
-        # are very often modified to point to young objects don't reach
-        # the 3rd generation.  For now I'll leave it this way because
-        # I'm not sure that it's a bad thing.
-        if not (tid & GCFLAG_NO_YOUNG_PTRS):
-            tid |= GCFLAG_NO_YOUNG_PTRS    # object comes from the nursery
-        elif (tid & GCFLAG_AGE_MASK) < GCFLAG_AGE_MAX:
-            tid += GCFLAG_AGE_ONE
-        else:
-            newobj = self.make_a_nonmoving_copy(obj, objsize)
-            if newobj:
-                return newobj
-            tid &= ~GCFLAG_AGE_MASK
-        # skip GenerationGC.make_a_copy() as we already did the right
-        # thing about GCFLAG_NO_YOUNG_PTRS
-        return self._make_a_copy_with_tid(obj, objsize, tid)
-
-    def make_a_nonmoving_copy(self, obj, objsize):
-        # NB. the object can have a finalizer or be a weakref, but
-        # it's not an issue.
-        totalsize = self.size_gc_header() + objsize
-        tid = self.header(obj).tid
-        if tid & GCFLAG_HASHMASK:
-            totalsize_incl_hash = totalsize + llmemory.sizeof(lltype.Signed)
-        else:
-            totalsize_incl_hash = totalsize
-        newaddr = self.allocate_external_object(totalsize_incl_hash)
-        if not newaddr:
-            return llmemory.NULL   # can't raise MemoryError during a collect()
-        self._nonmoving_copy_count += 1
-        self._nonmoving_copy_size += raw_malloc_usage(totalsize)
-
-        llmemory.raw_memcopy(obj - self.size_gc_header(), newaddr, totalsize)
-        if tid & GCFLAG_HASHMASK:
-            hash = self._get_object_hash(obj, objsize, tid)
-            (newaddr + totalsize).signed[0] = hash
-            tid |= GC_HASH_HASFIELD
-        #
-        # GCFLAG_UNVISITED is not set
-        # GCFLAG_NO_HEAP_PTRS is not set either, conservatively.  It may be
-        # set by the next collection's collect_last_generation_roots().
-        # This old object is immediately put at generation 3.
-        newobj = newaddr + self.size_gc_header()
-        hdr = self.header(newobj)
-        hdr.tid = tid | self.GCFLAGS_FOR_NEW_EXTERNAL_OBJECTS
-        ll_assert(self.is_last_generation(newobj),
-                  "make_a_nonmoving_copy: object too young")
-        self.gen3_rawmalloced_objects.append(newobj)
-        self.last_generation_root_objects.append(newobj)
-        self.rawmalloced_objects_to_trace.append(newobj)   # visit me
-        return newobj
-
-    def scan_copied(self, scan):
-        # Alternate between scanning the regular objects we just moved
-        # and scanning the raw_malloc'ed object we just visited.
-        progress = True
-        while progress:
-            newscan = GenerationGC.scan_copied(self, scan)
-            progress = newscan != scan
-            scan = newscan
-            while self.rawmalloced_objects_to_trace.non_empty():
-                obj = self.rawmalloced_objects_to_trace.pop()
-                self.trace_and_copy(obj)
-                progress = True
-        return scan
-
-    def finished_full_collect(self):
-        ll_assert(not self.rawmalloced_objects_to_trace.non_empty(),
-                  "rawmalloced_objects_to_trace should be empty at end")
-        debug_print("| [hybrid] made nonmoving:         ",
-                    self._nonmoving_copy_size, "bytes in",
-                    self._nonmoving_copy_count, "objs")
-        rawmalloced_trigger = 0
-        # sweep the nonmarked rawmalloced objects
-        if self.is_collecting_gen3():
-            rawmalloced_trigger += self.sweep_rawmalloced_objects(generation=3)
-        rawmalloced_trigger += self.sweep_rawmalloced_objects(generation=2)
-        self.large_objects_collect_trigger = (rawmalloced_trigger +
-                                              self.space_size)
-        if self.is_collecting_gen3():
-            self.count_semispaceonly_collects = 0
-        self._initial_trigger = self.large_objects_collect_trigger
-
-    def sweep_rawmalloced_objects(self, generation):
-        # free all the rawmalloced objects of the specified generation
-        # that have not been marked
-        if generation == 2:
-            objects = self.gen2_rawmalloced_objects
-            # generation 2 sweep: if A points to an object object B that
-            # moves from gen2 to gen3, it's possible that A no longer points
-            # to any gen2 object.  In this case, A remains a bit too long in
-            # last_generation_root_objects, but this will be fixed by the
-            # next collect_last_generation_roots().
-        elif generation == 3:
-            objects = self.gen3_rawmalloced_objects
-            # generation 3 sweep: remove from last_generation_root_objects
-            # all the objects that we are about to free
-            gen3roots = self.last_generation_root_objects
-            newgen3roots = self.AddressStack()
-            while gen3roots.non_empty():
-                obj = gen3roots.pop()
-                if not (self.header(obj).tid & GCFLAG_UNVISITED):
-                    newgen3roots.append(obj)
-            gen3roots.delete()
-            self.last_generation_root_objects = newgen3roots
-        else:
-            ll_assert(False, "bogus 'generation'")
-            return 0 # to please the flowspace
-
-        surviving_objects = self.AddressStack()
-        # Help the flow space
-        alive_count = alive_size = dead_count = dead_size = 0
-        debug = have_debug_prints()
-        while objects.non_empty():
-            obj = objects.pop()
-            tid = self.header(obj).tid
-            if tid & GCFLAG_UNVISITED:
-                if debug:
-                    dead_count+=1
-                    dead_size+=raw_malloc_usage(self.get_size_incl_hash(obj))
-                addr = obj - self.gcheaderbuilder.size_gc_header
-                llmemory.raw_free(addr)
-            else:
-                if debug:
-                    alive_count+=1
-                alive_size+=raw_malloc_usage(self.get_size_incl_hash(obj))
-                if generation == 3:
-                    surviving_objects.append(obj)
-                elif generation == 2:
-                    ll_assert((tid & GCFLAG_AGE_MASK) < GCFLAG_AGE_MAX,
-                              "wrong age for generation 2 object")
-                    tid += GCFLAG_AGE_ONE
-                    if (tid & GCFLAG_AGE_MASK) == GCFLAG_AGE_MAX:
-                        # the object becomes part of generation 3
-                        self.gen3_rawmalloced_objects.append(obj)
-                        # GCFLAG_NO_HEAP_PTRS not set yet, conservatively
-                        self.last_generation_root_objects.append(obj)
-                    else:
-                        # the object stays in generation 2
-                        tid |= GCFLAG_UNVISITED
-                        surviving_objects.append(obj)
-                    self.header(obj).tid = tid
-        objects.delete()
-        if generation == 2:
-            self.gen2_rawmalloced_objects = surviving_objects
-        elif generation == 3:
-            self.gen3_rawmalloced_objects = surviving_objects
-        debug_print("| [hyb] gen", generation,
-                    "nonmoving now alive: ",
-                    alive_size, "bytes in",
-                    alive_count, "objs")
-        debug_print("| [hyb] gen", generation,
-                    "nonmoving freed:     ",
-                    dead_size, "bytes in",
-                    dead_count, "objs")
-        return alive_size
-
-    def id(self, ptr):
-        obj = llmemory.cast_ptr_to_adr(ptr)
-
-        # is it a tagged pointer?
-        if not self.is_valid_gc_object(obj):
-            return llmemory.cast_adr_to_int(obj)
-
-        if self._is_external(obj):
-            # a prebuilt or rawmalloced object
-            if self.is_last_generation(obj):
-                # a generation 3 object may be one that used to live in
-                # the semispace.  So we still need to check if the object had
-                # its id taken before.  If not, we can use its address as its
-                # id as it is not going to move any more.
-                result = self.objects_with_id.get(obj, obj)
-            else:
-                # a generation 2 external object was never non-external in
-                # the past, so it cannot be listed in self.objects_with_id.
-                result = obj
-        else:
-            result = self._compute_id(obj)     # common case
-        return llmemory.cast_adr_to_int(result) * 2 # see comment in base.py
-        # XXX a possible optimization would be to use three dicts, one
-        # for each generation, instead of mixing gen2 and gen3 objects.
-
-    def debug_check_object(self, obj):
-        """Check the invariants about 'obj' that should be true
-        between collections."""
-        GenerationGC.debug_check_object(self, obj)
-        tid = self.header(obj).tid
-        if tid & GCFLAG_UNVISITED:
-            ll_assert(self._d_gen2ro.contains(obj),
-                      "GCFLAG_UNVISITED on non-gen2 object")
-
-    def debug_check_consistency(self):
-        if self.DEBUG:
-            self._d_gen2ro = self.gen2_rawmalloced_objects.stack2dict()
-            GenerationGC.debug_check_consistency(self)
-            self._d_gen2ro.delete()
-            self.gen2_rawmalloced_objects.foreach(self._debug_check_gen2, None)
-            self.gen3_rawmalloced_objects.foreach(self._debug_check_gen3, None)
-
-    def _debug_check_gen2(self, obj, ignored):
-        tid = self.header(obj).tid
-        ll_assert(bool(tid & GCFLAG_EXTERNAL),
-                  "gen2: missing GCFLAG_EXTERNAL")
-        ll_assert(bool(tid & GC_HASH_TAKEN_ADDR),
-                  "gen2: missing GC_HASH_TAKEN_ADDR")
-        ll_assert(bool(tid & GCFLAG_UNVISITED),
-                  "gen2: missing GCFLAG_UNVISITED")
-        ll_assert((tid & GCFLAG_AGE_MASK) < GCFLAG_AGE_MAX,
-                  "gen2: age field too large")
-    def _debug_check_gen3(self, obj, ignored):
-        tid = self.header(obj).tid
-        ll_assert(bool(tid & GCFLAG_EXTERNAL),
-                  "gen3: missing GCFLAG_EXTERNAL")
-        ll_assert(bool(tid & GC_HASH_TAKEN_ADDR),
-                  "gen3: missing GC_HASH_TAKEN_ADDR")
-        ll_assert(not (tid & GCFLAG_UNVISITED),
-                  "gen3: unexpected GCFLAG_UNVISITED")
-        ll_assert((tid & GCFLAG_AGE_MASK) == GCFLAG_AGE_MAX,
-                  "gen3: wrong age field")
-
-    def can_malloc_nonmovable(self):
-        return True

rpython/memory/gc/semispace.py

-from rpython.rtyper.lltypesystem.llmemory import raw_malloc, raw_free
-from rpython.rtyper.lltypesystem.llmemory import raw_memcopy, raw_memclear
-from rpython.rtyper.lltypesystem.llmemory import NULL, raw_malloc_usage
-from rpython.memory.support import get_address_stack, get_address_deque
-from rpython.memory.support import AddressDict
-from rpython.rtyper.lltypesystem import lltype, llmemory, llarena, rffi, llgroup
-from rpython.rlib.objectmodel import free_non_gc_object
-from rpython.rlib.debug import ll_assert, have_debug_prints
-from rpython.rlib.debug import debug_print, debug_start, debug_stop
-from rpython.rtyper.lltypesystem.lloperation import llop
-from rpython.rlib.rarithmetic import ovfcheck, LONG_BIT
-from rpython.memory.gc.base import MovingGCBase, ARRAY_TYPEID_MAP,\
-     TYPEID_MAP
-
-import sys, os
-
-first_gcflag = 1 << (LONG_BIT//2)
-GCFLAG_FORWARDED = first_gcflag
-# GCFLAG_EXTERNAL is set on objects not living in the semispace:
-# either immortal objects or (for HybridGC) externally raw_malloc'ed
-GCFLAG_EXTERNAL = first_gcflag << 1
-GCFLAG_FINALIZATION_ORDERING = first_gcflag << 2
-
-_GCFLAG_HASH_BASE = first_gcflag << 3
-GCFLAG_HASHMASK = _GCFLAG_HASH_BASE * 0x3   # also consumes 'first_gcflag << 4'
-# the two bits in GCFLAG_HASHMASK can have one of the following values:
-#   - nobody ever asked for the hash of the object
-GC_HASH_NOTTAKEN   = _GCFLAG_HASH_BASE * 0x0
-#   - someone asked, and we gave the address of the object
-GC_HASH_TAKEN_ADDR = _GCFLAG_HASH_BASE * 0x1
-#   - someone asked, and we gave the address plus 'nursery_hash_base'
-GC_HASH_TAKEN_NURS = _GCFLAG_HASH_BASE * 0x2
-#   - we have our own extra field to store the hash
-GC_HASH_HASFIELD   = _GCFLAG_HASH_BASE * 0x3
-
-GCFLAG_EXTRA = first_gcflag << 5    # for RPython abuse only
-
-memoryError = MemoryError()
-
-
-class SemiSpaceGC(MovingGCBase):
-    _alloc_flavor_ = "raw"
-    inline_simple_malloc = True
-    inline_simple_malloc_varsize = True
-    malloc_zero_filled = True
-    first_unused_gcflag = first_gcflag << 6
-    gcflag_extra = GCFLAG_EXTRA
-
-    HDR = lltype.Struct('header', ('tid', lltype.Signed))   # XXX or rffi.INT?
-    typeid_is_in_field = 'tid'
-    withhash_flag_is_in_field = 'tid', _GCFLAG_HASH_BASE * 0x2
-    # ^^^ prebuilt objects either have GC_HASH_TAKEN_ADDR or they
-    #     have GC_HASH_HASFIELD (and then they are one word longer).
-    FORWARDSTUB = lltype.GcStruct('forwarding_stub',
-                                  ('forw', llmemory.Address))
-    FORWARDSTUBPTR = lltype.Ptr(FORWARDSTUB)
-
-    object_minimal_size = llmemory.sizeof(FORWARDSTUB)
-
-    # the following values override the default arguments of __init__ when
-    # translating to a real backend.
-    TRANSLATION_PARAMS = {'space_size': 8*1024*1024} # XXX adjust
-
-    def __init__(self, config, space_size=4096, max_space_size=sys.maxint//2+1,
-                 **kwds):
-        self.param_space_size = space_size
-        self.param_max_space_size = max_space_size
-        MovingGCBase.__init__(self, config, **kwds)
-
-    def setup(self):
-        #self.total_collection_time = 0.0
-        self.total_collection_count = 0
-
-        self.space_size = self.param_space_size
-        self.max_space_size = self.param_max_space_size
-        self.red_zone = 0
-
-        #self.program_start_time = time.time()
-        self.tospace = llarena.arena_malloc(self.space_size, True)
-        ll_assert(bool(self.tospace), "couldn't allocate tospace")
-        self.top_of_space = self.tospace + self.space_size
-        self.fromspace = llarena.arena_malloc(self.space_size, True)
-        ll_assert(bool(self.fromspace), "couldn't allocate fromspace")
-        self.free = self.tospace
-        MovingGCBase.setup(self)
-        self.objects_with_finalizers = self.AddressDeque()
-        self.objects_with_light_finalizers = self.AddressStack()
-        self.objects_with_weakrefs = self.AddressStack()
-
-    def _teardown(self):
-        debug_print("Teardown")
-        llarena.arena_free(self.fromspace)
-        llarena.arena_free(self.tospace)
-
-    # This class only defines the malloc_{fixed,var}size_clear() methods
-    # because the spaces are filled with zeroes in advance.
-
-    def malloc_fixedsize_clear(self, typeid16, size,
-                               has_finalizer=False,
-                               is_finalizer_light=False,
-                               contains_weakptr=False):
-        size_gc_header = self.gcheaderbuilder.size_gc_header
-        totalsize = size_gc_header + size
-        result = self.free
-        if raw_malloc_usage(totalsize) > self.top_of_space - result:
-            result = self.obtain_free_space(totalsize)
-        llarena.arena_reserve(result, totalsize)
-        self.init_gc_object(result, typeid16)
-        self.free = result + totalsize
-        #if is_finalizer_light:
-        #    self.objects_with_light_finalizers.append(result + size_gc_header)
-        #else:
-        if has_finalizer:
-            self.objects_with_finalizers.append(result + size_gc_header)
-        if contains_weakptr:
-            self.objects_with_weakrefs.append(result + size_gc_header)
-        return llmemory.cast_adr_to_ptr(result+size_gc_header, llmemory.GCREF)
-
-    def malloc_varsize_clear(self, typeid16, length, size, itemsize,
-                             offset_to_length):
-        size_gc_header = self.gcheaderbuilder.size_gc_header
-        nonvarsize = size_gc_header + size
-        try:
-            varsize = ovfcheck(itemsize * length)
-            totalsize = ovfcheck(nonvarsize + varsize)
-        except OverflowError:
-            raise memoryError
-        result = self.free
-        if raw_malloc_usage(totalsize) > self.top_of_space - result:
-            result = self.obtain_free_space(totalsize)
-        llarena.arena_reserve(result, totalsize)
-        self.init_gc_object(result, typeid16)
-        (result + size_gc_header + offset_to_length).signed[0] = length
-        self.free = result + llarena.round_up_for_allocation(totalsize)
-        return llmemory.cast_adr_to_ptr(result+size_gc_header, llmemory.GCREF)
-
-    def shrink_array(self, addr, smallerlength):
-        size_gc_header = self.gcheaderbuilder.size_gc_header
-        if self._is_in_the_space(addr - size_gc_header):
-            typeid = self.get_type_id(addr)
-            totalsmallersize = (
-                size_gc_header + self.fixed_size(typeid) +
-                self.varsize_item_sizes(typeid) * smallerlength)
-            llarena.arena_shrink_obj(addr - size_gc_header, totalsmallersize)
-            #
-            offset_to_length = self.varsize_offset_to_length(typeid)
-            (addr + offset_to_length).signed[0] = smallerlength
-            return True
-        else:
-            return False
-
-    def obtain_free_space(self, needed):
-        # a bit of tweaking to maximize the performance and minimize the
-        # amount of code in an inlined version of malloc_fixedsize_clear()
-        if not self.try_obtain_free_space(needed):
-            raise memoryError
-        return self.free
-    obtain_free_space._dont_inline_ = True
-
-    def try_obtain_free_space(self, needed):
-        # XXX for bonus points do big objects differently
-        needed = raw_malloc_usage(needed)
-        if (self.red_zone >= 2 and self.space_size < self.max_space_size and
-            self.double_space_size()):
-            pass    # collect was done during double_space_size()
-        else:
-            self.semispace_collect()
-        missing = needed - (self.top_of_space - self.free)
-        if missing <= 0:
-            return True      # success
-        else:
-            # first check if the object could possibly fit
-            proposed_size = self.space_size
-            while missing > 0:
-                if proposed_size >= self.max_space_size:
-                    return False    # no way
-                missing -= proposed_size
-                proposed_size *= 2
-            # For address space fragmentation reasons, we double the space
-            # size possibly several times, moving the objects at each step,
-            # instead of going directly for the final size.  We assume that
-            # it's a rare case anyway.
-            while self.space_size < proposed_size:
-                if not self.double_space_size():
-                    return False
-            ll_assert(needed <= self.top_of_space - self.free,
-                         "double_space_size() failed to do its job")
-            return True
-
-    def double_space_size(self):
-        self.red_zone = 0
-        old_fromspace = self.fromspace
-        newsize = self.space_size * 2
-        newspace = llarena.arena_malloc(newsize, True)
-        if not newspace:
-            return False    # out of memory
-        llarena.arena_free(old_fromspace)
-        self.fromspace = newspace
-        # now self.tospace contains the existing objects and
-        # self.fromspace is the freshly allocated bigger space
-
-        self.semispace_collect(size_changing=True)
-        self.top_of_space = self.tospace + newsize
-        # now self.tospace is the freshly allocated bigger space,
-        # and self.fromspace is the old smaller space, now empty
-        llarena.arena_free(self.fromspace)
-
-        newspace = llarena.arena_malloc(newsize, True)
-        if not newspace:
-            # Complex failure case: we have in self.tospace a big chunk
-            # of memory, and the two smaller original spaces are already gone.
-            # Unsure if it's worth these efforts, but we can artificially
-            # split self.tospace in two again...
-            self.max_space_size = self.space_size    # don't try to grow again,
-            #              because doing arena_free(self.fromspace) would crash
-            self.fromspace = self.tospace + self.space_size
-            self.top_of_space = self.fromspace
-            ll_assert(self.free <= self.top_of_space,
-                         "unexpected growth of GC space usage during collect")
-            return False     # out of memory
-
-        self.fromspace = newspace
-        self.space_size = newsize
-        return True    # success
-
-    def set_max_heap_size(self, size):
-        # Set the maximum semispace size.
-        # The size is rounded down to the next power of two.  Also, this is
-        # the size of one semispace only, so actual usage can be the double
-        # during a collection.  Finally, note that this will never shrink
-        # an already-allocated heap.
-        if size < 1:
-            size = 1     # actually, the minimum is 8MB in default translations
-        self.max_space_size = sys.maxint//2+1
-        while self.max_space_size > size:
-            self.max_space_size >>= 1
-
-    @classmethod
-    def JIT_minimal_size_in_nursery(cls):
-        return cls.object_minimal_size
-
-    def collect(self, gen=0):
-        self.debug_check_consistency()
-        self.semispace_collect()
-        # the indirection is required by the fact that collect() is referred
-        # to by the gc transformer, and the default argument would crash
-        # (this is also a hook for the HybridGC)
-
-    def semispace_collect(self, size_changing=False):
-        debug_start("gc-collect")
-        debug_print()
-        debug_print(".----------- Full collection ------------------")
-        start_usage = self.free - self.tospace
-        debug_print("| used before collection:          ",
-                    start_usage, "bytes")
-        #start_time = time.time()
-        #llop.debug_print(lltype.Void, 'semispace_collect', int(size_changing))
-
-        # Switch the spaces.  We copy everything over to the empty space
-        # (self.fromspace at the beginning of the collection), and clear the old
-        # one (self.tospace at the beginning).  Their purposes will be reversed
-        # for the next collection.
-        tospace = self.fromspace
-        fromspace = self.tospace
-        self.fromspace = fromspace
-        self.tospace = tospace
-        self.top_of_space = tospace + self.space_size
-        scan = self.free = tospace
-        self.starting_full_collect()
-        self.collect_roots()
-        if self.run_finalizers.non_empty():
-            self.update_run_finalizers()
-        scan = self.scan_copied(scan)
-        if self.objects_with_light_finalizers.non_empty():
-            self.deal_with_objects_with_light_finalizers()
-        if self.objects_with_finalizers.non_empty():
-            scan = self.deal_with_objects_with_finalizers(scan)
-        if self.objects_with_weakrefs.non_empty():
-            self.invalidate_weakrefs()
-        self.update_objects_with_id()
-        self.finished_full_collect()
-        self.debug_check_consistency()
-        if not size_changing:
-            llarena.arena_reset(fromspace, self.space_size, True)
-            self.record_red_zone()
-            self.execute_finalizers()
-        #llop.debug_print(lltype.Void, 'collected', self.space_size, size_changing, self.top_of_space - self.free)
-        if have_debug_prints():
-            #end_time = time.time()
-            #elapsed_time = end_time - start_time
-            #self.total_collection_time += elapsed_time
-            self.total_collection_count += 1
-            #total_program_time = end_time - self.program_start_time
-            end_usage = self.free - self.tospace
-            debug_print("| used after collection:           ",
-                        end_usage, "bytes")
-            debug_print("| freed:                           ",
-                        start_usage - end_usage, "bytes")
-            debug_print("| size of each semispace:          ",
-                        self.space_size, "bytes")
-            debug_print("| fraction of semispace now used:  ",
-                        end_usage * 100.0 / self.space_size, "%")
-            #ct = self.total_collection_time
-            cc = self.total_collection_count
-            debug_print("| number of semispace_collects:    ",
-                        cc)
-            #debug_print("|                         i.e.:    ",
-            #            cc / total_program_time, "per second")
-            #debug_print("| total time in semispace_collect: ",
-            #            ct, "seconds")
-            #debug_print("|                            i.e.: ",
-            #            ct * 100.0 / total_program_time, "%")
-            debug_print("`----------------------------------------------")
-        debug_stop("gc-collect")
-
-    def starting_full_collect(self):
-        pass    # hook for the HybridGC
-
-    def finished_full_collect(self):
-        pass    # hook for the HybridGC
-
-    def record_red_zone(self):
-        # red zone: if the space is more than 80% full, the next collection
-        # should double its size.  If it is more than 66% full twice in a row,
-        # then it should double its size too.  (XXX adjust)
-        # The goal is to avoid many repeated collection that don't free a lot
-        # of memory each, if the size of the live object set is just below the
-        # size of the space.
-        free_after_collection = self.top_of_space - self.free
-        if free_after_collection > self.space_size // 3:
-            self.red_zone = 0
-        else:
-            self.red_zone += 1
-            if free_after_collection < self.space_size // 5:
-                self.red_zone += 1
-
-    def get_size_incl_hash(self, obj):
-        size = self.get_size(obj)
-        hdr = self.header(obj)
-        if (hdr.tid & GCFLAG_HASHMASK) == GC_HASH_HASFIELD:
-            size += llmemory.sizeof(lltype.Signed)
-        return size
-
-    def scan_copied(self, scan):
-        while scan < self.free:
-            curr = scan + self.size_gc_header()
-            self.trace_and_copy(curr)
-            scan += self.size_gc_header() + self.get_size_incl_hash(curr)
-        return scan
-
-    def collect_roots(self):
-        self.root_walker.walk_roots(
-            SemiSpaceGC._collect_root,  # stack roots
-            SemiSpaceGC._collect_root,  # static in prebuilt non-gc structures
-            SemiSpaceGC._collect_root)  # static in prebuilt gc objects
-
-    def _collect_root(self, root):
-        root.address[0] = self.copy(root.address[0])
-
-    def copy(self, obj):
-        if self.DEBUG:
-            self.debug_check_can_copy(obj)
-        if self.is_forwarded(obj):
-            #llop.debug_print(lltype.Void, obj, "already copied to", self.get_forwarding_address(obj))
-            return self.get_forwarding_address(obj)
-        else:
-            objsize = self.get_size(obj)
-            newobj = self.make_a_copy(obj, objsize)
-            #llop.debug_print(lltype.Void, obj, "copied to", newobj,
-            #                 "tid", self.header(obj).tid,
-            #                 "size", totalsize)
-            self.set_forwarding_address(obj, newobj, objsize)
-            return newobj
-
-    def _get_object_hash(self, obj, objsize, tid):
-        # Returns the hash of the object, which must not be GC_HASH_NOTTAKEN.
-        gc_hash = tid & GCFLAG_HASHMASK
-        if gc_hash == GC_HASH_HASFIELD:
-            obj = llarena.getfakearenaaddress(obj)
-            return (obj + objsize).signed[0]
-        elif gc_hash == GC_HASH_TAKEN_ADDR:
-            return llmemory.cast_adr_to_int(obj)
-        elif gc_hash == GC_HASH_TAKEN_NURS:
-            return self._compute_current_nursery_hash(obj)
-        else:
-            assert 0, "gc_hash == GC_HASH_NOTTAKEN"
-
-    def _make_a_copy_with_tid(self, obj, objsize, tid):
-        totalsize = self.size_gc_header() + objsize
-        newaddr = self.free
-        llarena.arena_reserve(newaddr, totalsize)
-        raw_memcopy(obj - self.size_gc_header(), newaddr, totalsize)
-        if tid & GCFLAG_HASHMASK:
-            hash = self._get_object_hash(obj, objsize, tid)
-            llarena.arena_reserve(newaddr + totalsize,
-                                  llmemory.sizeof(lltype.Signed))
-            (newaddr + totalsize).signed[0] = hash
-            tid |= GC_HASH_HASFIELD
-            totalsize += llmemory.sizeof(lltype.Signed)
-        self.free += totalsize
-        newhdr = llmemory.cast_adr_to_ptr(newaddr, lltype.Ptr(self.HDR))
-        newhdr.tid = tid
-        newobj = newaddr + self.size_gc_header()
-        return newobj
-
-    def make_a_copy(self, obj, objsize):
-        tid = self.header(obj).tid
-        return self._make_a_copy_with_tid(obj, objsize, tid)
-
-    def trace_and_copy(self, obj):
-        self.trace(obj, self._trace_copy, None)
-
-    def _trace_copy(self, pointer, ignored):
-        pointer.address[0] = self.copy(pointer.address[0])
-
-    def surviving(self, obj):
-        # To use during a collection.  Check if the object is currently
-        # marked as surviving the collection.  This is equivalent to
-        # self.is_forwarded() for all objects except the nonmoving objects
-        # created by the HybridGC subclass.  In all cases, if an object
-        # survives, self.get_forwarding_address() returns its new address.
-        return self.is_forwarded(obj)
-
-    def is_forwarded(self, obj):
-        return self.header(obj).tid & GCFLAG_FORWARDED != 0
-        # note: all prebuilt objects also have this flag set
-
-    def get_forwarding_address(self, obj):
-        tid = self.header(obj).tid
-        if tid & GCFLAG_EXTERNAL:
-            self.visit_external_object(obj)
-            return obj      # external or prebuilt objects are "forwarded"
-                            # to themselves
-        else:
-            stub = llmemory.cast_adr_to_ptr(obj, self.FORWARDSTUBPTR)
-            return stub.forw
-
-    def visit_external_object(self, obj):
-        pass    # hook for the HybridGC
-
-    def get_possibly_forwarded_type_id(self, obj):
-        tid = self.header(obj).tid
-        if self.is_forwarded(obj) and not (tid & GCFLAG_EXTERNAL):
-            obj = self.get_forwarding_address(obj)
-        return self.get_type_id(obj)
-
-    def set_forwarding_address(self, obj, newobj, objsize):
-        # To mark an object as forwarded, we set the GCFLAG_FORWARDED and
-        # overwrite the object with a FORWARDSTUB.  Doing so is a bit
-        # long-winded on llarena, but it all melts down to two memory
-        # writes after translation to C.
-        size_gc_header = self.size_gc_header()
-        stubsize = llmemory.sizeof(self.FORWARDSTUB)
-        tid = self.header(obj).tid
-        ll_assert(tid & GCFLAG_EXTERNAL == 0,  "unexpected GCFLAG_EXTERNAL")
-        ll_assert(tid & GCFLAG_FORWARDED == 0, "unexpected GCFLAG_FORWARDED")
-        # replace the object at 'obj' with a FORWARDSTUB.
-        hdraddr = obj - size_gc_header
-        llarena.arena_reset(hdraddr, size_gc_header + objsize, False)
-        llarena.arena_reserve(hdraddr, size_gc_header + stubsize)
-        hdr = llmemory.cast_adr_to_ptr(hdraddr, lltype.Ptr(self.HDR))
-        hdr.tid = tid | GCFLAG_FORWARDED
-        stub = llmemory.cast_adr_to_ptr(obj, self.FORWARDSTUBPTR)
-        stub.forw = newobj
-
-    def combine(self, typeid16, flags):
-        return llop.combine_ushort(lltype.Signed, typeid16, flags)
-
-    def get_type_id(self, addr):
-        tid = self.header(addr).tid
-        ll_assert(tid & (GCFLAG_FORWARDED|GCFLAG_EXTERNAL) != GCFLAG_FORWARDED,
-                  "get_type_id on forwarded obj")
-        # Non-prebuilt forwarded objects are overwritten with a FORWARDSTUB.
-        # Although calling get_type_id() on a forwarded object works by itself,
-        # we catch it as an error because it's likely that what is then
-        # done with the typeid is bogus.
-        return llop.extract_ushort(llgroup.HALFWORD, tid)
-
-    def init_gc_object(self, addr, typeid16, flags=0):
-        hdr = llmemory.cast_adr_to_ptr(addr, lltype.Ptr(self.HDR))
-        hdr.tid = self.combine(typeid16, flags)
-
-    def init_gc_object_immortal(self, addr, typeid16, flags=0):
-        hdr = llmemory.cast_adr_to_ptr(addr, lltype.Ptr(self.HDR))
-        flags |= GCFLAG_EXTERNAL | GCFLAG_FORWARDED | GC_HASH_TAKEN_ADDR
-        hdr.tid = self.combine(typeid16, flags)
-        # immortal objects always have GCFLAG_FORWARDED set;
-        # see get_forwarding_address().
-
-    def deal_with_objects_with_light_finalizers(self):
-        """ This is a much simpler version of dealing with finalizers
-        and an optimization - we can reasonably assume that those finalizers
-        don't do anything fancy and *just* call them. Among other things
-        they won't resurrect objects
-        """
-        new_objects = self.AddressStack()
-        while self.objects_with_light_finalizers.non_empty():
-            obj = self.objects_with_light_finalizers.pop()
-            if self.surviving(obj):
-                new_objects.append(self.get_forwarding_address(obj))
-            else:
-                finalizer = self.getfinalizer(self.get_type_id(obj))
-                finalizer(obj)
-        self.objects_with_light_finalizers.delete()
-        self.objects_with_light_finalizers = new_objects
-
-    def deal_with_objects_with_finalizers(self, scan):
-        # walk over list of objects with finalizers
-        # if it is not copied, add it to the list of to-be-called finalizers
-        # and copy it, to me make the finalizer runnable
-        # We try to run the finalizers in a "reasonable" order, like
-        # CPython does.  The details of this algorithm are in
-        # pypy/doc/discussion/finalizer-order.txt.
-        new_with_finalizer = self.AddressDeque()
-        marked = self.AddressDeque()
-        pending = self.AddressStack()
-        self.tmpstack = self.AddressStack()
-        while self.objects_with_finalizers.non_empty():
-            x = self.objects_with_finalizers.popleft()
-            ll_assert(self._finalization_state(x) != 1, 
-                      "bad finalization state 1")
-            if self.surviving(x):
-                new_with_finalizer.append(self.get_forwarding_address(x))
-                continue
-            marked.append(x)
-            pending.append(x)
-            while pending.non_empty():
-                y = pending.pop()
-                state = self._finalization_state(y)
-                if state == 0:
-                    self._bump_finalization_state_from_0_to_1(y)
-                    self.trace(y, self._append_if_nonnull, pending)
-                elif state == 2:
-                    self._recursively_bump_finalization_state_from_2_to_3(y)
-            scan = self._recursively_bump_finalization_state_from_1_to_2(
-                       x, scan)
-
-        while marked.non_empty():
-            x = marked.popleft()
-            state = self._finalization_state(x)
-            ll_assert(state >= 2, "unexpected finalization state < 2")
-            newx = self.get_forwarding_address(x)
-            if state == 2:
-                self.run_finalizers.append(newx)
-                # we must also fix the state from 2 to 3 here, otherwise
-                # we leave the GCFLAG_FINALIZATION_ORDERING bit behind
-                # which will confuse the next collection
-                self._recursively_bump_finalization_state_from_2_to_3(x)
-            else:
-                new_with_finalizer.append(newx)
-
-        self.tmpstack.delete()
-        pending.delete()
-        marked.delete()
-        self.objects_with_finalizers.delete()
-        self.objects_with_finalizers = new_with_finalizer
-        return scan
-
-    def _append_if_nonnull(pointer, stack):
-        stack.append(pointer.address[0])
-    _append_if_nonnull = staticmethod(_append_if_nonnull)
-
-    def _finalization_state(self, obj):
-        if self.surviving(obj):
-            newobj = self.get_forwarding_address(obj)
-            hdr = self.header(newobj)
-            if hdr.tid & GCFLAG_FINALIZATION_ORDERING:
-                return 2
-            else:
-                return 3
-        else:
-            hdr = self.header(obj)
-            if hdr.tid & GCFLAG_FINALIZATION_ORDERING:
-                return 1
-            else:
-                return 0
-
-    def _bump_finalization_state_from_0_to_1(self, obj):
-        ll_assert(self._finalization_state(obj) == 0,
-                  "unexpected finalization state != 0")
-        hdr = self.header(obj)
-        hdr.tid |= GCFLAG_FINALIZATION_ORDERING
-
-    def _recursively_bump_finalization_state_from_2_to_3(self, obj):
-        ll_assert(self._finalization_state(obj) == 2,
-                  "unexpected finalization state != 2")
-        newobj = self.get_forwarding_address(obj)
-        pending = self.tmpstack
-        ll_assert(not pending.non_empty(), "tmpstack not empty")
-        pending.append(newobj)
-        while pending.non_empty():
-            y = pending.pop()
-            hdr = self.header(y)
-            if hdr.tid & GCFLAG_FINALIZATION_ORDERING:     # state 2 ?
-                hdr.tid &= ~GCFLAG_FINALIZATION_ORDERING   # change to state 3
-                self.trace(y, self._append_if_nonnull, pending)
-
-    def _recursively_bump_finalization_state_from_1_to_2(self, obj, scan):
-        # recursively convert objects from state 1 to state 2.
-        # Note that copy() copies all bits, including the
-        # GCFLAG_FINALIZATION_ORDERING.  The mapping between
-        # state numbers and the presence of this bit was designed
-        # for the following to work :-)
-        self.copy(obj)
-        return self.scan_copied(scan)
-
-    def invalidate_weakrefs(self):
-        # walk over list of objects that contain weakrefs
-        # if the object it references survives then update the weakref
-        # otherwise invalidate the weakref
-        new_with_weakref = self.AddressStack()
-        while self.objects_with_weakrefs.non_empty():
-            obj = self.objects_with_weakrefs.pop()
-            if not self.surviving(obj):
-                continue # weakref itself dies
-            obj = self.get_forwarding_address(obj)
-            offset = self.weakpointer_offset(self.get_type_id(obj))
-            pointing_to = (obj + offset).address[0]
-            # XXX I think that pointing_to cannot be NULL here
-            if pointing_to:
-                if self.surviving(pointing_to):
-                    (obj + offset).address[0] = self.get_forwarding_address(
-                        pointing_to)
-                    new_with_weakref.append(obj)
-                else:
-                    (obj + offset).address[0] = NULL
-        self.objects_with_weakrefs.delete()
-        self.objects_with_weakrefs = new_with_weakref
-
-    def update_run_finalizers(self):
-        # we are in an inner collection, caused by a finalizer
-        # the run_finalizers objects need to be copied
-        new_run_finalizer = self.AddressDeque()
-        while self.run_finalizers.non_empty():
-            obj = self.run_finalizers.popleft()
-            new_run_finalizer.append(self.copy(obj))
-        self.run_finalizers.delete()
-        self.run_finalizers = new_run_finalizer
-
-    def _is_external(self, obj):
-        return (self.header(obj).tid & GCFLAG_EXTERNAL) != 0
-
-    def _is_in_the_space(self, obj):
-        return self.tospace <= obj < self.free
-
-    def debug_check_object(self, obj):
-        """Check the invariants about 'obj' that should be true
-        between collections."""
-        tid = self.header(obj).tid
-        if tid & GCFLAG_EXTERNAL:
-            ll_assert(tid & GCFLAG_FORWARDED != 0, "bug: external+!forwarded")
-            ll_assert(not (self.tospace <= obj < self.free),
-                      "external flag but object inside the semispaces")
-        else:
-            ll_assert(not (tid & GCFLAG_FORWARDED), "bug: !external+forwarded")
-            ll_assert(self.tospace <= obj < self.free,
-                      "!external flag but object outside the semispaces")
-        ll_assert(not (tid & GCFLAG_FINALIZATION_ORDERING),
-                  "unexpected GCFLAG_FINALIZATION_ORDERING")
-
-    def debug_check_can_copy(self, obj):
-        ll_assert(not (self.tospace <= obj < self.free),
-                  "copy() on already-copied object")
-
-    STATISTICS_NUMBERS = 0
-
-    def is_in_nursery(self, addr):
-        # overridden in generation.py.
-        return False
-
-    def _compute_current_nursery_hash(self, obj):
-        # overridden in generation.py.
-        raise AssertionError("should not be called")
-
-    def identityhash(self, gcobj):
-        # The following loop should run at most twice.
-        while 1:
-            obj = llmemory.cast_ptr_to_adr(gcobj)
-            hdr = self.header(obj)
-            if hdr.tid & GCFLAG_HASHMASK:
-                break
-            # It's the first time we ask for a hash, and it's not an
-            # external object.  Shrink the top of space by the extra
-            # hash word that will be needed after a collect.
-            shrunk_top = self.top_of_space - llmemory.sizeof(lltype.Signed)
-            if shrunk_top < self.free:
-                # Cannot shrink!  Do a collection, asking for at least
-                # one word of free space, and try again.  May raise
-                # MemoryError.  Obscure: not called directly, but
-                # across an llop, to make sure that there is the
-                # correct push_roots/pop_roots around the call...
-                llop.gc_obtain_free_space(llmemory.Address,
-                                          llmemory.sizeof(lltype.Signed))
-                continue
-            else:
-                # Now we can have side-effects: lower the top of space
-                # and set one of the GC_HASH_TAKEN_xxx flags.
-                self.top_of_space = shrunk_top
-                if self.is_in_nursery(obj):
-                    hdr.tid |= GC_HASH_TAKEN_NURS
-                else:
-                    hdr.tid |= GC_HASH_TAKEN_ADDR
-                break
-        # Now we can return the result
-        objsize = self.get_size(obj)
-        return self._get_object_hash(obj, objsize, hdr.tid)
-
-    def track_heap_parent(self, obj, parent):
-        addr = obj.address[0]
-        parent_idx = llop.get_member_index(lltype.Signed,
-                                           self.get_type_id(parent))
-        idx = llop.get_member_index(lltype.Signed, self.get_type_id(addr))
-        self._ll_typeid_map[parent_idx].links[idx] += 1
-        self.track_heap(addr)
-
-    def track_heap(self, adr):
-        if self._tracked_dict.contains(adr):
-            return
-        self._tracked_dict.add(adr)
-        idx = llop.get_member_index(lltype.Signed, self.get_type_id(adr))
-        self._ll_typeid_map[idx].count += 1
-        totsize = self.get_size(adr) + self.size_gc_header()
-        self._ll_typeid_map[idx].size += llmemory.raw_malloc_usage(totsize)
-        self.trace(adr, self.track_heap_parent, adr)
-
-    @staticmethod
-    def _track_heap_root(obj, self):
-        self.track_heap(obj)
-
-    def heap_stats(self):
-        self._tracked_dict = self.AddressDict()
-        max_tid = self.root_walker.gcdata.max_type_id
-        ll_typeid_map = lltype.malloc(ARRAY_TYPEID_MAP, max_tid, zero=True)
-        for i in range(max_tid):
-            ll_typeid_map[i] = lltype.malloc(TYPEID_MAP, max_tid, zero=True)
-        self._ll_typeid_map = ll_typeid_map
-        self._tracked_dict.add(llmemory.cast_ptr_to_adr(ll_typeid_map))
-        i = 0
-        while i < max_tid:
-            self._tracked_dict.add(llmemory.cast_ptr_to_adr(ll_typeid_map[i]))
-            i += 1
-        self.enumerate_all_roots(SemiSpaceGC._track_heap_root, self)
-        self._ll_typeid_map = lltype.nullptr(ARRAY_TYPEID_MAP)
-        self._tracked_dict.delete()
-        return ll_typeid_map

rpython/memory/gc/test/test_direct.py

             p = self.stackroots[-50+i]
             assert p[i-1] == chr(i)
 
-class TestSemiSpaceGC(DirectGCTest):
-    from rpython.memory.gc.semispace import SemiSpaceGC as GCClass
-
-    def test_shrink_array(self):
-        S1 = lltype.GcStruct('S1', ('h', lltype.Char),
-                                   ('v', lltype.Array(lltype.Char)))
-        p1 = self.malloc(S1, 2)
-        p1.h = '?'
-        for i in range(2):
-            p1.v[i] = chr(50 + i)
-        addr = llmemory.cast_ptr_to_adr(p1)
-        ok = self.gc.shrink_array(addr, 1)
-        assert ok
-        assert p1.h == '?'
-        assert len(p1.v) == 1
-        for i in range(1):
-            assert p1.v[i] == chr(50 + i)
-
-
-class TestGenerationGC(TestSemiSpaceGC):
-    from rpython.memory.gc.generation import GenerationGC as GCClass
-
-    def test_collect_gen(self):
-        gc = self.gc
-        old_semispace_collect = gc.semispace_collect
-        old_collect_nursery = gc.collect_nursery
-        calls = []
-        def semispace_collect():
-            calls.append('semispace_collect')
-            return old_semispace_collect()
-        def collect_nursery():
-            calls.append('collect_nursery')
-            return old_collect_nursery()
-        gc.collect_nursery = collect_nursery
-        gc.semispace_collect = semispace_collect
-
-        gc.collect()
-        assert calls == ['semispace_collect']
-        calls = []
-
-        gc.collect(0)
-        assert calls == ['collect_nursery']
-        calls = []
-
-        gc.collect(1)
-        assert calls == ['semispace_collect']
-        calls = []
-
-        gc.collect(9)
-        assert calls == ['semispace_collect']
-        calls = []
-
-    def test_assume_young_pointers(self):
-        s0 = lltype.malloc(S, immortal=True)
-        self.consider_constant(s0)
-        s = self.malloc(S)
-        s.x = 1
-        s0.next = s
-        self.gc.assume_young_pointers(llmemory.cast_ptr_to_adr(s0))
-
-        self.gc.collect(0)
-
-        assert s0.next.x == 1
-
-
-class TestHybridGC(TestGenerationGC):
-    from rpython.memory.gc.hybrid import HybridGC as GCClass
-
-    GC_PARAMS = {'space_size': 48*WORD,
-                 'min_nursery_size': 12*WORD,
-                 'nursery_size': 12*WORD,
-                 'large_object': 3*WORD,
-                 'large_object_gcptrs': 3*WORD,
-                 'generation3_collect_threshold': 5,
-                 }
-
-    def test_collect_gen(self):
-        gc = self.gc
-        old_semispace_collect = gc.semispace_collect
-        old_collect_nursery = gc.collect_nursery
-        calls = []
-        def semispace_collect():
-            gen3 = gc.is_collecting_gen3()
-            calls.append(('semispace_collect', gen3))
-            return old_semispace_collect()
-        def collect_nursery():
-            calls.append('collect_nursery')
-            return old_collect_nursery()
-        gc.collect_nursery = collect_nursery
-        gc.semispace_collect = semispace_collect
-
-        gc.collect()
-        assert calls == [('semispace_collect', True)]
-        calls = []
-
-        gc.collect(0)
-        assert calls == ['collect_nursery']
-        calls = []
-
-        gc.collect(1)
-        assert calls == [('semispace_collect', False)]
-        calls = []
-
-        gc.collect(2)
-        assert calls == [('semispace_collect', True)]
-        calls = []
-
-        gc.collect(9)
-        assert calls == [('semispace_collect', True)]
-        calls = []
-
-    def test_identityhash(self):
-        py.test.skip("does not support raw_mallocs(sizeof(S)+sizeof(hash))")
-
 
 class TestMiniMarkGCSimple(DirectGCTest):
     from rpython.memory.gc.minimark import MiniMarkGC as GCClass

rpython/memory/gctransform/test/test_framework.py

 from rpython.flowspace.model import Constant, SpaceOperation
 from rpython.rtyper.lltypesystem import lltype, rffi
 from rpython.rtyper.lltypesystem.lloperation import llop
-from rpython.memory.gc.semispace import SemiSpaceGC
+from rpython.memory.gc.minimark import MiniMarkGC
 from rpython.memory.gctransform.framework import (CollectAnalyzer,
      find_initializing_stores, find_clean_setarrayitems)
 from rpython.memory.gctransform.shadowstack import (
 class WriteBarrierTransformer(ShadowStackFrameworkGCTransformer):
     clean_sets = {}