Source

pypy / pypy / rpython / memory / gc / stmgc.py

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
from pypy.rpython.lltypesystem import lltype, llmemory, llarena, rffi
from pypy.rpython.lltypesystem.lloperation import llop
from pypy.rpython.lltypesystem.llmemory import raw_malloc_usage
from pypy.rpython.memory.gc.base import GCBase
from pypy.rpython.annlowlevel import llhelper
from pypy.rlib.rarithmetic import LONG_BIT
from pypy.rlib.debug import ll_assert, debug_start, debug_stop, fatalerror
from pypy.module.thread import ll_thread


# Number of bytes in a machine word.
WORD = LONG_BIT // 8
# Shorthand for the null address.
NULL = llmemory.NULL

# The GC flags are or-ed into the 'tid' header field; the first flag
# sits at bit number LONG_BIT/2.
first_gcflag = 1 << (LONG_BIT // 2)

# keep in sync with et.c
GCFLAG_GLOBAL = first_gcflag
# keep in sync with et.c
GCFLAG_WAS_COPIED = first_gcflag << 1

# Maps a size in bytes to the low-level type used to read a primitive
# field of that size (see StmGC.declare_reader()).
PRIMITIVE_SIZES = {
    1: lltype.Char,
    2: rffi.SHORT,
    4: rffi.INT,
    8: lltype.SignedLongLong,
}


def always_inline(fn):
    """Decorator: tag 'fn' with '_always_inline_ = True' and return it."""
    setattr(fn, '_always_inline_', True)
    return fn
def dont_inline(fn):
    """Decorator: tag 'fn' with '_dont_inline_ = True' and return it."""
    setattr(fn, '_dont_inline_', True)
    return fn


class StmGC(GCBase):
    """GC working together with the 'stm_operations' helpers.

    Memory is handed out by bumping a pointer inside a per-thread
    nursery, described by a raw-malloced GCTLS structure.  Objects whose
    header has GCFLAG_GLOBAL set are read and written through the
    'stm_operations' helpers; objects without this flag are local to the
    thread and are accessed directly.  The work done at transaction
    boundaries lives in the separate Collector class.
    """
    _alloc_flavor_ = "raw"
    inline_simple_malloc = True
    inline_simple_malloc_varsize = True
    needs_write_barrier = "stm"
    prebuilt_gc_objects_are_static_roots = False
    malloc_zero_filled = True    # xxx?

    # Object header.  'tid' is the type id combined with the GCFLAG_xxx
    # flags.  'version' is a version number on global objects; on the
    # local copy of a global object it is a pointer to that global
    # object (see _stm_write_barrier_global() below).
    HDR = lltype.Struct('header', ('tid', lltype.Signed),
                                  ('version', llmemory.Address))
    typeid_is_in_field = 'tid'
    withhash_flag_is_in_field = 'tid', 'XXX'

    # Thread-local data: the bounds and the allocation pointer of the
    # thread's nursery, the flags given to freshly malloced objects, and
    # the head of the list used by the Collector during a commit.
    GCTLS = lltype.Struct('GCTLS', ('nursery_free', llmemory.Address),
                                   ('nursery_top', llmemory.Address),
                                   ('nursery_start', llmemory.Address),
                                   ('nursery_size', lltype.Signed),
                                   ('malloc_flags', lltype.Signed),
                                   ('pending_list', llmemory.Address),
                          )

    TRANSLATION_PARAMS = {
        'stm_operations': 'use_real_one',
        'max_nursery_size': 400*1024*1024,      # XXX 400MB
    }

    def __init__(self, config, stm_operations='use_emulator',
                 max_nursery_size=1024,
                 **kwds):
        """Build the GC.

        'config' and '**kwds' are passed on to GCBase.  'stm_operations'
        is either the operations object itself or the string
        'use_real_one', in which case the real StmOperations is imported
        and instantiated; any other string (this includes the default
        value) is rejected by an assert.  'max_nursery_size' is the size
        given to every nursery allocated by setup_thread().
        """
        GCBase.__init__(self, config, **kwds)
        #
        if isinstance(stm_operations, str):
            assert stm_operations == 'use_real_one', (
                "XXX not provided so far: stm_operations == %r" % (
                stm_operations,))
            from pypy.translator.stm.stmgcintf import StmOperations
            stm_operations = StmOperations()
        #
        self.stm_operations = stm_operations
        self.collector = Collector(self)
        self.max_nursery_size = max_nursery_size
        #
        def _get_size(obj):     # indirection to hide 'self'
            return self.get_size(obj)
        self._getsize_fn = _get_size
        #
        # Install the 'read_intN' functions and the write barrier as
        # attributes of this instance.
        for size, TYPE in PRIMITIVE_SIZES.items():
            self.declare_reader(size, TYPE)
        self.declare_write_barrier()

    def setup(self):
        """Called at run-time to initialize the GC."""
        GCBase.setup(self)
        # Give the stm operations a function pointer with which they can
        # ask for the size of an object.
        GETSIZE = lltype.Ptr(lltype.FuncType([llmemory.Address],lltype.Signed))
        self.stm_operations.setup_size_getter(
                llhelper(GETSIZE, self._getsize_fn))
        self.main_thread_tls = self.setup_thread(True)
        # Lock taken by the Collector around a commit-time collection.
        self.mutex_lock = ll_thread.allocate_ll_lock()

    def _alloc_nursery(self):
        """Allocate an arena of 'max_nursery_size' and return its address.
        Raises MemoryError if the arena cannot be allocated."""
        nursery = llarena.arena_malloc(self.max_nursery_size, 1)
        if not nursery:
            raise MemoryError("cannot allocate nursery")
        return nursery

    def _free_nursery(self, nursery):
        """Release an arena obtained from _alloc_nursery()."""
        llarena.arena_free(nursery)

    def setup_thread(self, in_main_thread):
        """Setup a thread.  Allocates the thread-local data structures.
        Must be called only once per OS-level thread.

        Returns the new GCTLS structure, which is also registered with
        the stm operations via set_tls()."""
        tls = lltype.malloc(self.GCTLS, flavor='raw')
        self.stm_operations.set_tls(llmemory.cast_ptr_to_adr(tls),
                                    int(in_main_thread))
        tls.nursery_start = self._alloc_nursery()
        tls.nursery_size  = self.max_nursery_size
        tls.nursery_free  = tls.nursery_start
        tls.nursery_top   = tls.nursery_start + tls.nursery_size
        #
        # XXX for now, we use as the "global area" the nursery of the
        # main thread.  So allocation in the main thread is the same as
        # allocation in another thread, except that the new objects
        # should be immediately marked as GCFLAG_GLOBAL.
        if in_main_thread:
            tls.malloc_flags = GCFLAG_GLOBAL
        else:
            tls.malloc_flags = 0
        return tls

    @staticmethod
    def reset_nursery(tls):
        """Clear and forget all locally allocated objects."""
        # Only the part of the nursery that was handed out so far needs
        # to be reset.
        size = tls.nursery_free - tls.nursery_start
        llarena.arena_reset(tls.nursery_start, size, 2)
        tls.nursery_free = tls.nursery_start

    def teardown_thread(self):
        """Teardown a thread.  Call this just before the OS-level thread
        disappears."""
        tls = self.collector.get_tls()
        self.stm_operations.del_tls()
        self._free_nursery(tls.nursery_start)
        lltype.free(tls, flavor='raw')

    # ----------

    def allocate_bump_pointer(self, size):
        """Reserve 'size' bytes in the nursery of the current thread and
        return the address of the reserved memory."""
        return self._allocate_bump_pointer(self.collector.get_tls(), size)

    @always_inline
    def _allocate_bump_pointer(self, tls, size):
        """Reserve 'size' bytes in the nursery described by 'tls'.

        Returns the start address of the reserved memory.  If the
        nursery is full, local_collection() is called (for now it always
        raises MemoryError).
        """
        free = tls.nursery_free
        top  = tls.nursery_top
        new  = free + size
        if new > top:
            # Nursery exhausted.  'tls.nursery_free' is deliberately not
            # updated before this call: if local_collection() raises,
            # the tls must still describe a valid nursery, because
            # reset_nursery() computes the size to clear from
            # 'nursery_free'.
            free = self.local_collection(free)
            new  = free + size
        tls.nursery_free = new
        return free

    @dont_inline
    def local_collection(self, oldfree):
        """Slow path of _allocate_bump_pointer(), called when the
        nursery is full.  Not implemented so far: always raises
        MemoryError."""
        raise MemoryError("nursery exhausted")   # XXX for now


    def malloc_fixedsize_clear(self, typeid, size,
                               needs_finalizer=False,
                               is_finalizer_light=False,
                               contains_weakptr=False):
        """Allocate a fixed-size object of the given 'typeid' and 'size'
        in the nursery of the current thread and return it as a GCREF.

        Finalizers and weak pointers are not supported so far.  Raises
        MemoryError if the nursery is full.
        """
        assert not needs_finalizer, "XXX"
        assert not contains_weakptr, "XXX"
        #
        # Check the mode: either in a transactional thread, or in
        # the main thread.  For now we do the same thing in both
        # modes, but set different flags.
        tls = self.collector.get_tls()
        flags = tls.malloc_flags
        #
        # Get the memory from the nursery.
        size_gc_header = self.gcheaderbuilder.size_gc_header
        totalsize = size_gc_header + size
        result = self._allocate_bump_pointer(tls, totalsize)
        #
        # Build the object.
        llarena.arena_reserve(result, totalsize)
        obj = result + size_gc_header
        self.init_gc_object(result, typeid, flags=flags)
        #
        return llmemory.cast_adr_to_ptr(obj, llmemory.GCREF)


    def malloc_varsize_clear(self, typeid, length, size, itemsize,
                             offset_to_length):
        """Variable-sized allocation: not implemented so far."""
        raise NotImplementedError


    def _malloc_local_raw(self, tls, size):
        """Reserve room for a header plus 'size' bytes in the nursery
        described by 'tls' and return the address of the object (i.e.
        the address just after the header).  Raises MemoryError if the
        nursery is full."""
        # for _stm_write_barrier_global(): a version of malloc that does
        # no initialization of the malloc'ed object
        size_gc_header = self.gcheaderbuilder.size_gc_header
        totalsize = size_gc_header + size
        result = self._allocate_bump_pointer(tls, totalsize)
        llarena.arena_reserve(result, totalsize)
        obj = result + size_gc_header
        return obj


    def collect(self, gen=0):
        """Explicit collection: not implemented so far."""
        raise NotImplementedError


    @always_inline
    def combine(self, typeid16, flags):
        """Return the value of the 'tid' header field that combines the
        type id 'typeid16' with 'flags'."""
        return llop.combine_ushort(lltype.Signed, typeid16, flags)

    @always_inline
    def init_gc_object(self, addr, typeid16, flags=0):
        """Write the 'tid' field of the header located at 'addr'.  The
        'version' field is not written here."""
        hdr = llmemory.cast_adr_to_ptr(addr, lltype.Ptr(self.HDR))
        hdr.tid = self.combine(typeid16, flags)

    # ----------

    def declare_reader(self, size, TYPE):
        """Install on this instance the function 'read_int<size>(obj,
        offset)', which reads a field of the low-level type 'TYPE' (of
        'size' bytes) at 'offset' inside 'obj'."""
        # Reading functions.  Defined here to avoid the extra burden of
        # passing 'self' explicitly.
        assert rffi.sizeof(TYPE) == size
        PTYPE = rffi.CArrayPtr(TYPE)
        stm_read_int = getattr(self.stm_operations, 'stm_read_int%d' % size)
        #
        @always_inline
        def reader(obj, offset):
            if self.header(obj).tid & GCFLAG_GLOBAL == 0:
                adr = rffi.cast(PTYPE, obj + offset)
                return adr[0]                      # local obj: read directly
            else:
                return stm_read_int(obj, offset)   # else: call a helper
        setattr(self, 'read_int%d' % size, reader)
        #
        # the following logic was moved to et.c to avoid a double call
##        @dont_inline
##        def _read_word_global(obj, offset):
##            hdr = self.header(obj)
##            if hdr.tid & GCFLAG_WAS_COPIED != 0:
##                #
##                # Look up in the thread-local dictionary.
##                localobj = stm_operations.tldict_lookup(obj)
##                if localobj:
##                    ll_assert(self.header(localobj).tid & GCFLAG_GLOBAL == 0,
##                              "stm_read: tldict_lookup() -> GLOBAL obj")
##                    return (localobj + offset).signed[0]
##            #
##            return stm_operations.stm_read_word(obj, offset)


    def declare_write_barrier(self):
        """Install on this instance the function 'write_barrier(obj)'.

        The write barrier returns the object that must actually be
        written to: 'obj' itself if it is local, or else the local copy
        of the global 'obj' (made on the first call).
        """
        # Write barrier.  Defined here to avoid the extra burden of
        # passing 'self' explicitly.
        stm_operations = self.stm_operations
        #
        @always_inline
        def write_barrier(obj):
            if self.header(obj).tid & GCFLAG_GLOBAL != 0:
                obj = _stm_write_barrier_global(obj)
            return obj
        self.write_barrier = write_barrier
        #
        @dont_inline
        def _stm_write_barrier_global(obj):
            # we need to find or make a local copy
            hdr = self.header(obj)
            if hdr.tid & GCFLAG_WAS_COPIED == 0:
                # in this case, we are sure that we don't have a copy
                hdr.tid |= GCFLAG_WAS_COPIED
                # ^^^ non-protected write, but concurrent writes should
                #     have the same effect, so fine
            else:
                # in this case, we need to check first
                localobj = stm_operations.tldict_lookup(obj)
                if localobj:
                    hdr = self.header(localobj)
                    ll_assert(hdr.tid & GCFLAG_GLOBAL == 0,
                              "stm_write: tldict_lookup() -> GLOBAL obj")
                    ll_assert(hdr.tid & GCFLAG_WAS_COPIED != 0,
                              "stm_write: tldict_lookup() -> non-COPIED obj")
                    return localobj
            #
            # Here, we need to really make a local copy
            size = self.get_size(obj)
            tls = self.collector.get_tls()
            try:
                localobj = self._malloc_local_raw(tls, size)
            except MemoryError:
                # XXX
                # 'fatalerror' comes from pypy.rlib.debug (imported at
                # the top of the file).
                fatalerror("MemoryError in _stm_write_barrier_global -- sorry")
                return llmemory.NULL
            #
            # Initialize the copy by doing an stm raw copy of the bytes
            stm_operations.stm_copy_transactional_to_raw(obj, localobj, size)
            #
            # The raw copy done above does not include the header fields.
            hdr = self.header(obj)
            localhdr = self.header(localobj)
            GCFLAGS = (GCFLAG_GLOBAL | GCFLAG_WAS_COPIED)
            ll_assert(hdr.tid & GCFLAGS == GCFLAGS,
                      "stm_write: bogus flags on source object")
            #
            # Remove the GCFLAG_GLOBAL from the copy
            localhdr.tid = hdr.tid & ~GCFLAG_GLOBAL
            #
            # Set the 'version' field of the local copy to be a pointer
            # to the global obj.  (The field is called 'version' because
            # of its use by the C STM library: on global objects (only),
            # it is a version number.)
            localhdr.version = obj
            #
            # Register the object as a valid copy
            stm_operations.tldict_add(obj, localobj)
            #
            return localobj

    # ----------

    def acquire(self, lock):
        """Acquire the low-level 'lock' (the flag 1 is passed as the
        second argument of c_thread_acquirelock)."""
        ll_thread.c_thread_acquirelock(lock, 1)

    def release(self, lock):
        """Release the low-level 'lock'."""
        ll_thread.c_thread_releaselock(lock)


# ------------------------------------------------------------


class Collector(object):
    """A separate frozen class.  Useful to prevent any buggy concurrent
    access to GC data.  The methods here use the GCTLS instead for
    storing things in a thread-local way."""

    def __init__(self, gc):
        """'gc' is the StmGC instance this collector works for."""
        self.gc = gc
        self.stm_operations = gc.stm_operations

    def _freeze_(self):
        return True

    def get_tls(self):
        """Return the GCTLS structure of the current thread, as
        registered with the stm operations."""
        tls = self.stm_operations.get_tls()
        return llmemory.cast_adr_to_ptr(tls, lltype.Ptr(StmGC.GCTLS))

    def is_in_nursery(self, tls, addr):
        """Return True if 'addr' lies inside the nursery described by
        'tls'.  'addr' must not be a tagged (odd-valued) pointer."""
        ll_assert(llmemory.cast_adr_to_int(addr) & 1 == 0,
                  "odd-valued (i.e. tagged) pointer unexpected here")
        return tls.nursery_start <= addr < tls.nursery_top

    def header(self, obj):
        """Return the GC header of 'obj' (shortcut for gc.header())."""
        return self.gc.header(obj)


    def start_transaction(self):
        """Start a transaction, by clearing and resetting the tls nursery."""
        tls = self.get_tls()
        self.gc.reset_nursery(tls)


    def commit_transaction(self):
        """End of a transaction, just before its end.  No more GC
        operations should occur afterwards!  Note that the C code that
        does the commit runs afterwards, and may still abort."""
        #
        debug_start("gc-collect-commit")
        #
        tls = self.get_tls()
        #
        # Do a mark-and-move minor collection out of the tls' nursery
        # into the main thread's global area (which is right now also
        # called a nursery).  To simplify things, we use a global lock
        # around the whole mark-and-move.
        self.gc.acquire(self.gc.mutex_lock)
        try:
            #
            # We are starting from the tldict's local objects as roots.
            # At this point, these objects have GCFLAG_WAS_COPIED, and
            # the other local objects don't.  We want to move all
            # reachable local objects to the global area.
            #
            # Start from tracing the root objects
            self.collect_roots_from_tldict(tls)
            #
            # Continue iteratively until we have reached all the
            # reachable local objects
            self.collect_from_pending_list(tls)
            #
        finally:
            # Copying into the global area can raise MemoryError.  The
            # global lock must be released (and the debug section
            # closed) in that case too, or every other thread that tries
            # to commit would block forever.
            self.gc.release(self.gc.mutex_lock)
            #
            # In the non-error case: now, all indirectly reachable local
            # objects have been copied into the global area, and all
            # pointers have been fixed to point to the global copies,
            # including in the local copy of the roots.  What remains is
            # only overwriting of the global copy of the roots.  This is
            # done by the C code.
            debug_stop("gc-collect-commit")


    def collect_roots_from_tldict(self, tls):
        """Trace every local copy registered in the thread-local
        dictionary.  Resets 'tls.pending_list' first; the objects copied
        out of the nursery during the tracing are added to that list."""
        tls.pending_list = NULL
        # Enumerate the roots, which are the local copies of global objects.
        # For each root, trace it.
        self.stm_operations.enum_tldict_start()
        while self.stm_operations.enum_tldict_find_next():
            globalobj = self.stm_operations.enum_tldict_globalobj()
            localobj = self.stm_operations.enum_tldict_localobj()
            #
            localhdr = self.header(localobj)
            ll_assert(localhdr.version == globalobj,
                      "in a root: localobj.version != globalobj")
            ll_assert(localhdr.tid & GCFLAG_GLOBAL == 0,
                      "in a root: unexpected GCFLAG_GLOBAL")
            ll_assert(localhdr.tid & GCFLAG_WAS_COPIED != 0,
                      "in a root: missing GCFLAG_WAS_COPIED")
            #
            self.trace_and_drag_out_of_nursery(tls, localobj)


    def collect_from_pending_list(self, tls):
        """Pop and trace the objects of 'tls.pending_list' until the
        list is empty.  Tracing an object can add more objects to the
        list."""
        while tls.pending_list != NULL:
            pending_obj = tls.pending_list
            pending_hdr = self.header(pending_obj)
            #
            # 'pending_list' is a chained list of fresh global objects,
            # linked together via their 'version' field.  The 'version'
            # must be replaced with NULL after we pop the object from
            # the linked list.
            tls.pending_list = pending_hdr.version
            pending_hdr.version = NULL
            #
            # Check the flags of pending_obj: it should be a fresh global
            # object, without GCFLAG_WAS_COPIED
            ll_assert(pending_hdr.tid & GCFLAG_GLOBAL != 0,
                      "from pending list: missing GCFLAG_GLOBAL")
            ll_assert(pending_hdr.tid & GCFLAG_WAS_COPIED == 0,
                      "from pending list: unexpected GCFLAG_WAS_COPIED")
            #
            self.trace_and_drag_out_of_nursery(tls, pending_obj)


    def trace_and_drag_out_of_nursery(self, tls, obj):
        # This is called to fix the references inside 'obj', to ensure that
        # they are global.  If necessary, the referenced objects are copied
        # into the global area first.  This is called on the *local* copy of
        # the roots, and on the fresh *global* copy of all other reached
        # objects.
        self.gc.trace(obj, self._trace_drag_out, tls)

    def _trace_drag_out(self, root, tls):
        """Callback given to gc.trace(): 'root' is the address of a
        field containing a reference.  If the referenced object is in
        the nursery of 'tls', the field is updated to point to the
        global copy of the object, which is made now if needed."""
        obj = root.address[0]
        hdr = self.header(obj)
        #
        # Figure out if the object is GLOBAL or not by looking at its
        # address, not at its header --- to avoid cache misses and
        # pollution for all global objects
        if not self.is_in_nursery(tls, obj):
            ll_assert(hdr.tid & GCFLAG_GLOBAL != 0,
                      "trace_and_mark: non-GLOBAL obj is not in nursery")
            return        # ignore global objects
        #
        ll_assert(hdr.tid & GCFLAG_GLOBAL == 0,
                  "trace_and_mark: GLOBAL obj in nursery")
        #
        if hdr.tid & GCFLAG_WAS_COPIED != 0:
            # this local object is a root or was already marked.  Either
            # way, its 'version' field should point to the corresponding
            # global object.
            globalobj = hdr.version
            #
        else:
            # First visit to a local-only 'obj': copy it into the global area
            # (which is the nursery of the main thread; this can raise
            # MemoryError if that nursery is full)
            size = self.gc.get_size(obj)
            main_tls = self.gc.main_thread_tls
            globalobj = self.gc._malloc_local_raw(main_tls, size)
            llmemory.raw_memcopy(obj, globalobj, size)
            #
            # Initialize the header of the 'globalobj'
            globalhdr = self.header(globalobj)
            globalhdr.tid = hdr.tid | GCFLAG_GLOBAL
            #
            # Add the flags to 'localobj' to say 'has been copied now'
            hdr.tid |= GCFLAG_WAS_COPIED
            hdr.version = globalobj
            #
            # Set a temporary linked list through the globalobj's version
            # numbers.  This is normally not allowed, but it works here
            # because these new globalobjs are not visible to any other
            # thread before the commit is really complete.
            globalhdr.version = tls.pending_list
            tls.pending_list = globalobj
        #
        # Fix the original root.address[0] to point to the globalobj
        root.address[0] = globalobj