pypy / pypy / rpython / memory / gc / concurrentms.py

import time, sys
from pypy.rpython.lltypesystem import lltype, llmemory, llarena, llgroup, rffi
from pypy.rpython.lltypesystem.llmemory import raw_malloc_usage
from pypy.rpython.lltypesystem.lloperation import llop
from pypy.rpython.annlowlevel import llhelper
from pypy.translator.tool.cbuild import ExternalCompilationInfo
from pypy.rlib.objectmodel import we_are_translated, running_on_llinterp
from pypy.rlib.debug import ll_assert
from pypy.rlib.rarithmetic import ovfcheck, LONG_BIT, r_uint
from pypy.rpython.memory.gc.base import GCBase
from pypy.module.thread import ll_thread

#
# A "mostly concurrent" mark&sweep GC.  It can delegate most of the GC
# operations to a separate thread, which runs concurrently with the
# mutator (i.e. the rest of the program).  Based on the idea that the
# concurrent collection should be relatively fast --- perhaps 20-25% of
# the time, after which the collector thread just sleeps --- it uses a
# snapshot-at-the-beginning technique with a "deletion barrier", i.e. a
# write barrier that prevents changes to objects that have not been
# scanned yet (Abraham and Patel, Yuasa).
#
# Reference: The Garbage Collection Handbook, Richard Jones and Antony
# Hosking and Eliot Moss.
#

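# The following toy function is an added, NOT_RPYTHON illustration of the
# idea above; it is not used by the GC.  With plain Python dicts and
# hypothetical names, it shows how a Yuasa-style deletion barrier keeps
# every object that was reachable at the snapshot alive: before the
# mutator overwrites a pointer in a not-yet-scanned object, the object is
# scanned (here: its referents are pushed on the gray list).

def _illustrate_deletion_barrier():
    "NOT_RPYTHON: illustration only."
    heap = {'a': ['b'], 'b': [], 'c': []}   # object -> list of referents
    scanned = set()                         # objects the collector has seen
    gray = []                               # objects waiting to be scanned

    def write_barrier(obj):
        # deletion barrier: scan 'obj' before the mutator changes it
        if obj not in scanned:
            scanned.add(obj)
            gray.extend(heap[obj])

    write_barrier('a')          # the mutator is about to do heap['a'][0]='c'
    heap['a'][0] = 'c'
    return gray                 # ['b']: the snapshot-time referent survives
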
WORD = LONG_BIT // 8
WORD_POWER_2 = {32: 2, 64: 3}[LONG_BIT]
assert 1 << WORD_POWER_2 == WORD
MAXIMUM_SIZE = sys.maxint - (3*WORD-1)


# Objects start with an integer 'tid', which is decomposed as follows.
# Lowest byte: one of the following values (which are all odd, so they
# let us tell whether the 'tid' is valid or is just a word-aligned
# address):
MARK_VALUE_1      = 0x4D    # 'M', 77
MARK_VALUE_2      = 0x6B    # 'k', 107
MARK_VALUE_STATIC = 0x53    # 'S', 83
# Next lower byte: a combination of flags.
FL_WITHHASH = 0x0100
# And the high half of the word contains the numeric typeid.

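# Added illustration (NOT_RPYTHON, not used by the GC): a plain-integer
# model of the 'tid' layout described above.  The real code goes through
# llop.combine_high_ushort/extract_high_ushort; the explicit shift of 16
# below simply assumes a 16-bit typeid in the high half of a 32-bit word.

def _illustrate_tid_layout(typeid16, mark, flags=0):
    "NOT_RPYTHON: packs a tid and unpacks it again, for checking."
    assert mark in (MARK_VALUE_1, MARK_VALUE_2, MARK_VALUE_STATIC)
    assert (flags & ~0xFF00) == 0                # flags live in byte 1
    tid = (typeid16 << 16) | flags | mark        # pack
    return tid, tid >> 16, tid & 0xFF00, tid & 0xFF   # unpack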

class MostlyConcurrentMarkSweepGC(GCBase):
    _alloc_flavor_ = "raw"
    inline_simple_malloc = True
    inline_simple_malloc_varsize = True
    needs_write_barrier = True
    prebuilt_gc_objects_are_static_roots = False
    malloc_zero_filled = True
    #gcflag_extra = GCFLAG_FINALIZATION_ORDERING

    HDR = lltype.Struct('header', ('tid', lltype.Signed))
    HDRPTR = lltype.Ptr(HDR)
    HDRSIZE = llmemory.sizeof(HDR)
    NULL = lltype.nullptr(HDR)
    typeid_is_in_field = 'tid'
    withhash_flag_is_in_field = 'tid', FL_WITHHASH

    TRANSLATION_PARAMS = {'page_size': 4096,
                          'small_request_threshold': 35*WORD,
                          }

    def __init__(self, config, page_size=64, small_request_threshold=24,
                 **kwds):
        # 'small_request_threshold' is the largest size that we will
        # satisfy using our own page mechanism.  Larger requests just
        # go to the system malloc().
        GCBase.__init__(self, config, **kwds)
        assert small_request_threshold % WORD == 0
        self.small_request_threshold = small_request_threshold
        self.page_size = page_size
        self.free_pages = lltype.nullptr(self.HDR)
        self.pagelists_length = small_request_threshold // WORD + 1
        #
        # The following are arrays of 36 linked lists: the linked lists
        # at indices 1 to 35 correspond to pages that store objects of
        # size  1 * WORD  to  35 * WORD,  and the linked list at index 0
        # is a list of all larger objects.
        def list_of_addresses_per_small_size():
            return lltype.malloc(rffi.CArray(self.HDRPTR),
                                 self.pagelists_length, flavor='raw',
                                 zero=True, immortal=True)
        # 1-35: a linked list of all pages; 0: a linked list of all larger objs
        self.nonfree_pages = list_of_addresses_per_small_size()
        # a snapshot of 'nonfree_pages' done when the collection starts
        self.collect_pages = list_of_addresses_per_small_size()
        # 1-35: free list of non-allocated locations; 0: unused
        self.free_lists    = list_of_addresses_per_small_size()
        # 1-35: head and tail of the free list built by the collector thread
        # 0: head and tail of the linked list of surviving large objects
        self.collect_heads = list_of_addresses_per_small_size()
        self.collect_tails = list_of_addresses_per_small_size()
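        # (Added note, illustration only.)  With the translated parameters
        # WORD == 8 and small_request_threshold == 35*WORD, these arrays
        # have 36 entries, and a rounded request of 'rawtotalsize' bytes
        # (GC header included) maps to index rawtotalsize >> WORD_POWER_2,
        # i.e. rawtotalsize // WORD: a 24-byte object uses free_lists[3].
        # Anything larger than 35*WORD goes to the system malloc() and is
        # tracked through index 0 of 'nonfree_pages'.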
        #
        # The following character is either MARK_VALUE_1 or MARK_VALUE_2,
        # and represents the character that must be in the 'mark' field
        # of an object header in order for the object to be considered as
        # marked.  Objects whose 'mark' field have the opposite value are
        # not marked yet; the collector thread will mark them if they are
        # still alive, or sweep them away if they are not reachable.
        # The special value MARK_VALUE_STATIC is initially used in the
        # 'mark' field of static prebuilt GC objects.
        self.current_mark = MARK_VALUE_1
        #
        # When the mutator thread wants to trigger the next collection,
        # it scans its own stack roots and prepares everything, then
        # sets 'collection_running' to 1, and releases
        # 'ready_to_start_lock'.  This triggers the collector thread,
        # which re-acquires 'ready_to_start_lock' and does its job.
        # When done it releases 'finished_lock'.  The mutator thread is
        # responsible for resetting 'collection_running' to 0.
        self.collection_running = 0
        #self.ready_to_start_lock = ...built in setup()
        #self.finished_lock = ...built in setup()
        #
        # NOT_RPYTHON: set to non-empty in _teardown()
        self._teardown_now = []
        #
        def collector_start():
            if we_are_translated():
                self.collector_run()
            else:
                try:
                    self.collector_run()
                except Exception, e:
                    print 'Crash!', e.__class__.__name__, e
                    self._exc_info = sys.exc_info()
        #
        collector_start._should_never_raise_ = True
        self.collector_start = collector_start
        #
        #self.mutex_lock = ...built in setup()
        self.gray_objects = self.AddressStack()
        self.extra_objects_to_mark = self.AddressStack()
        self.prebuilt_root_objects = self.AddressStack()
        #
        # Write barrier: actually a deletion barrier, triggered when there
        # is a collection running and the mutator tries to change an object
        # that was not scanned yet.
        self._init_writebarrier_logic()
        #
        self.main_thread_ident = ll_thread.get_ident()

    def setup(self):
        "Start the concurrent collector thread."
        GCBase.setup(self)
        #
        self.ready_to_start_lock = ll_thread.allocate_ll_lock()
        self.finished_lock = ll_thread.allocate_ll_lock()
        self.mutex_lock = ll_thread.allocate_ll_lock()
        #
        self.acquire(self.finished_lock)
        self.acquire(self.ready_to_start_lock)
        #
        self.collector_ident = ll_thread.c_thread_start_nowrapper(
            llhelper(ll_thread.CALLBACK, self.collector_start))
        assert self.collector_ident != -1

    def _teardown(self):
        "Stop the collector thread after tests have run."
        if self._teardown_now:
            return
        self.wait_for_the_end_of_collection()
        #
        # start the next collection, but with "stop" in _teardown_now,
        # which should shut down the collector thread
        self._teardown_now.append(-1)
        self.release(self.ready_to_start_lock)
        self.acquire(self.finished_lock)
        if not we_are_translated():
            del self.ready_to_start_lock, self.finished_lock

    def get_type_id(self, obj):
        tid = self.header(obj).tid
        return llop.extract_high_ushort(llgroup.HALFWORD, tid)

    def combine(self, typeid16, mark, flags):
        return llop.combine_high_ushort(lltype.Signed, typeid16, mark | flags)

    def init_gc_object_immortal(self, addr, typeid, flags=0):
        # 'flags' is ignored here
        hdr = llmemory.cast_adr_to_ptr(addr, lltype.Ptr(self.HDR))
        hdr.tid = self.combine(typeid, MARK_VALUE_STATIC, 0)

    def malloc_fixedsize_clear(self, typeid, size,
                               needs_finalizer=False, contains_weakptr=False):
        assert not needs_finalizer  # XXX
        assert not contains_weakptr # XXX
        size_gc_header = self.gcheaderbuilder.size_gc_header
        totalsize = size_gc_header + size
        rawtotalsize = raw_malloc_usage(totalsize)
        if rawtotalsize <= self.small_request_threshold:
            ll_assert(rawtotalsize & (WORD - 1) == 0,
                      "fixedsize not properly rounded")
            #
            n = rawtotalsize >> WORD_POWER_2
            result = self.free_lists[n]
            if result != self.NULL:
                self.free_lists[n] = self.cast_int_to_hdrptr(result.tid)
                obj = self.grow_reservation(result, totalsize)
                hdr = self.header(obj)
                hdr.tid = self.combine(typeid, self.current_mark, 0)
                return llmemory.cast_adr_to_ptr(obj, llmemory.GCREF)
                #
        return self._malloc_slowpath(typeid, size)

    def malloc_varsize_clear(self, typeid, length, size, itemsize,
                             offset_to_length):
        size_gc_header = self.gcheaderbuilder.size_gc_header
        nonvarsize = size_gc_header + size
        #
        # Compute the maximal length that makes the object still below
        # 'small_request_threshold'.  All the following logic is usually
        # constant-folded because size and itemsize are constants (due
        # to inlining).
        maxsize = self.small_request_threshold - raw_malloc_usage(nonvarsize)
        if maxsize < 0:
            toobig = r_uint(0)    # the nonvarsize alone is too big
        elif raw_malloc_usage(itemsize):
            toobig = r_uint(maxsize // raw_malloc_usage(itemsize)) + 1
        else:
            toobig = r_uint(sys.maxint) + 1

        if r_uint(length) < r_uint(toobig):
            # With the above checks we know now that totalsize cannot be more
            # than 'small_request_threshold'; in particular, the + and *
            # cannot overflow.
            totalsize = nonvarsize + itemsize * length
            totalsize = llarena.round_up_for_allocation(totalsize)
            rawtotalsize = raw_malloc_usage(totalsize)
            ll_assert(rawtotalsize & (WORD - 1) == 0,
                      "round_up_for_allocation failed")
            #
            n = rawtotalsize >> WORD_POWER_2
            result = self.free_lists[n]
            if result != self.NULL:
                self.free_lists[n] = self.cast_int_to_hdrptr(result.tid)
                obj = self.grow_reservation(result, totalsize)
                hdr = self.header(obj)
                hdr.tid = self.combine(typeid, self.current_mark, 0)
                (obj + offset_to_length).signed[0] = length
                return llmemory.cast_adr_to_ptr(obj, llmemory.GCREF)
        #
        # If the total size of the object would be larger than
        # 'small_request_threshold', or if the free_list is empty,
        # then allocate it externally.  We also go there if 'length'
        # is actually negative.
        return self._malloc_varsize_slowpath(typeid, length)

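    # Added worked example (illustration only) for the 'toobig' bound
    # computed above: with small_request_threshold == 35*WORD == 280
    # (WORD == 8), a varsized type with nonvarsize == 24 and itemsize == 8
    # gives maxsize == 280 - 24 == 256 and toobig == 256 // 8 + 1 == 33,
    # so lengths 0..32 (totalsize up to 24 + 8*32 == 280) can still be
    # served from the small-object pages, and anything longer falls
    # through to _malloc_varsize_slowpath().
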
    def _malloc_slowpath(self, typeid, size):
        # Slow-path malloc.  Call this with 'size' being a valid and
        # rounded number, between WORD and up to MAXIMUM_SIZE.
        size_gc_header = self.gcheaderbuilder.size_gc_header
        totalsize = size_gc_header + size
        rawtotalsize = raw_malloc_usage(totalsize)
        ll_assert(rawtotalsize & (WORD - 1) == 0,
                  "malloc_slowpath: non-rounded size")
        #
        if rawtotalsize <= self.small_request_threshold:
            #
            # Case 1: we have run out of the free list corresponding to
            # the size.  Grab the next free page.
            newpage = self.free_pages
            if newpage == self.NULL:
                self.allocate_next_arena()
                newpage = self.free_pages
            self.free_pages = self.cast_int_to_hdrptr(newpage.tid)
            #
            # Put the free page in the list 'nonfree_pages[n]'.  This is
            # a linked list chained through the first word of each page.
            n = rawtotalsize >> WORD_POWER_2
            newpage.tid = self.cast_hdrptr_to_int(self.nonfree_pages[n])
            self.nonfree_pages[n] = newpage
            #
            # Initialize the free page to contain objects of the given
            # size.  This requires setting up all object locations in the
            # page, linking them in the free list.
            head = self.free_lists[n]
            ll_assert(not head, "_malloc_slowpath: unexpected free_lists[n]")
            i = self.page_size - rawtotalsize
            limit = rawtotalsize + raw_malloc_usage(self.HDRSIZE)
            newpageadr = llmemory.cast_ptr_to_adr(newpage)
            newpageadr = llarena.getfakearenaaddress(newpageadr)
            while i >= limit:
                adr = newpageadr + i
                llarena.arena_reserve(adr, self.HDRSIZE)
                p = llmemory.cast_adr_to_ptr(adr, self.HDRPTR)
                p.tid = self.cast_hdrptr_to_int(head)
                head = p
                i -= rawtotalsize
            self.free_lists[n] = head
            result = newpageadr + i
            #
            # Done: all object locations are linked, apart from
            # 'result', which is the first object location in the page.
            # Note that if the size is not an exact divisor of
            # 4096-WORD, there are a few wasted WORDs, which we place at
            # the start of the page rather than at the end (Hans Boehm,
            # xxx ref).
            #
        else:
            # Case 2: the object is too large, so allocate it directly
            # with the system malloc().  xxx on 32-bit, we'll prefer 64-bit
            # alignment of the object by always allocating an 8-byte header
            rawtotalsize += 8
            block = llarena.arena_malloc(rawtotalsize, 2)
            if not block:
                raise MemoryError
            llarena.arena_reserve(block, self.HDRSIZE)
            blockhdr = llmemory.cast_adr_to_ptr(block, self.HDRPTR)
            blockhdr.tid = self.cast_hdrptr_to_int(self.nonfree_pages[0])
            self.nonfree_pages[0] = blockhdr
            result = block + 8
        #
        llarena.arena_reserve(result, totalsize)
        hdr = llmemory.cast_adr_to_ptr(result, self.HDRPTR)
        hdr.tid = self.combine(typeid, self.current_mark, 0)
        #
        obj = result + size_gc_header
        return llmemory.cast_adr_to_ptr(obj, llmemory.GCREF)
        #
    _malloc_slowpath._dont_inline_ = True

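    # Added illustration (NOT_RPYTHON, never called by the GC): recompute,
    # with plain integers, the offsets at which _malloc_slowpath() above
    # carves object locations out of a fresh page.  For example, with
    # page_size == 4096, WORD == 8 and rawtotalsize == 32 it returns
    # [4064, 4032, ..., 64, 32]: offset 0 keeps the page link, offsets
    # 8..31 are the wasted words at the start of the page, and offset 32
    # is the location handed directly to the caller.
    def _illustrate_page_carving(self, rawtotalsize):
        "NOT_RPYTHON: illustration only."
        hdrsize = raw_malloc_usage(self.HDRSIZE)
        offsets = []
        i = self.page_size - rawtotalsize
        while i >= rawtotalsize + hdrsize:   # same bound as the loop above
            offsets.append(i)                # these end up in free_lists[n]
            i -= rawtotalsize
        offsets.append(i)                    # this one is returned directly
        return offsets
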
    def _malloc_varsize_slowpath(self, typeid, length):
        #
        if length < 0:
            # negative length!  This likely comes from an overflow
            # earlier.  We will just raise MemoryError here.
            raise MemoryError
        #
        # Compute the total size, carefully checking for overflows.
        nonvarsize = self.fixed_size(typeid)
        itemsize = self.varsize_item_sizes(typeid)
        try:
            varsize = ovfcheck(itemsize * length)
            totalsize = ovfcheck(nonvarsize + varsize)
        except OverflowError:
            raise MemoryError
        #
        # Detect very rare cases of overflows
        if raw_malloc_usage(totalsize) > MAXIMUM_SIZE:
            raise MemoryError("rare case of overflow")
        #
        totalsize = llarena.round_up_for_allocation(totalsize)
        result = self._malloc_slowpath(typeid, totalsize)
        #
        offset_to_length = self.varsize_offset_to_length(typeid)
        obj = llmemory.cast_ptr_to_adr(result)
        (obj + offset_to_length).signed[0] = length
        return result
    _malloc_varsize_slowpath._dont_inline_ = True

    def allocate_next_arena(self):
        # xxx for now, allocate one page at a time with the system malloc()
        page = llarena.arena_malloc(self.page_size, 2)     # zero-filled
        if not page:
            raise MemoryError
        llarena.arena_reserve(page, self.HDRSIZE)
        page = llmemory.cast_adr_to_ptr(page, self.HDRPTR)
        page.tid = 0
        self.free_pages = page

    def grow_reservation(self, hdr, totalsize):
        # Transform 'hdr', which used to point to just a HDR,
        # into a pointer to a full object of size 'totalsize'.
        # This is a no-op after translation.  Returns the
        # address of the full object.
        adr = llmemory.cast_ptr_to_adr(hdr)
        adr = llarena.getfakearenaaddress(adr)
        llarena.arena_reset(adr, self.HDRSIZE, 0)
        llarena.arena_reserve(adr, totalsize)
        return adr + llmemory.raw_malloc_usage(self.HDRSIZE)
    grow_reservation._always_inline_ = True

    def write_barrier(self, newvalue, addr_struct):
        mark = self.header(addr_struct).tid & 0xFF
        if mark != self.current_mark:
            self.force_scan(addr_struct)

    def writebarrier_before_copy(self, source_addr, dest_addr,
                                 source_start, dest_start, length):
        mark = self.header(dest_addr).tid & 0xFF
        if mark != self.current_mark:
            self.force_scan(dest_addr)
        return True

    def _init_writebarrier_logic(self):
        #
        def force_scan(obj):
            self.acquire(self.mutex_lock)
            mark = self.header(obj).tid & 0xFF
            if mark != self.current_mark:
                #
                if mark == MARK_VALUE_STATIC:
                    # This is the first write into a prebuilt GC object.
                    # Record it in 'prebuilt_root_objects'.  Even if a
                    # collection marking phase is running now, we can
                    # ignore this object, because at the snapshot-at-the-
                    # beginning it didn't contain any pointer to non-
                    # prebuilt objects.
                    self.prebuilt_root_objects.append(obj)
                    self.set_mark(obj, self.current_mark)
                    #
                else:
                    # it is only possible to reach this point if there is
                    # a collection running in collector_mark(), before it
                    # does mutex_lock itself.  Check this:
                    ll_assert(self.collection_running == 1,
                              "write barrier: wrong call?")
                    #
                    self.set_mark(obj, self.current_mark)
                    self.trace(obj, self._barrier_add_extra, None)
                #
            self.release(self.mutex_lock)
        #
        force_scan._dont_inline_ = True
        self.force_scan = force_scan

    def _barrier_add_extra(self, root, ignored):
        self.extra_objects_to_mark.append(root.address[0])


    def wait_for_the_end_of_collection(self):
        """In the mutator thread: wait for the collection currently
        running (if any) to finish."""
        if self.collection_running != 0:
            self.acquire(self.finished_lock)
            self.collection_running = 0
            #
            # Check invariants
            ll_assert(not self.extra_objects_to_mark.non_empty(),
                      "objs left behind in extra_objects_to_mark")
            ll_assert(not self.gray_objects.non_empty(),
                      "objs left behind in gray_objects")
            #
            # Grab the results of the last collection: read the collector's
            # 'collect_heads/collect_tails' and merge them with the mutator's
            # 'free_lists'.
            n = 1
            while n < self.pagelists_length:
                if self.collect_tails[n] != self.NULL:
                    self.collect_tails[n].tid = self.cast_hdrptr_to_int(
                        self.free_lists[n])
                    self.free_lists[n] = self.collect_heads[n]
                n += 1
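            # (Added note, illustration only.)  The two assignments in the
            # loop above splice the collector's list  H -> ... -> T
            # (collect_heads[n] to collect_tails[n]) in front of the
            # mutator's current list  F -> ... -> NULL:  setting
            # T.tid = F  and  free_lists[n] = H  yields
            # H -> ... -> T -> F -> ... -> NULL  without copying anything.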
            #
            # Do the same with 'collect_heads[0]/collect_tails[0]'.
            if self.collect_tails[0] != self.NULL:
                self.collect_tails[0].tid = self.cast_hdrptr_to_int(
                    self.nonfree_pages[0])
                self.nonfree_pages[0] = self.collect_heads[0]
            #
            if self.DEBUG:
                self.debug_check_lists()


    def collect(self, gen=2):
        """
        gen=0: Trigger a collection if none is running.  Never blocks.
        
        gen=1: The same, but if a collection is running, wait for it
        to finish before triggering the next one.  Guarantees that
        objects not reachable when collect() is called will soon be
        freed.

        gen>=2: The same, but wait for the triggered collection to
        finish.  Guarantees that objects not reachable when collect()
        is called will be freed by the time collect() returns.
        """
        if gen >= 1 or self.collection_running == 0:
            self.trigger_next_collection()
            if gen >= 2:
                self.wait_for_the_end_of_collection()

    def trigger_next_collection(self):
        """In the mutator thread: triggers the next collection."""
        #
        # In case the previous collection is not over yet, wait for it
        self.wait_for_the_end_of_collection()
        #
        # Scan the stack roots and the refs in non-GC objects
        self.root_walker.walk_roots(
            MostlyConcurrentMarkSweepGC._add_stack_root,  # stack roots
            MostlyConcurrentMarkSweepGC._add_stack_root,  # in prebuilt non-gc
            None)                         # static in prebuilt gc
        #
        # Add the prebuilt root objects that have been written to
        self.prebuilt_root_objects.foreach(self._add_prebuilt_root, None)
        #
        # Flip the global mark value, which has the effect that the state
        # of all objects instantly goes from "marked" to "not marked"
        self.current_mark = self.other_mark(self.current_mark)
        #
        # Copy a few 'mutator' fields to 'collector' fields:
        # 'collect_pages' make linked lists of all nonfree pages at the
        # start of the collection (unlike the 'nonfree_pages' lists, which
        # the mutator will continue to grow).
        n = 0
        while n < self.pagelists_length:
            self.collect_pages[n] = self.nonfree_pages[n]
            n += 1
        self.nonfree_pages[0] = self.NULL
        #
        # Start the collector thread
        self.collection_running = 1
        self.release(self.ready_to_start_lock)

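    # Added illustration (not part of the GC): one collection cycle uses
    # the two locks as a simple handshake between trigger_next_collection()
    # above and collector_run() below.  Both locks start out acquired by
    # the mutator (see setup()):
    #
    #   mutator thread                      collector thread
    #   --------------                      ----------------
    #   scan stack roots -> gray_objects    blocked on ready_to_start_lock
    #   flip current_mark, snapshot pages
    #   collection_running = 1
    #   release(ready_to_start_lock) -----> acquires it, marks, then sweeps
    #   ...keeps allocating/mutating...
    #   acquire(finished_lock) <----------- release(finished_lock)
    #   collection_running = 0
    #   merge collect_heads/tails into free_lists
    #
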
    def _add_stack_root(self, root):
        obj = root.address[0]
        self.gray_objects.append(obj)

    def _add_prebuilt_root(self, obj, ignored):
        self.gray_objects.append(obj)

    def debug_check_lists(self):
        # just check that they are correct, non-infinite linked lists
        self.debug_check_list(self.nonfree_pages[0])
        n = 1
        while n < self.pagelists_length:
            self.debug_check_list(self.free_lists[n])
            n += 1

    def debug_check_list(self, page):
        try:
            previous_page = self.NULL
            while page != self.NULL:
                # prevents constant-folding and detects loops of length 1
                ll_assert(page != previous_page, "loop!")
                previous_page = page
                page = self.cast_int_to_hdrptr(page.tid)
        except KeyboardInterrupt:
            ll_assert(False, "interrupted")
            raise

    def acquire(self, lock):
        if (we_are_translated() or
                ll_thread.get_ident() != self.main_thread_ident):
            ll_thread.c_thread_acquirelock(lock, 1)
        else:
            while rffi.cast(lltype.Signed,
                            ll_thread.c_thread_acquirelock(lock, 0)) == 0:
                time.sleep(0.001)
                # ---------- EXCEPTION FROM THE COLLECTOR THREAD ----------
                if hasattr(self, '_exc_info'):
                    self._reraise_from_collector_thread()

    def release(self, lock):
        ll_thread.c_thread_releaselock(lock)

    def _reraise_from_collector_thread(self):
        exc, val, tb = self._exc_info
        raise exc, val, tb

    def cast_int_to_hdrptr(self, tid):
        return llmemory.cast_adr_to_ptr(llmemory.cast_int_to_adr(tid),
                                        self.HDRPTR)

    def cast_hdrptr_to_int(self, hdr):
        return llmemory.cast_adr_to_int(llmemory.cast_ptr_to_adr(hdr),
                                        "symbolic")


    def collector_run(self):
        """Main function of the collector's thread."""
        #
        # hack: make it an infinite loop, but in a way that the annotator
        # doesn't notice.  This prevents the caller from automatically
        # ending in a "raise AssertionError", which would be annoying
        # because we don't want any exception in this thread.
        while self.collection_running < 99:
            #
            # Wait for the lock to be released
            self.acquire(self.ready_to_start_lock)
            #
            # For tests: detect when we have to shut down
            if not we_are_translated():
                if self._teardown_now:
                    self.release(self.finished_lock)
                    break
            #
            # Mark
            self.collector_mark()
            self.collection_running = 2
            #
            # Sweep
            self.collector_sweep()
            self.release(self.finished_lock)


    def other_mark(self, mark):
        ll_assert(mark == MARK_VALUE_1 or mark == MARK_VALUE_2,
                  "bad mark value")
        return mark ^ (MARK_VALUE_1 ^ MARK_VALUE_2)

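    # Added note (illustration only): MARK_VALUE_1 ^ MARK_VALUE_2 == 0x26,
    # so the xor above maps 0x4D ('M') to 0x6B ('k') and back.  Both values
    # are odd, so a mark byte can never be confused with the low byte of a
    # word-aligned address that the free lists store in 'tid'.
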
    def is_marked(self, obj, current_mark):
        mark = self.header(obj).tid & 0xFF
        ll_assert(mark in (MARK_VALUE_1, MARK_VALUE_2, MARK_VALUE_STATIC),
                  "bad mark byte in object")
        return mark == current_mark

    def set_mark(self, obj, newmark):
        _set_mark(self.header(obj), newmark)

    def collector_mark(self):
        while True:
            #
            # Do marking.  The following function call is interrupted
            # if the mutator's write barrier adds new objects to
            # 'extra_objects_to_mark'.
            self._collect_mark()
            #
            # Move the objects from 'extra_objects_to_mark' to
            # 'gray_objects'.  This requires the mutex lock.
            # There are typically only a few objects to move here,
            # unless XXX we've hit the write barrier of a large array
            self.acquire(self.mutex_lock)
            while self.extra_objects_to_mark.non_empty():
                obj = self.extra_objects_to_mark.pop()
                self.gray_objects.append(obj)
            self.release(self.mutex_lock)
            #
            # If 'gray_objects' is empty, we are done: there should be
            # no possible case in which more objects are being added to
            # 'extra_objects_to_mark' concurrently, because 'gray_objects'
            # and 'extra_objects_to_mark' were already empty before we
            # acquired the 'mutex_lock', so all reachable objects have
            # been marked.
            if not self.gray_objects.non_empty():
                break

    def _collect_mark(self):
        current_mark = self.current_mark
        while self.gray_objects.non_empty():
            obj = self.gray_objects.pop()
            if not self.is_marked(obj, current_mark):
                self.set_mark(obj, current_mark)
                self.trace(obj, self._collect_add_pending, None)
                #
                # Interrupt early if the mutator's write barrier adds stuff
                # to that list.  Note that the check is imprecise because
                # it is not lock-protected, but that's good enough.  The
                # idea is that we trace in priority objects flagged with
                # the write barrier, because they are more likely to
                # reference further objects that will soon be accessed too.
                if self.extra_objects_to_mark.non_empty():
                    return

    def _collect_add_pending(self, root, ignored):
        self.gray_objects.append(root.address[0])

    def collector_sweep(self):
        self._collect_sweep_large_objects()
        n = 1
        while n < self.pagelists_length:
            self._collect_sweep_pages(n)
            n += 1

    def _collect_sweep_large_objects(self):
        block = self.collect_pages[0]
        nonmarked = self.other_mark(self.current_mark)
        linked_list = self.NULL
        first_block_in_linked_list = self.NULL
        while block != self.NULL:
            nextblock = self.cast_int_to_hdrptr(block.tid)
            blockadr = llmemory.cast_ptr_to_adr(block)
            blockadr = llarena.getfakearenaaddress(blockadr)
            hdr = llmemory.cast_adr_to_ptr(blockadr + 8, self.HDRPTR)
            mark = hdr.tid & 0xFF
            if mark == nonmarked:
                # the object is still not marked.  Free it.
                llarena.arena_free(blockadr)
                #
            else:
                # the object was marked: relink it
                ll_assert(mark == self.current_mark,
                          "bad mark in large object")
                block.tid = self.cast_hdrptr_to_int(linked_list)
                linked_list = block
                if first_block_in_linked_list == self.NULL:
                    first_block_in_linked_list = block
            block = nextblock
        #
        self.collect_heads[0] = linked_list
        self.collect_tails[0] = first_block_in_linked_list

    def _collect_sweep_pages(self, n):
        # sweep all pages from the linked list starting at 'page',
        # containing objects of fixed size 'object_size'.
        page = self.collect_pages[n]
        object_size = n << WORD_POWER_2
        linked_list = self.NULL
        first_loc_in_linked_list = self.NULL
        nonmarked = self.other_mark(self.current_mark)
        while page != self.NULL:
            i = self.page_size - object_size
            limit = raw_malloc_usage(self.HDRSIZE)
            pageadr = llmemory.cast_ptr_to_adr(page)
            pageadr = llarena.getfakearenaaddress(pageadr)
            while i >= limit:
                adr = pageadr + i
                hdr = llmemory.cast_adr_to_ptr(adr, self.HDRPTR)
                #
                if (hdr.tid & 0xFF) == nonmarked:
                    # the location contains really an object (and is not just
                    # part of a linked list of free locations), and moreover
                    # the object is still not marked.  Free it by inserting
                    # it into the linked list.
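                    # (Added note.)  This test cannot match a location
                    # that is already part of a free list: such a location
                    # stores a word-aligned address in 'tid', whose low
                    # byte is even, whereas the mark values are all odd.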
                    llarena.arena_reset(adr, object_size, 0)
                    llarena.arena_reserve(adr, self.HDRSIZE)
                    hdr = llmemory.cast_adr_to_ptr(adr, self.HDRPTR)
                    hdr.tid = self.cast_hdrptr_to_int(linked_list)
                    linked_list = hdr
                    if first_loc_in_linked_list == self.NULL:
                        first_loc_in_linked_list = hdr
                    # XXX detect when the whole page is freed again
                    #
                    # Clear the data, in preparation for the following
                    # malloc_fixedsize_clear().
                    size_of_int = raw_malloc_usage(
                        llmemory.sizeof(lltype.Signed))
                    llarena.arena_reset(adr + size_of_int,
                                        object_size - size_of_int, 2)
                #
                i -= object_size
            #
            page = self.cast_int_to_hdrptr(page.tid)
        #
        self.collect_heads[n] = linked_list
        self.collect_tails[n] = first_loc_in_linked_list


    def identityhash(self, obj):
        obj = llmemory.cast_ptr_to_adr(obj)
        if self.header(obj).tid & FL_WITHHASH:
            obj += self.get_size(obj)
            return obj.signed[0]
        else:
            return llmemory.cast_adr_to_int(obj)

# ____________________________________________________________
#
# Hack to write the 'mark' or the 'flags' bytes of an object header
# without overwriting the whole word.  Essential in the rare case where
# the other thread might be concurrently writing the other byte.

concurrent_setter_lock = ll_thread.allocate_lock()

def emulate_set_mark(p, v):
    "NOT_RPYTHON"
    assert v in (MARK_VALUE_1, MARK_VALUE_2, MARK_VALUE_STATIC)
    concurrent_setter_lock.acquire(True)
    p.tid = (p.tid &~ 0xFF) | v
    concurrent_setter_lock.release()

def emulate_set_flags(p, v):
    "NOT_RPYTHON"
    assert (v & ~0xFF00) == 0
    concurrent_setter_lock.acquire(True)
    p.tid = (p.tid &~ 0xFF00) | v
    concurrent_setter_lock.release()

if sys.byteorder == 'little':
    eci = ExternalCompilationInfo(
        post_include_bits = ["""
#define pypy_concurrentms_set_mark(p, v)   ((char*)p)[0] = v
#define pypy_concurrentms_set_flags(p, v)  ((char*)p)[1] = v
        """])
elif sys.byteorder == 'big':
    eci = ExternalCompilationInfo(
        post_include_bits = [r"""
#define pypy_concurrentms_set_mark(p, v)   ((char*)p)[sizeof(long)-1] = v
#define pypy_concurrentms_set_flags(p, v)  ((char*)p)[sizeof(long)-2] = v
        """])
else:
    raise NotImplementedError(sys.byteorder)

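# Added sanity illustration (NOT_RPYTHON, not used anywhere): the byte
# index poked by the C macros above is simply where the low byte of a
# native long lands in memory, which the struct module can show directly.

def _illustrate_mark_byte_index():
    "NOT_RPYTHON: returns 0 on little-endian, sizeof(long)-1 on big-endian."
    import struct
    packed = struct.pack('l', MARK_VALUE_1)   # a tid with typeid 0, no flags
    return packed.index(struct.pack('B', MARK_VALUE_1))
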
_set_mark = rffi.llexternal("pypy_concurrentms_set_mark",
                           [MostlyConcurrentMarkSweepGC.HDRPTR, lltype.Signed],
                           lltype.Void, compilation_info=eci, _nowrapper=True,
                           _callable=emulate_set_mark)
_set_flags = rffi.llexternal("pypy_concurrentms_set_flags",
                           [MostlyConcurrentMarkSweepGC.HDRPTR, lltype.Signed],
                           lltype.Void, compilation_info=eci, _nowrapper=True,
                           _callable=emulate_set_flags)