Commits

Remi Meier  committed 5f23756

port the write barrier placement code from c4 to place read barriers in c7 more
intelligently

  • Participants
  • Parent commits 0febb0d
  • Branches stmgc-c7

Comments (0)

Files changed (5)

File rpython/translator/stm/breakfinder.py

     #'jit_assembler_call',
     'stm_enter_callback_call',
     'stm_leave_callback_call',
+    'stm_transaction_break',
     ])
 
 for tb in TRANSACTION_BREAK:

File rpython/translator/stm/readbarrier.py

 from rpython.flowspace.model import SpaceOperation, Constant, Variable
-from rpython.translator.unsimplify import varoftype
+from rpython.translator.unsimplify import varoftype, insert_empty_block, insert_empty_startblock
 from rpython.rtyper.lltypesystem import lltype
 from rpython.translator.stm.support import is_immutable
+from rpython.translator.simplify import join_blocks
 
-
+MALLOCS = set([
+    'malloc', 'malloc_varsize',
+    'malloc_nonmovable', 'malloc_nonmovable_varsize',
+    ])
 READ_OPS = set(['getfield', 'getarrayitem', 'getinteriorfield', 'raw_load'])
 
 
+
+
+def needs_barrier(frm, to):
+    return to > frm
+
 def is_gc_ptr(T):
     return isinstance(T, lltype.Ptr) and T.TO._gckind == 'gc'
 
+class Renaming(object):
+    def __init__(self, newvar, category):
+        self.newvar = newvar        # a Variable or a Constant
+        self.TYPE = newvar.concretetype
+        self.category = category
 
-def insert_stm_read_barrier(transformer, graph):
-    # We need to put enough 'stm_read' in the graph so that any
-    # execution of a READ_OP on some GC object is guaranteed to also
-    # execute either 'stm_read' or 'stm_write' on the same GC object
-    # during the same transaction.
-    #
-    # XXX this can be optimized a lot, but for now we go with the
-    # simplest possible solution...
-    #
-    gcremovetypeptr = transformer.translator.config.translation.gcremovetypeptr
 
-    for block in graph.iterblocks():
-        if not block.operations:
-            continue
-        newops = []
+
+class BlockTransformer(object):
+
+    def __init__(self, stmtransformer, block):
+        self.stmtransformer = stmtransformer
+        self.block = block
+        self.patch = None
+        self.inputargs_category = None
+        self.inputargs_category_per_link = {}
+
+    def init_start_block(self):
+        # all input args have category "any"
+        from_outside = ['A'] * len(self.block.inputargs)
+        self.inputargs_category_per_link[None] = from_outside
+        self.update_inputargs_category()
+
+
+    def analyze_inside_block(self, graph):
+        gcremovetypeptr = self.stmtransformer.translator.config.translation.gcremovetypeptr
+
+        wants_a_barrier = {}
         stm_ignored = False
-        for op in block.operations:
+        for op in self.block.operations:
             is_getter = (op.opname in READ_OPS and
                          op.result.concretetype is not lltype.Void and
                          is_gc_ptr(op.args[0].concretetype))
                 # typeptr is always immutable
                 pass
             elif ((op.opname in ('getarraysize', 'getinteriorarraysize') and
-                  is_gc_ptr(op.args[0].concretetype)) or
+                   is_gc_ptr(op.args[0].concretetype)) or
                   (is_getter and is_immutable(op))):
                 # immutable getters
+                pass
+            elif is_getter:
+                if not stm_ignored:
+                    wants_a_barrier[op] = 'R'
+            elif op.opname == 'weakref_deref':
                 # 'weakref_deref': kind of immutable, but the GC has to see
                 #     which transactions read from a dying weakref, so we
                 #     need the barrier nonetheless...
-                pass
-            elif is_getter:
-                if not stm_ignored:
-                    v_none = varoftype(lltype.Void)
-                    newops.append(SpaceOperation('stm_read',
-                                                 [op.args[0]], v_none))
-                    transformer.read_barrier_counts += 1
+                wants_a_barrier[op] = 'R'
             elif op.opname == 'stm_ignored_start':
-                assert stm_ignored == False
+                assert not stm_ignored, "nested 'with stm_ignored'"
                 stm_ignored = True
             elif op.opname == 'stm_ignored_stop':
-                assert stm_ignored == True
+                assert stm_ignored, "stm_ignored_stop without start?"
                 stm_ignored = False
-            newops.append(op)
-        assert stm_ignored == False
-        block.operations = newops
+
+            if stm_ignored and op in wants_a_barrier:
+                assert wants_a_barrier[op] == 'R'
+                if is_getter and is_gc_ptr(op.result.concretetype):
+                    raise Exception(
+                        "%r: 'with stm_ignored:' contains unsupported "
+                        "operation %r reading a GC pointer" % (graph, op))
+        #
+        if stm_ignored:
+            raise Exception("%r: 'with stm_ignored:' code body too complex"
+                            % (graph,))
+        self.wants_a_barrier = wants_a_barrier
+
+
+    def flow_through_block(self):
+
+        def renfetch(v):
+            try:
+                return renamings[v]
+            except KeyError:
+                ren = Renaming(v, 'A')
+                renamings[v] = ren
+                return ren
+
+        def get_category_or_null(v):
+            # 'v' is an original variable here, or a constant
+            if isinstance(v, Constant) and not v.value:    # a NULL constant
+                return 'Z'
+            if v in renamings:
+                return renamings[v].category
+            if isinstance(v, Constant):
+                return 'R'
+            else:
+                return 'A'
+
+        def renamings_get(v):
+            try:
+                ren = renamings[v]
+            except KeyError:
+                return v       # unmodified
+            v2 = ren.newvar
+            if v2.concretetype == v.concretetype:
+                return v2
+            v3 = varoftype(v.concretetype)
+            newoperations.append(SpaceOperation('cast_pointer', [v2], v3))
+            if lltype.castable(ren.TYPE, v3.concretetype) > 0:
+                ren.TYPE = v3.concretetype
+            return v3
+
+        # note: 'renamings' maps old vars to new vars, but cast_pointers
+        # are done lazily.  It means that the two vars may not have
+        # exactly the same type.
+        renamings = {}   # {original-var: Renaming(newvar, category)}
+        newoperations = []
+        stmtransformer = self.stmtransformer
+
+        # make the initial trivial renamings needed to have some precise
+        # categories for the input args
+        for v, cat in zip(self.block.inputargs, self.inputargs_category):
+            if is_gc_ptr(v.concretetype):
+                assert cat is not None
+                renamings[v] = Renaming(v, cat)
+
+        for op in self.block.operations:
+            #
+            if (op.opname in ('cast_pointer', 'same_as') and
+                    is_gc_ptr(op.result.concretetype)):
+                renamings[op.result] = renfetch(op.args[0])
+                continue
+            #
+            to = self.wants_a_barrier.get(op)
+            if to is not None:
+                ren = renfetch(op.args[0])
+                frm = ren.category
+                if needs_barrier(frm, to):
+                    stmtransformer.read_barrier_counts += 1
+                    v_none = varoftype(lltype.Void)
+                    newoperations.append(
+                        SpaceOperation('stm_read', [ren.newvar], v_none))
+                    ren.category = to
+            #
+            # XXX: from c4: we can probably just append the original op
+            newop = SpaceOperation(op.opname,
+                                   [renamings_get(v) for v in op.args],
+                                   op.result)
+            newoperations.append(newop)
+            #
+            if (stmtransformer.break_analyzer.analyze(op)
+                or op.opname == 'debug_stm_flush_barrier'):
+                # this operation can perform a transaction break:
+                # all pointers are lowered to 'A'
+                for ren in renamings.values():
+                    ren.category = 'A'
+            #
+            if op.opname in MALLOCS:
+                assert op.result not in renamings
+                renamings[op.result] = Renaming(op.result, 'R')
+            #
+            if op.opname in ('setfield', 'setarrayitem', 'setinteriorfield',
+                             'raw_store'):
+                # compare with logic in stmframework.py
+                # ops that need a write barrier also make the var 'R'
+                if (op.args[-1].concretetype is not lltype.Void
+                    and is_gc_ptr(op.args[0].concretetype)):
+                    renfetch(op.args[0]).category = 'R'
+
+        if isinstance(self.block.exitswitch, Variable):
+            switchv = renamings_get(self.block.exitswitch)
+        else:
+            switchv = None
+        blockoperations = newoperations
+        linkoperations = []
+        for link in self.block.exits:
+            output_categories = []
+            for v in link.args:
+                if is_gc_ptr(v.concretetype):
+                    cat = get_category_or_null(v)
+                else:
+                    cat = None
+                output_categories.append(cat)
+            newoperations = []
+            newargs = [renamings_get(v) for v in link.args]
+            linkoperations.append((newargs, newoperations, output_categories))
+        #
+        # Record how we'd like to patch the block, but don't do any
+        # patching yet
+        self.patch = (blockoperations, switchv, linkoperations)
+
+
+    def update_targets(self, block_transformers):
+        (_, _, linkoperations) = self.patch
+        assert len(linkoperations) == len(self.block.exits)
+        targetbts = []
+        for link, (_, _, output_categories) in zip(self.block.exits,
+                                                   linkoperations):
+            targetblock = link.target
+            if targetblock not in block_transformers:
+                continue      # ignore the exit block
+            targetbt = block_transformers[targetblock]
+            targetbt.inputargs_category_per_link[link] = output_categories
+            if targetbt.update_inputargs_category():
+                targetbts.append(targetbt)
+        return set(targetbts)
+
+    def update_inputargs_category(self):
+        values = self.inputargs_category_per_link.values()
+        newcats = []
+        for i, v in enumerate(self.block.inputargs):
+            if is_gc_ptr(v.concretetype):
+                cats = [output_categories[i] for output_categories in values]
+                assert None not in cats
+                newcats.append(min(cats))
+            else:
+                newcats.append(None)
+        if newcats != self.inputargs_category:
+            self.inputargs_category = newcats
+            return True
+        else:
+            return False
+
+
+    def patch_now(self):
+        if self.patch is None:
+            return
+        newoperations, switchv, linkoperations = self.patch
+        self.block.operations = newoperations
+        if switchv is not None:
+            self.block.exitswitch = switchv
+        assert len(linkoperations) == len(self.block.exits)
+        for link, (newargs, newoperations, _) in zip(self.block.exits,
+                                                     linkoperations):
+            link.args[:] = newargs
+            if newoperations:
+                # must put them in a fresh block along the link
+                annotator = self.stmtransformer.translator.annotator
+                newblock = insert_empty_block(annotator, link,
+                                              newoperations)
+
+
+def insert_stm_read_barrier(stmtransformer, graph):
+    """This function uses the following characters for 'categories':
+
+           * 'A': any general pointer
+           * 'R': the read (or write) barrier was applied
+           * 'Z': the null constant
+
+       The letters are chosen so that a barrier is needed to change a
+       pointer from category x to category y if and only if y > x.
+    """
+    # We need to put enough 'stm_read' in the graph so that any
+    # execution of a READ_OP on some GC object is guaranteed to also
+    # execute either 'stm_read' or 'stm_write' on the same GC object
+    # during the same transaction.
+
+    join_blocks(graph)
+    annotator = stmtransformer.translator.annotator
+    insert_empty_startblock(annotator, graph)
+
+    block_transformers = {}
+
+    for block in graph.iterblocks():
+        if block.operations == ():
+            continue
+        bt = BlockTransformer(stmtransformer, block)
+        bt.analyze_inside_block(graph)
+        block_transformers[block] = bt
+
+    bt = block_transformers[graph.startblock]
+    bt.init_start_block()
+    pending = set([bt])
+
+    while pending:
+        bt = pending.pop()
+        bt.flow_through_block()
+        pending |= bt.update_targets(block_transformers)
+
+    for bt in block_transformers.values():
+        bt.patch_now()

File rpython/translator/stm/test/test_readbarrier.py

 from rpython.rlib.objectmodel import stm_ignored
 from rpython.translator.stm.test.transform_support import BaseTestTransform
-from rpython.rtyper.lltypesystem import lltype
+from rpython.rlib.rstm import register_invoke_around_extcall
+from rpython.rtyper.lltypesystem import lltype, rffi
+from rpython.rtyper.lltypesystem.lloperation import llop
 
 
 class TestReadBarrier(BaseTestTransform):
         assert res == 42
         assert self.read_barriers == [x1]
 
+    def test_simple_read_after_write(self):
+        X = lltype.GcStruct('X', ('foo', lltype.Signed))
+        x1 = lltype.malloc(X, immortal=True)
+        x1.foo = 42
+
+        def f1(n):
+            x1.foo = 7 # write barrier will be done
+            return x1.foo
+
+        res = self.interpret(f1, [4])
+        assert res == 7
+        assert self.read_barriers == [] # implicitly by the write-barrier
+
     def test_stm_ignored_read(self):
         X = lltype.GcStruct('X', ('foo', lltype.Signed))
         x1 = lltype.malloc(X, immortal=True)
         res = self.interpret(f1, [2])
         assert res == 42
         assert self.read_barriers == [x1]
+
+    def test_array_size(self):
+        array_gc = lltype.GcArray(('z', lltype.Signed))
+        array_nongc = lltype.Array(('z', lltype.Signed))
+        Q = lltype.GcStruct('Q',
+                            ('gc', lltype.Ptr(array_gc)),
+                            ('raw', lltype.Ptr(array_nongc)))
+        q = lltype.malloc(Q, immortal=True)
+        q.gc = lltype.malloc(array_gc, n=3, flavor='gc', immortal=True)
+        q.raw = lltype.malloc(array_nongc, n=5, flavor='raw', immortal=True)
+        def f1(n):
+            if n == 1:
+                return len(q.gc)
+            else:
+                return len(q.raw)
+        res = self.interpret(f1, [1])
+        assert self.read_barriers == [q]
+        res = self.interpret(f1, [0])
+        assert self.read_barriers == [q]
+
+
+    def test_multiple_reads(self):
+        X = lltype.GcStruct('X', ('foo', lltype.Signed),
+                                 ('bar', lltype.Signed))
+        x1 = lltype.malloc(X, immortal=True)
+        x1.foo = 6
+        x1.bar = 7
+        x2 = lltype.malloc(X, immortal=True)
+        x2.foo = 81
+        x2.bar = -1
+
+        def f1(n):
+            if n > 1:
+                return x2.foo * x2.bar
+            else:
+                return x1.foo * x1.bar
+
+        res = self.interpret(f1, [4])
+        assert res == -81
+        assert self.read_barriers == [x2]
+
+
+    def test_dont_repeat_read_barrier_after_malloc(self):
+        X = lltype.GcStruct('X', ('foo', lltype.Signed))
+        x1 = lltype.malloc(X, immortal=True, zero=True)
+        def f1(n):
+            t1 = x1.foo
+            lltype.malloc(X)
+            t1 += x1.foo
+            return t1
+
+        self.interpret(f1, [4])
+        assert self.read_barriers == [x1]
+
+    def test_call_external_release_gil(self):
+        X = lltype.GcStruct('X', ('foo', lltype.Signed))
+        def f1(p):
+            register_invoke_around_extcall()
+            x1 = p.foo
+            external_release_gil()
+            x2 = p.foo
+            return x1 * x2
+
+        x = lltype.malloc(X, immortal=True); x.foo = 6
+        res = self.interpret(f1, [x])
+        assert res == 36
+        assert self.read_barriers == [x, x]
+
+    def test_call_external_any_gcobj(self):
+        X = lltype.GcStruct('X', ('foo', lltype.Signed))
+        def f1(p):
+            register_invoke_around_extcall()
+            x1 = p.foo
+            external_any_gcobj()
+            x2 = p.foo
+            return x1 * x2
+
+        x = lltype.malloc(X, immortal=True); x.foo = 6
+        res = self.interpret(f1, [x])
+        assert res == 36
+        assert self.read_barriers == [x]
+
+    def test_call_external_safest(self):
+        X = lltype.GcStruct('X', ('foo', lltype.Signed))
+        def f1(p):
+            register_invoke_around_extcall()
+            x1 = p.foo
+            external_safest()
+            x2 = p.foo
+            return x1 * x2
+
+        x = lltype.malloc(X, immortal=True); x.foo = 6
+        res = self.interpret(f1, [x])
+        assert res == 36
+        assert self.read_barriers == [x]
+
+    def test_simple_loop(self):
+        X = lltype.GcStruct('X', ('foo', lltype.Signed))
+        def f1(x, i):
+            while i > 0:
+                i -= x.foo
+            return i
+        x = lltype.malloc(X, immortal=True); x.foo = 1
+        res = self.interpret(f1, [x, 5])
+        assert res == 0
+        # for now we get this.  Later, we could probably optimize it
+        assert self.read_barriers == [x] * 5
+
+
+    def test_read_immutable(self):
+        class Foo:
+            _immutable_ = True
+
+        def f1(n):
+            x = Foo()
+            x.foo = 4
+            llop.debug_stm_flush_barrier(lltype.Void)
+            if n > 1:
+                n = x.foo
+            llop.debug_stm_flush_barrier(lltype.Void)
+            return x.foo + n
+
+        res = self.interpret(f1, [4])
+        assert res == 8
+        assert len(self.read_barriers) == 0
+
+    def test_read_immutable_prebuilt(self):
+        class Foo:
+            _immutable_ = True
+        x1 = Foo()
+        x1.foo = 42
+        x2 = Foo()
+        x2.foo = 81
+
+        def f1(n):
+            if n > 1:
+                return x2.foo
+            else:
+                return x1.foo
+
+        res = self.interpret(f1, [4])
+        assert res == 81
+        assert self.read_barriers == []
+
+    def test_immut_barrier_before_weakref_deref(self):
+        import weakref
+        class Foo:
+            pass
+
+        def f1():
+            x = Foo()
+            w = weakref.ref(x)
+            llop.debug_stm_flush_barrier(lltype.Void)
+            return w()
+
+        self.interpret(f1, [])
+        assert len(self.read_barriers) == 1
+
+
+    def test_transaction_breaking_ops(self):
+        class X:
+            a = 1
+        x = X()
+
+        def f1(f):
+            x.a = f
+            t = x.a # no read barrier
+            llop.stm_commit_if_not_atomic(lltype.Void)
+            t += x.a
+            llop.stm_start_if_not_atomic(lltype.Void)
+            t += x.a
+            llop.stm_transaction_break(lltype.Void)
+            t += x.a
+            llop.stm_enter_callback_call(lltype.Void)
+            t += x.a
+            llop.stm_leave_callback_call(lltype.Void)
+            t += x.a
+            return t
+
+        self.interpret(f1, [1])
+        assert len(self.read_barriers) == 5
+
+
+external_release_gil = rffi.llexternal('external_release_gil', [], lltype.Void,
+                                       _callable=lambda: None,
+                                       random_effects_on_gcobjs=True,
+                                       releasegil=True)   # GIL is released
+external_any_gcobj = rffi.llexternal('external_any_gcobj', [], lltype.Void,
+                                     _callable=lambda: None,
+                                     random_effects_on_gcobjs=True,
+                                     releasegil=False)   # GIL is not released
+external_safest = rffi.llexternal('external_safest', [], lltype.Void,
+                                  _callable=lambda: None,
+                                  random_effects_on_gcobjs=False,
+                                  releasegil=False)   # GIL is not released

File rpython/translator/stm/test/transform_support.py

     stm_ignored = False
 
     def eval(self):
-        self.gcptrs_actually_read = []
         result = LLFrame.eval(self)
-        for x in self.gcptrs_actually_read:
-            assert x in self.llinterpreter.tester.read_barriers
         return result
 
     def all_stm_ptrs(self):
     def op_stm_read(self, obj):
         self.llinterpreter.tester.read_barriers.append(obj)
 
-    def op_stm_write(self, obj):
-        self.op_stm_read(obj)      # implicitly counts as a read barrier too
-
     def op_stm_ignored_start(self):
         assert self.stm_ignored == False
         self.stm_ignored = True
         self.stm_ignored = False
 
     def op_getfield(self, obj, field):
-        if obj._TYPE.TO._gckind == 'gc':
-            if obj._TYPE.TO._immutable_field(field):
-                if not self.stm_ignored:
-                    self.gcptrs_actually_read.append(obj)
         return LLFrame.op_getfield(self, obj, field)
 
     def op_setfield(self, obj, fieldname, fieldvalue):
-        if obj._TYPE.TO._gckind == 'gc':
-            T = lltype.typeOf(fieldvalue)
-            if isinstance(T, lltype.Ptr) and T.TO._gckind == 'gc':
-                self.check_category(obj, 'W')
-            else:
-                self.check_category(obj, 'V')
-            # convert R -> Q all other pointers to the same object we can find
-            for p in self.all_stm_ptrs():
-                if p._category == 'R' and p._T == obj._T and p == obj:
-                    _stmptr._category.__set__(p, 'Q')
         return LLFrame.op_setfield(self, obj, fieldname, fieldvalue)
 
     def op_cast_pointer(self, RESTYPE, obj):
         if obj._TYPE.TO._gckind == 'gc':
-            cat = self.check_category(obj, None)
             p = opimpl.op_cast_pointer(RESTYPE, obj)
-            return _stmptr(p, cat)
+            return p
         return lltype.cast_pointer(RESTYPE, obj)
     op_cast_pointer.need_result_type = True
 
     def op_cast_opaque_ptr(self, RESTYPE, obj):
         if obj._TYPE.TO._gckind == 'gc':
-            cat = self.check_category(obj, None)
             p = lltype.cast_opaque_ptr(RESTYPE, obj)
-            return _stmptr(p, cat)
+            return p
         return LLFrame.op_cast_opaque_ptr(self, RESTYPE, obj)
     op_cast_opaque_ptr.need_result_type = True
 
     def op_malloc(self, obj, flags):
         assert flags['flavor'] == 'gc'
-        # convert all existing pointers W -> V
-        for p in self.all_stm_ptrs():
-            if p._category == 'W':
-                _stmptr._category.__set__(p, 'V')
         p = LLFrame.op_malloc(self, obj, flags)
-        ptr2 = _stmptr(p, 'W')
-        self.llinterpreter.tester.writemode.add(ptr2._obj)
+        ptr2 = p
         return ptr2
 
     def transaction_break(self):
-        # convert -> I all other pointers to the same object we can find
-        for p in self.all_stm_ptrs():
-            if p._category > 'I':
-                _stmptr._category.__set__(p, 'I')
+        pass
 
     def op_stm_commit_transaction(self):
         self.transaction_break()
 
+    def op_stm_transaction_break(self):
+        self.transaction_break()
+
+    def op_stm_commit_if_not_atomic(self):
+        self.transaction_break()
+
+    def op_stm_start_if_not_atomic(self):
+        self.transaction_break()
+
+    def op_stm_enter_callback_call(self):
+        self.transaction_break()
+
+    def op_stm_leave_callback_call(self):
+        self.transaction_break()
+
     def op_stm_begin_inevitable_transaction(self):
         self.transaction_break()
 

File rpython/translator/stm/transform.py

 from rpython.translator.stm.inevitable import insert_turn_inevitable
 from rpython.translator.stm.readbarrier import insert_stm_read_barrier
+from rpython.translator.stm.breakfinder import TransactionBreakAnalyzer
 from rpython.translator.c.support import log
 
 
 
     def transform_read_barrier(self):
         self.read_barrier_counts = 0
+        self.break_analyzer = TransactionBreakAnalyzer(self.translator)
+
         for graph in self.translator.graphs:
             insert_stm_read_barrier(self, graph)
+
+        del self.break_analyzer
         log.info("%d read barriers inserted" % (self.read_barrier_counts,))
 
     def transform_turn_inevitable(self):