Commits

Martin Alnæs committed d13cd5a

Experimental work on tuning variable scoring.

  • Parent commits 61c0eeb

Files changed (3)

File site-packages/uflacs/analysis/graph_ssa.py

 from ufl.classes import (Terminal, GeometricQuantity,
                          Argument, Coefficient,
                          Grad, Restricted, Indexed,
-                         MathFunction)
+                         MathFunction, BesselFunction, Power, Abs, Conditional)
 
 from uflacs.utils.log import error, uflacs_assert
 from uflacs.analysis.datastructures import (int_array, object_array,
             invdeps[d] = invdeps[d] + (i,)
     return rows_to_crs(invdeps, n, m, int)
 
-def default_cache_score_policy(vtype, ndeps, ninvdeps, partition):
-    # Start at 1 and then multiply with various heuristic factors
-    s = 1
+def default_cache_score_policy(vtype, ndeps, ninvdeps, partition, highest_used_partition):
+    # Terminals always get zero score
+    if ndeps == 0:
+        return 0
 
-    # Is the type particularly expensive to compute?
-    expensive = (MathFunction,)
+    # Estimate cost of computation
+    expensive = (MathFunction, BesselFunction, Power, Abs, Conditional)
     if vtype in expensive: # Could make a type-to-cost mapping, but this should do.
-        s *= 20
-
-    # More deps roughly correlates to more operations
-    s *= ndeps
+        cost = 10
+    else:
+        # More deps correlates to higher cost
+        cost = ndeps
 
-    # If it is reused several times let that count significantly
-    s *= ninvdeps**3 # 1->1, 2->8, 3->27
+    # Estimate reuses based on inverse dependencies and highest used partition (possible to count this more accurately!)
+    reuses = ninvdeps * max(1, 10*(highest_used_partition - partition)**2)
 
-    # Finally let partition count for something?
-    # Or perhaps we need some more information, such as
-    # when x from outer loop is used by y within inner loop.
+    # Multiply cost with number of reuses
+    s = cost * reuses
 
     return s
 
+def compute_partition_dependencies(dependencies, inverse_dependencies, partitions):
+    "TODO: Use this? Not tested. Idea: marking vertices that are used in higher partitions for reuse."
+    n = len(partitions)
+    highest_used_partition = int_array(n)
+    for i, p in enumerate(partitions):
+        ideps = inverse_dependencies[i]
+        highest_used_partition[i] = p
+        for j in ideps:
+            highest_used_partition[i] = max(highest_used_partition[i], partitions[j])
+    return highest_used_partition
+
 def compute_cache_scores(V, active, dependencies, inverse_dependencies, partitions,
                          cache_score_policy=default_cache_score_policy):
     """FIXME: Cover with tests.
     TODO: Experiment with heuristics later when we have functional code generation.
     """
     n = len(V)
+    highest_used_partition = compute_partition_dependencies(dependencies, inverse_dependencies, partitions)
     score = int_array(n)
     for i,v in enumerate(V):
         if active[i]:
            deps = dependencies[i]
            ndeps = len(deps)
             invdeps = inverse_dependencies[i]
             ninvdeps = len(invdeps)
+            hup = highest_used_partition[i]
             p = partitions[i]
-            s = cache_score_policy(type(v), ndeps, ninvdeps, p)
+            s = cache_score_policy(type(v), ndeps, ninvdeps, p, hup)
         else:
             s = -1
         score[i] = s
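
For intuition, here is a minimal standalone sketch (not part of the commit) mirroring the arithmetic of the new default_cache_score_policy; the score helper and the MathFunction stand-in below are illustrative, not the actual UFL types:

    class MathFunction:  # stand-in for ufl.classes.MathFunction
        pass

    def score(vtype, ndeps, ninvdeps, partition, highest_used_partition):
        # Same arithmetic as default_cache_score_policy above.
        if ndeps == 0:
            return 0  # terminals always score zero
        cost = 10 if vtype in (MathFunction,) else ndeps
        reuses = ninvdeps * max(1, 10*(highest_used_partition - partition)**2)
        return cost * reuses

    # An expensive vertex with one dependency, reused twice inside its
    # own partition: 10 * (2 * 1) = 20.
    print(score(MathFunction, 1, 2, 1, 1))   # 20
    # The same vertex also consumed two partitions deeper: the quadratic
    # distance factor dominates, 10 * (2 * 10*2**2) = 800.
    print(score(MathFunction, 1, 2, 1, 3))   # 800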

File site-packages/uflacs/generation/compiler.py

     e2i, NV, W, terminals, nvs = rebuild_scalar_e2i(G, DEBUG=False)
     # Target expression is NV[nvs[:]].
 
-    # Assuming a scalar expression, i.e. only one target symbol
-    # TODO: This is good for integrands, but handle nonscalars for expressions!
-    if 0: # TODO: Not sure if we need this anymore?
-        assert expression.shape() == ()
-        assert expression.free_indices() == ()
-        assert len(nvs) == 1
-
     # Straighten out V to represent single operations
     tt.step('build_scalar_graph_vertices')
     se2i, SV, target_variables = build_scalar_graph_vertices([NV[s] for s in nvs])
 
     # Compute factorization of arguments
     tt.step('compute_dependencies')
-    # FIXME: Fix factorization algorithm for multiple target variables! Or is there any point?
+    # TODO: Fix factorization algorithm for multiple target variables! Or is there any point?
     if parameters["enable_factorization"] and len(target_variables) == 1:
         #AV, FV, IM
         argument_factors, factorized_vertices, argument_factorization = compute_argument_factorization(SV, target_variables, dependencies)
         SV, se2i, dependencies = rebuild_scalar_graph_from_factorization(argument_factors, factorized_vertices, argument_factorization)
-        # TODO: target_variables
         target_variables = [len(SV)-1]
 
     # Mark subexpressions that are actually needed for the final result
     tt.step('mark_active')
     max_symbol = len(SV)
-
     active, num_active = mark_active(max_symbol, dependencies, target_variables)
 
     # Mark each subexpression with the loop it belongs inside
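
For context, the mark_active call above is essentially a reachability pass over the scalar dependency graph. A minimal sketch of that idea, assuming dependencies maps each vertex index to an iterable of the vertex indices it depends on (an assumption, not the uflacs implementation):

    def mark_active_sketch(max_symbol, dependencies, target_variables):
        # Mark every vertex reachable from the targets by following
        # dependency edges; anything unmarked is dead for the result.
        active = [False]*max_symbol
        stack = list(target_variables)
        while stack:
            i = stack.pop()
            if not active[i]:
                active[i] = True
                stack.extend(dependencies[i])
        return active, sum(active)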

File site-packages/uflacs/params.py

         "enable_profiling": False,
         "enable_factorization": False, #True, # Fails for hyperelasticity demo in dolfin, needs debugging
         "max_registers": 1024, # 8 B * 1024 = 8 KB # TODO: Tune this for something very complex
-        "score_threshold": 3, # TODO: Scoring is work in progress and this will change meaning later
+        "score_threshold": 11, # TODO: Scoring is work in progress and this will change meaning later
         }
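
Assuming score_threshold is a lower bound on the score a vertex needs before it is held in a register (a reading of the surrounding code, not stated in this commit), the bump from 3 to 11 lines up with the new policy's arithmetic: an expensive vertex used only once within its own partition scores cost 10 * reuses 1 = 10, just below the bar, so only vertices that are reused, or consumed from a deeper partition, would clear it.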