Commits

Peter Arrenbrecht committed c3c77c6

move clever_sample to Client._sample; improve tracing and timing

Comments (0)

Files changed (1)

src/discovery_tonfa.py

         i += 1
     return i
 
-def clever_sample(dag, nodes, stop):
-    if len(nodes) < MAX_SAMPLE:
-        return set(nodes)
-
-    if TRACE: print "headsof"
-    heads = dag.headsofconnectedset(nodes)
-    if TRACE: print heads
-    sample = set()
-
-    dist = {}
-    order = []
-    visit = list(heads)
-    seen = set(stop)
-    cands = []
-
-    if TRACE: print "heads -> roots"
-    roots = set()
-
-    while visit:
-        curr = visit.pop(0)
-        if curr in seen:
-            continue
-        d = dist.setdefault(curr, 1)
-        order.append(curr)
-        seen.add(curr)
-        cands.append(curr)
-
-        if not len(list(p for p in dag.parents(curr) if p not in stop)):
-            roots.add(curr)
-
-        for p in dag.parents(curr):
-            dist.setdefault(p, d+1)
-            visit.append(p)
-
-    if TRACE: print "sample"
-    factor = 1
-    for n in order:
-        if dist[n] > factor:
-            factor *= 2
-        if dist[n] == factor:
-            sample.add(n)
-
-    if TRACE: print "roots -> heads"
-    visit = list(roots)
-    order = []
-    dist = {}
-    seen = set()
-    while visit:
-        curr = visit.pop(0)
-        if curr in seen:
-            continue
-        d = dist.setdefault(curr, 1)
-        order.append(curr)
-        seen.add(curr)
-        for c in dag.children(curr):
-            if c not in nodes:
-                continue
-            dist.setdefault(c, d+1)
-            visit.append(c)
-
-    if TRACE: print "sample"
-    factor = 1
-    for n in order:
-        if dist[n] > factor:
-            factor *= 2
-        if dist[n] == factor:
-            sample.add(n)
-
-    assert sample
-    sample.difference_update(heads)
-    if len(sample)+len(heads) > MAX_SAMPLE:
-        sample = set(random.sample(sample, MAX_SAMPLE-len(heads)))
-    elif len(sample)+len(heads) < 200:
-        if TRACE: print "Filling from", len(sample) + len(heads)
-        sample.update(random.sample(list(set(cands) - sample - heads), 200 - len(sample) - len(heads)))
-    sample.update(heads)
-    return sample
 
 class Client(Participant):
 
         missing = set() # own nodes the server lacks
 
         self.writer.step("sampling")
-        sample = clever_sample(dag, undecided, common)
+        sample = self._sample(undecided, common)
 
         # first roundtrip queries server's heads too
         self.writer.step("querying")
             while undecided:
 
                 self.writer.show("still undecided: %i, sample size: %s"
-                                 % (len(undecided), len(sample)))
+                                 % (len(undecided), len(sample)), emptyline=True)
 
                 self.writer.step("updating common")
                 commoninsample = set(n for i, n in enumerate(sample) if yesno[i])
                     break
 
                 self.writer.step("sampling")
-                sample = clever_sample(dag, undecided, common)
+                sample = self._sample(undecided, common)
                 self.writer.step("querying")
                 i += 1
                 yesno, allremaining = server.discover(sample)
         self.writer.show("number of iterations: %i" % i)
         return result
 
+    def _sample(self, nodes, stop):
+        if len(nodes) <= MAX_SAMPLE:
+            return set(nodes)
+
+        dag = self.dag
+
+        self.writer.indent()
+        self.writer.step("headsof")
+        heads = dag.headsofconnectedset(nodes)
+        self.writer.done()
+
+        sample = set()
+
+        dist = {}
+        order = []
+        visit = list(heads)
+        seen = set(stop)
+        cands = []
+
+        self.writer.step("heads -> roots")
+        roots = set()
+
+        while visit:
+            curr = visit.pop(0)
+            if curr in seen:
+                continue
+            d = dist.setdefault(curr, 1)
+            order.append(curr)
+            seen.add(curr)
+            cands.append(curr)
+
+            if not len(list(p for p in dag.parents(curr) if p not in stop)):
+                roots.add(curr)
+
+            for p in dag.parents(curr):
+                dist.setdefault(p, d+1)
+                visit.append(p)
+
+        self.writer.step("sample")
+        factor = 1
+        for n in order:
+            if dist[n] > factor:
+                factor *= 2
+            if dist[n] == factor:
+                sample.add(n)
+
+        self.writer.step("roots -> heads")
+        visit = list(roots)
+        order = []
+        dist = {}
+        seen = set()
+        while visit:
+            curr = visit.pop(0)
+            if curr in seen:
+                continue
+            d = dist.setdefault(curr, 1)
+            order.append(curr)
+            seen.add(curr)
+            for c in dag.children(curr):
+                if c not in nodes:
+                    continue
+                dist.setdefault(c, d+1)
+                visit.append(c)
+
+        self.writer.step("sample")
+        factor = 1
+        for n in order:
+            if dist[n] > factor:
+                factor *= 2
+            if dist[n] == factor:
+                sample.add(n)
+
+        self.writer.step("finalize sample")
+        assert sample
+        sample.difference_update(heads)
+        if len(sample)+len(heads) > MAX_SAMPLE:
+            sample = set(random.sample(sample, MAX_SAMPLE - len(heads)))
+        elif len(sample)+len(heads) < 200:
+            more = MAX_SAMPLE - len(sample) - len(heads)
+            self.writer.step("filling with %d random samples" % more)
+            sample.update(random.sample(list(set(cands) - sample - heads), more))
+        sample.update(heads)
+
+        self.writer.unindent()
+        return sample
+
 
 class Server(Participant):
 
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.