Commits

Mark Howison committed a1cfc05

graph: contigs() selects all paths through active graph (untested)

  • Participants
  • Parent commits 2eabf13
  • Branches issue-7

Comments (0)

Files changed (1)

File gabi/graph.py

 
 import ast
 import networkx as nx
-
 from itertools import izip
 from random import randint
-
 from Bio.Seq import Seq
 from Bio.SeqRecord import SeqRecord
+from biolite import utils
 
 # Parameters for drawing graphviz dot plots
 dot_graph_size = (10, 10)
 
     # Graph output
 
-    def contig_header(self, path):
-        header = []
-        for e in path:
-            header.append('#node%d' % self.tail(e))
-            header.append('#edge%d' % e)
-        return ','.join(header)
-
-    def contig_record(self, path, i, desc=""):
+    def contig(self, path, i, desc=""):
         """
         Build a FASTA SeqRecord for the `path` through the graph.
         """
         return SeqRecord(
             Seq(seq), 'contig-%d' % i, description=str(len(seq))+desc)
 
-    def contigs(self, records=True, paths=False):
+    def contigs(self, paths=False):
         """
         An iterator that yields a FASTA SeqRecord for each path in the
         assembly graph.
         """
         ncontigs = 0
-        self.reset_visited(active=True)
-        # First, find all linear contigs starting at a node with an out-edge
-        # but no in-edge.
-        for n in self.A.nodes_iter():
-            if self.A.in_degree(n) == 0:
-                path = []
-                while n is not None:
-                    assert self.visited(n) == False, path + [n]
-                    self.visited(n, True)
-                    path.append(n)
-                    n = self.forward_active(n)
-                if records:
-                    yield self.contig_record(path, ncontigs)
-                elif paths:
-                    yield path
-                else:
-                    yield self.contig_header(path)
-                ncontigs += 1
-        # Next, find all circular contigs from the remaining nodes.
-        for start in self.A.nodes_iter():
-            if not self.visited(start):
-                self.visited(start, True)
-                path = [start]
-                n = self.forward_active(start)
-                while not (n is None or n == start):
-                    assert self.visited(n) == False, path + [n]
-                    self.visited(n, True)
-                    path.append(n)
-                    n = self.forward_active(n)
-                if records:
-                    yield self.contig_record(path, ncontigs)
-                elif paths:
-                    yield path
+        for component in nx.weakly_connected_components(self.A):
+            paths = [[n] for n in component if self.A.in_degree(n) == 0]
+            if not paths:
+                utils.die("found circular component in active graph")
+            while paths:
+                path = paths.pop()
+                if self.A.out_degree(path[-1]) == 0:
+                    if paths:
+                        yield path
+                    else:
+                        yield self.contig(path, ncontigs)
+                    ncontigs += 1
                 else:
-                    yield self.contig_header(path)
-                ncontigs += 1
+                    for n in self.A.successors_iter(path[-1]):
+                        paths.append(list(path) + [n])
 
     def contigs_annotated(self, frequencies):
         # Reset assembly graph.
                 self.A.add_node(int(id[4:]))
             elif id.startswith("edge"):
                 self.activate(int(id[4:]))
-        for i, path in enumerate(self.contigs(records=False, paths=True)):
+        for i, path in enumerate(self.contigs(paths=True)):
             posteriors = []
             j = 0
             for n in path: