Commits

Mark Howison committed 8240d7f

updated sample to use new perturb_path (untested)

  • Participants
  • Parent commits 6663d0f
  • Branches issue-7

Comments (0)

Files changed (3)

File gabi/graph.py

             self.A.add_path(path)
         return e
 
-    def randomize_state(self):
+    def randomize_state(self, rate):
         for _ in xrange(self.E):
-            self.perturb_path()
+            self.perturb_path(rate)
         self.write_dot('random.dot', active=True)
 
     def load_assembly(self, edges):

File gabi/sample.py

 pipe.add_arg('--perturbations', '-P', type=int, metavar='N', default=1, help="""
     Perform N perturbations when proposing a new assembly in each sample.""")
 
-pipe.add_arg('--contigs', '-c', type=int, metavar='N', default=1, help="""
-    Estimated number of contigs (e.g. choromosomes, independent genomes, etc.),
-    used to calculate the prior probability of an assembly.""")
-
-pipe.add_arg('--scale', '-k', type=int, metavar='K', default=10, help="""
-    Scale parameter for the gamma distributions used for the priors.""")
-
-pipe.add_arg('--nodata', action='store_true', default=False, help="""
-    Turn off likelihood calculations and only use priors in MCMC.""")
-
-pipe.add_arg('--flatprior', action='store_true', default=False, help="""
-    Use a flat prior distribution.""")
+pipe.add_arg('--rate', '-k', type=int, metavar='K', default=10, help="""
+    Rate parameter for the exponential distribution used to set the edge
+    length of a perturbation.""")
 
 pipe.add_arg('--seed', type=long, default=-1, help="""
     Deterministic (instead of random) sampling, starting from a seed value.""")
 
 # Required arguments.
 
-pipe.add_arg('--genome_size', '-s', type=int, metavar='SIZE', help="""
-    Estimated size of the genome, used to calculate the prior probability of
-    an assembly.""")
-
 pipe.add_arg('--insert_mean', type=float, help="""
     Mean insert size of the paired read data.""")
 
 
 @pipe.stage
 def sample(
-    data, graphml, genome_size, contigs, scale, nodata, flatprior, seed,
+    data, graphml, rate, seed,
     assembly, insert_mean, insert_stdev, samples, perturbations, outdir):
     """
     Perform MCMC sampling: iteratively perturb the graph, re-calculate
     graph = Graph(graphml)
 
     # Create a sampler that calculates likelihood and prior probabilities.
-    sampler = Sampler(
-                data[-1], genome_size, contigs, scale,
-                insert_mean, insert_stdev, nodata, flatprior)
+    sampler = Sampler(data[-1], None, None, None, insert_mean, insert_stdev, flatprior=True)
 
     # Store all visited states, to avoid duplicate calculations.
     state_cache = {}
             graph.load_assembly(edges)
         else:
             # Random initial state.
-            graph.randomize_state()
+            graph.randomize_state(rate)
         # Start with an initial assembly from the unperturbed graph.
         current = sampler.sample(graph.contigs(), 'initial-contigs.fa')
         save_state = graph.get_state()
 
         # Perturb graph.
         for _ in xrange(perturbations):
-            graph.perturb_path()
+            graph.perturb_path(rate)
 
         # Run assembler on perturbed graph and calculate new log-likelihood
         # and log-prior.

File gabi/sampler.py

             self.reads = None
         else:
             self.reads = reads
-            fq = SeqIO.parse(reads[0], "fastq")
-            readlen = len(next(fq).seq)
-            utils.info("detected read length:", readlen)
-            nreads = 1 + sum(1 for record in fq)
-            utils.info("detected # of reads:", nreads)
-            self.coverage = readlen * nreads / length
-            utils.info("estimated coverage:", self.coverage)
+            if length:
+                fq = SeqIO.parse(reads[0], "fastq")
+                readlen = len(next(fq).seq)
+                utils.info("detected read length:", readlen)
+                nreads = 1 + sum(1 for record in fq)
+                utils.info("detected # of reads:", nreads)
+                self.coverage = readlen * nreads / length
+                utils.info("estimated coverage:", self.coverage)
+            else:
+                self.coverage = 323
             utils.info("setting LAP threshold to 1e-%d" % self.coverage)
 
         if flatprior: