Commits

Mark Howison  committed 2eabf13

sampler: added RSEM abundance to likelihood calculation

  • Participants
  • Parent commits 8169144
  • Branches issue-7

Comments (0)

Files changed (1)

File gabi/sampler.py

         self.tmpdir = tempfile.mkdtemp()
         utils.info("created tmpdir for Bowtie2 calls:", self.tmpdir)
 
-        self.logfile = os.path.join(self.tmpdir, "lap.log")
+        self.logfile = os.path.join(self.tmpdir, "likelihood.log")
         self.dbfile = os.path.join(self.tmpdir, "assembly")
         self.samfile = os.path.join(self.tmpdir, "lap.sam")
+        self.rsempath = os.path.join(self.tmpdir, "rsem")
+        self.abundancefile = os.path.join(self.tmpdir, "abundance.txt")
 
     def likelihood(self, filename):
         """
         """
         if self.reads:
             with open(self.logfile, 'w') as f:
+                insert_min = '%.0f' % (self.insert_mean - 2*self.insert_stdev)
+                insert_max = '%.0f' % (self.insert_mean + 2*self.insert_stdev)
                 try:
                     subprocess.check_call(
+                        [ "rsem-prepare-reference", filename, self.rsempath ],
+                        stdout=f, stderr=f)
+                    subprocess.check_call(
+                        [
+                            "rsem-calculate-expression", "--paired-end",
+                            "--fragment-length-min", insert_min,
+                            "--fragment-length-max", insert_max,
+                            "-1", self.reads[0], "-2", self.reads[1],
+                            "-p", self.threads, self.rsempath
+                        ],
+                        stdout=f, stderr=f)
+                    subprocess.check_call(
+                        [ "awk", "NR>1{print $1,$4}", self.rsempath+".isoforms.results" ],
+                        stdout=open(self.abundancefile, 'w'), stderr=f)
+                    subprocess.check_call(
                         [ "bowtie2-build", filename, self.dbfile ],
                         stdout=f, stderr=f)
                     subprocess.check_call(
                             "--reorder", "--no-mixed", "-k", "10000",
                             "-p", self.threads, "-x", self.dbfile,
                             "-1", self.reads[0], "-2", self.reads[1],
-                            "-I", '%.0f' % (self.insert_mean - 2*self.insert_stdev),
-                            "-X", '%.0f' % (self.insert_mean + 2*self.insert_stdev),
+                            "-I", insert_min, "-X", insert_max,
                             "-S", self.samfile
                         ],
                         stdout=f, stderr=f)
                     lap = subprocess.check_output(
-                        'calc_prob.py -s {} -a {} -1 {} -2 {} -o fr -m {} -t {} -X {} -I {} | sum_prob.py -t 1e-{}'.format(
+                        'calc_prob.py -s {} -a {} -1 {} -2 {} -o fr -m {} -t {} -X {} -I {} -n {} | sum_prob.py -t 1e-{}'.format(
                             self.samfile, filename, self.reads[0], self.reads[1],
                             int(self.insert_mean), int(self.insert_stdev),
-                            int(self.insert_mean + 2*self.insert_stdev),
-                            int(self.insert_mean - 2*self.insert_stdev),
-                            self.coverage),
+                            insert_min, insert_max, self.abundancefile,
+                            min(323, self.coverage)),
                         shell=True, stderr=f)
                     return float(lap.split()[0])
                 except subprocess.CalledProcessError as e:
-                    utils.die(e, "\nfailed to call Bowtie2/LAP: check the logfile", self.logfile)
+                    utils.info(open(self.logfile).read())
+                    utils.die(e, "\nfailed to call RSEM/Bowtie2/LAP: check log output above")
         else:
             return 1.0