Commits

Davide Cittaro committed 68aefec

Extract SM

Comments (0)

Files changed (2)

estimate_distribution.py

   # plot distributions
   stdev = np.sqrt(variance)
   xmin = 0
-  xmax = np.int(max(mean, nb_r) + 3 * stdev)
+  xmax = np.int(max(mean, nb_r) + 2 * stdev)
   x = range(xmin, xmax)
   poisson = SS.distributions.poisson(mean)
   nbinom = SS.distributions.nbinom(nb_r, nb_p)
+#!/usr/bin/python
+
+import sys
+import os
+import pysam
+import getopt
+
+
+bam_file = None
+samples = []
+extract_by_rg = False
+extract_prefix = "extract_"
+
+(opts, args) = getopt.getopt(sys.argv[1:], 'f:rp:')
+for o, a in opts:
+  if o == '-f':
+    bam_file = a
+  if o == '-r':
+    extract_by_rg = True  
+  if o == '-p':
+    extract_prefix = a
+
+if not bam_file or not os.path.exists(bam_file):
+  sys.stderr.write("Please, provide a BAM file\n")
+  sys.exit(1)
+
+samples = args
+tokeep = {} #a dictionary files -> list of read groups 
+
+bamh = pysam.Samfile(bam_file, 'rb')
+for rg in bamh.header['RG']:
+  if len(samples) == 0:
+    # create one file for each
+    if extract_by_rg:
+      tokeep[rg['ID']] = [rg['ID']]
+    else:
+      try:
+        tokeep[rg['SM']].append(rg['ID'])
+      except KeyError:  
+        tokeep[rg['SM']] = [rg['ID']]
+  else:
+    if extract_by_rg:
+      if rg['ID'] in samples:
+        tokeep[rg['ID']] = [rg['ID']]
+    else:
+      if rg['SM'] in samples:
+        try:
+          tokeep[rg['SM']].append(rg['ID'])
+        except KeyError:
+          tokeep[rg['SM']] = [rg['ID']]
+
+
+print tokeep