Commits

Inti Pedroso committed 21936f4 Merge

Merge branch 'feature/make_bigbed' into develop

Comments (0)

Files changed (3)

 print time.ctime(), "Printing results"
 iCC.make_bed(filename=''.join((args.output,'.bed')))
 iCC.print_summary(filename=args.output)
+utilities.functions.make_bed_graph(pd.concat(iCC.data_files.values()),pd.concat(iCC.cnv_call[iCC.working_distn].values()),filename=''.join((args.output,'.bedGraph')))
 
 #if args.parallel:
   #functions.stop_ipcluster(N=args.num_jobs, profile=cluster_profile)

utilities/cnv_caller.py

       for chr_id in index.keys():
 	out = ''.join((filename,'.cnv_calls_summary.',str(chr_id),'.txt'))
 	data.ix[index[chr_id],:].to_csv(out,header=True,index=None,sep="\t")
-
   
   def _changepoint_indicators_from_stateseq(stateseq):
       '''

utilities/functions.py

   appsig=np.sqrt((a*a+1)/(b*b+.108*b-3.795)-appmu*appmu);
   return appmu, appsig
 
+def make_bed_graph(data_files,cnvs,filename,mode='w',cols=['counts','post_prob_of_cnv','cnv_call']):
+  import pandas as pd
+  import time
+  import os
+  print time.ctime(), 'Printing BedGraph output to [',filename,']'
 
+  # open file
+  f = open(filename,mode)
+  # print header
+  header_string = """browser position chr20:1557000-1596001
+browser hide all
+browser pack refGene encodeRegions
+browser full altGraph
+"""
+  f.write(header_string)
+  for col in cols:
+    # print track description line
+    if col == 'counts':
+      name = 'Normalised counts'
+      desc = 'Counts normalised by GC content and divided by mean counts'
+    if col == 'post_prob_of_cnv':
+      name = 'prob of CNV'
+      desc = 'Estimated posterior probability of CNV'
+    if col == 'cnv_call':
+      name = 'Mean CNV'
+      desc = 'Estimated CNV calls'
+    track_str = ''.join(("track type=bedGraph name=\"",name,"\" description=\"",desc,"\" windowingFunction=mean alwaysZero=on graphType=points yLineMark=1 yLineOnOff=on visibility=full color=200,100,0 altColor=0,100,200 priority=20\n"))
+    f.write(track_str)
+    f.flush()
+    # print data for cnv values
+    data_files['chr_v2'] = [ ''.join(('chr',str(chr))) for chr  in data_files['chr']]
+    data_files.to_string(buf=f,index=False,columns=['chr_v2','start','end',col],header=False)
+    f.write('\n')
 
+  # print cnv intervals
+  track_str_cnv_calls = "track type=bed name=\"CNV_calls\" description=\"iCC CNV calls\" visibility=full priority=20 colorByStrand=\"255,0,0 0,0,255\"\n"
+  f.write(track_str_cnv_calls)
+  f.flush()
+  cnvs['chr_v2'] = [ ''.join(('chr',str(chr))) for chr  in cnvs['chromosome'] ]
+  cnvs['id'] = 'iCC_cnv'
+  cnvs['strand'] = '+'
+  cnvs['strand'][ cnvs['genotype'] < 1  ] = '-'
+  cnvs['zero'] = 0
+  cnvs['score'] = 1000*cnvs['cnv_post_prob']
+  cnvs.to_string(buf=f,index=False,columns=['chr_v2','start','end','id','score','strand'],header=False)
+  f.write('\n')    
+  f.flush()
+  f.close()
+  f = open(filename,'r')
+  f2 = open(''.join((filename,'tmp')),'w')
+  for line in f:
+    f2.write(line.strip("^ "))
+  f2.flush()
+  f2.close()
+  os.rename(''.join((filename,'tmp')),filename)
+  
+
+def from_tables_to_bedgraph(filename):
+  import pandas as pd
+  import utilities
+  for chr in xrange(1,22):
+    print chr
+    d= pd.read_table(''.join((filename,str(chr),'.cnv_calls_summary.txt'))) 
+    cnvs = pd.read_table(''.join((filename,str(chr),'.bed')))
+    utilities.functions.make_big_bed(d,cnvs,filename=''.join((filename,str(chr),'.bedGraph')))