Commits

qixuanwang committed 74a4ea2

qc_distribution

Comments (0)

Files changed (2)

+import os
+import sys
+f2=sys.argv[1]
+f3=sys.argv[2]
+ff=open("finalplot.R","wa")
+#mappable rates
+os.chdir("/mnt/Storage/home/wangqx/DC_distribution/all_summary")
+name=[]
+name=os.listdir(os.getcwd())
+mpb=[]
+for i in name:
+
+    f1=open("/mnt/Storage/home/wangqx/DC_distribution/all_summary/"+i,"rU")
+    for line in f1:
+        line=line.strip()
+        if line[:12]=="mapped reads":
+            line=line.split("=")
+            m=(line[1])
+
+        if line[:11]=="total reads":
+            t=(line.split("=")[1])
+
+    mt = float(m)/float(t)
+
+    mpb.append(mt)
+
+
+b=""
+for i in mpb:
+	b=b+","+str(i)
+
+b=b[1:]
+ff2=open(f2,"r")
+for line in ff2:
+    line=line.strip()
+    if line[:12]=="mapped reads":
+
+        line=line.split("=")
+        den2=line[1]
+
+    if line[:11]=="total reads":
+        line=line.split("=")
+        den3=line[1]
+
+den=str(float(den2)/float(den3))
+
+mpb.append(den)
+mpb.sort()
+
+a1=mpb.index(den)
+b1=len(mpb)
+rates=float(a1)/float(b1)*100
+rates=str(rates)+'%'
+rates=rates.split(".")[0]+"."+rates.split(".")[1][0]+"%"
+ff.write("pdf('/mnt/Storage/home/wangqx/DC_distribution/distribution.pdf',height=8.5,width=8.5)"+"\n")
+ff.write("ee=c("+b+")"+"\n")
+
+ff.write("plot(density(ee),main='mappable rates',xlab='mappable rates',ylab='density(mappable rates)')"+"\n")
+ff.write("abline(v="+den+",col='blue')\n")
+ff.write("text(0.4,2.0, 'The mappable rates of your data locates in "+rates+" among all the data')\n")
+f1.close()
+ff2.close()
+
+#unique reads ratio
+os.chdir("/mnt/Storage/home/wangqx/DC_distribution/all_summary")
+name=[]
+name=os.listdir(os.getcwd())
+uni=[]
+for i in name:
+
+    f1=open("/mnt/Storage/home/wangqx/DC_distribution/all_summary/"+i,"rU")
+    for line in f1:
+        line=line.strip()
+        if line[:12]=="unique reads":
+            line=line.split("=")
+            m=(line[1])
+
+        if line[:11]=="total reads":
+            t=(line.split("=")[1])
+
+    mt = float(m)/float(t)
+
+    uni.append(mt)
+
+
+b=""
+for i in mpb:
+	b=b+","+str(i)
+
+b=b[1:]
+ff2=open(f2,"r")
+for line in ff2:
+    line=line.strip()
+    if line[:12]=="unique reads":
+
+        line=line.split("=")
+        den2=line[1]
+
+    if line[:11]=="total reads":
+        line=line.split("=")
+        den3=line[1]
+
+den=str(float(den2)/float(den3))
+#print uni
+uni.append(den)
+uni.sort()
+#print den
+a1=uni.index(den)
+b1=len(uni)
+#print a1
+#print b1
+rates=float(a1)/float(b1)*100
+rates=str(rates)+'%'
+#rates.split(".")
+rates=rates.split(".")[0]+"."+rates.split(".")[1][0]+"%"
+
+ff.write("ee=c("+b+")"+"\n")
+
+ff.write("plot(density(ee),main='unique reads ratio',xlab='unique reads ratio',ylab='density(unique reads ratio)')"+"\n")
+ff.write("abline(v="+den+",col='blue')\n")
+ff.write("text(0.4,2.0, 'The unique reads ratio of your data locates in "+rates+" among all the data')\n")
+f1.close()
+ff2.close()
+
+
+#overlapped_with_DHSs
+os.chdir("/mnt/Storage/home/wangqx/DC_distribution/all_summary")
+name=[]
+name=os.listdir(os.getcwd())
+DHS=[]
+for i in name:
+
+    f1=open("/mnt/Storage/home/wangqx/DC_distribution/all_summary/"+i,"rU")
+    for line in f1:
+        line=line.strip()
+        if line.startswith("percentage_of_peaks"):
+            line=line.split("=")
+            DHS.append(line[1])
+b=""
+for i in DHS:
+	b=b+","+str(float(i[:-1])/100)
+
+b=b[1:]
+ff2=open(f2,"r")
+for line in ff2:
+        line=line.strip()
+        if line.startswith("percentage_of_peaks"):
+            line=line.split("=")
+            dh=line[1]
+DHS.append(dh)
+DHS.sort()
+
+a1=DHS.index(dh)
+dh=str(float(dh[:-1])/100)
+b1=len(DHS)
+rates=float(a1)/float(b1)*100
+rates=str(rates)+'%'
+rates=rates.split(".")[0]+"."+rates.split(".")[1][0]+"%"
+ff.write("bb=c("+b+")"+"\n")
+ff.write("plot(ecdf(bb), verticals=TRUE, col.points='red',col.hor='blue', col.vert='black',main='overlapped_with_DHSs',xlab='overlapped_with_DHSs',ylab='Fn(overlapped_with_DHSs)')"+"\n")
+ff.write("abline(v="+dh+",col='blue')\n")
+ff.write("text(0.5,0.6, 'The overlapped_with_DHSs of your data locates in "+rates+" among all the data')\n")
+f1.close()
+#velcro qc
+os.chdir("/mnt/Storage/home/wangqx/DC_distribution/all_summary")
+name=[]
+name=os.listdir(os.getcwd())
+velcro=[]
+for i in name:
+
+    f1=open("/mnt/Storage/home/wangqx/DC_distribution/all_summary/"+i,"rU")
+    for line in f1:
+        line=line.strip()
+        if line.startswith("verlcro overlap percentage"):
+            line=line.split(":")
+            velcro.append(line[1])
+b=""
+for i in velcro:
+	b=b+","+str(float(i[:-1])/100)
+
+b=b[1:]
+ff2=open(f2,"r")
+for line in ff2:
+        line=line.strip()
+        if line.startswith("verlcro overlap percentage"):
+            line=line.split(":")
+
+            vel=line[1]
+velcro.append(vel)
+velcro.sort()
+
+a1=velcro.index(vel)
+vel=str(float(vel[:-1])/100)
+b1=len(velcro)
+rates=float(a1)/float(b1)*100
+rates=str(rates)+'%'
+rates=rates.split(".")[0]+"."+rates.split(".")[1][0]+"%"
+ff.write("cc=c("+b+")"+"\n")
+ff.write("plot(ecdf(cc), verticals=TRUE, col.points='red',col.hor='blue', col.vert='black',main='velcro overlap percentage',xlab='velcro overlap percentage',ylab='Fn(velcro overlap percentage)')"+"\n")
+ff.write("abline(v="+vel+",col='blue')\n")
+ff.write("text(0.5,0.6, 'The verlcro overlap percentage of your data locates in "+rates+" among all the data')\n")
+f1.close()
+
+ff.write("dev.off()")
+ff.close()
+os.system("Rscript /mnt/Storage/home/wangqx/DC_distribution/finalplot.R")
-1we23
+dc_newqc