Commits

Davide Cittaro  committed 07f2c41

vcf output for pca scores

  • Participants
  • Parent commits a0b8874

Comments (0)

Files changed (1)

   if not color_list:
     color_list = 'k'
 
+  if samples:
+    max_string_len = max([len(x) for x in samples])
+
   for x in range(ncomp - 1):
     for y in range(x + 1, ncomp):
       x_ext = np.max(np.abs(results.Wt[x]))
       plt.ylim(-y_ext, y_ext)
       plt.hlines(0, -x_ext, x_ext, linestyle='dashed')  
       plt.vlines(0, -y_ext, y_ext, linestyle='dashed')  
-      plt.savefig("%s_PC%d_%d.png" % (prefix, (x + 1), (y + 1)))
+      fig = plt.gcf()
+      fig.set_size_inches(10, 10)
+      plt.savefig("%s_PC%d_%d.png" % (prefix, (x + 1), (y + 1)), dpi=100)
       plt.close()
       
 
   cli_options = option_parser.parse_args()
   
   # get the number of variants
+  sys.stderr.write("Counting variants...\n")
   n_var = count_lines(cli_options.vcf)
   
   if len(set([x for x in cli_options.classes])) > 7:
 
   # assign -1 to REF/REF, 0 to REF/ALT and 1 to ALT/ALT. 
   # not called data are np.nan
-  sys.stderr.write("Reading VCF file...\n")
+  sys.stderr.write("Processing VCF file...\n")
   for x, record in enumerate(parser):
     for y, s in enumerate(record.samples):
       if s.called:
   else:
     labels = []  
   plot_pca(results, ncomp=cli_options.ncomp, samples=labels, classes=cli_options.classes, prefix=cli_options.vcf)
-  
+
+  if cli_options.vcfout:
+    sys.stderr.write("Writing VCF file...\n")
+    x = nx = 0
+    file_out = open(cli_options.vcfout, 'w')
+    for line in open(cli_options.vcf):
+      if line.startswith('##'):
+        file_out.write(line)
+      elif line.startswith('#CHROM'):
+        for nc in range(cli_options.ncomp):
+          file_out.write("##INFO=<ID=WCOMP%d,Number=1,Type=Float,Description=\"Variant projection on principal component %d\">\n" % (nc + 1, nc + 1))
+        file_out.write(line)
+      else:
+        fields = line.split('\t')
+        info_field = fields[7]  
+        if nan_mask[x]:
+          #scored
+          for nc in range(cli_options.ncomp):
+            info_field = "%s;WCOMP%d=%.3f" % (info_field, nc + 1, results.Y[nx][nc])
+          nx += 1  
+        fields[7] = info_field
+        file_out.write('\t'.join(fields))
+        x += 1