Commits

Davide Cittaro  committed b5b3de1

added xl support

  • Participants
  • Parent commits b6d743e

Comments (0)

Files changed (1)

File vcf2table.py

 
 import sys
 import vcf
+import openpyxl
 
 # fields to dump
 #    VALIDATED GENOTYPES EFFECTS
       gene_codon[x[6]] = []  
     gene_codon[x[6]].append(x[3])
     gene_aa[x[6]].append(x[4])
-  results = ''  
+  results = []  
   for gene in gene_effect.keys():
     n = len(gene_effect[gene])
+    results.append(gene)
     t_data = []
     for x in range(n):
-      t_data.append('\t'.join([gene_codon[gene][x], gene_aa[gene][x], gene_effect[gene][x] ]))
-    t_data = list(set(t_data))
-    results = results + gene + '\t' + '\t'.join(t_data) + '\t'
-  return results[:-1] 
+      this_effect = gene_effect[gene][x]
+      if 'HIGH' in t_data:
+        continue
+      elif 'MODERATE' in t_data:
+        if this_effect == 'HIGH':
+          t_data  = [gene_codon[gene][x], gene_aa[gene][x], gene_effect[gene][x]] 
+        else:  
+          continue
+      elif 'LOW' in t_data:
+        if this_effect in ['HIGH', 'MODERATE']:
+          t_data  = [gene_codon[gene][x], gene_aa[gene][x], gene_effect[gene][x]] 
+        else:
+          continue
+      elif 'MODIFIER' in t_data:
+        if this_effect in ['HIGH', 'MODERATE', 'LOW']:
+          t_data  = [gene_codon[gene][x], gene_aa[gene][x], gene_effect[gene][x]] 
+        else:
+          continue
+      else:        
+        t_data  = [gene_codon[gene][x], gene_aa[gene][x], gene_effect[gene][x]]
+    results = results + t_data
+  return results
   
 
 
 samples = parser.samples
 fields_in_record = ['CHROM','POS','ID','REF','ALT','QUAL','FILTER']
 fields_in_INFO = sys.argv[2].split(',')
+fileout = sys.argv[3]
+wb = openpyxl.workbook.Workbook()
+ws = wb.worksheets[0]
 #fields_in_INFO = ['DP','MQ','VQSLOD','AC','AF','InbreedingCoeff','GMAF','VALIDATED','dbnsfpAncestral_allele','','LOF','dbnsfpPolyphen2_HVAR_pred','dbnsfpSIFT_score','dbnsfpGERP++_RS','dbnsfpGERP++_NR','','isPolymorphic','Phigene','Phiscore','Phiclass','TG_gene','TG_rank','dbnsfpUniprot_acc','dbnsfpEnsembl_transcriptid']
-header = "#" + '\t'.join(fields_in_record + fields_in_INFO)
+header = fields_in_record + fields_in_INFO
 for s in samples:
-  tmps = "\t%s:GT\t%s:A1,A2" % (s, s)
-  header = header + tmps
-header = header + "\tEFF.GENE\tEFF.CODOON\t.EFF.AA\tEFF.EFFECT\n"
-sys.stdout.write(header)
+  header.append("%s:GT" % s)
+  header.append("%s:A1" % s)
+  header.append("%s:A2" % s)
+header = header + ["GENE","CODOON","AA","EFFECT"]
+ws.title = sys.argv[1]
+row = 1
+for idx, f in enumerate(header):
+  #write header
+  col = openpyxl.cell.get_column_letter(1 + idx)
+  ws.cell("%s%s" % (col, row)).value = f
 
 for record in parser:
   FILTER = "PASS"
   if len(record.FILTER):
     FILTER=record.FILTER[0]
-  line = '\t'.join([str(x) for x in (record.CHROM, record.POS, record.ID, record.REF, ','.join([str(x) for x in record.ALT]), record.QUAL, FILTER)])
+  line = [str(x) for x in (record.CHROM, record.POS, record.ID, record.REF, ','.join([str(x) for x in record.ALT]), record.QUAL, FILTER)]
   for k in fields_in_INFO:
     try:
       v = record.INFO[k]
       v = ','.join([str(x) for x in v])  
     else:
       v = str(v)  
-    line = line + "\t" + v
+    line.append(v)
   # get samples
-  line += '\t'
   for x in range(len(samples)):
     s = record.samples[x]
     genotype = s['GT']
     try:
       ad = s['AD']
     except AttributeError:
-      ad = ['.','.']  
+      ad = ['0','0']  
     if not genotype:
-      s_data = './.\t.,.'
+      s_data = ['./.', '0', '0']
     elif not ad:
-      ad = ['.','.']  
+      ad = ['0','0']  
     else:
-      s_data = genotype + '\t' + ','.join([str(x) for x in ad])
-    line = line + s_data + '\t'
+      s_data = [genotype] + ad
+    line = line + s_data
   # get effects
   line = line + get_efflist(record.INFO['EFF'])
-  sys.stdout.write(line + '\n')
+  row += 1
+  for idx, f in enumerate(line):
+    col = openpyxl.cell.get_column_letter(1 + idx)
+    ws.cell("%s%s" % (col, row)).value = f
+
+ws.create_sheet()
+wb.save(filename = fileout)