Commits

Davide Cittaro committed b6d743e

added vcf2table

Comments (0)

Files changed (1)

+#!/usr/bin/env python2.7
+
+import sys
+import vcf
+
+# fields to dump
+#    VALIDATED GENOTYPES EFFECTS
+
+_MAX_EFFECT = 24
+
+def get_efflist(eff):
+  effs = [x.replace("(","|").replace("|)","").split('|') for x in  eff.split(',')]
+  results = ''
+  gene_effect = {}
+  gene_codon = {}
+  gene_aa = {}
+  for x in effs:
+    try:
+      gene_effect[x[6]].append(x[0])
+    except KeyError:
+      gene_effect[x[6]] = [x[0]]
+      gene_aa[x[6]] = []  
+      gene_codon[x[6]] = []  
+    gene_codon[x[6]].append(x[3])
+    gene_aa[x[6]].append(x[4])
+  results = ''  
+  for gene in gene_effect.keys():
+    n = len(gene_effect[gene])
+    t_data = []
+    for x in range(n):
+      t_data.append('\t'.join([gene_codon[gene][x], gene_aa[gene][x], gene_effect[gene][x] ]))
+    t_data = list(set(t_data))
+    results = results + gene + '\t' + '\t'.join(t_data) + '\t'
+  return results[:-1] 
+  
+
+
+    
+#  alt_alleles = all.split(',')
+#  effs = [x.replace("(","|").replace("|)","").split('|') for x in  eff.split(',')]
+#  high = [x for x in effs if x[1] == 'HIGH']
+#  moderate = [x for x in effs if x[1] == 'MODERATE']
+#  low = [x for x in effs if x[1] == 'LOW']
+#  modifier = [x for x in effs if x[1] == 'MODIFIER']
+#  # get the highest impact
+#  if len(high) > 0:
+#    effout = high
+#  elif len(moderate) > 0:
+#    effout = moderate
+#  elif len(low) > 0:
+#    effout = low
+#  else:
+#    effout = modifier
+#  # remove redundancy
+#  gene_effect = {}
+#  gene_codon = {}
+#  gene_aa = {}
+#  for x in effout:
+#    try:
+#      gene_effect[x[6]].append(x[0])
+#    except KeyError:
+#      gene_effect[x[6]] = [x[0]]
+#      gene_aa[x[6]] = [x[4]]  
+#      gene_codon[x[6]] = [x[3]]  
+#    gene_codon[x[6]].append(x[3])
+#    gene_aa[x[6]].append(x[4])
+#  results = ''  
+#  for gene in gene_effect.keys():
+#    n = len(gene_effect[gene])
+#    t_data = []
+#    for x in range(n):
+#      t_data.append('\t'.join([gene_codon[gene][x], gene_aa[gene][x], gene_effect[gene][x] ]))
+#    t_data = list(set(t_data))
+#    results = results + gene + '\t' + '\t'.join(t_data)
+#  return results  
+  
+
+
+parser = vcf.Reader(open(sys.argv[1], 'r'))
+samples = parser.samples
+fields_in_record = ['CHROM','POS','ID','REF','ALT','QUAL','FILTER']
+fields_in_INFO = sys.argv[2].split(',')
+#fields_in_INFO = ['DP','MQ','VQSLOD','AC','AF','InbreedingCoeff','GMAF','VALIDATED','dbnsfpAncestral_allele','','LOF','dbnsfpPolyphen2_HVAR_pred','dbnsfpSIFT_score','dbnsfpGERP++_RS','dbnsfpGERP++_NR','','isPolymorphic','Phigene','Phiscore','Phiclass','TG_gene','TG_rank','dbnsfpUniprot_acc','dbnsfpEnsembl_transcriptid']
+header = "#" + '\t'.join(fields_in_record + fields_in_INFO)
+for s in samples:
+  tmps = "\t%s:GT\t%s:A1,A2" % (s, s)
+  header = header + tmps
+header = header + "\tEFF.GENE\tEFF.CODOON\t.EFF.AA\tEFF.EFFECT\n"
+sys.stdout.write(header)
+
+for record in parser:
+  FILTER = "PASS"
+  if len(record.FILTER):
+    FILTER=record.FILTER[0]
+  line = '\t'.join([str(x) for x in (record.CHROM, record.POS, record.ID, record.REF, ','.join([str(x) for x in record.ALT]), record.QUAL, FILTER)])
+  for k in fields_in_INFO:
+    try:
+      v = record.INFO[k]
+    except KeyError:
+      v = ''  
+    if type(v) == list:
+      v = ','.join([str(x) for x in v])  
+    else:
+      v = str(v)  
+    line = line + "\t" + v
+  # get samples
+  line += '\t'
+  for x in range(len(samples)):
+    s = record.samples[x]
+    genotype = s['GT']
+    try:
+      ad = s['AD']
+    except AttributeError:
+      ad = ['.','.']  
+    if not genotype:
+      s_data = './.\t.,.'
+    elif not ad:
+      ad = ['.','.']  
+    else:
+      s_data = genotype + '\t' + ','.join([str(x) for x in ad])
+    line = line + s_data + '\t'
+  # get effects
+  line = line + get_efflist(record.INFO['EFF'])
+  sys.stdout.write(line + '\n')