Commits

Davide Cittaro committed 2f36af3

Added a simple script to order snpEff effect predictions by priority

  • Participants
  • Parent commits 81ed9f7

Comments (0)

Files changed (2)

+import vcf
+import sys
+import numpy as np
+
+def order_eff(effect):
+  eff_priority = {"SPLICE_SITE_ACCEPTOR":0,
+"SPLICE_SITE_DONOR":1,
+"START_LOST":2,
+"EXON_DELETED":3,
+"FRAME_SHIFT":4,
+"STOP_GAINED":5,
+"STOP_LOST":6,
+"RARE_AMINO_ACID":7,
+"NON_SYNONYMOUS_CODING":8,
+"CODON_CHANGE":9,
+"CODON_INSERTION":10,
+"CODON_CHANGE_PLUS_CODON_INSERTION":11,
+"CODON_DELETION":12,
+"CODON_CHANGE_PLUS_CODON_DELETION":13,
+"UTR_5_DELETED":14,
+"UTR_3_DELETED":15,
+"SYNONYMOUS_START":16,
+"NON_SYNONYMOUS_START":17,
+"START_GAINED":18,
+"SYNONYMOUS_CODING":19,
+"SYNONYMOUS_STOP":20,
+"UTR_5_PRIME":21,
+"UTR_3_PRIME":21,
+"REGULATION":23,
+"UPSTREAM":24,
+"DOWNSTREAM":25,
+"GENE":26,
+"TRANSCRIPT":27,
+"EXON":28,
+"INTRON_CONSERVED":29,
+"INTRON":30,
+"INTRAGENIC":31,
+"INTERGENIC":32,
+"INTERGENIC_CONSERVED":33,
+"NONE":34,
+"CHROMOSOME":35,
+"CUSTOM":36,
+"CDS":37}
+
+  effect_list = np.array(effect.split(','))
+  priorities = np.array([eff_priority[f[:f.find('(')]] for f in effect_list])
+  ordering = np.argsort(priorities)
+  return ','.join(effect_list[ordering])
+
+try:
+  fh = open(sys.argv[1])
+except IndexError:
+  fh = sys.stdin
+  
+parser = vcf.Reader(fh)
+output = vcf.Writer(sys.stdout, parser, lineterminator='\n')
+
+for record in parser:
+  if record.INFO.has_key('EFF'):
+    record.INFO['EFF'] = order_eff(record.INFO['EFF'])
+  output.write_record(record)  
+    
+
+
+
+

File vcf2table.py

 #!/usr/bin/env python2.7
 
+import os
 import sys
 import vcf
 import openpyxl
   header.append("%s:GT" % s)
   header.append("%s:A1,A2" % s)
 header = header + ["GENE","CODOON","AA","EFFECT"]
-ws.title = sys.argv[1]
+ws.title = os.path.basename(sys.argv[1][:30])
 row = 1
 for idx, f in enumerate(header):
   #write header
     elif not ad:
       ad = ['0,0']  
     else:
+          if not type(ad) is list: ad = [ad]
 	  s_data = [genotype , ','.join([str(x) for x in ad])]
     line = line + s_data
   # get effects
-  if record.INFO['EFF']:
+  if record.INFO.has_key('EFF') and record.INFO['EFF']:
     line = line + get_efflist(record.INFO['EFF'])
   row += 1
   for idx, f in enumerate(line):