Source

utils / expand_tiered.py

Full commit
import sys

##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: 'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_change | Gene_Name | Gene_BioType | Coding | Transcript | Exon )' ">

# Optional second command-line argument: a substring an effect annotation must
# contain in order to be written out. The empty default matches every effect.
try:
  filter_eff = sys.argv[2]
except IndexError:
  filter_eff = ''

# Load the whole tab-separated table into memory; row 0 is the header.
# The `with` block guarantees the file handle is closed after reading.
tierfile = []
with open(sys.argv[1], 'r') as tier_handle:
  for line in tier_handle:
    # Remove only the line terminator. A bare strip() would also eat leading
    # and trailing tabs, dropping empty first/last columns and leaving the row
    # shifted or shorter than the header.
    tierfile.append(line.rstrip('\r\n').split('\t'))

# Positions of the three comma-separated annotation columns, looked up by name
# in the header row. list.index raises ValueError if a column is missing.
eff_index = tierfile[0].index('EFF')
ps_index = tierfile[0].index('PHISCORE')
pc_index = tierfile[0].index('PHICLASS')

# For every data row, the comma-separated entries of each annotation column.
effs = [x[eff_index].split(',') for x in tierfile[1:]]
pss = [x[ps_index].split(',') for x in tierfile[1:]]
pcs = [x[pc_index].split(',') for x in tierfile[1:]]

# Largest number of effects found on any single row. The `or [0]` fallback
# keeps max() from raising ValueError when the input has a header but no data
# rows, so a header-only table still produces a header-only output.
max_genes = max([len(x) for x in effs] or [0])

# Header row: copy every input column name except the three annotation
# columns, each name followed by a tab.
expanded_columns = (eff_index, ps_index, pc_index)
for col, name in enumerate(tierfile[0]):
  if col not in expanded_columns:
    sys.stdout.write(name + '\t')

# Then one group of twelve numbered column names per effect slot
# (numbering starts at 0), for max_genes slots in total.
group_template = "Effect_%d\tEffect_Impact_%d\tFunctional_Class_%d\tCodon_Change_%d\tAmino_Acid_change_%d\tGene_Name_%d\tGene_BioType_%d\tCoding_%d\tTranscript_%d\tExon_%d\tphi_score_%d\tphi_class_%d\t"
for slot in range(max_genes):
  sys.stdout.write(group_template % ((slot,) * 12))
sys.stdout.write('\n')

# Data rows: write the pass-through columns, then expand each effect
# annotation (with its matching phi score and phi class) into tab-separated
# fields.
for line in tierfile[1:]:
  # Split the three annotation columns once per row, before the column loop.
  # Doing this inside the loop repeated the work for every column and left the
  # names unset (or stale from the previous row) when no other column exists.
  effects = line[eff_index].split(',')
  pscores = line[ps_index].split(',')
  pclasses = line[pc_index].split(',')

  # Pass-through columns, each followed by a tab.
  for x in range(len(tierfile[0])):
    if x in [eff_index, ps_index, pc_index]:
      continue
    sys.stdout.write( line[x] + '\t' )

  for n, effect in enumerate(effects):
    # Effects that do not contain the filter substring are skipped without
    # leaving an empty placeholder, so the remaining effects shift left.
    if filter_eff not in effect:
      continue
    # Turn "Effect(a|b|...)" into tab-separated fields; the closing
    # parenthesis becomes the tab that precedes the phi score.
    sys.stdout.write(effect.replace('(','\t').replace(')','\t').replace('|','\t'))
    # Score and class are matched to the effect by position; a row with fewer
    # scores or classes than effects raises IndexError here.
    sys.stdout.write(pscores[n] + '\t' + pclasses[n] + '\t')
  sys.stdout.write('\n')