Davide Cittaro committed 00a10e0

switched to local gene description for vcf2xls

Comments (0)

Files changed (1)

 import vcf
 import openpyxl
 import argparse
-from Bio import Entrez, SeqIO
-# fields to dump
-def get_description(refseq_id):
-  if not refseq_id:
-    return ''
-  # get entrez_id
-  Entrez.email = 'cittaro.davide@hsr.it'
-  Entrez.tool = 'vcf2xls.py'
-  sys.stderr.write("Annotationg %s\n" % refseq_id)
-  handle = Entrez.esearch(db="nucleotide", term="%s[Primary Accession]" % refseq_id)
-  record = Entrez.read(handle)
-  if record["Count"] == '0':
-    return ''
-  # get the first id by default
-  handle = Entrez.efetch(db="nucleotide", id=record["IdList"][0], rettype="gb")
-  record = SeqIO.read(handle, "genbank")
-  return record.description
 def fill_cell(worksheet, column, row, value):
   if type(column) is int:
 def vcf2xls():
   header = ['CHROM','POS','ID','REF','ALT','QUAL','FILTER']
+  description = {}
   #parse command line options
   option_parser = argparse.ArgumentParser(
   option_parser.add_argument("-o", "--output", help="output XLS file", action="store", default="output.xls")
   option_parser.add_argument("-n", "--name", help="Analysis name", action="store", default="Analysis")
   option_parser.add_argument("-f", "--fields", help="List of INFO properties to be included", nargs="+")
-  option_parser.add_argument("-d", "--description", help="Add Gene description (slow)", action="store_true", default=False)
+  option_parser.add_argument("-d", "--description", help="Add Gene description from file", action="store", type=argparse.FileType('r'))
   # parse arguments
   cli_options = option_parser.parse_args()
     header = header + ["GENE","CODOON","AA","EFFECT"]
     if option_parser.description:
+      for d_line in cli_options.description:
+        tmp_f = d_line.strip().split('\t')
+        description[tmp_f[0]] = tmp_f[1]
   # initialize workbook  
       fill_cell(ws, col_n, row, effect.predicted)
       col_n += 1
       if option_parser.description:
-        fill_cell(ws, col_n, row, get_description(effect.transcript))
+        try:         
+          fill_cell(ws, col_n, row, description[effect.gene])
+        except KeyError:
+          None
         col_n += 1
   write_legend(wb, cli_options.fields, vcf_parser.infos)
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.