Davide Cittaro committed 3d0ad13

Added calculation of the allele frequency (AF) and allele count (AC) specific to the samples in the generated Excel file.

Comments (0)

Files changed (1)

   ws.cell("A9").value = 'SAMPLE.AD'
   ws.cell("B8").value = 'Sample Genotype (0: ref allele, 1: first alternate allele...)'
   ws.cell("B9").value = 'Allele depth'
+  ws.cell("A10").value = 'TAC'
+  ws.cell("A11").value = 'TAF'
+  ws.cell("B10").value = 'Allele count in this table'
+  ws.cell("B11").value = 'Allele frequency in this table'
 
-  row=10
+  row=12
   
   for i in fields:
     ws.cell("A%s" % row).value = i
 
 def vcf2xls():
 
-  header = ['CHROM','POS','ID','REF','ALT','QUAL','FILTER']
+  header = ['CHROM','POS','ID','REF','ALT','QUAL','FILTER', 'TAC', 'TAF']
   description = {}
 
   #parse command line options
   option_parser = argparse.ArgumentParser(
   description = "Converts vcf to xls file", 
   prog="vcf2xls")
-  option_parser.add_argument("--version", action="version", version="%(prog)s 0.3")
+  option_parser.add_argument("--version", action="version", version="%(prog)s 0.4")
   option_parser.add_argument("-v", "--vcf", help="input VCF file", action="store", type=argparse.FileType('r'), default=sys.stdin) 
   option_parser.add_argument("-o", "--output", help="output XLS file", action="store", default="output.xls")
   option_parser.add_argument("-n", "--name", help="Analysis name", action="store", default="Analysis")
     fill_cell(ws, idx + 1, row, f)
 
   for record in vcf_parser:
+    tac = [0] + [0] * len(record.ALT)   # allele count for this excel file
+    taf = [0.0]	+ [0.0]	* len(record.ALT) # allele frequency for this excel file
+    
     row += 1 # we are read for the next line
     col_n = 1  # always start to first column
     
       fill_cell(ws, col_n, row, field)
       col_n += 1
 
+    # process new allele counts
+    for sample in samples:
+      if not sample.called:
+        continue
+      genotype = sample['GT']
+      for all in range(len(tac)):
+        all_count = genotype.count(str(all))  # count how many characters of this allele are in genotype
+        tac[all] += all_count
+    for all in range(len(tac)):
+      taf[all] = float(tac[all]) / sum(tac)
+    fill_cell(ws, col_n, row, ','.join([str(x) for x in tac[1:]]))
+    col_n += 1
+    fill_cell(ws, col_n, row, ','.join(["%.2f" % x for x in taf[1:]]))
+    col_n += 1
+          
+
     for field in cli_options.fields:
       # iterate over fields and write to excel
       try:
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.