1. Meltzerlab
  2. Untitled project
  3. seqTools

Commits

Sean Davis  committed 326337d

Added rnacount functionality

  • Participants
  • Parent commits 0e93529
  • Branches master

Comments (0)

Files changed (3)

File seqtools/commands/vcfCommands.py

View file
  • Ignore whitespace
 import seqtools.vcf
 import seqtools.strelka
 import vcf
+import pysam
 
 def meltVcf(opts):
     if(opts.vcf):
     seqtools.strelka.processVcf(v,outfile)
     outfile.close()
 
+def RNAcounts(opts):
+    if(opts.vcf):
+        v = vcf.Reader(filename=opts.vcf)
+    else:
+        v = vcf.Reader(sys.stdin)
+    if(opts.outfile):
+        outfile = open(opts.outfile,'w')
+    else:
+        outfile = sys.stdout
+    bamfile = pysam.Samfile(opts.bamfile)
+    seqtools.vcf.countBases(v,outfile,bamfile)
+    outfile.close()
+
 
 varscan_parser = subparsers.add_parser('vcf')
 varscan_subparsers = varscan_parser.add_subparsers(help="vcf subcommands")
                             help='Filename of VCF file [default=stdout]')
 
 strelka_parser.set_defaults(func=strelkaProcess)
+
+rnacount_parser = varscan_subparsers.add_parser('rnacount',
+                                                help="Count the number of REF and ALT alleles in a bamfile at each variant location in a VCF file.  The results are added to the VCF file in three new INFO tags, RNAC_REF, RNAC_ALT, and RNAC_MAF")
+
+rnacount_parser.add_argument('-f','--vcf',
+                            help='Filename of VCF file [default=stdin]')
+rnacount_parser.add_argument('-o','--outfile',
+                            help='Filename of VCF file [default=stdout]')
+rnacount_parser.add_argument('bamfile',
+                             help='The name of the RNA-seq bamfile from which to collect counts')
+
+rnacount_parser.set_defaults(func=RNAcounts)
+
+
+
+
+
+
+
+
+
+

File seqtools/vcf.py

View file
  • Ignore whitespace
 from seqtools.snpeff import effNames,snpEffEffects
 import itertools
+import vcf
 
 def vcfMelt(reader,outfile,samplename=None):
     """Melt a VCF file into a tab-delimited text file
             else:
                 newrow.append(str(r))
         outfile.write('\t'.join(newrow) + "\n")
+
+
+def basesAtPos(samfile, pos, chromname, minbasequal, minmapqual):
+    'Return a string of the bases at that position.'
+    position = 0
+    coverage = 0
+    bases = ""
+    for pileupcolumn in samfile.pileup(reference=chromname, start=pos-1, end=pos):
+        if ((pileupcolumn.pos+1)==pos):
+            position = int(pileupcolumn.pos+1)
+            coverage = int(pileupcolumn.n)
+            for pileupread in pileupcolumn.pileups:
+                if(pos==73433494):
+                    print(pos,bases)
+                if (pileupread.indel == 0 and pileupread.is_del == 0 and \
+                (pileupread.alignment.qual[pileupread.qpos]) >= minbasequal and \
+                float(pileupread.alignment.mapq) >= minmapqual):
+                    bases += chr(pileupread.alignment.seq[pileupread.qpos])
+    return position, coverage, bases
+
+
+def countBases(reader,outfile,bamfile):
+    """Count the ref and alt bases in a bam file at each variant location
+
+    :params reader: a VCF Reader object
+    :params outfile: a stream to which to write the output
+    :params bamfile: a pysam Samfile, sorted and indexed
+
+    Adds the INFO fields:
+
+    RNAC_REF, RNAC_ALT, RNAC_MAF"""
+
+    reader.infos['RNAC_REF'] = vcf.parser._Info(id='RNAC_REF',num=1,type='Integer',desc='The count of REF alleles in the bamfile')    
+    reader.infos['RNAC_ALT'] = vcf.parser._Info(id='RNAC_ALT',num=1,type='Integer',desc='The count of ALT alleles in the bamfile')    
+    reader.infos['RNAC_MAF'] = vcf.parser._Info(id='RNAC_MAF',num=1,type='Float',desc='The fraction of ALT allele in the bamfile')
+
+    writer = vcf.Writer(outfile,reader)
+
+    for row in reader:
+        ref = row.REF
+        alt = str(row.ALT[0])
+        bases = basesAtPos(bamfile,row.POS,row.CHROM,0,0)[2]
+        refcount,altcount = [len([x for x in bases if x==ref]),
+                             len([x for x in bases if x==alt])]
+        row.INFO['RNAC_REF']=refcount
+        row.INFO['RNAC_ALT']=altcount
+        row.INFO['RNAC_MAF']=0
+        if(refcount+altcount>0):
+            row.INFO['RNAC_MAF']=float(altcount)/(refcount+altcount)
+
+        writer.write_record(row)
+
+

File setup.py

View file
  • Ignore whitespace
 from setuptools import setup, find_packages
 import sys, os, glob
 
-version = '0.3.4'
+version = '0.4.0'
 
 setup(name='seqtools',
       version=version,