Commits

Davide Cittaro committed 4fb1611

added reversed TX in breakdancer converter

  • Participants
  • Parent commits 97feb8d

Comments (0)

Files changed (1)

File breakdancer2vcf.py

 import argparse
 from zlib import crc32
 import pysam
+import random
+import string 
 
 # types are CTX DEL INS INV ITX
 
   def length(self):
     """Return the absolute value of the stored event size (self._size)."""
     magnitude = abs(self._size)
     return magnitude
   def build_vcf_string(self, samples=None):
     """Format this structural variant as newline-terminated VCF data line(s).

     For CTX and ITX events the variant is written as two breakend
     (SVTYPE=BND) records: one at chr1:pos1 and a "reversed" one at
     chr2:pos2.  Every other type yields a single record carrying SVLEN
     and END.

     samples -- optional sequence of sample names.  When given and
                non-empty, a GT:DP FORMAT column followed by one column per
                sample is appended to every record.  Samples that are not
                present in self._sample_dp are written as "./.:0".

     Returns the record(s) as a single string.

     Side effects: for CTX/ITX a fresh random id is stored in self._id on
     every call, and the reference base for the reversed record is read
     from the module-level `genome` object through its fetch() method.
     """
     is_breakend = self._type in ('CTX', 'ITX')
     if is_breakend:
       id_chars = string.ascii_uppercase + string.digits
       self._id = "bnd_%s" % ''.join(random.choice(id_chars) for _ in range(5))
       rev_id = "bnd_%s" % ''.join(random.choice(id_chars) for _ in range(5))
       # INFO entries are KEY=value pairs; this branch used to emit "AF:",
       # unlike the non-breakend branch below.
       info_field = "DP=%d;AF=%.2f;SVTYPE=BND;BDTYPE=%s" % (self._num_reads, self._af, self._type)
     else:
       info_field = "DP=%d;AF=%.2f;SVTYPE=%s;SVLEN=%d;END=%d" % (self._num_reads, self._af, self._type, abs(self._size), self._pos2)

     # FORMAT + per-sample columns; identical for both records of a breakend
     # pair, and empty when no samples were requested.
     sample_columns = []
     if samples:
       sample_columns.append("GT:DP")
       for s in samples:
         try:
           sample_columns.append("1/.:%s" % self._sample_dp[s])
         except KeyError:
           # breakdancer does not put all samples in the line if no SV are found
           sample_columns.append("./.:0")
     sample_fields = ''.join("\t" + column for column in sample_columns)

     fixed_fields = "%s\t%d\t%s\t%s\t%s\t%d\t.\t%s" % (self._chr1, self._pos1, self._id, self._ref, self._alt, self._score, info_field)
     fixed_fields = fixed_fields + sample_fields + "\n"
     if not is_breakend:
       return fixed_fields

     # for ITX and CTX also "reversed" bp should be returned
     rev_alt_pos = "%s:%d" % (self._chr1, self._pos1)
     # NOTE(review): fetch() is called with start=pos2, end=pos2+1 while pos2
     # is also printed as the VCF POS -- confirm both use the same coordinate
     # base against how self._ref is obtained for the first record.
     rev_ref = genome.fetch(reference = self._chr2, start = self._pos2, end = self._pos2 + 1).upper()

     # Bracket character is chosen from self._orientation1 and the side the
     # base sits on from self._orientation2, exactly as before.  The only
     # change: in the leading-bracket form the mate position now sits
     # between the brackets ("[p[t" / "]p]t") instead of after them, which
     # is what the VCF breakend ALT grammar requires.
     bracket = "[" if self._orientation1 else "]"
     if self._orientation2:
       # reverse
       rev_alt = "%s%s%s%s" % (bracket, rev_alt_pos, bracket, rev_ref)
     else:
       # forward
       rev_alt = "%s%s%s%s" % (rev_ref, bracket, rev_alt_pos, bracket)

     rev_fields = "%s\t%d\t%s\t%s\t%s\t%d\t.\t%s" % (self._chr2, self._pos2, rev_id, rev_ref, rev_alt, self._score, info_field)
     # sample columns are the same for the reversed record
     return fixed_fields + rev_fields + sample_fields + "\n"
        
        
 def write_vcf_header(bd_info, options):