Commits

Guru Ananda  committed d287264

Modified tool: Gene BED to Exon BED

  • Participants
  • Parent commits f087377

Comments (0)

Files changed (2)

File tools/filters/ucsc_gene_bed_to_exon_bed.py

     parser.add_option( "-o", "--output", dest="output", default=None,
                        help="Output file" )
     options, args = parser.parse_args()
-    assert options.region in ( 'coding', 'utr3', 'utr5', 'transcribed' ), "Invalid region argument"
+    assert options.region in ( 'coding', 'utr3', 'utr5', 'transcribed', 'intron' ), "Invalid region argument"
     
     try:
         out_file = open (options.output,"w")
     
     # Read table and handle each gene
     for line in in_file:
+	
         try:
+	    
             if line[0:1] == "#":
                 continue
             # Parse fields from gene table
             strand    = fields[5].replace(" ","_")
             cds_start = int( fields[6] )
             cds_end   = int( fields[7] )
-
+	    
+	    	
             # Determine the subset of the transcribed region we are interested in
             if options.region == 'utr3':
                 if strand == '-': region_start, region_end = tx_start, cds_start
 
             # If only interested in exons, print the portion of each exon overlapping
             # the region of interest, otherwise print the span of the region
+	    # options.exons is always TRUE
             if options.exons:
                 exon_starts = map( int, fields[11].rstrip( ',\n' ).split( ',' ) )
                 exon_starts = map((lambda x: x + tx_start ), exon_starts)
                 exon_ends = map( int, fields[10].rstrip( ',\n' ).split( ',' ) )
                 exon_ends = map((lambda x, y: x + y ), exon_starts, exon_ends);
-                for start, end in zip( exon_starts, exon_ends ):
-                    start = max( start, region_start )
-                    end = min( end, region_end )
-                    if start < end:
-                        if strand: print_tab_sep(out_file, chrom, start, end, name, "0", strand )
-                        else: print_tab_sep(out_file, chrom, start, end )
+		#for Intron regions:
+		if options.region == 'intron':
+			i=0
+	    		while i < len(exon_starts)-1:
+            			intron_starts = exon_ends[i] + 1
+				intron_ends = exon_starts[i+1] - 1
+				if strand: print_tab_sep(out_file, chrom, intron_starts, intron_ends, name, "0", strand )
+                		else: print_tab_sep(out_file, chrom, intron_starts, intron_ends )
+				i+=1
+		#for non-intron regions:
+		else:
+                	for start, end in zip( exon_starts, exon_ends ):
+                    		start = max( start, region_start )
+                    		end = min( end, region_end )
+                    		if start < end:
+                        		if strand: print_tab_sep(out_file, chrom, start, end, name, "0", strand )
+                        		else: print_tab_sep(out_file, chrom, start, end )
             else:
                 if strand: print_tab_sep(out_file, chrom, region_start, region_end, name, "0", strand )
                 else: print_tab_sep(out_file, chrom, region_start, region_end )

File tools/filters/ucsc_gene_bed_to_exon_bed.xml

 <tool id="gene2exon1" name="Gene BED To Exon BED">
 <description>expander</description>
-  <command interpreter="python">ucsc_gene_bed_to_exon_bed.py --input=$input1 --output=$out_file1 --region=$region $exon</command>
+  <command interpreter="python">ucsc_gene_bed_to_exon_bed.py --input=$input1 --output=$out_file1 --region=$region "--exons"</command>
   <inputs>
     <param name="input1" type="data" format="interval" label="UCSC Gene Table"/>
     <param name="region" type="select">
       <label>Feature Type</label>
-      <option value="transcribed">Transcribed</option>
+      <option value="transcribed">Coding + UTR</option>
       <option value="coding">Coding</option>
       <option value="utr3">3' UTR</option>
       <option value="utr5">5' UTR</option>
+      <option value="intron">Introns</option>
     </param>
-    <param name="exon" type="select">
-      <label>Output only exons?</label>
-      <option value="--exons">Yes</option>
-      <option value="">No</option>
-    </param>
+    
+    
   </inputs>
   <outputs>
     <data name="out_file1" format="bed"/>
     <test>
       <param name="input1" value="3.bed" /> 
       <param name="region" value="transcribed" />
-      <param name="exon" value="--exons" />
       <output name="out_file1" value="cf-gene2exon.dat"/>
     </test>
   </tests>
 
 - A UCSC gene bed format file::
 
-    chr7 127475281 127491632 NM_000230 0 + 127486022 127488767 0 3 29,172,3225,    0,10713,13126,
-    chr7 127486011 127488900 D49487    0 + 127486022 127488767 0 2 155,490,        0,2399,
+    chr7 127475281 127491632 NM_000230 0 + 127486022 127488767 0 3 29,172,3225,    0,10713,13126
+    chr7 127486011 127488900 D49487    0 + 127486022 127488767 0 2 155,490,        0,2399
 
 - Converts the above file to a list of bed lines, which has the transcribed regions and overlap with exons. (if user selects **"Transcribed"** and **Yes** to exon only output)::