# galaxy-central / tools / fastq / fastq_paired_end_deinterlacer.py

#Florent Angly
import sys
from galaxy_utils.sequence.fastq import fastqReader, fastqWriter, fastqNamedReader, fastqJoiner

def main():
    """De-interlace a paired-end FASTQ file into mate and singleton files.

    Command-line arguments (read from ``sys.argv``):
        1. input FASTQ filename
        2. FASTQ format name; an empty string falls back to 'sanger'
        3. output filename for the first mates of complete pairs
        4. output filename for the second mates of complete pairs
        5. output filename for unpaired reads that are first mates
        6. output filename for unpaired reads that are not first mates

    The input is scanned sequentially.  For each read, its mate is looked up
    by the identifier that ``fastqJoiner.get_paired_identifier`` returns.
    When a mate is found both reads are written to the two mate files;
    otherwise the read is written to one of the two singleton files.  A short
    summary is printed to standard output when the scan finishes.

    Raises:
        IndexError: if fewer than six arguments were supplied.
    """
    input_filename = sys.argv[1]
    input_type = sys.argv[2] or 'sanger'
    mate1_filename = sys.argv[3]
    mate2_filename = sys.argv[4]
    single1_filename = sys.argv[5]
    single2_filename = sys.argv[6]

    # Everything that has to be closed, in the order it was opened.  Closing
    # from a ``finally`` clause releases the handles even when reading or
    # writing fails part-way through.
    opened = []
    try:
        named_reader = fastqNamedReader(open(input_filename, 'rb'), format=input_type)
        opened.append(named_reader)
        mate1_out = fastqWriter(open(mate1_filename, 'wb'), format=input_type)
        opened.append(mate1_out)
        mate2_out = fastqWriter(open(mate2_filename, 'wb'), format=input_type)
        opened.append(mate2_out)
        single1_out = fastqWriter(open(single1_filename, 'wb'), format=input_type)
        opened.append(single1_out)
        single2_out = fastqWriter(open(single2_filename, 'wb'), format=input_type)
        opened.append(single2_out)
        joiner = fastqJoiner(input_type)

        # A second, independent handle on the input drives the sequential
        # scan; it is tracked so that it gets closed as well.
        scan_handle = open(input_filename, 'rb')
        opened.append(scan_handle)

        last_index = None       # index of the last read seen; None if no reads
        skip_count = 0          # number of reads without a mate
        pending_mates = set()   # identifiers already written as someone's mate
        for last_index, read in enumerate(fastqReader(scan_handle, format=input_type)):
            if read.identifier in pending_mates:
                # Already written together with an earlier read.
                pending_mates.remove(read.identifier)
                continue

            # NOTE(review): the read is re-fetched through the named reader
            # instead of reusing `read`; presumably this keeps the named
            # reader's internal state in step with the scan -- confirm before
            # simplifying.
            mate1 = named_reader.get(read.identifier)
            mate2 = named_reader.get(joiner.get_paired_identifier(mate1))

            if mate2:
                # This is a mate pair
                pending_mates.add(mate2.identifier)
                if joiner.is_first_mate(mate1):
                    mate1_out.write(mate1)
                    mate2_out.write(mate2)
                else:
                    mate1_out.write(mate2)
                    mate2_out.write(mate1)
            else:
                # This is a single
                skip_count += 1
                if joiner.is_first_mate(mate1):
                    single1_out.write(mate1)
                else:
                    single2_out.write(mate1)

        if last_index is None:
            print("Your input file contained no valid FASTQ sequences.")
        else:
            if skip_count:
                print('There were %i reads with no mate.' % skip_count)
            # last_index + 1 is the total number of reads; floor division
            # keeps the pair count an integer.
            print('De-interlaced %s pairs of sequences.' % ((last_index - skip_count + 1) // 2))
    finally:
        for resource in opened:
            resource.close()

 
# Run the de-interlacer only when this file is executed as a script.
if __name__ == '__main__':
    main()
# Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
# Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
# Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
# Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
# Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
# Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
# Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.