bx-python / scripts / maf_to_int_seqs.py

#!/usr/bin/env python

"""
For each block in a maf file (read from stdin) write a sequence of ints 
corresponding to the columns of the block after applying the provided sequence
mapping.

The 'correct' number of species is determined by the mapping file, blocks not having
this number of species will be ignored.

usage: %prog [mapping_file] < maf_file
"""

from __future__ import division

import psyco_full

import bx.align.maf
from bx import seqmapping
import string
import sys

def main():
    """
    Translate every MAF block read from stdin into one line of ints.

    If a mapping file is named as the first command line argument, the
    alignment mapping read from it is applied to each translated block
    before it is written. Without an argument the ints produced by
    `seqmapping.DNA.translate_list` are written unchanged.

    Each block is written to stdout as its ints separated by single
    spaces and terminated by a newline.
    """

    alpha_map = None
    if len( sys.argv ) > 1:
        # Close the mapping file once it has been parsed instead of leaking
        # the handle. `open` replaces the Python 2 only `file` builtin, and
        # try/finally (rather than `with`) keeps old interpreters working.
        mapping_file = open( sys.argv[1] )
        try:
            _, alpha_map = seqmapping.alignment_mapping_from_file( mapping_file )
        finally:
            mapping_file.close()

    for maf in bx.align.maf.Reader( sys.stdin ):
        # Translate alignment to ints
        int_seq = seqmapping.DNA.translate_list( [ c.text for c in maf.components ] )
        # Apply mapping
        if alpha_map:
            int_seq = alpha_map.translate( int_seq )
        # Write ints separated by spaces, one block per line. A single write
        # yields the same bytes as the former `print i,` loop followed by a
        # bare `print`, and parses on both Python 2 and Python 3.
        sys.stdout.write( " ".join( [ str( i ) for i in int_seq ] ) + "\n" )

# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.