Source

bx-python / scripts / maf_to_axt.py

#!/usr/bin/env python
"""
Application to convert MAF file to AXT file, projecting to any two species. 
Reads a MAF file from standard input and writes an AXT file to standard out;  
some statistics are written to standard error.  The user must specify the 
two species of interest.

usage: %prog primary_species secondary_species < maf_file > axt_file
"""

__author__ = "Bob Harris (rsharris@bx.psu.edu)"

import sys
import copy
import bx.align.maf
import bx.align.axt

def usage(s=None):
	message = """
maf_to_axt primary_species secondary_species < maf_file > axt_file
"""
	if (s == None): sys.exit (message)
	else:           sys.exit ("%s\n%s" % (s,message))


def main():

	# parse the command line

	primary   = None
	secondary = None

	args = sys.argv[1:]
	while (len(args) > 0):
		arg = args.pop(0)
		val = None
		fields = arg.split("=",1)
		if (len(fields) == 2):
			arg = fields[0]
			val = fields[1]
			if (val == ""):
				usage("missing a value in %s=" % arg)

		if (primary == None) and (val == None):
			primary = arg
		elif (secondary == None) and (val == None):
			secondary = arg
		else:
			usage("unknown argument: %s" % arg)

	if (primary == None):
		usage("missing primary species")

	if (secondary == None):
		usage("missing secondary species")

	# read the alignments and other info

	out = bx.align.axt.Writer(sys.stdout)

	axtsRead = 0
	mafsWritten = 0
	for mafBlock in bx.align.maf.Reader(sys.stdin):
		axtsRead += 1

		p = mafBlock.get_component_by_src_start(primary)
		if (p == None): continue
		s = mafBlock.get_component_by_src_start(secondary)
		if (s == None): continue

		axtBlock = bx.align.Alignment (mafBlock.score, mafBlock.attributes)
		axtBlock.add_component (clone_component(p))
		axtBlock.add_component (clone_component(s))

		remove_mutual_gaps (axtBlock)
		if (axtBlock.text_size == 0):
			continue

		out.write (axtBlock)
		mafsWritten += 1

	sys.stderr.write ("%d blocks read, %d written\n" % (axtsRead,mafsWritten))


def clone_component(c):
	return bx.align.Component (c.src, c.start, c.size, c.strand, c.src_size, \
	                           copy.copy(c.text))


def remove_mutual_gaps (block):

	if (len(block.components) == 0): return

	nonGaps = []

	for c in block.components:
		for ix in range(0,block.text_size):
			if (ix not in nonGaps) and (c.text[ix] != "-"):
				nonGaps.append(ix)

	nonGaps.sort()

	for c in block.components:
		c.text = "".join([c.text[ix] for ix in nonGaps])

	block.text_size = len(nonGaps)


if __name__ == "__main__": main()
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.