# Commits

committed 5c4cb7e

more likelihood edits

# trees/tree_likelihood_calculator.py

` import numpy as np`
` from numpy import *`
` from scipy.linalg import *`
`+from scipy.optimize import fmin_powell`
` `
` #move to datatype`
` position = {"A":[0],"C":[1],"G":[2],"T":[3],"-":[0,1,2,3]}`
`+LARGE = 10000000`
`+`
` `
` def match_tips_and_seqs(tree,seqs):`
` 	lvs = tree.leaves()`
` `
` `
` """`
`-here are a bunch of functions for use in optimizing params`
`+this scales so that the sixth rate (G->T) is 1 `
`+the matrix will be`
`+  A C G T`
`+A   1 2 3`
`+C     4 5`
`+G `
`+T `
`+`
`+the three base composition rates will be parameters`
`+6, 7, 8. The last base composition (T) will be 1- sum(6,7,8)`
`+and the total will be scaled to 1`
` """`
`+def set_gtr_rmatrix_basefreq(params):`
`+	rmatrix = ones((4,4))`
`+	rmatrix[0][1] = params[0]`
`+	rmatrix[1][0] = params[0]`
`+	rmatrix[0][2] = params[1]`
`+	rmatrix[2][0] = params[1]`
`+	rmatrix[0][3] = params[2]`
`+	rmatrix[3][0] = params[2]`
`+	rmatrix[1][2] = params[3]`
`+	rmatrix[2][1] = params[3]`
`+	rmatrix[1][3] = params[4]`
`+	rmatrix[3][1] = params[4]`
`+	rmatrix[2][3] = 1.`
`+	rmatrix[3][2] = 1.`
`+	fill_diagonal(rmatrix,0)`
`+	basecomp = ones(4)	`
`+	basecomp[0] = params[5]`
`+	basecomp[1] = params[6]`
`+	basecomp[2] = params[7]`
`+	basecomp[3] = 1-sum(params[5:8])`
`+	sc = sum(basecomp)`
`+	basecomp /= sc`
`+	return rmatrix,basecomp`
` `
`-LARGE = 10000000`
`-def calc_params_rateparams(params,tree,sites):`
`+`
`+"""`
`+params contain the rmatrix and basefreq params in the form`
`+  A C G T`
`+A   1 2 3`
`+C     4 5`
`+G `
`+T `
`+the three base composition rates will be parameters`
`+6, 7, 8. The last base composition (T) will be 1- sum(6,7,8)`
`+and the total will be scaled to 1`
`+"""`
`+def calc_params_rateparams(params,tree,sites,alpha=None,cats=None):`
` 	for i in params: `
` 		if i < 0:`
` 			return LARGE`
` 	if sum(params[5:8]) > 1:`
` 		return LARGE`
` 	rmatrix,basefreq = set_gtr_rmatrix_basefreq(params)`
`-	like = calc_nuc_tree_likelihood(tree,rmatrix,basefreq,sites)`
`+	like = -1`
`+	if alpha != None:`
`+		like = calc_nuc_tree_likelihood_gamma(tree,rmatrix,basefreq,sites,alpha,cats)`
`+	else:`
`+		like = calc_nuc_tree_likelihood(tree,rmatrix,basefreq,sites)`
` 	print like`
` 	if like < 0 or isnan(like):`
` 		return LARGE`
` 	return like`
` `
` """`
`-optimize everything but the tree topology and gamma`
`+params contains parameter values for everything but the tree topology and gamma`
` """`
`-def calc_params_allparams(params,tree,sites):`
`+def calc_params_allparams(params,tree,sites,alpha,cats):`
` 	for i in params: `
` 		if i < 0:`
` 			return LARGE`
` 			i.length = params[count]`
` 			count += 1`
` 	rmatrix,basefreq = set_gtr_rmatrix_basefreq(rateparams)`
`-	like = calc_nuc_tree_likelihood(tree,rmatrix,basefreq,sites)`
`+	like = -1`
`+	if alpha != None:`
`+		like = calc_nuc_tree_likelihood_gamma(tree,rmatrix,basefreq,sites,alpha,cats)`
`+	else:`
`+		like = calc_nuc_tree_likelihood(tree,rmatrix,basefreq,sites)`
` 	print like`
` 	if like < 0 or isnan(like):`
` 		return LARGE`
` 	return like`
` `
`-def calc_params_rateparams_gamma(params,tree,sites,alpha,cats):`
`-	for i in params: `
`-		if i < 0:`
`-			return LARGE`
`-	if sum(params[5:8]) > 1:`
`-		return LARGE`
`-	rmatrix,basefreq = set_gtr_rmatrix_basefreq(params)`
`-	like = calc_nuc_tree_likelihood_gamma(tree,rmatrix,basefreq,sites,alpha,cats)`
`-	if like < 0 or isnan(like): `
`-		return LARGE`
`-	return like`
`-`
`+"""`
`+params contains just the alpha shape parameter`
`+"""`
` def calc_params_gamma(params,tree,sites,rmatrix,basefreq,cats):`
` 	alpha = params[0]`
` 	if alpha < 0:`
` 	like = calc_nuc_tree_likelihood_gamma(tree,rmatrix,basefreq,sites,alpha,cats)`
` 	if like < 0 or isnan(like):`
` 		return LARGE`
`+	print like`
` 	return like`
` `
`-def calc_params_treebl(params,tree,sites,rmatrix,basefreq):`
`-	count = 0`
`-	for i in tree.iternodes():`
`-		if i != tree:`
`-			if params[count] < 0:`
`-				return LARGE`
`-			i.length = params[count]`
`-			count += 1`
`-	like = calc_nuc_tree_likelihood(tree,rmatrix,basefreq,sites)`
`-	print tree.get_newick_repr(True)`
`-	if like < 0 or isnan(like):`
`-		return LARGE`
`-	return like`
`-`
`-def calc_params_treebl_gamma(params,tree,sites,rmatrix,basefreq,alpha,cats):`
`+"""`
`+params contains the branch lengths`
`+"""`
`+def calc_params_treebl(params,tree,sites,rmatrix,basefreq,alpha=None,cats=None):`
` 	count = 0`
` 	for i in tree.iternodes():`
` 		if i != tree:`
` 				return LARGE`
` 			i.length = params[count]`
` 			count += 1`
`-	like = calc_nuc_tree_likelihood_gamma(tree,rmatrix,basefreq,sites,alpha,cats)`
`-	print like`
`-	print tree.get_newick_repr(True)`
`+	like = -1`
`+	if alpha != None:`
`+		like = calc_nuc_tree_likelihood_gamma(tree,rmatrix,basefreq,sites,alpha,cats)`
`+	else:`
`+		like = calc_nuc_tree_likelihood(tree,rmatrix,basefreq,sites)`
`+	#print tree.get_newick_repr(True)`
`+	#print like`
` 	if like < 0 or isnan(like):`
` 		return LARGE`
` 	return like`
` `
` `
`-`
` """`
`-this scales so that the sixth rate (G->T) is 1 `
`-the matrix will be`
`-  A C G T`
`-A   1 2 3`
`-C     4 5`
`-G `
`-T `
`-`
`-the three base composition rates will be parameters`
`-6, 7, 8. The last base composition (T) will be 1- sum(6,7,8)`
`-and the total will be scaled to 1`
`+optimizing params`
` """`
`-def set_gtr_rmatrix_basefreq(params):`
`-	rmatrix = ones((4,4))`
`-	rmatrix[0][1] = params[0]`
`-	rmatrix[1][0] = params[0]`
`-	rmatrix[0][2] = params[1]`
`-	rmatrix[2][0] = params[1]`
`-	rmatrix[0][3] = params[2]`
`-	rmatrix[3][0] = params[2]`
`-	rmatrix[1][2] = params[3]`
`-	rmatrix[2][1] = params[3]`
`-	rmatrix[1][3] = params[4]`
`-	rmatrix[3][1] = params[4]`
`-	rmatrix[2][3] = 1.`
`-	rmatrix[3][2] = 1.`
`-	fill_diagonal(rmatrix,0)`
`-	basecomp = ones(4)	`
`-	basecomp[0] = params[5]`
`-	basecomp[1] = params[6]`
`-	basecomp[2] = params[7]`
`-	basecomp[3] = 1-sum(params[5:8])`
`-	sc = sum(basecomp)`
`-	basecomp /= sc`
`-	return rmatrix,basecomp`
`-`
` `
` """`
`-optimizing params`
`+this should really be done with derivatives, but `
`+for demonstration purposes we will just use powell `
`+derivative free things`
` """`
`+def optimize_brlen(tree,sites,rmatrix,basefreq,alpha=None,cats=None):`
`+	blstart = []`
`+	for i in tree.iternodes():`
`+		if i != tree:`
`+			blstart.append(i.length)`
`+	res = fmin_powell(calc_params_treebl,blstart,args=(tree,sites,rmatrix,basefreq,alpha,cats),full_output=True)`
`+	return res`
`+`
`+def optimize_rateparams(a,tree,sites,alpha=None,cats=None):`
`+	assert len(a) == 8`
`+	res =  fmin_powell(calc_params_rateparams,a,args=(tree,sites,alpha,cats),full_output=True)`
`+	return res`
`+`
` """`
` #maximize rate params`
` res = fmin_powell(optimize_rateparams,a,args=(tree,sites),full_output=True)`
` print rmatrix`
` print basefreq`
` print calc_nuc_q_matrix(rmatrix,basefreq)`
`+"""`
` `
`-#res = fmin_powell(optimize_treebl_gamma,blstart	,args=(tree,sites,rmatrix,basefreq,alpha,cats),maxiter=5,ftol=full_output=True)`
`-#res = fmin_powell(optimize_treebl,blstart,args=(tree,sites,rmatrix,basefreq),maxiter=5,full_output=True)`
` `
`-alp = a + blstart`
`-print alp`
`-res = fmin_powell(optimize_allparams,alp,args=(tree,sites),full_output=True)`
`-print res[0]`
`-rmatrix,basefreq = set_gtr_rmatrix_basefreq(res[0][0:8])`
`-print rmatrix`
`-print basefreq`
`-print calc_nuc_q_matrix(rmatrix,basefreq)`
`-print tree.get_newick_repr(True)`
`-"""`
`+def optimize_allparams(a,tree,sites,alpha=None,cats=None):`
`+	assert len(a) == 8`
`+	blstart = []`
`+	for i in tree.iternodes():`
`+		if i != tree:`
`+			blstart.append(i.length)`
`+	a = a + blstart`
`+	res =  fmin_powell(calc_params_allparams,a,args=(tree,sites,alpha,cats),full_output=True)`
`+	return res`
`+`
`+def optimize_gammaparam(alphastart,tree,sites,rmatrix,basefreq,cats):`
`+	res =  fmin_powell(calc_params_gamma,[alphastart],args=(tree,sites,rmatrix,basefreq,cats),full_output=True)`
`+	return res`