Commits

Anonymous committed e8423b1

[svn r427] Fix OntoShell bug in calculating indirect annotation counts
Fix log4cpp static initialization to work around gcc fiasco
This updates every g_CatSleipnir logging call!

Comments (0)

Files changed (31)

src/annotation.cpp

 
 	m_iLine++;
 	m_istm.getline( m_szLine, c_iBuffer - 1 );
-	g_CatSleipnir.debug( "COntologyImpl::SParser::GetLine( ) %s", m_szLine );
+	g_CatSleipnir( ).debug( "COntologyImpl::SParser::GetLine( ) %s", m_szLine );
 	return true; }
 
 bool COntologyImpl::SParser::IsStart( const char* szStart ) const {
 void COntologyImpl::CollectGenes( size_t iNode, TSetPGenes& setpGenes ) {
 	SNode&						sNode	= m_aNodes[ iNode ];
 	size_t						i;
-	TSetPGenes					setpKids;
+	TSetPGenes					setpSelf, setpKids;
 	TSetPGenes::const_iterator	iterGenes;
 
 	if( sNode.m_iCacheGenes != -1 ) {
 			setpGenes.insert( sNode.m_apCacheGenes[ i ] );
 		return; }
 
-	for( i = 0; i < sNode.m_iGenes; ++i )
-		setpGenes.insert( sNode.m_apGenes[ i ] );
+	for( i = 0; i < sNode.m_iGenes; ++i ) {
+		setpSelf.insert( sNode.m_apGenes[ i ] );
+		setpGenes.insert( sNode.m_apGenes[ i ] ); }
 	for( i = 0; i < sNode.m_iChildren; ++i )
 		CollectGenes( sNode.m_aiChildren[ i ], setpKids );
 	sNode.m_iCacheGenes = 0;
 	for( iterGenes = setpKids.begin( ); iterGenes != setpKids.end( ); ++iterGenes )
-		if( setpGenes.find( *iterGenes ) == setpGenes.end( ) )
+		if( setpSelf.find( *iterGenes ) == setpSelf.end( ) )
 			sNode.m_iCacheGenes++;
 	if( sNode.m_iCacheGenes ) {
 		sNode.m_apCacheGenes = new const CGene*[ sNode.m_iCacheGenes ];
 		for( i = 0,iterGenes = setpKids.begin( ); iterGenes != setpKids.end( ); ++iterGenes )
-			if( setpGenes.find( *iterGenes ) == setpGenes.end( ) ) {
+			if( setpSelf.find( *iterGenes ) == setpSelf.end( ) ) {
 				sNode.m_apCacheGenes[ i++ ] = *iterGenes;
 				setpGenes.insert( *iterGenes ); } } }
 
 	set<const CGene*>					setiGenes;
 	set<const CGene*>::const_iterator	iterGene;
 
-	g_CatSleipnir.info( "CSlim::Open( %s )", pOntology->GetID( ).c_str( ) );
+	g_CatSleipnir( ).info( "CSlim::Open( %s )", pOntology->GetID( ).c_str( ) );
 
 	Reset( pOntology );
 	while( istmSlim.peek( ) != EOF ) {
 					m_vecstrSlims.push_back( str );
 				else {
 					if( ( j = m_pOntology->GetNode( str ) ) == -1 ) {
-						g_CatSleipnir.error( "CSlim::Open( %s ) unknown node: %s",
+						g_CatSleipnir( ).error( "CSlim::Open( %s ) unknown node: %s",
 							m_pOntology->GetID( ).c_str( ), str.c_str( ) );
 						return false; }
 					m_vecveciTerms[ i ].push_back( j ); } }

src/annotationkegg.cpp

 	vector<set<CGene*> >		vecsetpGenes;
 	set<CGene*>::iterator		iterGene;
 
-	g_CatSleipnir.info( "COntologyKEGG::Open( %s )", strOrganism.c_str( ) );
+	g_CatSleipnir( ).info( "COntologyKEGG::Open( %s )", strOrganism.c_str( ) );
 	Reset( );
 	sParser.GetLine( );
 	while( istm.peek( ) != EOF ) {
 
 bool COntologyKEGGImpl::OpenName( SParserKEGG& sParser ) {
 
-	g_CatSleipnir.debug( "COntologyKEGGImpl::OpenName( ) %s", sParser.m_szLine );
+	g_CatSleipnir( ).debug( "COntologyKEGGImpl::OpenName( ) %s", sParser.m_szLine );
 	return ( sParser.IsStart( c_szName ) ? sParser.GetLine( ) : true ); }
 
 bool COntologyKEGGImpl::OpenDefinition( SParserKEGG& sParser ) {

src/annotationmips.cpp

 	SParserMIPS	sParserGene( istmAnnotations, Genome );
 
 	if( !OpenOntology( sParserOnto ) ) {
-		g_CatSleipnir.error( "COntologyMIPS::Open( ) failed on ontology line %d: %s", sParserOnto.m_iLine,
+		g_CatSleipnir( ).error( "COntologyMIPS::Open( ) failed on ontology line %d: %s", sParserOnto.m_iLine,
 			sParserOnto.m_szLine );
 		return false; }
 	if( !OpenGenes( sParserGene ) ) {
-		g_CatSleipnir.error( "COntologyMIPS::Open( ) failed on genes line %d: %s", sParserGene.m_iLine,
+		g_CatSleipnir( ).error( "COntologyMIPS::Open( ) failed on genes line %d: %s", sParserGene.m_iLine,
 			sParserGene.m_szLine );
 		return false; }
 
 	size_t					i, j;
 	vector<vector<size_t> >	vecveciChildren;
 
-	g_CatSleipnir.info( "COntologyMIPSImpl::OpenOntology( )" );
+	g_CatSleipnir( ).info( "COntologyMIPSImpl::OpenOntology( )" );
 	if( !( sParser.GetLine( ) && ( sParser.m_szLine[ 0 ] == '#' ) && sParser.GetLine( ) ) )
 		return false;
 
 bool COntologyMIPSImpl::OpenGenes( SParserMIPS& sParser ) {
 	size_t	i, j;
 
-	g_CatSleipnir.info( "COntologyMIPSImpl::OpenGenes( )" );
+	g_CatSleipnir( ).info( "COntologyMIPSImpl::OpenGenes( )" );
 	if( !sParser.GetLine( ) )
 		return false;
 	if( !sParser.m_szLine[ 0 ] )

src/bayesnetfn.cpp

 
 	for( i = 0; i < pData->GetGenes( ); ++i ) {
 		if( !( i % 250 ) )
-			g_CatSleipnir.notice( "CBayesNetFN::Evaluate( %d ) %d/%d", fZero, i, pData->GetGenes( ) );
+			g_CatSleipnir( ).notice( "CBayesNetFN::Evaluate( %d ) %d/%d", fZero, i, pData->GetGenes( ) );
 		for( j = ( i + 1 ); j < pData->GetGenes( ); ++j ) {
 			if( !pData->IsExample( i, j ) )
 				continue;
 		const CDataMatrix&	MatDefault	= pBNDefault->m_vecNodes[ iNode ].m_MatCPT;
 
 		if( MatDefault.GetRows( ) != vecdProbs.size( ) ) {
-			g_CatSleipnir.error( "CBayesNetMinimal::Counts2Probs( ) default distribution size mismatch: wanted %d, got %d",
+			g_CatSleipnir( ).error( "CBayesNetMinimal::Counts2Probs( ) default distribution size mismatch: wanted %d, got %d",
 				vecdProbs.size( ), MatDefault.GetRows( ) );
 			return false; }
 		for( i = 0; i < vecdProbs.size( ); ++i )
 		const CDataMatrix&	MatCPT	= m_vecNodes[ i ].m_MatCPT;
 		for( j = 0; j < MatCPT.GetColumns( ); ++j ) {
 			if( c >= MatCPT.GetRows( ) ) {
-				g_CatSleipnir.error( "CBayesNetMinimal::Evaluate( %d ) illegal value: %d/%d in %d", iOffset, c, MatCPT.GetRows( ), i );
+				g_CatSleipnir( ).error( "CBayesNetMinimal::Evaluate( %d ) illegal value: %d/%d in %d", iOffset, c, MatCPT.GetRows( ), i );
 				return CMeta::GetNaN( ); }
 			m_adNY[ j ] *= MatCPT.Get( c, j ); } }
 
 	iState = c_iStateInitial;
 	ifsm.open( szFileCounts );
 	if( !ifsm.is_open( ) ) {
-		g_CatSleipnir.error( "CBayesNetMinimal::OpenCounts( %s ) could not open file", szFileCounts );
+		g_CatSleipnir( ).error( "CBayesNetMinimal::OpenCounts( %s ) could not open file", szFileCounts );
 		return false; }
 	while( !ifsm.eof( ) ) {
 		ifsm.getline( szBuffer, CFile::c_iBufferSize - 1 );
 		switch( iState ) {
 			case c_iStateInitial:
 				if( vecstrLine.size( ) != 2 ) {
-					g_CatSleipnir.error( "CBayesNetMinimal::OpenCounts( %s ) illegal line: %s", szFileCounts,
+					g_CatSleipnir( ).error( "CBayesNetMinimal::OpenCounts( %s ) illegal line: %s", szFileCounts,
 						szBuffer );
 					return false; }
 				m_strID = vecstrLine[ 0 ];
 
 			case c_iStatePreCPT:
 				if( ( iterNode = mapstriNodes.find( vecstrLine[ 0 ] ) ) == mapstriNodes.end( ) ) {
-					g_CatSleipnir.error( "CBayesNetMinimal::OpenCounts( %s ) could not identify node: %s",
+					g_CatSleipnir( ).error( "CBayesNetMinimal::OpenCounts( %s ) could not identify node: %s",
 						szFileCounts, vecstrLine[ 0 ].c_str( ) );
 					return false; }
 				iNode = iterNode->second;
 
 					if( iClass ) {
 						if( MatCPT.GetRows( ) != vecdProbs.size( ) ) {
-							g_CatSleipnir.error( "CBayesNetMinimal::OpenCounts( %s ) illegal count number: given %d, expected %d",
+							g_CatSleipnir( ).error( "CBayesNetMinimal::OpenCounts( %s ) illegal count number: given %d, expected %d",
 								szFileCounts, vecdProbs.size( ), MatCPT.GetRows( ) );
 							return false; } }
 					else

src/bayesnetpnl.cpp

 	iNode = 0;
 	for( i = 0; i < pData->GetGenes( ); ++i ) {
 		if( !( i % 250 ) )
-			g_CatSleipnir.notice( "CBayesNetPNL::Evaluate( %d ) %d/%d", fZero, i,
+			g_CatSleipnir( ).notice( "CBayesNetPNL::Evaluate( %d ) %d/%d", fZero, i,
 				pData->GetGenes( ) );
 		for( j = ( i + 1 ); j < pData->GetGenes( ); ++j ) {
 			if( !pData->IsExample( i, j ) )

src/bayesnetsmile.cpp

 			vecpExpected[ i ]->FillWith( 0 );
 		for( iterDatum = mapData.begin( ); iterDatum != mapData.end( ); ++iterDatum ) {
 			if( !( iDatum++ % 50 ) )
-				g_CatSleipnir.notice( "CBayesNetSmile::LearnGrouped( %d, %d ) iteration %d, datum %d/%d",
+				g_CatSleipnir( ).notice( "CBayesNetSmile::LearnGrouped( %d, %d ) iteration %d, datum %d/%d",
 					iIterations, fZero, iIter, ( iDatum - 1 ), mapData.size( ) );
 			FillCPTs( vecfHidden, iterDatum->first, fZero, true );
 			m_SmileNet.UpdateBeliefs( );
 			vecpExpected[ i ]->FillWith( 0 );
 		for( i = 0; i < pData->GetGenes( ); ++i ) {
 			if( !( i % 50 ) )
-				g_CatSleipnir.notice( "CBayesNetSmile::LearnUngrouped( %d, %d ) iteration %d, gene %d/%d",
+				g_CatSleipnir( ).notice( "CBayesNetSmile::LearnUngrouped( %d, %d ) iteration %d, gene %d/%d",
 					iIterations, fZero, iIter, i, pData->GetGenes( ) );
 			for( j = ( i + 1 ); j < pData->GetGenes( ); ++j ) {
 				if( !FillCPTs( pData, i, j, fZero, true ) )
 		dPrior = (float)(*pValue->GetMatrix( ))[ 0 ]; }
 	for( i = 0; i < pData->GetGenes( ); ++i ) {
 		if( !( i % 250 ) )
-			g_CatSleipnir.notice( "CBayesNetSmile::Evaluate( %d ) %d/%d", fZero, i,
+			g_CatSleipnir( ).notice( "CBayesNetSmile::Evaluate( %d ) %d/%d", fZero, i,
 				pData->GetGenes( ) );
 		if( pDatOut && !pvecvecdOut && ( ( iOne = veciGenes[ i ] ) == -1 ) )
 			continue;
 	for( i = 0; i < iAnswers; ++i )
 		(*pMat)[ (int)i ] = ( j = vecveciCounts[ 0 ][ (int)i ] ) ? j : ( fFallback ? 0 : 1 );
 	if( fFallback ) {
-		g_CatSleipnir.warn( "CBayesNetSmile::LearnNaive( %d ) insufficient data for node %s",
+		g_CatSleipnir( ).warn( "CBayesNetSmile::LearnNaive( %d ) insufficient data for node %s",
 			fZero, m_SmileNet.GetNode( 0 )->Info( ).Header( ).GetId( ) );
 		dLambda = 1 - ( (float)iCount / c_iMinimum );
 		pMat->Normalize( );
 					veciCoords[ 1 ] = (int)k;
 					dCount += (*pMat)[ veciCoords ]; }
 				if( dCount < c_iMinimum ) {
-					g_CatSleipnir.warn( "CBayesNetSmile::LearnNaive( %d ) insufficient data for node %s, column %d",
+					g_CatSleipnir( ).warn( "CBayesNetSmile::LearnNaive( %d ) insufficient data for node %s, column %d",
 						fZero, m_SmileNet.GetNode( (int)i )->Info( ).Header( ).GetId( ), j );
 					dLambda = 1 - ( (float)dCount / c_iMinimum );
 					for( k = 0; k < (size_t)pDef->GetNumberOfOutcomes( ); ++k ) {
 	((CBayesNetSmile*)this)->m_SmileNet.SetDefaultBNAlgorithm( iAlgorithm );
 	for( i = 0; i < PCLResults.GetGenes( ); ++i ) {
 		if( !( i % 1 ) )
-			g_CatSleipnir.notice( "CBayesNetSmile::Evaluate( %d ) %d/%d", fZero, i,
+			g_CatSleipnir( ).notice( "CBayesNetSmile::Evaluate( %d ) %d/%d", fZero, i,
 				PCLResults.GetGenes( ) );
 		strCur = EncodeDatum( PCLData, PCLData.GetGene( PCLResults.GetGene( i ) ), veciMap );
 		if( m_fGroup && ( ( iterDatum = mapData.find( strCur ) ) != mapData.end( ) ) ) {

src/bayesnetsmileelr.cpp

 	dDotNew = ELRDot( vecvecfGradient, vecvecfPrev );
 
 	for( i = 0; i < iIterations; ++i ) {
-		g_CatSleipnir.notice( "CBayesNetSmile::LearnELR( %d, %d ) iteration %d/%d",
+		g_CatSleipnir( ).notice( "CBayesNetSmile::LearnELR( %d, %d ) iteration %d/%d",
 			iIterations, fZero, i, iIterations );
 		if( !dDotNew )
 			continue;
 				v = u;
 				fv = fu; } } }
 
-	g_CatSleipnir.warn( "CBayesNetSmileImpl::ELRBrent( ) too many iterations" );
+	g_CatSleipnir( ).warn( "CBayesNetSmileImpl::ELRBrent( ) too many iterations" );
 	dAX = a;
 	dBX = b;
 	return fx; }

src/clusthierarchical.cpp

 	for( i = 0; i < Sim.GetSize( ); ++i )
 		for( j = ( i + 1 ); j < Sim.GetSize( ); ++j )
 			if( ( ( d = Sim.Get( i, j ) ) == -FLT_MAX ) || CMeta::IsNaN( d ) ) {
-				g_CatSleipnir.error( "CClustHierarchicalImpl::Cluster( ) illegal input value at %d, %d", i,
+				g_CatSleipnir( ).error( "CClustHierarchicalImpl::Cluster( ) illegal input value at %d, %d", i,
 					j );
 				return NULL; }
 	iAssigned = iParentless = Sim.GetSize( );
 		float	dHeight;
 
 		if( !( iParentless % 500 ) )
-			g_CatSleipnir.notice( "CClustHierarchical::Cluster( ) %d/%d nodes remaining", iParentless,
+			g_CatSleipnir( ).notice( "CClustHierarchical::Cluster( ) %d/%d nodes remaining", iParentless,
 				Sim.GetSize( ) );
 		dHeight = -FLT_MAX;
 		for( k = 0; k < Sim.GetSize( ); ++k )

src/clustkmeans.cpp

-/*****************************************************************************
-* This file is provided under the Creative Commons Attribution 3.0 license.
-*
-* You are free to share, copy, distribute, transmit, or adapt this work
-* PROVIDED THAT you attribute the work to the authors listed below.
-* For more information, please see the following web page:
-* http://creativecommons.org/licenses/by/3.0/
-*
-* This file is a component of the Sleipnir library for functional genomics,
-* authored by:
-* Curtis Huttenhower (chuttenh@princeton.edu)
-* Mark Schroeder
-* Maria D. Chikina
-* Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
-*
-* If you use this library, the included executable tools, or any related
-* code in your work, please cite the following publication:
-* Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
-* Olga G. Troyanskaya.
-* "The Sleipnir library for computational functional genomics"
-*****************************************************************************/
-#include "stdafx.h"
-#include "clustkmeans.h"
-#include "measure.h"
-#include "meta.h"
-
-namespace Sleipnir {
-
-/*!
- * \brief
- * Cluster a set of elements into k groups using the given data and pairwise similarity score.
- * 
- * \param MatData
- * Data vectors for each element, generally microarray values from a PCL file.
- * 
- * \param pMeasure
- * Similarity measure to use for clustering.
- * 
- * \param iK
- * Number of clusters to generate.
- * 
- * \param vecsClusters
- * Output cluster IDs for each gene.
- * 
- * \param pMatWeights
- * If non-null, weights to use for each gene/condition value.  These can be used to up/downweight aneuploidies
- * present under only certain conditions, for example.  Default assumes all ones.
- * 
- * \returns
- * True if clustering succeeded.
- * 
- * Performs k-means clustering on the given data using the specified similarity measure and number of
- * clusters.  The indices of each element's final cluster are indicated in the output vector.  If given,
- * individual gene/condition scores can be weighted (e.g. to up/downweight aneuploidies present only under
- * certain conditions).  During k-means clustering, K centers are initially chosen at random.  Each gene is
- * assigned to the center most similar to it, and the centers are moved to the mean of their assigned
- * genes.  This process is iterated until no gene assignments change.  This places each gene in exactly one
- * cluster.
- * 
- * \remarks
- * The size of MatData must be at least iK; on successful return, the size of vecsClusters will be equal to
- * the size of MatData.
- * 
- * \see
- * CClustHierarchical::Cluster
- */
-bool CClustKMeans::Cluster( const CDataMatrix& MatData, const IMeasure* pMeasure, size_t iK,
-	vector<uint16_t>& vecsClusters, const CDataMatrix* pMatWeights ) {
-	size_t			i, j, iIteration;
-	float			d;
-	CDataMatrix		MatMeans;
-	bool			fDone;
-	vector<size_t>	veciCounts;
-	uint16_t		sMax;
-
-	if( !pMeasure || ( MatData.GetRows( ) < iK ) )
-		return false;
-
-	MatMeans.Initialize( iK, MatData.GetColumns( ) );
-	for( i = 0; i < MatMeans.GetRows( ); ++i )
-		Randomize( MatMeans, i, MatData );
-
-	vecsClusters.resize( MatData.GetRows( ) );
-	fill( vecsClusters.begin( ), vecsClusters.end( ), -1 );
-	veciCounts.resize( iK );
-	for( iIteration = 0,fDone = false; !fDone; ++iIteration ) {
-		if( !( iIteration % 10 ) )
-			g_CatSleipnir.notice( "CClustKMeans::Cluster( %d ) iteration %d", iK, iIteration );
-		fill( veciCounts.begin( ), veciCounts.end( ), 0 );
-		fDone = true;
-		for( i = 0; i < vecsClusters.size( ); ++i ) {
-			float	dMax;
-
-			dMax = -FLT_MAX;
-			for( sMax = j = 0; j < MatMeans.GetRows( ); ++j ) {
-				d = (float)pMeasure->Measure( MatData.Get( i ), MatData.GetColumns( ), MatMeans.Get( j ),
-					MatMeans.GetColumns( ), IMeasure::EMapNone, pMatWeights ? pMatWeights->Get( i ) : NULL );
-				if( CMeta::IsNaN( d ) ) {
-					g_CatSleipnir.error( "CClustKMeans::Cluster( %d ) got invalid measure for genes: %d, %d",
-						iK, i, j );
-					return false; }
-				if( d > dMax ) {
-					dMax = d;
-					sMax = j; } }
-			veciCounts[ sMax ]++;
-			if( vecsClusters[ i ] != sMax ) {
-				fDone = false;
-				vecsClusters[ i ] = sMax; } }
-
-		MatMeans.Clear( );
-		for( i = 0; i < vecsClusters.size( ); ++i )
-			for( j = 0; j < MatMeans.GetColumns( ); ++j )
-				MatMeans.Get( vecsClusters[ i ], j ) += MatData.Get( i, j );
-		for( i = 0; i < MatMeans.GetRows( ); ++i )
-			if( veciCounts[ i ] )
-				for( j = 0; j < MatMeans.GetColumns( ); ++j )
-					MatMeans.Get( i, j ) /= veciCounts[ i ];
-			else
-				Randomize( MatMeans, i, MatData ); }
-
-	return true; }
-
+/*****************************************************************************
+* This file is provided under the Creative Commons Attribution 3.0 license.
+*
+* You are free to share, copy, distribute, transmit, or adapt this work
+* PROVIDED THAT you attribute the work to the authors listed below.
+* For more information, please see the following web page:
+* http://creativecommons.org/licenses/by/3.0/
+*
+* This file is a component of the Sleipnir library for functional genomics,
+* authored by:
+* Curtis Huttenhower (chuttenh@princeton.edu)
+* Mark Schroeder
+* Maria D. Chikina
+* Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
+*
+* If you use this library, the included executable tools, or any related
+* code in your work, please cite the following publication:
+* Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
+* Olga G. Troyanskaya.
+* "The Sleipnir library for computational functional genomics"
+*****************************************************************************/
+#include "stdafx.h"
+#include "clustkmeans.h"
+#include "measure.h"
+#include "meta.h"
+
+namespace Sleipnir {
+
 /*!
  * \brief
- * Cluster a set of elements into k groups using the given pairwise similarities.
+ * Cluster a set of elements into k groups using the given data and pairwise similarity score.
+ * 
+ * \param MatData
+ * Data vectors for each element, generally microarray values from a PCL file.
+ * 
+ * \param pMeasure
+ * Similarity measure to use for clustering.
+ * 
+ * \param iK
+ * Number of clusters to generate.
+ * 
+ * \param vecsClusters
+ * Output cluster IDs for each gene.
+ * 
+ * \param pMatWeights
+ * If non-null, weights to use for each gene/condition value.  These can be used to up/downweight aneuploidies
+ * present under only certain conditions, for example.  Default assumes all ones.
+ * 
+ * \returns
+ * True if clustering succeeded.
+ * 
+ * Performs k-means clustering on the given data using the specified similarity measure and number of
+ * clusters.  The indices of each element's final cluster are indicated in the output vector.  If given,
+ * individual gene/condition scores can be weighted (e.g. to up/downweight aneuploidies present only under
+ * certain conditions).  During k-means clustering, K centers are initially chosen at random.  Each gene is
+ * assigned to the center most similar to it, and the centers are moved to the mean of their assigned
+ * genes.  This process is iterated until no gene assignments change.  This places each gene in exactly one
+ * cluster.
+ * 
+ * \remarks
+ * The size of MatData must be at least iK; on successful return, the size of vecsClusters will be equal to
+ * the size of MatData.
+ * 
+ * \see
+ * CClustHierarchical::Cluster
+ */
+bool CClustKMeans::Cluster( const CDataMatrix& MatData, const IMeasure* pMeasure, size_t iK,
+	vector<uint16_t>& vecsClusters, const CDataMatrix* pMatWeights ) {
+	size_t			i, j, iIteration;
+	float			d;
+	CDataMatrix		MatMeans;
+	bool			fDone;
+	vector<size_t>	veciCounts;
+	uint16_t		sMax;
+
+	if( !pMeasure || ( MatData.GetRows( ) < iK ) )
+		return false;
+
+	MatMeans.Initialize( iK, MatData.GetColumns( ) );
+	for( i = 0; i < MatMeans.GetRows( ); ++i )
+		Randomize( MatMeans, i, MatData );
+
+	vecsClusters.resize( MatData.GetRows( ) );
+	fill( vecsClusters.begin( ), vecsClusters.end( ), -1 );
+	veciCounts.resize( iK );
+	for( iIteration = 0,fDone = false; !fDone; ++iIteration ) {
+		if( !( iIteration % 10 ) )
+			g_CatSleipnir( ).notice( "CClustKMeans::Cluster( %d ) iteration %d", iK, iIteration );
+		fill( veciCounts.begin( ), veciCounts.end( ), 0 );
+		fDone = true;
+		for( i = 0; i < vecsClusters.size( ); ++i ) {
+			float	dMax;
+
+			dMax = -FLT_MAX;
+			for( sMax = j = 0; j < MatMeans.GetRows( ); ++j ) {
+				d = (float)pMeasure->Measure( MatData.Get( i ), MatData.GetColumns( ), MatMeans.Get( j ),
+					MatMeans.GetColumns( ), IMeasure::EMapNone, pMatWeights ? pMatWeights->Get( i ) : NULL );
+				if( CMeta::IsNaN( d ) ) {
+					g_CatSleipnir( ).error( "CClustKMeans::Cluster( %d ) got invalid measure for genes: %d, %d",
+						iK, i, j );
+					return false; }
+				if( d > dMax ) {
+					dMax = d;
+					sMax = j; } }
+			veciCounts[ sMax ]++;
+			if( vecsClusters[ i ] != sMax ) {
+				fDone = false;
+				vecsClusters[ i ] = sMax; } }
+
+		MatMeans.Clear( );
+		for( i = 0; i < vecsClusters.size( ); ++i )
+			for( j = 0; j < MatMeans.GetColumns( ); ++j )
+				MatMeans.Get( vecsClusters[ i ], j ) += MatData.Get( i, j );
+		for( i = 0; i < MatMeans.GetRows( ); ++i )
+			if( veciCounts[ i ] )
+				for( j = 0; j < MatMeans.GetColumns( ); ++j )
+					MatMeans.Get( i, j ) /= veciCounts[ i ];
+			else
+				Randomize( MatMeans, i, MatData ); }
+
+	return true; }
+
+/*!
+ * \brief
+ * Cluster a set of elements into k groups using the given pairwise similarities.
  * 
  * \param MatSimilarities
  * Matrix of precalculated pairwise similarities between elements to be clustered.
  * 
  * \param iK
- * Number of clusters to generate.
- * 
- * \param vecsClusters
- * Output cluster IDs for each gene.
+ * Number of clusters to generate.
+ * 
+ * \param vecsClusters
+ * Output cluster IDs for each gene.
  * 
  * \returns
- * True if clustering succeeded.
- * 
- * Performs k-means clustering on the given data using the specified similarites and number of
- * clusters.  The indices of each element's final cluster are indicated in the output vector.  During
- * k-means clustering, K centers are initially chosen at random.  Each gene is assigned to the center
- * most similar to it, and the centers are moved to the mean of their assigned genes.  This process is
- * iterated until no gene assignments change.  This places each gene in exactly one cluster.
- * 
- * \remarks
- * The size of MatSimilarities must be at least iK; on successful return, the size of vecsClusters will be equal
- * to the size of MatSimilarities.
- * 
- * \see
- * CClustHierarchical::Cluster
+ * True if clustering succeeded.
+ * 
+ * Performs k-means clustering on the given data using the specified similarites and number of
+ * clusters.  The indices of each element's final cluster are indicated in the output vector.  During
+ * k-means clustering, K centers are initially chosen at random.  Each gene is assigned to the center
+ * most similar to it, and the centers are moved to the mean of their assigned genes.  This process is
+ * iterated until no gene assignments change.  This places each gene in exactly one cluster.
+ * 
+ * \remarks
+ * The size of MatSimilarities must be at least iK; on successful return, the size of vecsClusters will be equal
+ * to the size of MatSimilarities.
+ * 
+ * \see
+ * CClustHierarchical::Cluster
  */
-bool CClustKMeans::Cluster( const CDistanceMatrix& MatSimilarities, size_t iK,
-	vector<uint16_t>& vecsClusters ) {
-	size_t			i, j, iOne, iIteration, iChanged, iState;
-	float			d, dMax, dMin;
-	CDataMatrix		MatPrev, MatNext;
-	vector<size_t>	veciPrev, veciNext;
-	uint16_t		sMax;
-	set<size_t>		setiStates;
-
-	if( MatSimilarities.GetSize( ) < iK )
-		return false;
-
-	dMax = -( dMin = FLT_MAX );
-	for( i = 0; i < MatSimilarities.GetSize( ); ++i )
-		for( j = ( i + 1 ); j < MatSimilarities.GetSize( ); ++j )
-			if( !CMeta::IsNaN( d = MatSimilarities.Get( i, j ) ) ) {
-				if( d > dMax )
-					dMax = d;
-				if( d < dMin )
-					dMin = d; }
-	if( dMin == FLT_MAX )
-		return false;
-	dMax++;
-	dMin--;
-	MatPrev.Initialize( MatSimilarities.GetSize( ), iK );
-	for( i = 0; i < MatPrev.GetColumns( ); ++i ) {
-		iOne = rand( ) % MatSimilarities.GetSize( );
-		for( j = 0; j < MatPrev.GetRows( ); ++j )
-			MatPrev.Set( j, i, GetClean( iOne, j, dMin, dMax, MatSimilarities ) ); }
-	MatNext.Initialize( MatPrev.GetRows( ), MatPrev.GetColumns( ) );
-	MatNext.Clear( );
-
-	vecsClusters.resize( MatSimilarities.GetSize( ) );
-	fill( vecsClusters.begin( ), vecsClusters.end( ), iK );
-	veciPrev.resize( iK );
-	fill( veciPrev.begin( ), veciPrev.end( ), 1 );
-	veciNext.resize( veciPrev.size( ) );
-	for( iChanged = MatSimilarities.GetSize( ),iIteration = 0; iChanged > 2; ++iIteration ) {
-		g_CatSleipnir.log( ( iIteration % 10 ) ? Priority::DEBUG : Priority::NOTICE,
-			"CClustKMeans::Cluster( %d ) iteration %d", iK, iIteration );
-		for( iChanged = i = 0; i < vecsClusters.size( ); ++i ) {
-			float	dMax;
-
-			dMax = -FLT_MAX;
-			for( sMax = j = 0; j < MatPrev.GetColumns( ); ++j ) {
-				if( CMeta::IsNaN( d = MatPrev.Get( i, j ) ) ) {
-					g_CatSleipnir.error( "CClustKMeans::Cluster( %d ) failed on gene %d, cluster %d", i, j );
-					return false; }
-				d /= veciPrev[ j ];
-				if( d > dMax ) {
-					dMax = d;
-					sMax = j; } }
-			if( vecsClusters[ i ] != sMax ) {
-				iChanged++;
-				if( vecsClusters[ i ] != iK )
-					veciNext[ vecsClusters[ i ] ]--;
-				veciNext[ sMax ]++;
-				for( j = 0; j < MatSimilarities.GetSize( ); ++j ) {
-					d = GetClean( i, j, dMin, dMax, MatSimilarities );
-					if( vecsClusters[ i ] != iK )
-						MatNext.Get( j, vecsClusters[ i ] ) -= d;
-					MatNext.Get( j, sMax ) += d; }
-				vecsClusters[ i ] = sMax; } }
-
-		for( i = 0; i < veciNext.size( ); ++i )
-			if( !veciNext[ i ] ) {
-				do
-					iOne = rand( ) % vecsClusters.size( );
-				while( veciNext[ vecsClusters[ iOne ] ] < 2 );
-				g_CatSleipnir.info( "CClustKMeans::Cluster( %d ) moving gene %d into empty cluster %d", iK,
-					iOne, i );
-				veciNext[ vecsClusters[ iOne ] ]--;
-				for( j = 0; j < MatNext.GetRows( ); ++j ) {
-					d = GetClean( iOne, j, dMin, dMax, MatSimilarities );
-					MatNext.Get( j, vecsClusters[ iOne ] ) -= d;
-					MatNext.Set( j, i, d ); }
-				veciNext[ i ]++; }
-
-// This calculates a simple hash for the current cluster assignments
-		for( iState = i = 0; i < vecsClusters.size( ); ++i ) {
-			j = iState & (size_t)-32768;
-			iState <<= 15;
-			iState ^= j >> ( ( sizeof(size_t) * 8 ) - 15 );
-			iState ^= vecsClusters[ i ] + 1; }
-		if( setiStates.find( iState ) != setiStates.end( ) ) {
-			g_CatSleipnir.info( "CClustKMeans::Cluster( %d ) found redundant state, terminating", iK );
-			break; }
-		setiStates.insert( iState );
-
-		g_CatSleipnir.notice( "CClustKMeans::Cluster( %d ) updated %d genes", iK, iChanged );
-		if( g_CatSleipnir.isDebugEnabled( ) )
-			for( i = 0; i < MatPrev.GetRows( ); ++i ) {
-				ostringstream	ossm;
-
-				for( j = 0; j < MatPrev.GetColumns( ); ++j )
-					ossm << ( j ? "\t" : "" ) << MatPrev.Get( i, j );
-				g_CatSleipnir.debug( "CClustKMeans::Cluster( %d ) object %d:	%s", iK, i,
-					ossm.str( ).c_str( ) ); }
-		copy( veciNext.begin( ), veciNext.end( ), veciPrev.begin( ) );
-		for( i = 0; i < MatPrev.GetRows( ); ++i )
-			memcpy( MatPrev.Get( i ), MatNext.Get( i ), MatPrev.GetColumns( ) * sizeof(*MatPrev.Get( i )) ); }
-
-	return true; }
-
-void CClustKMeansImpl::Randomize( CDataMatrix& MatMeans, size_t iRow, const CDataMatrix& MatData ) {
-
-	MatMeans.Set( iRow, MatData.Get( rand( ) % MatData.GetRows( ) ) ); }
-
-}
+bool CClustKMeans::Cluster( const CDistanceMatrix& MatSimilarities, size_t iK,
+	vector<uint16_t>& vecsClusters ) {
+	size_t			i, j, iOne, iIteration, iChanged, iState;
+	float			d, dMax, dMin;
+	CDataMatrix		MatPrev, MatNext;
+	vector<size_t>	veciPrev, veciNext;
+	uint16_t		sMax;
+	set<size_t>		setiStates;
+
+	if( MatSimilarities.GetSize( ) < iK )
+		return false;
+
+	dMax = -( dMin = FLT_MAX );
+	for( i = 0; i < MatSimilarities.GetSize( ); ++i )
+		for( j = ( i + 1 ); j < MatSimilarities.GetSize( ); ++j )
+			if( !CMeta::IsNaN( d = MatSimilarities.Get( i, j ) ) ) {
+				if( d > dMax )
+					dMax = d;
+				if( d < dMin )
+					dMin = d; }
+	if( dMin == FLT_MAX )
+		return false;
+	dMax++;
+	dMin--;
+	MatPrev.Initialize( MatSimilarities.GetSize( ), iK );
+	for( i = 0; i < MatPrev.GetColumns( ); ++i ) {
+		iOne = rand( ) % MatSimilarities.GetSize( );
+		for( j = 0; j < MatPrev.GetRows( ); ++j )
+			MatPrev.Set( j, i, GetClean( iOne, j, dMin, dMax, MatSimilarities ) ); }
+	MatNext.Initialize( MatPrev.GetRows( ), MatPrev.GetColumns( ) );
+	MatNext.Clear( );
+
+	vecsClusters.resize( MatSimilarities.GetSize( ) );
+	fill( vecsClusters.begin( ), vecsClusters.end( ), iK );
+	veciPrev.resize( iK );
+	fill( veciPrev.begin( ), veciPrev.end( ), 1 );
+	veciNext.resize( veciPrev.size( ) );
+	for( iChanged = MatSimilarities.GetSize( ),iIteration = 0; iChanged > 2; ++iIteration ) {
+		g_CatSleipnir( ).log( ( iIteration % 10 ) ? Priority::DEBUG : Priority::NOTICE,
+			"CClustKMeans::Cluster( %d ) iteration %d", iK, iIteration );
+		for( iChanged = i = 0; i < vecsClusters.size( ); ++i ) {
+			float	dMax;
+
+			dMax = -FLT_MAX;
+			for( sMax = j = 0; j < MatPrev.GetColumns( ); ++j ) {
+				if( CMeta::IsNaN( d = MatPrev.Get( i, j ) ) ) {
+					g_CatSleipnir( ).error( "CClustKMeans::Cluster( %d ) failed on gene %d, cluster %d", i, j );
+					return false; }
+				d /= veciPrev[ j ];
+				if( d > dMax ) {
+					dMax = d;
+					sMax = j; } }
+			if( vecsClusters[ i ] != sMax ) {
+				iChanged++;
+				if( vecsClusters[ i ] != iK )
+					veciNext[ vecsClusters[ i ] ]--;
+				veciNext[ sMax ]++;
+				for( j = 0; j < MatSimilarities.GetSize( ); ++j ) {
+					d = GetClean( i, j, dMin, dMax, MatSimilarities );
+					if( vecsClusters[ i ] != iK )
+						MatNext.Get( j, vecsClusters[ i ] ) -= d;
+					MatNext.Get( j, sMax ) += d; }
+				vecsClusters[ i ] = sMax; } }
+
+		for( i = 0; i < veciNext.size( ); ++i )
+			if( !veciNext[ i ] ) {
+				do
+					iOne = rand( ) % vecsClusters.size( );
+				while( veciNext[ vecsClusters[ iOne ] ] < 2 );
+				g_CatSleipnir( ).info( "CClustKMeans::Cluster( %d ) moving gene %d into empty cluster %d", iK,
+					iOne, i );
+				veciNext[ vecsClusters[ iOne ] ]--;
+				for( j = 0; j < MatNext.GetRows( ); ++j ) {
+					d = GetClean( iOne, j, dMin, dMax, MatSimilarities );
+					MatNext.Get( j, vecsClusters[ iOne ] ) -= d;
+					MatNext.Set( j, i, d ); }
+				veciNext[ i ]++; }
+
+// This calculates a simple hash for the current cluster assignments
+		for( iState = i = 0; i < vecsClusters.size( ); ++i ) {
+			j = iState & (size_t)-32768;
+			iState <<= 15;
+			iState ^= j >> ( ( sizeof(size_t) * 8 ) - 15 );
+			iState ^= vecsClusters[ i ] + 1; }
+		if( setiStates.find( iState ) != setiStates.end( ) ) {
+			g_CatSleipnir( ).info( "CClustKMeans::Cluster( %d ) found redundant state, terminating", iK );
+			break; }
+		setiStates.insert( iState );
+
+		g_CatSleipnir( ).notice( "CClustKMeans::Cluster( %d ) updated %d genes", iK, iChanged );
+		if( g_CatSleipnir( ).isDebugEnabled( ) )
+			for( i = 0; i < MatPrev.GetRows( ); ++i ) {
+				ostringstream	ossm;
+
+				for( j = 0; j < MatPrev.GetColumns( ); ++j )
+					ossm << ( j ? "\t" : "" ) << MatPrev.Get( i, j );
+				g_CatSleipnir( ).debug( "CClustKMeans::Cluster( %d ) object %d:	%s", iK, i,
+					ossm.str( ).c_str( ) ); }
+		copy( veciNext.begin( ), veciNext.end( ), veciPrev.begin( ) );
+		for( i = 0; i < MatPrev.GetRows( ); ++i )
+			memcpy( MatPrev.Get( i ), MatNext.Get( i ), MatPrev.GetColumns( ) * sizeof(*MatPrev.Get( i )) ); }
+
+	return true; }
+
+void CClustKMeansImpl::Randomize( CDataMatrix& MatMeans, size_t iRow, const CDataMatrix& MatData ) {
+
+	MatMeans.Set( iRow, MatData.Get( rand( ) % MatData.GetRows( ) ) ); }
+
+}
 	size_t				i, j;
 
 	for( dDiameter = dMinDiameter; dDiameter <= dMaxDiameter; dDiameter += dDeltaDiameter ) {
-		g_CatSleipnir.notice( "CClustQTC::Cluster( %g, %g, %g, %d ) processing diameter %g", dMinDiameter,
+		g_CatSleipnir( ).notice( "CClustQTC::Cluster( %g, %g, %g, %d ) processing diameter %g", dMinDiameter,
 			dMaxDiameter, dDeltaDiameter, iSize, dDiameter );
 		sClusters = QualityThresholdAll( MatSimilarities.GetSize( ), dDiameter, iSize, MatSimilarities,
 			vecsClusters );
 
 	vecsClusters.resize( iGenes );
 	for( iAssigned = sCluster = 0; ; ++sCluster ) {
-		g_CatSleipnir.notice( "CClustQTCImpl::QualityThresholdAll( ) cluster %d, assigned %d/%d genes",
+		g_CatSleipnir( ).notice( "CClustQTCImpl::QualityThresholdAll( ) cluster %d, assigned %d/%d genes",
 			sCluster + 1, iAssigned, iGenes );
 		QualityThresholdLargest( iGenes, dDiameter, Dist, vecfAssigned, vecsCur );
 		for( i = 0; i < vecsCur.size( ); ++i )
 	vecfClone.resize( vecfAssigned.size( ) );
 	for( iGene = 0; iGene < vecfAssigned.size( ); ++iGene ) {
 		if( !( iGene % 1000 ) )
-			g_CatSleipnir.notice( "CClustQTCImpl::QualityThresholdLargest( %g ) processing gene %d/%d",
+			g_CatSleipnir( ).notice( "CClustQTCImpl::QualityThresholdLargest( %g ) processing gene %d/%d",
 				dDiameter, iGene, vecfAssigned.size( ) );
 		if( vecfAssigned[ iGene ] )
 			continue;
 		adWA = adWB = NULL;
 	for( i = 0; i < Data.GetRows( ); ++i ) {
 		if( !( i % 10 ) )
-			g_CatSleipnir.notice( "CClustQTCImpl::InitializeDistances( %d ) initializing %d/%d genes",
+			g_CatSleipnir( ).notice( "CClustQTCImpl::InitializeDistances( %d ) initializing %d/%d genes",
 				i, Data.GetRows( ) );
 		for( j = ( i + 1 ); j < Data.GetRows( ); ++j )
 			Dist.Set( i, j, (float)GetJackDistance( Data.Get( i ), Data.Get( j ),
 		vector<uint32_t>	veciMotifs;
 
 		if( !Motifs.GetMatches( strKMer, veciMotifs ) ) {
-			g_CatSleipnir.error( "CCoalesceHistograms::Add( %s, %d, %d ) unrecognized kmer: %s",
+			g_CatSleipnir( ).error( "CCoalesceHistograms::Add( %s, %d, %d ) unrecognized kmer: %s",
 				strSequence.c_str( ), iType, fIntron, strKMer.c_str( ) );
 			return false; }
 		for( j = 0; j < veciMotifs.size( ); ++j )
 	vector<size_t>			veciBins, veciCounts;
 	float					dOne, dTwo, dMaxOne, dMaxTwo;
 
-	g_CatSleipnir.notice( "CCoalesceGeneScores::CalculateWeights( ) calculating %d types for %d genes",
+	g_CatSleipnir( ).notice( "CCoalesceGeneScores::CalculateWeights( ) calculating %d types for %d genes",
 		GetTypes( ), m_iGenes );
 	veciBins.resize( m_iGenes );
 	veciCounts.resize( m_iGenes );
 			const float*	adOne	= Get( iType, ESubsequenceTotal, i );
 
 			if( !( i % 1000 ) )
-				g_CatSleipnir.info( "CCoalesceGeneScores::CalculateWeights( ) calculating type %d/%d, gene %d/%d",
+				g_CatSleipnir( ).info( "CCoalesceGeneScores::CalculateWeights( ) calculating type %d/%d, gene %d/%d",
 					iType, m_vecvecdWeights.size( ), i, m_iGenes );
 			if( !adOne )
 				continue;
 		for( i = 0; i < m_iGenes; ++i ) {
 			if( vecdWeights[ i ] == 1 )
 				continue;
-			g_CatSleipnir.info( "CCoalesceGeneScores::CalculateWeights( ) weighting gene %d, type %d to %g",
+			g_CatSleipnir( ).info( "CCoalesceGeneScores::CalculateWeights( ) weighting gene %d, type %d to %g",
 				i, iType, vecdWeights[ i ] );
 			for( j = ESubsequenceBegin; j < ESubsequenceEnd; ++j ) {
 				float*	adOne	= Get( iType, (ESubsequence)j, i );
 		const CCoalesceHistogramSet<>&	HistTwo	= Get( iTypeTwo, sTwo.m_eSubsequence );
 
 		dP = 1 - HistOne.CohensD( sOne.m_iMotif, HistTwo, sTwo.m_iMotif, false, dAveOne, dAverage, dZ );
-		if( g_CatSleipnir.isDebugEnabled( ) )
-			g_CatSleipnir.debug( "CCoalesceGroupHistograms::IsSimilar( %s, %s, %g ) got p-value %g",
+		if( g_CatSleipnir( ).isDebugEnabled( ) )
+			g_CatSleipnir( ).debug( "CCoalesceGroupHistograms::IsSimilar( %s, %s, %g ) got p-value %g",
 				sOne.Save( pMotifs ).c_str( ), sTwo.Save( pMotifs ).c_str( ), dPValue, dP );
 		return ( dP < dPValue );
 	} }
 				vecsThreads[ i ].m_pFASTA = &FASTA;
 				vecsThreads[ i ].m_psModifiers = &sModifiers;
 				if( pthread_create( &vecpthdThreads[ i ], NULL, ThreadCombineMotif, &vecsThreads[ i ] ) ) {
-					g_CatSleipnir.error( "CCoalesceImpl::CombineMotifs( %d ) could not combine motif: %s",
+					g_CatSleipnir( ).error( "CCoalesceImpl::CombineMotifs( %d ) could not combine motif: %s",
 						iThreads, m_pMotifs->GetMotif( iMotif ).c_str( ) );
 					return false; } }
 			for( i = 0; i < vecpthdThreads.size( ); ++i )
 	if( !( InitializeDatasets( PCLCopy ) && InitializeGeneScores( PCLCopy, FASTA, veciPCL2FASTA, sModifiers,
 		GeneScores ) ) )
 		return false;
-	if( g_CatSleipnir.isNoticeEnabled( ) ) {
+	if( g_CatSleipnir( ).isNoticeEnabled( ) ) {
 		size_t			iSequences;
 		ostringstream	ossm;
 
 		for( iSequences = i = 0; i < veciPCL2FASTA.size( ); ++i )
 			if( veciPCL2FASTA[ i ] != -1 )
 				iSequences++;
-		g_CatSleipnir.notice( "CCoalesce::Cluster( ) running with %d genes, %d conditions, and %d sequences",
+		g_CatSleipnir( ).notice( "CCoalesce::Cluster( ) running with %d genes, %d conditions, and %d sequences",
 			PCL.GetGenes( ), PCL.GetExperiments( ), iSequences );
 		for( i = 0; i < PCL.GetExperiments( ); ++i )
 			ossm << ( i ? "\t" : "" ) << PCL.GetExperiment( i );
-		g_CatSleipnir.notice( ossm.str( ).c_str( ) );
-		g_CatSleipnir.notice( "k %d, P gene %g, p condition %g, z condition %g, p motif %g, z motif %g, p correlation %g", GetK( ),
+		g_CatSleipnir( ).notice( ossm.str( ).c_str( ) );
+		g_CatSleipnir( ).notice( "k %d, P gene %g, p condition %g, z condition %g, p motif %g, z motif %g, p correlation %g", GetK( ),
 			GetProbabilityGene( ), GetPValueCondition( ), GetZScoreCondition( ), GetPValueMotif( ),
 			GetZScoreMotif( ), GetPValueCorrelation( ) );
-		g_CatSleipnir.notice( "p merge %g, cutoff merge %g, penalty gap %g, penalty mismatch %g",
+		g_CatSleipnir( ).notice( "p merge %g, cutoff merge %g, penalty gap %g, penalty mismatch %g",
 			GetPValueMerge( ), GetCutoffMerge( ), GetMotifs( ) ? GetMotifs( )->GetPenaltyGap( ) : 0,
 			GetMotifs( ) ? GetMotifs( )->GetPenaltyMismatch( ) : 0 );
-		g_CatSleipnir.notice( "correlation pairs %d, bases %d, min size %d, merge size %d, max size %d",
+		g_CatSleipnir( ).notice( "correlation pairs %d, bases %d, min size %d, merge size %d, max size %d",
 			GetNumberCorrelation( ), GetBasesPerMatch( ), GetSizeMinimum( ), GetSizeMerge( ),
 			GetSizeMaximum( ) ); }
 	for( dFailure = 1; dFailure > c_dEpsilon; dFailure *= GetPValueCorrelation( ) ) {
 		if( !Cluster.Initialize( PCLCopy, Pot, m_vecsDatasets, setpriiSeeds, GetNumberCorrelation( ),
 			GetPValueCorrelation( ), GetThreads( ) ) )
 			continue;
-		g_CatSleipnir.notice( "CCoalesce::Cluster( ) initialized %d genes", Cluster.GetGenes( ).size( ) );
-		if( g_CatSleipnir.isDebugEnabled( ) ) {
+		g_CatSleipnir( ).notice( "CCoalesce::Cluster( ) initialized %d genes", Cluster.GetGenes( ).size( ) );
+		if( g_CatSleipnir( ).isDebugEnabled( ) ) {
 			ostringstream				ossm;
 			set<size_t>::const_iterator	iterGene;
 
 			ossm << "CCoalesce::Cluster( ) initialized:";
 			for( iterGene = Cluster.GetGenes( ).begin( ); iterGene != Cluster.GetGenes( ).end( ); ++iterGene )
 				ossm << ' ' << PCL.GetGene( *iterGene );
-			g_CatSleipnir.debug( ossm.str( ).c_str( ) ); }
+			g_CatSleipnir( ).debug( ossm.str( ).c_str( ) ); }
 		if( Cluster.GetGenes( ).size( ) < GetSizeMinimum( ) )
 			continue;
 		while( !( Cluster.IsConverged( ) || Cluster.IsEmpty( ) ) ) {
 				GetZScoreCondition( ) ) )
 				return false;
 			if( Cluster.IsEmpty( ) ) {
-				g_CatSleipnir.notice( "CCoalesce::Cluster( ) selected no conditions" );
+				g_CatSleipnir( ).notice( "CCoalesce::Cluster( ) selected no conditions" );
 				break; }
 			if( ( Cluster.GetGenes( ).size( ) >= GetSizeMinimum( ) ) &&
 				!( CombineMotifs( FASTA, veciPCL2FASTA, sModifiers, Cluster, GetThreads( ), GeneScores,
 			if( !Cluster.SelectGenes( PCLCopy, GeneScores, HistsCluster, HistsPot, GetSizeMinimum( ),
 				GetThreads( ), Pot, GetProbabilityGene( ), GetMotifs( ) ) )
 				return false;
-			g_CatSleipnir.notice( "CCoalesce::Cluster( ) processed %d genes, %d datasets, %d motifs",
+			g_CatSleipnir( ).notice( "CCoalesce::Cluster( ) processed %d genes, %d datasets, %d motifs",
 				Cluster.GetGenes( ).size( ), Cluster.GetDatasets( ).size( ), Cluster.GetMotifs( ).size( ) ); }
 		if( Cluster.IsConverged( ) && ( Cluster.GetGenes( ).size( ) >= GetSizeMinimum( ) ) ) {
-			g_CatSleipnir.notice( "CCoalesce::Cluster( ) finalizing cluster" );
+			g_CatSleipnir( ).notice( "CCoalesce::Cluster( ) finalizing cluster" );
 			setpriiSeeds.clear( );
 			if( Cluster.GetMotifs( ).size( ) >= GetSizeMaximum( ) ) {
 				if( !Cluster.SelectMotifs( HistsCluster, HistsPot, GetPValueMotif( ), GetZScoreMotif( ), -1,
 					GetThreads( ), GetMotifs( ) ) )
 					return false;
-				g_CatSleipnir.notice( "CCoalesce::Cluster( ) finalized %d motifs",
+				g_CatSleipnir( ).notice( "CCoalesce::Cluster( ) finalized %d motifs",
 					Cluster.GetMotifs( ).size( ) ); }
 			if( IsDirectoryIntermediate( ) )
 				Cluster.Save( GetDirectoryIntermediate( ), vecClusters.size( ), PCLCopy, GetMotifs( ) );
 				m_vecsDatasets[ i ].CalculateCovariance( PCLCopy );
 			dFailure = 1; }
 		else
-			g_CatSleipnir.notice( "CCoalesce::Cluster( ) discarding cluster (failure %g)", dFailure ); }
+			g_CatSleipnir( ).notice( "CCoalesce::Cluster( ) discarding cluster (failure %g)", dFailure ); }
 
 	return true; }
 

src/coalescecluster.cpp

 	vector<SThreadSeedPair>	vecsThreads;
 
 	if( PCL.GetGenes( ) < 2 ) {
-		g_CatSleipnir.error( "CCoalesceClusterImpl::AddSeedPair( %g ) found no genes", dPValue );
+		g_CatSleipnir( ).error( "CCoalesceClusterImpl::AddSeedPair( %g ) found no genes", dPValue );
 		return false; }
 	vecpthdThreads.resize( iThreads );
 	vecsThreads.resize( vecpthdThreads.size( ) );
 		vecsThreads[ i ].m_dFraction = dFraction;
 		vecsThreads[ i ].m_psetpriiSeeds = &setpriiSeeds;
 		if( pthread_create( &vecpthdThreads[ i ], NULL, ThreadSeedPair, &vecsThreads[ i ] ) ) {
-			g_CatSleipnir.error( "CCoalesceClusterImpl::AddSeedPair( %g, %g, %d ) could not seed pair",
+			g_CatSleipnir( ).error( "CCoalesceClusterImpl::AddSeedPair( %g, %g, %d ) could not seed pair",
 				dFraction, dPValue, iThreads );
 			return false; } }
 	dMaxCorr = -( dMinP = DBL_MAX );
 			iOne = vecsThreads[ i ].m_iOne;
 			iTwo = vecsThreads[ i ].m_iTwo; } }
 	if( ( dMinP * PCL.GetGenes( ) * ( PCL.GetGenes( ) - 1 ) * dFraction * dFraction ) < ( dPValue * 2 ) ) {
-		g_CatSleipnir.info( "CCoalesceClusterImpl::AddSeedPair( %g, %g ) seeding: %s, %s, %g (p=%g)",
+		g_CatSleipnir( ).info( "CCoalesceClusterImpl::AddSeedPair( %g, %g ) seeding: %s, %s, %g (p=%g)",
 			dFraction, dPValue, PCL.GetGene( iOne ).c_str( ), PCL.GetGene( iTwo ).c_str( ), dMaxCorr, dMinP );
 		priiSeed.first = iOne;
 		priiSeed.second = iTwo;
 		Add( iTwo, Pot );
 		return true; }
 
-	g_CatSleipnir.notice( "CCoalesceClusterImpl::AddSeedPair( %g, %g ) inadequate seed pair: %s, %s, %g (p=%g)",
+	g_CatSleipnir( ).notice( "CCoalesceClusterImpl::AddSeedPair( %g, %g ) inadequate seed pair: %s, %s, %g (p=%g)",
 		dFraction, dPValue, PCL.GetGene( iOne ).c_str( ), PCL.GetGene( iTwo ).c_str( ), dMaxCorr, dMinP );
 	return false; }
 
 			m_vecdStdevs[ i ] = ( m_vecdStdevs[ i ] < 0 ) ? 0 : sqrt( m_vecdStdevs[ i ] ); }
 		else
 			m_vecdCentroid[ i ] = CMeta::GetNaN( );
-		g_CatSleipnir.debug( "CCoalesceClusterImpl::CalculateCentroid( ) condition %d: count %d, mean %g, stdev %g",
+		g_CatSleipnir( ).debug( "CCoalesceClusterImpl::CalculateCentroid( ) condition %d: count %d, mean %g, stdev %g",
 			i, m_veciCounts[ i ], m_vecdCentroid[ i ], m_vecdStdevs[ i ] ); }
 
 	for( i = 0; i < m_vecsDatasets.size( ); ++i ) {
 		vecsThreads[ i ].m_pvecsDatasets = &m_vecsDatasets;
 		vecsThreads[ i ].m_pPCL = &PCL;
 		if( pthread_create( &vecpthdThreads[ i ], NULL, ThreadSelectCondition, &vecsThreads[ i ] ) ) {
-			g_CatSleipnir.error( "CCoalesceCluster::SelectConditions( %d, %g ) could not select conditions",
+			g_CatSleipnir( ).error( "CCoalesceCluster::SelectConditions( %d, %g ) could not select conditions",
 				iThreads, dZScore );
 			return false; } }
 	for( i = 0; i < vecpthdThreads.size( ); ++i )
 		const SDataset&	sDataset	= m_vecsDatasets[ i ];
 
 		if( ( sDataset.m_dP < dPValue ) && ( fabs( sDataset.m_dZ ) > dZScore ) ) {
-			g_CatSleipnir.debug( "CCoalesceCluster::SelectConditions( %d, %g ) selected dataset %d at %g, z=%g",
+			g_CatSleipnir( ).debug( "CCoalesceCluster::SelectConditions( %d, %g ) selected dataset %d at %g, z=%g",
 				iThreads, dZScore, i, sDataset.m_dP, sDataset.m_dZ );
 			m_setiDatasets.insert( i ); }
 		else
-			g_CatSleipnir.debug( "CCoalesceCluster::SelectConditions( %d, %g ) rejected dataset %d at %g, z=%g",
+			g_CatSleipnir( ).debug( "CCoalesceCluster::SelectConditions( %d, %g ) rejected dataset %d at %g, z=%g",
 				iThreads, dZScore, i, sDataset.m_dP, sDataset.m_dZ ); }
 
 	return true; }
 		vecsThreads[ i ].m_dZScore = dZScore;
 		vecsThreads[ i ].m_pveciMotifs = veciMotifs.empty( ) ? NULL : &veciMotifs;
 		if( pthread_create( &vecpthdThreads[ i ], NULL, ThreadSelectMotif, &vecsThreads[ i ] ) ) {
-			g_CatSleipnir.error( "CCoalesceCluster::SelectMotifs( %g, %d ) could not select motifs",
+			g_CatSleipnir( ).error( "CCoalesceCluster::SelectMotifs( %g, %d ) could not select motifs",
 				dZScore, iThreads );
 			return false; } }
 	for( i = 0; i < vecpthdThreads.size( ); ++i ) {
 				SMotifMatch	sMotif( iMotif, strTypeCluster, eSubsequence, (float)dZ, (float)( dAveOne -
 					dAverage ) );
 
-				if( g_CatSleipnir.isInfoEnabled( ) ) {
+				if( g_CatSleipnir( ).isInfoEnabled( ) ) {
 					ostringstream	ossm;
 
 					ossm << "CCoalesceClusterImpl::AddSignificant( " << iMotif << ", " << dZScore <<
 						" ) adding at " << dP << ":" << endl << sMotif.Save( &Motifs ) << endl <<
 						"Cluster	" << HistSetCluster.Save( iMotif ) << endl <<
 						"Pot	" << HistSetPot.Save( iMotif );
-					g_CatSleipnir.info( ossm.str( ).c_str( ) ); }
+					g_CatSleipnir( ).info( ossm.str( ).c_str( ) ); }
 				vecsMotifs.push_back( sMotif ); }
-			else if( g_CatSleipnir.isDebugEnabled( ) ) {
+			else if( g_CatSleipnir( ).isDebugEnabled( ) ) {
 				ostringstream	ossm;
 
 				ossm << "CCoalesceClusterImpl::AddSignificant( " << iMotif << ", " << dZScore <<
 					(float)dZ, (float)( dAveOne - dAverage ) ).Save( &Motifs ) << endl <<
 					"Cluster	" << HistSetCluster.Save( iMotif ) << endl <<
 					"Pot	" << HistSetPot.Save( iMotif );
-				g_CatSleipnir.debug( ossm.str( ).c_str( ) ); } } }
+				g_CatSleipnir( ).debug( ossm.str( ).c_str( ) ); } } }
 
 	return true; }
 
 		if( iThreads > 1 ) {
 			if( pthread_create( &vecpthdThreads[ 0 ], NULL, ThreadCentroid, &sUs ) ||
 				pthread_create( &vecpthdThreads[ 1 ], NULL, ThreadCentroid, &sThem ) ) {
-				g_CatSleipnir.error( "CCoalesceCluster::SelectGenes( %d, %g ) could not calculate centroids",
+				g_CatSleipnir( ).error( "CCoalesceCluster::SelectGenes( %d, %g ) could not calculate centroids",
 					iThreads, dProbability );
 				return false; }
 			for( i = 0; i < 2; ++i )
 				pthread_join( vecpthdThreads[ i ], NULL ); }
 		else if( ThreadCentroid( &sUs ) || ThreadCentroid( &sThem ) ) {
-			g_CatSleipnir.error( "CCoalesceCluster::SelectGenes( %d, %g ) could not calculate centroids",
+			g_CatSleipnir( ).error( "CCoalesceCluster::SelectGenes( %d, %g ) could not calculate centroids",
 				iThreads, dProbability );
 			return false; }
 	}
 		vecsThreads[ i ].m_iMinimum = iMinimum;
 		vecsThreads[ i ].m_dProbability = dProbability;
 		if( pthread_create( &vecpthdThreads[ i ], NULL, ThreadSignificantGene, &vecsThreads[ i ] ) ) {
-			g_CatSleipnir.error( "CCoalesceCluster::SelectGenes( %d, %g ) could not calculate significance",
+			g_CatSleipnir( ).error( "CCoalesceCluster::SelectGenes( %d, %g ) could not calculate significance",
 				iThreads, dProbability );
 			return false; } }
 	for( i = 0; i < vecpthdThreads.size( ); ++i )
 		( 1 - dBeta ) * ( dLogPMotifsGivenOut - dLogPMotifsGivenIn ) ) /
 		( 0.5f + 2 * pow( 0.5f - dBeta, 2 ) ) ) );
 
-	if( g_CatSleipnir.isDebugEnabled( ) ) {
+	if( g_CatSleipnir( ).isDebugEnabled( ) ) {
 		set<SMotifMatch>::const_iterator	iterMotif;
 		size_t								iType;
 
-		g_CatSleipnir.debug( "CCoalesceClusterImpl::IsSignificant( %s ) is %g beta %g, exp. p=%g vs. %g, seq. p=%g vs %g",
+		g_CatSleipnir( ).debug( "CCoalesceClusterImpl::IsSignificant( %s ) is %g beta %g, exp. p=%g vs. %g, seq. p=%g vs %g",
 			PCL.GetGene( iGene ).c_str( ), dP, dBeta, dLogPExpressionGivenIn, dLogPExpressionGivenOut,
 			dLogPMotifsGivenIn, dLogPMotifsGivenOut );
 		for( iterMotif = m_setsMotifs.begin( ); iterMotif != m_setsMotifs.end( ); ++iterMotif )
 			if( ( iType = GeneScores.GetType( iterMotif->m_strType ) ) != -1 )
-				g_CatSleipnir.debug( "%g	%s", GeneScores.Get( iType, iterMotif->m_eSubsequence, iGene,
+				g_CatSleipnir( ).debug( "%g	%s", GeneScores.Get( iType, iterMotif->m_eSubsequence, iGene,
 					iterMotif->m_iMotif ), iterMotif->Save( pMotifs ).c_str( ) ); }
 
 	return ( dP > dProbability ); }
 					sCluster--;
 					iCluster--; }
 				else
-					g_CatSleipnir.warn( "CCoalesceClusterImpl::CalculateProbabilityMotifs( %d, %d, %g, %g ) no motifs of %d in cluster: %g, %s\n%s",
+					g_CatSleipnir( ).warn( "CCoalesceClusterImpl::CalculateProbabilityMotifs( %d, %d, %g, %g ) no motifs of %d in cluster: %g, %s\n%s",
 						iGene, fClustered, dLogPIn, dLogPOut, iCluster, dGene,
 						iterMotif->Save( NULL ).c_str( ), HistSetCluster.Save( iterMotif->m_iMotif ).c_str( ) ); }
 			else if( sPot ) {
 				sPot--;
 				iPot--; }
 			else
-				g_CatSleipnir.warn( "CCoalesceClusterImpl::CalculateProbabilityMotifs( %d, %d, %g, %g ) no motifs of %d in pot: %g, %s\n%s",
+				g_CatSleipnir( ).warn( "CCoalesceClusterImpl::CalculateProbabilityMotifs( %d, %d, %g, %g ) no motifs of %d in pot: %g, %s\n%s",
 					iGene, fClustered, dLogPIn, dLogPOut, iPot, dGene, iterMotif->Save( NULL ).c_str( ),
 					HistSetPot.Save( iterMotif->m_iMotif ).c_str( ) );
 			dPCluster = (double)( sCluster + 1 ) / ( iCluster + HistSetCluster.GetEdges( ) );
 				break; } }
 	for( i = 0; i < PCLCluster.GetGenes( ); ++i ) {
 		if( ( iGene = PCL.GetGene( PCLCluster.GetGene( i ) ) ) == -1 ) {
-			g_CatSleipnir.error( "CCoalesceCluster::Open( %s, %i ) unrecognized gene: %s", strPCL.c_str( ),
+			g_CatSleipnir( ).error( "CCoalesceCluster::Open( %s, %i ) unrecognized gene: %s", strPCL.c_str( ),
 				iSkip, PCLCluster.GetGene( i ).c_str( ) );
 			return -1; }
 		m_setiGenes.insert( iGene ); }
 		SMotifMatch	sMotif;
 
 		if( !sMotif.Open( ifsm, *pMotifs ) ) {
-			g_CatSleipnir.warn( "CCoalesceCluster::Open( %s, %d ) could not open: %s", strPCL.c_str( ),
+			g_CatSleipnir( ).warn( "CCoalesceCluster::Open( %s, %d ) could not open: %s", strPCL.c_str( ),
 				iSkip, strMotifs.c_str( ) );
 			return -1; }
 		m_setsMotifs.insert( sMotif ); }
 	istm.getline( &strBuffer[ 0 ], strBuffer.size( ) );
 	CMeta::Tokenize( strBuffer.c_str( ), vecstrLine );
 	if( vecstrLine.empty( ) || ( vecstrLine[ 0 ] != c_szGenes ) ) {
-		g_CatSleipnir.error( "CCoalesceCluster::Open( ) invalid line: %s", strBuffer.c_str( ) );
+		g_CatSleipnir( ).error( "CCoalesceCluster::Open( ) invalid line: %s", strBuffer.c_str( ) );
 		return -1; }
 	for( i = 1; i < vecstrLine.size( ); ++i ) {
 		if( ( j = PCL.GetGene( vecstrLine[ i ] ) ) == -1 ) {
-			g_CatSleipnir.warn( "CCoalesceCluster::Open( ) unrecognized gene: %s",
+			g_CatSleipnir( ).warn( "CCoalesceCluster::Open( ) unrecognized gene: %s",
 				vecstrLine[ i ].c_str( ) );
 			continue; }
 		m_setiGenes.insert( j ); }
 	vecstrLine.clear( );
 	CMeta::Tokenize( strBuffer.c_str( ), vecstrLine );
 	if( vecstrLine.empty( ) || ( vecstrLine[ 0 ] != c_szConditions ) ) {
-		g_CatSleipnir.error( "CCoalesceCluster::Open( ) invalid line: %s", strBuffer.c_str( ) );
+		g_CatSleipnir( ).error( "CCoalesceCluster::Open( ) invalid line: %s", strBuffer.c_str( ) );
 		return -1; }
 	for( i = 1; i < vecstrLine.size( ); ++i ) {
 		if( ( j = PCL.GetExperiment( vecstrLine[ i ] ) ) == -1 ) {
-			g_CatSleipnir.error( "CCoalesceCluster::Open( ) unrecognized condition: %s",
+			g_CatSleipnir( ).error( "CCoalesceCluster::Open( ) unrecognized condition: %s",
 				vecstrLine[ i ].c_str( ) );
 			return -1; }
 		m_setiDatasets.insert( j ); }
 	vector<map<string, set<SMotifMatch> > >	vecmapstrsetsMotifs;
 
 	vecmapstrsetsMotifs.resize( CCoalesceSequencerBase::ESubsequenceEnd );
-	g_CatSleipnir.notice( "CCoalesceCluster::Open( %g ) merging clusters:", dFraction );
+	g_CatSleipnir( ).notice( "CCoalesceCluster::Open( %g ) merging clusters:", dFraction );
 	if( !( iClusters = CCoalesceClusterImpl::Open( Hierarchy, vecClusters, vecstrClusters, mapiiGenes,
 		mapiiDatasets, vecmapstrsetsMotifs ) ) ) {
-		g_CatSleipnir.error( "CCoalesceCluster::Open( %g ) no clusters found", dFraction );
+		g_CatSleipnir( ).error( "CCoalesceCluster::Open( %g ) no clusters found", dFraction );
 		return false; }
 
 	Clear( );
 	set<SMotifMatch>	setsMerged;
 
 	iMotifs = setsMotifs.size( );
-	g_CatSleipnir.notice( "CCoalesceClusterImpl::OpenMotifsHeuristic( %g ) resolving %d motifs", dCutoff,
+	g_CatSleipnir( ).notice( "CCoalesceClusterImpl::OpenMotifsHeuristic( %g ) resolving %d motifs", dCutoff,
 		iMotifs );
 
 	vecsMotifs.resize( iMotifs );
 	size_t				i, j;
 	bool				fRet;
 
-	g_CatSleipnir.notice( "CCoalesceClusterImpl::OpenMotifs( %g ) resolving %d motifs", dCutoff,
+	g_CatSleipnir( ).notice( "CCoalesceClusterImpl::OpenMotifs( %g ) resolving %d motifs", dCutoff,
 		setsMotifs.size( ) );
 
 	vecsMotifs.resize( setsMotifs.size( ) );
 
 	const CCoalesceCluster&	Cluster	= vecClusters[ Hier.GetID( ) ];
 
-	g_CatSleipnir.notice( "CCoalesceClusterImpl::Open( ) cluster %s",
+	g_CatSleipnir( ).notice( "CCoalesceClusterImpl::Open( ) cluster %s",
 		vecstrClusters[ Hier.GetID( ) ].c_str( ) );
 	for( iterFrom = Cluster.GetGenes( ).begin( ); iterFrom != Cluster.GetGenes( ).end( ); ++iterFrom )
 		if( ( iterTo = mapiiGenes.find( *iterFrom ) ) == mapiiGenes.end( ) )
 	dRet = (float)iOverlapGenes / min( GetGenes( ).size( ), Cluster.GetGenes( ).size( ) );
 // jaccard index works best with an 0.25-0.33 cutoff
 //	dRet = (float)iOverlapGenes / ( GetGenes( ).size( ) + Cluster.GetGenes( ).size( ) - iOverlapGenes );
-	g_CatSleipnir.debug( "CCoalesceCluster::GetSimilarity( %d, %d ): %d, %d, %d = %g", iGenes,
+	g_CatSleipnir( ).debug( "CCoalesceCluster::GetSimilarity( %d, %d ): %d, %d, %d = %g", iGenes,
 		iDatasets, iOverlapGenes, GetGenes( ).size( ), Cluster.GetGenes( ).size( ), dRet );
 	return dRet; }
 
 		for( i = iTypeUs = 0; iTypeUs < GetTypes( ); ++iTypeUs )
 			for( iSubsequence = ESubsequenceBegin; iSubsequence < GetSubsequences( iTypeUs );
 				++iSubsequence ) {
-//				g_CatSleipnir.info( "CCoalesceGroupHistograms::SetTotal( ) type %s, subsequence %d contains %d genes with sequences",
+//				g_CatSleipnir( ).info( "CCoalesceGroupHistograms::SetTotal( ) type %s, subsequence %d contains %d genes with sequences",
 //					GetType( iTypeUs ).c_str( ), iSubsequence, m_vecsTotals[ i ] );
 				Get( iTypeUs, (ESubsequence)iSubsequence ).SetTotal( m_vecsTotals[ i++ ] ); } }
 

src/coalescemotifs.cpp

-/*****************************************************************************
-* This file is provided under the Creative Commons Attribution 3.0 license.
-*
-* You are free to share, copy, distribute, transmit, or adapt this work
-* PROVIDED THAT you attribute the work to the authors listed below.
-* For more information, please see the following web page:
-* http://creativecommons.org/licenses/by/3.0/
-*
-* This file is a component of the Sleipnir library for functional genomics,
-* authored by:
-* Curtis Huttenhower (chuttenh@princeton.edu)
-* Mark Schroeder
-* Maria D. Chikina
-* Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
-*
-* If you use this library, the included executable tools, or any related
-* code in your work, please cite the following publication:
-* Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
-* Olga G. Troyanskaya.
-* "The Sleipnir library for computational functional genomics"
-*****************************************************************************/
-#include "stdafx.h"
-#include "coalescemotifs.h"
-#include "coalescestructsi.h"
-#include "pst.h"
-#include "measure.h"
-#include "statistics.h"
-
-namespace Sleipnir {
-
-const char	CCoalesceMotifLibraryImpl::c_szBases[]			= "ACGT";
-const char	CCoalesceMotifLibraryImpl::c_szComplements[]	= "TGCA";
-
-void CCoalesceMotifLibraryImpl::SKnowns::Match( const std::vector<float>& vecdMotif,
-	SMotifMatch::EType eMatchType, std::map<std::string, float>& mapstrdKnown ) const {
-	static const size_t					c_iOverlap	= 5;
-	map<string, TVecPr>::const_iterator	iterKnown;
-	size_t								i, j, k, iMin, iMax, iTests;
-	float								dP, dR, dMin;
-	const float*						adMotif;
-	const float*						adPWM;
-	const float*						adRC;
-
-	for( iterKnown = m_mapstrvecKnown.begin( ); iterKnown != m_mapstrvecKnown.end( ); ++iterKnown ) {
-		dMin = FLT_MAX;
-		for( iTests = i = 0; i < iterKnown->second.size( ); ++i ) {
-			const vector<float>&	vecdPWM	= iterKnown->second[ i ].first;
-			const vector<float>&	vecdRC	= iterKnown->second[ i ].second;
-
-			iMin = strlen( c_szBases ) * c_iOverlap;
-			iMax = vecdMotif.size( ) + vecdPWM.size( ) - iMin;
-			if( iMax < iMin )
-				continue;
-			for( j = iMin; j <= iMax; j += strlen( c_szBases ) ) {
-				if( j < vecdMotif.size( ) ) {
-					adMotif = &vecdMotif[ vecdMotif.size( ) - j - 1 ];
-					adPWM = &vecdPWM.front( );
-					adRC = &vecdRC.front( );
-					k = min( vecdPWM.size( ), j ); }
-				else {
-					adMotif = &vecdMotif.front( );
-					k = j - vecdMotif.size( );
-					adPWM = &vecdPWM[ k ];
-					adRC = &vecdRC[ k ];
-					k = min( vecdMotif.size( ), vecdPWM.size( ) - k ); }
-				switch( eMatchType ) {
-					case SMotifMatch::ETypeRMSE:
-						dP = (float)min( CStatistics::RootMeanSquareError( adMotif, adMotif + k, adPWM,
-							adPWM + k ), CStatistics::RootMeanSquareError( adMotif, adMotif + k, adRC,
-							adRC + k ) );
-						break;
-
-					case SMotifMatch::ETypeJensenShannon:
-						dP = (float)min( CStatistics::JensenShannonDivergence( adMotif, adMotif + k, adPWM,
-							adPWM + k ), CStatistics::JensenShannonDivergence( adMotif, adMotif + k, adRC,
-							adRC + k ) );
-						break;
-
-					case SMotifMatch::ETypePValue:
-						iTests += 2;
-						if( ( dR = (float)max( CMeasurePearson::Pearson( adMotif, k, adPWM, k,
-							IMeasure::EMapNone ), CMeasurePearson::Pearson( adMotif, k, adRC, k,
-							IMeasure::EMapNone ) ) ) > 0 )
-							dP = (float)CStatistics::PValuePearson( dR, k );
-						else
-							dP = FLT_MAX;
-						break; }
-				if( dP < dMin )
-					dMin = dP; } }
-		if( dMin != FLT_MAX )
-			mapstrdKnown[ iterKnown->first ] = dMin * max( 1, iTests ); } }
-
+/*****************************************************************************
+* This file is provided under the Creative Commons Attribution 3.0 license.
+*
+* You are free to share, copy, distribute, transmit, or adapt this work
+* PROVIDED THAT you attribute the work to the authors listed below.
+* For more information, please see the following web page:
+* http://creativecommons.org/licenses/by/3.0/
+*
+* This file is a component of the Sleipnir library for functional genomics,
+* authored by:
+* Curtis Huttenhower (chuttenh@princeton.edu)
+* Mark Schroeder
+* Maria D. Chikina
+* Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
+*
+* If you use this library, the included executable tools, or any related
+* code in your work, please cite the following publication:
+* Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
+* Olga G. Troyanskaya.
+* "The Sleipnir library for computational functional genomics"
+*****************************************************************************/
+#include "stdafx.h"
+#include "coalescemotifs.h"
+#include "coalescestructsi.h"
+#include "pst.h"
+#include "measure.h"
+#include "statistics.h"
+
+namespace Sleipnir {
+
+const char	CCoalesceMotifLibraryImpl::c_szBases[]			= "ACGT";
+const char	CCoalesceMotifLibraryImpl::c_szComplements[]	= "TGCA";
+
+void CCoalesceMotifLibraryImpl::SKnowns::Match( const std::vector<float>& vecdMotif,
+	SMotifMatch::EType eMatchType, std::map<std::string, float>& mapstrdKnown ) const {
+	static const size_t					c_iOverlap	= 5;
+	map<string, TVecPr>::const_iterator	iterKnown;
+	size_t								i, j, k, iMin, iMax, iTests;
+	float								dP, dR, dMin;
+	const float*						adMotif;
+	const float*						adPWM;
+	const float*						adRC;
+
+	for( iterKnown = m_mapstrvecKnown.begin( ); iterKnown != m_mapstrvecKnown.end( ); ++iterKnown ) {
+		dMin = FLT_MAX;
+		for( iTests = i = 0; i < iterKnown->second.size( ); ++i ) {
+			const vector<float>&	vecdPWM	= iterKnown->second[ i ].first;
+			const vector<float>&	vecdRC	= iterKnown->second[ i ].second;
+
+			iMin = strlen( c_szBases ) * c_iOverlap;
+			iMax = vecdMotif.size( ) + vecdPWM.size( ) - iMin;
+			if( iMax < iMin )
+				continue;
+			for( j = iMin; j <= iMax; j += strlen( c_szBases ) ) {
+				if( j < vecdMotif.size( ) ) {
+					adMotif = &vecdMotif[ vecdMotif.size( ) - j - 1 ];
+					adPWM = &vecdPWM.front( );
+					adRC = &vecdRC.front( );
+					k = min( vecdPWM.size( ), j ); }
+				else {
+					adMotif = &vecdMotif.front( );
+					k = j - vecdMotif.size( );
+					adPWM = &vecdPWM[ k ];
+					adRC = &vecdRC[ k ];
+					k = min( vecdMotif.size( ), vecdPWM.size( ) - k ); }
+				switch( eMatchType ) {
+					case SMotifMatch::ETypeRMSE:
+						dP = (float)min( CStatistics::RootMeanSquareError( adMotif, adMotif + k, adPWM,
+							adPWM + k ), CStatistics::RootMeanSquareError( adMotif, adMotif + k, adRC,
+							adRC + k ) );
+						break;
+
+					case SMotifMatch::ETypeJensenShannon:
+						dP = (float)min( CStatistics::JensenShannonDivergence( adMotif, adMotif + k, adPWM,
+							adPWM + k ), CStatistics::JensenShannonDivergence( adMotif, adMotif + k, adRC,
+							adRC + k ) );
+						break;
+
+					case SMotifMatch::ETypePValue:
+						iTests += 2;
+						if( ( dR = (float)max( CMeasurePearson::Pearson( adMotif, k, adPWM, k,
+							IMeasure::EMapNone ), CMeasurePearson::Pearson( adMotif, k, adRC, k,
+							IMeasure::EMapNone ) ) ) > 0 )
+							dP = (float)CStatistics::PValuePearson( dR, k );
+						else
+							dP = FLT_MAX;
+						break; }
+				if( dP < dMin )
+					dMin = dP; } }
+		if( dMin != FLT_MAX )
+			mapstrdKnown[ iterKnown->first ] = dMin * max( 1, iTests ); } }
+
 /*!
  * \brief
  * Retrieves a set of motifs from the given input text stream.
  * \see
  * CCoalesceCluster::Open
  */
-bool CCoalesceMotifLibrary::Open( std::istream& istm, std::vector<SMotifMatch>& vecsMotifs,
-	CCoalesceMotifLibrary* pMotifs ) {
-	string	strBuffer;
-	size_t	i;
-
-	strBuffer.resize( CFile::GetBufferSize( ) );
-	while( CFile::IsNewline( istm.peek( ) ) )
-		istm.getline( &strBuffer[ 0 ], strBuffer.size( ) );
-	while( !istm.eof( ) && ( istm.peek( ) != c_cCluster ) ) {
-		SMotifMatch	sMotif;
-
-		if( pMotifs ) {
-			if( !sMotif.Open( istm, *pMotifs ) )
-				break;
-			vecsMotifs.push_back( sMotif );
-			if( isdigit( istm.peek( ) ) )
-				for( i = 0; i < 4; ++i )
-					istm.getline( &strBuffer[ 0 ], strBuffer.size( ) ); }
-		else
-			istm.getline( &strBuffer[ 0 ], strBuffer.size( ) );
-		while( CFile::IsNewline( istm.peek( ) ) )
-			istm.getline( &strBuffer[ 0 ], strBuffer.size( ) ); }
-
-	return true; }
-
-CCoalesceMotifLibraryImpl::~CCoalesceMotifLibraryImpl( ) {
-	size_t	i;
-
-	for( i = 0; i < m_vecpPSTs.size( ); ++i )
-		delete m_vecpPSTs[ i ]; }
-
-/*!
- * \brief
- * Calculates the length-normalized match strength of the given motif against the appropriate number of
- * characters in the input sequence at the requested offset.
- * 
- * \param strSequence
- * Sequence against which motif is matched.
- * 
- * \param iMotif
- * ID of motif to be matched.
- * 
- * \param iOffset
- * Zero-based offset within strSequence at which the match is performed.
- * 
- * \param sModifiers
- * A modifier cache containing any prior weights to be incorporated into the match.
- * 
- * \returns
- * Length-normalized strength of motif match against the given sequence and offset.
- * 
- * \remarks
- * iMotif must represent a valid motif for the current library, and iOffset must fall within strSequence,
- * although motifs extending from a valid iOffset past the end of the sequence will be handled appropriately.
- * Only PST motifs are currently supported, as there should never be any need to match non-PST motifs at
- * runtime, but support for kmers and RCs could be added in a straightforward manner.
- * 
- * \see
- * GetMatches
- */
-float CCoalesceMotifLibrary::GetMatch( const std::string& strSequence, uint32_t iMotif, size_t iOffset,
-	SCoalesceModifierCache& sModifiers ) const {
-	size_t		i, iDepth;
-	float		d, dRet;
-	const CPST*	pPST;
-	string		strMotif, strRC;
-	EType		eType;
-
-	switch( eType = GetType( iMotif ) ) {
-		case ETypePST:
-			if( !( pPST = GetPST( iMotif ) ) ) {
-				g_CatSleipnir.error( "CCoalesceMotifLibrary::GetMatch( %s, %d, %d ) could not find PST",
-					strSequence.c_str( ), iMotif, iOffset );
-				return CMeta::GetNaN( ); }
-			break;
-
-		case ETypeKMer:
-			strMotif = GetMotif( iMotif );
-			break;
-
-		case ETypeRC:
-			strMotif = GetRCOne( iMotif );
-			strRC = GetReverseComplement( strMotif );
-			break; }
-
-	iDepth = ( eType == ETypePST ) ? pPST->GetDepth( ) : GetK( );
-	sModifiers.InitializeWeight( 0, 0 );
-	dRet = 0;
-	for( i = 1; i < iDepth; ++i ) {
-		sModifiers.AddWeight( 0, iOffset, i - 1 );
-		if( eType == ETypePST )
-			dRet += pPST->GetMatch( strSequence, iDepth - i ) * sModifiers.GetWeight( i ) / i; }
-	sModifiers.AddWeight( 0, iOffset, i - 1 );
-	for( i = 0; i < strSequence.length( ); ++i ) {
-		switch( eType ) {
-			case ETypePST:
-				d = pPST->GetMatch( strSequence.substr( i ) ) * sModifiers.GetWeight( iDepth ) / iDepth;
-				break;
-
-			case ETypeKMer:
-			case ETypeRC:
-				d = ( strSequence.compare( i, iDepth, strMotif ) &&
-					( strRC.empty( ) || strSequence.compare( i, iDepth, strRC ) ) ) ? 0 :
-					sModifiers.GetWeight( iDepth );
-				break; }
-		dRet += d;
-		sModifiers.AddWeight( iDepth, iOffset, i ); }
-	if( CMeta::IsNaN( dRet ) || ( dRet < 0 ) ) {
-		g_CatSleipnir.error( "CCoalesceMotifLibrary::GetMatch( %s, %d, %d ) found negative score: %g",
-			strSequence.c_str( ), iMotif, iOffset, dRet );
-		return CMeta::GetNaN( ); }
-
-	return dRet; }
-
-std::string CCoalesceMotifLibraryImpl::GetMotif( uint32_t iMotif ) const {
-	std::string	strKMer;
-
-// kmer
-	if( iMotif < GetKMers( ) )
-		return ID2KMer( iMotif, m_iK );
-// reverse complement
-	if( iMotif < GetBasePSTs( ) ) {
-		strKMer = GetRCOne( iMotif );
-		return ( strKMer + c_cSeparator + GetReverseComplement( strKMer ) ); }
-// pst
-	return GetPST( iMotif )->GetMotif( ); }
-
-CPST* CCoalesceMotifLibraryImpl::CreatePST( uint32_t& iMotif ) {
-	CPST*	pRet;
-
-	iMotif = GetBasePSTs( ) + GetPSTs( );
-	m_vecpPSTs.push_back( pRet = new CPST( strlen( c_szBases ) ) );
-	return pRet; }
-
+bool CCoalesceMotifLibrary::Open( std::istream& istm, std::vector<SMotifMatch>& vecsMotifs,
+	CCoalesceMotifLibrary* pMotifs ) {
+	string	strBuffer;
+	size_t	i;
+
+	strBuffer.resize( CFile::GetBufferSize( ) );
+	while( CFile::IsNewline( istm.peek( ) ) )
+		istm.getline( &strBuffer[ 0 ], strBuffer.size( ) );
+	while( !istm.eof( ) && ( istm.peek( ) != c_cCluster ) ) {
+		SMotifMatch	sMotif;
+
+		if( pMotifs ) {
+			if( !sMotif.Open( istm, *pMotifs ) )
+				break;
+			vecsMotifs.push_back( sMotif );
+			if( isdigit( istm.peek( ) ) )
+				for( i = 0; i < 4; ++i )
+					istm.getline( &strBuffer[ 0 ], strBuffer.size( ) ); }
+		else
+			istm.getline( &strBuffer[ 0 ], strBuffer.size( ) );
+		while( CFile::IsNewline( istm.peek( ) ) )
+			istm.getline( &strBuffer[ 0 ], strBuffer.size( ) ); }
+
+	return true; }
+
+CCoalesceMotifLibraryImpl::~CCoalesceMotifLibraryImpl( ) {
+	size_t	i;
+
+	for( i = 0; i < m_vecpPSTs.size( ); ++i )
+		delete m_vecpPSTs[ i ]; }
+
+/*!
+ * \brief
+ * Calculates the length-normalized match strength of the given motif against the appropriate number of
+ * characters in the input sequence at the requested offset.
+ * 
+ * \param strSequence
+ * Sequence against which motif is matched.
+ * 
+ * \param iMotif
+ * ID of motif to be matched.
+ * 
+ * \param iOffset
+ * Zero-based offset within strSequence at which the match is performed.
+ * 
+ * \param sModifiers
+ * A modifier cache containing any prior weights to be incorporated into the match.
+ * 
+ * \returns
+ * Length-normalized strength of motif match against the given sequence and offset.
+ * 
+ * \remarks
+ * iMotif must represent a valid motif for the current library, and iOffset must fall within strSequence,
+ * although motifs extending from a valid iOffset past the end of the sequence will be handled appropriately.
+ * Only PST motifs are currently supported, as there should never be any need to match non-PST motifs at
+ * runtime, but support for kmers and RCs could be added in a straightforward manner.
+ * 
+ * \see
+ * GetMatches
+ */
+float CCoalesceMotifLibrary::GetMatch( const std::string& strSequence, uint32_t iMotif, size_t iOffset,
+	SCoalesceModifierCache& sModifiers ) const {
+	size_t		i, iDepth;
+	float		d, dRet;
+	const CPST*	pPST;
+	string		strMotif, strRC;
+	EType		eType;
+
+	switch( eType = GetType( iMotif ) ) {
+		case ETypePST:
+			if( !( pPST = GetPST( iMotif ) ) ) {
+				g_CatSleipnir( ).error( "CCoalesceMotifLibrary::GetMatch( %s, %d, %d ) could not find PST",
+					strSequence.c_str( ), iMotif, iOffset );
+				return CMeta::GetNaN( ); }
+			break;
+
+		case ETypeKMer:
+			strMotif = GetMotif( iMotif );
+			break;
+
+		case ETypeRC:
+			strMotif = GetRCOne( iMotif );
+			strRC = GetReverseComplement( strMotif );
+			break; }
+
+	iDepth = ( eType == ETypePST ) ? pPST->GetDepth( ) : GetK( );
+	sModifiers.InitializeWeight( 0, 0 );
+	dRet = 0;
+	for( i = 1; i < iDepth; ++i ) {
+		sModifiers.AddWeight( 0, iOffset, i - 1 );
+		if( eType == ETypePST )
+			dRet += pPST->GetMatch( strSequence, iDepth - i ) * sModifiers.GetWeight( i ) / i; }
+	sModifiers.AddWeight( 0, iOffset, i - 1 );
+	for( i = 0; i < strSequence.length( ); ++i ) {
+		switch( eType ) {
+			case ETypePST:
+				d = pPST->GetMatch( strSequence.substr( i ) ) * sModifiers.GetWeight( iDepth ) / iDepth;
+				break;
+
+			case ETypeKMer:
+			case ETypeRC:
+				d = ( strSequence.compare( i, iDepth, strMotif ) &&
+					( strRC.empty( ) || strSequence.compare( i, iDepth, strRC ) ) ) ? 0 :
+					sModifiers.GetWeight( iDepth );
+				break; }
+		dRet += d;
+		sModifiers.AddWeight( iDepth, iOffset, i ); }
+	if( CMeta::IsNaN( dRet ) || ( dRet < 0 ) ) {
+		g_CatSleipnir( ).error( "CCoalesceMotifLibrary::GetMatch( %s, %d, %d ) found negative score: %g",
+			strSequence.c_str( ), iMotif, iOffset, dRet );
+		return CMeta::GetNaN( ); }
+
+	return dRet; }
+
+std::string CCoalesceMotifLibraryImpl::GetMotif( uint32_t iMotif ) const {
+	std::string	strKMer;
+
+// kmer
+	if( iMotif < GetKMers( ) )
+		return ID2KMer( iMotif, m_iK );
+// reverse complement
+	if( iMotif < GetBasePSTs( ) ) {
+		strKMer = GetRCOne( iMotif );
+		return ( strKMer + c_cSeparator + GetReverseComplement( strKMer ) ); }
+// pst
+	return GetPST( iMotif )->GetMotif( ); }
+
+CPST* CCoalesceMotifLibraryImpl::CreatePST( uint32_t& iMotif ) {
+	CPST*	pRet;
+
+	iMotif = GetBasePSTs( ) + GetPSTs( );
+	m_vecpPSTs.push_back( pRet = new CPST( strlen( c_szBases ) ) );
+	return pRet; }
+
 /*!
  * \brief
  * Returns a motif ID constructed from the given string representation.
  * \see
  * GetMotif
  */
-uint32_t CCoalesceMotifLibrary::Open( const std::string& strMotif ) {
-	uint32_t	iMotif;
-	CPST*		pPST;
-
-	if( strMotif.length( ) == GetK( ) && !IsIgnorableKMer( strMotif ) )
-		return KMer2ID( strMotif );
-	if( ( strMotif.length( ) == ( ( 2 * GetK( ) ) + 1 ) ) &&
-		!IsIgnorableKMer( strMotif.substr( 0, GetK( ) ) ) &&
-		( strMotif.substr( 0, GetK( ) ) == GetReverseComplement( strMotif.substr( GetK( ) + 1 ) ) ) &&
-		( ( iMotif = KMer2ID( strMotif.substr( 0, GetK( ) ) ) ) != -1 ) )
-		return ( GetBaseRCs( ) + m_veciKMer2RC[ iMotif ] );
-
-	pPST = CreatePST( iMotif );
-	if( !pPST->Open( strMotif ) ) {
-		delete pPST;
-		m_vecpPSTs.pop_back( );
-		return -1; }
-
-	return iMotif; }
-
-float CCoalesceMotifLibraryImpl::AlignKMers( const std::string& strOne, const std::string& strTwo,
-	float dCutoff ) const {
-	int	iOffset;
-
-	return Align( strOne, strTwo, dCutoff, iOffset ); }
-
-uint32_t CCoalesceMotifLibraryImpl::MergeKMers( const std::string& strOne, const std::string& strTwo,
-	float dCutoff, bool fAllowDuplicates ) {
-	int			iOffset;
-	float		dScore;
-	uint32_t	iRet;
-	CPST*		pPST;
-
-	if( ( ( dScore = Align( strOne, strTwo, dCutoff, iOffset ) ) > dCutoff ) ||
-		!( fAllowDuplicates || dScore ) )
-		return -1;
-
-	pPST = CreatePST( iRet );
-	pPST->Add( strOne, strTwo, iOffset );
-	if( g_CatSleipnir.isInfoEnabled( ) )
-		g_CatSleipnir.info( "CCoalesceMotifLibraryImpl::MergeKMers( %s, %s, %g ) merged at %g to %s",
-			strOne.c_str( ), strTwo.c_str( ), dCutoff, dScore, pPST->GetMotif( ).c_str( ) );
-	return iRet; }
-
-float CCoalesceMotifLibraryImpl::AlignKMerRC( const std::string& strKMer, uint32_t iRC, float dCutoff ) const {
-	string	strOne, strTwo;
-	float	dOne, dTwo;
-	int		iOne, iTwo;
-
-	strOne = GetRCOne( iRC );
-	strTwo = GetReverseComplement( strOne );
-	dOne = Align( strKMer, strOne, dCutoff, iOne );
-	dTwo = Align( strKMer, strTwo, dCutoff, iTwo );
-
-	return min( dOne, dTwo ); }
-
-uint32_t CCoalesceMotifLibraryImpl::MergeKMerRC( uint32_t iKMer, uint32_t iRC, float dCutoff,
-	bool fAllowDuplicates ) {
-	string		strKMer, strOne, strTwo;
-	float		dOne, dTwo, dMin;
-	int			iOne, iTwo;
-	uint32_t	iRet;
-	CPST*		pPST;
-
-	if( m_veciKMer2RC[ iKMer ] == iRC )
-		return -1;
-	strKMer = GetMotif( iKMer );
-	strOne = GetRCOne( iRC );
-	strTwo = GetReverseComplement( strOne );
-	dOne = Align( strKMer, strOne, dCutoff, iOne );
-	dTwo = Align( strKMer, strTwo, dCutoff, iTwo );
-	dMin = min( dOne, dTwo );
-	if( ( dMin > dCutoff ) || !( fAllowDuplicates || dMin ) )
-		return -1;
-
-	pPST = CreatePST( iRet );
-	if( dOne < dTwo ) {
-		pPST->Add( strKMer, strOne, iOne );
-		pPST->Add( strTwo ); }
-	else {
-		pPST->Add( strKMer, strTwo, iTwo );
-		pPST->Add( strOne ); }
-	if( g_CatSleipnir.isInfoEnabled( ) )
-		g_CatSleipnir.info( "CCoalesceMotifLibraryImpl::MergeKMerRC( %s, %s, %g ) merged at %g to %s",
-			strKMer.c_str( ), GetMotif( iRC ).c_str( ), dCutoff, min( dOne, dTwo ),
-			pPST->GetMotif( ).c_str( ) );
-	return iRet; }
-
-struct SCrossRCs {
-	string	m_strOne;
-	string	m_strTwo;
-	float	m_dScore;
-	int		m_iOffset;
-};
-
-float CCoalesceMotifLibraryImpl::AlignRCs( uint32_t iOne, uint32_t iTwo, float dCutoff ) const {
-	SCrossRCs	asCrosses[ 4 ];
-	size_t		i;
-	float		dMin;
-
-	asCrosses[ 0 ].m_strOne = asCrosses[ 1 ].m_strOne = GetRCOne( iOne );
-	asCrosses[ 0 ].m_strTwo = asCrosses[ 2 ].m_strTwo = GetRCOne( iTwo );
-	asCrosses[ 1 ].m_strTwo = asCrosses[ 3 ].m_strTwo = GetReverseComplement( asCrosses[ 0 ].m_strTwo );
-	asCrosses[ 2 ].m_strOne = asCrosses[ 3 ].m_strOne = GetReverseComplement( asCrosses[ 0 ].m_strOne );
-	dMin = FLT_MAX;
-	for( i = 0; i < ARRAYSIZE(asCrosses); ++i ) {
-		asCrosses[ i ].m_dScore = Align( asCrosses[ i ].m_strOne, asCrosses[ i ].m_strTwo, dCutoff,
-			asCrosses[ i ].m_iOffset );
-		if( asCrosses[ i ].m_dScore < dMin )
-			dMin = asCrosses[ i ].m_dScore; }
-
-	return dMin; }
-
-uint32_t CCoalesceMotifLibraryImpl::MergeRCs( uint32_t iOne, uint32_t iTwo, float dCutoff,
-	bool fAllowDuplicates ) {
-	SCrossRCs	asCrosses[ 4 ];
-	uint32_t	iRet;
-	CPST*		pPST;
-	size_t		i, iMin;
-	float		dMin;
-
-	asCrosses[ 0 ].m_strOne = asCrosses[ 1 ].m_strOne = GetRCOne( iOne );
-	asCrosses[ 0 ].m_strTwo = asCrosses[ 2 ].m_strTwo = GetRCOne( iTwo );
-	asCrosses[ 1 ].m_strTwo = asCrosses[ 3 ].m_strTwo = GetReverseComplement( asCrosses[ 0 ].m_strTwo );
-	asCrosses[ 2 ].m_strOne = asCrosses[ 3 ].m_strOne = GetReverseComplement( asCrosses[ 0 ].m_strOne );
-	dMin = FLT_MAX;
-	for( iMin = i = 0; i < ARRAYSIZE(asCrosses); ++i ) {
-		asCrosses[ i ].m_dScore = Align( asCrosses[ i ].m_strOne, asCrosses[ i ].m_strTwo, dCutoff,
-			asCrosses[ i ].m_iOffset );
-		if( asCrosses[ i ].m_dScore < dMin ) {
-			dMin = asCrosses[ i ].m_dScore;
-			iMin = i; } }
-	if( ( dMin > dCutoff ) || !( fAllowDuplicates || dMin ) )
-		return -1;
-
-	pPST = CreatePST( iRet );
-	pPST->Add( asCrosses[ iMin ].m_strOne, asCrosses[ iMin ].m_strTwo, asCrosses[ iMin ].m_iOffset );
-	{
-		CPST	PST( strlen( c_szBases ) );
-
-		PST.Add( asCrosses[ ( iMin + 1 ) % ARRAYSIZE(asCrosses) ].m_strTwo,
-			asCrosses[ ( iMin + 2 ) % ARRAYSIZE(asCrosses) ].m_strOne, asCrosses[ iMin ].m_iOffset );
-		pPST->Add( PST );
-	}
-	if( g_CatSleipnir.isInfoEnabled( ) )
-		g_CatSleipnir.info( "CCoalesceMotifLibraryImpl::MergeRCs( %s, %s, %g ) merged at %g to %s",
-			GetMotif( iOne ).c_str( ), GetMotif( iTwo ).c_str( ), dCutoff, dMin, pPST->GetMotif( ).c_str( ) );
-	return iRet; }
-
-float CCoalesceMotifLibraryImpl::AlignKMerPST( const std::string& strKMer, const CPST& PSTIn,
-	float dCutoff ) const {
-	int	iOffset;
-
-	return PSTIn.Align( strKMer, m_dPenaltyGap, m_dPenaltyMismatch, dCutoff, iOffset ); }
-
-uint32_t CCoalesceMotifLibraryImpl::MergeKMerPST( const std::string& strKMer, const CPST& PSTIn,
-	float dCutoff, bool fAllowDuplicates ) {
-	int			iOffset;
-	float		dScore;
-	uint32_t	iRet;
-	CPST*		pPSTOut;
-
-	if( ( ( dScore = PSTIn.Align( strKMer, m_dPenaltyGap, m_dPenaltyMismatch, dCutoff, iOffset ) ) >
-		dCutoff ) || !( fAllowDuplicates || dScore ) )
-		return -1;
-
-	pPSTOut = CreatePST( iRet );
-	pPSTOut->Add( strKMer, PSTIn, iOffset );
-	if( g_CatSleipnir.isInfoEnabled( ) ) {
-		ostringstream	ossm;
-
-		ossm << "CCoalesceMotifLibraryImpl::MergeKMerPST( " << strKMer << ", " << PSTIn.GetMotif( ) <<
-			", " << dCutoff << " ) merged at " << dScore << " to " << pPSTOut->GetMotif( );
-		g_CatSleipnir.info( ossm.str( ).c_str( ) ); }
-	return iRet; }
-
-float CCoalesceMotifLibraryImpl::AlignRCPST( uint32_t iRC, const CPST& PSTIn, float dCutoff ) const {
-	int		iOne, iTwo;
-	string	strOne, strTwo;
-	float	dOne, dTwo;
-
-	strOne = GetRCOne( iRC );
-	strTwo = GetReverseComplement( strOne );
-	dOne = PSTIn.Align( strOne, m_dPenaltyGap, m_dPenaltyMismatch, dCutoff, iOne );
-	dTwo = PSTIn.Align( strTwo, m_dPenaltyGap, m_dPenaltyMismatch, dCutoff, iTwo );
-
-	return min( dOne, dTwo ); }
-
-uint32_t CCoalesceMotifLibraryImpl::MergeRCPST( uint32_t iRC, const CPST& PSTIn, float dCutoff,
-	bool fAllowDuplicates ) {
-	int			iOne, iTwo;
-	uint32_t	iRet;
-	CPST*		pPSTOut;
-	string		strOne, strTwo;
-	float		dOne, dTwo, dMin;
-
-	strOne = GetRCOne( iRC );
-	strTwo = GetReverseComplement( strOne );
-	dOne = PSTIn.Align( strOne, m_dPenaltyGap, m_dPenaltyMismatch, dCutoff, iOne );
-	dTwo = PSTIn.Align( strTwo, m_dPenaltyGap, m_dPenaltyMismatch, dCutoff, iTwo );
-	dMin = min( dOne, dTwo );
-	if( ( dMin > dCutoff ) || !( fAllowDuplicates || dMin ) )
-		return -1;
-
-	pPSTOut = CreatePST( iRet );
-	if( dOne < dTwo ) {
-		pPSTOut->Add( strOne, PSTIn, iOne );
-		pPSTOut->Add( strTwo ); }
-	else {
-		pPSTOut->Add( strTwo, PSTIn, iTwo );
-		pPSTOut->Add( strOne ); }
-	if( g_CatSleipnir.isInfoEnabled( ) ) {
-		ostringstream	ossm;
-
-		ossm << "CCoalesceMotifLibraryImpl::MergeRCPST( " << GetMotif( iRC ) << ", " << PSTIn.GetMotif( ) <<
-			", " << dCutoff << " ) merged at " << min( dOne, dTwo ) << " to " << pPSTOut->GetMotif( );
-		g_CatSleipnir.info( ossm.str( ).c_str( ) ); }
-	return iRet; }
-
-float CCoalesceMotifLibraryImpl::AlignPSTs( const CPST& PSTOne, const CPST& PSTTwo, float dCutoff ) const {
-	int	iOffset;
-
-	return PSTOne.Align( PSTTwo, m_dPenaltyGap, m_dPenaltyMismatch, dCutoff, iOffset ); }
-
-uint32_t CCoalesceMotifLibraryImpl::MergePSTs( const CPST& PSTOne, const CPST& PSTTwo, float dCutoff,
-	bool fAllowDuplicates ) {
-	int			iOffset;
-	uint32_t	iRet;
-	CPST*		pPSTOut;
-	float		dScore;
-
-	if( ( ( dScore = PSTOne.Align( PSTTwo, m_dPenaltyGap, m_dPenaltyMismatch, dCutoff, iOffset ) ) >
-		dCutoff ) || !( fAllowDuplicates || dScore ) )
-		return -1;
-
-	pPSTOut = CreatePST( iRet );
-	if( iOffset < 0 ) {
-		pPSTOut->Add( PSTOne );
-		pPSTOut->Add( PSTTwo, -iOffset ); }
-	else {
-		pPSTOut->Add( PSTTwo );
-		pPSTOut->Add( PSTOne, iOffset ); }
-	if( g_CatSleipnir.isInfoEnabled( ) ) {
-		ostringstream	ossm;
-
-		ossm << "CCoalesceMotifLibraryImpl::MergePSTs( " << PSTOne.GetMotif( ) << ", " <<
-			PSTTwo.GetMotif( ) << ", " << dCutoff << " ) merged at " << dScore << " to " <<
-			pPSTOut->GetMotif( );
-		g_CatSleipnir.info( ossm.str( ).c_str( ) ); }
-
-	return iRet; }
-
-uint32_t CCoalesceMotifLibraryImpl::RemoveRCs( const CPST& PST, float dPenaltyGap, float dPenaltyMismatch ) {
-	CPST*		pPST;
-	uint32_t	iRet;
-
-	if( !( pPST = CreatePST( iRet ) ) )
-		return -1;
-	PST.RemoveRCs( dPenaltyGap, dPenaltyMismatch, *pPST );
-	return iRet; }
-
+uint32_t CCoalesceMotifLibrary::Open( const std::string& strMotif ) {
+	uint32_t	iMotif;
+	CPST*		pPST;
+
+	if( strMotif.length( ) == GetK( ) && !IsIgnorableKMer( strMotif ) )
+		return KMer2ID( strMotif );
+	if( ( strMotif.length( ) == ( ( 2 * GetK( ) ) + 1 ) ) &&
+		!IsIgnorableKMer( strMotif.substr( 0, GetK( ) ) ) &&
+		( strMotif.substr( 0, GetK( ) ) == GetReverseComplement( strMotif.substr( GetK( ) + 1 ) ) ) &&
+		( ( iMotif = KMer2ID( strMotif.substr( 0, GetK( ) ) ) ) != -1 ) )
+		return ( GetBaseRCs( ) + m_veciKMer2RC[ iMotif ] );
+
+	pPST = CreatePST( iMotif );
+	if( !pPST->Open( strMotif ) ) {
+		delete pPST;
+		m_vecpPSTs.pop_back( );
+		return -1; }
+
+	return iMotif; }
+
+float CCoalesceMotifLibraryImpl::AlignKMers( const std::string& strOne, const std::string& strTwo,
+	float dCutoff ) const {
+	int	iOffset;
+
+	return Align( strOne, strTwo, dCutoff, iOffset ); }
+
+uint32_t CCoalesceMotifLibraryImpl::MergeKMers( const std::string& strOne, const std::string& strTwo,
+	float dCutoff, bool fAllowDuplicates ) {
+	int			iOffset;
+	float		dScore;
+	uint32_t	iRet;
+	CPST*		pPST;
+
+	if( ( ( dScore = Align( strOne, strTwo, dCutoff, iOffset ) ) > dCutoff ) ||
+		!( fAllowDuplicates || dScore ) )
+		return -1;
+
+	pPST = CreatePST( iRet );
+	pPST->Add( strOne, strTwo, iOffset );
+	if( g_CatSleipnir( ).isInfoEnabled( ) )
+		g_CatSleipnir( ).info( "CCoalesceMotifLibraryImpl::MergeKMers( %s, %s, %g ) merged at %g to %s",
+			strOne.c_str( ), strTwo.c_str( ), dCutoff, dScore, pPST->GetMotif( ).c_str( ) );
+	return iRet; }
+
+float CCoalesceMotifLibraryImpl::AlignKMerRC( const std::string& strKMer, uint32_t iRC, float dCutoff ) const {
+	string	strOne, strTwo;
+	float	dOne, dTwo;
+	int		iOne, iTwo;
+
+	strOne = GetRCOne( iRC );
+	strTwo = GetReverseComplement( strOne );
+	dOne = Align( strKMer, strOne, dCutoff, iOne );
+	dTwo = Align( strKMer, strTwo, dCutoff, iTwo );
+
+	return min( dOne, dTwo ); }
+
+uint32_t CCoalesceMotifLibraryImpl::MergeKMerRC( uint32_t iKMer, uint32_t iRC, float dCutoff,
+	bool fAllowDuplicates ) {
+	string		strKMer, strOne, strTwo;
+	float		dOne, dTwo, dMin;
+	int			iOne, iTwo;
+	uint32_t	iRet;
+	CPST*		pPST;
+
+	if( m_veciKMer2RC[ iKMer ] == iRC )
+		return -1;
+	strKMer = GetMotif( iKMer );
+	strOne = GetRCOne( iRC );
+	strTwo = GetReverseComplement( strOne );
+	dOne = Align( strKMer, strOne, dCutoff, iOne );
+	dTwo = Align( strKMer, strTwo, dCutoff, iTwo );
+	dMin = min( dOne, dTwo );
+	if( ( dMin > dCutoff ) || !( fAllowDuplicates || dMin ) )
+		return -1;
+
+	pPST = CreatePST( iRet );
+	if( dOne < dTwo ) {
+		pPST->Add( strKMer, strOne, iOne );
+		pPST->Add( strTwo ); }
+	else {
+		pPST->Add( strKMer, strTwo, iTwo );
+		pPST->Add( strOne ); }
+	if( g_CatSleipnir( ).isInfoEnabled( ) )
+		g_CatSleipnir( ).info( "CCoalesceMotifLibraryImpl::MergeKMerRC( %s, %s, %g ) merged at %g to %s",
+			strKMer.c_str( ), GetMotif( iRC ).c_str( ), dCutoff, min( dOne, dTwo ),
+			pPST->GetMotif( ).c_str( ) );
+	return iRet; }
+
+struct SCrossRCs {
+	string	m_strOne;
+	string	m_strTwo;
+	float	m_dScore;
+	int		m_iOffset;
+};
+
+float CCoalesceMotifLibraryImpl::AlignRCs( uint32_t iOne, uint32_t iTwo, float dCutoff ) const {
+	SCrossRCs	asCrosses[ 4 ];
+	size_t		i;
+	float		dMin;
+
+	asCrosses[ 0 ].m_strOne = asCrosses[ 1 ].m_strOne = GetRCOne( iOne );
+	asCrosses[ 0 ].m_strTwo = asCrosses[ 2 ].m_strTwo = GetRCOne( iTwo );
+	asCrosses[ 1 ].m_strTwo = asCrosses[ 3 ].m_strTwo = GetReverseComplement( asCrosses[ 0 ].m_strTwo );
+	asCrosses[ 2 ].m_strOne = asCrosses[ 3 ].m_strOne = GetReverseComplement( asCrosses[ 0 ].m_strOne );
+	dMin = FLT_MAX;
+	for( i = 0; i < ARRAYSIZE(asCrosses); ++i ) {
+		asCrosses[ i ].m_dScore = Align( asCrosses[ i ].m_strOne, asCrosses[ i ].m_strTwo, dCutoff,
+			asCrosses[ i ].m_iOffset );
+		if( asCrosses[ i ].m_dScore < dMin )
+			dMin = asCrosses[ i ].m_dScore; }
+
+	return dMin; }
+
+uint32_t CCoalesceMotifLibraryImpl::MergeRCs( uint32_t iOne, uint32_t iTwo, float dCutoff,
+	bool fAllowDuplicates ) {
+	SCrossRCs	asCrosses[ 4 ];
+	uint32_t	iRet;
+	CPST*		pPST;
+	size_t		i, iMin;
+	float		dMin;
+
+	asCrosses[ 0 ].m_strOne = asCrosses[ 1 ].m_strOne = GetRCOne( iOne );
+	asCrosses[ 0 ].m_strTwo = asCrosses[ 2 ].m_strTwo = GetRCOne( iTwo );
+	asCrosses[ 1 ].m_strTwo = asCrosses[ 3 ].m_strTwo = GetReverseComplement( asCrosses[ 0 ].m_strTwo );
+	asCrosses[ 2 ].m_strOne = asCrosses[ 3 ].m_strOne = GetReverseComplement( asCrosses[ 0 ].m_strOne );
+	dMin = FLT_MAX;
+	for( iMin = i = 0; i < ARRAYSIZE(asCrosses); ++i ) {
+		asCrosses[ i ].m_dScore = Align( asCrosses[ i ].m_strOne, asCrosses[ i ].m_strTwo, dCutoff,
+			asCrosses[ i ].m_iOffset );
+		if( asCrosses[ i ].m_dScore < dMin ) {
+			dMin = asCrosses[ i ].m_dScore;
+			iMin = i; } }
+	if( ( dMin > dCutoff ) || !( fAllowDuplicates || dMin ) )
+		return -1;
+
+	pPST = CreatePST( iRet );
+