Commits

chuttenh  committed 44a7c3a

[svn r400] Add recursive GetChildren and GetParents to IOntology/COntologyImpl
Fix multiple parent/child annotation bug in COntologyImpl
Although why GO needs to annotate genes to terms' descendants, I'm not sure
Improve CFile organization (particularly OpenToken) for better performance
Improve performance of CPCL::Open
Add simplification trimming to CPST::RemoveRCs
Only branches with frequency above 1/arity are preserved
Add CPST::Integrate and CPST::Simplify
Former gives weighted node count, latter removes low-frequency branches
Fix size_t->uint16_t bug in GetPWM to correspond to CPSTImpl::SNode
Add (inverse) cluster size weighting to Clusters2Dab
Add summation to Combiner
Add parentage command to OntoShell
Performs a "bubble-up" like we need for mapping terms to a slim
Add heuristic motif simplification to COALESCE
Does approximate motif merging above a given threshhold
Reduces motif number to <~5000 before doing an exact merge
Add exactly-two-motif merge SMotifMatch::Open
Fix handling of duplicate merges in CCoalesceMotifLibrary::Merge

  • Participants
  • Parent commits 9b181d6

Comments (0)

Files changed (29)

File src/annotation.cpp

 	return false; }
 
 void COntologyImpl::CollectGenes( size_t iNode, TSetPGenes& setpGenes ) {
+	SNode&						sNode	= m_aNodes[ iNode ];
 	size_t						i;
 	TSetPGenes					setpKids;
 	TSetPGenes::const_iterator	iterGenes;
 
-	if( m_aNodes[ iNode ].m_iCacheGenes != -1 ) {
-		for( i = 0; i < m_aNodes[ iNode ].m_iGenes; ++i )
-			setpGenes.insert( m_aNodes[ iNode ].m_apGenes[ i ] );
-		for( i = 0; i < m_aNodes[ iNode ].m_iCacheGenes; ++i )
-			setpGenes.insert( m_aNodes[ iNode ].m_apCacheGenes[ i ] );
+	if( sNode.m_iCacheGenes != -1 ) {
+		for( i = 0; i < sNode.m_iGenes; ++i )
+			setpGenes.insert( sNode.m_apGenes[ i ] );
+		for( i = 0; i < sNode.m_iCacheGenes; ++i )
+			setpGenes.insert( sNode.m_apCacheGenes[ i ] );
 		return; }
 
-	for( i = 0; i < m_aNodes[ iNode ].m_iGenes; ++i )
-		setpGenes.insert( m_aNodes[ iNode ].m_apGenes[ i ] );
-	for( i = 0; i < m_aNodes[ iNode ].m_iChildren; ++i )
-		CollectGenes( m_aNodes[ iNode ].m_aiChildren[ i ], setpKids );
-	if( m_aNodes[ iNode ].m_iCacheGenes = setpKids.size( ) ) {
-		m_aNodes[ iNode ].m_apCacheGenes = new const CGene*[ setpKids.size( ) ];
-		for( i = 0,iterGenes = setpKids.begin( ); iterGenes != setpKids.end( );
-			++i,++iterGenes ) {
-			m_aNodes[ iNode ].m_apCacheGenes[ i ] = *iterGenes;
-			setpGenes.insert( *iterGenes ); } } }
+	for( i = 0; i < sNode.m_iGenes; ++i )
+		setpGenes.insert( sNode.m_apGenes[ i ] );
+	for( i = 0; i < sNode.m_iChildren; ++i )
+		CollectGenes( sNode.m_aiChildren[ i ], setpKids );
+	sNode.m_iCacheGenes = 0;
+	for( iterGenes = setpKids.begin( ); iterGenes != setpKids.end( ); ++iterGenes )
+		if( setpGenes.find( *iterGenes ) == setpGenes.end( ) )
+			sNode.m_iCacheGenes++;
+	if( sNode.m_iCacheGenes ) {
+		sNode.m_apCacheGenes = new const CGene*[ sNode.m_iCacheGenes ];
+		for( i = 0,iterGenes = setpKids.begin( ); iterGenes != setpKids.end( ); ++iterGenes )
+			if( setpGenes.find( *iterGenes ) == setpGenes.end( ) ) {
+				sNode.m_apCacheGenes[ i++ ] = *iterGenes;
+				setpGenes.insert( *iterGenes ); } } }
 
 size_t COntologyImpl::GetNode( const std::string& strID ) const {
 	TMapStrI::const_iterator	iterNode;

File src/annotation.h

 	 * Requested child must be less than IOntology::GetChildren.
 	 */
 	virtual size_t GetChild( size_t iTerm, size_t iChild ) const = 0;
+	virtual bool GetParents( size_t iTerm, std::set<size_t>& setiParents ) const = 0;
+	virtual bool GetChildren( size_t iTerm, std::set<size_t>& setiChildren ) const = 0;
 	/*!
 	 * \brief
 	 * Returns the number of genes annotated to or (optionally) below this term.
 	const CGene& GetGene( size_t iTerm, size_t iGene ) const {
 
 		return COntologyImpl::GetGene( iTerm, iGene ); }
+
+	bool GetParents( size_t iTerm, std::set<size_t>& setiParents ) const {
+
+		return COntologyImpl::GetParents( iTerm, setiParents ); }
+
+	bool GetChildren( size_t iTerm, std::set<size_t>& setiChildren ) const {
+
+		return COntologyImpl::GetChildren( iTerm, setiChildren ); }
 };
 
 /*!
 	const CGene& GetGene( size_t iTerm, size_t iGene ) const {
 
 		return COntologyImpl::GetGene( iTerm, iGene ); }
+
+	bool GetParents( size_t iTerm, std::set<size_t>& setiParents ) const {
+
+		return COntologyImpl::GetParents( iTerm, setiParents ); }
+
+	bool GetChildren( size_t iTerm, std::set<size_t>& setiChildren ) const {
+
+		return COntologyImpl::GetChildren( iTerm, setiChildren ); }
 };
 
 /*!
 	const CGene& GetGene( size_t iTerm, size_t iGene ) const {
 
 		return COntologyImpl::GetGene( iTerm, iGene ); }
+
+	bool GetParents( size_t iTerm, std::set<size_t>& setiParents ) const {
+
+		return COntologyImpl::GetParents( iTerm, setiParents ); }
+
+	bool GetChildren( size_t iTerm, std::set<size_t>& setiChildren ) const {
+
+		return COntologyImpl::GetChildren( iTerm, setiChildren ); }
 };
 
 /*!
 	const std::string& GetSlim( size_t iSlim ) const {
 
 		return m_vecstrSlims[ iSlim ]; }
+
+	size_t GetNodes( size_t iSlim ) const {
+
+		return m_vecveciTerms[ iSlim ].size( ); }
+
+	size_t GetNode( size_t iSlim, size_t iTerm ) const {
+
+		return m_vecveciTerms[ iSlim ][ iTerm ]; }
 };
 
 }

File src/annotationi.h

 		if( m_aNodes[ iNode ].m_iCacheGenes == -1 )
 			((COntologyImpl*)this)->CollectGenes( iNode, setpGenes ); }
 
+	bool GetChildren( size_t iNode, std::set<size_t>& setiChildren ) const {
+		size_t	i, iChild;
+
+		if( setiChildren.find( iNode ) != setiChildren.end( ) )
+			return true;
+
+		for( i = 0; i < GetChildren( iNode ); ++i ) {
+			if( !GetChildren( iChild = GetChild( iNode, i ), setiChildren ) )
+				return false;
+			setiChildren.insert( iChild ); }
+
+		return true; }
+
+	bool GetParents( size_t iNode, std::set<size_t>& setiParents ) const {
+		size_t	i, iParent;
+
+		if( setiParents.find( iNode ) != setiParents.end( ) )
+			return true;
+
+		for( i = 0; i < GetParents( iNode ); ++i ) {
+			if( !GetParents( iParent = GetParent( iNode, i ), setiParents ) )
+				return false;
+			setiParents.insert( iParent ); }
+
+		return true; }
+
 	const IOntology*	m_pOntology;
 	std::string			m_strID;
 	size_t				m_iNodes;

File src/coalesce.cpp

 			if( ( iterMotifOne->m_iMotif == iterMotifTwo->m_iMotif ) ||
 				!HistsCluster.IsSimilar( m_pMotifs, *iterMotifOne, *iterMotifTwo, m_dPValueMerge ) ||
 				( ( iMotif = m_pMotifs->Merge( iterMotifOne->m_iMotif, iterMotifTwo->m_iMotif,
-				m_dCutoffMerge ) ) == -1 ) )
+				m_dCutoffMerge, false ) ) == -1 ) )
 				continue;
 
 			vecpthdThreads.resize( iThreads );

File src/coalescecluster.cpp

 #include "halfmatrix.h"
 #include "statistics.h"
 #include "clusthierarchical.h"
+#include "pst.h"
 
 namespace Sleipnir {
 
 				dP = 0; }
 			else {
 				dZ = CStatistics::ZScore( adCluster, adCluster + iCluster, adPot, adPot + iPot );
-				dP = CStatistics::ZTest( dZ, iCluster ) * psData->m_pvecsDatasets->size( ); }
+				dP = CStatistics::ZTest( dZ, min( iCluster, iPot ) ) * psData->m_pvecsDatasets->size( ); }
 			if( dP < psData->m_dPValue ) {
 				g_CatSleipnir.info( "CCoalesceClusterImpl::ThreadSelectCondition( %g ) selected condition %d at %g, z=%g",
 					psData->m_dPValue, iCondition, dP, dZ );
 	return PCLCluster.GetExperiments( ); }
 
 bool CCoalesceCluster::Open( const CHierarchy& Hierarchy, const vector<CCoalesceCluster>& vecClusters,
-	const vector<string>& vecstrClusters, float dFraction, float dCutoff, CCoalesceMotifLibrary* pMotifs ) {
+	const vector<string>& vecstrClusters, float dFraction, float dCutoff, size_t iCutoff,
+	CCoalesceMotifLibrary* pMotifs ) {
 	map<size_t, size_t>						mapiiGenes, mapiiDatasets;
-	size_t									i, j, k, iClusters;
+	size_t									i, iClusters;
 	map<size_t, size_t>::const_iterator		iterItem;
 	vector<map<string, set<SMotifMatch> > >	vecmapstrsetsMotifs;
 
 
 		for( iterMotifs = mapstrsetsMotifs.begin( ); iterMotifs != mapstrsetsMotifs.end( ); ++iterMotifs ) {
 			const set<SMotifMatch>&	setsMotifs	= iterMotifs->second;
-			vector<SMotifMatch>		vecsMotifs;
-			CDistanceMatrix			MatSimilarity;
-			CHierarchy*				pHierMotifs;
 
-			vecsMotifs.resize( setsMotifs.size( ) );
-			copy( setsMotifs.begin( ), setsMotifs.end( ), vecsMotifs.begin( ) );
-			MatSimilarity.Initialize( vecsMotifs.size( ) );
-			for( j = 0; j < vecsMotifs.size( ); ++j )
-				for( k = ( j + 1 ); k < vecsMotifs.size( ); ++k )
-					MatSimilarity.Set( j, k, -pMotifs->Align( vecsMotifs[ j ].m_iMotif,
-						vecsMotifs[ k ].m_iMotif, dCutoff ) );
-			if( !( ( pHierMotifs = CClustHierarchical::Cluster( MatSimilarity ) ) &&
-				CCoalesceClusterImpl::Open( *pMotifs, *pHierMotifs, vecsMotifs, dCutoff, m_setsMotifs ) ) )
-				return false;
-			pHierMotifs->Destroy( ); } }
+			if( !( ( setsMotifs.size( ) < iCutoff ) ?
+				CCoalesceClusterImpl::OpenMotifs( setsMotifs, *pMotifs, dCutoff ) :
+				CCoalesceClusterImpl::OpenMotifsHeuristic( setsMotifs, *pMotifs, dCutoff, iCutoff ) ) )
+				return false; } }
 
 	return true; }
 
+bool CCoalesceClusterImpl::OpenMotifsHeuristic( const set<SMotifMatch>& setsMotifs,
+	CCoalesceMotifLibrary& Motifs, float dCutoff, size_t iCutoff ) {
+	vector<SMotifMatch>	vecsMotifs;
+	bool				fDone;
+	size_t				i, iMotifs;
+	set<SMotifMatch>	setsMerged;
+
+	iMotifs = setsMotifs.size( );
+	g_CatSleipnir.notice( "CCoalesceClusterImpl::OpenMotifsHeuristic( %g ) resolving %d motifs", dCutoff,
+		iMotifs );
+
+	vecsMotifs.resize( iMotifs );
+	copy( setsMotifs.begin( ), setsMotifs.end( ), vecsMotifs.begin( ) );
+	do {
+		fDone = true;
+		sort( vecsMotifs.begin( ), vecsMotifs.end( ) );
+		for( i = 0; ( i + 1 ) < vecsMotifs.size( ); ++i ) {
+			SMotifMatch&	sOne	= vecsMotifs[ i ];
+			SMotifMatch&	sTwo	= vecsMotifs[ i + 1 ];
+
+			if( ( sOne.m_iMotif == -1 ) || ( sTwo.m_iMotif == -1 ) )
+				break;
+			if( Motifs.Align( sOne.m_iMotif, sTwo.m_iMotif, dCutoff ) > dCutoff )
+				continue;
+			if( sTwo.Open( sOne, sTwo, Motifs ) == -1 )
+				return false;
+			if( Motifs.GetPST( sTwo.m_iMotif )->Integrate( ) > iCutoff )
+				Motifs.Simplify( sTwo.m_iMotif );
+			fDone = false;
+			iMotifs--;
+			sOne.m_iMotif = -1; } }
+	while( !fDone );
+
+	for( i = 0; i < vecsMotifs.size( ); ++i )
+		if( vecsMotifs[ i ].m_iMotif != -1 )
+			setsMerged.insert( vecsMotifs[ i ] );
+
+	return OpenMotifs( setsMerged, Motifs, dCutoff ); }
+
+bool CCoalesceClusterImpl::OpenMotifs( const set<SMotifMatch>& setsMotifs, CCoalesceMotifLibrary& Motifs,
+	float dCutoff ) {
+	vector<SMotifMatch>	vecsMotifs;
+	CDistanceMatrix		MatSimilarity;
+	CHierarchy*			pHierMotifs;
+	size_t				i, j;
+	bool				fRet;
+
+	g_CatSleipnir.notice( "CCoalesceClusterImpl::OpenMotifs( %g ) resolving %d motifs", dCutoff,
+		setsMotifs.size( ) );
+
+	vecsMotifs.resize( setsMotifs.size( ) );
+	copy( setsMotifs.begin( ), setsMotifs.end( ), vecsMotifs.begin( ) );
+	MatSimilarity.Initialize( vecsMotifs.size( ) );
+	for( i = 0; i < vecsMotifs.size( ); ++i )
+		for( j = ( i + 1 ); j < vecsMotifs.size( ); ++j )
+			MatSimilarity.Set( i, j, -Motifs.Align( vecsMotifs[ i ].m_iMotif,
+				vecsMotifs[ j ].m_iMotif, dCutoff ) );
+	if( !( pHierMotifs = CClustHierarchical::Cluster( MatSimilarity ) ) )
+		return false;
+	fRet = CCoalesceClusterImpl::OpenMotifs( Motifs, *pHierMotifs, vecsMotifs, dCutoff, m_setsMotifs );
+	pHierMotifs->Destroy( );
+	return fRet; }
+
 size_t CCoalesceClusterImpl::Open( const CHierarchy& Hier, const vector<CCoalesceCluster>& vecClusters,
 	const vector<string>& vecstrClusters, map<size_t, size_t>& mapiiGenes, map<size_t, size_t>& mapiiDatasets,
 	TVecMapStrSetSMotifs& vecmapstrsetsMotifs ) {
 
 	return 1; }
 
-bool CCoalesceClusterImpl::Open( CCoalesceMotifLibrary& Motifs, const CHierarchy& Hier,
+bool CCoalesceClusterImpl::OpenMotifs( CCoalesceMotifLibrary& Motifs, const CHierarchy& Hier,
 	const vector<SMotifMatch>& vecsMotifs, float dCutoff, set<SMotifMatch>& setsMotifs ) {
 
 	if( Hier.IsGene( ) || ( -Hier.GetSimilarity( ) < dCutoff ) ) {
 		setsMotifs.insert( sMotif );
 		return true; }
 
-	return ( Open( Motifs, Hier.Get( false ), vecsMotifs, dCutoff, setsMotifs ) &&
-		Open( Motifs, Hier.Get( true ), vecsMotifs, dCutoff, setsMotifs ) ); }
+	return ( OpenMotifs( Motifs, Hier.Get( false ), vecsMotifs, dCutoff, setsMotifs ) &&
+		OpenMotifs( Motifs, Hier.Get( true ), vecsMotifs, dCutoff, setsMotifs ) ); }
 
 float CCoalesceCluster::GetSimilarity( const CCoalesceCluster& Cluster, size_t iGenes,
 	size_t iDatasets ) const {

File src/coalescecluster.h

 	size_t Open( const std::string& strPCL, size_t iSkip, const CPCL& PCL,
 		CCoalesceMotifLibrary* pMotifs = NULL );
 	bool Open( const CHierarchy& Hierarchy, const std::vector<CCoalesceCluster>& vecClusters,
-		const std::vector<std::string>& vecstrClusters, float dFraction, float dCutoff,
+		const std::vector<std::string>& vecstrClusters, float dFraction, float dCutoff, size_t iCutoff,
 		CCoalesceMotifLibrary* pMotifs = NULL );
 	bool Save( const std::string& strDirectory, size_t iID, const CPCL& PCL,
 		const CCoalesceMotifLibrary* pMotifs = NULL ) const;

File src/coalesceclusteri.h

 	static size_t Open( const CHierarchy&, const std::vector<CCoalesceCluster>&,
 		const std::vector<std::string>&, std::map<size_t, size_t>&, std::map<size_t, size_t>&,
 		TVecMapStrSetSMotifs& );
-	static bool Open( CCoalesceMotifLibrary&, const CHierarchy&, const std::vector<SMotifMatch>&, float,
+	static bool OpenMotifs( CCoalesceMotifLibrary&, const CHierarchy&, const std::vector<SMotifMatch>&, float,
 		std::set<SMotifMatch>& );
 
 	template<class tType>
 	bool CalculateProbabilityMotifs( const CCoalesceGeneScores&, size_t, const CCoalesceGroupHistograms&,
 		const CCoalesceGroupHistograms&, bool, size_t, float&, float& ) const;
 	bool SaveCopy( const CPCL&, const std::set<size_t>&, size_t, CPCL&, size_t, bool ) const;
+	bool OpenMotifs( const std::set<SMotifMatch>&, CCoalesceMotifLibrary&, float );
+	bool OpenMotifsHeuristic( const std::set<SMotifMatch>&, CCoalesceMotifLibrary&, float, size_t );
 
 	size_t GetConditions( size_t iDataset ) const {
 

File src/coalescemotifs.cpp

 	return iRet; }
 
 uint32_t CCoalesceMotifLibraryImpl::RemoveRCs( const CPST& PST, float dPenaltyGap, float dPenaltyMismatch ) {
-	CPST*								pPST;
-	uint32_t							iRet;
-	map<unsigned char, unsigned char>	mapccComplements;
-	size_t								i;
+	CPST*		pPST;
+	uint32_t	iRet;
 
-	for( i = 0; c_szBases[ i ]; ++i )
-		mapccComplements[ c_szBases[ i ] ] = c_szComplements[ i ];
 	if( !( pPST = CreatePST( iRet ) ) )
 		return -1;
-	PST.RemoveRCs( mapccComplements, dPenaltyGap, dPenaltyMismatch, *pPST );
+	PST.RemoveRCs( dPenaltyGap, dPenaltyMismatch, *pPST );
 	return iRet; }
 
 string CCoalesceMotifLibrary::GetPWM( uint32_t iMotif, float dCutoffPWMs, float dPenaltyGap,
 	float dPenaltyMismatch, bool fNoRCs ) const {
-	CFullMatrix<size_t>	MatPWM;
-	string				strMotif;
-	size_t				i, j;
-	ostringstream		ossm;
-	float				d;
+	CFullMatrix<uint16_t>	MatPWM;
+	string					strMotif;
+	size_t					i, j;
+	ostringstream			ossm;
+	float					d;
 
 	if( fNoRCs )
 		iMotif = ((CCoalesceMotifLibrary*)this)->RemoveRCs( iMotif, dPenaltyGap, dPenaltyMismatch );
 
 	return ossm.str( ); }
 
+bool CCoalesceMotifLibrary::Simplify( uint32_t iMotif ) const {
+
+	return ( ( GetType( iMotif ) == ETypePST ) ? CCoalesceMotifLibraryImpl::GetPST( iMotif )->Simplify( ) :
+		false ); }
+
 }

File src/coalescemotifs.h

 	uint32_t Open( const std::string& strMotif );
 	std::string GetPWM( uint32_t iMotif, float dCutoffPWMs, float dPenaltyGap, float dPenaltyMismatch,
 		bool fNoRCs ) const;
+	bool Simplify( uint32_t iMotif ) const;
 
 	/*!
 	 * \brief
 	 * \param dCutoff
 	 * Maximum edit distance threshhold for successful merging.
 	 * 
+	 * \param fAllowDuplicates
+	 * If true, duplicate merges will be handled correctly and an ID returned; otherwise -1 is returned in
+	 * such cases.
+	 * 
 	 * \returns
 	 * -1 if the two motifs cannot be merged or have already been merged; the ID of the merged motif
 	 * otherwise, which will always be a PST.
 	 * \see
 	 * SetPenaltyGap | SetPenaltyMismatch
 	 */
-	uint32_t Merge( uint32_t iOne, uint32_t iTwo, float dCutoff ) {
+	uint32_t Merge( uint32_t iOne, uint32_t iTwo, float dCutoff, bool fAllowDuplicates ) {
 		std::pair<uint32_t, uint32_t>	priiMerged;
+		TMapPrIII::const_iterator		iterMerged;
+		uint32_t						iRet;
 
 		if( iOne == iTwo )
-			return -1;
+			return ( fAllowDuplicates ? iOne : -1 );
 		priiMerged.first = min( iOne, iTwo );
 		priiMerged.second = max( iOne, iTwo );
-		if( m_setpriiMerged.find( priiMerged ) != m_setpriiMerged.end( ) )
-			return -1;
-		m_setpriiMerged.insert( priiMerged );
+		if( ( iterMerged = m_mappriiiMerged.find( priiMerged ) ) != m_mappriiiMerged.end( ) )
+			return ( fAllowDuplicates ? iterMerged->second : -1 );
 
 		switch( GetType( iOne ) ) {
 			case ETypeRC:
 				switch( GetType( iTwo ) ) {
 					case ETypeKMer:
-						return MergeKMerRC( iTwo, iOne, dCutoff );
+						iRet = MergeKMerRC( iTwo, iOne, dCutoff );
+						break;
 
 					case ETypeRC:
-						return MergeRCs( iOne, iTwo, dCutoff );
+						iRet = MergeRCs( iOne, iTwo, dCutoff );
+						break;
 
 					case ETypePST:
-						return MergeRCPST( iOne, *GetPST( iTwo ), dCutoff ); }
+						iRet = MergeRCPST( iOne, *GetPST( iTwo ), dCutoff );
+						break; }
+				break;
 
 			case ETypePST:
 				switch( GetType( iTwo ) ) {
 					case ETypeKMer:
-						return MergeKMerPST( GetMotif( iTwo ), *GetPST( iOne ), dCutoff );
+						iRet = MergeKMerPST( GetMotif( iTwo ), *GetPST( iOne ), dCutoff );
+						break;
 
 					case ETypeRC:
-						return MergeRCPST( iTwo, *GetPST( iOne ), dCutoff );
+						iRet = MergeRCPST( iTwo, *GetPST( iOne ), dCutoff );
+						break;
 
 					case ETypePST:
-						return MergePSTs( *GetPST( iOne ), *GetPST( iTwo ), dCutoff ); } }
+						iRet = MergePSTs( *GetPST( iOne ), *GetPST( iTwo ), dCutoff );
+						break; }
+				break;
 
-		switch( GetType( iTwo ) ) {
-			case ETypeRC:
-				return MergeKMerRC( iOne, iTwo, dCutoff );
+			case ETypeKMer:
+				switch( GetType( iTwo ) ) {
+					case ETypeRC:
+						iRet = MergeKMerRC( iOne, iTwo, dCutoff );
+						break;
 
-			case ETypePST:
-				return MergeKMerPST( GetMotif( iOne ), *GetPST( iTwo ), dCutoff ); }
+					case ETypePST:
+						iRet = MergeKMerPST( GetMotif( iOne ), *GetPST( iTwo ), dCutoff );
+						break;
 
-		return MergeKMers( GetMotif( iOne ), GetMotif( iTwo ), dCutoff ); }
+					case ETypeKMer:
+						iRet = MergeKMers( GetMotif( iOne ), GetMotif( iTwo ), dCutoff );
+						break; }
+				break; }
+
+		if( iRet != -1 )
+			m_mappriiiMerged[ priiMerged ] = iRet;
+		return iRet; }
 
 	uint32_t RemoveRCs( uint32_t iMotif, float dPenaltyGap, float dPenaltyMismatch ) {
 
 	float GetPenaltyMismatch( ) const {
 
 		return m_dPenaltyMismatch; }
+
+	/*!
+	 * \brief
+	 * Returns the CPST corresponding to the given motif ID.
+	 * 
+	 * \param iMotif
+	 * Motif ID of PST to retrieve.
+	 * 
+	 * \returns
+	 * CPST corresponding to the given most ID, or null if none.
+	 */
+	const CPST* GetPST( uint32_t iMotif ) const {
+
+		return ( ( GetType( iMotif ) == ETypePST ) ? CCoalesceMotifLibraryImpl::GetPST( iMotif ) : NULL ); }
 };
 
 }

File src/coalescemotifsi.h

 	static const size_t	c_iShift		= 2; // ceil( log2( strlen( c_szBases ) ) )
 	static const char	c_cSeparator	= '|';
 
+	typedef std::map<std::pair<uint32_t, uint32_t>, uint32_t>	TMapPrIII;
+
 	enum EType {
 		ETypeKMer,
 		ETypeRC,
 
 		return false; }
 
-	static bool GetPWM( const std::string& strKMer, CFullMatrix<size_t>& MatPWM ) {
+	static bool GetPWM( const std::string& strKMer, CFullMatrix<uint16_t>& MatPWM ) {
 		size_t	i, j;
 
 		if( ( MatPWM.GetColumns( ) != strlen( c_szBases ) ) || ( MatPWM.GetRows( ) != strKMer.length( ) ) ) {
 		std::reverse( strReverse.begin( ), strReverse.end( ) );
 		return GetComplement( strReverse ); }
 
-	static float GetInformation( const CFullMatrix<size_t>& MatPWM ) {
+	static float GetInformation( const CFullMatrix<uint16_t>& MatPWM ) {
 		CDataMatrix			MatProbs;
 		size_t				iPos, iFrom, iTo;
 		std::vector<size_t>	veciTotals;
 
 		return ( GetBaseRCs( ) + GetRCs( ) ); }
 
-	const CPST* GetPST( uint32_t iMotif ) const {
+	CPST* GetPST( uint32_t iMotif ) const {
 
 		return m_vecpPSTs[ iMotif - GetBasePSTs( ) ]; }
 
 
 		return ID2KMer( (uint32_t)m_veciRC2KMer[ iMotif - GetBaseRCs( ) ], m_iK ); }
 
-	float										m_dPenaltyGap;
-	float										m_dPenaltyMismatch;
-	size_t										m_iK;
-	std::vector<uint32_t>						m_veciKMer2RC;
-	std::vector<uint32_t>						m_veciRC2KMer;
-	std::vector<CPST*>							m_vecpPSTs;
-	std::set<std::pair<uint32_t, uint32_t> >	m_setpriiMerged;
+	float					m_dPenaltyGap;
+	float					m_dPenaltyMismatch;
+	size_t					m_iK;
+	std::vector<uint32_t>	m_veciKMer2RC;
+	std::vector<uint32_t>	m_veciRC2KMer;
+	std::vector<CPST*>		m_vecpPSTs;
+	TMapPrIII				m_mappriiiMerged;
 };
 
 }

File src/coalescestructs.cpp

 
 	return ( ( ( ( iLeft = Open( Hier.Get( false ), vecsMotifs, Motifs, iCount ) ) == -1 ) ||
 		( ( iRight = Open( Hier.Get( true ), vecsMotifs, Motifs, iCount ) ) == -1 ) ) ? -1 :
-		( m_iMotif = Motifs.Merge( iLeft, iRight, FLT_MAX ) ) ); }
+		( m_iMotif = Motifs.Merge( iLeft, iRight, FLT_MAX, true ) ) ); }
+
+uint32_t SMotifMatch::Open( const SMotifMatch& sOne, const SMotifMatch& sTwo,
+	CCoalesceMotifLibrary& Motifs ) {
+
+	m_eSubsequence = sOne.m_eSubsequence;
+	m_strType = sOne.m_strType;
+	m_dZ = ( sOne.m_dZ + sTwo.m_dZ ) / 2;
+	return ( m_iMotif = Motifs.Merge( sOne.m_iMotif, sTwo.m_iMotif, FLT_MAX, true ) ); }
 
 string SMotifMatch::Save( const CCoalesceMotifLibrary* pMotifs, bool fPWM, float dCutoffPWMs,
 	float dPenaltyGap, float dPenaltyMismatch, bool fNoRCs ) const {

File src/coalescestructsi.h

 
 	bool Open( std::istream&, CCoalesceMotifLibrary& );
 	uint32_t Open( const CHierarchy&, const std::vector<SMotifMatch>&, CCoalesceMotifLibrary&, size_t& );
+	uint32_t Open( const SMotifMatch&, const SMotifMatch&, CCoalesceMotifLibrary& );
 	std::string Save( const CCoalesceMotifLibrary*, bool = false, float = 0, float = 0, float = 0,
 		bool = false ) const;
 

File src/file.cpp

 
 namespace Sleipnir {
 
-bool CFileImpl::IsNewline( char c ) {
-
-	return ( ( c == '\n' ) || ( c == '\r' ) ); }
-
 /*!
  * \brief
  * Return the next tab-delimited token from the given input stream.
 
 	return strRet; }
 
-/*!
- * \brief
- * Return the next tab-delimited token from the given string.
- * 
- * \param szInput
- * String from which the token is read.
- * 
- * \param pcEnd
- * If non-null, outputs a pointer to the end of the token in the given string.
- * 
- * \returns
- * String containing all characters up to (but excluding) the next tab or newline.
- */
-string CFile::OpenToken( const char* szInput, const char** pcEnd ) {
-	string	strRet;
-	char	c;
-
-	do
-		c = *(szInput++);
-	while( c && ( c >= 0 ) && ( c < 256 ) && ( c != '\t' ) && isspace( c ) );
-	for( ; c && ( c != -1 ) && ( c != '\t' ) && !IsNewline( c ); c = *(szInput++) )
-		strRet += c;
-	if( pcEnd )
-		*pcEnd = szInput - ( ( !c || IsNewline( c ) ) ? 1 : 0 );
-
-	return strRet; }
-
 }
 class CFile : protected CFileImpl {
 public:
 	static std::string OpenToken( std::istream& istm );
-	static std::string OpenToken( const char* szInput, const char** pcEnd = NULL );
 
 	static size_t GetBufferSize( ) {
 
 		return c_iBufferSize; }
+
+	/*!
+	 * \brief
+	 * Return the next tab-delimited token from the given string.
+	 * 
+	 * \param szInput
+	 * String from which the token is read.
+	 * 
+	 * \param pcEnd
+	 * If non-null, outputs a pointer to the end of the token in the given string.
+	 * 
+	 * \returns
+	 * String containing all characters up to (but excluding) the next tab or newline.
+	 */
+	static std::string OpenToken( const char* szInput, const char** ppcEnd = NULL ) {
+		const char*	pcStart;
+		const char*	pcEnd;
+		char		c;
+
+		do
+			c = *(szInput++);
+		while( ( c > 0 ) && ( c != '\t' ) && isspace( c ) );
+		pcStart = szInput - 1;
+		for( ; ( c > 0 ) && ( c != '\t' ) && !IsNewline( c ); c = *(szInput++) );
+		pcEnd = szInput;
+		if( ppcEnd )
+			*ppcEnd = pcEnd - ( ( !c || IsNewline( c ) ) ? 1 : 0 );
+
+		return std::string( pcStart, pcEnd - pcStart - 1 ); }
 };
 
 }
 protected:
 	static const size_t c_iBufferSize	= 2097152;
 
-	static bool IsNewline( char );
-
 	static void SaveString( std::ostream& ostm, const std::string& str ) {
 		uint32_t	iLength;
 
 		istm.read( (char*)&iLength, sizeof(iLength) );
 		str.resize( iLength );
 		istm.read( &str[ 0 ], iLength ); }
+
+	static bool IsNewline( char c ) {
+
+		return ( ( c == '\n' ) || ( c == '\r' ) ); }
 };
 
 }
  */
 bool CPCL::Open( std::istream& istm, size_t iSkip ) {
 	vector<float>	vecdData;
-	size_t			i, j, k;
+	size_t			i;
 	char*			acBuf;
 	bool			fRet;
 
 				break; }
 		if( fRet ) {
 			m_Data.Initialize( GetGenes( ), GetExperiments( ) );
-			for( k = i = 0; ( k < vecdData.size( ) ) && ( i < m_Data.GetRows( ) ); ++i )
-				for( j = 0; j < m_Data.GetColumns( ); ++j )
-					m_Data.Set( i, j, vecdData[ k++ ] ); } }
+			for( i = 0; i < m_Data.GetRows( ); ++i )
+				m_Data.Set( i, &vecdData[ i * m_Data.GetColumns( ) ] ); } }
 	delete[] acBuf;
 
 	return fRet; }
 			m_vecstrFeatures.push_back( strToken );
 		else
 			m_vecstrExperiments.push_back( strToken );
-	if( !iToken )
+	if( m_vecstrExperiments.empty( ) )
 		g_CatSleipnir.error( "CPCLImpl::OpenExperiments( %d ) found no experiments", iFeatures );
 
-	return !!iToken; }
+	return !m_vecstrExperiments.empty( ); }
 
 bool CPCLImpl::OpenGene( std::istream& istmInput, std::vector<float>& vecdData, char* acLine, size_t iLine ) {
 	const char*	pc;
 	char*		pcEnd;
 	string		strToken;
-	size_t		iToken, iData;
+	size_t		iToken, iData, iBase, i;
 	float		d;
 
+	iBase = vecdData.size( );
 	istmInput.getline( acLine, iLine - 1 );
+	if( ( strToken = OpenToken( acLine ) ).empty( ) )
+		return false;
+	if( strToken == "EWEIGHT" )
+		return true;
+	if( !m_vecstrExperiments.empty( ) )
+		vecdData.resize( vecdData.size( ) + m_vecstrExperiments.size( ) );
 	for( iData = iToken = 0,pc = acLine; ( strToken = OpenToken( pc, &pc ) ).length( ) || *pc; ++iToken ) {
-		if( strToken == "EWEIGHT" )
-			return true;
 		if( !iToken ) {
 			m_mapstriGenes[ strToken ] = m_vecstrGenes.size( );
 			m_vecstrGenes.push_back( strToken ); }
 		else if( iToken < m_vecstrFeatures.size( ) )
 			m_vecvecstrFeatures[ iToken - 1 ].push_back( strToken );
+		else if( !m_vecstrExperiments.empty( ) && ( iData >= m_vecstrExperiments.size( ) ) )
+			return false;
 		else {
-			iData++;
 			d = (float)strtod( strToken.c_str( ), &pcEnd );
-			vecdData.push_back( ( !pcEnd || ( pcEnd == strToken.c_str( ) ) ) ? CMeta::GetNaN( ) : d ); } }
+			if( !pcEnd || ( pcEnd == strToken.c_str( ) ) )
+				d = CMeta::GetNaN( );
+			if( m_vecstrExperiments.empty( ) )
+				vecdData.push_back( d );
+			else if( ( i = ( iBase + iData ) ) >= vecdData.size( ) )
+				return false;
+			else
+				vecdData[ i ] = d; } }
 
 	if( m_vecstrExperiments.empty( ) )
 		m_vecstrExperiments.resize( vecdData.size( ) );
 	else
-		while( iData++ < m_vecstrExperiments.size( ) )
-			vecdData.push_back( CMeta::GetNaN( ) );
+		while( iData < m_vecstrExperiments.size( ) )
+			vecdData[ iBase + iData++ ] = CMeta::GetNaN( );
 
 	return !!iToken; }
 
 		return ( strOne.length( ) > strTwo.length( ) ); }
 };
 
-void CPST::RemoveRCs( const map<unsigned char, unsigned char>& mapccComplements, float dPenaltyGap,
-	float dPenaltyMismatch, CPST& PSTOut ) const {
+void CPST::RemoveRCs( float dPenaltyGap, float dPenaltyMismatch, CPST& PSTOut ) const {
 	size_t			i;
 	string			str;
 	vector<string>	vecstrAdd;
 	float			dOne, dTwo;
 	int				iOne, iTwo;
 
-	for( i = 0; i < m_sRoot.m_vecsChildren.size( ); ++i )
-		CPSTImpl::RemoveRCs( mapccComplements, m_sRoot.m_vecsChildren[ i ], str, vecstrAdd );
+	CPSTImpl::RemoveRCs( m_sRoot, 1.0f / m_iArity, str, vecstrAdd );
 	if( vecstrAdd.empty( ) )
 		return;
 
 	 */
 	CPST( size_t iArity ) : CPSTImpl(iArity) { }
 
-	void RemoveRCs( const std::map<unsigned char, unsigned char>& mapccComplements, float dPenaltyGap,
-		float dPenaltyMismatch, CPST& PSTOut ) const;
+	void RemoveRCs( float dPenaltyGap, float dPenaltyMismatch, CPST& PSTOut ) const;
 
 	/*!
 	 * \brief
 
 		return ( ( m_iDepth = CPSTImpl::Open( strPST, m_sRoot ) ) != -1 ); }
 
-	bool GetPWM( CFullMatrix<size_t>& MatPWM, const char* szSymbols ) const {
+	bool GetPWM( CFullMatrix<uint16_t>& MatPWM, const char* szSymbols ) const {
 		std::map<unsigned char, size_t>					mapciChars;
 		std::vector<size_t>								veciOrder;
 		std::map<unsigned char, size_t>::const_iterator	iterChar;
 				( mapciChars.size( ) + j++ ) : iterChar->second;
 		CMeta::Permute( MatPWM.Get( ), veciOrder );
 		return true; }
+
+	size_t Integrate( ) const {
+		size_t	iRet;
+
+		CPSTImpl::Integrate( m_sRoot, iRet = 0 );
+		return iRet; }
+
+	bool Simplify( ) {
+
+		return CPSTImpl::Simplify( 1.0f / m_iArity, m_sRoot ); }
 };
 
 }
 		std::vector<SNode>	m_vecsChildren;
 	};
 
-	static void RemoveRCs( const std::map<unsigned char, unsigned char>& mapccComplements, const SNode& sNode,
-		std::string& strSeq, std::vector<std::string>& vecstrOut ) {
+	static void RemoveRCs( const SNode& sNode, float dCutoff, std::string& strSeq,
+		std::vector<std::string>& vecstrOut ) {
 		size_t	i;
 
-		strSeq.push_back( sNode.m_cCharacter );
-		if( sNode.m_vecsChildren.empty( ) )
+		if( sNode.m_vecsChildren.empty( ) ) {
 			vecstrOut.push_back( strSeq );
-		else
-			for( i = 0; i < sNode.m_vecsChildren.size( ); ++i )
-				RemoveRCs( mapccComplements, sNode.m_vecsChildren[ i ], strSeq, vecstrOut );
-		strSeq.resize( strSeq.size( ) - 1 ); }
+			return; }
+
+		for( i = 0; i < sNode.m_vecsChildren.size( ); ++i ) {
+			const SNode&	sChild	= sNode.m_vecsChildren[ i ];
+
+			if( ( (float)sChild.m_iCount / sNode.m_iTotal ) < dCutoff )
+				continue;
+			strSeq.push_back( sChild.m_cCharacter );
+			RemoveRCs( sChild, dCutoff, strSeq, vecstrOut );
+			strSeq.resize( strSeq.size( ) - 1 ); } }
 
 	static std::string GetMotif( const SNode& sNode ) {
 		std::ostringstream	ossm;
 				Align( sChildThem, iDepth, iOffset, sChildUs, iBest, iCur ); } }
 		iOut = iBest; }
 
+
+	static void Integrate( const SNode& sNode, size_t& iSum ) {
+		size_t	i;
+
+		iSum += sNode.m_iTotal;
+		for( i = 0; i < sNode.m_vecsChildren.size( ); ++i )
+			Integrate( sNode.m_vecsChildren[ i ], iSum ); }
+
+	static bool Simplify( float dMinFrequency, SNode& sNode ) {
+		size_t		i;
+		uint16_t	iTotal;
+
+		iTotal = sNode.m_iTotal;
+		sNode.m_iTotal = 0;
+		for( i = 0; i < sNode.m_vecsChildren.size( ); ++i ) {
+			SNode&	sChild	= sNode.m_vecsChildren[ i ];
+
+			if( ( (float)sChild.m_iCount / iTotal ) < dMinFrequency )
+				sNode.m_vecsChildren.erase( sNode.m_vecsChildren.begin( ) + i-- );
+			else {
+				if( !Simplify( dMinFrequency, sChild ) )
+					return false;
+				sNode.m_iTotal += ( sChild.m_iCount = max( 1, (uint16_t)( sChild.m_iCount *
+					dMinFrequency ) ) ); } }
+
+		return true; }
+
 	CPSTImpl( size_t iArity ) : m_iDepth(0), m_iArity(iArity) { }
 
+	void Clear( ) {
+
+		m_iDepth = m_sRoot.m_iTotal = m_sRoot.m_iCount = 0;
+		m_sRoot.m_vecsChildren.clear( ); }
+
 	float GetMatch( const std::string& strTarget, const SNode& sNode, size_t iOffset,
 		size_t& iMatched ) const {
 		size_t	i, iCur, iMax;
 		return 1; }
 
 	bool GetPWM( const SNode& sNode, size_t iDepth, std::map<unsigned char, size_t>& mapciCharacters,
-		CFullMatrix<size_t>& MatPWM ) const {
+		CFullMatrix<uint16_t>& MatPWM ) const {
 		size_t											i, iChar;
 		std::map<unsigned char, size_t>::const_iterator	iterChar;
 

File tools/COALESCE/COALESCE.cpp

 *****************************************************************************/
 #include "stdafx.h"
 #include "cmdline.h"
-#include "pst.h"
 
 enum EFile {
 	EFilePCL,
 			&Motifs ) ) == -1 ) ) {
 			cerr << "Could not open: " << strFile << endl;
 			continue; }
-		if( !( vecstrClusters.size( ) % 100 ) )
+		if( !( vecstrClusters.size( ) % 50 ) )
 			cerr << "Opened cluster " << vecstrClusters.size( ) << "..." << endl;
 		vecstrClusters.push_back( strBase ); }
 	if( fFailed )
 
 		cerr << "Creating output cluster " << iID << endl;
 		if( !Cluster.Open( Hier, vecClustersFrom, vecstrClustersFrom,
-			(float)sArgs.fraction_postprocess_arg, (float)sArgs.cutoff_merge_arg, &Motifs ) )
+			(float)sArgs.fraction_postprocess_arg, (float)sArgs.cutoff_merge_arg, sArgs.max_motifs_arg,
+			&Motifs ) )
 			return false;
 		if( Cluster.GetGenes( ).size( ) < (size_t)sArgs.size_minimum_arg ) {
 			cerr << "Cluster too small: " << Cluster.GetGenes( ).size( ) << endl;

File tools/COALESCE/COALESCE.ggo

 							double	default="0.45"
 option	"min_zscore"	U	"Minimum motif z-score magnitude"
 							double	default="0.25"
+option	"max_motifs"	x	"Maximum motifs to merge exactly"
+							int	default="2500"
 
 section "Miscellaneous"
 option	"cache"			e	"Cache file for sequence analysis"

File tools/COALESCE/stdafx.h

 #include "halfmatrix.h"
 #include "meta.h"
 #include "pcl.h"
+#include "pst.h"
 using namespace Sleipnir;
 
 #endif // STDAFX_H

File tools/Clusters2Dab/Clusters2Dab.cpp

 				vecpClusters[ i ]->GetGene( j ).GetName( ) ) ) == mapstriGenes.end( ) ) ? -1 :
 				iterIndex->second; }
 
+	if( sArgs.size_flag )
+		for( iCluster = 0; iCluster < vecpClusters.size( ); ++iCluster )
+			vecdWeights[ iCluster ] = 1.0f / vecpClusters[ iCluster ]->GetGenes( );
 	if( sArgs.counts_flag )
 		for( iCluster = 0; iCluster < vecpClusters.size( ); ++iCluster ) {
 			const vector<size_t>&	veciOne	= vecveciGenes[ iCluster ];

File tools/Clusters2Dab/Clusters2Dab.ggo

 section "Optional"
 option	"counts"	c	"Calculate pair weight by cocluster frequency"
 						flag	off
+option	"size"		z	"Calculate pair weight by cluster size"
+						flag	off
 option	"skip"		s	"Columns to skip in input PCL"
 						int	default="2"
 option	"verbosity"	v	"Message verbosity"

File tools/Combiner/Combiner.cpp

 static const char			c_szHMean[]			= "hmean";
 static const char			c_szMax[]			= "max";
 static const char			c_szMin[]			= "min";
+static const char			c_szSum[]			= "sum";
 static const char			c_szVote[]			= "vote";
 
 int main( int iArgs, char** aszArgs ) {
 			for( k = ( j + 1 ); k < veciGenes.size( ); ++k ) {
 				if( ( ( iTwo = veciGenes[ k ] ) == -1 ) || CMeta::IsNaN( d = DatCur.Get( iOne, iTwo ) ) )
 					continue;
-				if( !strcmp( c_szMean, sArgs.method_arg ) ) {
+				if( !strcmp( c_szMean, sArgs.method_arg ) ||
+					!strcmp( c_szSum, sArgs.method_arg ) ) {
 					DatOut.Get( j, k ) += d;
 					HMatCounts.Get( j, k )++; }
 				else if( !strcmp( c_szGMean, sArgs.method_arg ) ) {

File tools/Combiner/Combiner.ggo

 option	"type"		t	"Output data file type"
 						values="pcl","dat","dab","module"	default="pcl"
 option	"method"	m	"Combination method"
-						values="min","max","mean","gmean","hmean"	default="mean"
+						values="min","max","mean","gmean","hmean","sum"	default="mean"
 option	"output"	o	"Output file"
 						string	typestr="filename"
 

File tools/OntoShell/parser.cpp

 const char	CParser::SLocation::c_szRoot[]	= "/";
 const char	CParser::c_szDot[]				= ".";
 const char	CParser::c_szDotDot[]			= "..";
-const char*	CParser::c_aszParsers[]			= { "cat", "cd", "find", "help", "ls", NULL };
+const char*	CParser::c_aszParsers[]			= { "cat", "cd", "find", "help", "ls", "parentage", NULL };
 
 CParser::SLocation::SLocation( ) : m_pOnto(NULL), m_iNode(-1) { }
 

File tools/OntoShell/parserconsole.cpp

 	"cd [path]                  Display or change current term.\n"
 	"find <filename> [p] [bkg]  Runs term finder on the given gene list.\n"
 	"help [command]             Provides help on command syntax.\n"
-	"ls [path]                  List parents, children, and annotations.";
+	"ls [path]                  List parents, children, and annotations.\n"
+	"parentage <onto> <file>    For terms in onto, list parents in the given set.";
 const CParserConsole::TPFnParser	CParserConsole::c_apfnParsers[]	=
 	{ &CParserConsole::ParseCat, &CParserConsole::ParseCd, &CParserConsole::ParseFind,
-	&CParserConsole::ParseHelp, &CParserConsole::ParseLs, NULL };
+	&CParserConsole::ParseHelp, &CParserConsole::ParseLs, &CParserConsole::ParseParentage, NULL };
 const char*					CParserConsole::c_aszHelps[]			= {
 	"cat [-l] [-r] [path]<gene>+\n\n"
 	"Displays the name, synonyms, and annotations for the given gene(s).\n"
 	"-g  Genes; deactives gene listings.\n"
 	"-s  Siblings; deactivates parent and child listings.\n"
 	"-r  Recursive; descend into child nodes.",
+	"parentage [-a] <ontology> <filename>\n\n"
+	"Loads an ontology slim from the given filename.  Then, for each term in\n"
+	"the indicated ontology, outputs the zero or more parents of that term that\n"
+	"fall within the given set.  This \"bubbles up\" the ontology to the level\n"
+	"given in the input slim file.  Optional flags are:\n"
+	"-a  All listings; include terms with no parents in the slim.",
 	NULL };
 
 CParserConsole::SArgs::SArgs( ) : m_fGenes(m_afFlags[ 0 ]), m_fLong(m_afFlags[ 1 ]),
 	i = strCmd.find( c_cShell );
 	system( strCmd.substr( i + 1 ).c_str( ) );
 	return true; }
+
+bool CParserConsole::ParseParentage( const vector<string>& vecstrLine ) {
+	string				strOnto, strFile;
+	CSlim				Slim;
+	const IOntology*	pOnto;
+	size_t				i, j;
+	ifstream			ifsm;
+	SArgs				sArgs;
+	vector<bool>		vecfTerms;
+
+	if( vecstrLine.size( ) < 2 )
+		return false;
+	for( i = 1; i < vecstrLine.size( ); ++i ) {
+		if( sArgs.Parse( vecstrLine[ i ] ) )
+			continue;
+		if( strOnto.empty( ) )
+			strOnto = vecstrLine[ i ];
+		else if( strFile.empty( ) )
+			strFile = vecstrLine[ i ]; }
+
+	pOnto = NULL;
+	for( i = 0; i < m_vecpOntologies.size( ); ++i )
+		if( strOnto == m_vecpOntologies[ i ]->GetID( ) ) {
+			pOnto = m_vecpOntologies[ i ];
+			break; }
+	if( !pOnto ) {
+		cout << "parentage, can't find ontology: " << strOnto << endl;
+		return false; }
+
+	ifsm.open( strFile.c_str( ) );
+	if( !( ifsm.is_open( ) && Slim.Open( ifsm, pOnto ) ) ) {
+		cout << "parentage, can't open file: " << strFile << endl;
+		return false; }
+	ifsm.close( );
+
+	vecfTerms.resize( pOnto->GetNodes( ) );
+	for( i = 0; i < Slim.GetSlims( ); ++i )
+		for( j = 0; j < Slim.GetNodes( i ); ++j )
+			vecfTerms[ Slim.GetNode( i, j ) ] = true;
+	for( i = 0; i < pOnto->GetNodes( ); ++i ) {
+		set<size_t>					setiParents;
+		set<size_t>::const_iterator	iterParent;
+		vector<size_t>				veciIntersection;
+
+		pOnto->GetParents( i, setiParents );
+		for( iterParent = setiParents.begin( ); iterParent != setiParents.end( ); ++iterParent )
+			if( vecfTerms[ *iterParent ] )
+				veciIntersection.push_back( *iterParent );
+		if( veciIntersection.empty( ) && !sArgs.m_fZeroes )
+			continue;
+		cout << pOnto->GetID( i );
+		for( j = 0; j < veciIntersection.size( ); ++j )
+			cout << '\t' << pOnto->GetID( veciIntersection[ j ] );
+		cout << endl; }
+
+	return true; }

File tools/OntoShell/parserconsole.h

 	bool ParseHelp( const std::vector<string>& );
 	bool ParseLs( const std::vector<string>& );
 	bool ParseFind( const std::vector<string>& );
+	bool ParseParentage( const std::vector<string>& );
 	bool ParseShell( const string& ) const;
 	void PrintOntology( const Sleipnir::IOntology*, char ) const;
 	void PrintLocations( const std::vector<SLocation>&, const SArgs& ) const;