Commits

Curtis Huttenhower committed 2ad1462

Fix bug in CDat::Open( CDat, vector<CGenes>*, ... ) for importing known negatives
Thanks to Maria Chikina!
Fix bug in CMeasureImpl::MeasureTrim for reordering unpaired vectors
Fix bug in CPCL::Open error message generation
Add ability to OpenGene to deal correctly with changing line widths
Fix default Answerer behavior with positive/negative gene sets
Add Bonferroni-corrected p-value output to Clinician
Improve formatting of gene set lookup output in Dab2Dad

Comments (0)

Files changed (7)

 	for( i = 0; i < DatKnown.GetGenes( ); ++i ) {
 		iOne = veciGenes[ i ];
 		for( j = ( i + 1 ); j < DatKnown.GetGenes( ); ++j ) {
-			if( CMeta::IsNaN( Get( iOne, iTwo = veciGenes[ j ] ) ) ) {
-				if( CMeta::IsNaN( d = DatKnown.Get( i, j ) ) )
-					Set( iOne, iTwo, 0 );
-				else if( fKnownNegatives == !d )
-					Set( iOne, iTwo, d ); } } }
+			iTwo = veciGenes[ j ];
+			if( CMeta::IsNaN( d = DatKnown.Get( i, j ) ) ) {
+				if( fKnownNegatives && CMeta::IsNaN( Get( iOne, iTwo ) ) )
+					Set( iOne, iTwo, 0 ); }
+			else if( fKnownNegatives == !d )
+				Set( iOne, iTwo, d ); } }
 
 	return true; }
 
 	float*	adB;
 	float*	adWA;
 	float*	adWB;
-	size_t	i, j, iA, iB;
+	size_t	i, iA, iB;
 	double	dRet;
 
 	adA = new float[ iM ];
 	adWA = adWX ? new float[ iM ] : NULL;
 	adWB = adWY ? new float[ iN ] : NULL;
 	if( fAlign ) {
-		for( i = j = 0; i < min( iM, iN ); ++i )
+		for( i = iA = 0; i < min( iM, iN ); ++i )
 			if( !( CMeta::IsNaN( adX[ i ] ) || CMeta::IsNaN( adY[ i ] ) ) ) {
 				if( adWA )
-					adWA[ j ] = adWX[ i ];
+					adWA[ iA ] = adWX[ i ];
 				if( adWB )
-					adWB[ j ] = adWY[ i ];
-				adA[ j ] = adX[ i ];
-				adB[ j++ ] = adY[ i ]; } }
+					adWB[ iA ] = adWY[ i ];
+				adA[ iA ] = adX[ i ];
+				adB[ iA++ ] = adY[ i ]; } }
 	else {
-		for( i = j = 0; i < iM; ++i )
+		for( i = iA = 0; i < iM; ++i )
 			if( !CMeta::IsNaN( adX[ i ] ) ) {
 				if( adWA )
-					adWA[ j ] = adWX[ i ];
-				adA[ j++ ] = adX[ i ]; }
-		for( i = j = 0; i < iN; ++i )
+					adWA[ iA ] = adWX[ i ];
+				adA[ iA++ ] = adX[ i ]; }
+		for( i = iB = 0; i < iN; ++i )
 			if( !CMeta::IsNaN( adY[ i ] ) ) {
 				if( adWB )
-					adWB[ j ] = adWY[ i ];
-				adB[ j++ ] = adY[ i ]; } }
+					adWB[ iB ] = adWY[ i ];
+				adB[ iB++ ] = adY[ i ]; } }
 
 	dRet = pMeasure->Measure( adA, iA, adB, iB, eMap, adWA, adWB );
 	delete[] adA;
 		for( fRet = !!GetGenes( ),i = 0; i < GetGenes( ); ++i )
 			if( GetGene( i ).empty( ) || !isprint( GetGene( i )[ 0 ] ) ) {
 				fRet = false;
-				g_CatSleipnir( ).error( "CPCL::Open( %d ) invalid gene at index %d: %s", i,
-					GetGene( i ).c_str( ) );
+				g_CatSleipnir( ).error( "CPCL::Open( %d ) invalid gene at index %d: %s", iSkip,
+					i, GetGene( i ).c_str( ) );
 				break; }
 		if( fRet ) {
 			m_Data.Initialize( GetGenes( ), GetExperiments( ) );
 	return true; }
 
 bool CPCLImpl::OpenGene( std::istream& istmInput, std::vector<float>& vecdData, char* acLine, size_t iLine ) {
-	const char*	pc;
-	char*		pcEnd;
-	string		strToken;
-	size_t		iToken, iData, iBase, i;
-	float		d;
+	const char*			pc;
+	char*				pcEnd;
+	string				strToken;
+	size_t				iToken, iData, iBase, i;
+	float				d;
+	map<string, size_t>	mapstriValues;
+	map<string, size_t>::iterator	iterValue;
 
 	iBase = vecdData.size( );
 	istmInput.getline( acLine, iLine - 1 );
 		else if( !m_vecstrExperiments.empty( ) && ( iData >= m_vecstrExperiments.size( ) ) )
 			return false;
 		else {
-			d = (float)strtod( strToken.c_str( ), &pcEnd );
-			if( !pcEnd || ( pcEnd == strToken.c_str( ) ) )
-				d = CMeta::GetNaN( );
+			d = CMeta::GetNaN( );
+			strToken = CMeta::Trim( strToken.c_str( ) );
+			if( strToken.length( ) ) {
+				d = (float)strtod( strToken.c_str( ), &pcEnd );
+				if( pcEnd != ( strToken.c_str( ) + strToken.length( ) ) ) {
+					iterValue = mapstriValues.find( strToken );
+					if( iterValue == mapstriValues.end( ) ) {
+						i = mapstriValues.size( );
+						mapstriValues[strToken] = i;
+						d = i; }
+					else
+						d = iterValue->second; } }
 			if( m_vecstrExperiments.empty( ) )
 				vecdData.push_back( d );
 			else if( ( i = ( iBase + iData++ ) ) >= vecdData.size( ) )
  * 
  * \section sec_history Version History
  * 
- * - <a href="sleipnir-2.2.tar.gz">2.2</a>, *** <br>
+ * - <a href="sleipnir-3.0.tar.gz">3.0</a>, *** <br>
  * Fix confusing documentation in \ref Answerer - thanks to Arjun Krishnan! <br>
  * Fix missing \c SIZE_MAX definition on Mac OS X - thanks to Alice Koechlin! <br>
+ * Fix bug in \ref Answerer when using predefined positive pairs - thanks to Chris Park! <br>
  * Add Partial Correlation Coefficient normalization to \c CDat and \ref Normalizer - thanks to Arjun Krishnan!
  * 
  * - <a href="sleipnir-2.1.tar.gz">2.1</a>, 12-20-09 <br>

tools/Answerer/Answerer.cpp

 			return 1; }
 		for( i = 0; i < DatPositives.GetGenes( ); ++i )
 			Genome.AddGene( DatPositives.GetGene( i ) );
-		if( !Dat.Open( DatPositives, vecpNegatives, Genome, true ) ) {
+		if( !Dat.Open( DatPositives, vecpNegatives, Genome, false ) ) {
 			cerr << "Could not open " << sArgs.input_arg << " with negatives" << endl;
 			return 1; } }
 	else

tools/Clinician/Clinician.cpp

 			k = veciPCL2Genes[veciFinal[j]];
 			d = (float)( sArgs.spearman_flag ? CStatistics::PValueSpearman : CStatistics::PValuePearson )( vecdScores[k], veciScores[k] );
 			cout << PCL.GetGene( i ) << '\t' << PCL.GetGene( veciFinal[j] ) << '\t' << vecdScores[k] << '\t' << veciScores[k] << '\t' <<
-				( d * iGene ) << endl; } }
+				d << '\t' << ( d * iGene ) << endl; } }
 
 	return 0; }

tools/Dab2Dad/Dab2Dad.cpp

 			cerr << "Couldn't open: " << sArgs.lookups_arg << endl;
 			return 1; }
 		ifsm.close( );
-		if( sArgs.lookup1_arg ) {
-			cout << "ID";
+
+		cout << "GID";
+		if( sArgs.lookup1_arg )
 			for( i = 0; i < GenesLk.GetGenes( ); ++i )
 				cout << '\t' << GenesLk.GetGene( i ).GetName( );
-			cout << endl; }
+		else
+			for( i = 0; i < GenesLk.GetGenes( ); ++i ) {
+				const string&	strOne	= GenesLk.GetGene( i ).GetName( );
+				for( j = ( i + 1 ); j < GenesLk.GetGenes( ); ++j )
+					cout << '\t' << strOne << '-' << GenesLk.GetGene( j ).GetName( ); }
+		cout << endl;
+
 		veciGenes.resize( GenesLk.GetGenes( ) );
 		for( i = 0; i < sArgs.inputs_num; ++i ) {
 			CDataPair	Dat;
 							cout << d; } }
 			else
 				for( j = 0; j < veciGenes.size( ); ++j ) {
-					if( ( iOne = veciGenes[ j ] ) == -1 )
-						continue;
-					for( k = ( j + 1 ); k < veciGenes.size( ); ++k )
-						if( ( ( iTwo = veciGenes[ k ] ) != -1 ) &&
+					iOne = veciGenes[ j ];
+					for( k = ( j + 1 ); k < veciGenes.size( ); ++k ) {
+						cout << '\t';
+						if( ( iOne != -1 ) && ( ( iTwo = veciGenes[ k ] ) != -1 ) &&
 							!CMeta::IsNaN( d = Dat.Get( iOne, iTwo ) ) )
-							cout << '\t' << Dat.Get( iOne, iTwo ); }
+							cout << Dat.Get( iOne, iTwo ); } }
 			cout << endl; }
 		return 0; }
 	else if( sArgs.lookup1_arg && sArgs.lookup2_arg ) {