Anonymous committed d6b169b

Improve COALESCE termination criteria (slightly more permissive)
Fix bug in missing value handling for aligned measures in CMeasure
Fix bug for ties in rank transform in CPCL::RankTransform
Add PValueSpearman, FisherTransform, better TCDF to CStatistics
Fix bug in newline handling in COALESCE
Add Spearman correlation option to Clinician
Fix bug in default edge cutoff value in Cliquer (none by default)
Add randomization scoring to DChecker
Add gene set limitation option to Dat2Dab
Add hub queries to Dat2Graph
Add sort mode option to Explainer (by prediction, gold standard, or diff)
Add gene (node) weighting option to Hubber
Add missing self-MI bias estimation to MIer (thanks to Maria Chikina!)
Fix missing newlines in SpeciesConnector

Comments (0)

Files changed (85)

 		g_CatSleipnir( ).notice( "correlation pairs %d, bases %d, min size %d, merge size %d, max size %d",
 			GetNumberCorrelation( ), GetBasesPerMatch( ), GetSizeMinimum( ), GetSizeMerge( ),
 			GetSizeMaximum( ) ); }
-	for( dFailure = 1; dFailure > c_dEpsilon; dFailure *= GetPValueCorrelation( ) ) {
+	for( dFailure = 1; dFailure >= c_dEpsilon; dFailure *= max( pow( c_dEpsilon, 0.125 ), (double)GetPValueCorrelation( ) ) ) {
 		CCoalesceCluster			Cluster, Pot;
 		CCoalesceGroupHistograms	HistsCluster( GetBins( ), 1.0f / GetBasesPerMatch( ) );
 		CCoalesceGroupHistograms	HistsPot( GetBins( ), 1.0f / GetBasesPerMatch( ) );

src/coalescecluster.cpp

 			dMaxCorr = vecsThreads[ i ].m_dMaxCorr;
 			iOne = vecsThreads[ i ].m_iOne;
 			iTwo = vecsThreads[ i ].m_iTwo; } }
-	if( ( dMinP * PCL.GetGenes( ) * ( PCL.GetGenes( ) - 1 ) * dFraction * dFraction ) < ( dPValue * 2 ) ) {
+	if( ( dMinP * PCL.GetGenes( ) * ( PCL.GetGenes( ) - 1 ) * dFraction * dFraction ) <= ( dPValue * 2 ) ) {
 		g_CatSleipnir( ).info( "CCoalesceClusterImpl::AddSeedPair( %g, %g ) seeding: %s, %s, %g (p=%g)",
 			dFraction, dPValue, PCL.GetGene( iOne ).c_str( ), PCL.GetGene( iTwo ).c_str( ), dMaxCorr, dMinP );
 		priiSeed.first = iOne;
 		if( !IsGene( iGene ) &&
 			( ( dR = CMeasurePearson::Pearson( &m_vecdCentroid.front( ), PCL.GetExperiments( ),
 			PCL.Get( iGene ), PCL.GetExperiments( ), IMeasure::EMapNone, NULL, NULL, &iN ) ) > 0 ) &&
-			( ( CStatistics::PValuePearson( dR, iN ) * PCL.GetGenes( ) ) < dPValue ) )
+			( ( CStatistics::PValuePearson( dR, iN ) * PCL.GetGenes( ) ) <= dPValue ) )
 			Add( iGene, Pot );
 
 	return true; }
 	return false; }
 
 double CMeasureImpl::MeasureTrim( const IMeasure* pMeasure, const float* adX, size_t iM, const float* adY,
-	size_t iN, const IMeasure::EMap eMap, const float* adWX, const float* adWY ) {
+	size_t iN, const IMeasure::EMap eMap, const float* adWX, const float* adWY, bool fAlign ) {
 	float*	adA;
 	float*	adB;
 	float*	adWA;
 	size_t	i, j, iA, iB;
 	double	dRet;
 
-	for( iA = i = 0; i < iM; ++i )
-		if( CMeta::IsNaN( adX[ i ] ) )
-			iA++;
-	for( iB = i = 0; i < iN; ++i )
-		if( CMeta::IsNaN( adY[ i ] ) )
-			iB++;
-	iA = iM - iA;
-	iB = iN - iB;
-
-	adA = new float[ iA ];
-	adWA = adWX ? new float[ iA ] : NULL;
-	for( i = j = 0; i < iM; ++i )
-		if( !CMeta::IsNaN( adX[ i ] ) ) {
-			if( adWA )
-				adWA[ j ] = adWX[ i ];
-			adA[ j++ ] = adX[ i ]; }
-	adB = new float[ iB ];
-	adWB = adWY ? new float[ iB ] : NULL;
-	for( i = j = 0; i < iN; ++i )
-		if( !CMeta::IsNaN( adY[ i ] ) ) {
-			if( adWB )
-				adWB[ j ] = adWY[ i ];
-			adB[ j++ ] = adY[ i ]; }
+	adA = new float[ iM ];
+	adB = new float[ iN ];
+	adWA = adWX ? new float[ iM ] : NULL;
+	adWB = adWY ? new float[ iN ] : NULL;
+	if( fAlign ) {
+		for( i = j = 0; i < min( iM, iN ); ++i )
+			if( !( CMeta::IsNaN( adX[ i ] ) || CMeta::IsNaN( adY[ i ] ) ) ) {
+				if( adWA )
+					adWA[ j ] = adWX[ i ];
+				if( adWB )
+					adWB[ j ] = adWY[ i ];
+				adA[ j ] = adX[ i ];
+				adB[ j++ ] = adY[ i ]; } }
+	else {
+		for( i = j = 0; i < iM; ++i )
+			if( !CMeta::IsNaN( adX[ i ] ) ) {
+				if( adWA )
+					adWA[ j ] = adWX[ i ];
+				adA[ j++ ] = adX[ i ]; }
+		for( i = j = 0; i < iN; ++i )
+			if( !CMeta::IsNaN( adY[ i ] ) ) {
+				if( adWB )
+					adWB[ j ] = adWY[ i ];
+				adB[ j++ ] = adY[ i ]; } }
 
 	dRet = pMeasure->Measure( adA, iA, adB, iB, eMap, adWA, adWB );
 	delete[] adA;
 	if( adWX || adWY )
 		return CMeta::GetNaN( );
 	if( CMeasureImpl::IsNaN( adX, iM ) || CMeasureImpl::IsNaN( adY, iN ) )
-		return CMeasureImpl::MeasureTrim( this, adX, iM, adY, iN, eMap, adWX, adWY );
+		return CMeasureImpl::MeasureTrim( this, adX, iM, adY, iN, eMap, adWX, adWY, false );
 	if( iM > iN )
 		return Measure( adY, iN, adX, iM, eMap, adWY, adWX );
 
 	if( iM != iN )
 		return CMeta::GetNaN( );
 	if( CMeasureImpl::IsNaN( adX, iM ) || CMeasureImpl::IsNaN( adY, iN ) )
-		return CMeasureImpl::MeasureTrim( this, adX, iM, adY, iN, eMap, adWX, adWY );
+		return CMeasureImpl::MeasureTrim( this, adX, iM, adY, iN, eMap, adWX, adWY, true );
 
 	dRet = ( adWX || adWY ) ? CMeasureKendallsTauImpl::MeasureWeighted( adX, adY, iN, adWX,
 		adWY ) : CMeasureKendallsTauImpl::MeasureUnweighted( adX, adY, iN );
 	if( ( iM != iN ) || adWX || adWY )
 		return CMeta::GetNaN( );
 	if( CMeasureImpl::IsNaN( adX, iM ) || CMeasureImpl::IsNaN( adY, iN ) )
-		return CMeasureImpl::MeasureTrim( this, adX, iM, adY, iN, eMap, adWX, adWY );
+		return CMeasureImpl::MeasureTrim( this, adX, iM, adY, iN, eMap, adWX, adWY, true );
 
 	if( m_fTransformed ) {
 		dSum = 0;
 	dP = CMeasurePearson::Pearson( adX, iM, adY, iN, EMapNone, adWX, adWY );
 	if( fabs( dP ) >= c_dBound )
 		dP *= c_dBound;
-	dP = log( ( 1 + dP ) / ( 1 - dP ) ) / 2;
+	dP = CStatistics::FisherTransform( dP );
 	if( m_dAverage != HUGE_VAL )
 		dP = ( dP - m_dAverage ) / m_dStdDev;
 	return dP; }
 	friend class CMeasureSpearman;
 
 	static double MeasureTrim( const IMeasure*, const float*, size_t, const float*, size_t, const IMeasure::EMap,
-		const float*, const float* );
+		const float*, const float*, bool );
 	static bool IsNaN( const float*, size_t );
 
 	CMeasureImpl( const IMeasure*, bool );
  * IMeasure::IsRank
  */
 void CPCL::RankTransform( ) {
-	size_t	i, j, k;
-	size_t*	aiRanks;
+	size_t			i, j, k;
+	vector<size_t>	veciRanks, veciCounts;
 
-	aiRanks = new size_t[ m_Data.GetColumns( ) ];
-	for( i = 0; i < m_Data.GetRows( ); ++i ) {
-		memset( aiRanks, 0, m_Data.GetColumns( ) * sizeof(*aiRanks) );
-		for( j = 0; j < m_Data.GetColumns( ); ++j )
-			for( k = 0; k < m_Data.GetColumns( ); ++k )
-				if( ( j != k ) && ( m_Data.Get( i, k ) < m_Data.Get( i, j ) ) )
-					aiRanks[ j ]++;
-		for( j = 0; j < m_Data.GetColumns( ); ++j )
-			m_Data.Set( i, j, (float)aiRanks[ j ] ); }
-	delete[] aiRanks; }
+	veciRanks.resize( GetExperiments( ) );
+	veciCounts.resize( GetExperiments( ) );
+	for( i = 0; i < GetGenes( ); ++i ) {
+		fill( veciRanks.begin( ), veciRanks.end( ), 0 );
+		for( j = 0; j < GetExperiments( ); ++j ) {
+			if( CMeta::IsNaN( Get( i, j ) ) )
+				continue;
+			for( k = 0; k < GetExperiments( ); ++k ) {
+				if( CMeta::IsNaN( Get( i, k ) ) )
+					continue;
+				if( ( j != k ) && ( Get( i, k ) < Get( i, j ) ) )
+					veciRanks[ j ]++; } }
+
+		fill( veciCounts.begin( ), veciCounts.end( ), 0 );
+		for( j = 0; j < GetExperiments( ); ++j )
+			if( !CMeta::IsNaN( Get( i, j ) ) )
+				veciCounts[ veciRanks[ j ] ]++;
+
+		for( j = 0; j < GetExperiments( ); ++j )
+			if( !CMeta::IsNaN( Get( i, j ) ) ) {
+				k = veciRanks[ j ];
+// Tied values share the mean of their 1-based ranks k+1 .. k+n, which in closed form is k + (n + 1) / 2
+				Set( i, j, k + ( ( veciCounts[ k ] + 1 ) / 2.0f ) ); } } }
 
 /*!
  * \brief
 	 * P-value corresponding to the given correlation and array size.
 	 * 
 	 * \see
-	 * CMeasurePearson
+	 * CMeasurePearson | PValueSpearman
 	 */
 	static double PValuePearson( double dR, size_t iN ) {
 		static const double	c_dEpsilon	= 1e-10;
 			return 0;
 		dF = iN - 2;
 		dT = dR * sqrt( dF / ( 1 - ( dR * dR ) ) );
-		return IncompleteBeta( dF / 2, 0.5, dF / ( dF + ( dT * dT ) ) ); }
+		return ( 1 - TCDF( dT, dF ) ); }
+
+	/*!
+	 * \brief
+	 * Return the two-tailed p-value of a Spearman correlation.
+	 * 
+	 * \param dR
+	 * Spearman correlation.
+	 * 
+	 * \param iN
+	 * Length of correlated vectors.
+	 * 
+	 * \returns
+	 * P-value corresponding to the given correlation and array size; 1 if iN is less than 3.
+	 * 
+	 * \see
+	 * CMeasureSpearman | PValuePearson
+	 */
+	static double PValueSpearman( double dR, size_t iN ) {
+		double	dT;
+
+		if( iN < 3 )
+			return 1;
+
+//		dZ = sqrt( ( iN - 3 ) / 1.06 ) * CStatistics::FisherTransform( dR );
+		dT = dR * sqrt( ( iN - 2 ) / ( 1 - ( dR * dR ) ) );
+		return ( 1 - TCDF( dT, iN - 2 ) ); }
+
+	static double FisherTransform( double dR ) {
+
+		return ( log( ( 1 + dR ) / ( 1 - dR ) ) / 2 ); }
 
 	/*!
 	 * \brief
 		dPoolVar = ( ( ( iNOne - 1 ) * dVarianceOne ) + ( ( iNTwo - 1 ) * dVarianceTwo ) ) / iDegFree;
 		dT = ( dMeanOne - dMeanTwo ) / sqrt( dPoolVar * ( ( 1.0 / iNOne ) + ( 1.0 / iNTwo ) ) );
 
-		return IncompleteBeta( 0.5 * iDegFree, 0.5, iDegFree / ( iDegFree + ( dT * dT ) ) ); }
+		return ( 1 - TCDF( dT, iDegFree ) ); }
 
 	/*!
 	 * \brief
 		iDegFree = iN - 1;
 		dT = sqrt( (float)iN ) * dMean / sqrt( dVariance );
 
-		return IncompleteBeta( 0.5 * iDegFree, 0.5, iDegFree / ( iDegFree + ( dT * dT ) ) ); }
+		return ( 1 - TCDF( dT, iDegFree ) ); }
 
 	/*!
 	 * \brief
 			( ( dVarianceTwo * dVarianceTwo ) / iNTwo / iNTwo / ( iNTwo - 1 ) ) );
 		dT = ( dMeanOne - dMeanTwo ) / sqrt( ( dVarianceOne / iNOne ) + ( dVarianceTwo / iNTwo ) );
 
-		return IncompleteBeta( 0.5 * dDegFree, 0.5, dDegFree / ( dDegFree + ( dT * dT ) ) ); }
+		return ( 1 - TCDF( dT, dDegFree ) ); }
 
 	/*!
 	 * \brief
 	 * \param dT
 	 * T value at which to sample the t-distribution.
 	 * 
-	 * \param iDF
+	 * \param dDF
 	 * Degrees of freedom of the desired t-distribution.
 	 * 
 	 * \returns
 	 * One minus the p-value of the given T and degrees of freedom (the p-value functions return 1 - TCDF).
 	 */
-	static double TCDF( double dT, size_t iDF ) {
+	static double TCDF( double dT, double dDF ) {
 
-		return ( 1 - IncompleteBeta( 0.5 * iDF, 0.5, iDF / ( iDF + ( dT * dT ) ) ) ); }
+		return ( 1 - IncompleteBeta( 0.5 * dDF, 0.5, dDF / ( dDF + ( dT * dT ) ) ) ); }
 
 	/*!
 	 * \brief

tools/Answerer/cmdline.c

 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iAnswerer.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iAnswerer.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

tools/BNConverter/cmdline.c

 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iBNConverter.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iBNConverter.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

tools/BNCreator/cmdline.c

 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iBNCreator.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iBNCreator.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

tools/BNEvaluator/cmdline.c

 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iBNEvaluator.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iBNEvaluator.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

tools/BNFunc/cmdline.c

 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iBNFunc.ggo --default-optional -C -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iBNFunc.ggo --default-optional -C -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

tools/BNServer/cmdline.c

 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iBNServer.ggo --default-optional -C -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iBNServer.ggo --default-optional -C -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

tools/BNTester/cmdline.c

 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iBNTester.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iBNTester.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

tools/BNTruster/cmdline.c

 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iBNTruster.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iBNTruster.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

tools/BNUnraveler/cmdline.c

 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iBNUnraveler.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iBNUnraveler.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

tools/BNWeaver/cmdline.c

 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iBNWeaver.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iBNWeaver.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

tools/BNs2Txt/cmdline.c

 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iBNs2Txt.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iBNs2Txt.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

tools/COALESCE/COALESCE.cpp

 
 			ifsm.getline( acBuffer, c_iBuffer - 1 );
 			acBuffer[ c_iBuffer - 1 ] = 0;
-			CMeta::Tokenize( acBuffer, vecstrLine );
+			CMeta::Tokenize( CMeta::Trim( acBuffer ).c_str( ), vecstrLine );
 			for( i = 0; i < vecstrLine.size( ); ++i )
 				for( j = 0; j < PCL.GetExperiments( ); ++j )
 					if( vecstrLine[ i ] == PCL.GetExperiment( j ) ) {

tools/COALESCE/cmdline.c

 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iCOALESCE.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iCOALESCE.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

tools/Clinician/Clinician.cpp

 	vector<bool>		vecfClinical;
 	vector<size_t>		veciGenes2PCL, veciPCL2Genes, veciIndices, veciScores;
 	vector<float>		vecdScores;
-	CMeasurePearson		MeasurePearson;
 	CGenome				Genome;
+	float				d;
 
 	if( cmdline_parser( iArgs, aszArgs, &sArgs ) ) {
 		cmdline_parser_print_help( );
 	if( PCL.GetFeatures( ) < 2 ) {
 		cerr << "PCL requires at least one clinical variable feature" << endl;
 		return 1; }
+	if( sArgs.spearman_flag )
+		PCL.RankTransform( );
 	if( sArgs.global_arg && !Dat.Open( sArgs.global_arg, !!sArgs.memmap_flag ) ) {
 		cerr << "Could not open: " << sArgs.global_arg << endl;
 		return 1; }
 
 		for( j = 0; j < veciFinal.size( ); ++j ) {
 			k = veciPCL2Genes[veciFinal[j]];
+			d = (float)( sArgs.spearman_flag ? CStatistics::PValueSpearman : CStatistics::PValuePearson )( vecdScores[k], veciScores[k] );
 			cout << PCL.GetGene( i ) << '\t' << PCL.GetGene( veciFinal[j] ) << '\t' << vecdScores[k] << '\t' << veciScores[k] << '\t' <<
-				( CStatistics::PValuePearson( vecdScores[k], veciScores[k] ) * iGene ) << endl; } }
+				( d * iGene ) << endl; } }
 
 	return 0; }

tools/Clinician/Clinician.ggo

 								int	default="1000"
 option	"hefalmp"			a	"Perform HEFalMp query instead of bioPIXIE query"
 								flag	on
+option	"spearman"			p	"Use Spearman in place of Pearson correlation"
+								flag	off
 
 section "Optional"
 option	"skip"				s	"Columns to skip in input PCL"

tools/Cliquer/Cliquer.cpp

 	if( cmdline_parser( iArgs, aszArgs, &sArgs ) ) {
 		cmdline_parser_print_help( );
 		return 1; }
+	if( sArgs.cutoff_arg < -1e-20 )
+		sArgs.cutoff_arg = CMeta::GetNaN( );
 	CMeta Meta( sArgs.verbosity_arg );
 
 	if( sArgs.input_arg ) {
 		return 1; }
 	if( sArgs.normalize_flag )
 		Dat.Normalize( CDat::ENormalizeSigmoid );
-	if( sArgs.cutoff_arg )
+	if( !CMeta::IsNaN( sArgs.cutoff_arg ) )
 		for( i = 0; i < Dat.GetGenes( ); ++i )
 			for( j = ( i + 1 ); j < Dat.GetGenes( ); ++j )
 				if( !CMeta::IsNaN( d = Dat.Get( i, j ) ) && ( d < sArgs.cutoff_arg ) )

tools/Cliquer/Cliquer.ggo

 option	"normalize"		n	"Normalize input file"
 							flag	off
 option	"cutoff"		c	"Exclude edges below cutoff"
-							double	default="0"
+							double	default="-1e30"
 
 section "Optional"
 option	"memmap"		m	"Memory map input"

tools/Cliquer/cmdline.c

 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iCliquer.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iCliquer.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:
   "\nPreprocessing:",
   "  -k, --knowns=filename     Known interactions (DAT/DAB) to ignore",
   "  -n, --normalize           Normalize input file  (default=off)",
-  "  -c, --cutoff=DOUBLE       Exclude edges below cutoff  (default=`0')",
+  "  -c, --cutoff=DOUBLE       Exclude edges below cutoff  (default=`-1e30')",
   "\nOptional:",
   "  -m, --memmap              Memory map input  (default=off)",
   "  -v, --verbosity=INT       Message verbosity  (default=`5')",
   args_info->knowns_arg = NULL;
   args_info->knowns_orig = NULL;
   args_info->normalize_flag = 0;
-  args_info->cutoff_arg = 0;
+  args_info->cutoff_arg = -1e30;
   args_info->cutoff_orig = NULL;
   args_info->memmap_flag = 0;
   args_info->verbosity_arg = 5;
         
           if (update_arg( (void *)&(args_info->cutoff_arg), 
                &(args_info->cutoff_orig), &(args_info->cutoff_given),
-              &(local_args_info.cutoff_given), optarg, 0, "0", ARG_DOUBLE,
+              &(local_args_info.cutoff_given), optarg, 0, "-1e30", ARG_DOUBLE,
               check_ambiguity, override, 0, 0,
               "cutoff", 'c',
               additional_error))

tools/Cliquer/cmdline.h

   const char *knowns_help; /**< @brief Known interactions (DAT/DAB) to ignore help description.  */
   int normalize_flag;	/**< @brief Normalize input file (default=off).  */
   const char *normalize_help; /**< @brief Normalize input file help description.  */
-  double cutoff_arg;	/**< @brief Exclude edges below cutoff (default='0').  */
+  double cutoff_arg;	/**< @brief Exclude edges below cutoff (default='-1e30').  */
   char * cutoff_orig;	/**< @brief Exclude edges below cutoff original value given at command line.  */
   const char *cutoff_help; /**< @brief Exclude edges below cutoff help description.  */
   int memmap_flag;	/**< @brief Memory map input (default=off).  */

tools/Clusterer/cmdline.c

 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iClusterer.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iClusterer.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

tools/Clusters2Dab/cmdline.c

 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iClusters2Dab.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iClusters2Dab.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

tools/Combiner/cmdline.c

 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iCombiner.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iCombiner.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

tools/Contexter/cmdline.c

 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iContexter.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iContexter.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

tools/Counter/cmdline.c

 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iCounter.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iCounter.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

tools/DChecker/DChecker.cpp

 int main( int iArgs, char** aszArgs ) {
 	CDat				Answers, Data;
 	gengetopt_args_info	sArgs;
-	size_t				i, j, k, m, iOne, iTwo, iGenes, iPositives, iNegatives, iBins;
+	size_t				i, j, k, m, iOne, iTwo, iGenes, iPositives, iNegatives, iBins, iRand;
 	vector<size_t>		veciGenes, veciRec, veciRecTerm;
 	CFullMatrix<bool>	MatGenes;
 	CFullMatrix<size_t>	MatResults;
 	veciGenes.resize( Answers.GetGenes( ) );
 	for( i = 0; i < Answers.GetGenes( ); ++i )
 		veciGenes[ i ] = Data.GetGene( Answers.GetGene( i ) );
-	if( sArgs.finite_flag ) {
-		vector<float>	vecdValues;
-		{
-			set<float>		setdValues;
+	for( iRand = 0; iRand <= (size_t)sArgs.randomize_arg; ++iRand ) {
+		if( iRand )
+			Data.Randomize( );
+		if( sArgs.finite_flag ) {
+			vector<float>	vecdValues;
+			{
+				set<float>		setdValues;
 
-			for( i = 0; i < Answers.GetGenes( ); ++i ) {
-				if( ( iOne = veciGenes[ i ] ) == -1 )
-					continue;
+				for( i = 0; i < Answers.GetGenes( ); ++i ) {
+					if( ( iOne = veciGenes[ i ] ) == -1 )
+						continue;
+					for( j = ( i + 1 ); j < Answers.GetGenes( ); ++j ) {
+						if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
+							CMeta::IsNaN( dValue = Data.Get( iOne, iTwo ) ) ||
+							CMeta::IsNaN( Answers.Get( i, j ) ) )
+							continue;
+						if( sArgs.invert_flag )
+							dValue = 1 - dValue;
+						setdValues.insert( dValue ); } }
+				vecdValues.resize( setdValues.size( ) );
+				copy( setdValues.begin( ), setdValues.end( ), vecdValues.begin( ) );
+			}
+			sort( vecdValues.begin( ), vecdValues.end( ) );
+			for( i = 0; i < vecdValues.size( ); ++i )
+				mapValues[ vecdValues[ i ] ] = i;
+			iBins = mapValues.size( ); }
+		else
+			iBins = sArgs.bins_arg;
+		MatResults.Initialize( iBins ? ( iBins + 1 ) :
+			(size_t)( ( sArgs.max_arg - sArgs.min_arg ) / sArgs.delta_arg ) + 1, 4 );
+		MatGenes.Initialize( veciGenes.size( ), MatResults.GetRows( ) );
+
+		for( iGenes = 0; !sArgs.inputs_num || ( iGenes < sArgs.inputs_num ); ++iGenes ) {
+			MatResults.Clear( );
+			MatGenes.Clear( );
+
+			if( sArgs.inputs_num ) {
+				CGenes		Genes( Genome );
+				ifstream	ifsm;
+
+				ifsm.open( sArgs.inputs[ iGenes ] );
+				if( !Genes.Open( ifsm ) ) {
+					cerr << "Couldn't open: " << sArgs.inputs[ iGenes ] << endl;
+					return 1; }
+				vecfHere.resize( Answers.GetGenes( ) );
+				for( i = 0; i < vecfHere.size( ); ++i )
+					vecfHere[ i ] = Genes.IsGene( Answers.GetGene( i ) );
+				cerr << "Processing " << sArgs.inputs[ iGenes ] << "..." << endl;
+				ifsm.close( ); }
+
+			if( mapValues.size( ) ) {
+				for( i = 0; i < Answers.GetGenes( ); ++i ) {
+					if( ( iOne = veciGenes[ i ] ) == -1 )
+						continue;
+					for( j = ( i + 1 ); j < Answers.GetGenes( ); ++j ) {
+						if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
+							CMeta::IsNaN( dValue = Data.Get( iOne, iTwo ) ) ||
+							CMeta::IsNaN( dAnswer = Answers.Get( i, j ) ) )
+							continue;
+						if( !( vecfHere.empty( ) ||
+							( dAnswer && vecfHere[ i ] && vecfHere[ j ] ) ||
+							( !dAnswer && ( vecfHere[ i ] || vecfHere[ j ] ) ) ) )
+							continue;
+						if( sArgs.invert_flag )
+							dValue = 1 - dValue;
+						for( k = 0; k <= mapValues[ dValue ]; ++k ) {
+							MatGenes.Set( i, k, true );
+							MatGenes.Set( j, k, true );
+							MatResults.Get( k, dAnswer ? ETFPN_TP : ETFPN_FP )++; }
+						for( ; k < MatResults.GetRows( ); ++k )
+							MatResults.Get( k, dAnswer ? ETFPN_FN : ETFPN_TN )++; } } }
+			else if( iBins ) {
+				vector<SDatum>	vecsData;
+				size_t			iChunk;
+
+				for( iPositives = iNegatives = i = 0; i < Answers.GetGenes( ); ++i ) {
+					if( ( iOne = veciGenes[ i ] ) == -1 )
+						continue;
+					for( j = ( i + 1 ); j < Answers.GetGenes( ); ++j ) {
+						if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
+							CMeta::IsNaN( dAnswer = Answers.Get( i, j ) ) ||
+							CMeta::IsNaN( dValue = Data.Get( iOne, iTwo ) ) )
+							continue;
+						if( !( vecfHere.empty( ) ||
+							( dAnswer && vecfHere[ i ] && vecfHere[ j ] ) ||
+							( !dAnswer && ( vecfHere[ i ] || vecfHere[ j ] ) ) ) )
+							continue;
+
+						MatGenes.Set( i, 0, true );
+						MatGenes.Set( j, 0, true );
+						if( dAnswer )
+							iPositives++;
+						else
+							iNegatives++;
+						vecsData.push_back( SDatum( dValue, i, j, dAnswer ) ); } }
+				sort( vecsData.begin( ), vecsData.end( ), SSorter( !!sArgs.invert_flag ) );
+				iChunk = (size_t)( 0.5 + ( (float)vecsData.size( ) / ( MatResults.GetRows( ) - 1 ) ) );
+				if( sArgs.sse_flag ) {
+					vecdSSE.resize( MatResults.GetRows( ) );
+					veciPositives.resize( vecdSSE.size( ) );
+					for( i = 1,j = 0; i < vecdSSE.size( ); ++i,j += iChunk ) {
+						veciPositives[ veciPositives.size( ) - i - 1 ] = veciPositives[ veciPositives.size( ) - i ];
+						vecdSSE[ vecdSSE.size( ) - i - 1 ] = vecdSSE[ vecdSSE.size( ) - i ];
+						for( k = 0; k < iChunk; ++k ) {
+							if( ( j + k ) >= vecsData.size( ) )
+								break;
+							const SDatum&	sDatum	= vecsData[ vecsData.size( ) - ( j + k ) - 1 ];
+
+							for( m = 0; m < ( vecdSSE.size( ) - i ); ++m ) {
+								MatGenes.Set( sDatum.m_iOne, m, true );
+								MatGenes.Set( sDatum.m_iTwo, m, true ); }
+							dValue = sDatum.m_dValue - sDatum.m_dAnswer;
+							veciPositives[ veciPositives.size( ) - i - 1 ]++;
+							vecdSSE[ vecdSSE.size( ) - i - 1 ] += dValue * dValue; } } }
+				else {
+					veciPositives.resize( MatResults.GetRows( ) - 1 );
+					veciNegatives.resize( veciPositives.size( ) );
+					for( i = 0; i < veciNegatives.size( ); ++i )
+						veciNegatives[ i ] = veciPositives[ i ] = 0;
+					for( i = j = 0; i < veciPositives.size( ); ++i,j += iChunk )
+						for( k = 0; k < iChunk; ++k ) {
+							if( ( j + k ) >= vecsData.size( ) )
+								break;
+							const SDatum&	sDatum	= vecsData[ j + k ];
+
+							for( m = i; m > 0; --m ) {
+								MatGenes.Set( sDatum.m_iOne, m, true );
+								MatGenes.Set( sDatum.m_iTwo, m, true ); }
+							if( Answers.Get( sDatum.m_iOne, sDatum.m_iTwo ) )
+								veciPositives[ i ]++;
+							else
+								veciNegatives[ i ]++; }
+
+					MatResults.Set( 0, ETFPN_TP, iPositives );
+					MatResults.Set( 0, ETFPN_FP, iNegatives );
+					MatResults.Set( 0, ETFPN_TN, 0 );
+					MatResults.Set( 0, ETFPN_FN, 0 );
+					for( i = 1; i < MatResults.GetRows( ); ++i ) {
+						MatResults.Set( i, ETFPN_TP, MatResults.Get( i - 1, ETFPN_TP ) - veciPositives[ i - 1 ] );
+						MatResults.Set( i, ETFPN_FP, MatResults.Get( i - 1, ETFPN_FP ) - veciNegatives[ i - 1 ] );
+						MatResults.Set( i, ETFPN_TN, MatResults.Get( i - 1, ETFPN_TN ) + veciNegatives[ i - 1 ] );
+						MatResults.Set( i, ETFPN_FN, MatResults.Get( i - 1, ETFPN_FN ) +
+							veciPositives[ i - 1 ] ); } } }
+			else
+				for( i = 0; i < Answers.GetGenes( ); ++i ) {
+					if( !( i % 1000 ) )
+						cerr << "Processing gene " << i << '/' << Answers.GetGenes( ) << endl;
+					if( ( iOne = veciGenes[ i ] ) == -1 )
+						continue;
+					for( j = ( i + 1 ); j < Answers.GetGenes( ); ++j ) {
+						if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
+							CMeta::IsNaN( dAnswer = Answers.Get( i, j ) ) ||
+							CMeta::IsNaN( dValue = Data.Get( iOne, iTwo ) ) )
+							continue;
+						if( !( vecfHere.empty( ) ||
+							( dAnswer && vecfHere[ i ] && vecfHere[ j ] ) ||
+							( !dAnswer && ( vecfHere[ i ] || vecfHere[ j ] ) ) ) )
+							continue;
+						if( sArgs.invert_flag )
+							dValue = 1 - dValue;
+
+						iMax = (int)ceil( ( dValue - sArgs.min_arg ) / sArgs.delta_arg );
+						if( iMax > (int)MatResults.GetRows( ) )
+							iMax = (int)MatResults.GetRows( );
+						eTFPN = (ETFPN)!dAnswer;
+						for( k = 0; (int)k < iMax; ++k ) {
+							MatResults.Get( k, eTFPN )++;
+							MatGenes.Set( i, k, true );
+							MatGenes.Set( j, k, true ); }
+						eTFPN = (ETFPN)( 2 + !eTFPN );
+						for( ; k < (int)MatResults.GetRows( ); ++k )
+							MatResults.Get( k, eTFPN )++; } }
+			for( iPositives = iNegatives = i = 0; i < Answers.GetGenes( ); ++i )
 				for( j = ( i + 1 ); j < Answers.GetGenes( ); ++j ) {
-					if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
-						CMeta::IsNaN( dValue = Data.Get( iOne, iTwo ) ) ||
-						CMeta::IsNaN( Answers.Get( i, j ) ) )
-						continue;
-					if( sArgs.invert_flag )
-						dValue = 1 - dValue;
-					setdValues.insert( dValue ); } }
-			vecdValues.resize( setdValues.size( ) );
-			copy( setdValues.begin( ), setdValues.end( ), vecdValues.begin( ) );
-		}
-		sort( vecdValues.begin( ), vecdValues.end( ) );
-		for( i = 0; i < vecdValues.size( ); ++i )
-			mapValues[ vecdValues[ i ] ] = i;
-		iBins = mapValues.size( ); }
-	else
-		iBins = sArgs.bins_arg;
-	MatResults.Initialize( iBins ? ( iBins + 1 ) :
-		(size_t)( ( sArgs.max_arg - sArgs.min_arg ) / sArgs.delta_arg ) + 1, 4 );
-	MatGenes.Initialize( veciGenes.size( ), MatResults.GetRows( ) );
-
-	for( iGenes = 0; !sArgs.inputs_num || ( iGenes < sArgs.inputs_num ); ++iGenes ) {
-		MatResults.Clear( );
-		MatGenes.Clear( );
-
-		if( sArgs.inputs_num ) {
-			CGenes		Genes( Genome );
-			ifstream	ifsm;
-
-			ifsm.open( sArgs.inputs[ iGenes ] );
-			if( !Genes.Open( ifsm ) ) {
-				cerr << "Couldn't open: " << sArgs.inputs[ iGenes ] << endl;
-				return 1; }
-			vecfHere.resize( Answers.GetGenes( ) );
-			for( i = 0; i < vecfHere.size( ); ++i )
-				vecfHere[ i ] = Genes.IsGene( Answers.GetGene( i ) );
-			cerr << "Processing " << sArgs.inputs[ iGenes ] << "..." << endl;
-			ifsm.close( ); }
-
-		if( mapValues.size( ) ) {
-			for( i = 0; i < Answers.GetGenes( ); ++i ) {
-				if( ( iOne = veciGenes[ i ] ) == -1 )
-					continue;
-				for( j = ( i + 1 ); j < Answers.GetGenes( ); ++j ) {
-					if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
-						CMeta::IsNaN( dValue = Data.Get( iOne, iTwo ) ) ||
-						CMeta::IsNaN( dAnswer = Answers.Get( i, j ) ) )
-						continue;
-					if( !( vecfHere.empty( ) ||
+					if( CMeta::IsNaN( dAnswer = Answers.Get( i, j ) ) ||
+						!( vecfHere.empty( ) ||
 						( dAnswer && vecfHere[ i ] && vecfHere[ j ] ) ||
 						( !dAnswer && ( vecfHere[ i ] || vecfHere[ j ] ) ) ) )
 						continue;
-					if( sArgs.invert_flag )
-						dValue = 1 - dValue;
-					for( k = 0; k <= mapValues[ dValue ]; ++k ) {
-						MatGenes.Set( i, k, true );
-						MatGenes.Set( j, k, true );
-						MatResults.Get( k, dAnswer ? ETFPN_TP : ETFPN_FP )++; }
-					for( ; k < MatResults.GetRows( ); ++k )
-						MatResults.Get( k, dAnswer ? ETFPN_FN : ETFPN_TN )++; } } }
-		else if( iBins ) {
-			vector<SDatum>	vecsData;
-			size_t			iChunk;
-
-			for( iPositives = iNegatives = i = 0; i < Answers.GetGenes( ); ++i ) {
-				if( ( iOne = veciGenes[ i ] ) == -1 )
-					continue;
-				for( j = ( i + 1 ); j < Answers.GetGenes( ); ++j ) {
-					if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
-						CMeta::IsNaN( dAnswer = Answers.Get( i, j ) ) ||
-						CMeta::IsNaN( dValue = Data.Get( iOne, iTwo ) ) )
-						continue;
-					if( !( vecfHere.empty( ) ||
-						( dAnswer && vecfHere[ i ] && vecfHere[ j ] ) ||
-						( !dAnswer && ( vecfHere[ i ] || vecfHere[ j ] ) ) ) )
-						continue;
-
-					MatGenes.Set( i, 0, true );
-					MatGenes.Set( j, 0, true );
 					if( dAnswer )
 						iPositives++;
 					else
-						iNegatives++;
-					vecsData.push_back( SDatum( dValue, i, j, dAnswer ) ); } }
-			sort( vecsData.begin( ), vecsData.end( ), SSorter( !!sArgs.invert_flag ) );
-			iChunk = (size_t)( 0.5 + ( (float)vecsData.size( ) / ( MatResults.GetRows( ) - 1 ) ) );
-			if( sArgs.sse_flag ) {
-				vecdSSE.resize( MatResults.GetRows( ) );
-				veciPositives.resize( vecdSSE.size( ) );
-				for( i = 1,j = 0; i < vecdSSE.size( ); ++i,j += iChunk ) {
-					veciPositives[ veciPositives.size( ) - i - 1 ] = veciPositives[ veciPositives.size( ) - i ];
-					vecdSSE[ vecdSSE.size( ) - i - 1 ] = vecdSSE[ vecdSSE.size( ) - i ];
-					for( k = 0; k < iChunk; ++k ) {
-						if( ( j + k ) >= vecsData.size( ) )
-							break;
-						const SDatum&	sDatum	= vecsData[ vecsData.size( ) - ( j + k ) - 1 ];
+						iNegatives++; }
 
-						for( m = 0; m < ( vecdSSE.size( ) - i ); ++m ) {
-							MatGenes.Set( sDatum.m_iOne, m, true );
-							MatGenes.Set( sDatum.m_iTwo, m, true ); }
-						dValue = sDatum.m_dValue - sDatum.m_dAnswer;
-						veciPositives[ veciPositives.size( ) - i - 1 ]++;
-						vecdSSE[ vecdSSE.size( ) - i - 1 ] += dValue * dValue; } } }
-			else {
-				veciPositives.resize( MatResults.GetRows( ) - 1 );
-				veciNegatives.resize( veciPositives.size( ) );
-				for( i = 0; i < veciNegatives.size( ); ++i )
-					veciNegatives[ i ] = veciPositives[ i ] = 0;
-				for( i = j = 0; i < veciPositives.size( ); ++i,j += iChunk )
-					for( k = 0; k < iChunk; ++k ) {
-						if( ( j + k ) >= vecsData.size( ) )
-							break;
-						const SDatum&	sDatum	= vecsData[ j + k ];
+			veciRec.resize( MatResults.GetRows( ) );
+			veciRecTerm.resize( MatResults.GetRows( ) );
+			for( i = 0; i < veciRec.size( ); ++i ) {
+				veciRec[ i ] = veciRecTerm[ i ] = 0;
+				for( j = 0; j < MatGenes.GetRows( ); ++j )
+					if( MatGenes.Get( j, i ) ) {
+						veciRec[ i ]++;
+						if( vecfHere.size( ) && vecfHere[ j ] )
+							veciRecTerm[ i ]++; }
+				for( j = 0; j < veciGenesTerm.size( ); ++j )
+					if( MatGenes.Get( veciGenesTerm[ j ], i ) &&
+						( vecfHere.empty( ) || !vecfHere[ veciGenesTerm[ j ] ] ) )
+						veciRecTerm[ i ]++; }
 
-						for( m = i; m > 0; --m ) {
-							MatGenes.Set( sDatum.m_iOne, m, true );
-							MatGenes.Set( sDatum.m_iTwo, m, true ); }
-						if( Answers.Get( sDatum.m_iOne, sDatum.m_iTwo ) )
-							veciPositives[ i ]++;
-						else
-							veciNegatives[ i ]++; }
+			if( sArgs.inputs_num ) {
+				ofsm.open( ( (string)sArgs.directory_arg + '/' +
+					CMeta::Basename( sArgs.inputs[ iGenes ] ) + ".bins" ).c_str( ) );
+				postm = &ofsm; }
+			else
+				postm = &cout;
 
-				MatResults.Set( 0, ETFPN_TP, iPositives );
-				MatResults.Set( 0, ETFPN_FP, iNegatives );
-				MatResults.Set( 0, ETFPN_TN, 0 );
-				MatResults.Set( 0, ETFPN_FN, 0 );
-				for( i = 1; i < MatResults.GetRows( ); ++i ) {
-					MatResults.Set( i, ETFPN_TP, MatResults.Get( i - 1, ETFPN_TP ) - veciPositives[ i - 1 ] );
-					MatResults.Set( i, ETFPN_FP, MatResults.Get( i - 1, ETFPN_FP ) - veciNegatives[ i - 1 ] );
-					MatResults.Set( i, ETFPN_TN, MatResults.Get( i - 1, ETFPN_TN ) + veciNegatives[ i - 1 ] );
-					MatResults.Set( i, ETFPN_FN, MatResults.Get( i - 1, ETFPN_FN ) +
-						veciPositives[ i - 1 ] ); } } }
-		else
-			for( i = 0; i < Answers.GetGenes( ); ++i ) {
-				if( !( i % 1000 ) )
-					cerr << "Processing gene " << i << '/' << Answers.GetGenes( ) << endl;
-				if( ( iOne = veciGenes[ i ] ) == -1 )
-					continue;
-				for( j = ( i + 1 ); j < Answers.GetGenes( ); ++j ) {
-					if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
-						CMeta::IsNaN( dAnswer = Answers.Get( i, j ) ) ||
-						CMeta::IsNaN( dValue = Data.Get( iOne, iTwo ) ) )
-						continue;
-					if( !( vecfHere.empty( ) ||
-						( dAnswer && vecfHere[ i ] && vecfHere[ j ] ) ||
-						( !dAnswer && ( vecfHere[ i ] || vecfHere[ j ] ) ) ) )
-						continue;
-					if( sArgs.invert_flag )
-						dValue = 1 - dValue;
+			if( !sArgs.sse_flag ) {
+				*postm << "#	P	" << iPositives << endl;
+				*postm << "#	N	" << iNegatives << endl; }
+			*postm << "Cut	Genes	" << ( sArgs.sse_flag ? "Pairs	SSE" : "TP	FP	TN	FN" ) << endl;
+			for( i = 0; i < MatResults.GetRows( ); ++i ) {
+				*postm << ( iBins ? i : ( sArgs.min_arg + ( i * sArgs.delta_arg ) ) ) << '\t' <<
+					veciRec[ i ];
+				if( sArgs.sse_flag )
+					*postm << '\t' << veciPositives[ i ] << '\t' << vecdSSE[ i ];
+				else
+					for( j = 0; j < MatResults.GetColumns( ); ++j )
+						*postm << '\t' << MatResults.Get( i, j );
+				if( veciGenesTerm.size( ) || vecfHere.size( ) )
+					*postm << '\t' << veciRecTerm[ i ];
+				*postm << endl; }
+			if( !sArgs.sse_flag )
+				*postm << "#	AUC	" << ( sArgs.auc_arg ?
+					AUCMod( Data, Answers, vecfHere, !!sArgs.invert_flag, sArgs.auc_arg ) :
+					CStatistics::WilcoxonRankSum( Data, Answers, vecfHere, !!sArgs.invert_flag ) ) << endl;
 
-					iMax = (int)ceil( ( dValue - sArgs.min_arg ) / sArgs.delta_arg );
-					if( iMax > (int)MatResults.GetRows( ) )
-						iMax = (int)MatResults.GetRows( );
-					eTFPN = (ETFPN)!dAnswer;
-					for( k = 0; (int)k < iMax; ++k ) {
-						MatResults.Get( k, eTFPN )++;
-						MatGenes.Set( i, k, true );
-						MatGenes.Set( j, k, true ); }
-					eTFPN = (ETFPN)( 2 + !eTFPN );
-					for( ; k < (int)MatResults.GetRows( ); ++k )
-						MatResults.Get( k, eTFPN )++; } }
-		for( iPositives = iNegatives = i = 0; i < Answers.GetGenes( ); ++i )
-			for( j = ( i + 1 ); j < Answers.GetGenes( ); ++j ) {
-				if( CMeta::IsNaN( dAnswer = Answers.Get( i, j ) ) ||
-					!( vecfHere.empty( ) ||
-					( dAnswer && vecfHere[ i ] && vecfHere[ j ] ) ||
-					( !dAnswer && ( vecfHere[ i ] || vecfHere[ j ] ) ) ) )
-					continue;
-				if( dAnswer )
-					iPositives++;
-				else
-					iNegatives++; }
+			if( sArgs.inputs_num )
+				ofsm.close( );
+			else
+				cout.flush( );
 
-		veciRec.resize( MatResults.GetRows( ) );
-		veciRecTerm.resize( MatResults.GetRows( ) );
-		for( i = 0; i < veciRec.size( ); ++i ) {
-			veciRec[ i ] = veciRecTerm[ i ] = 0;
-			for( j = 0; j < MatGenes.GetRows( ); ++j )
-				if( MatGenes.Get( j, i ) ) {
-					veciRec[ i ]++;
-					if( vecfHere.size( ) && vecfHere[ j ] )
-						veciRecTerm[ i ]++; }
-			for( j = 0; j < veciGenesTerm.size( ); ++j )
-				if( MatGenes.Get( veciGenesTerm[ j ], i ) &&
-					( vecfHere.empty( ) || !vecfHere[ veciGenesTerm[ j ] ] ) )
-					veciRecTerm[ i ]++; }
-
-		if( sArgs.inputs_num ) {
-			ofsm.open( ( (string)sArgs.directory_arg + '/' +
-				CMeta::Basename( sArgs.inputs[ iGenes ] ) + ".bins" ).c_str( ) );
-			postm = &ofsm; }
-		else
-			postm = &cout;
-
-		if( !sArgs.sse_flag ) {
-			*postm << "#	P	" << iPositives << endl;
-			*postm << "#	N	" << iNegatives << endl; }
-		*postm << "Cut	Genes	" << ( sArgs.sse_flag ? "Pairs	SSE" : "TP	FP	TN	FN" ) << endl;
-		for( i = 0; i < MatResults.GetRows( ); ++i ) {
-			*postm << ( iBins ? i : ( sArgs.min_arg + ( i * sArgs.delta_arg ) ) ) << '\t' <<
-				veciRec[ i ];
-			if( sArgs.sse_flag )
-				*postm << '\t' << veciPositives[ i ] << '\t' << vecdSSE[ i ];
-			else
-				for( j = 0; j < MatResults.GetColumns( ); ++j )
-					*postm << '\t' << MatResults.Get( i, j );
-			if( veciGenesTerm.size( ) || vecfHere.size( ) )
-				*postm << '\t' << veciRecTerm[ i ];
-			*postm << endl; }
-		if( !sArgs.sse_flag )
-			*postm << "#	AUC	" << ( sArgs.auc_arg ?
-				AUCMod( Data, Answers, vecfHere, !!sArgs.invert_flag, sArgs.auc_arg ) :
-				CStatistics::WilcoxonRankSum( Data, Answers, vecfHere, !!sArgs.invert_flag ) ) << endl;
-
-		if( sArgs.inputs_num )
-			ofsm.close( );
-		else
-			cout.flush( );
-
-		if( !sArgs.inputs_num )
-			break; }
+			if( !sArgs.inputs_num )
+				break; } }
 
 	return 0; }
 

tools/DChecker/DChecker.ggo

 							string	typestr="directory"	default="."
 option	"auc"			a	"Use alternative AUCn calculation"
 							float	default="0"
+option	"randomize"		R	"Calculate specified number of randomized scores"
+							int	default="0"
 
 section "Ranking Method"
 option	"bins"			b	"Bins for quantile sorting"

tools/DChecker/cmdline.c

 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iDChecker.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iDChecker.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:
   "\nMiscellaneous:",
   "  -d, --directory=directory  Output directory  (default=`.')",
   "  -a, --auc=FLOAT            Use alternative AUCn calculation  (default=`0')",
+  "  -R, --randomize=INT        Calculate specified number of randomized scores  \n                               (default=`0')",
   "\nRanking Method:",
   "  -b, --bins=INT             Bins for quantile sorting  (default=`1000')",
   "  -f, --finite               Count finitely many bins  (default=off)",
   args_info->answers_given = 0 ;
   args_info->directory_given = 0 ;
   args_info->auc_given = 0 ;
+  args_info->randomize_given = 0 ;
   args_info->bins_given = 0 ;
   args_info->finite_given = 0 ;
   args_info->min_given = 0 ;
   args_info->directory_orig = NULL;
   args_info->auc_arg = 0;
   args_info->auc_orig = NULL;
+  args_info->randomize_arg = 0;
+  args_info->randomize_orig = NULL;
   args_info->bins_arg = 1000;
   args_info->bins_orig = NULL;
   args_info->finite_flag = 0;
   args_info->answers_help = gengetopt_args_info_help[4] ;
   args_info->directory_help = gengetopt_args_info_help[6] ;
   args_info->auc_help = gengetopt_args_info_help[7] ;
-  args_info->bins_help = gengetopt_args_info_help[9] ;
-  args_info->finite_help = gengetopt_args_info_help[10] ;
-  args_info->min_help = gengetopt_args_info_help[11] ;
-  args_info->max_help = gengetopt_args_info_help[12] ;
-  args_info->delta_help = gengetopt_args_info_help[13] ;
-  args_info->genes_help = gengetopt_args_info_help[15] ;
-  args_info->genex_help = gengetopt_args_info_help[16] ;
-  args_info->genet_help = gengetopt_args_info_help[17] ;
-  args_info->genee_help = gengetopt_args_info_help[18] ;
-  args_info->normalize_help = gengetopt_args_info_help[20] ;
-  args_info->invert_help = gengetopt_args_info_help[21] ;
-  args_info->sse_help = gengetopt_args_info_help[23] ;
-  args_info->memmap_help = gengetopt_args_info_help[24] ;
-  args_info->verbosity_help = gengetopt_args_info_help[25] ;
+  args_info->randomize_help = gengetopt_args_info_help[8] ;
+  args_info->bins_help = gengetopt_args_info_help[10] ;
+  args_info->finite_help = gengetopt_args_info_help[11] ;
+  args_info->min_help = gengetopt_args_info_help[12] ;
+  args_info->max_help = gengetopt_args_info_help[13] ;
+  args_info->delta_help = gengetopt_args_info_help[14] ;
+  args_info->genes_help = gengetopt_args_info_help[16] ;
+  args_info->genex_help = gengetopt_args_info_help[17] ;
+  args_info->genet_help = gengetopt_args_info_help[18] ;
+  args_info->genee_help = gengetopt_args_info_help[19] ;
+  args_info->normalize_help = gengetopt_args_info_help[21] ;
+  args_info->invert_help = gengetopt_args_info_help[22] ;
+  args_info->sse_help = gengetopt_args_info_help[24] ;
+  args_info->memmap_help = gengetopt_args_info_help[25] ;
+  args_info->verbosity_help = gengetopt_args_info_help[26] ;
   
 }
 
   free_string_field (&(args_info->directory_arg));
   free_string_field (&(args_info->directory_orig));
   free_string_field (&(args_info->auc_orig));
+  free_string_field (&(args_info->randomize_orig));
   free_string_field (&(args_info->bins_orig));
   free_string_field (&(args_info->min_orig));
   free_string_field (&(args_info->max_orig));
     write_into_file(outfile, "directory", args_info->directory_orig, 0);
   if (args_info->auc_given)
     write_into_file(outfile, "auc", args_info->auc_orig, 0);
+  if (args_info->randomize_given)
+    write_into_file(outfile, "randomize", args_info->randomize_orig, 0);
   if (args_info->bins_given)
     write_into_file(outfile, "bins", args_info->bins_orig, 0);
   if (args_info->finite_given)
         { "answers",	1, NULL, 'w' },
         { "directory",	1, NULL, 'd' },
         { "auc",	1, NULL, 'a' },
+        { "randomize",	1, NULL, 'R' },
         { "bins",	1, NULL, 'b' },
         { "finite",	0, NULL, 'f' },
         { "min",	1, NULL, 'm' },
         { NULL,	0, NULL, 0 }
       };
 
-      c = getopt_long (argc, argv, "hVi:w:d:a:b:fm:M:e:g:G:c:C:ntspv:", long_options, &option_index);
+      c = getopt_long (argc, argv, "hVi:w:d:a:R:b:fm:M:e:g:G:c:C:ntspv:", long_options, &option_index);
 
       if (c == -1) break;	/* Exit from `while (1)' loop.  */
 
             goto failure;
         
           break;
+        case 'R':	/* Calculate specified number of randomized scores.  */
+        
+        
+          if (update_arg( (void *)&(args_info->randomize_arg), 
+               &(args_info->randomize_orig), &(args_info->randomize_given),
+              &(local_args_info.randomize_given), optarg, 0, "0", ARG_INT,
+              check_ambiguity, override, 0, 0,
+              "randomize", 'R',
+              additional_error))
+            goto failure;
+        
+          break;
         case 'b':	/* Bins for quantile sorting.  */
         
         

tools/DChecker/cmdline.h

   float auc_arg;	/**< @brief Use alternative AUCn calculation (default='0').  */
   char * auc_orig;	/**< @brief Use alternative AUCn calculation original value given at command line.  */
   const char *auc_help; /**< @brief Use alternative AUCn calculation help description.  */
+  int randomize_arg;	/**< @brief Calculate specified number of randomized scores (default='0').  */
+  char * randomize_orig;	/**< @brief Calculate specified number of randomized scores original value given at command line.  */
+  const char *randomize_help; /**< @brief Calculate specified number of randomized scores help description.  */
   int bins_arg;	/**< @brief Bins for quantile sorting (default='1000').  */
   char * bins_orig;	/**< @brief Bins for quantile sorting original value given at command line.  */
   const char *bins_help; /**< @brief Bins for quantile sorting help description.  */
   unsigned int answers_given ;	/**< @brief Whether answers was given.  */
   unsigned int directory_given ;	/**< @brief Whether directory was given.  */
   unsigned int auc_given ;	/**< @brief Whether auc was given.  */
+  unsigned int randomize_given ;	/**< @brief Whether randomize was given.  */
   unsigned int bins_given ;	/**< @brief Whether bins was given.  */
   unsigned int finite_given ;	/**< @brief Whether finite was given.  */
   unsigned int min_given ;	/**< @brief Whether min was given.  */

tools/DSLConverter/cmdline.c

 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iDSLConverter.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iDSLConverter.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

tools/Dab2Dad/cmdline.c

 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iDab2Dad.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iDab2Dad.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

tools/Dat2Dab/Dat2Dab.cpp

 			CMeta::Tokenize( acBuffer, vecstrTokens );
 			if( vecstrTokens.empty( ) )
 				continue;
-			if( vecstrTokens.size( ) != 2 ) {
+			if( vecstrTokens.size( ) < 2 ) {
 				cerr << "Illegal remap line (" << vecstrTokens.size( ) << "): " << acBuffer << endl;
 				return 1; }
 			if( vecstrTokens[ 0 ] == vecstrTokens[ 1 ] )
 		Dat.FilterGenes( Genes, CDat::EFilterInclude );
 	if( sArgs.genex_arg )
 		Dat.FilterGenes( sArgs.genex_arg, CDat::EFilterExclude );
+	if( sArgs.genee_arg )
+		Dat.FilterGenes( sArgs.genee_arg, CDat::EFilterEdge );
 
 	if( sArgs.paircount_flag ) {
 		size_t			iTotal, iCutoff;

tools/Dat2Dab/Dat2Dab.ggo

 							string	typestr="filename"
 option	"genex"			G	"Exclude all genes from the given set"
 							string	typestr="filename"
+option	"genee"			D	"Process only edges including a gene from the given set"
+							string	typestr="filename"
 option	"edges"			e	"Process only edges from the given DAT/DAB"
 							string	typestr="filename"
 option	"cutoff"		c	"Exclude edges below cutoff"

tools/Dat2Dab/cmdline.c

 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iDat2Dab.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iDat2Dab.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:
   "\nFiltering:",
   "  -g, --genes=filename     Process only genes from the given set",
   "  -G, --genex=filename     Exclude all genes from the given set",
+  "  -D, --genee=filename     Process only edges including a gene from the given \n                             set",
   "  -e, --edges=filename     Process only edges from the given DAT/DAB",
   "  -c, --cutoff=DOUBLE      Exclude edges below cutoff",
   "  -Z, --zero               Zero missing values  (default=off)",
   args_info->randomize_given = 0 ;
   args_info->genes_given = 0 ;
   args_info->genex_given = 0 ;
+  args_info->genee_given = 0 ;
   args_info->edges_given = 0 ;
   args_info->cutoff_given = 0 ;
   args_info->zero_given = 0 ;
   args_info->genes_orig = NULL;
   args_info->genex_arg = NULL;
   args_info->genex_orig = NULL;
+  args_info->genee_arg = NULL;
+  args_info->genee_orig = NULL;
   args_info->edges_arg = NULL;
   args_info->edges_orig = NULL;
   args_info->cutoff_orig = NULL;
   args_info->randomize_help = gengetopt_args_info_help[10] ;
   args_info->genes_help = gengetopt_args_info_help[12] ;
   args_info->genex_help = gengetopt_args_info_help[13] ;
-  args_info->edges_help = gengetopt_args_info_help[14] ;
-  args_info->cutoff_help = gengetopt_args_info_help[15] ;
-  args_info->zero_help = gengetopt_args_info_help[16] ;
-  args_info->duplicates_help = gengetopt_args_info_help[17] ;
-  args_info->subsample_help = gengetopt_args_info_help[18] ;
-  args_info->lookup1_help = gengetopt_args_info_help[20] ;
-  args_info->lookup2_help = gengetopt_args_info_help[21] ;
-  args_info->lookups1_help = gengetopt_args_info_help[22] ;
-  args_info->lookups2_help = gengetopt_args_info_help[23] ;
-  args_info->genelist_help = gengetopt_args_info_help[24] ;
-  args_info->paircount_help = gengetopt_args_info_help[25] ;
-  args_info->remap_help = gengetopt_args_info_help[27] ;
-  args_info->table_help = gengetopt_args_info_help[28] ;
-  args_info->skip_help = gengetopt_args_info_help[29] ;
-  args_info->memmap_help = gengetopt_args_info_help[30] ;
-  args_info->verbosity_help = gengetopt_args_info_help[31] ;
+  args_info->genee_help = gengetopt_args_info_help[14] ;
+  args_info->edges_help = gengetopt_args_info_help[15] ;
+  args_info->cutoff_help = gengetopt_args_info_help[16] ;
+  args_info->zero_help = gengetopt_args_info_help[17] ;
+  args_info->duplicates_help = gengetopt_args_info_help[18] ;
+  args_info->subsample_help = gengetopt_args_info_help[19] ;
+  args_info->lookup1_help = gengetopt_args_info_help[21] ;
+  args_info->lookup2_help = gengetopt_args_info_help[22] ;
+  args_info->lookups1_help = gengetopt_args_info_help[23] ;
+  args_info->lookups2_help = gengetopt_args_info_help[24] ;
+  args_info->genelist_help = gengetopt_args_info_help[25] ;
+  args_info->paircount_help = gengetopt_args_info_help[26] ;
+  args_info->remap_help = gengetopt_args_info_help[28] ;
+  args_info->table_help = gengetopt_args_info_help[29] ;
+  args_info->skip_help = gengetopt_args_info_help[30] ;
+  args_info->memmap_help = gengetopt_args_info_help[31] ;
+  args_info->verbosity_help = gengetopt_args_info_help[32] ;
   
 }
 
   free_string_field (&(args_info->genes_orig));
   free_string_field (&(args_info->genex_arg));
   free_string_field (&(args_info->genex_orig));
+  free_string_field (&(args_info->genee_arg));
+  free_string_field (&(args_info->genee_orig));
   free_string_field (&(args_info->edges_arg));
   free_string_field (&(args_info->edges_orig));
   free_string_field (&(args_info->cutoff_orig));
     write_into_file(outfile, "genes", args_info->genes_orig, 0);
   if (args_info->genex_given)
     write_into_file(outfile, "genex", args_info->genex_orig, 0);
+  if (args_info->genee_given)
+    write_into_file(outfile, "genee", args_info->genee_orig, 0);
   if (args_info->edges_given)
     write_into_file(outfile, "edges", args_info->edges_orig, 0);
   if (args_info->cutoff_given)
         { "randomize",	0, NULL, 'a' },
         { "genes",	1, NULL, 'g' },
         { "genex",	1, NULL, 'G' },
+        { "genee",	1, NULL, 'D' },
         { "edges",	1, NULL, 'e' },
         { "cutoff",	1, NULL, 'c' },
         { "zero",	0, NULL, 'Z' },
         { NULL,	0, NULL, 0 }
       };
 
-      c = getopt_long (argc, argv, "hVi:o:fnzrag:G:e:c:Zdu:l:L:t:T:EPp:bs:mv:", long_options, &option_index);
+      c = getopt_long (argc, argv, "hVi:o:fnzrag:G:D:e:c:Zdu:l:L:t:T:EPp:bs:mv:", long_options, &option_index);
 
       if (c == -1) break;	/* Exit from `while (1)' loop.  */
 
             goto failure;
         
           break;
+        case 'D':	/* Process only edges including a gene from the given set.  */
+        
+        
+          if (update_arg( (void *)&(args_info->genee_arg), 
+               &(args_info->genee_orig), &(args_info->genee_given),
+              &(local_args_info.genee_given), optarg, 0, 0, ARG_STRING,
+              check_ambiguity, override, 0, 0,
+              "genee", 'D',
+              additional_error))
+            goto failure;
+        
+          break;
         case 'e':	/* Process only edges from the given DAT/DAB.  */
         
         

tools/Dat2Dab/cmdline.h

   char * genex_arg;	/**< @brief Exclude all genes from the given set.  */
   char * genex_orig;	/**< @brief Exclude all genes from the given set original value given at command line.  */
   const char *genex_help; /**< @brief Exclude all genes from the given set help description.  */
+  char * genee_arg;	/**< @brief Process only edges including a gene from the given set.  */
+  char * genee_orig;	/**< @brief Process only edges including a gene from the given set original value given at command line.  */
+  const char *genee_help; /**< @brief Process only edges including a gene from the given set help description.  */
   char * edges_arg;	/**< @brief Process only edges from the given DAT/DAB.  */
   char * edges_orig;	/**< @brief Process only edges from the given DAT/DAB original value given at command line.  */
   const char *edges_help; /**< @brief Process only edges from the given DAT/DAB help description.  */
   unsigned int randomize_given ;	/**< @brief Whether randomize was given.  */
   unsigned int genes_given ;	/**< @brief Whether genes was given.  */
   unsigned int genex_given ;	/**< @brief Whether genex was given.  */
+  unsigned int genee_given ;	/**< @brief Whether genee was given.  */
   unsigned int edges_given ;	/**< @brief Whether edges was given.  */
   unsigned int cutoff_given ;	/**< @brief Whether cutoff was given.  */
   unsigned int zero_given ;	/**< @brief Whether zero was given.  */

tools/Dat2Graph/Dat2Graph.cpp

 #include "stdafx.h"
 #include "cmdline.h"
 
+/**
+ * Comparator over indices into a score vector, ordering them by descending
+ * score.
+ *
+ * Only a reference to the score vector is stored (no copy), so the vector must
+ * outlive the comparator, and every index compared must be a valid position
+ * in that vector.
+ */
+struct SSorter {
+	const vector<float>&	m_vecdScores;
+
+	SSorter( const vector<float>& vecdScores ) : m_vecdScores(vecdScores) { }
+
+	// Returns true when the score at iOne is strictly greater than the score at
+	// iTwo, i.e. iOne should be placed first.  Declared const because the
+	// comparison never modifies the comparator, which lets it be invoked
+	// through const comparator objects as well.
+	bool operator()( size_t iOne, size_t iTwo ) const {
+
+		return ( m_vecdScores[iTwo] < m_vecdScores[iOne] ); }
+};
+
 int open_genes( const char* szFile, CGenes& Genes ) {
 	ifstream	ifsm;
 
 				for( j = ( i + 1 ); j < pDat->GetGenes( ); ++j )
 					if( !CMeta::IsNaN( d = pDat->Get( i, j ) ) && ( d < sArgs.cutoff_arg ) )
 						pDat->Set( i, j, CMeta::GetNaN( ) );
-		if( !strcmp( sArgs.format_arg, "correl" ) ) {
+		// Hub query mode, active when the "hubs" option (hubs_arg) is non-negative:
+		// for each gene with a non-negative veciQuery entry keep only its
+		// hubs_arg highest-scoring edges, and drop edges between pairs of genes
+		// whose veciQuery entries are both -1 (presumably genes outside the
+		// query set -- confirm against where veciQuery is filled).
+		if( sArgs.hubs_arg >= 0 ) {
+			vector<float>	vecdScores;
+			vector<size_t>	veciIndices;
+			vector<bool>	vecfHits;
+
+			veciIndices.resize( pDat->GetGenes( ) );
+			vecdScores.resize( pDat->GetGenes( ) );
+			vecfHits.resize( pDat->GetGenes( ) );
+			for( i = 0; i < pDat->GetGenes( ); ++i )
+				if( veciQuery[i] == -1 ) {
+					// Gene i has no query index: set its edges to every other
+					// gene without a query index to NaN.
+					for( j = 0; j < pDat->GetGenes( ); ++j )
+						if( veciQuery[j] == -1 )
+							pDat->Set( i, j, CMeta::GetNaN( ) ); }
+				else {
+					// Collect row i's scores; missing (NaN) values become -FLT_MAX
+					// so that they land at the end of the descending sort below.
+					fill( vecdScores.begin( ), vecdScores.end( ), -FLT_MAX );
+					for( j = 0; j < pDat->GetGenes( ); ++j ) {
+						if( CMeta::IsNaN( d = pDat->Get( i, j ) ) )
+							d = -FLT_MAX;
+						vecdScores[j] = d; }
+					// Sort gene indices by descending score for this row.
+					for( j = 0; j < veciIndices.size( ); ++j )
+						veciIndices[j] = j;
+					sort( veciIndices.begin( ), veciIndices.end( ), SSorter( vecdScores ) );
+					// Mark the top hubs_arg neighbors.  The count is clamped to the
+					// number of genes: a hubs_arg larger than the gene count would
+					// otherwise index past the end of veciIndices.
+					fill( vecfHits.begin( ), vecfHits.end( ), false );
+					for( j = 0; ( j < (size_t)sArgs.hubs_arg ) && ( j < veciIndices.size( ) ); ++j )
+						vecfHits[veciIndices[j]] = true;
+					// Every edge of gene i that was not marked is set to NaN.
+					for( j = 0; j < pDat->GetGenes( ); ++j )
+						if( !vecfHits[j] )
+							pDat->Set( i, j, CMeta::GetNaN( ) ); }
+			// Normalize the remaining values using the z-score mode.
+			pDat->Normalize( CDat::ENormalizeZScore ); }
+		else if( !strcmp( sArgs.format_arg, "correl" ) ) {
 			CMeasurePearson	MeasurePearson;
 			float*			adCentroid;
 			float*			adCur;

tools/Dat2Graph/Dat2Graph.ggo

 						flag	on
 option	"edges"		d	"Aggressiveness of edge trimming after query"
 						double	default="1"
+option	"hubs"		H	"Number of neighbors to query hubs"
+						int	default="-1"
 
 section "Filtering"
 option	"cutoff"	e	"Minimum edge weight for output"

tools/Dat2Graph/cmdline.c

 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iDat2Graph.ggo --default-optional -C -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iDat2Graph.ggo --default-optional -C -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:
   "  -k, --neighbors=INT      Size of query neighborhood  (default=`-1')",
   "  -a, --hefalmp            Perform HEFalMp query instead of bioPIXIE query  \n                             (default=on)",
   "  -d, --edges=DOUBLE       Aggressiveness of edge trimming after query  \n                             (default=`1')",
+  "  -H, --hubs=INT           Number of neighbors to query hubs  (default=`-1')",
   "\nFiltering:",
   "  -e, --cutoff=DOUBLE      Minimum edge weight for output",
   "  -g, --genes=filename     Gene inclusion file",
   args_info->neighbors_given = 0 ;
   args_info->hefalmp_given = 0 ;
   args_info->edges_given = 0 ;
+  args_info->hubs_given = 0 ;
   args_info->cutoff_given = 0 ;
   args_info->genes_given = 0 ;
   args_info->genex_given = 0 ;
   args_info->hefalmp_flag = 1;
   args_info->edges_arg = 1;
   args_info->edges_orig = NULL;
+  args_info->hubs_arg = -1;
+  args_info->hubs_orig = NULL;
   args_info->cutoff_orig = NULL;
   args_info->genes_arg = NULL;
   args_info->genes_orig = NULL;
   args_info->neighbors_help = gengetopt_args_info_help[8] ;
   args_info->hefalmp_help = gengetopt_args_info_help[9] ;
   args_info->edges_help = gengetopt_args_info_help[10] ;
-  args_info->cutoff_help = gengetopt_args_info_help[12] ;
-  args_info->genes_help = gengetopt_args_info_help[13] ;
-  args_info->genex_help = gengetopt_args_info_help[14] ;
-  args_info->knowns_help = gengetopt_args_info_help[15] ;
-  args_info->features_help = gengetopt_args_info_help[17] ;
-  args_info->colors_help = gengetopt_args_info_help[18] ;
-  args_info->borders_help = gengetopt_args_info_help[19] ;
-  args_info->normalize_help = gengetopt_args_info_help[21] ;
-  args_info->memmap_help = gengetopt_args_info_help[22] ;
-  args_info->config_help = gengetopt_args_info_help[23] ;
-  args_info->verbosity_help = gengetopt_args_info_help[24] ;
+  args_info->hubs_help = gengetopt_args_info_help[11] ;
+  args_info->cutoff_help = gengetopt_args_info_help[13] ;
+  args_info->genes_help = gengetopt_args_info_help[14] ;
+  args_info->genex_help = gengetopt_args_info_help[15] ;
+  args_info->knowns_help = gengetopt_args_info_help[16] ;
+  args_info->features_help = gengetopt_args_info_help[18] ;
+  args_info->colors_help = gengetopt_args_info_help[19] ;
+  args_info->borders_help = gengetopt_args_info_help[20] ;
+  args_info->normalize_help = gengetopt_args_info_help[22] ;
+  args_info->memmap_help = gengetopt_args_info_help[23] ;
+  args_info->config_help = gengetopt_args_info_help[24] ;
+  args_info->verbosity_help = gengetopt_args_info_help[25] ;
   
 }
 
   free_string_field (&(args_info->genew_orig));
   free_string_field (&(args_info->neighbors_orig));
   free_string_field (&(args_info->edges_orig));
+  free_string_field (&(args_info->hubs_orig));
   free_string_field (&(args_info->cutoff_orig));
   free_string_field (&(args_info->genes_arg));
   free_string_field (&(args_info->genes_orig));
     write_into_file(outfile, "hefalmp", 0, 0 );
   if (args_info->edges_given)
     write_into_file(outfile, "edges", args_info->edges_orig, 0);
+  if (args_info->hubs_given)
+    write_into_file(outfile, "hubs", args_info->hubs_orig, 0);
   if (args_info->cutoff_given)
     write_into_file(outfile, "cutoff", args_info->cutoff_orig, 0);
   if (args_info->genes_given)
         { "neighbors",	1, NULL, 'k' },
         { "hefalmp",	0, NULL, 'a' },
         { "edges",	1, NULL, 'd' },
+        { "hubs",	1, NULL, 'H' },
         { "cutoff",	1, NULL, 'e' },
         { "genes",	1, NULL, 'g' },
         { "genex",	1, NULL, 'G' },
         { NULL,	0, NULL, 0 }
       };
 
-      c = getopt_long (argc, argv, "hVi:t:q:Q:k:ad:e:g:G:w:f:l:b:nmc:v:", long_options, &option_index);
+      c = getopt_long (argc, argv, "hVi:t:q:Q:k:ad:H:e:g:G:w:f:l:b:nmc:v:", long_options, &option_index);
 
       if (c == -1) break;	/* Exit from `while (1)' loop.  */
 
             goto failure;
         
           break;
+        case 'H':	/* Number of neighbors to query hubs.  */
+        
+        
+          if (update_arg( (void *)&(args_info->hubs_arg), 
+               &(args_info->hubs_orig), &(args_info->hubs_given),
+              &(local_args_info.hubs_given), optarg, 0, "-1", ARG_INT,
+              check_ambiguity, override, 0, 0,
+              "hubs", 'H',
+              additional_error))
+            goto failure;
+        
+          break;
         case 'e':	/* Minimum edge weight for output.  */
         
         

tools/Dat2Graph/cmdline.h

   double edges_arg;	/**< @brief Aggressiveness of edge trimming after query (default='1').  */
   char * edges_orig;	/**< @brief Aggressiveness of edge trimming after query original value given at command line.  */
   const char *edges_help; /**< @brief Aggressiveness of edge trimming after query help description.  */
+  int hubs_arg;	/**< @brief Number of neighbors to query hubs (default='-1').  */
+  char * hubs_orig;	/**< @brief Number of neighbors to query hubs original value given at command line.  */
+  const char *hubs_help; /**< @brief Number of neighbors to query hubs help description.  */
   double cutoff_arg;	/**< @brief Minimum edge weight for output.  */
   char * cutoff_orig;	/**< @brief Minimum edge weight for output original value given at command line.  */
   const char *cutoff_help; /**< @brief Minimum edge weight for output help description.  */
   unsigned int neighbors_given ;	/**< @brief Whether neighbors was given.  */
   unsigned int hefalmp_given ;	/**< @brief Whether hefalmp was given.  */
   unsigned int edges_given ;	/**< @brief Whether edges was given.  */
+  unsigned int hubs_given ;	/**< @brief Whether hubs was given.  */
   unsigned int cutoff_given ;	/**< @brief Whether cutoff was given.  */
   unsigned int genes_given ;	/**< @brief Whether genes was given.  */
   unsigned int genex_given ;	/**< @brief Whether genex was given.  */

tools/Data2Bnt/cmdline.c

 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iData2Bnt.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iData2Bnt.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

tools/Data2DB/cmdline.c

 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iData2DB.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iData2DB.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

tools/Data2Features/cmdline.c

 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iData2Features.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iData2Features.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

tools/Data2Sql/cmdline.c

 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iData2Sql.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iData2Sql.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

tools/Data2Svm/cmdline.c

 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iData2Svm.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iData2Svm.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

tools/DataDumper/cmdline.c

 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iDataDumper.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iDataDumper.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

tools/Distancer/cmdline.c

 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/users/psarder/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iDistancer.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iDistancer.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

tools/Edges2Posteriors/cmdline.c

 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iEdges2Posteriors.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iEdges2Posteriors.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

tools/Explainer/Explainer.cpp

 
 struct SDatum {
 	float	m_dDiff;	// absolute difference |dData - dAnswer|, set in the constructor body
+	float	m_dData;	// copy of the constructor's dData argument
+	float	m_dAnswer;	// copy of the constructor's dAnswer argument
 	size_t	m_iOne;	// copy of the constructor's iOne argument
 	size_t	m_iTwo;	// copy of the constructor's iTwo argument
 
-	SDatum( float dDiff, size_t iOne, size_t iTwo ) : m_dDiff(dDiff), m_iOne(iOne), m_iTwo(iTwo) { }
+	SDatum( float dAnswer, float dData, size_t iOne, size_t iTwo ) : m_dData(dData), m_dAnswer(dAnswer), m_iOne(iOne), m_iTwo(iTwo) {
+
+		m_dDiff = fabs( dData - dAnswer ); }	// note the argument order: answer first, then data
 };
 
 struct SSorter {
+	// Selects which SDatum field the comparison is based on.
+	enum EMode {
+		EModeDiff,
+		EModeData,
+		EModeAnswer
+	};
+
+	EMode	m_eMode;
 	bool	m_fReverse;
 
-	SSorter( bool fReverse ) : m_fReverse(fReverse) { }
+	SSorter( EMode eMode, bool fReverse ) : m_eMode(eMode), m_fReverse(fReverse) { }
 
+	// Strict ordering on the field chosen by m_eMode: descending by default,
+	// ascending when m_fReverse is set.
 	bool operator()( const SDatum& sOne, const SDatum& sTwo ) const {
 		bool	fRet;
+		float	dOne, dTwo;
 
-		fRet = sOne.m_dDiff > sTwo.m_dDiff;
-		return ( m_fReverse ? !fRet : fRet ); }
+		switch( m_eMode ) {
+			case EModeData:
+				dOne = sOne.m_dData;
+				dTwo = sTwo.m_dData;
+				break;
+
+			case EModeAnswer:
+				dOne = sOne.m_dAnswer;
+				dTwo = sTwo.m_dAnswer;
+				// Without this break the case falls through into default and
+				// the answer values are overwritten by the diff values.
+				break;
+
+			default:
+				dOne = sOne.m_dDiff;
+				dTwo = sTwo.m_dDiff;
+				break; }
+
+		return ( m_fReverse ? ( dOne < dTwo ) : ( dTwo < dOne ) ); }
 };
 
 int main( int iArgs, char** aszArgs ) {
 	bool				fOne, fTwo;
 	string				strOne, strTwo;
 	int					iRet;
+	SSorter::EMode		eMode;
 
 	iRet = cmdline_parser2( iArgs, aszArgs, &sArgs, 0, 1, 0 );
 	if( sArgs.config_arg )
 			return 1; }
 		ifsm.close( ); }
 
+	// Map the --mode string onto the sorter mode. The option's declared values
+	// are "diff", "data" and "answer"; "ans" is still accepted for compatibility
+	// with the previous comparison. Anything else falls back to diff.
+	if( !strcmp( sArgs.mode_arg, "data" ) )
+		eMode = SSorter::EModeData;
+	else if( !strcmp( sArgs.mode_arg, "answer" ) || !strcmp( sArgs.mode_arg, "ans" ) )
+		eMode = SSorter::EModeAnswer;
+	else
+		eMode = SSorter::EModeDiff;
+
 	veciGenes.resize( Data.GetGenes( ) );
 	for( i = 0; i < Data.GetGenes( ); ++i )
 		veciGenes[ i ] = Answers.GetGene( Data.GetGene( i ) );
 				continue;
 			if( sArgs.everything_flag && CMeta::IsNaN( dAnswer ) )
 				dAnswer = dValue ? ( dValue - ( 1 / dValue ) ) : -FLT_MAX;
-			vecsData.push_back( SDatum( fabs( dValue - dAnswer ), i, j ) ); } }
-	sort( vecsData.begin( ), vecsData.end( ), SSorter( !!sArgs.reverse_flag ) );
+			vecsData.push_back( SDatum( dAnswer, dValue, i, j ) ); } }
+	sort( vecsData.begin( ), vecsData.end( ), SSorter( eMode, !!sArgs.reverse_flag ) );
 
 	if( ( ( iNumber = sArgs.count_arg ) < 0 ) || ( iNumber >= vecsData.size( ) ) )
 		iNumber = vecsData.size( );

tools/Explainer/Explainer.ggo

 							string	typestr="filename"	yes
 option	"answers"		w	"Answer DAT/DAB file"
 							string	typestr="filename"	yes
+option	"mode"			d	"Sort mode"
+							values="diff","data","answer"	default="diff"
 
 section "Miscellaneous"
 option	"count"			k	"Number of pairs to display"

tools/Explainer/cmdline.c

 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iExplainer.ggo --default-optional -C -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iExplainer.ggo --default-optional -C -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:
   "\nMain:",
   "  -i, --input=filename     Similarity DAT/DAB file",
   "  -w, --answers=filename   Answer DAT/DAB file",
+  "  -d, --mode=STRING        Sort mode  (possible values=\"diff\", \"data\", \n                             \"answer\" default=`diff')",
   "\nMiscellaneous:",
   "  -k, --count=INT          Number of pairs to display  (default=`-1')",
   "  -p, --positives          Include only positive pairs  (default=off)",
 }
 
 
+char *cmdline_parser_mode_values[] = {"diff", "data", "answer", 0} ;	/* Possible values for mode.  */
 char *cmdline_parser_unknowns_values[] = {"exclude", "include", "only", 0} ;	/* Possible values for unknowns.  */
 
 static char *
   args_info->version_given = 0 ;
   args_info->input_given = 0 ;
   args_info->answers_given = 0 ;
+  args_info->mode_given = 0 ;
   args_info->count_given = 0 ;
   args_info->positives_given = 0 ;
   args_info->negatives_given = 0 ;
   args_info->input_orig = NULL;
   args_info->answers_arg = NULL;
   args_info->answers_orig = NULL;
+  args_info->mode_arg = gengetopt_strdup ("diff");
+  args_info->mode_orig = NULL;
   args_info->count_arg = -1;
   args_info->count_orig = NULL;
   args_info->positives_flag = 0;
   args_info->version_help = gengetopt_args_info_help[1] ;
   args_info->input_help = gengetopt_args_info_help[3] ;
   args_info->answers_help = gengetopt_args_info_help[4] ;
-  args_info->count_help = gengetopt_args_info_help[6] ;
-  args_info->positives_help = gengetopt_args_info_help[7] ;
-  args_info->negatives_help = gengetopt_args_info_help[8] ;
-  args_info->everything_help = gengetopt_args_info_help[9] ;
-  args_info->unknowns_help = gengetopt_args_info_help[10] ;
-  args_info->fraction_help = gengetopt_args_info_help[11] ;
-  args_info->genes_help = gengetopt_args_info_help[13] ;
-  args_info->genex_help = gengetopt_args_info_help[14] ;
-  args_info->genet_help = gengetopt_args_info_help[15] ;
-  args_info->genee_help = gengetopt_args_info_help[16] ;
-  args_info->normalize_help = gengetopt_args_info_help[18] ;
-  args_info->invert_help = gengetopt_args_info_help[19] ;
-  args_info->reverse_help = gengetopt_args_info_help[20] ;
-  args_info->go_onto_help = gengetopt_args_info_help[22] ;
-  args_info->go_anno_help = gengetopt_args_info_help[23] ;
-  args_info->features_help = gengetopt_args_info_help[24] ;
-  args_info->memmap_help = gengetopt_args_info_help[26] ;
-  args_info->config_help = gengetopt_args_info_help[27] ;
-  args_info->verbosity_help = gengetopt_args_info_help[28] ;
+  args_info->mode_help = gengetopt_args_info_help[5] ;
+  args_info->count_help = gengetopt_args_info_help[7] ;
+  args_info->positives_help = gengetopt_args_info_help[8] ;
+  args_info->negatives_help = gengetopt_args_info_help[9] ;
+  args_info->everything_help = gengetopt_args_info_help[10] ;
+  args_info->unknowns_help = gengetopt_args_info_help[11] ;
+  args_info->fraction_help = gengetopt_args_info_help[12] ;
+  args_info->genes_help = gengetopt_args_info_help[14] ;
+  args_info->genex_help = gengetopt_args_info_help[15] ;
+  args_info->genet_help = gengetopt_args_info_help[16] ;
+  args_info->genee_help = gengetopt_args_info_help[17] ;
+  args_info->normalize_help = gengetopt_args_info_help[19] ;
+  args_info->invert_help = gengetopt_args_info_help[20] ;
+  args_info->reverse_help = gengetopt_args_info_help[21] ;
+  args_info->go_onto_help = gengetopt_args_info_help[23] ;
+  args_info->go_anno_help = gengetopt_args_info_help[24] ;
+  args_info->features_help = gengetopt_args_info_help[25] ;
+  args_info->memmap_help = gengetopt_args_info_help[27] ;
+  args_info->config_help = gengetopt_args_info_help[28] ;
+  args_info->verbosity_help = gengetopt_args_info_help[29] ;
   
 }
 
   free_string_field (&(args_info->input_orig));
   free_string_field (&(args_info->answers_arg));
   free_string_field (&(args_info->answers_orig));
+  free_string_field (&(args_info->mode_arg));
+  free_string_field (&(args_info->mode_orig));
   free_string_field (&(args_info->count_orig));
   free_string_field (&(args_info->unknowns_arg));
   free_string_field (&(args_info->unknowns_orig));
     write_into_file(outfile, "input", args_info->input_orig, 0);
   if (args_info->answers_given)
     write_into_file(outfile, "answers", args_info->answers_orig, 0);
+  if (args_info->mode_given)
+    write_into_file(outfile, "mode", args_info->mode_orig, cmdline_parser_mode_values);
   if (args_info->count_given)
     write_into_file(outfile, "count", args_info->count_orig, 0);
   if (args_info->positives_given)
         { "version",	0, NULL, 'V' },
         { "input",	1, NULL, 'i' },
         { "answers",	1, NULL, 'w' },
+        { "mode",	1, NULL, 'd' },
         { "count",	1, NULL, 'k' },
         { "positives",	0, NULL, 'p' },
         { "negatives",	0, NULL, 'P' },
         { NULL,	0, NULL, 0 }
       };
 
-      c = getopt_long (argc, argv, "hVi:w:k:pPeu:x:g:G:R:C:ntro:a:f:mc:v:", long_options, &option_index);
+      c = getopt_long (argc, argv, "hVi:w:d:k:pPeu:x:g:G:R:C:ntro:a:f:mc:v:", long_options, &option_index);
 
       if (c == -1) break;	/* Exit from `while (1)' loop.  */
 
             goto failure;
         
           break;
+        case 'd':	/* Sort mode.  */
+        
+        
+          if (update_arg( (void *)&(args_info->mode_arg), 
+               &(args_info->mode_orig), &(args_info->mode_given),