1. libsleipnir
  2. sleipnir

Commits

chut...@hutlab3.sph.harvard.edu  committed d6b169b

Improve COALESCE termination criteria (slightly more permissive)
Fix bug in missing value handling for aligned measures in CMeasure
Fix bug for ties in rank transform in CPCL::RankTransform
Add PValueSpearman, FisherTransform, better TCDF to CStatistics
Fix bug in newline handling in COALESCE
Add Spearman correlation option to Clinician
Fix bug in default edge cutoff value in Cliquer (none by default)
Add randomization scoring to DChecker
Add gene set limitation option to Dat2Dab
Add hub queries to Dat2Graph
Add sort mode option to Explainer (by prediction, gold standard, or diff)
Add gene (node) weighting option to Hubber
Add missing self-MI bias estimation to MIer (thanks to Maria Chikina!)
Fix missing newlines in SpeciesConnector

  • Participants
  • Parent commits 1f551fc
  • Branches sleipnir

Comments (0)

Files changed (85)

File src/coalesce.cpp

View file
  • Ignore whitespace
 		g_CatSleipnir( ).notice( "correlation pairs %d, bases %d, min size %d, merge size %d, max size %d",
 			GetNumberCorrelation( ), GetBasesPerMatch( ), GetSizeMinimum( ), GetSizeMerge( ),
 			GetSizeMaximum( ) ); }
-	for( dFailure = 1; dFailure > c_dEpsilon; dFailure *= GetPValueCorrelation( ) ) {
+	for( dFailure = 1; dFailure >= c_dEpsilon; dFailure *= max( pow( c_dEpsilon, 0.125 ), (double)GetPValueCorrelation( ) ) ) {
 		CCoalesceCluster			Cluster, Pot;
 		CCoalesceGroupHistograms	HistsCluster( GetBins( ), 1.0f / GetBasesPerMatch( ) );
 		CCoalesceGroupHistograms	HistsPot( GetBins( ), 1.0f / GetBasesPerMatch( ) );

File src/coalescecluster.cpp

View file
  • Ignore whitespace
 			dMaxCorr = vecsThreads[ i ].m_dMaxCorr;
 			iOne = vecsThreads[ i ].m_iOne;
 			iTwo = vecsThreads[ i ].m_iTwo; } }
-	if( ( dMinP * PCL.GetGenes( ) * ( PCL.GetGenes( ) - 1 ) * dFraction * dFraction ) < ( dPValue * 2 ) ) {
+	if( ( dMinP * PCL.GetGenes( ) * ( PCL.GetGenes( ) - 1 ) * dFraction * dFraction ) <= ( dPValue * 2 ) ) {
 		g_CatSleipnir( ).info( "CCoalesceClusterImpl::AddSeedPair( %g, %g ) seeding: %s, %s, %g (p=%g)",
 			dFraction, dPValue, PCL.GetGene( iOne ).c_str( ), PCL.GetGene( iTwo ).c_str( ), dMaxCorr, dMinP );
 		priiSeed.first = iOne;
 		if( !IsGene( iGene ) &&
 			( ( dR = CMeasurePearson::Pearson( &m_vecdCentroid.front( ), PCL.GetExperiments( ),
 			PCL.Get( iGene ), PCL.GetExperiments( ), IMeasure::EMapNone, NULL, NULL, &iN ) ) > 0 ) &&
-			( ( CStatistics::PValuePearson( dR, iN ) * PCL.GetGenes( ) ) < dPValue ) )
+			( ( CStatistics::PValuePearson( dR, iN ) * PCL.GetGenes( ) ) <= dPValue ) )
 			Add( iGene, Pot );
 
 	return true; }

File src/measure.cpp

View file
  • Ignore whitespace
 	return false; }
 
 double CMeasureImpl::MeasureTrim( const IMeasure* pMeasure, const float* adX, size_t iM, const float* adY,
-	size_t iN, const IMeasure::EMap eMap, const float* adWX, const float* adWY ) {
+	size_t iN, const IMeasure::EMap eMap, const float* adWX, const float* adWY, bool fAlign ) {
 	float*	adA;
 	float*	adB;
 	float*	adWA;
 	size_t	i, j, iA, iB;
 	double	dRet;
 
-	for( iA = i = 0; i < iM; ++i )
-		if( CMeta::IsNaN( adX[ i ] ) )
-			iA++;
-	for( iB = i = 0; i < iN; ++i )
-		if( CMeta::IsNaN( adY[ i ] ) )
-			iB++;
-	iA = iM - iA;
-	iB = iN - iB;
-
-	adA = new float[ iA ];
-	adWA = adWX ? new float[ iA ] : NULL;
-	for( i = j = 0; i < iM; ++i )
-		if( !CMeta::IsNaN( adX[ i ] ) ) {
-			if( adWA )
-				adWA[ j ] = adWX[ i ];
-			adA[ j++ ] = adX[ i ]; }
-	adB = new float[ iB ];
-	adWB = adWY ? new float[ iB ] : NULL;
-	for( i = j = 0; i < iN; ++i )
-		if( !CMeta::IsNaN( adY[ i ] ) ) {
-			if( adWB )
-				adWB[ j ] = adWY[ i ];
-			adB[ j++ ] = adY[ i ]; }
+	adA = new float[ iM ];
+	adB = new float[ iN ];
+	adWA = adWX ? new float[ iM ] : NULL;
+	adWB = adWY ? new float[ iN ] : NULL;
+	if( fAlign ) {
+		for( i = j = 0; i < min( iM, iN ); ++i )
+			if( !( CMeta::IsNaN( adX[ i ] ) || CMeta::IsNaN( adY[ i ] ) ) ) {
+				if( adWA )
+					adWA[ j ] = adWX[ i ];
+				if( adWB )
+					adWB[ j ] = adWY[ i ];
+				adA[ j ] = adX[ i ];
+				adB[ j++ ] = adY[ i ]; } }
+	else {
+		for( i = j = 0; i < iM; ++i )
+			if( !CMeta::IsNaN( adX[ i ] ) ) {
+				if( adWA )
+					adWA[ j ] = adWX[ i ];
+				adA[ j++ ] = adX[ i ]; }
+		for( i = j = 0; i < iN; ++i )
+			if( !CMeta::IsNaN( adY[ i ] ) ) {
+				if( adWB )
+					adWB[ j ] = adWY[ i ];
+				adB[ j++ ] = adY[ i ]; } }
 
 	dRet = pMeasure->Measure( adA, iA, adB, iB, eMap, adWA, adWB );
 	delete[] adA;
 	if( adWX || adWY )
 		return CMeta::GetNaN( );
 	if( CMeasureImpl::IsNaN( adX, iM ) || CMeasureImpl::IsNaN( adY, iN ) )
-		return CMeasureImpl::MeasureTrim( this, adX, iM, adY, iN, eMap, adWX, adWY );
+		return CMeasureImpl::MeasureTrim( this, adX, iM, adY, iN, eMap, adWX, adWY, false );
 	if( iM > iN )
 		return Measure( adY, iN, adX, iM, eMap, adWY, adWX );
 
 	if( iM != iN )
 		return CMeta::GetNaN( );
 	if( CMeasureImpl::IsNaN( adX, iM ) || CMeasureImpl::IsNaN( adY, iN ) )
-		return CMeasureImpl::MeasureTrim( this, adX, iM, adY, iN, eMap, adWX, adWY );
+		return CMeasureImpl::MeasureTrim( this, adX, iM, adY, iN, eMap, adWX, adWY, true );
 
 	dRet = ( adWX || adWY ) ? CMeasureKendallsTauImpl::MeasureWeighted( adX, adY, iN, adWX,
 		adWY ) : CMeasureKendallsTauImpl::MeasureUnweighted( adX, adY, iN );
 	if( ( iM != iN ) || adWX || adWY )
 		return CMeta::GetNaN( );
 	if( CMeasureImpl::IsNaN( adX, iM ) || CMeasureImpl::IsNaN( adY, iN ) )
-		return CMeasureImpl::MeasureTrim( this, adX, iM, adY, iN, eMap, adWX, adWY );
+		return CMeasureImpl::MeasureTrim( this, adX, iM, adY, iN, eMap, adWX, adWY, true );
 
 	if( m_fTransformed ) {
 		dSum = 0;
 	dP = CMeasurePearson::Pearson( adX, iM, adY, iN, EMapNone, adWX, adWY );
 	if( fabs( dP ) >= c_dBound )
 		dP *= c_dBound;
-	dP = log( ( 1 + dP ) / ( 1 - dP ) ) / 2;
+	dP = CStatistics::FisherTransform( dP );
 	if( m_dAverage != HUGE_VAL )
 		dP = ( dP - m_dAverage ) / m_dStdDev;
 	return dP; }

File src/measurei.h

View file
  • Ignore whitespace
 	friend class CMeasureSpearman;
 
 	static double MeasureTrim( const IMeasure*, const float*, size_t, const float*, size_t, const IMeasure::EMap,
-		const float*, const float* );
+		const float*, const float*, bool );
 	static bool IsNaN( const float*, size_t );
 
 	CMeasureImpl( const IMeasure*, bool );

File src/pcl.cpp

View file
  • Ignore whitespace
  * IMeasure::IsRank
  */
 void CPCL::RankTransform( ) {
-	size_t	i, j, k;
-	size_t*	aiRanks;
+	size_t			i, j, k;
+	vector<size_t>	veciRanks, veciCounts;
 
-	aiRanks = new size_t[ m_Data.GetColumns( ) ];
-	for( i = 0; i < m_Data.GetRows( ); ++i ) {
-		memset( aiRanks, 0, m_Data.GetColumns( ) * sizeof(*aiRanks) );
-		for( j = 0; j < m_Data.GetColumns( ); ++j )
-			for( k = 0; k < m_Data.GetColumns( ); ++k )
-				if( ( j != k ) && ( m_Data.Get( i, k ) < m_Data.Get( i, j ) ) )
-					aiRanks[ j ]++;
-		for( j = 0; j < m_Data.GetColumns( ); ++j )
-			m_Data.Set( i, j, (float)aiRanks[ j ] ); }
-	delete[] aiRanks; }
+	veciRanks.resize( GetExperiments( ) );
+	veciCounts.resize( GetExperiments( ) );
+	for( i = 0; i < GetGenes( ); ++i ) {
+		fill( veciRanks.begin( ), veciRanks.end( ), 0 );
+		for( j = 0; j < GetExperiments( ); ++j ) {
+			if( CMeta::IsNaN( Get( i, j ) ) )
+				continue;
+			for( k = 0; k < GetExperiments( ); ++k ) {
+				if( CMeta::IsNaN( Get( i, k ) ) )
+					continue;
+				if( ( j != k ) && ( Get( i, k ) < Get( i, j ) ) )
+					veciRanks[ j ]++; } }
+
+		fill( veciCounts.begin( ), veciCounts.end( ), 0 );
+		for( j = 0; j < GetExperiments( ); ++j )
+			if( !CMeta::IsNaN( Get( i, j ) ) )
+				veciCounts[ veciRanks[ j ] ]++;
+
+		for( j = 0; j < GetExperiments( ); ++j )
+			if( !CMeta::IsNaN( Get( i, j ) ) ) {
+				k = veciRanks[ j ];
+// n values tied at k smaller values share the mean of 1-based ranks k + 1 .. k + n, i.e. k + ( n + 1 ) / 2
+				Set( i, j, k + ( ( veciCounts[ k ] + 1 ) / 2.0f ) ); } } }
 
 /*!
  * \brief

File src/statistics.h

View file
  • Ignore whitespace
 	 * P-value corresponding to the given correlation and array size.
 	 * 
 	 * \see
-	 * CMeasurePearson
+	 * CMeasurePearson | PValueSpearman
 	 */
 	static double PValuePearson( double dR, size_t iN ) {
 		static const double	c_dEpsilon	= 1e-10;
 			return 0;
 		dF = iN - 2;
 		dT = dR * sqrt( dF / ( 1 - ( dR * dR ) ) );
-		return IncompleteBeta( dF / 2, 0.5, dF / ( dF + ( dT * dT ) ) ); }
+		return ( 1 - TCDF( dT, dF ) ); }
+
+	/*!
+	 * \brief
+	 * Return the two-tailed p-value of a Spearman correlation.
+	 * 
+	 * \param dR
+	 * Spearman correlation.
+	 * 
+	 * \param iN
+	 * Length of correlated vectors.
+	 * 
+	 * \returns
+	 * P-value corresponding to the given correlation and array size.
+	 * 
+	 * \see
+	 * CMeasureSpearman | PValuePearson
+	 */
+	static double PValueSpearman( double dR, size_t iN ) {
+		double	dT;
+		// Below three values, iN - 2 would be zero or wrap around (size_t), so report p = 1.
+		if( iN < 3 )
+			return 1;
+		// Fisher-transform z statistic left disabled below; the t statistic is used instead.
+//		dZ = sqrt( ( iN - 3 ) / 1.06 ) * CStatistics::FisherTransform( dR );
+		dT = dR * sqrt( ( iN - 2 ) / ( 1 - ( dR * dR ) ) );	// t statistic built from the correlation
+		return ( 1 - TCDF( dT, iN - 2 ) ); }	// TCDF receives iN - 2 as its degrees of freedom
+
+	static double FisherTransform( double dR ) {
+		// Fisher z-transform of dR: log( ( 1 + dR ) / ( 1 - dR ) ) / 2; not finite when dR is exactly +/-1.
+		return ( log( ( 1 + dR ) / ( 1 - dR ) ) / 2 ); }
 
 	/*!
 	 * \brief
 		dPoolVar = ( ( ( iNOne - 1 ) * dVarianceOne ) + ( ( iNTwo - 1 ) * dVarianceTwo ) ) / iDegFree;
 		dT = ( dMeanOne - dMeanTwo ) / sqrt( dPoolVar * ( ( 1.0 / iNOne ) + ( 1.0 / iNTwo ) ) );
 
-		return IncompleteBeta( 0.5 * iDegFree, 0.5, iDegFree / ( iDegFree + ( dT * dT ) ) ); }
+		return ( 1 - TCDF( dT, iDegFree ) ); }
 
 	/*!
 	 * \brief
 		iDegFree = iN - 1;
 		dT = sqrt( (float)iN ) * dMean / sqrt( dVariance );
 
-		return IncompleteBeta( 0.5 * iDegFree, 0.5, iDegFree / ( iDegFree + ( dT * dT ) ) ); }
+		return ( 1 - TCDF( dT, iDegFree ) ); }
 
 	/*!
 	 * \brief
 			( ( dVarianceTwo * dVarianceTwo ) / iNTwo / iNTwo / ( iNTwo - 1 ) ) );
 		dT = ( dMeanOne - dMeanTwo ) / sqrt( ( dVarianceOne / iNOne ) + ( dVarianceTwo / iNTwo ) );
 
-		return IncompleteBeta( 0.5 * dDegFree, 0.5, dDegFree / ( dDegFree + ( dT * dT ) ) ); }
+		return ( 1 - TCDF( dT, dDegFree ) ); }
 
 	/*!
 	 * \brief
 	 * \param dT
 	 * T value at which to sample the t-distribution.
 	 * 
-	 * \param iDF
+	 * \param dDF
 	 * Degrees of freedom of the desired t-distribution.
 	 * 
 	 * \returns
 	 * Complement of the p-value for the given T and degrees of freedom (PValuePearson and related functions return 1 - TCDF).
 	 */
-	static double TCDF( double dT, size_t iDF ) {
+	static double TCDF( double dT, double dDF ) {
 
-		return ( 1 - IncompleteBeta( 0.5 * iDF, 0.5, iDF / ( iDF + ( dT * dT ) ) ) ); }
+		return ( 1 - IncompleteBeta( 0.5 * dDF, 0.5, dDF / ( dDF + ( dT * dT ) ) ) ); }
 
 	/*!
 	 * \brief

File tools/Answerer/cmdline.c

View file
  • Ignore whitespace
 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iAnswerer.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iAnswerer.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

File tools/BNConverter/cmdline.c

View file
  • Ignore whitespace
 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iBNConverter.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iBNConverter.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

File tools/BNCreator/cmdline.c

View file
  • Ignore whitespace
 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iBNCreator.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iBNCreator.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

File tools/BNEvaluator/cmdline.c

View file
  • Ignore whitespace
 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iBNEvaluator.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iBNEvaluator.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

File tools/BNFunc/cmdline.c

View file
  • Ignore whitespace
 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iBNFunc.ggo --default-optional -C -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iBNFunc.ggo --default-optional -C -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

File tools/BNServer/cmdline.c

View file
  • Ignore whitespace
 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iBNServer.ggo --default-optional -C -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iBNServer.ggo --default-optional -C -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

File tools/BNTester/cmdline.c

View file
  • Ignore whitespace
 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iBNTester.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iBNTester.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

File tools/BNTruster/cmdline.c

View file
  • Ignore whitespace
 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iBNTruster.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iBNTruster.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

File tools/BNUnraveler/cmdline.c

View file
  • Ignore whitespace
 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iBNUnraveler.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iBNUnraveler.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

File tools/BNWeaver/cmdline.c

View file
  • Ignore whitespace
 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iBNWeaver.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iBNWeaver.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

File tools/BNs2Txt/cmdline.c

View file
  • Ignore whitespace
 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iBNs2Txt.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iBNs2Txt.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

File tools/COALESCE/COALESCE.cpp

View file
  • Ignore whitespace
 
 			ifsm.getline( acBuffer, c_iBuffer - 1 );
 			acBuffer[ c_iBuffer - 1 ] = 0;
-			CMeta::Tokenize( acBuffer, vecstrLine );
+			CMeta::Tokenize( CMeta::Trim( acBuffer ).c_str( ), vecstrLine );
 			for( i = 0; i < vecstrLine.size( ); ++i )
 				for( j = 0; j < PCL.GetExperiments( ); ++j )
 					if( vecstrLine[ i ] == PCL.GetExperiment( j ) ) {

File tools/COALESCE/cmdline.c

View file
  • Ignore whitespace
 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iCOALESCE.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iCOALESCE.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

File tools/Clinician/Clinician.cpp

View file
  • Ignore whitespace
 	vector<bool>		vecfClinical;
 	vector<size_t>		veciGenes2PCL, veciPCL2Genes, veciIndices, veciScores;
 	vector<float>		vecdScores;
-	CMeasurePearson		MeasurePearson;
 	CGenome				Genome;
+	float				d;
 
 	if( cmdline_parser( iArgs, aszArgs, &sArgs ) ) {
 		cmdline_parser_print_help( );
 	if( PCL.GetFeatures( ) < 2 ) {
 		cerr << "PCL requires at least one clinical variable feature" << endl;
 		return 1; }
+	if( sArgs.spearman_flag )
+		PCL.RankTransform( );
 	if( sArgs.global_arg && !Dat.Open( sArgs.global_arg, !!sArgs.memmap_flag ) ) {
 		cerr << "Could not open: " << sArgs.global_arg << endl;
 		return 1; }
 
 		for( j = 0; j < veciFinal.size( ); ++j ) {
 			k = veciPCL2Genes[veciFinal[j]];
+			d = (float)( sArgs.spearman_flag ? CStatistics::PValueSpearman : CStatistics::PValuePearson )( vecdScores[k], veciScores[k] );
 			cout << PCL.GetGene( i ) << '\t' << PCL.GetGene( veciFinal[j] ) << '\t' << vecdScores[k] << '\t' << veciScores[k] << '\t' <<
-				( CStatistics::PValuePearson( vecdScores[k], veciScores[k] ) * iGene ) << endl; } }
+				( d * iGene ) << endl; } }
 
 	return 0; }

File tools/Clinician/Clinician.ggo

View file
  • Ignore whitespace
 								int	default="1000"
 option	"hefalmp"			a	"Perform HEFalMp query instead of bioPIXIE query"
 								flag	on
+option	"spearman"			p	"Use Spearman in place of Pearson correlation"
+								flag	off
 
 section "Optional"
 option	"skip"				s	"Columns to skip in input PCL"

File tools/Cliquer/Cliquer.cpp

View file
  • Ignore whitespace
 	if( cmdline_parser( iArgs, aszArgs, &sArgs ) ) {
 		cmdline_parser_print_help( );
 		return 1; }
+	if( sArgs.cutoff_arg < -1e-20 )
+		sArgs.cutoff_arg = CMeta::GetNaN( );
 	CMeta Meta( sArgs.verbosity_arg );
 
 	if( sArgs.input_arg ) {
 		return 1; }
 	if( sArgs.normalize_flag )
 		Dat.Normalize( CDat::ENormalizeSigmoid );
-	if( sArgs.cutoff_arg )
+	if( !CMeta::IsNaN( sArgs.cutoff_arg ) )
 		for( i = 0; i < Dat.GetGenes( ); ++i )
 			for( j = ( i + 1 ); j < Dat.GetGenes( ); ++j )
 				if( !CMeta::IsNaN( d = Dat.Get( i, j ) ) && ( d < sArgs.cutoff_arg ) )

File tools/Cliquer/Cliquer.ggo

View file
  • Ignore whitespace
 option	"normalize"		n	"Normalize input file"
 							flag	off
 option	"cutoff"		c	"Exclude edges below cutoff"
-							double	default="0"
+							double	default="-1e30"
 
 section "Optional"
 option	"memmap"		m	"Memory map input"

File tools/Cliquer/cmdline.c

View file
  • Ignore whitespace
 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iCliquer.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iCliquer.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:
   "\nPreprocessing:",
   "  -k, --knowns=filename     Known interactions (DAT/DAB) to ignore",
   "  -n, --normalize           Normalize input file  (default=off)",
-  "  -c, --cutoff=DOUBLE       Exclude edges below cutoff  (default=`0')",
+  "  -c, --cutoff=DOUBLE       Exclude edges below cutoff  (default=`-1e30')",
   "\nOptional:",
   "  -m, --memmap              Memory map input  (default=off)",
   "  -v, --verbosity=INT       Message verbosity  (default=`5')",
   args_info->knowns_arg = NULL;
   args_info->knowns_orig = NULL;
   args_info->normalize_flag = 0;
-  args_info->cutoff_arg = 0;
+  args_info->cutoff_arg = -1e30;
   args_info->cutoff_orig = NULL;
   args_info->memmap_flag = 0;
   args_info->verbosity_arg = 5;
         
           if (update_arg( (void *)&(args_info->cutoff_arg), 
                &(args_info->cutoff_orig), &(args_info->cutoff_given),
-              &(local_args_info.cutoff_given), optarg, 0, "0", ARG_DOUBLE,
+              &(local_args_info.cutoff_given), optarg, 0, "-1e30", ARG_DOUBLE,
               check_ambiguity, override, 0, 0,
               "cutoff", 'c',
               additional_error))

File tools/Cliquer/cmdline.h

View file
  • Ignore whitespace
   const char *knowns_help; /**< @brief Known interactions (DAT/DAB) to ignore help description.  */
   int normalize_flag;	/**< @brief Normalize input file (default=off).  */
   const char *normalize_help; /**< @brief Normalize input file help description.  */
-  double cutoff_arg;	/**< @brief Exclude edges below cutoff (default='0').  */
+  double cutoff_arg;	/**< @brief Exclude edges below cutoff (default='-1e30').  */
   char * cutoff_orig;	/**< @brief Exclude edges below cutoff original value given at command line.  */
   const char *cutoff_help; /**< @brief Exclude edges below cutoff help description.  */
   int memmap_flag;	/**< @brief Memory map input (default=off).  */

File tools/Clusterer/cmdline.c

View file
  • Ignore whitespace
 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iClusterer.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iClusterer.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

File tools/Clusters2Dab/cmdline.c

View file
  • Ignore whitespace
 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iClusters2Dab.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iClusters2Dab.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

File tools/Combiner/cmdline.c

View file
  • Ignore whitespace
 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iCombiner.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iCombiner.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

File tools/Contexter/cmdline.c

View file
  • Ignore whitespace
 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iContexter.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iContexter.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

File tools/Counter/cmdline.c

View file
  • Ignore whitespace
 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iCounter.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iCounter.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

File tools/DChecker/DChecker.cpp

View file
  • Ignore whitespace
 int main( int iArgs, char** aszArgs ) {
 	CDat				Answers, Data;
 	gengetopt_args_info	sArgs;
-	size_t				i, j, k, m, iOne, iTwo, iGenes, iPositives, iNegatives, iBins;
+	size_t				i, j, k, m, iOne, iTwo, iGenes, iPositives, iNegatives, iBins, iRand;
 	vector<size_t>		veciGenes, veciRec, veciRecTerm;
 	CFullMatrix<bool>	MatGenes;
 	CFullMatrix<size_t>	MatResults;
 	veciGenes.resize( Answers.GetGenes( ) );
 	for( i = 0; i < Answers.GetGenes( ); ++i )
 		veciGenes[ i ] = Data.GetGene( Answers.GetGene( i ) );
-	if( sArgs.finite_flag ) {
-		vector<float>	vecdValues;
-		{
-			set<float>		setdValues;
+	for( iRand = 0; iRand <= (size_t)sArgs.randomize_arg; ++iRand ) {
+		if( iRand )
+			Data.Randomize( );
+		if( sArgs.finite_flag ) {
+			vector<float>	vecdValues;
+			{
+				set<float>		setdValues;
 
-			for( i = 0; i < Answers.GetGenes( ); ++i ) {
-				if( ( iOne = veciGenes[ i ] ) == -1 )
-					continue;
+				for( i = 0; i < Answers.GetGenes( ); ++i ) {
+					if( ( iOne = veciGenes[ i ] ) == -1 )
+						continue;
+					for( j = ( i + 1 ); j < Answers.GetGenes( ); ++j ) {
+						if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
+							CMeta::IsNaN( dValue = Data.Get( iOne, iTwo ) ) ||
+							CMeta::IsNaN( Answers.Get( i, j ) ) )
+							continue;
+						if( sArgs.invert_flag )
+							dValue = 1 - dValue;
+						setdValues.insert( dValue ); } }
+				vecdValues.resize( setdValues.size( ) );
+				copy( setdValues.begin( ), setdValues.end( ), vecdValues.begin( ) );
+			}
+			sort( vecdValues.begin( ), vecdValues.end( ) );
+			for( i = 0; i < vecdValues.size( ); ++i )
+				mapValues[ vecdValues[ i ] ] = i;
+			iBins = mapValues.size( ); }
+		else
+			iBins = sArgs.bins_arg;
+		MatResults.Initialize( iBins ? ( iBins + 1 ) :
+			(size_t)( ( sArgs.max_arg - sArgs.min_arg ) / sArgs.delta_arg ) + 1, 4 );
+		MatGenes.Initialize( veciGenes.size( ), MatResults.GetRows( ) );
+
+		for( iGenes = 0; !sArgs.inputs_num || ( iGenes < sArgs.inputs_num ); ++iGenes ) {
+			MatResults.Clear( );
+			MatGenes.Clear( );
+
+			if( sArgs.inputs_num ) {
+				CGenes		Genes( Genome );
+				ifstream	ifsm;
+
+				ifsm.open( sArgs.inputs[ iGenes ] );
+				if( !Genes.Open( ifsm ) ) {
+					cerr << "Couldn't open: " << sArgs.inputs[ iGenes ] << endl;
+					return 1; }
+				vecfHere.resize( Answers.GetGenes( ) );
+				for( i = 0; i < vecfHere.size( ); ++i )
+					vecfHere[ i ] = Genes.IsGene( Answers.GetGene( i ) );
+				cerr << "Processing " << sArgs.inputs[ iGenes ] << "..." << endl;
+				ifsm.close( ); }
+
+			if( mapValues.size( ) ) {
+				for( i = 0; i < Answers.GetGenes( ); ++i ) {
+					if( ( iOne = veciGenes[ i ] ) == -1 )
+						continue;
+					for( j = ( i + 1 ); j < Answers.GetGenes( ); ++j ) {
+						if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
+							CMeta::IsNaN( dValue = Data.Get( iOne, iTwo ) ) ||
+							CMeta::IsNaN( dAnswer = Answers.Get( i, j ) ) )
+							continue;
+						if( !( vecfHere.empty( ) ||
+							( dAnswer && vecfHere[ i ] && vecfHere[ j ] ) ||
+							( !dAnswer && ( vecfHere[ i ] || vecfHere[ j ] ) ) ) )
+							continue;
+						if( sArgs.invert_flag )
+							dValue = 1 - dValue;
+						for( k = 0; k <= mapValues[ dValue ]; ++k ) {
+							MatGenes.Set( i, k, true );
+							MatGenes.Set( j, k, true );
+							MatResults.Get( k, dAnswer ? ETFPN_TP : ETFPN_FP )++; }
+						for( ; k < MatResults.GetRows( ); ++k )
+							MatResults.Get( k, dAnswer ? ETFPN_FN : ETFPN_TN )++; } } }
+			else if( iBins ) {
+				vector<SDatum>	vecsData;
+				size_t			iChunk;
+
+				for( iPositives = iNegatives = i = 0; i < Answers.GetGenes( ); ++i ) {
+					if( ( iOne = veciGenes[ i ] ) == -1 )
+						continue;
+					for( j = ( i + 1 ); j < Answers.GetGenes( ); ++j ) {
+						if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
+							CMeta::IsNaN( dAnswer = Answers.Get( i, j ) ) ||
+							CMeta::IsNaN( dValue = Data.Get( iOne, iTwo ) ) )
+							continue;
+						if( !( vecfHere.empty( ) ||
+							( dAnswer && vecfHere[ i ] && vecfHere[ j ] ) ||
+							( !dAnswer && ( vecfHere[ i ] || vecfHere[ j ] ) ) ) )
+							continue;
+
+						MatGenes.Set( i, 0, true );
+						MatGenes.Set( j, 0, true );
+						if( dAnswer )
+							iPositives++;
+						else
+							iNegatives++;
+						vecsData.push_back( SDatum( dValue, i, j, dAnswer ) ); } }
+				sort( vecsData.begin( ), vecsData.end( ), SSorter( !!sArgs.invert_flag ) );
+				iChunk = (size_t)( 0.5 + ( (float)vecsData.size( ) / ( MatResults.GetRows( ) - 1 ) ) );
+				if( sArgs.sse_flag ) {
+					vecdSSE.resize( MatResults.GetRows( ) );
+					veciPositives.resize( vecdSSE.size( ) );
+					for( i = 1,j = 0; i < vecdSSE.size( ); ++i,j += iChunk ) {
+						veciPositives[ veciPositives.size( ) - i - 1 ] = veciPositives[ veciPositives.size( ) - i ];
+						vecdSSE[ vecdSSE.size( ) - i - 1 ] = vecdSSE[ vecdSSE.size( ) - i ];
+						for( k = 0; k < iChunk; ++k ) {
+							if( ( j + k ) >= vecsData.size( ) )
+								break;
+							const SDatum&	sDatum	= vecsData[ vecsData.size( ) - ( j + k ) - 1 ];
+
+							for( m = 0; m < ( vecdSSE.size( ) - i ); ++m ) {
+								MatGenes.Set( sDatum.m_iOne, m, true );
+								MatGenes.Set( sDatum.m_iTwo, m, true ); }
+							dValue = sDatum.m_dValue - sDatum.m_dAnswer;
+							veciPositives[ veciPositives.size( ) - i - 1 ]++;
+							vecdSSE[ vecdSSE.size( ) - i - 1 ] += dValue * dValue; } } }
+				else {
+					veciPositives.resize( MatResults.GetRows( ) - 1 );
+					veciNegatives.resize( veciPositives.size( ) );
+					for( i = 0; i < veciNegatives.size( ); ++i )
+						veciNegatives[ i ] = veciPositives[ i ] = 0;
+					for( i = j = 0; i < veciPositives.size( ); ++i,j += iChunk )
+						for( k = 0; k < iChunk; ++k ) {
+							if( ( j + k ) >= vecsData.size( ) )
+								break;
+							const SDatum&	sDatum	= vecsData[ j + k ];
+
+							for( m = i; m > 0; --m ) {
+								MatGenes.Set( sDatum.m_iOne, m, true );
+								MatGenes.Set( sDatum.m_iTwo, m, true ); }
+							if( Answers.Get( sDatum.m_iOne, sDatum.m_iTwo ) )
+								veciPositives[ i ]++;
+							else
+								veciNegatives[ i ]++; }
+
+					MatResults.Set( 0, ETFPN_TP, iPositives );
+					MatResults.Set( 0, ETFPN_FP, iNegatives );
+					MatResults.Set( 0, ETFPN_TN, 0 );
+					MatResults.Set( 0, ETFPN_FN, 0 );
+					for( i = 1; i < MatResults.GetRows( ); ++i ) {
+						MatResults.Set( i, ETFPN_TP, MatResults.Get( i - 1, ETFPN_TP ) - veciPositives[ i - 1 ] );
+						MatResults.Set( i, ETFPN_FP, MatResults.Get( i - 1, ETFPN_FP ) - veciNegatives[ i - 1 ] );
+						MatResults.Set( i, ETFPN_TN, MatResults.Get( i - 1, ETFPN_TN ) + veciNegatives[ i - 1 ] );
+						MatResults.Set( i, ETFPN_FN, MatResults.Get( i - 1, ETFPN_FN ) +
+							veciPositives[ i - 1 ] ); } } }
+			else
+				for( i = 0; i < Answers.GetGenes( ); ++i ) {
+					if( !( i % 1000 ) )
+						cerr << "Processing gene " << i << '/' << Answers.GetGenes( ) << endl;
+					if( ( iOne = veciGenes[ i ] ) == -1 )
+						continue;
+					for( j = ( i + 1 ); j < Answers.GetGenes( ); ++j ) {
+						if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
+							CMeta::IsNaN( dAnswer = Answers.Get( i, j ) ) ||
+							CMeta::IsNaN( dValue = Data.Get( iOne, iTwo ) ) )
+							continue;
+						if( !( vecfHere.empty( ) ||
+							( dAnswer && vecfHere[ i ] && vecfHere[ j ] ) ||
+							( !dAnswer && ( vecfHere[ i ] || vecfHere[ j ] ) ) ) )
+							continue;
+						if( sArgs.invert_flag )
+							dValue = 1 - dValue;
+
+						iMax = (int)ceil( ( dValue - sArgs.min_arg ) / sArgs.delta_arg );
+						if( iMax > (int)MatResults.GetRows( ) )
+							iMax = (int)MatResults.GetRows( );
+						eTFPN = (ETFPN)!dAnswer;
+						for( k = 0; (int)k < iMax; ++k ) {
+							MatResults.Get( k, eTFPN )++;
+							MatGenes.Set( i, k, true );
+							MatGenes.Set( j, k, true ); }
+						eTFPN = (ETFPN)( 2 + !eTFPN );
+						for( ; k < (int)MatResults.GetRows( ); ++k )
+							MatResults.Get( k, eTFPN )++; } }
+			for( iPositives = iNegatives = i = 0; i < Answers.GetGenes( ); ++i )
 				for( j = ( i + 1 ); j < Answers.GetGenes( ); ++j ) {
-					if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
-						CMeta::IsNaN( dValue = Data.Get( iOne, iTwo ) ) ||
-						CMeta::IsNaN( Answers.Get( i, j ) ) )
-						continue;
-					if( sArgs.invert_flag )
-						dValue = 1 - dValue;
-					setdValues.insert( dValue ); } }
-			vecdValues.resize( setdValues.size( ) );
-			copy( setdValues.begin( ), setdValues.end( ), vecdValues.begin( ) );
-		}
-		sort( vecdValues.begin( ), vecdValues.end( ) );
-		for( i = 0; i < vecdValues.size( ); ++i )
-			mapValues[ vecdValues[ i ] ] = i;
-		iBins = mapValues.size( ); }
-	else
-		iBins = sArgs.bins_arg;
-	MatResults.Initialize( iBins ? ( iBins + 1 ) :
-		(size_t)( ( sArgs.max_arg - sArgs.min_arg ) / sArgs.delta_arg ) + 1, 4 );
-	MatGenes.Initialize( veciGenes.size( ), MatResults.GetRows( ) );
-
-	for( iGenes = 0; !sArgs.inputs_num || ( iGenes < sArgs.inputs_num ); ++iGenes ) {
-		MatResults.Clear( );
-		MatGenes.Clear( );
-
-		if( sArgs.inputs_num ) {
-			CGenes		Genes( Genome );
-			ifstream	ifsm;
-
-			ifsm.open( sArgs.inputs[ iGenes ] );
-			if( !Genes.Open( ifsm ) ) {
-				cerr << "Couldn't open: " << sArgs.inputs[ iGenes ] << endl;
-				return 1; }
-			vecfHere.resize( Answers.GetGenes( ) );
-			for( i = 0; i < vecfHere.size( ); ++i )
-				vecfHere[ i ] = Genes.IsGene( Answers.GetGene( i ) );
-			cerr << "Processing " << sArgs.inputs[ iGenes ] << "..." << endl;
-			ifsm.close( ); }
-
-		if( mapValues.size( ) ) {
-			for( i = 0; i < Answers.GetGenes( ); ++i ) {
-				if( ( iOne = veciGenes[ i ] ) == -1 )
-					continue;
-				for( j = ( i + 1 ); j < Answers.GetGenes( ); ++j ) {
-					if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
-						CMeta::IsNaN( dValue = Data.Get( iOne, iTwo ) ) ||
-						CMeta::IsNaN( dAnswer = Answers.Get( i, j ) ) )
-						continue;
-					if( !( vecfHere.empty( ) ||
+					if( CMeta::IsNaN( dAnswer = Answers.Get( i, j ) ) ||
+						!( vecfHere.empty( ) ||
 						( dAnswer && vecfHere[ i ] && vecfHere[ j ] ) ||
 						( !dAnswer && ( vecfHere[ i ] || vecfHere[ j ] ) ) ) )
 						continue;
-					if( sArgs.invert_flag )
-						dValue = 1 - dValue;
-					for( k = 0; k <= mapValues[ dValue ]; ++k ) {
-						MatGenes.Set( i, k, true );
-						MatGenes.Set( j, k, true );
-						MatResults.Get( k, dAnswer ? ETFPN_TP : ETFPN_FP )++; }
-					for( ; k < MatResults.GetRows( ); ++k )
-						MatResults.Get( k, dAnswer ? ETFPN_FN : ETFPN_TN )++; } } }
-		else if( iBins ) {
-			vector<SDatum>	vecsData;
-			size_t			iChunk;
-
-			for( iPositives = iNegatives = i = 0; i < Answers.GetGenes( ); ++i ) {
-				if( ( iOne = veciGenes[ i ] ) == -1 )
-					continue;
-				for( j = ( i + 1 ); j < Answers.GetGenes( ); ++j ) {
-					if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
-						CMeta::IsNaN( dAnswer = Answers.Get( i, j ) ) ||
-						CMeta::IsNaN( dValue = Data.Get( iOne, iTwo ) ) )
-						continue;
-					if( !( vecfHere.empty( ) ||
-						( dAnswer && vecfHere[ i ] && vecfHere[ j ] ) ||
-						( !dAnswer && ( vecfHere[ i ] || vecfHere[ j ] ) ) ) )
-						continue;
-
-					MatGenes.Set( i, 0, true );
-					MatGenes.Set( j, 0, true );
 					if( dAnswer )
 						iPositives++;
 					else
-						iNegatives++;
-					vecsData.push_back( SDatum( dValue, i, j, dAnswer ) ); } }
-			sort( vecsData.begin( ), vecsData.end( ), SSorter( !!sArgs.invert_flag ) );
-			iChunk = (size_t)( 0.5 + ( (float)vecsData.size( ) / ( MatResults.GetRows( ) - 1 ) ) );
-			if( sArgs.sse_flag ) {
-				vecdSSE.resize( MatResults.GetRows( ) );
-				veciPositives.resize( vecdSSE.size( ) );
-				for( i = 1,j = 0; i < vecdSSE.size( ); ++i,j += iChunk ) {
-					veciPositives[ veciPositives.size( ) - i - 1 ] = veciPositives[ veciPositives.size( ) - i ];
-					vecdSSE[ vecdSSE.size( ) - i - 1 ] = vecdSSE[ vecdSSE.size( ) - i ];
-					for( k = 0; k < iChunk; ++k ) {
-						if( ( j + k ) >= vecsData.size( ) )
-							break;
-						const SDatum&	sDatum	= vecsData[ vecsData.size( ) - ( j + k ) - 1 ];
+						iNegatives++; }
 
-						for( m = 0; m < ( vecdSSE.size( ) - i ); ++m ) {
-							MatGenes.Set( sDatum.m_iOne, m, true );
-							MatGenes.Set( sDatum.m_iTwo, m, true ); }
-						dValue = sDatum.m_dValue - sDatum.m_dAnswer;
-						veciPositives[ veciPositives.size( ) - i - 1 ]++;
-						vecdSSE[ vecdSSE.size( ) - i - 1 ] += dValue * dValue; } } }
-			else {
-				veciPositives.resize( MatResults.GetRows( ) - 1 );
-				veciNegatives.resize( veciPositives.size( ) );
-				for( i = 0; i < veciNegatives.size( ); ++i )
-					veciNegatives[ i ] = veciPositives[ i ] = 0;
-				for( i = j = 0; i < veciPositives.size( ); ++i,j += iChunk )
-					for( k = 0; k < iChunk; ++k ) {
-						if( ( j + k ) >= vecsData.size( ) )
-							break;
-						const SDatum&	sDatum	= vecsData[ j + k ];
+			veciRec.resize( MatResults.GetRows( ) );
+			veciRecTerm.resize( MatResults.GetRows( ) );
+			for( i = 0; i < veciRec.size( ); ++i ) {
+				veciRec[ i ] = veciRecTerm[ i ] = 0;
+				for( j = 0; j < MatGenes.GetRows( ); ++j )
+					if( MatGenes.Get( j, i ) ) {
+						veciRec[ i ]++;
+						if( vecfHere.size( ) && vecfHere[ j ] )
+							veciRecTerm[ i ]++; }
+				for( j = 0; j < veciGenesTerm.size( ); ++j )
+					if( MatGenes.Get( veciGenesTerm[ j ], i ) &&
+						( vecfHere.empty( ) || !vecfHere[ veciGenesTerm[ j ] ] ) )
+						veciRecTerm[ i ]++; }
 
-						for( m = i; m > 0; --m ) {
-							MatGenes.Set( sDatum.m_iOne, m, true );
-							MatGenes.Set( sDatum.m_iTwo, m, true ); }
-						if( Answers.Get( sDatum.m_iOne, sDatum.m_iTwo ) )
-							veciPositives[ i ]++;
-						else
-							veciNegatives[ i ]++; }
+			if( sArgs.inputs_num ) {
+				ofsm.open( ( (string)sArgs.directory_arg + '/' +
+					CMeta::Basename( sArgs.inputs[ iGenes ] ) + ".bins" ).c_str( ) );
+				postm = &ofsm; }
+			else
+				postm = &cout;
 
-				MatResults.Set( 0, ETFPN_TP, iPositives );
-				MatResults.Set( 0, ETFPN_FP, iNegatives );
-				MatResults.Set( 0, ETFPN_TN, 0 );
-				MatResults.Set( 0, ETFPN_FN, 0 );
-				for( i = 1; i < MatResults.GetRows( ); ++i ) {
-					MatResults.Set( i, ETFPN_TP, MatResults.Get( i - 1, ETFPN_TP ) - veciPositives[ i - 1 ] );
-					MatResults.Set( i, ETFPN_FP, MatResults.Get( i - 1, ETFPN_FP ) - veciNegatives[ i - 1 ] );
-					MatResults.Set( i, ETFPN_TN, MatResults.Get( i - 1, ETFPN_TN ) + veciNegatives[ i - 1 ] );
-					MatResults.Set( i, ETFPN_FN, MatResults.Get( i - 1, ETFPN_FN ) +
-						veciPositives[ i - 1 ] ); } } }
-		else
-			for( i = 0; i < Answers.GetGenes( ); ++i ) {
-				if( !( i % 1000 ) )
-					cerr << "Processing gene " << i << '/' << Answers.GetGenes( ) << endl;
-				if( ( iOne = veciGenes[ i ] ) == -1 )
-					continue;
-				for( j = ( i + 1 ); j < Answers.GetGenes( ); ++j ) {
-					if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
-						CMeta::IsNaN( dAnswer = Answers.Get( i, j ) ) ||
-						CMeta::IsNaN( dValue = Data.Get( iOne, iTwo ) ) )
-						continue;
-					if( !( vecfHere.empty( ) ||
-						( dAnswer && vecfHere[ i ] && vecfHere[ j ] ) ||
-						( !dAnswer && ( vecfHere[ i ] || vecfHere[ j ] ) ) ) )
-						continue;
-					if( sArgs.invert_flag )
-						dValue = 1 - dValue;
+			if( !sArgs.sse_flag ) {
+				*postm << "#	P	" << iPositives << endl;
+				*postm << "#	N	" << iNegatives << endl; }
+			*postm << "Cut	Genes	" << ( sArgs.sse_flag ? "Pairs	SSE" : "TP	FP	TN	FN" ) << endl;
+			for( i = 0; i < MatResults.GetRows( ); ++i ) {
+				*postm << ( iBins ? i : ( sArgs.min_arg + ( i * sArgs.delta_arg ) ) ) << '\t' <<
+					veciRec[ i ];
+				if( sArgs.sse_flag )
+					*postm << '\t' << veciPositives[ i ] << '\t' << vecdSSE[ i ];
+				else
+					for( j = 0; j < MatResults.GetColumns( ); ++j )
+						*postm << '\t' << MatResults.Get( i, j );
+				if( veciGenesTerm.size( ) || vecfHere.size( ) )
+					*postm << '\t' << veciRecTerm[ i ];
+				*postm << endl; }
+			if( !sArgs.sse_flag )
+				*postm << "#	AUC	" << ( sArgs.auc_arg ?
+					AUCMod( Data, Answers, vecfHere, !!sArgs.invert_flag, sArgs.auc_arg ) :
+					CStatistics::WilcoxonRankSum( Data, Answers, vecfHere, !!sArgs.invert_flag ) ) << endl;
 
-					iMax = (int)ceil( ( dValue - sArgs.min_arg ) / sArgs.delta_arg );
-					if( iMax > (int)MatResults.GetRows( ) )
-						iMax = (int)MatResults.GetRows( );
-					eTFPN = (ETFPN)!dAnswer;
-					for( k = 0; (int)k < iMax; ++k ) {
-						MatResults.Get( k, eTFPN )++;
-						MatGenes.Set( i, k, true );
-						MatGenes.Set( j, k, true ); }
-					eTFPN = (ETFPN)( 2 + !eTFPN );
-					for( ; k < (int)MatResults.GetRows( ); ++k )
-						MatResults.Get( k, eTFPN )++; } }
-		for( iPositives = iNegatives = i = 0; i < Answers.GetGenes( ); ++i )
-			for( j = ( i + 1 ); j < Answers.GetGenes( ); ++j ) {
-				if( CMeta::IsNaN( dAnswer = Answers.Get( i, j ) ) ||
-					!( vecfHere.empty( ) ||
-					( dAnswer && vecfHere[ i ] && vecfHere[ j ] ) ||
-					( !dAnswer && ( vecfHere[ i ] || vecfHere[ j ] ) ) ) )
-					continue;
-				if( dAnswer )
-					iPositives++;
-				else
-					iNegatives++; }
+			if( sArgs.inputs_num )
+				ofsm.close( );
+			else
+				cout.flush( );
 
-		veciRec.resize( MatResults.GetRows( ) );
-		veciRecTerm.resize( MatResults.GetRows( ) );
-		for( i = 0; i < veciRec.size( ); ++i ) {
-			veciRec[ i ] = veciRecTerm[ i ] = 0;
-			for( j = 0; j < MatGenes.GetRows( ); ++j )
-				if( MatGenes.Get( j, i ) ) {
-					veciRec[ i ]++;
-					if( vecfHere.size( ) && vecfHere[ j ] )
-						veciRecTerm[ i ]++; }
-			for( j = 0; j < veciGenesTerm.size( ); ++j )
-				if( MatGenes.Get( veciGenesTerm[ j ], i ) &&
-					( vecfHere.empty( ) || !vecfHere[ veciGenesTerm[ j ] ] ) )
-					veciRecTerm[ i ]++; }
-
-		if( sArgs.inputs_num ) {
-			ofsm.open( ( (string)sArgs.directory_arg + '/' +
-				CMeta::Basename( sArgs.inputs[ iGenes ] ) + ".bins" ).c_str( ) );
-			postm = &ofsm; }
-		else
-			postm = &cout;
-
-		if( !sArgs.sse_flag ) {
-			*postm << "#	P	" << iPositives << endl;
-			*postm << "#	N	" << iNegatives << endl; }
-		*postm << "Cut	Genes	" << ( sArgs.sse_flag ? "Pairs	SSE" : "TP	FP	TN	FN" ) << endl;
-		for( i = 0; i < MatResults.GetRows( ); ++i ) {
-			*postm << ( iBins ? i : ( sArgs.min_arg + ( i * sArgs.delta_arg ) ) ) << '\t' <<
-				veciRec[ i ];
-			if( sArgs.sse_flag )
-				*postm << '\t' << veciPositives[ i ] << '\t' << vecdSSE[ i ];
-			else
-				for( j = 0; j < MatResults.GetColumns( ); ++j )
-					*postm << '\t' << MatResults.Get( i, j );
-			if( veciGenesTerm.size( ) || vecfHere.size( ) )
-				*postm << '\t' << veciRecTerm[ i ];
-			*postm << endl; }
-		if( !sArgs.sse_flag )
-			*postm << "#	AUC	" << ( sArgs.auc_arg ?
-				AUCMod( Data, Answers, vecfHere, !!sArgs.invert_flag, sArgs.auc_arg ) :
-				CStatistics::WilcoxonRankSum( Data, Answers, vecfHere, !!sArgs.invert_flag ) ) << endl;
-
-		if( sArgs.inputs_num )
-			ofsm.close( );
-		else
-			cout.flush( );
-
-		if( !sArgs.inputs_num )
-			break; }
+			if( !sArgs.inputs_num )
+				break; } }
 
 	return 0; }
 

File tools/DChecker/DChecker.ggo

View file
  • Ignore whitespace
 							string	typestr="directory"	default="."
 option	"auc"			a	"Use alternative AUCn calculation"
 							float	default="0"
+option	"randomize"		R	"Calculate specified number of randomized scores"
+							int	default="0"
 
 section "Ranking Method"
 option	"bins"			b	"Bins for quantile sorting"

File tools/DChecker/cmdline.c

View file
  • Ignore whitespace
 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iDChecker.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iDChecker.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:
   "\nMiscellaneous:",
   "  -d, --directory=directory  Output directory  (default=`.')",
   "  -a, --auc=FLOAT            Use alternative AUCn calculation  (default=`0')",
+  "  -R, --randomize=INT        Calculate specified number of randomized scores  \n                               (default=`0')",
   "\nRanking Method:",
   "  -b, --bins=INT             Bins for quantile sorting  (default=`1000')",
   "  -f, --finite               Count finitely many bins  (default=off)",
   args_info->answers_given = 0 ;
   args_info->directory_given = 0 ;
   args_info->auc_given = 0 ;
+  args_info->randomize_given = 0 ;
   args_info->bins_given = 0 ;
   args_info->finite_given = 0 ;
   args_info->min_given = 0 ;
   args_info->directory_orig = NULL;
   args_info->auc_arg = 0;
   args_info->auc_orig = NULL;
+  args_info->randomize_arg = 0;
+  args_info->randomize_orig = NULL;
   args_info->bins_arg = 1000;
   args_info->bins_orig = NULL;
   args_info->finite_flag = 0;
   args_info->answers_help = gengetopt_args_info_help[4] ;
   args_info->directory_help = gengetopt_args_info_help[6] ;
   args_info->auc_help = gengetopt_args_info_help[7] ;
-  args_info->bins_help = gengetopt_args_info_help[9] ;
-  args_info->finite_help = gengetopt_args_info_help[10] ;
-  args_info->min_help = gengetopt_args_info_help[11] ;
-  args_info->max_help = gengetopt_args_info_help[12] ;
-  args_info->delta_help = gengetopt_args_info_help[13] ;
-  args_info->genes_help = gengetopt_args_info_help[15] ;
-  args_info->genex_help = gengetopt_args_info_help[16] ;
-  args_info->genet_help = gengetopt_args_info_help[17] ;
-  args_info->genee_help = gengetopt_args_info_help[18] ;
-  args_info->normalize_help = gengetopt_args_info_help[20] ;
-  args_info->invert_help = gengetopt_args_info_help[21] ;
-  args_info->sse_help = gengetopt_args_info_help[23] ;
-  args_info->memmap_help = gengetopt_args_info_help[24] ;
-  args_info->verbosity_help = gengetopt_args_info_help[25] ;
+  args_info->randomize_help = gengetopt_args_info_help[8] ;
+  args_info->bins_help = gengetopt_args_info_help[10] ;
+  args_info->finite_help = gengetopt_args_info_help[11] ;
+  args_info->min_help = gengetopt_args_info_help[12] ;
+  args_info->max_help = gengetopt_args_info_help[13] ;
+  args_info->delta_help = gengetopt_args_info_help[14] ;
+  args_info->genes_help = gengetopt_args_info_help[16] ;
+  args_info->genex_help = gengetopt_args_info_help[17] ;
+  args_info->genet_help = gengetopt_args_info_help[18] ;
+  args_info->genee_help = gengetopt_args_info_help[19] ;
+  args_info->normalize_help = gengetopt_args_info_help[21] ;
+  args_info->invert_help = gengetopt_args_info_help[22] ;
+  args_info->sse_help = gengetopt_args_info_help[24] ;
+  args_info->memmap_help = gengetopt_args_info_help[25] ;
+  args_info->verbosity_help = gengetopt_args_info_help[26] ;
   
 }
 
   free_string_field (&(args_info->directory_arg));
   free_string_field (&(args_info->directory_orig));
   free_string_field (&(args_info->auc_orig));
+  free_string_field (&(args_info->randomize_orig));
   free_string_field (&(args_info->bins_orig));
   free_string_field (&(args_info->min_orig));
   free_string_field (&(args_info->max_orig));
     write_into_file(outfile, "directory", args_info->directory_orig, 0);
   if (args_info->auc_given)
     write_into_file(outfile, "auc", args_info->auc_orig, 0);
+  if (args_info->randomize_given)
+    write_into_file(outfile, "randomize", args_info->randomize_orig, 0);
   if (args_info->bins_given)
     write_into_file(outfile, "bins", args_info->bins_orig, 0);
   if (args_info->finite_given)
         { "answers",	1, NULL, 'w' },
         { "directory",	1, NULL, 'd' },
         { "auc",	1, NULL, 'a' },
+        { "randomize",	1, NULL, 'R' },
         { "bins",	1, NULL, 'b' },
         { "finite",	0, NULL, 'f' },
         { "min",	1, NULL, 'm' },
         { NULL,	0, NULL, 0 }
       };
 
-      c = getopt_long (argc, argv, "hVi:w:d:a:b:fm:M:e:g:G:c:C:ntspv:", long_options, &option_index);
+      c = getopt_long (argc, argv, "hVi:w:d:a:R:b:fm:M:e:g:G:c:C:ntspv:", long_options, &option_index);
 
       if (c == -1) break;	/* Exit from `while (1)' loop.  */
 
             goto failure;
         
           break;
+        case 'R':	/* Calculate specified number of randomized scores.  */
+        
+        
+          if (update_arg( (void *)&(args_info->randomize_arg), 
+               &(args_info->randomize_orig), &(args_info->randomize_given),
+              &(local_args_info.randomize_given), optarg, 0, "0", ARG_INT,
+              check_ambiguity, override, 0, 0,
+              "randomize", 'R',
+              additional_error))
+            goto failure;
+        
+          break;
         case 'b':	/* Bins for quantile sorting.  */
         
         

File tools/DChecker/cmdline.h

View file
  • Ignore whitespace
   float auc_arg;	/**< @brief Use alternative AUCn calculation (default='0').  */
   char * auc_orig;	/**< @brief Use alternative AUCn calculation original value given at command line.  */
   const char *auc_help; /**< @brief Use alternative AUCn calculation help description.  */
+  int randomize_arg;	/**< @brief Calculate specified number of randomized scores (default='0').  */
+  char * randomize_orig;	/**< @brief Calculate specified number of randomized scores original value given at command line.  */
+  const char *randomize_help; /**< @brief Calculate specified number of randomized scores help description.  */
   int bins_arg;	/**< @brief Bins for quantile sorting (default='1000').  */
   char * bins_orig;	/**< @brief Bins for quantile sorting original value given at command line.  */
   const char *bins_help; /**< @brief Bins for quantile sorting help description.  */
   unsigned int answers_given ;	/**< @brief Whether answers was given.  */
   unsigned int directory_given ;	/**< @brief Whether directory was given.  */
   unsigned int auc_given ;	/**< @brief Whether auc was given.  */
+  unsigned int randomize_given ;	/**< @brief Whether randomize was given.  */
   unsigned int bins_given ;	/**< @brief Whether bins was given.  */
   unsigned int finite_given ;	/**< @brief Whether finite was given.  */
   unsigned int min_given ;	/**< @brief Whether min was given.  */

File tools/DSLConverter/cmdline.c

View file
  • Ignore whitespace
 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iDSLConverter.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iDSLConverter.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

File tools/Dab2Dad/cmdline.c

View file
  • Ignore whitespace
 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iDab2Dad.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iDab2Dad.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

File tools/Dat2Dab/Dat2Dab.cpp

View file
  • Ignore whitespace
 			CMeta::Tokenize( acBuffer, vecstrTokens );
 			if( vecstrTokens.empty( ) )
 				continue;
-			if( vecstrTokens.size( ) != 2 ) {
+			if( vecstrTokens.size( ) < 2 ) {
 				cerr << "Illegal remap line (" << vecstrTokens.size( ) << "): " << acBuffer << endl;
 				return 1; }
 			if( vecstrTokens[ 0 ] == vecstrTokens[ 1 ] )
 		Dat.FilterGenes( Genes, CDat::EFilterInclude );
 	if( sArgs.genex_arg )
 		Dat.FilterGenes( sArgs.genex_arg, CDat::EFilterExclude );
+	if( sArgs.genee_arg )
+		Dat.FilterGenes( sArgs.genee_arg, CDat::EFilterEdge );
 
 	if( sArgs.paircount_flag ) {
 		size_t			iTotal, iCutoff;

File tools/Dat2Dab/Dat2Dab.ggo

View file
  • Ignore whitespace
 							string	typestr="filename"
 option	"genex"			G	"Exclude all genes from the given set"
 							string	typestr="filename"
+option	"genee"			D	"Process only edges including a gene from the given set"
+							string	typestr="filename"
 option	"edges"			e	"Process only edges from the given DAT/DAB"
 							string	typestr="filename"
 option	"cutoff"		c	"Exclude edges below cutoff"

File tools/Dat2Dab/cmdline.c

View file
  • Ignore whitespace
 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iDat2Dab.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iDat2Dab.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:
   "\nFiltering:",
   "  -g, --genes=filename     Process only genes from the given set",
   "  -G, --genex=filename     Exclude all genes from the given set",
+  "  -D, --genee=filename     Process only edges including a gene from the given \n                             set",
   "  -e, --edges=filename     Process only edges from the given DAT/DAB",
   "  -c, --cutoff=DOUBLE      Exclude edges below cutoff",
   "  -Z, --zero               Zero missing values  (default=off)",
   args_info->randomize_given = 0 ;
   args_info->genes_given = 0 ;
   args_info->genex_given = 0 ;
+  args_info->genee_given = 0 ;
   args_info->edges_given = 0 ;
   args_info->cutoff_given = 0 ;
   args_info->zero_given = 0 ;
   args_info->genes_orig = NULL;
   args_info->genex_arg = NULL;
   args_info->genex_orig = NULL;
+  args_info->genee_arg = NULL;
+  args_info->genee_orig = NULL;
   args_info->edges_arg = NULL;
   args_info->edges_orig = NULL;
   args_info->cutoff_orig = NULL;
   args_info->randomize_help = gengetopt_args_info_help[10] ;
   args_info->genes_help = gengetopt_args_info_help[12] ;
   args_info->genex_help = gengetopt_args_info_help[13] ;
-  args_info->edges_help = gengetopt_args_info_help[14] ;
-  args_info->cutoff_help = gengetopt_args_info_help[15] ;
-  args_info->zero_help = gengetopt_args_info_help[16] ;
-  args_info->duplicates_help = gengetopt_args_info_help[17] ;
-  args_info->subsample_help = gengetopt_args_info_help[18] ;
-  args_info->lookup1_help = gengetopt_args_info_help[20] ;
-  args_info->lookup2_help = gengetopt_args_info_help[21] ;
-  args_info->lookups1_help = gengetopt_args_info_help[22] ;
-  args_info->lookups2_help = gengetopt_args_info_help[23] ;
-  args_info->genelist_help = gengetopt_args_info_help[24] ;
-  args_info->paircount_help = gengetopt_args_info_help[25] ;
-  args_info->remap_help = gengetopt_args_info_help[27] ;
-  args_info->table_help = gengetopt_args_info_help[28] ;
-  args_info->skip_help = gengetopt_args_info_help[29] ;
-  args_info->memmap_help = gengetopt_args_info_help[30] ;
-  args_info->verbosity_help = gengetopt_args_info_help[31] ;
+  args_info->genee_help = gengetopt_args_info_help[14] ;
+  args_info->edges_help = gengetopt_args_info_help[15] ;
+  args_info->cutoff_help = gengetopt_args_info_help[16] ;
+  args_info->zero_help = gengetopt_args_info_help[17] ;
+  args_info->duplicates_help = gengetopt_args_info_help[18] ;
+  args_info->subsample_help = gengetopt_args_info_help[19] ;
+  args_info->lookup1_help = gengetopt_args_info_help[21] ;
+  args_info->lookup2_help = gengetopt_args_info_help[22] ;
+  args_info->lookups1_help = gengetopt_args_info_help[23] ;
+  args_info->lookups2_help = gengetopt_args_info_help[24] ;
+  args_info->genelist_help = gengetopt_args_info_help[25] ;
+  args_info->paircount_help = gengetopt_args_info_help[26] ;
+  args_info->remap_help = gengetopt_args_info_help[28] ;
+  args_info->table_help = gengetopt_args_info_help[29] ;
+  args_info->skip_help = gengetopt_args_info_help[30] ;
+  args_info->memmap_help = gengetopt_args_info_help[31] ;
+  args_info->verbosity_help = gengetopt_args_info_help[32] ;
   
 }
 
   free_string_field (&(args_info->genes_orig));
   free_string_field (&(args_info->genex_arg));
   free_string_field (&(args_info->genex_orig));
+  free_string_field (&(args_info->genee_arg));
+  free_string_field (&(args_info->genee_orig));
   free_string_field (&(args_info->edges_arg));
   free_string_field (&(args_info->edges_orig));
   free_string_field (&(args_info->cutoff_orig));
     write_into_file(outfile, "genes", args_info->genes_orig, 0);
   if (args_info->genex_given)
     write_into_file(outfile, "genex", args_info->genex_orig, 0);
+  if (args_info->genee_given)
+    write_into_file(outfile, "genee", args_info->genee_orig, 0);
   if (args_info->edges_given)
     write_into_file(outfile, "edges", args_info->edges_orig, 0);
   if (args_info->cutoff_given)
         { "randomize",	0, NULL, 'a' },
         { "genes",	1, NULL, 'g' },
         { "genex",	1, NULL, 'G' },
+        { "genee",	1, NULL, 'D' },
         { "edges",	1, NULL, 'e' },
         { "cutoff",	1, NULL, 'c' },
         { "zero",	0, NULL, 'Z' },
         { NULL,	0, NULL, 0 }
       };
 
-      c = getopt_long (argc, argv, "hVi:o:fnzrag:G:e:c:Zdu:l:L:t:T:EPp:bs:mv:", long_options, &option_index);
+      c = getopt_long (argc, argv, "hVi:o:fnzrag:G:D:e:c:Zdu:l:L:t:T:EPp:bs:mv:", long_options, &option_index);
 
       if (c == -1) break;	/* Exit from `while (1)' loop.  */
 
             goto failure;
         
           break;
+        case 'D':	/* Process only edges including a gene from the given set.  */
+        
+        
+          if (update_arg( (void *)&(args_info->genee_arg), 
+               &(args_info->genee_orig), &(args_info->genee_given),
+              &(local_args_info.genee_given), optarg, 0, 0, ARG_STRING,
+              check_ambiguity, override, 0, 0,
+              "genee", 'D',
+              additional_error))
+            goto failure;
+        
+          break;
         case 'e':	/* Process only edges from the given DAT/DAB.  */
         
         

File tools/Dat2Dab/cmdline.h

View file
  • Ignore whitespace
   char * genex_arg;	/**< @brief Exclude all genes from the given set.  */
   char * genex_orig;	/**< @brief Exclude all genes from the given set original value given at command line.  */
   const char *genex_help; /**< @brief Exclude all genes from the given set help description.  */
+  char * genee_arg;	/**< @brief Process only edges including a gene from the given set.  */
+  char * genee_orig;	/**< @brief Process only edges including a gene from the given set original value given at command line.  */
+  const char *genee_help; /**< @brief Process only edges including a gene from the given set help description.  */
   char * edges_arg;	/**< @brief Process only edges from the given DAT/DAB.  */
   char * edges_orig;	/**< @brief Process only edges from the given DAT/DAB original value given at command line.  */
   const char *edges_help; /**< @brief Process only edges from the given DAT/DAB help description.  */
   unsigned int randomize_given ;	/**< @brief Whether randomize was given.  */
   unsigned int genes_given ;	/**< @brief Whether genes was given.  */
   unsigned int genex_given ;	/**< @brief Whether genex was given.  */
+  unsigned int genee_given ;	/**< @brief Whether genee was given.  */
   unsigned int edges_given ;	/**< @brief Whether edges was given.  */
   unsigned int cutoff_given ;	/**< @brief Whether cutoff was given.  */
   unsigned int zero_given ;	/**< @brief Whether zero was given.  */

File tools/Dat2Graph/Dat2Graph.cpp

View file
  • Ignore whitespace
 #include "stdafx.h"
 #include "cmdline.h"
 
+struct SSorter {	// Comparator functor over indices into a score vector; yields descending-score order.
+	const vector<float>&	m_vecdScores;	// Reference to the caller's score vector (not copied).
+
+	SSorter( const vector<float>& vecdScores ) : m_vecdScores(vecdScores) { }	// Binds the reference; the vector itself is never modified here.
+
+	bool operator()( size_t iOne, size_t iTwo ) {	// iOne, iTwo: indices into m_vecdScores (no bounds check).
+
+		return ( m_vecdScores[iTwo] < m_vecdScores[iOne] ); }	// True when iOne's score is strictly greater, so higher scores sort first.
+};
+
 int open_genes( const char* szFile, CGenes& Genes ) {
 	ifstream	ifsm;
 
 				for( j = ( i + 1 ); j < pDat->GetGenes( ); ++j )
 					if( !CMeta::IsNaN( d = pDat->Get( i, j ) ) && ( d < sArgs.cutoff_arg ) )
 						pDat->Set( i, j, CMeta::GetNaN( ) );
-		if( !strcmp( sArgs.format_arg, "correl" ) ) {
+		if( sArgs.hubs_arg >= 0 ) {
+			vector<float>	vecdScores;
+			vector<size_t>	veciIndices;
+			vector<bool>	vecfHits;
+
+			veciIndices.resize( pDat->GetGenes( ) );
+			vecdScores.resize( pDat->GetGenes( ) );
+			vecfHits.resize( pDat->GetGenes( ) );
+			for( i = 0; i < pDat->GetGenes( ); ++i )
+				if( veciQuery[i] == -1 ) {
+					for( j = 0; j < pDat->GetGenes( ); ++j )
+						if( veciQuery[j] == -1 )
+							pDat->Set( i, j, CMeta::GetNaN( ) ); }
+				else {
+					fill( vecdScores.begin( ), vecdScores.end( ), -FLT_MAX );
+					for( j = 0; j < pDat->GetGenes( ); ++j ) {
+						if( CMeta::IsNaN( d = pDat->Get( i, j ) ) )
+							d = -FLT_MAX;
+						vecdScores[j] = d; }
+					for( j = 0; j < veciIndices.size( ); ++j )
+						veciIndices[j] = j;
+					sort( veciIndices.begin( ), veciIndices.end( ), SSorter( vecdScores ) );
+					fill( vecfHits.begin( ), vecfHits.end( ), false );
+					for( j = 0; j < (size_t)sArgs.hubs_arg; ++j )
+						vecfHits[veciIndices[j]] = true;
+					for( j = 0; j < pDat->GetGenes( ); ++j )
+						if( !vecfHits[j] )
+							pDat->Set( i, j, CMeta::GetNaN( ) ); }
+			pDat->Normalize( CDat::ENormalizeZScore ); }
+		else if( !strcmp( sArgs.format_arg, "correl" ) ) {
 			CMeasurePearson	MeasurePearson;
 			float*			adCentroid;
 			float*			adCur;

File tools/Dat2Graph/Dat2Graph.ggo

View file
  • Ignore whitespace
 						flag	on
 option	"edges"		d	"Aggressiveness of edge trimming after query"
 						double	default="1"
+option	"hubs"		H	"Number of neighbors to query hubs"
+						int	default="-1"
 
 section "Filtering"
 option	"cutoff"	e	"Minimum edge weight for output"

File tools/Dat2Graph/cmdline.c

View file
  • Ignore whitespace
 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iDat2Graph.ggo --default-optional -C -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iDat2Graph.ggo --default-optional -C -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:
   "  -k, --neighbors=INT      Size of query neighborhood  (default=`-1')",
   "  -a, --hefalmp            Perform HEFalMp query instead of bioPIXIE query  \n                             (default=on)",
   "  -d, --edges=DOUBLE       Aggressiveness of edge trimming after query  \n                             (default=`1')",
+  "  -H, --hubs=INT           Number of neighbors to query hubs  (default=`-1')",
   "\nFiltering:",
   "  -e, --cutoff=DOUBLE      Minimum edge weight for output",
   "  -g, --genes=filename     Gene inclusion file",
   args_info->neighbors_given = 0 ;
   args_info->hefalmp_given = 0 ;
   args_info->edges_given = 0 ;
+  args_info->hubs_given = 0 ;
   args_info->cutoff_given = 0 ;
   args_info->genes_given = 0 ;
   args_info->genex_given = 0 ;
   args_info->hefalmp_flag = 1;
   args_info->edges_arg = 1;
   args_info->edges_orig = NULL;
+  args_info->hubs_arg = -1;
+  args_info->hubs_orig = NULL;
   args_info->cutoff_orig = NULL;
   args_info->genes_arg = NULL;
   args_info->genes_orig = NULL;
   args_info->neighbors_help = gengetopt_args_info_help[8] ;
   args_info->hefalmp_help = gengetopt_args_info_help[9] ;
   args_info->edges_help = gengetopt_args_info_help[10] ;
-  args_info->cutoff_help = gengetopt_args_info_help[12] ;
-  args_info->genes_help = gengetopt_args_info_help[13] ;
-  args_info->genex_help = gengetopt_args_info_help[14] ;
-  args_info->knowns_help = gengetopt_args_info_help[15] ;
-  args_info->features_help = gengetopt_args_info_help[17] ;
-  args_info->colors_help = gengetopt_args_info_help[18] ;
-  args_info->borders_help = gengetopt_args_info_help[19] ;
-  args_info->normalize_help = gengetopt_args_info_help[21] ;
-  args_info->memmap_help = gengetopt_args_info_help[22] ;
-  args_info->config_help = gengetopt_args_info_help[23] ;
-  args_info->verbosity_help = gengetopt_args_info_help[24] ;
+  args_info->hubs_help = gengetopt_args_info_help[11] ;
+  args_info->cutoff_help = gengetopt_args_info_help[13] ;
+  args_info->genes_help = gengetopt_args_info_help[14] ;
+  args_info->genex_help = gengetopt_args_info_help[15] ;
+  args_info->knowns_help = gengetopt_args_info_help[16] ;
+  args_info->features_help = gengetopt_args_info_help[18] ;
+  args_info->colors_help = gengetopt_args_info_help[19] ;
+  args_info->borders_help = gengetopt_args_info_help[20] ;
+  args_info->normalize_help = gengetopt_args_info_help[22] ;
+  args_info->memmap_help = gengetopt_args_info_help[23] ;
+  args_info->config_help = gengetopt_args_info_help[24] ;
+  args_info->verbosity_help = gengetopt_args_info_help[25] ;
   
 }
 
   free_string_field (&(args_info->genew_orig));
   free_string_field (&(args_info->neighbors_orig));
   free_string_field (&(args_info->edges_orig));
+  free_string_field (&(args_info->hubs_orig));
   free_string_field (&(args_info->cutoff_orig));
   free_string_field (&(args_info->genes_arg));
   free_string_field (&(args_info->genes_orig));
     write_into_file(outfile, "hefalmp", 0, 0 );
   if (args_info->edges_given)
     write_into_file(outfile, "edges", args_info->edges_orig, 0);
+  if (args_info->hubs_given)
+    write_into_file(outfile, "hubs", args_info->hubs_orig, 0);
   if (args_info->cutoff_given)
     write_into_file(outfile, "cutoff", args_info->cutoff_orig, 0);
   if (args_info->genes_given)
         { "neighbors",	1, NULL, 'k' },
         { "hefalmp",	0, NULL, 'a' },
         { "edges",	1, NULL, 'd' },
+        { "hubs",	1, NULL, 'H' },
         { "cutoff",	1, NULL, 'e' },
         { "genes",	1, NULL, 'g' },
         { "genex",	1, NULL, 'G' },
         { NULL,	0, NULL, 0 }
       };
 
-      c = getopt_long (argc, argv, "hVi:t:q:Q:k:ad:e:g:G:w:f:l:b:nmc:v:", long_options, &option_index);
+      c = getopt_long (argc, argv, "hVi:t:q:Q:k:ad:H:e:g:G:w:f:l:b:nmc:v:", long_options, &option_index);
 
       if (c == -1) break;	/* Exit from `while (1)' loop.  */
 
             goto failure;
         
           break;
+        case 'H':	/* Number of neighbors to query hubs.  */
+        
+        
+          if (update_arg( (void *)&(args_info->hubs_arg), 
+               &(args_info->hubs_orig), &(args_info->hubs_given),
+              &(local_args_info.hubs_given), optarg, 0, "-1", ARG_INT,
+              check_ambiguity, override, 0, 0,
+              "hubs", 'H',
+              additional_error))
+            goto failure;
+        
+          break;
         case 'e':	/* Minimum edge weight for output.  */
         
         

File tools/Dat2Graph/cmdline.h

View file
  • Ignore whitespace
   double edges_arg;	/**< @brief Aggressiveness of edge trimming after query (default='1').  */
   char * edges_orig;	/**< @brief Aggressiveness of edge trimming after query original value given at command line.  */
   const char *edges_help; /**< @brief Aggressiveness of edge trimming after query help description.  */
+  int hubs_arg;	/**< @brief Number of neighbors to query hubs (default='-1').  */
+  char * hubs_orig;	/**< @brief Number of neighbors to query hubs original value given at command line.  */
+  const char *hubs_help; /**< @brief Number of neighbors to query hubs help description.  */
   double cutoff_arg;	/**< @brief Minimum edge weight for output.  */
   char * cutoff_orig;	/**< @brief Minimum edge weight for output original value given at command line.  */
   const char *cutoff_help; /**< @brief Minimum edge weight for output help description.  */
   unsigned int neighbors_given ;	/**< @brief Whether neighbors was given.  */
   unsigned int hefalmp_given ;	/**< @brief Whether hefalmp was given.  */
   unsigned int edges_given ;	/**< @brief Whether edges was given.  */
+  unsigned int hubs_given ;	/**< @brief Whether hubs was given.  */
   unsigned int cutoff_given ;	/**< @brief Whether cutoff was given.  */
   unsigned int genes_given ;	/**< @brief Whether genes was given.  */
   unsigned int genex_given ;	/**< @brief Whether genex was given.  */

File tools/Data2Bnt/cmdline.c

View file
  • Ignore whitespace
 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iData2Bnt.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iData2Bnt.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

File tools/Data2DB/cmdline.c

View file
  • Ignore whitespace
 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iData2DB.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iData2DB.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

File tools/Data2Features/cmdline.c

View file
  • Ignore whitespace
 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iData2Features.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iData2Features.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

File tools/Data2Sql/cmdline.c

View file
  • Ignore whitespace
 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iData2Sql.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iData2Sql.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

File tools/Data2Svm/cmdline.c

View file
  • Ignore whitespace
 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iData2Svm.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iData2Svm.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

File tools/DataDumper/cmdline.c

View file
  • Ignore whitespace
 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iDataDumper.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iDataDumper.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

File tools/Distancer/cmdline.c

View file
  • Ignore whitespace
 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/users/psarder/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iDistancer.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iDistancer.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:

File tools/Edges2Posteriors/cmdline.c

View file
  • Ignore whitespace
 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /shared/hg/sleipnir/extlib/gengetopt-2.22/bin/gengetopt -iEdges2Posteriors.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iEdges2Posteriors.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all