Commits

Anonymous committed 658a11a

Add AUCn scoring to DChecker (AUC for first n negatives)
Add "diff" subtraction combination and missing value zeroing to Combiner
Add heuristic CPT-based regularization to CBayesNetFN (crummy)
Update Clinician so that interaction network is optional
Add dynamic gene addition to CDat/CHalfMatrix
Fix missing SIZE_MAX on some Mac OS X versions (thanks Alice Koechlin!)

Comments (0)

Files changed (22)

 	bool Open( std::istream& istm );
 	bool OpenCounts( const char* szFileCounts, const std::map<std::string, size_t>& mapstriNodes,
 		const std::vector<unsigned char>& vecbDefaults, const std::vector<float>& vecdAlphas,
-		size_t iPseudocounts = -1, const CBayesNetMinimal* pBNDefault = NULL );
+		float dPseudocounts = HUGE_VAL, const CBayesNetMinimal* pBNDefault = NULL );
 	void Save( std::ostream& ostm ) const;
 	float Evaluate( const std::vector<unsigned char>& vecbDatum, size_t iOffset = 0 ) const;
 	bool Evaluate( const std::vector<unsigned char>& vecbData, float* adResults, size_t iGenes,
 		size_t iStart = 0 ) const;
+	float Regularize( std::vector<float>& vecdAlphas ) const;
 
 	/*!
 	 * \brief

src/bayesnetfn.cpp

 // CBayesNetMinimal //////////////////////////////////////////////////////////
 
 bool CBayesNetMinimalImpl::Counts2Probs( const std::vector<std::string>& vecstrCounts,
-	std::vector<float>& vecdProbs, float dAlpha, size_t iPseudocounts, const CBayesNetMinimal* pBNDefault,
+	std::vector<float>& vecdProbs, float dAlpha, float dPseudocounts, const CBayesNetMinimal* pBNDefault,
 	size_t iNode, size_t iClass ) {
 	size_t			i, iTotal;
 	vector<size_t>	veciCounts;
-	float			dScale;
+	float			dScale, dTotal;
 
 	veciCounts.resize( vecstrCounts.size( ) );
 	for( iTotal = i = 0; i < veciCounts.size( ); ++i ) {
 		for( i = 0; i < vecdProbs.size( ); ++i )
 			vecdProbs[ i ] = MatDefault.Get( i, iClass ); }
 	else {
-		if( iPseudocounts == -1 )
+		dTotal = (float)iTotal;
+		if( CMeta::IsNaN( dPseudocounts ) )
 			dScale = 1;
 		else if( iTotal ) {
-			dScale = (float)iPseudocounts / iTotal;
-			iTotal = iPseudocounts; }
+			dScale = dPseudocounts / iTotal;
+			dTotal = dPseudocounts; }
 		else
 			dScale = 0;
 		for( i = 0; i < vecdProbs.size( ); ++i )
 			vecdProbs[ i ] = ( ( dScale * veciCounts[ i ] ) + dAlpha ) /
-				( iTotal + ( dAlpha * vecdProbs.size( ) ) ); }
+				( dTotal + ( dAlpha * vecdProbs.size( ) ) ); }
 
 	return true; }
 
  * \param vecdAlphas
  * If non-empty, vector of prior beliefs alpha for each node.
  * 
- * \param iPseudocounts
- * If not equal to -1, effective sample size to use for all nodes.
+ * \param dPseudocounts
+ * If not equal to NaN, effective sample size to use for all nodes.
  * 
  * \param pBNDefault
  * If non-null, Bayes net to use for default values when a distribution's counts are too sparse to use
  * nodes (including the root node), which must also agree with the maximum node index in \c mapstriNodes.
  */
 bool CBayesNetMinimal::OpenCounts( const char* szFileCounts, const std::map<std::string, size_t>& mapstriNodes,
-	const std::vector<unsigned char>& vecbDefaults, const std::vector<float>& vecdAlphas, size_t iPseudocounts,
+	const std::vector<unsigned char>& vecbDefaults, const std::vector<float>& vecdAlphas, float dPseudocounts,
 	const CBayesNetMinimal* pBNDefault ) {
 	static const size_t	c_iStateInitial	= 0;
 	static const size_t	c_iStateRoot	= c_iStateInitial + 1;
 
 			case c_iStateCPT:
 				if( !Counts2Probs( vecstrLine, vecdProbs, vecdAlphas.empty( ) ? 1 : vecdAlphas[ iNode ],
-					iPseudocounts, pBNDefault, iNode, iClass ) )
+					dPseudocounts, pBNDefault, iNode, iClass ) )
 					return false;
 				{
 					CDataMatrix&	MatCPT	= m_vecNodes[ iNode ].m_MatCPT;
 
 	return true; }
 
+float CBayesNetMinimal::Regularize( vector<float>& vecdAlphas ) const {
+	CDistanceMatrix	Mat;
+	size_t			iNodeOne, iNodeTwo, iAnswer, iValOne, iValTwo;
+	float			dPOne, dPTwo, dPostOne, dPostOneTwo, dRet;
+
+	Mat.Initialize( m_vecNodes.size( ) );
+	Mat.Clear( );
+	for( iNodeOne = 0; iNodeOne < m_vecNodes.size( ); ++iNodeOne ) {
+		const CDataMatrix&	MatOne	= m_vecNodes[iNodeOne].m_MatCPT;
+
+		for( iNodeTwo = 0; iNodeTwo < m_vecNodes.size( ); ++iNodeTwo ) {
+			const CDataMatrix&	MatTwo	= m_vecNodes[iNodeTwo].m_MatCPT;
+
+			if( iNodeOne == iNodeTwo )
+				continue;
+			for( iValOne = 0; iValOne < MatOne.GetColumns( ); ++iValOne ) {
+				dPOne = 0;
+				for( iAnswer = 0; iAnswer < m_MatRoot.GetRows( ); ++iAnswer )
+					dPOne += m_MatRoot.Get( iAnswer, 0 ) * MatOne.Get( iAnswer, iValOne );
+				dPostOne = MatOne.Get( 1, iValOne ) * m_MatRoot.Get( 1, 0 ) / dPOne;
+
+				for( iValTwo = 0; iValTwo < MatTwo.GetColumns( ); ++iValTwo ) {
+					dPTwo = 0;
+					for( iAnswer = 0; iAnswer < m_MatRoot.GetRows( ); ++iAnswer )
+						dPTwo += m_MatRoot.Get( iAnswer, 0 ) * MatTwo.Get( iAnswer, iValTwo );
+					dPostOneTwo = dPostOne * MatTwo.Get( 1, iValTwo ) / dPTwo;
+
+					Mat.Get( iNodeOne, iNodeTwo ) += dPOne * dPTwo * fabs( log( dPostOneTwo / dPostOne ) ); } } } }
+
+	dRet = 0;
+	vecdAlphas.resize( m_vecNodes.size( ) );
+	for( iNodeOne = 0; iNodeOne < m_vecNodes.size( ); ++iNodeOne ) {
+		vecdAlphas[iNodeOne] = 0;
+		for( iNodeTwo = 0; iNodeTwo < m_vecNodes.size( ); ++iNodeTwo )
+			if( iNodeOne != iNodeTwo )
+				vecdAlphas[iNodeOne] += Mat.Get( iNodeOne, iNodeTwo );
+		dRet += vecdAlphas[iNodeOne]; }
+	dRet /= m_vecNodes.size( );
+	for( iNodeOne = 0; iNodeOne < m_vecNodes.size( ); ++iNodeOne )
+		vecdAlphas[iNodeOne] = dRet / vecdAlphas[iNodeOne];
+	dRet = 1;
+
+	return dRet; }
+
 }

src/bayesnetfni.h

 class CBayesNetMinimalImpl : protected CBayesNetImpl, protected CFile {
 protected:
 	static bool Counts2Probs( const std::vector<std::string>&, std::vector<float>&, float dAlpha = 1,
-		size_t = -1, const CBayesNetMinimal* = NULL, size_t = 0, size_t = 0 );
+		float = HUGE_VAL, const CBayesNetMinimal* = NULL, size_t = 0, size_t = 0 );
 
 	CBayesNetMinimalImpl( ) : CBayesNetImpl( true ), m_adNY(NULL) { }
 
 				dStd += d * d; }
 	if( iCount ) {
 		dAve /= iCount;
-		dStd = sqrt( max( 0.0f, ( dStd / iCount ) - ( dAve * dAve ) ) ); }
+		dStd = sqrt( max( 0.0f, ( dStd / ( max( iCount, 2 ) - 1 ) ) - ( dAve * dAve ) ) ); }
 	for( i = 0; i < GetGenes( ); ++i )
 		for( j = ( i + 1 ); j < GetGenes( ); ++j )
 			if( !CMeta::IsNaN( d = Get( i, j ) ) && ( fAll || ( d >= dCutoff ) ) ) {
-				d = 1.0 / ( 1 + exp( ( dAve - d ) / dStd ) );
+				d = 1.0f / ( 1 + exp( ( dAve - d ) / dStd ) );
 				ostm << vecstrNames[ i ] << " -- " << vecstrNames[ j ] << " [weight = " << d <<
 					", color = \"" << ( fHashes ? "#" : "" ) << CColor::Interpolate( d,
 					CColor::c_Green, CColor::c_Black, CColor::c_Red ).ToRGB( ) << "\"];" << endl; }
 	void FilterGenes( const CGenes& Genes, EFilter eFilter, size_t iLimit = -1,
 		float dEdgeAggressiveness = 0.5, const std::vector<float>* pvecdWeights = NULL );
 
+	bool AddGene( const std::string& strGene ) {
+		std::vector<std::string>	vecstrGenes;
+
+		vecstrGenes.push_back( strGene );
+		return AddGenes( vecstrGenes ); }
+
+	bool AddGenes( const std::vector<std::string>& vecstrGenes ) {
+
+		if( m_pPCL || m_abData || !m_Data.SetSize( m_Data.GetSize( ) + vecstrGenes.size( ), true ) )
+			return false;
+
+		m_vecstrGenes.insert( m_vecstrGenes.end( ), vecstrGenes.begin( ), vecstrGenes.end( ) );
+		return true; }
+
 	/*!
 	 * \brief
 	 * Normalize each finite value in the CDat by a specific function.
 #include <string.h>
 
 #include "halfmatrixi.h"
+#include "meta.h"
 
 namespace Sleipnir {
 
 
 		return m_iSize; }
 
+	bool SetSize( size_t iSize, bool fClear = false ) {
+		tType**	aaData;
+		size_t	i, iCur;
+
+		if( !m_fMemory )
+			return false;
+		if( !iSize ) {
+			Reset( );
+			return true; }
+
+		aaData = new tType*[iSize - 1];
+		for( i = 0; ( i + 1 ) < iSize; ++i ) {
+			aaData[i] = new tType[iCur = ( iSize - i - 1 )];
+			if( i < m_iSize )
+				memcpy( aaData[i], m_aaData[i], ( min( iSize, m_iSize ) - i - 1 ) * sizeof(*aaData[i]) );
+			if( fClear && ( iSize > m_iSize ) )
+				std::fill( aaData[i] + ( ( i < m_iSize ) ? ( m_iSize - i - 1 ) : 0 ), aaData[i] + iCur, CMeta::GetNaN( ) ); }
+		m_iSize = iSize;
+		m_aaData = aaData;
+
+		return true; }
+
 	/*!
 	 * \brief
 	 * Sets all entries of the matrix to 0 without changing its size.
 #ifndef ARRAYSIZE
 #define ARRAYSIZE(a)	(sizeof(a)/sizeof(*a))
 #endif // ARRAYSIZE
+#ifndef SIZE_MAX
+#define SIZE_MAX		((size_t)-1)
+#endif // SIZE_MAX
 
 namespace Sleipnir {
 
  * 
  * - <a href="sleipnir-2.2.tar.gz">2.2</a>, *** <br>
  * Fix confusing documentation in \ref Answerer - thanks to Arjun Krishnan!
+ * Fix missing \c SIZE_MAX definition on Mac OS X - thanks to Alice Koechlin!
  * 
  * - <a href="sleipnir-2.1.tar.gz">2.1</a>, 12-20-09 <br>
  * Update includes for gcc 4.3 compatibility - thanks to Casey Greene! <br>

tools/Clinician/Clinician.cpp

 	CDat				Dat;
 	size_t				i, j, k, iGene;
 	vector<bool>		vecfClinical;
-	vector<size_t>		veciGenes2PCL, veciPCL2Genes, veciIndices, veciScores, veciDAT2PCL;
+	vector<size_t>		veciGenes2PCL, veciPCL2Genes, veciIndices, veciScores;
 	vector<float>		vecdScores;
 	CMeasurePearson		MeasurePearson;
 	CGenome				Genome;
 	if( !PCL.Open( sArgs.input_arg, sArgs.skip_arg ) ) {
 		cerr << "Could not open: " << ( sArgs.input_arg ? sArgs.input_arg : "stdin" ) << endl;
 		return 1; }
-	if( !Dat.Open( sArgs.global_arg, !!sArgs.memmap_flag ) ) {
+	if( PCL.GetFeatures( ) < 2 ) {
+		cerr << "PCL requires at least one clinical variable feature" << endl;
+		return 1; }
+	if( sArgs.global_arg && !Dat.Open( sArgs.global_arg, !!sArgs.memmap_flag ) ) {
 		cerr << "Could not open: " << sArgs.global_arg << endl;
 		return 1; }
 
-	veciDAT2PCL.resize( Dat.GetGenes( ) );
-	for( i = 0; i < veciDAT2PCL.size( ); ++i )
-		veciDAT2PCL[i] = PCL.GetGene( Dat.GetGene( i ) );
 	vecfClinical.resize( PCL.GetGenes( ) );
 	veciPCL2Genes.resize( PCL.GetGenes( ) );
 	fill( veciPCL2Genes.begin( ), veciPCL2Genes.end( ), -1 );
 			veciIndices[j] = j;
 		sort( veciIndices.begin( ), veciIndices.end( ), SSorter( vecdScores ) );
 
-		if( sArgs.initial_arg ) {
+		if( sArgs.global_arg && sArgs.initial_arg ) {
 			CDat	DatCopy;
 			CGenome	Genome;
 			CGenes	GenesInitial( Genome );

tools/Clinician/Clinician.ggo

 option	"input"				i	"Input PCL file"
 								string	typestr="filename"
 option	"global"			I	"Input DAT/DAB file"
-								string	typestr="filename"	yes
+								string	typestr="filename"
 
 section "Miscellaneous"
 option	"initial"			n	"Initial correlated neighbor count"

tools/Combiner/Combiner.cpp

 static const char			c_szMax[]			= "max";
 static const char			c_szMin[]			= "min";
 static const char			c_szSum[]			= "sum";
+static const char			c_szDiff[]			= "diff";
 static const char			c_szVote[]			= "vote";
 
 int main( int iArgs, char** aszArgs ) {
 	EMethodGMean,
 	EMethodHMean,
 	EMethodMax,
-	EMethodMin
+	EMethodMin,
+	EMethodDiff
 };
 
 int MainDATs( const gengetopt_args_info& sArgs ) {
 
 	if( !strcmp( c_szSum, sArgs.method_arg ) )
 		eMethod = EMethodSum;
+	else if( !strcmp( c_szDiff, sArgs.method_arg ) )
+		eMethod = EMethodDiff;
 	else if( !strcmp( c_szGMean, sArgs.method_arg ) )
 		eMethod = EMethodGMean;
 	else if( !strcmp( c_szHMean, sArgs.method_arg ) )
 		else
 			dWeight = 1;
 		cerr << "Opened: " << sArgs.inputs[ i ] << endl;
+		if( sArgs.zero_flag ) {
+			vector<string>	vecstrGenes;
+
+			for( j = 0; j < DatOut.GetGenes( ); ++j )
+				if( DatCur.GetGene( DatOut.GetGene( j ) ) == -1 )
+					vecstrGenes.push_back( DatOut.GetGene( j ) );
+			DatCur.AddGenes( vecstrGenes );
+			for( j = 0; j < DatCur.GetGenes( ); ++j )
+				for( k = ( j + 1 ); k < DatCur.GetGenes( ); ++k )
+					if( CMeta::IsNaN( DatCur.Get( j, k ) ) )
+						DatCur.Set( j, k, 0 ); }
 		if( sArgs.normalize_flag )
 			DatCur.Normalize( CDat::ENormalizeZScore );
 		if( GenesIn.GetGenes( ) )
 								break;
 
 							default:
-								DatOut.Get( iOne, iTwo ) += dWeight * d;
+								DatOut.Get( iOne, iTwo ) += dWeight * d * ( ( i && ( eMethod == EMethodDiff ) ) ? -1 : 1 );
 								MatCounts.Get( iOne, iTwo ) += dWeight; } } } } }
 	for( i = 0; i < DatOut.GetGenes( ); ++i )
 		for( j = ( i + 1 ); j < DatOut.GetGenes( ); ++j )

tools/Combiner/Combiner.ggo

 option	"type"		t	"Output data file type"
 						values="pcl","dat","dab","module","revdat"	default="pcl"
 option	"method"	m	"Combination method"
-						values="min","max","mean","gmean","hmean","sum"	default="mean"
+						values="min","max","mean","gmean","hmean","sum","diff"	default="mean"
 option	"output"	o	"Output file"
 						string	typestr="filename"
 option	"weights"	w	"Weights file"
 option	"terms"		e	"Produce DAT/DABs averaging within the provided terms"
 						string	typestr="filename"
 
-section "Optional"
+section "Miscellaneous"
 option	"reweight"	W	"Treat weights as absolute"
 						flag	off
+option	"subset"	s	"Subset size (none if zero)"
+						int	default="0"
+option	"normalize"	n	"Normalize inputs before combining"
+						flag	off
+option	"zscore"	z	"Z-score output after combining"
+						flag	off
+option	"zero"		Z	"Default missing values to zero"
+						flag	off
+
+section "Optional"
 option	"skip"		k	"Columns to skip in input PCLs"
 						int	default="2"
 option	"memmap"	p	"Memory map input files"
 						flag	off
-option	"normalize"	n	"Normalize inputs before combining"
-						flag	off
-option	"zscore"	z	"Z-score output after combining"
-						flag	off
-option	"subset"	s	"Subset size (none if zero)"
-						int	default="0"
 option	"verbosity"	v	"Message verbosity"
 						int	default="5"

tools/Combiner/cmdline.c

   "  -V, --version              Print version and exit",
   "\nMain:",
   "  -t, --type=STRING          Output data file type  (possible values=\"pcl\", \n                               \"dat\", \"dab\", \"module\", \"revdat\" \n                               default=`pcl')",
-  "  -m, --method=STRING        Combination method  (possible values=\"min\", \n                               \"max\", \"mean\", \"gmean\", \"hmean\", \"sum\" \n                               default=`mean')",
+  "  -m, --method=STRING        Combination method  (possible values=\"min\", \n                               \"max\", \"mean\", \"gmean\", \"hmean\", \n                               \"sum\", \"diff\" default=`mean')",
   "  -o, --output=filename      Output file",
   "  -w, --weights=filename     Weights file",
   "\nModules:",
   "\nFiltering:",
   "  -g, --genes=filename       Process only genes from the given set",
   "  -e, --terms=filename       Produce DAT/DABs averaging within the provided \n                               terms",
+  "\nMiscellaneous:",
+  "  -W, --reweight             Treat weights as absolute  (default=off)",
+  "  -s, --subset=INT           Subset size (none if zero)  (default=`0')",
+  "  -n, --normalize            Normalize inputs before combining  (default=off)",
+  "  -z, --zscore               Z-score output after combining  (default=off)",
+  "  -Z, --zero                 Default missing values to zero  (default=off)",
   "\nOptional:",
-  "  -W, --reweight             Treat weights as absolute  (default=off)",
   "  -k, --skip=INT             Columns to skip in input PCLs  (default=`2')",
   "  -p, --memmap               Memory map input files  (default=off)",
-  "  -n, --normalize            Normalize inputs before combining  (default=off)",
-  "  -z, --zscore               Z-score output after combining  (default=off)",
-  "  -s, --subset=INT           Subset size (none if zero)  (default=`0')",
   "  -v, --verbosity=INT        Message verbosity  (default=`5')",
     0
 };
 
 
 char *cmdline_parser_type_values[] = {"pcl", "dat", "dab", "module", "revdat", 0} ;	/* Possible values for type.  */
-char *cmdline_parser_method_values[] = {"min", "max", "mean", "gmean", "hmean", "sum", 0} ;	/* Possible values for method.  */
+char *cmdline_parser_method_values[] = {"min", "max", "mean", "gmean", "hmean", "sum", "diff", 0} ;	/* Possible values for method.  */
 
 static char *
 gengetopt_strdup (const char *s);
   args_info->genes_given = 0 ;
   args_info->terms_given = 0 ;
   args_info->reweight_given = 0 ;
+  args_info->subset_given = 0 ;
+  args_info->normalize_given = 0 ;
+  args_info->zscore_given = 0 ;
+  args_info->zero_given = 0 ;
   args_info->skip_given = 0 ;
   args_info->memmap_given = 0 ;
-  args_info->normalize_given = 0 ;
-  args_info->zscore_given = 0 ;
-  args_info->subset_given = 0 ;
   args_info->verbosity_given = 0 ;
 }
 
   args_info->terms_arg = NULL;
   args_info->terms_orig = NULL;
   args_info->reweight_flag = 0;
+  args_info->subset_arg = 0;
+  args_info->subset_orig = NULL;
+  args_info->normalize_flag = 0;
+  args_info->zscore_flag = 0;
+  args_info->zero_flag = 0;
   args_info->skip_arg = 2;
   args_info->skip_orig = NULL;
   args_info->memmap_flag = 0;
-  args_info->normalize_flag = 0;
-  args_info->zscore_flag = 0;
-  args_info->subset_arg = 0;
-  args_info->subset_orig = NULL;
   args_info->verbosity_arg = 5;
   args_info->verbosity_orig = NULL;
   
   args_info->genes_help = gengetopt_args_info_help[11] ;
   args_info->terms_help = gengetopt_args_info_help[12] ;
   args_info->reweight_help = gengetopt_args_info_help[14] ;
-  args_info->skip_help = gengetopt_args_info_help[15] ;
-  args_info->memmap_help = gengetopt_args_info_help[16] ;
-  args_info->normalize_help = gengetopt_args_info_help[17] ;
-  args_info->zscore_help = gengetopt_args_info_help[18] ;
-  args_info->subset_help = gengetopt_args_info_help[19] ;
-  args_info->verbosity_help = gengetopt_args_info_help[20] ;
+  args_info->subset_help = gengetopt_args_info_help[15] ;
+  args_info->normalize_help = gengetopt_args_info_help[16] ;
+  args_info->zscore_help = gengetopt_args_info_help[17] ;
+  args_info->zero_help = gengetopt_args_info_help[18] ;
+  args_info->skip_help = gengetopt_args_info_help[20] ;
+  args_info->memmap_help = gengetopt_args_info_help[21] ;
+  args_info->verbosity_help = gengetopt_args_info_help[22] ;
   
 }
 
   free_string_field (&(args_info->genes_orig));
   free_string_field (&(args_info->terms_arg));
   free_string_field (&(args_info->terms_orig));
+  free_string_field (&(args_info->subset_orig));
   free_string_field (&(args_info->skip_orig));
-  free_string_field (&(args_info->subset_orig));
   free_string_field (&(args_info->verbosity_orig));
   
   
     write_into_file(outfile, "terms", args_info->terms_orig, 0);
   if (args_info->reweight_given)
     write_into_file(outfile, "reweight", 0, 0 );
+  if (args_info->subset_given)
+    write_into_file(outfile, "subset", args_info->subset_orig, 0);
+  if (args_info->normalize_given)
+    write_into_file(outfile, "normalize", 0, 0 );
+  if (args_info->zscore_given)
+    write_into_file(outfile, "zscore", 0, 0 );
+  if (args_info->zero_given)
+    write_into_file(outfile, "zero", 0, 0 );
   if (args_info->skip_given)
     write_into_file(outfile, "skip", args_info->skip_orig, 0);
   if (args_info->memmap_given)
     write_into_file(outfile, "memmap", 0, 0 );
-  if (args_info->normalize_given)
-    write_into_file(outfile, "normalize", 0, 0 );
-  if (args_info->zscore_given)
-    write_into_file(outfile, "zscore", 0, 0 );
-  if (args_info->subset_given)
-    write_into_file(outfile, "subset", args_info->subset_orig, 0);
   if (args_info->verbosity_given)
     write_into_file(outfile, "verbosity", args_info->verbosity_orig, 0);
   
         { "genes",	1, NULL, 'g' },
         { "terms",	1, NULL, 'e' },
         { "reweight",	0, NULL, 'W' },
+        { "subset",	1, NULL, 's' },
+        { "normalize",	0, NULL, 'n' },
+        { "zscore",	0, NULL, 'z' },
+        { "zero",	0, NULL, 'Z' },
         { "skip",	1, NULL, 'k' },
         { "memmap",	0, NULL, 'p' },
-        { "normalize",	0, NULL, 'n' },
-        { "zscore",	0, NULL, 'z' },
-        { "subset",	1, NULL, 's' },
         { "verbosity",	1, NULL, 'v' },
         { NULL,	0, NULL, 0 }
       };
 
-      c = getopt_long (argc, argv, "hVt:m:o:w:j:r:g:e:Wk:pnzs:v:", long_options, &option_index);
+      c = getopt_long (argc, argv, "hVt:m:o:w:j:r:g:e:Ws:nzZk:pv:", long_options, &option_index);
 
       if (c == -1) break;	/* Exit from `while (1)' loop.  */
 
             goto failure;
         
           break;
+        case 's':	/* Subset size (none if zero).  */
+        
+        
+          if (update_arg( (void *)&(args_info->subset_arg), 
+               &(args_info->subset_orig), &(args_info->subset_given),
+              &(local_args_info.subset_given), optarg, 0, "0", ARG_INT,
+              check_ambiguity, override, 0, 0,
+              "subset", 's',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'n':	/* Normalize inputs before combining.  */
+        
+        
+          if (update_arg((void *)&(args_info->normalize_flag), 0, &(args_info->normalize_given),
+              &(local_args_info.normalize_given), optarg, 0, 0, ARG_FLAG,
+              check_ambiguity, override, 1, 0, "normalize", 'n',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'z':	/* Z-score output after combining.  */
+        
+        
+          if (update_arg((void *)&(args_info->zscore_flag), 0, &(args_info->zscore_given),
+              &(local_args_info.zscore_given), optarg, 0, 0, ARG_FLAG,
+              check_ambiguity, override, 1, 0, "zscore", 'z',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'Z':	/* Default missing values to zero.  */
+        
+        
+          if (update_arg((void *)&(args_info->zero_flag), 0, &(args_info->zero_given),
+              &(local_args_info.zero_given), optarg, 0, 0, ARG_FLAG,
+              check_ambiguity, override, 1, 0, "zero", 'Z',
+              additional_error))
+            goto failure;
+        
+          break;
         case 'k':	/* Columns to skip in input PCLs.  */
         
         
             goto failure;
         
           break;
-        case 'n':	/* Normalize inputs before combining.  */
-        
-        
-          if (update_arg((void *)&(args_info->normalize_flag), 0, &(args_info->normalize_given),
-              &(local_args_info.normalize_given), optarg, 0, 0, ARG_FLAG,
-              check_ambiguity, override, 1, 0, "normalize", 'n',
-              additional_error))
-            goto failure;
-        
-          break;
-        case 'z':	/* Z-score output after combining.  */
-        
-        
-          if (update_arg((void *)&(args_info->zscore_flag), 0, &(args_info->zscore_given),
-              &(local_args_info.zscore_given), optarg, 0, 0, ARG_FLAG,
-              check_ambiguity, override, 1, 0, "zscore", 'z',
-              additional_error))
-            goto failure;
-        
-          break;
-        case 's':	/* Subset size (none if zero).  */
-        
-        
-          if (update_arg( (void *)&(args_info->subset_arg), 
-               &(args_info->subset_orig), &(args_info->subset_given),
-              &(local_args_info.subset_given), optarg, 0, "0", ARG_INT,
-              check_ambiguity, override, 0, 0,
-              "subset", 's',
-              additional_error))
-            goto failure;
-        
-          break;
         case 'v':	/* Message verbosity.  */
         
         

tools/Combiner/cmdline.h

   const char *terms_help; /**< @brief Produce DAT/DABs averaging within the provided terms help description.  */
   int reweight_flag;	/**< @brief Treat weights as absolute (default=off).  */
   const char *reweight_help; /**< @brief Treat weights as absolute help description.  */
+  int subset_arg;	/**< @brief Subset size (none if zero) (default='0').  */
+  char * subset_orig;	/**< @brief Subset size (none if zero) original value given at command line.  */
+  const char *subset_help; /**< @brief Subset size (none if zero) help description.  */
+  int normalize_flag;	/**< @brief Normalize inputs before combining (default=off).  */
+  const char *normalize_help; /**< @brief Normalize inputs before combining help description.  */
+  int zscore_flag;	/**< @brief Z-score output after combining (default=off).  */
+  const char *zscore_help; /**< @brief Z-score output after combining help description.  */
+  int zero_flag;	/**< @brief Default missing values to zero (default=off).  */
+  const char *zero_help; /**< @brief Default missing values to zero help description.  */
   int skip_arg;	/**< @brief Columns to skip in input PCLs (default='2').  */
   char * skip_orig;	/**< @brief Columns to skip in input PCLs original value given at command line.  */
   const char *skip_help; /**< @brief Columns to skip in input PCLs help description.  */
   int memmap_flag;	/**< @brief Memory map input files (default=off).  */
   const char *memmap_help; /**< @brief Memory map input files help description.  */
-  int normalize_flag;	/**< @brief Normalize inputs before combining (default=off).  */
-  const char *normalize_help; /**< @brief Normalize inputs before combining help description.  */
-  int zscore_flag;	/**< @brief Z-score output after combining (default=off).  */
-  const char *zscore_help; /**< @brief Z-score output after combining help description.  */
-  int subset_arg;	/**< @brief Subset size (none if zero) (default='0').  */
-  char * subset_orig;	/**< @brief Subset size (none if zero) original value given at command line.  */
-  const char *subset_help; /**< @brief Subset size (none if zero) help description.  */
   int verbosity_arg;	/**< @brief Message verbosity (default='5').  */
   char * verbosity_orig;	/**< @brief Message verbosity original value given at command line.  */
   const char *verbosity_help; /**< @brief Message verbosity help description.  */
   unsigned int genes_given ;	/**< @brief Whether genes was given.  */
   unsigned int terms_given ;	/**< @brief Whether terms was given.  */
   unsigned int reweight_given ;	/**< @brief Whether reweight was given.  */
+  unsigned int subset_given ;	/**< @brief Whether subset was given.  */
+  unsigned int normalize_given ;	/**< @brief Whether normalize was given.  */
+  unsigned int zscore_given ;	/**< @brief Whether zscore was given.  */
+  unsigned int zero_given ;	/**< @brief Whether zero was given.  */
   unsigned int skip_given ;	/**< @brief Whether skip was given.  */
   unsigned int memmap_given ;	/**< @brief Whether memmap was given.  */
-  unsigned int normalize_given ;	/**< @brief Whether normalize was given.  */
-  unsigned int zscore_given ;	/**< @brief Whether zscore was given.  */
-  unsigned int subset_given ;	/**< @brief Whether subset was given.  */
   unsigned int verbosity_given ;	/**< @brief Whether verbosity was given.  */
 
   char **inputs ; /**< @brief unamed options (options without names) */

tools/Counter/Counter.cpp

 		cmdline_parser_print_help( );
 		return 1; }
 	CMeta Meta( sArgs.verbosity_arg );
+	if( sArgs.pseudocounts_arg < 0 )
+		sArgs.pseudocounts_arg = CMeta::GetNaN( );
 
 	if( sArgs.zeros_arg ) {
 		ifstream		ifsm;
 	vector<float>				vecdAlphas;
 	vector<unsigned char>		vecbZeros;
 	map<string, size_t>::const_iterator	iterZero, iterDataset;
+	float						d;
 
 	if( mapstriDatasets.empty( ) ) {
 		cerr << "No datasets given" << endl;
 		cerr << "Could not open default counts: " << ( sArgs.default_arg ? sArgs.default_arg : "not given" ) <<
 			endl;
 		return 1; }
+	if( sArgs.regularize_flag ) {
+		d = BNDefault.Regularize( vecdAlphas );
+		BNDefault.OpenCounts( sArgs.default_arg, mapstriDatasets, vecbZeros, vecdAlphas, d ); }
 
 	for( i = 0; i < vecstrContexts.size( ); ++i ) {
 		strFile = (string)sArgs.counts_arg + '/' + CMeta::Filename( vecstrContexts[ i ] ) + c_acTxt;
 		if( !pBN->OpenCounts( strFile.c_str( ), mapstriDatasets, vecbZeros, vecdAlphas, sArgs.pseudocounts_arg,
 			&BNDefault ) )
 			return 1;
+		if( sArgs.regularize_flag ) {
+			d = pBN->Regularize( vecdAlphas );
+			pBN->OpenCounts( strFile.c_str( ), mapstriDatasets, vecbZeros, vecdAlphas, d, &BNDefault ); }
 		vecpBNs.push_back( pBN ); }
 
 	cerr << "Created " << ( vecpBNs.size( ) + 1 ) << " Bayesian classifiers" << endl;

tools/Counter/Counter.ggo

 
 section "Bayesian Regularization"
 option	"pseudocounts"	p	"Effective number of pseudocounts to use"
-							int	default="-1"
+							float default="-1"
 option	"alphas"		a	"File containing equivalent sample sizes (alphas) for each node"
 							string	typestr="filename"
+option	"regularize"	r	"Automatically regularized based on similarity"
+							flag	off
 
 section "Optional"
 option	"temporary"		y	"Directory for temporary files"

tools/Counter/cmdline.c

   "  -b, --default=filename        Count file containing defaults for cases with \n                                  missing data",
   "  -Z, --zeros=filename          Read zeroed node IDs/outputs from the given \n                                  file",
   "\nBayesian Regularization:",
-  "  -p, --pseudocounts=INT        Effective number of pseudocounts to use  \n                                  (default=`-1')",
+  "  -p, --pseudocounts=FLOAT      Effective number of pseudocounts to use  \n                                  (default=`-1')",
   "  -a, --alphas=filename         File containing equivalent sample sizes \n                                  (alphas) for each node",
+  "  -r, --regularize              Automatically regularized based on similarity  \n                                  (default=off)",
   "\nOptional:",
   "  -y, --temporary=directory     Directory for temporary files  (default=`.')",
   "  -l, --smile                   Output SMILE (X)DSL files rather than minimal \n                                  networks  (default=off)",
   , ARG_FLAG
   , ARG_STRING
   , ARG_INT
+  , ARG_FLOAT
 } cmdline_parser_arg_type;
 
 static
   args_info->zeros_given = 0 ;
   args_info->pseudocounts_given = 0 ;
   args_info->alphas_given = 0 ;
+  args_info->regularize_given = 0 ;
   args_info->temporary_given = 0 ;
   args_info->smile_given = 0 ;
   args_info->xdsl_given = 0 ;
   args_info->pseudocounts_orig = NULL;
   args_info->alphas_arg = NULL;
   args_info->alphas_orig = NULL;
+  args_info->regularize_flag = 0;
   args_info->temporary_arg = gengetopt_strdup (".");
   args_info->temporary_orig = NULL;
   args_info->smile_flag = 0;
   args_info->zeros_help = gengetopt_args_info_help[19] ;
   args_info->pseudocounts_help = gengetopt_args_info_help[21] ;
   args_info->alphas_help = gengetopt_args_info_help[22] ;
-  args_info->temporary_help = gengetopt_args_info_help[24] ;
-  args_info->smile_help = gengetopt_args_info_help[25] ;
-  args_info->xdsl_help = gengetopt_args_info_help[26] ;
-  args_info->memmap_help = gengetopt_args_info_help[27] ;
-  args_info->threads_help = gengetopt_args_info_help[28] ;
-  args_info->verbosity_help = gengetopt_args_info_help[29] ;
+  args_info->regularize_help = gengetopt_args_info_help[23] ;
+  args_info->temporary_help = gengetopt_args_info_help[25] ;
+  args_info->smile_help = gengetopt_args_info_help[26] ;
+  args_info->xdsl_help = gengetopt_args_info_help[27] ;
+  args_info->memmap_help = gengetopt_args_info_help[28] ;
+  args_info->threads_help = gengetopt_args_info_help[29] ;
+  args_info->verbosity_help = gengetopt_args_info_help[30] ;
   
 }
 
     write_into_file(outfile, "pseudocounts", args_info->pseudocounts_orig, 0);
   if (args_info->alphas_given)
     write_into_file(outfile, "alphas", args_info->alphas_orig, 0);
+  if (args_info->regularize_given)
+    write_into_file(outfile, "regularize", 0, 0 );
   if (args_info->temporary_given)
     write_into_file(outfile, "temporary", args_info->temporary_orig, 0);
   if (args_info->smile_given)
   case ARG_INT:
     if (val) *((int *)field) = strtol (val, &stop_char, 0);
     break;
+  case ARG_FLOAT:
+    if (val) *((float *)field) = (float)strtod (val, &stop_char);
+    break;
   case ARG_STRING:
     if (val) {
       string_field = (char **)field;
   /* check numeric conversion */
   switch(arg_type) {
   case ARG_INT:
+  case ARG_FLOAT:
     if (val && !(stop_char && *stop_char == '\0')) {
       fprintf(stderr, "%s: invalid numeric value: %s\n", package_name, val);
       return 1; /* failure */
         { "zeros",	1, NULL, 'Z' },
         { "pseudocounts",	1, NULL, 'p' },
         { "alphas",	1, NULL, 'a' },
+        { "regularize",	0, NULL, 'r' },
         { "temporary",	1, NULL, 'y' },
         { "smile",	0, NULL, 'l' },
         { "xdsl",	0, NULL, 'x' },
         { NULL,	0, NULL, 0 }
       };
 
-      c = getopt_long (argc, argv, "hVw:k:n:o:d:s:e:X:g:G:c:C:b:Z:p:a:y:lxmt:v:", long_options, &option_index);
+      c = getopt_long (argc, argv, "hVw:k:n:o:d:s:e:X:g:G:c:C:b:Z:p:a:ry:lxmt:v:", long_options, &option_index);
 
       if (c == -1) break;	/* Exit from `while (1)' loop.  */
 
         
           if (update_arg( (void *)&(args_info->pseudocounts_arg), 
                &(args_info->pseudocounts_orig), &(args_info->pseudocounts_given),
-              &(local_args_info.pseudocounts_given), optarg, 0, "-1", ARG_INT,
+              &(local_args_info.pseudocounts_given), optarg, 0, "-1", ARG_FLOAT,
               check_ambiguity, override, 0, 0,
               "pseudocounts", 'p',
               additional_error))
             goto failure;
         
           break;
+        case 'r':	/* Automatically regularized based on similarity.  */
+        
+        
+          if (update_arg((void *)&(args_info->regularize_flag), 0, &(args_info->regularize_given),
+              &(local_args_info.regularize_given), optarg, 0, 0, ARG_FLAG,
+              check_ambiguity, override, 1, 0, "regularize", 'r',
+              additional_error))
+            goto failure;
+        
+          break;
         case 'y':	/* Directory for temporary files.  */
         
         

tools/Counter/cmdline.h

   char * zeros_arg;	/**< @brief Read zeroed node IDs/outputs from the given file.  */
   char * zeros_orig;	/**< @brief Read zeroed node IDs/outputs from the given file original value given at command line.  */
   const char *zeros_help; /**< @brief Read zeroed node IDs/outputs from the given file help description.  */
-  int pseudocounts_arg;	/**< @brief Effective number of pseudocounts to use (default='-1').  */
+  float pseudocounts_arg;	/**< @brief Effective number of pseudocounts to use (default='-1').  */
   char * pseudocounts_orig;	/**< @brief Effective number of pseudocounts to use original value given at command line.  */
   const char *pseudocounts_help; /**< @brief Effective number of pseudocounts to use help description.  */
   char * alphas_arg;	/**< @brief File containing equivalent sample sizes (alphas) for each node.  */
   char * alphas_orig;	/**< @brief File containing equivalent sample sizes (alphas) for each node original value given at command line.  */
   const char *alphas_help; /**< @brief File containing equivalent sample sizes (alphas) for each node help description.  */
+  int regularize_flag;	/**< @brief Automatically regularized based on similarity (default=off).  */
+  const char *regularize_help; /**< @brief Automatically regularized based on similarity help description.  */
   char * temporary_arg;	/**< @brief Directory for temporary files (default='.').  */
   char * temporary_orig;	/**< @brief Directory for temporary files original value given at command line.  */
   const char *temporary_help; /**< @brief Directory for temporary files help description.  */
   unsigned int zeros_given ;	/**< @brief Whether zeros was given.  */
   unsigned int pseudocounts_given ;	/**< @brief Whether pseudocounts was given.  */
   unsigned int alphas_given ;	/**< @brief Whether alphas was given.  */
+  unsigned int regularize_given ;	/**< @brief Whether regularize was given.  */
   unsigned int temporary_given ;	/**< @brief Whether temporary was given.  */
   unsigned int smile_given ;	/**< @brief Whether smile was given.  */
   unsigned int xdsl_given ;	/**< @brief Whether xdsl was given.  */

tools/DChecker/DChecker.cpp

-/*****************************************************************************
-* This file is provided under the Creative Commons Attribution 3.0 license.
-*
-* You are free to share, copy, distribute, transmit, or adapt this work
-* PROVIDED THAT you attribute the work to the authors listed below.
-* For more information, please see the following web page:
-* http://creativecommons.org/licenses/by/3.0/
-*
-* This file is a component of the Sleipnir library for functional genomics,
-* authored by:
-* Curtis Huttenhower (chuttenh@princeton.edu)
-* Mark Schroeder
-* Maria D. Chikina
-* Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
-*
-* If you use this library, the included executable tools, or any related
-* code in your work, please cite the following publication:
-* Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
-* Olga G. Troyanskaya.
-* "The Sleipnir library for computational functional genomics"
-*****************************************************************************/
-#include "stdafx.h"
-#include "cmdline.h"
-
-enum ETFPN {
-	ETFPN_TP	= 0,
-	ETFPN_FP	= ETFPN_TP + 1,
-	ETFPN_TN	= ETFPN_FP + 1,
-	ETFPN_FN	= ETFPN_TN + 1
-};
-
-struct SDatum {
-	float	m_dValue;
-	size_t	m_iOne;
-	size_t	m_iTwo;
-	float	m_dAnswer;
-
-	SDatum( float dValue, size_t iOne, size_t iTwo, float dAnswer ) : m_dValue(dValue), m_iOne(iOne), m_iTwo(iTwo),
-		m_dAnswer(dAnswer) { }
-};
-
-struct SSorter {
-	bool	m_fInvert;
-
-	SSorter( bool fInvert ) : m_fInvert(fInvert) { }
-
-	bool operator()( const SDatum& sOne, const SDatum& sTwo ) const {
-
-		return ( m_fInvert ? ( sOne.m_dValue > sTwo.m_dValue ) : ( sOne.m_dValue < sTwo.m_dValue ) ); }
-};
-
-int main( int iArgs, char** aszArgs ) {
-	CDat				Answers, Data;
-	gengetopt_args_info	sArgs;
-	size_t				i, j, k, m, iOne, iTwo, iGenes, iPositives, iNegatives, iBins;
-	vector<size_t>		veciGenes, veciRec, veciRecTerm;
-	CFullMatrix<bool>	MatGenes;
-	CFullMatrix<size_t>	MatResults;
-	ETFPN				eTFPN;
-	int					iMax;
-	float				dAnswer, dValue;
-	vector<bool>		vecfHere;
-	vector<float>		vecdScores, vecdSSE;
-	vector<size_t>		veciPositives, veciNegatives, veciGenesTerm;
-	ofstream			ofsm;
-	ostream*			postm;
-	map<float,size_t>	mapValues;
-	bool				fMapAnswers;
-	CGenome				Genome;
-	CGenes				GenesTm( Genome );
-
-	if( cmdline_parser( iArgs, aszArgs, &sArgs ) ) {
-		cmdline_parser_print_help( );
-		return 1; }
-	CMeta Meta( sArgs.verbosity_arg );
-
-	fMapAnswers = !!sArgs.memmap_flag && !( sArgs.genes_arg || sArgs.genet_arg || sArgs.genex_arg || sArgs.genee_arg );
-	if( !Answers.Open( sArgs.answers_arg, fMapAnswers ) ) {
-		cerr << "Couldn't open: " << sArgs.answers_arg << endl;
-		return 1; }
-	if( sArgs.genes_arg && !Answers.FilterGenes( sArgs.genes_arg, CDat::EFilterInclude ) ) {
-		cerr << "Couldn't open: " << sArgs.genes_arg << endl;
-		return 1; }
-	if( sArgs.genee_arg && !Answers.FilterGenes( sArgs.genee_arg, CDat::EFilterEdge ) ) {
-		cerr << "Couldn't open: " << sArgs.genee_arg << endl;
-		return 1; }
-	if( sArgs.genet_arg ) {
-		if( !( Answers.FilterGenes( sArgs.genet_arg, CDat::EFilterTerm ) &&
-			GenesTm.Open( sArgs.genet_arg ) ) ) {
-			cerr << "Couldn't open: " << sArgs.genet_arg << endl;
-			return 1; }
-		veciGenesTerm.reserve( GenesTm.GetGenes( ) );
-		for( i = 0; i < GenesTm.GetGenes( ); ++i )
-			if( ( j = Answers.GetGene( GenesTm.GetGene( i ).GetName( ) ) ) != -1 )
-				veciGenesTerm.push_back( j ); }
-	if( sArgs.genex_arg && !Answers.FilterGenes( sArgs.genex_arg, CDat::EFilterExclude ) ) {
-		cerr << "Couldn't open: " << sArgs.genex_arg << endl;
-		return 1; }
-	if( !Data.Open( sArgs.input_arg, !!sArgs.memmap_flag ) ) {
-		cerr << "Couldn't open: " << sArgs.input_arg << endl;
-		return 1; }
-	if( sArgs.normalize_flag )
-		Data.Normalize( CDat::ENormalizeMinMax );
-
-	veciGenes.resize( Answers.GetGenes( ) );
-	for( i = 0; i < Answers.GetGenes( ); ++i )
-		veciGenes[ i ] = Data.GetGene( Answers.GetGene( i ) );
-	if( sArgs.finite_flag ) {
-		vector<float>	vecdValues;
-		{
-			set<float>		setdValues;
-
-			for( i = 0; i < Answers.GetGenes( ); ++i ) {
-				if( ( iOne = veciGenes[ i ] ) == -1 )
-					continue;
-				for( j = ( i + 1 ); j < Answers.GetGenes( ); ++j ) {
-					if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
-						CMeta::IsNaN( dValue = Data.Get( iOne, iTwo ) ) ||
-						CMeta::IsNaN( Answers.Get( i, j ) ) )
-						continue;
-					if( sArgs.invert_flag )
-						dValue = 1 - dValue;
-					setdValues.insert( dValue ); } }
-			vecdValues.resize( setdValues.size( ) );
-			copy( setdValues.begin( ), setdValues.end( ), vecdValues.begin( ) );
-		}
-		sort( vecdValues.begin( ), vecdValues.end( ) );
-		for( i = 0; i < vecdValues.size( ); ++i )
-			mapValues[ vecdValues[ i ] ] = i;
-		iBins = mapValues.size( ); }
-	else
-		iBins = sArgs.bins_arg;
-	MatResults.Initialize( iBins ? ( iBins + 1 ) :
-		(size_t)( ( sArgs.max_arg - sArgs.min_arg ) / sArgs.delta_arg ) + 1, 4 );
-	MatGenes.Initialize( veciGenes.size( ), MatResults.GetRows( ) );
-
-	for( iGenes = 0; !sArgs.inputs_num || ( iGenes < sArgs.inputs_num ); ++iGenes ) {
-		MatResults.Clear( );
-		MatGenes.Clear( );
-
-		if( sArgs.inputs_num ) {
-			CGenes		Genes( Genome );
-			ifstream	ifsm;
-
-			ifsm.open( sArgs.inputs[ iGenes ] );
-			if( !Genes.Open( ifsm ) ) {
-				cerr << "Couldn't open: " << sArgs.inputs[ iGenes ] << endl;
-				return 1; }
-			vecfHere.resize( Answers.GetGenes( ) );
-			for( i = 0; i < vecfHere.size( ); ++i )
-				vecfHere[ i ] = Genes.IsGene( Answers.GetGene( i ) );
-			cerr << "Processing " << sArgs.inputs[ iGenes ] << "..." << endl;
-			ifsm.close( ); }
-
-		if( mapValues.size( ) ) {
-			for( i = 0; i < Answers.GetGenes( ); ++i ) {
-				if( ( iOne = veciGenes[ i ] ) == -1 )
-					continue;
-				for( j = ( i + 1 ); j < Answers.GetGenes( ); ++j ) {
-					if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
-						CMeta::IsNaN( dValue = Data.Get( iOne, iTwo ) ) ||
-						CMeta::IsNaN( dAnswer = Answers.Get( i, j ) ) )
-						continue;
-					if( !( vecfHere.empty( ) ||
-						( dAnswer && vecfHere[ i ] && vecfHere[ j ] ) ||
-						( !dAnswer && ( vecfHere[ i ] || vecfHere[ j ] ) ) ) )
-						continue;
-					if( sArgs.invert_flag )
-						dValue = 1 - dValue;
-					for( k = 0; k <= mapValues[ dValue ]; ++k ) {
-						MatGenes.Set( i, k, true );
-						MatGenes.Set( j, k, true );
-						MatResults.Get( k, dAnswer ? ETFPN_TP : ETFPN_FP )++; }
-					for( ; k < MatResults.GetRows( ); ++k )
-						MatResults.Get( k, dAnswer ? ETFPN_FN : ETFPN_TN )++; } } }
-		else if( iBins ) {
-			vector<SDatum>	vecsData;
-			size_t			iChunk;
-
-			for( iPositives = iNegatives = i = 0; i < Answers.GetGenes( ); ++i ) {
-				if( ( iOne = veciGenes[ i ] ) == -1 )
-					continue;
-				for( j = ( i + 1 ); j < Answers.GetGenes( ); ++j ) {
-					if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
-						CMeta::IsNaN( dAnswer = Answers.Get( i, j ) ) ||
-						CMeta::IsNaN( dValue = Data.Get( iOne, iTwo ) ) )
-						continue;
-					if( !( vecfHere.empty( ) ||
-						( dAnswer && vecfHere[ i ] && vecfHere[ j ] ) ||
-						( !dAnswer && ( vecfHere[ i ] || vecfHere[ j ] ) ) ) )
-						continue;
-
-					MatGenes.Set( i, 0, true );
-					MatGenes.Set( j, 0, true );
-					if( dAnswer )
-						iPositives++;
-					else
-						iNegatives++;
-					vecsData.push_back( SDatum( dValue, i, j, dAnswer ) ); } }
-			sort( vecsData.begin( ), vecsData.end( ), SSorter( !!sArgs.invert_flag ) );
-			iChunk = (size_t)( 0.5 + ( (float)vecsData.size( ) / ( MatResults.GetRows( ) - 1 ) ) );
-			if( sArgs.sse_flag ) {
-				vecdSSE.resize( MatResults.GetRows( ) );
-				veciPositives.resize( vecdSSE.size( ) );
-				for( i = 1,j = 0; i < vecdSSE.size( ); ++i,j += iChunk ) {
-					veciPositives[ veciPositives.size( ) - i - 1 ] = veciPositives[ veciPositives.size( ) - i ];
-					vecdSSE[ vecdSSE.size( ) - i - 1 ] = vecdSSE[ vecdSSE.size( ) - i ];
-					for( k = 0; k < iChunk; ++k ) {
-						if( ( j + k ) >= vecsData.size( ) )
-							break;
-						const SDatum&	sDatum	= vecsData[ vecsData.size( ) - ( j + k ) - 1 ];
-
-						for( m = 0; m < ( vecdSSE.size( ) - i ); ++m ) {
-							MatGenes.Set( sDatum.m_iOne, m, true );
-							MatGenes.Set( sDatum.m_iTwo, m, true ); }
-						dValue = sDatum.m_dValue - sDatum.m_dAnswer;
-						veciPositives[ veciPositives.size( ) - i - 1 ]++;
-						vecdSSE[ vecdSSE.size( ) - i - 1 ] += dValue * dValue; } } }
-			else {
-				veciPositives.resize( MatResults.GetRows( ) - 1 );
-				veciNegatives.resize( veciPositives.size( ) );
-				for( i = 0; i < veciNegatives.size( ); ++i )
-					veciNegatives[ i ] = veciPositives[ i ] = 0;
-				for( i = j = 0; i < veciPositives.size( ); ++i,j += iChunk )
-					for( k = 0; k < iChunk; ++k ) {
-						if( ( j + k ) >= vecsData.size( ) )
-							break;
-						const SDatum&	sDatum	= vecsData[ j + k ];
-
-						for( m = i; m > 0; --m ) {
-							MatGenes.Set( sDatum.m_iOne, m, true );
-							MatGenes.Set( sDatum.m_iTwo, m, true ); }
-						if( Answers.Get( sDatum.m_iOne, sDatum.m_iTwo ) )
-							veciPositives[ i ]++;
-						else
-							veciNegatives[ i ]++; }
-
-				MatResults.Set( 0, ETFPN_TP, iPositives );
-				MatResults.Set( 0, ETFPN_FP, iNegatives );
-				MatResults.Set( 0, ETFPN_TN, 0 );
-				MatResults.Set( 0, ETFPN_FN, 0 );
-				for( i = 1; i < MatResults.GetRows( ); ++i ) {
-					MatResults.Set( i, ETFPN_TP, MatResults.Get( i - 1, ETFPN_TP ) - veciPositives[ i - 1 ] );
-					MatResults.Set( i, ETFPN_FP, MatResults.Get( i - 1, ETFPN_FP ) - veciNegatives[ i - 1 ] );
-					MatResults.Set( i, ETFPN_TN, MatResults.Get( i - 1, ETFPN_TN ) + veciNegatives[ i - 1 ] );
-					MatResults.Set( i, ETFPN_FN, MatResults.Get( i - 1, ETFPN_FN ) +
-						veciPositives[ i - 1 ] ); } } }
-		else
-			for( i = 0; i < Answers.GetGenes( ); ++i ) {
-				if( !( i % 1000 ) )
-					cerr << "Processing gene " << i << '/' << Answers.GetGenes( ) << endl;
-				if( ( iOne = veciGenes[ i ] ) == -1 )
-					continue;
-				for( j = ( i + 1 ); j < Answers.GetGenes( ); ++j ) {
-					if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
-						CMeta::IsNaN( dAnswer = Answers.Get( i, j ) ) ||
-						CMeta::IsNaN( dValue = Data.Get( iOne, iTwo ) ) )
-						continue;
-					if( !( vecfHere.empty( ) ||
-						( dAnswer && vecfHere[ i ] && vecfHere[ j ] ) ||
-						( !dAnswer && ( vecfHere[ i ] || vecfHere[ j ] ) ) ) )
-						continue;
-					if( sArgs.invert_flag )
-						dValue = 1 - dValue;
-
-					iMax = (int)ceil( ( dValue - sArgs.min_arg ) / sArgs.delta_arg );
-					if( iMax > (int)MatResults.GetRows( ) )
-						iMax = (int)MatResults.GetRows( );
-					eTFPN = (ETFPN)!dAnswer;
-					for( k = 0; (int)k < iMax; ++k ) {
-						MatResults.Get( k, eTFPN )++;
-						MatGenes.Set( i, k, true );
-						MatGenes.Set( j, k, true ); }
-					eTFPN = (ETFPN)( 2 + !eTFPN );
-					for( ; k < (int)MatResults.GetRows( ); ++k )
-						MatResults.Get( k, eTFPN )++; } }
-		for( iPositives = iNegatives = i = 0; i < Answers.GetGenes( ); ++i )
-			for( j = ( i + 1 ); j < Answers.GetGenes( ); ++j ) {
-				if( CMeta::IsNaN( dAnswer = Answers.Get( i, j ) ) ||
-					!( vecfHere.empty( ) ||
-					( dAnswer && vecfHere[ i ] && vecfHere[ j ] ) ||
-					( !dAnswer && ( vecfHere[ i ] || vecfHere[ j ] ) ) ) )
-					continue;
-				if( dAnswer )
-					iPositives++;
-				else
-					iNegatives++; }
-
-		veciRec.resize( MatResults.GetRows( ) );
-		veciRecTerm.resize( MatResults.GetRows( ) );
-		for( i = 0; i < veciRec.size( ); ++i ) {
-			veciRec[ i ] = veciRecTerm[ i ] = 0;
-			for( j = 0; j < MatGenes.GetRows( ); ++j )
-				if( MatGenes.Get( j, i ) ) {
-					veciRec[ i ]++;
-					if( vecfHere.size( ) && vecfHere[ j ] )
-						veciRecTerm[ i ]++; }
-			for( j = 0; j < veciGenesTerm.size( ); ++j )
-				if( MatGenes.Get( veciGenesTerm[ j ], i ) &&
-					( vecfHere.empty( ) || !vecfHere[ veciGenesTerm[ j ] ] ) )
-					veciRecTerm[ i ]++; }
-
-		if( sArgs.inputs_num ) {
-			ofsm.open( ( (string)sArgs.directory_arg + '/' +
-				CMeta::Basename( sArgs.inputs[ iGenes ] ) + ".bins" ).c_str( ) );
-			postm = &ofsm; }
-		else
-			postm = &cout;
-
-		if( !sArgs.sse_flag ) {
-			*postm << "#	P	" << iPositives << endl;
-			*postm << "#	N	" << iNegatives << endl; }
-		*postm << "Cut	Genes	" << ( sArgs.sse_flag ? "Pairs	SSE" : "TP	FP	TN	FN" ) << endl;
-		for( i = 0; i < MatResults.GetRows( ); ++i ) {
-			*postm << ( iBins ? i : ( sArgs.min_arg + ( i * sArgs.delta_arg ) ) ) << '\t' <<
-				veciRec[ i ];
-			if( sArgs.sse_flag )
-				*postm << '\t' << veciPositives[ i ] << '\t' << vecdSSE[ i ];
-			else
-				for( j = 0; j < MatResults.GetColumns( ); ++j )
-					*postm << '\t' << MatResults.Get( i, j );
-			if( veciGenesTerm.size( ) || vecfHere.size( ) )
-				*postm << '\t' << veciRecTerm[ i ];
-			*postm << endl; }
-		if( !sArgs.sse_flag )
-			*postm << "#	AUC	" << CStatistics::WilcoxonRankSum( Data, Answers, vecfHere,
-				!!sArgs.invert_flag ) << endl;
-
-		if( sArgs.inputs_num )
-			ofsm.close( );
-		else
-			cout.flush( );
-
-		if( !sArgs.inputs_num )
-			break; }
-
-	return 0; }
+/*****************************************************************************
+* This file is provided under the Creative Commons Attribution 3.0 license.
+*
+* You are free to share, copy, distribute, transmit, or adapt this work
+* PROVIDED THAT you attribute the work to the authors listed below.
+* For more information, please see the following web page:
+* http://creativecommons.org/licenses/by/3.0/
+*
+* This file is a component of the Sleipnir library for functional genomics,
+* authored by:
+* Curtis Huttenhower (chuttenh@princeton.edu)
+* Mark Schroeder
+* Maria D. Chikina
+* Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
+*
+* If you use this library, the included executable tools, or any related
+* code in your work, please cite the following publication:
+* Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
+* Olga G. Troyanskaya.
+* "The Sleipnir library for computational functional genomics"
+*****************************************************************************/
+#include "stdafx.h"
+#include "cmdline.h"
+
+enum ETFPN {
+	ETFPN_TP	= 0,
+	ETFPN_FP	= ETFPN_TP + 1,
+	ETFPN_TN	= ETFPN_FP + 1,
+	ETFPN_FN	= ETFPN_TN + 1
+};
+
+struct SDatum {
+	float	m_dValue;
+	size_t	m_iOne;
+	size_t	m_iTwo;
+	float	m_dAnswer;
+
+	SDatum( float dValue, size_t iOne, size_t iTwo, float dAnswer ) : m_dValue(dValue), m_iOne(iOne), m_iTwo(iTwo),
+		m_dAnswer(dAnswer) { }
+};
+
+struct SSorter {
+	bool	m_fInvert;
+
+	SSorter( bool fInvert ) : m_fInvert(fInvert) { }
+
+	bool operator()( const SDatum& sOne, const SDatum& sTwo ) const {
+
+		return ( m_fInvert ? ( sOne.m_dValue > sTwo.m_dValue ) : ( sOne.m_dValue < sTwo.m_dValue ) ); }
+};
+
+double AUCMod( const CDat&, const CDat&, const vector<bool>&, bool, float );
+
+int main( int iArgs, char** aszArgs ) {
+	CDat				Answers, Data;
+	gengetopt_args_info	sArgs;
+	size_t				i, j, k, m, iOne, iTwo, iGenes, iPositives, iNegatives, iBins;
+	vector<size_t>		veciGenes, veciRec, veciRecTerm;
+	CFullMatrix<bool>	MatGenes;
+	CFullMatrix<size_t>	MatResults;
+	ETFPN				eTFPN;
+	int					iMax;
+	float				dAnswer, dValue;
+	vector<bool>		vecfHere;
+	vector<float>		vecdScores, vecdSSE;
+	vector<size_t>		veciPositives, veciNegatives, veciGenesTerm;
+	ofstream			ofsm;
+	ostream*			postm;
+	map<float,size_t>	mapValues;
+	bool				fMapAnswers;
+	CGenome				Genome;
+	CGenes				GenesTm( Genome );
+
+	if( cmdline_parser( iArgs, aszArgs, &sArgs ) ) {
+		cmdline_parser_print_help( );
+		return 1; }
+	CMeta Meta( sArgs.verbosity_arg );
+
+	fMapAnswers = !!sArgs.memmap_flag && !( sArgs.genes_arg || sArgs.genet_arg || sArgs.genex_arg || sArgs.genee_arg );
+	if( !Answers.Open( sArgs.answers_arg, fMapAnswers ) ) {
+		cerr << "Couldn't open: " << sArgs.answers_arg << endl;
+		return 1; }
+	if( sArgs.genes_arg && !Answers.FilterGenes( sArgs.genes_arg, CDat::EFilterInclude ) ) {
+		cerr << "Couldn't open: " << sArgs.genes_arg << endl;
+		return 1; }
+	if( sArgs.genee_arg && !Answers.FilterGenes( sArgs.genee_arg, CDat::EFilterEdge ) ) {
+		cerr << "Couldn't open: " << sArgs.genee_arg << endl;
+		return 1; }
+	if( sArgs.genet_arg ) {
+		if( !( Answers.FilterGenes( sArgs.genet_arg, CDat::EFilterTerm ) &&
+			GenesTm.Open( sArgs.genet_arg ) ) ) {
+			cerr << "Couldn't open: " << sArgs.genet_arg << endl;
+			return 1; }
+		veciGenesTerm.reserve( GenesTm.GetGenes( ) );
+		for( i = 0; i < GenesTm.GetGenes( ); ++i )
+			if( ( j = Answers.GetGene( GenesTm.GetGene( i ).GetName( ) ) ) != -1 )
+				veciGenesTerm.push_back( j ); }
+	if( sArgs.genex_arg && !Answers.FilterGenes( sArgs.genex_arg, CDat::EFilterExclude ) ) {
+		cerr << "Couldn't open: " << sArgs.genex_arg << endl;
+		return 1; }
+	if( !Data.Open( sArgs.input_arg, !!sArgs.memmap_flag ) ) {
+		cerr << "Couldn't open: " << sArgs.input_arg << endl;
+		return 1; }
+	if( sArgs.normalize_flag )
+		Data.Normalize( CDat::ENormalizeMinMax );
+
+	veciGenes.resize( Answers.GetGenes( ) );
+	for( i = 0; i < Answers.GetGenes( ); ++i )
+		veciGenes[ i ] = Data.GetGene( Answers.GetGene( i ) );
+	if( sArgs.finite_flag ) {
+		vector<float>	vecdValues;
+		{
+			set<float>		setdValues;
+
+			for( i = 0; i < Answers.GetGenes( ); ++i ) {
+				if( ( iOne = veciGenes[ i ] ) == -1 )
+					continue;
+				for( j = ( i + 1 ); j < Answers.GetGenes( ); ++j ) {
+					if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
+						CMeta::IsNaN( dValue = Data.Get( iOne, iTwo ) ) ||
+						CMeta::IsNaN( Answers.Get( i, j ) ) )
+						continue;
+					if( sArgs.invert_flag )
+						dValue = 1 - dValue;
+					setdValues.insert( dValue ); } }
+			vecdValues.resize( setdValues.size( ) );
+			copy( setdValues.begin( ), setdValues.end( ), vecdValues.begin( ) );
+		}
+		sort( vecdValues.begin( ), vecdValues.end( ) );
+		for( i = 0; i < vecdValues.size( ); ++i )
+			mapValues[ vecdValues[ i ] ] = i;
+		iBins = mapValues.size( ); }
+	else
+		iBins = sArgs.bins_arg;
+	MatResults.Initialize( iBins ? ( iBins + 1 ) :
+		(size_t)( ( sArgs.max_arg - sArgs.min_arg ) / sArgs.delta_arg ) + 1, 4 );
+	MatGenes.Initialize( veciGenes.size( ), MatResults.GetRows( ) );
+
+	for( iGenes = 0; !sArgs.inputs_num || ( iGenes < sArgs.inputs_num ); ++iGenes ) {
+		MatResults.Clear( );
+		MatGenes.Clear( );
+
+		if( sArgs.inputs_num ) {
+			CGenes		Genes( Genome );
+			ifstream	ifsm;
+
+			ifsm.open( sArgs.inputs[ iGenes ] );
+			if( !Genes.Open( ifsm ) ) {
+				cerr << "Couldn't open: " << sArgs.inputs[ iGenes ] << endl;
+				return 1; }
+			vecfHere.resize( Answers.GetGenes( ) );
+			for( i = 0; i < vecfHere.size( ); ++i )
+				vecfHere[ i ] = Genes.IsGene( Answers.GetGene( i ) );
+			cerr << "Processing " << sArgs.inputs[ iGenes ] << "..." << endl;
+			ifsm.close( ); }
+
+		if( mapValues.size( ) ) {
+			for( i = 0; i < Answers.GetGenes( ); ++i ) {
+				if( ( iOne = veciGenes[ i ] ) == -1 )
+					continue;
+				for( j = ( i + 1 ); j < Answers.GetGenes( ); ++j ) {
+					if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
+						CMeta::IsNaN( dValue = Data.Get( iOne, iTwo ) ) ||
+						CMeta::IsNaN( dAnswer = Answers.Get( i, j ) ) )
+						continue;
+					if( !( vecfHere.empty( ) ||
+						( dAnswer && vecfHere[ i ] && vecfHere[ j ] ) ||
+						( !dAnswer && ( vecfHere[ i ] || vecfHere[ j ] ) ) ) )
+						continue;
+					if( sArgs.invert_flag )
+						dValue = 1 - dValue;
+					for( k = 0; k <= mapValues[ dValue ]; ++k ) {
+						MatGenes.Set( i, k, true );
+						MatGenes.Set( j, k, true );
+						MatResults.Get( k, dAnswer ? ETFPN_TP : ETFPN_FP )++; }
+					for( ; k < MatResults.GetRows( ); ++k )
+						MatResults.Get( k, dAnswer ? ETFPN_FN : ETFPN_TN )++; } } }
+		else if( iBins ) {
+			vector<SDatum>	vecsData;
+			size_t			iChunk;
+
+			for( iPositives = iNegatives = i = 0; i < Answers.GetGenes( ); ++i ) {
+				if( ( iOne = veciGenes[ i ] ) == -1 )
+					continue;
+				for( j = ( i + 1 ); j < Answers.GetGenes( ); ++j ) {
+					if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
+						CMeta::IsNaN( dAnswer = Answers.Get( i, j ) ) ||
+						CMeta::IsNaN( dValue = Data.Get( iOne, iTwo ) ) )
+						continue;
+					if( !( vecfHere.empty( ) ||
+						( dAnswer && vecfHere[ i ] && vecfHere[ j ] ) ||
+						( !dAnswer && ( vecfHere[ i ] || vecfHere[ j ] ) ) ) )
+						continue;
+
+					MatGenes.Set( i, 0, true );
+					MatGenes.Set( j, 0, true );
+					if( dAnswer )
+						iPositives++;
+					else
+						iNegatives++;
+					vecsData.push_back( SDatum( dValue, i, j, dAnswer ) ); } }
+			sort( vecsData.begin( ), vecsData.end( ), SSorter( !!sArgs.invert_flag ) );
+			iChunk = (size_t)( 0.5 + ( (float)vecsData.size( ) / ( MatResults.GetRows( ) - 1 ) ) );
+			if( sArgs.sse_flag ) {
+				vecdSSE.resize( MatResults.GetRows( ) );
+				veciPositives.resize( vecdSSE.size( ) );
+				for( i = 1,j = 0; i < vecdSSE.size( ); ++i,j += iChunk ) {
+					veciPositives[ veciPositives.size( ) - i - 1 ] = veciPositives[ veciPositives.size( ) - i ];
+					vecdSSE[ vecdSSE.size( ) - i - 1 ] = vecdSSE[ vecdSSE.size( ) - i ];
+					for( k = 0; k < iChunk; ++k ) {
+						if( ( j + k ) >= vecsData.size( ) )
+							break;
+						const SDatum&	sDatum	= vecsData[ vecsData.size( ) - ( j + k ) - 1 ];
+
+						for( m = 0; m < ( vecdSSE.size( ) - i ); ++m ) {
+							MatGenes.Set( sDatum.m_iOne, m, true );
+							MatGenes.Set( sDatum.m_iTwo, m, true ); }
+						dValue = sDatum.m_dValue - sDatum.m_dAnswer;
+						veciPositives[ veciPositives.size( ) - i - 1 ]++;
+						vecdSSE[ vecdSSE.size( ) - i - 1 ] += dValue * dValue; } } }
+			else {
+				veciPositives.resize( MatResults.GetRows( ) - 1 );
+				veciNegatives.resize( veciPositives.size( ) );
+				for( i = 0; i < veciNegatives.size( ); ++i )
+					veciNegatives[ i ] = veciPositives[ i ] = 0;
+				for( i = j = 0; i < veciPositives.size( ); ++i,j += iChunk )
+					for( k = 0; k < iChunk; ++k ) {
+						if( ( j + k ) >= vecsData.size( ) )
+							break;
+						const SDatum&	sDatum	= vecsData[ j + k ];
+
+						for( m = i; m > 0; --m ) {
+							MatGenes.Set( sDatum.m_iOne, m, true );
+							MatGenes.Set( sDatum.m_iTwo, m, true ); }
+						if( Answers.Get( sDatum.m_iOne, sDatum.m_iTwo ) )
+							veciPositives[ i ]++;
+						else
+							veciNegatives[ i ]++; }
+
+				MatResults.Set( 0, ETFPN_TP, iPositives );
+				MatResults.Set( 0, ETFPN_FP, iNegatives );
+				MatResults.Set( 0, ETFPN_TN, 0 );
+				MatResults.Set( 0, ETFPN_FN, 0 );
+				for( i = 1; i < MatResults.GetRows( ); ++i ) {
+					MatResults.Set( i, ETFPN_TP, MatResults.Get( i - 1, ETFPN_TP ) - veciPositives[ i - 1 ] );
+					MatResults.Set( i, ETFPN_FP, MatResults.Get( i - 1, ETFPN_FP ) - veciNegatives[ i - 1 ] );
+					MatResults.Set( i, ETFPN_TN, MatResults.Get( i - 1, ETFPN_TN ) + veciNegatives[ i - 1 ] );
+					MatResults.Set( i, ETFPN_FN, MatResults.Get( i - 1, ETFPN_FN ) +
+						veciPositives[ i - 1 ] ); } } }
+		else
+			for( i = 0; i < Answers.GetGenes( ); ++i ) {
+				if( !( i % 1000 ) )
+					cerr << "Processing gene " << i << '/' << Answers.GetGenes( ) << endl;
+				if( ( iOne = veciGenes[ i ] ) == -1 )
+					continue;
+				for( j = ( i + 1 ); j < Answers.GetGenes( ); ++j ) {
+					if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
+						CMeta::IsNaN( dAnswer = Answers.Get( i, j ) ) ||
+						CMeta::IsNaN( dValue = Data.Get( iOne, iTwo ) ) )
+						continue;
+					if( !( vecfHere.empty( ) ||
+						( dAnswer && vecfHere[ i ] && vecfHere[ j ] ) ||
+						( !dAnswer && ( vecfHere[ i ] || vecfHere[ j ] ) ) ) )
+						continue;
+					if( sArgs.invert_flag )
+						dValue = 1 - dValue;
+
+					iMax = (int)ceil( ( dValue - sArgs.min_arg ) / sArgs.delta_arg );
+					if( iMax > (int)MatResults.GetRows( ) )
+						iMax = (int)MatResults.GetRows( );
+					eTFPN = (ETFPN)!dAnswer;
+					for( k = 0; (int)k < iMax; ++k ) {
+						MatResults.Get( k, eTFPN )++;
+						MatGenes.Set( i, k, true );
+						MatGenes.Set( j, k, true ); }
+					eTFPN = (ETFPN)( 2 + !eTFPN );
+					for( ; k < (int)MatResults.GetRows( ); ++k )
+						MatResults.Get( k, eTFPN )++; } }
+		for( iPositives = iNegatives = i = 0; i < Answers.GetGenes( ); ++i )
+			for( j = ( i + 1 ); j < Answers.GetGenes( ); ++j ) {
+				if( CMeta::IsNaN( dAnswer = Answers.Get( i, j ) ) ||
+					!( vecfHere.empty( ) ||
+					( dAnswer && vecfHere[ i ] && vecfHere[ j ] ) ||
+					( !dAnswer && ( vecfHere[ i ] || vecfHere[ j ] ) ) ) )
+					continue;
+				if( dAnswer )
+					iPositives++;
+				else
+					iNegatives++; }
+
+		veciRec.resize( MatResults.GetRows( ) );
+		veciRecTerm.resize( MatResults.GetRows( ) );
+		for( i = 0; i < veciRec.size( ); ++i ) {
+			veciRec[ i ] = veciRecTerm[ i ] = 0;
+			for( j = 0; j < MatGenes.GetRows( ); ++j )
+				if( MatGenes.Get( j, i ) ) {
+					veciRec[ i ]++;
+					if( vecfHere.size( ) && vecfHere[ j ] )
+						veciRecTerm[ i ]++; }
+			for( j = 0; j < veciGenesTerm.size( ); ++j )
+				if( MatGenes.Get( veciGenesTerm[ j ], i ) &&
+					( vecfHere.empty( ) || !vecfHere[ veciGenesTerm[ j ] ] ) )
+					veciRecTerm[ i ]++; }
+
+		if( sArgs.inputs_num ) {
+			ofsm.open( ( (string)sArgs.directory_arg + '/' +
+				CMeta::Basename( sArgs.inputs[ iGenes ] ) + ".bins" ).c_str( ) );
+			postm = &ofsm; }
+		else
+			postm = &cout;
+
+		if( !sArgs.sse_flag ) {
+			*postm << "#	P	" << iPositives << endl;
+			*postm << "#	N	" << iNegatives << endl; }
+		*postm << "Cut	Genes	" << ( sArgs.sse_flag ? "Pairs	SSE" : "TP	FP	TN	FN" ) << endl;
+		for( i = 0; i < MatResults.GetRows( ); ++i ) {
+			*postm << ( iBins ? i : ( sArgs.min_arg + ( i * sArgs.delta_arg ) ) ) << '\t' <<
+				veciRec[ i ];
+			if( sArgs.sse_flag )
+				*postm << '\t' << veciPositives[ i ] << '\t' << vecdSSE[ i ];
+			else
+				for( j = 0; j < MatResults.GetColumns( ); ++j )
+					*postm << '\t' << MatResults.Get( i, j );
+			if( veciGenesTerm.size( ) || vecfHere.size( ) )
+				*postm << '\t' << veciRecTerm[ i ];
+			*postm << endl; }
+		if( !sArgs.sse_flag )
+			*postm << "#	AUC	" << ( sArgs.auc_arg ?
+				AUCMod( Data, Answers, vecfHere, !!sArgs.invert_flag, sArgs.auc_arg ) :
+				CStatistics::WilcoxonRankSum( Data, Answers, vecfHere, !!sArgs.invert_flag ) ) << endl;
+
+		if( sArgs.inputs_num )
+			ofsm.close( );
+		else
+			cout.flush( );
+
+		if( !sArgs.inputs_num )
+			break; }
+
+	return 0; }
+
+struct SSorterMod {
+	const vector<float>&	m_vecdValues;
+
+	SSorterMod( const vector<float>& vecdValues ) : m_vecdValues(vecdValues) { }
+
+	bool operator()( size_t iOne, size_t iTwo ) {
+
+		return ( m_vecdValues[iTwo] < m_vecdValues[iOne] ); }
+};
+
+double AUCMod( const CDat& DatData, const CDat& DatAnswers, const vector<bool>& vecfGenesOfInterest, bool fInvert,
+	float dAUC ) {
+	size_t			i, j, iPos, iNeg, iPosCur, iNegCur, iOne, iTwo, iIndex, iAUC;
+	vector<float>	vecdValues, vecdAnswers;
+	vector<size_t>	veciGenes, veciIndices;
+	bool			fAnswer;
+	double			dRet;
+	float			d, dAnswer, dSens, dSpec, dSensPrev, dSpecPrev;
+
+	veciGenes.resize( DatAnswers.GetGenes( ) );
+	for( i = 0; i < veciGenes.size( ); ++i )
+		veciGenes[ i ] = DatData.GetGene( DatAnswers.GetGene( i ) );
+
+	for( iPos = iNeg = i = 0; i < DatAnswers.GetGenes( ); ++i ) {
+		if( ( iOne = veciGenes[ i ] ) == -1 )
+			continue;
+		for( j = ( i + 1 ); j < DatAnswers.GetGenes( ); ++j ) {
+			if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
+				CMeta::IsNaN( dAnswer = DatAnswers.Get( i, j ) ) ||
+				CMeta::IsNaN( d = DatData.Get( iOne, iTwo ) ) )
+				continue;
+			fAnswer = dAnswer > 0;
+			if( !( vecfGenesOfInterest.empty( ) ||
+				( fAnswer && vecfGenesOfInterest[ i ] && vecfGenesOfInterest[ j ] ) ||
+				( !fAnswer && ( vecfGenesOfInterest[ i ] || vecfGenesOfInterest[ j ] ) ) ) )
+				continue;
+			if( fAnswer )
+				iPos++;
+			else
+				iNeg++;
+			if( fInvert )
+				d = 1 - d;
+			vecdAnswers.push_back( dAnswer );
+			vecdValues.push_back( d ); } }
+
+	veciIndices.resize( vecdValues.size( ) );
+	for( i = 0; i < veciIndices.size( ); ++i )
+		veciIndices[i] = i;
+	sort( veciIndices.begin( ), veciIndices.end( ), SSorterMod( vecdValues ) );
+
+	iAUC = (size_t)( ( dAUC < 1 ) ? ( dAUC * iNeg ) : dAUC );
+	dRet = dSensPrev = dSpecPrev = 0;
+	for( iPosCur = iNegCur = i = 0; ( i < veciIndices.size( ) ) && ( iNegCur < iAUC ); ++i ) {
+		iIndex = veciIndices[i];
+		if( vecdAnswers[iIndex] > 0 )
+			iPosCur++;
+		else
+			iNegCur++;
+		dSens = (float)iPosCur / iPos;
+		dSpec = 1 - (float)( iNeg - iNegCur ) / iNeg;
+		if( dSpec > dSpecPrev ) {
+			dRet += ( dSpec - dSpecPrev ) * dSens;
+			dSensPrev = dSens;
+			dSpecPrev = dSpec; } }
+	dRet *= max( 1.0f, (float)iNeg / iAUC );
+
+	return dRet; }

tools/DChecker/DChecker.ggo

 section "Miscellaneous"
 option	"directory"		d	"Output directory"
 							string	typestr="directory"	default="."
+option	"auc"			a	"Use alternative AUCn calculation"
+							float	default="0"
 
 section "Ranking Method"
 option	"bins"			b	"Bins for quantile sorting"

tools/DChecker/cmdline.c

   "  -w, --answers=filename     Answer DAT/DAB file",
   "\nMiscellaneous:",
   "  -d, --directory=directory  Output directory  (default=`.')",
+  "  -a, --auc=FLOAT            Use alternative AUCn calculation  (default=`0')",
   "\nRanking Method:",
   "  -b, --bins=INT             Bins for quantile sorting  (default=`1000')",
   "  -f, --finite               Count finitely many bins  (default=off)",
   args_info->input_given = 0 ;
   args_info->answers_given = 0 ;
   args_info->directory_given = 0 ;
+  args_info->auc_given = 0 ;
   args_info->bins_given = 0 ;
   args_info->finite_given = 0 ;
   args_info->min_given = 0 ;
   args_info->answers_orig = NULL;
   args_info->directory_arg = gengetopt_strdup (".");
   args_info->directory_orig = NULL;
+  args_info->auc_arg = 0;
+  args_info->auc_orig = NULL;
   args_info->bins_arg = 1000;
   args_info->bins_orig = NULL;
   args_info->finite_flag = 0;
   args_info->input_help = gengetopt_args_info_help[3] ;
   args_info->answers_help = gengetopt_args_info_help[4] ;
   args_info->directory_help = gengetopt_args_info_help[6] ;
-  args_info->bins_help = gengetopt_args_info_help[8] ;
-  args_info->finite_help = gengetopt_args_info_help[9] ;
-  args_info->min_help = gengetopt_args_info_help[10] ;
-  args_info->max_help = gengetopt_args_info_help[11] ;
-  args_info->delta_help = gengetopt_args_info_help[12] ;
-  args_info->genes_help = gengetopt_args_info_help[14] ;
-  args_info->genex_help = gengetopt_args_info_help[15] ;
-  args_info->genet_help = gengetopt_args_info_help[16] ;
-  args_info->genee_help = gengetopt_args_info_help[17] ;
-  args_info->normalize_help = gengetopt_args_info_help[19] ;
-  args_info->invert_help = gengetopt_args_info_help[20] ;
-  args_info->sse_help = gengetopt_args_info_help[22] ;
-  args_info->memmap_help = gengetopt_args_info_help[23] ;
-  args_info->verbosity_help = gengetopt_args_info_help[24] ;
+  args_info->auc_help = gengetopt_args_info_help[7] ;
+  args_info->bins_help = gengetopt_args_info_help[9] ;
+  args_info->finite_help = gengetopt_args_info_help[10] ;
+  args_info->min_help = gengetopt_args_info_help[11] ;
+  args_info->max_help = gengetopt_args_info_help[12] ;
+  args_info->delta_help = gengetopt_args_info_help[13] ;
+  args_info->genes_help = gengetopt_args_info_help[15] ;
+  args_info->genex_help = gengetopt_args_info_help[16] ;
+  args_info->genet_help = gengetopt_args_info_help[17] ;
+  args_info->genee_help = gengetopt_args_info_help[18] ;
+  args_info->normalize_help = gengetopt_args_info_help[20] ;
+  args_info->invert_help = gengetopt_args_info_help[21] ;
+  args_info->sse_help = gengetopt_args_info_help[23] ;
+  args_info->memmap_help = gengetopt_args_info_help[24] ;
+  args_info->verbosity_help = gengetopt_args_info_help[25] ;
   
 }
 
   free_string_field (&(args_info->answers_orig));
   free_string_field (&(args_info->directory_arg));
   free_string_field (&(args_info->directory_orig));
+  free_string_field (&(args_info->auc_orig));
   free_string_field (&(args_info->bins_orig));
   free_string_field (&(args_info->min_orig));
   free_string_field (&(args_info->max_orig));
     write_into_file(outfile, "answers", args_info->answers_orig, 0);
   if (args_info->directory_given)
     write_into_file(outfile, "directory", args_info->directory_orig, 0);
+  if (args_info->auc_given)
+    write_into_file(outfile, "auc", args_info->auc_orig, 0);
   if (args_info->bins_given)
     write_into_file(outfile, "bins", args_info->bins_orig, 0);
   if (args_info->finite_given)
         { "input",	1, NULL, 'i' },
         { "answers",	1, NULL, 'w' },
         { "directory",	1, NULL, 'd' },
+        { "auc",	1, NULL, 'a' },
         { "bins",	1, NULL, 'b' },
         { "finite",	0, NULL, 'f' },
         { "min",	1, NULL, 'm' },
         { NULL,	0, NULL, 0 }
       };
 
-      c = getopt_long (argc, argv, "hVi:w:d:b:fm:M:e:g:G:c:C:ntspv:", long_options, &option_index);
+      c = getopt_long (argc, argv, "hVi:w:d:a:b:fm:M:e:g:G:c:C:ntspv:", long_options, &option_index);
 
       if (c == -1) break;	/* Exit from `while (1)' loop.  */
 
             goto failure;
         
           break;
+        case 'a':	/* Use alternative AUCn calculation.  */
+        
+        
+          if (update_arg( (void *)&(args_info->auc_arg), 
+               &(args_info->auc_orig), &(args_info->auc_given),
+              &(local_args_info.auc_given), optarg, 0, "0", ARG_FLOAT,
+              check_ambiguity, override, 0, 0,
+              "auc", 'a',
+              additional_error))
+            goto failure;
+        
+          break;
         case 'b':	/* Bins for quantile sorting.  */
         
         

tools/DChecker/cmdline.h

   char * directory_arg;	/**< @brief Output directory (default='.').  */
   char * directory_orig;	/**< @brief Output directory original value given at command line.  */
   const char *directory_help; /**< @brief Output directory help description.  */
+  float auc_arg;	/**< @brief Use alternative AUCn calculation (default='0').  */
+  char * auc_orig;	/**< @brief Use alternative AUCn calculation original value given at command line.  */
+  const char *auc_help; /**< @brief Use alternative AUCn calculation help description.  */
   int bins_arg;	/**< @brief Bins for quantile sorting (default='1000').  */
   char * bins_orig;	/**< @brief Bins for quantile sorting original value given at command line.  */
   const char *bins_help; /**< @brief Bins for quantile sorting help description.  */
   unsigned int input_given ;	/**< @brief Whether input was given.  */
   unsigned int answers_given ;	/**< @brief Whether answers was given.  */
   unsigned int directory_given ;	/**< @brief Whether directory was given.  */
+  unsigned int auc_given ;	/**< @brief Whether auc was given.  */
   unsigned int bins_given ;	/**< @brief Whether bins was given.  */
   unsigned int finite_given ;	/**< @brief Whether finite was given.  */
   unsigned int min_given ;	/**< @brief Whether min was given.  */