Commits

Anonymous committed bf25113 Merge

Merge

Comments (0)

Files changed (17)

 fi
 
 
ac_config_files="$ac_config_files Makefile src/Makefile tools/Makefile tools/Answerer/Makefile tools/Cliquer/Makefile tools/Clusterer/Makefile tools/Clusters2Dab/Makefile tools/COALESCE/Makefile tools/Combiner/Makefile tools/DChecker/Makefile tools/Dat2Dab/Makefile tools/Dat2Graph/Makefile tools/Data2Bnt/Makefile tools/Data2Features/Makefile tools/Data2Sql/Makefile tools/DataDumper/Makefile tools/Distancer/Makefile tools/Explainer/Makefile tools/Funcaeologist/Makefile tools/Funcifier/Makefile tools/Funcographer/Makefile tools/Hubber/Makefile tools/KNNImputer/Makefile tools/Mat2Txt/Makefile tools/MCluster/Makefile tools/Matcher/Makefile tools/MIer/Makefile tools/Normalizer/Makefile tools/Orthologer/Makefile tools/Overlapper/Makefile tools/PCLPlotter/Makefile tools/Randomizer/Makefile tools/Seqs2Ngrams/Makefile tools/SMRF/Makefile tools/SVDer/Makefile tools/Synthesizer/Makefile tools/Txt2Bin/Makefile tools/BNConverter/Makefile tools/BNCreator/Makefile tools/BNEvaluator/Makefile tools/BNFunc/Makefile tools/BNTester/Makefile tools/BNTruster/Makefile tools/BNs2Txt/Makefile tools/BNUnraveler/Makefile tools/BNWeaver/Makefile tools/Contexter/Makefile tools/Counter/Makefile tools/Data2DB/Makefile tools/DSLConverter/Makefile tools/Dab2Dad/Makefile tools/Edges2Posteriors/Makefile tools/MEFIT/Makefile tools/Data2Svm/Makefile tools/SVMer/Makefile tools/OntoShell/Makefile tools/BNServer/Makefile"
ac_config_files="$ac_config_files Makefile src/Makefile tools/Makefile tools/Answerer/Makefile tools/Cliquer/Makefile tools/Clusterer/Makefile tools/Clusters2Dab/Makefile tools/COALESCE/Makefile tools/Combiner/Makefile tools/DChecker/Makefile tools/Dat2Dab/Makefile tools/Dat2Graph/Makefile tools/Data2Bnt/Makefile tools/Data2Features/Makefile tools/Data2Sql/Makefile tools/DataDumper/Makefile tools/Distancer/Makefile tools/Explainer/Makefile tools/Filterer/Makefile tools/Funcaeologist/Makefile tools/Funcifier/Makefile tools/Funcographer/Makefile tools/Hubber/Makefile tools/KNNImputer/Makefile tools/Mat2Txt/Makefile tools/MCluster/Makefile tools/Matcher/Makefile tools/MIer/Makefile tools/Normalizer/Makefile tools/Orthologer/Makefile tools/Overlapper/Makefile tools/PCLPlotter/Makefile tools/Randomizer/Makefile tools/Seqs2Ngrams/Makefile tools/SMRF/Makefile tools/SVDer/Makefile tools/Synthesizer/Makefile tools/Txt2Bin/Makefile tools/BNConverter/Makefile tools/BNCreator/Makefile tools/BNEvaluator/Makefile tools/BNFunc/Makefile tools/BNTester/Makefile tools/BNTruster/Makefile tools/BNs2Txt/Makefile tools/BNUnraveler/Makefile tools/BNWeaver/Makefile tools/Contexter/Makefile tools/Counter/Makefile tools/Data2DB/Makefile tools/DSLConverter/Makefile tools/Dab2Dad/Makefile tools/Edges2Posteriors/Makefile tools/MEFIT/Makefile tools/Data2Svm/Makefile tools/SVMer/Makefile tools/OntoShell/Makefile tools/BNServer/Makefile"
 
 
 
   "tools/DataDumper/Makefile" ) CONFIG_FILES="$CONFIG_FILES tools/DataDumper/Makefile" ;;
   "tools/Distancer/Makefile" ) CONFIG_FILES="$CONFIG_FILES tools/Distancer/Makefile" ;;
   "tools/Explainer/Makefile" ) CONFIG_FILES="$CONFIG_FILES tools/Explainer/Makefile" ;;
+  "tools/Filterer/Makefile" ) CONFIG_FILES="$CONFIG_FILES tools/Filterer/Makefile" ;;
   "tools/Funcaeologist/Makefile" ) CONFIG_FILES="$CONFIG_FILES tools/Funcaeologist/Makefile" ;;
   "tools/Funcifier/Makefile" ) CONFIG_FILES="$CONFIG_FILES tools/Funcifier/Makefile" ;;
   "tools/Funcographer/Makefile" ) CONFIG_FILES="$CONFIG_FILES tools/Funcographer/Makefile" ;;
 		 tools/DataDumper/Makefile \
 		 tools/Distancer/Makefile \
 		 tools/Explainer/Makefile \
+		 tools/Filterer/Makefile \
 		 tools/Funcaeologist/Makefile \
 		 tools/Funcifier/Makefile \
 		 tools/Funcographer/Makefile \
 		vector<vector<float> >	vecvecdCounts;
 		vector<size_t>			veciLengths;
 		SCoalesceModifierCache	sModifiers( sMods );
+		vector<float>			vecdMotifs;
+		uint32_t				iMotif;
+		size_t					iCount, iType, iSubsequence, iGene;
+		float*					ad;
 
 		if( !m_pMotifs ) {
 			m_fMotifs = true;
 					for( j = 0; j < vecsSequences.size( ); ++j )
 						if( !GeneScores.Add( i, *m_pMotifs, vecsSequences[ j ], sModifiers, vecvecdCounts,
 							veciLengths ) )
-							return false; } } }
+							return false; } }
+
+		vecdMotifs.resize( GeneScores.GetMotifs( ) );
+		for( iCount = iType = 0; iType < GeneScores.GetTypes( ); ++iType )
+			for( iSubsequence = 0; iSubsequence < GeneScores.GetSubsequences( iType ); ++iSubsequence ) {
+				for( iGene = 0; iGene < PCL.GetGenes( ); ++iGene ) {
+					if( !( ad = GeneScores.Get( iType, (CCoalesceSequencerBase::ESubsequence)iSubsequence, iGene ) ) )
+						continue;
+					iCount++;
+					for( iMotif = 0; iMotif < GeneScores.GetMotifs( ); ++iMotif )
+						vecdMotifs[iMotif] += ad[iMotif]; }
+				for( iMotif = 0; iMotif < vecdMotifs.size( ); ++iMotif )
+					vecdMotifs[iMotif] /= iCount;
+				for( iGene = 0; iGene < PCL.GetGenes( ); ++iGene ) {
+					if( ad = GeneScores.Get( iType, (CCoalesceSequencerBase::ESubsequence)iSubsequence, iGene ) )
+						for( iMotif = 0; iMotif < GeneScores.GetMotifs( ); ++iMotif )
+							ad[iMotif] += vecdMotifs[iMotif]; } } }
+
 	if( !GeneScores.GetMotifs( ) )
 		Clear( );
 

src/coalescecluster.cpp

 	vector<bool>						vecfSignificant;
 	vector<SThreadSignificantGene>		vecsThreads;
 	vector<size_t>						veciDatasets;
-	float								dFracExpression, dFracMotifs, dSSCluster, dSSPot, dAve;
+	float								dSSCluster, dSSPot, dAve;
 	set<size_t>							setiMotifs;
 	set<SMotifMatch>::const_iterator	iterMotif;
 	vector<float>						vecdStdevs;
 		dAve = ( ( m_vecdCentroid[ i ] * iCluster ) + ( Pot.m_vecdCentroid[ i ] * iPot ) ) / PCL.GetGenes( );
 		vecdStdevs[ i ] = sqrt( ( ( dSSCluster + dSSPot ) / ( iCluster + iPot ) ) - ( dAve * dAve ) ); }
 
-	dFracExpression = (float)m_setiDatasets.size( ) / m_vecsDatasets.size( );
-	if( pMotifs ) {
-		for( iterMotif = m_setsMotifs.begin( ); iterMotif != m_setsMotifs.end( ); ++iterMotif )
-			setiMotifs.insert( iterMotif->m_iMotif );
-		dFracMotifs = (float)setiMotifs.size( ) / pMotifs->GetMotifs( ); }
-	else
-		dFracMotifs = 0;
-
 	veciDatasets.resize( m_setiDatasets.size( ) );
 	copy( m_setiDatasets.begin( ), m_setiDatasets.end( ), veciDatasets.begin( ) );
 	vecfSignificant.resize( PCL.GetGenes( ) );
 		vecsThreads[ i ].m_pPot = &Pot;
 		vecsThreads[ i ].m_pveciDatasets = &veciDatasets;
 		vecsThreads[ i ].m_pvecdStdevs = &vecdStdevs;
-		vecsThreads[ i ].m_dBeta = dFracExpression / ( dFracExpression + dFracMotifs );
+		vecsThreads[ i ].m_dBeta = m_setsMotifs.size( ) ? ( (float)m_setsMotifs.size( ) / ( m_setiDatasets.size( ) + m_setsMotifs.size( ) ) ) : 0.5;
 		vecsThreads[ i ].m_iMinimum = iMinimum;
 		vecsThreads[ i ].m_dProbability = dProbability;
 		if( pthread_create( &vecpthdThreads[ i ], NULL, ThreadSignificantGene, &vecsThreads[ i ] ) ) {
 
 		return IncompleteBeta( 0.5 * iDegFree, 0.5, iDegFree / ( iDegFree + ( dT * dT ) ) ); }
 
+	/*!
+	 * \brief
+	 * Return the p-value of a one-sample t-test computed from summary statistics.
+	 * 
+	 * \param dMean
+	 * Sample mean; the statistic measures its distance from zero.
+	 * 
+	 * \param dVariance
+	 * Sample variance.
+	 * 
+	 * \param iN
+	 * Number of observations summarized by dMean and dVariance.
+	 * 
+	 * \returns
+	 * IncompleteBeta( df / 2, 1 / 2, df / ( df + t^2 ) ) with df = iN - 1 and
+	 * t = sqrt( iN ) * dMean / sqrt( dVariance ), or 1 if iN is less than two.
+	 */
+	static double TTest( double dMean, double dVariance, size_t iN ) {
+		size_t	iDegFree;
+		double	dT;
+
+		// With fewer than two observations there are no degrees of freedom, and
+		// iN - 1 would wrap around for iN == 0 because size_t is unsigned.
+		if( iN < 2 )
+			return 1;
+
+		iDegFree = iN - 1;
+		// Cast to double so the statistic is not truncated to single precision.
+		dT = sqrt( (double)iN ) * dMean / sqrt( dVariance );
+
+		return IncompleteBeta( 0.5 * iDegFree, 0.5, iDegFree / ( iDegFree + ( dT * dT ) ) ); }
+
 	/*!
 	 * \brief
 	 * Return the p-value of a t-test between the two given array statistics without assuming equal variance.

tools/COALESCE/COALESCE.ggo

 option	"zscore_cond"	C	"Z-score threshhold for condition inclusion"
 							double	default="0.5"
 option	"zscore_motif"	M	"Z-score threshhold for motif inclusion"
-							double	default="0.25"
+							double	default="0.5"
 
 section "Sequence Parameters"
 option	"k"				k	"Sequence kmer length"
 option	"sequences"		q	"Sequence types to use (comma separated)"
 							string
 option	"bases"			b	"Resolution of bases per motif match"
-							int	default="5000"
+							int	default="2500"
 option	"size_minimum"	z	"Minimum gene count for clusters of interest"
 							int	default="5"
 option	"size_merge"	E	"Maximum motif count for realtime merging"

tools/COALESCE/cmdline.c

 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /r01/tergeo/chuttenh/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iCOALESCE.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iCOALESCE.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:
   "  -n, --pvalue_correl=DOUBLE    P-value threshhold for significant correlation  \n                                  (default=`0.05')",
   "  -N, --number_correl=INT       Maximum number of pairs to sample for \n                                  significant correlation  (default=`100000')",
   "  -q, --sequences=STRING        Sequence types to use (comma separated)",
-  "  -b, --bases=INT               Resolution of bases per motif match  \n                                  (default=`5000')",
+  "  -b, --bases=INT               Resolution of bases per motif match  \n                                  (default=`2500')",
   "  -z, --size_minimum=INT        Minimum gene count for clusters of interest  \n                                  (default=`5')",
   "  -E, --size_merge=INT          Maximum motif count for realtime merging  \n                                  (default=`100')",
   "  -Z, --size_maximum=INT        Maximum motif count to consider a cluster \n                                  saturated  (default=`1000')",
   args_info->number_correl_orig = NULL;
   args_info->sequences_arg = NULL;
   args_info->sequences_orig = NULL;
-  args_info->bases_arg = 5000;
+  args_info->bases_arg = 2500;
   args_info->bases_orig = NULL;
   args_info->size_minimum_arg = 5;
   args_info->size_minimum_orig = NULL;
         
           if (update_arg( (void *)&(args_info->bases_arg), 
                &(args_info->bases_orig), &(args_info->bases_given),
-              &(local_args_info.bases_given), optarg, 0, "5000", ARG_INT,
+              &(local_args_info.bases_given), optarg, 0, "2500", ARG_INT,
               check_ambiguity, override, 0, 0,
               "bases", 'b',
               additional_error))

tools/COALESCE/cmdline.h

   char * sequences_arg;	/**< @brief Sequence types to use (comma separated).  */
   char * sequences_orig;	/**< @brief Sequence types to use (comma separated) original value given at command line.  */
   const char *sequences_help; /**< @brief Sequence types to use (comma separated) help description.  */
-  int bases_arg;	/**< @brief Resolution of bases per motif match (default='5000').  */
+  int bases_arg;	/**< @brief Resolution of bases per motif match (default='2500').  */
   char * bases_orig;	/**< @brief Resolution of bases per motif match original value given at command line.  */
   const char *bases_help; /**< @brief Resolution of bases per motif match help description.  */
   int size_minimum_arg;	/**< @brief Minimum gene count for clusters of interest (default='5').  */

tools/Combiner/Combiner.cpp

 	return 0; }
 
 int MainDATs( const gengetopt_args_info& sArgs ) {
-	CDataset					Dataset;
-	CDat						DatOut, DatCur;
-	CHalfMatrix<unsigned short>	HMatCounts;
-	size_t						i, j, k, iOne, iTwo;
-	vector<size_t>				veciGenes;
-	float						d;
-	vector<string>				vecstrFiles;
+	CDataset			Dataset;
+	CDat				DatOut, DatCur;
+	CHalfMatrix<float>	MatCounts;
+	size_t				i, j, k, iOne, iTwo;
+	vector<size_t>		veciGenes;
+	float				d, dWeight, dWeights;
+	vector<string>		vecstrFiles;
+	CPCL				PCLWeights( false );
 
 	if( !sArgs.inputs_num )
 		return 1;
 			cerr << ", " << vecstrFiles[ i ];
 		cerr << endl;
 		return 1; }
+	if( sArgs.weights_arg && !PCLWeights.Open( sArgs.weights_arg, 0 ) ) {
+		cerr << "Could not open: " << sArgs.weights_arg << endl;
+		return 1; }
 
 	DatOut.Open( Dataset.GetGeneNames( ), false, sArgs.memmap_flag ? sArgs.output_arg : NULL );
 	if( !strcmp( c_szMax, sArgs.method_arg ) )
 		d = -FLT_MAX;
 	else if( !strcmp( c_szMin, sArgs.method_arg ) )
 		d = FLT_MAX;
+	else if( !strcmp( c_szGMean, sArgs.method_arg ) )
+		d = 1;
 	else
 		d = 0;
 	for( i = 0; i < DatOut.GetGenes( ); ++i )
 		for( j = ( i + 1 ); j < DatOut.GetGenes( ); ++j )
 			DatOut.Set( i, j, d );
-	if( !d ) {
-		HMatCounts.Initialize( DatOut.GetGenes( ) );
-		HMatCounts.Clear( ); }
+	if( fabs( d ) < 2 ) {
+		MatCounts.Initialize( DatOut.GetGenes( ) );
+		MatCounts.Clear( ); }
 	veciGenes.resize( DatOut.GetGenes( ) );
-	for( i = 0; i < sArgs.inputs_num; ++i ) {
+	for( dWeights = 0,i = 0; i < sArgs.inputs_num; ++i ) {
 		if( !DatCur.Open( sArgs.inputs[ i ], !!sArgs.memmap_flag && !sArgs.normalize_flag ) ) {
 			cerr << "Couldn't open: " << sArgs.inputs[ i ] << endl;
 			return 1; }
+		if( PCLWeights.GetGenes( ) ) {
+			if( ( j = PCLWeights.GetGene( CMeta::Deextension( CMeta::Basename( sArgs.inputs[ i ] ) ) ) ) == -1 ) {
+				cerr << "Ignoring unweighted graph: " << sArgs.inputs[ i ] << endl;
+				continue; }
+			dWeight = PCLWeights.Get( j, 0 ); }
+		else
+			dWeight = 1;
 		cerr << "Opened: " << sArgs.inputs[ i ] << endl;
 		if( sArgs.normalize_flag )
 			DatCur.Normalize( CDat::ENormalizeZScore );
 					continue;
 				if( !strcmp( c_szMean, sArgs.method_arg ) ||
 					!strcmp( c_szSum, sArgs.method_arg ) ) {
-					DatOut.Get( j, k ) += d;
-					HMatCounts.Get( j, k )++; }
+					DatOut.Get( j, k ) += dWeight * d;
+					MatCounts.Get( j, k ) += dWeight; }
 				else if( !strcmp( c_szGMean, sArgs.method_arg ) ) {
-					DatOut.Get( j, k ) *= d;
-					HMatCounts.Get( j, k )++; }
+					DatOut.Get( j, k ) *= pow( d, dWeight );
+					MatCounts.Get( j, k ) += dWeight; }
 				else if( !strcmp( c_szHMean, sArgs.method_arg ) ) {
-					DatOut.Get( j, k ) += 1 / d;
-					HMatCounts.Get( j, k )++; }
+					DatOut.Get( j, k ) += dWeight / d;
+					MatCounts.Get( j, k ) += dWeight; }
 				else if( !strcmp( c_szMax, sArgs.method_arg ) ) {
 					if( d > DatOut.Get( j, k ) )
 						DatOut.Set( j, k, d ); }
 	for( i = 0; i < DatOut.GetGenes( ); ++i )
 		for( j = ( i + 1 ); j < DatOut.GetGenes( ); ++j )
 			if( !strcmp( c_szMean, sArgs.method_arg ) )
-				DatOut.Set( i, j, ( k = HMatCounts.Get( i, j ) ) ? ( DatOut.Get( i, j ) / k ) :
+				DatOut.Set( i, j, ( d = MatCounts.Get( i, j ) ) ? ( DatOut.Get( i, j ) / d ) :
 					CMeta::GetNaN( ) );
 			else if( !strcmp( c_szGMean, sArgs.method_arg ) )
-				DatOut.Set( i, j, ( k = HMatCounts.Get( i, j ) ) ?
-					(float)pow( (double)DatOut.Get( i, j ), 1.0 / k ) : CMeta::GetNaN( ) );
+				DatOut.Set( i, j, ( d = MatCounts.Get( i, j ) ) ?
+					(float)pow( (double)DatOut.Get( i, j ), 1.0 / d ) : CMeta::GetNaN( ) );
 			else if( !strcmp( c_szHMean, sArgs.method_arg ) )
-				DatOut.Set( i, j, ( k = HMatCounts.Get( i, j ) ) ? ( k / DatOut.Get( i, j ) ) :
+				DatOut.Set( i, j, ( d = MatCounts.Get( i, j ) ) ? ( d / DatOut.Get( i, j ) ) :
 					CMeta::GetNaN( ) );
 			else if( !strcmp( c_szMax, sArgs.method_arg ) ) {
 				if( DatOut.Get( i, j ) == -FLT_MAX )

tools/Combiner/Combiner.ggo

 						values="min","max","mean","gmean","hmean","sum"	default="mean"
 option	"output"	o	"Output file"
 						string	typestr="filename"
+option	"weights"	w	"Weights file"
+						string	typestr="filename"
 
 section "Modules"
 option	"jaccard"	j	"Minimum Jaccard index for module equivalence"

tools/Combiner/cmdline.c

 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /r01/tergeo/chuttenh/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iCombiner.ggo --default-optional -u -N -e 
+  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iCombiner.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:
   "  -t, --type=STRING          Output data file type  (possible values=\"pcl\", \n                               \"dat\", \"dab\", \"module\" default=`pcl')",
   "  -m, --method=STRING        Combination method  (possible values=\"min\", \n                               \"max\", \"mean\", \"gmean\", \"hmean\", \"sum\" \n                               default=`mean')",
   "  -o, --output=filename      Output file",
+  "  -w, --weights=filename     Weights file",
   "\nModules:",
   "  -j, --jaccard=FLOAT        Minimum Jaccard index for module equivalence  \n                               (default=`0.5')",
   "  -r, --intersection=DOUBLE  Minimum intersection fractino for module \n                               inheritance  (default=`0.666')",
   args_info->type_given = 0 ;
   args_info->method_given = 0 ;
   args_info->output_given = 0 ;
+  args_info->weights_given = 0 ;
   args_info->jaccard_given = 0 ;
   args_info->intersection_given = 0 ;
   args_info->skip_given = 0 ;
   args_info->method_orig = NULL;
   args_info->output_arg = NULL;
   args_info->output_orig = NULL;
+  args_info->weights_arg = NULL;
+  args_info->weights_orig = NULL;
   args_info->jaccard_arg = 0.5;
   args_info->jaccard_orig = NULL;
   args_info->intersection_arg = 0.666;
   args_info->type_help = gengetopt_args_info_help[3] ;
   args_info->method_help = gengetopt_args_info_help[4] ;
   args_info->output_help = gengetopt_args_info_help[5] ;
-  args_info->jaccard_help = gengetopt_args_info_help[7] ;
-  args_info->intersection_help = gengetopt_args_info_help[8] ;
-  args_info->skip_help = gengetopt_args_info_help[10] ;
-  args_info->memmap_help = gengetopt_args_info_help[11] ;
-  args_info->normalize_help = gengetopt_args_info_help[12] ;
-  args_info->subset_help = gengetopt_args_info_help[13] ;
-  args_info->verbosity_help = gengetopt_args_info_help[14] ;
+  args_info->weights_help = gengetopt_args_info_help[6] ;
+  args_info->jaccard_help = gengetopt_args_info_help[8] ;
+  args_info->intersection_help = gengetopt_args_info_help[9] ;
+  args_info->skip_help = gengetopt_args_info_help[11] ;
+  args_info->memmap_help = gengetopt_args_info_help[12] ;
+  args_info->normalize_help = gengetopt_args_info_help[13] ;
+  args_info->subset_help = gengetopt_args_info_help[14] ;
+  args_info->verbosity_help = gengetopt_args_info_help[15] ;
   
 }
 
   free_string_field (&(args_info->method_orig));
   free_string_field (&(args_info->output_arg));
   free_string_field (&(args_info->output_orig));
+  free_string_field (&(args_info->weights_arg));
+  free_string_field (&(args_info->weights_orig));
   free_string_field (&(args_info->jaccard_orig));
   free_string_field (&(args_info->intersection_orig));
   free_string_field (&(args_info->skip_orig));
     write_into_file(outfile, "method", args_info->method_orig, cmdline_parser_method_values);
   if (args_info->output_given)
     write_into_file(outfile, "output", args_info->output_orig, 0);
+  if (args_info->weights_given)
+    write_into_file(outfile, "weights", args_info->weights_orig, 0);
   if (args_info->jaccard_given)
     write_into_file(outfile, "jaccard", args_info->jaccard_orig, 0);
   if (args_info->intersection_given)
         { "type",	1, NULL, 't' },
         { "method",	1, NULL, 'm' },
         { "output",	1, NULL, 'o' },
+        { "weights",	1, NULL, 'w' },
         { "jaccard",	1, NULL, 'j' },
         { "intersection",	1, NULL, 'r' },
         { "skip",	1, NULL, 'k' },
         { NULL,	0, NULL, 0 }
       };
 
-      c = getopt_long (argc, argv, "hVt:m:o:j:r:k:pns:v:", long_options, &option_index);
+      c = getopt_long (argc, argv, "hVt:m:o:w:j:r:k:pns:v:", long_options, &option_index);
 
       if (c == -1) break;	/* Exit from `while (1)' loop.  */
 
             goto failure;
         
           break;
+        case 'w':	/* Weights file.  */
+        
+        
+          if (update_arg( (void *)&(args_info->weights_arg), 
+               &(args_info->weights_orig), &(args_info->weights_given),
+              &(local_args_info.weights_given), optarg, 0, 0, ARG_STRING,
+              check_ambiguity, override, 0, 0,
+              "weights", 'w',
+              additional_error))
+            goto failure;
+        
+          break;
         case 'j':	/* Minimum Jaccard index for module equivalence.  */
         
         

tools/Combiner/cmdline.h

   char * output_arg;	/**< @brief Output file.  */
   char * output_orig;	/**< @brief Output file original value given at command line.  */
   const char *output_help; /**< @brief Output file help description.  */
+  char * weights_arg;	/**< @brief Weights file.  */
+  char * weights_orig;	/**< @brief Weights file original value given at command line.  */
+  const char *weights_help; /**< @brief Weights file help description.  */
   float jaccard_arg;	/**< @brief Minimum Jaccard index for module equivalence (default='0.5').  */
   char * jaccard_orig;	/**< @brief Minimum Jaccard index for module equivalence original value given at command line.  */
   const char *jaccard_help; /**< @brief Minimum Jaccard index for module equivalence help description.  */
   unsigned int type_given ;	/**< @brief Whether type was given.  */
   unsigned int method_given ;	/**< @brief Whether method was given.  */
   unsigned int output_given ;	/**< @brief Whether output was given.  */
+  unsigned int weights_given ;	/**< @brief Whether weights was given.  */
   unsigned int jaccard_given ;	/**< @brief Whether jaccard was given.  */
   unsigned int intersection_given ;	/**< @brief Whether intersection was given.  */
   unsigned int skip_given ;	/**< @brief Whether skip was given.  */

tools/Filterer/Filterer.cpp

+/*****************************************************************************
+* This file is provided under the Creative Commons Attribution 3.0 license.
+*
+* You are free to share, copy, distribute, transmit, or adapt this work
+* PROVIDED THAT you attribute the work to the authors listed below.
+* For more information, please see the following web page:
+* http://creativecommons.org/licenses/by/3.0/
+*
+* This file is a component of the Sleipnir library for functional genomics,
+* authored by:
+* Curtis Huttenhower (chuttenh@princeton.edu)
+* Mark Schroeder
+* Maria D. Chikina
+* Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
+*
+* If you use this library, the included executable tools, or any related
+* code in your work, please cite the following publication:
+* Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
+* Olga G. Troyanskaya.
+* "The Sleipnir library for computational functional genomics"
+*****************************************************************************/
+#include "stdafx.h"
+#include "cmdline.h"
+
+/*
+ * Filterer entry point.
+ *
+ * Opens the DAT/DAB named by -i, applies each positional command to decide which
+ * gene pairs to keep, replaces every rejected pair with a missing value, and
+ * saves the result to the file named by -o.
+ *
+ * Each command is a letter followed by an inclusive value range:
+ *   i<min>-<max>, i<min>, i-<max>   include pairs whose value is in the range
+ *   x<min>-<max>, x<min>, x-<max>   exclude pairs whose value is in the range
+ * An open end of a range is represented by +/-FLT_MAX.
+ *
+ * Returns 0 on success and 1 on a command line, input, or command error.
+ *
+ * NOTE(review): Filterer.ggo declares a memmap flag, but it is not consulted here.
+ * NOTE(review): '-' doubles as the range separator, so negative bounds probably
+ * cannot be expressed; this depends on CMeta::Tokenize - confirm.
+ */
+int main( int iArgs, char** aszArgs ) {
+	gengetopt_args_info	sArgs;
+	CDat				Dat;
+	vector<string>		vecstrTokens;
+	size_t				iArg, i, j;
+	float				d, dBound, dMin, dMax;
+	CHalfMatrix<char>	MatStatus;
+	bool				fDefaultExclude, fExclude;
+	char				cCommand, cStatus;
+	const char*			szArg;
+
+	if( cmdline_parser( iArgs, aszArgs, &sArgs ) ) {
+		cmdline_parser_print_help( );
+		return 1; }
+	CMeta Meta( sArgs.verbosity_arg );
+
+	if( !Dat.Open( sArgs.input_arg ) ) {
+		cerr << "Could not open: " << sArgs.input_arg << endl;
+		return 1; }
+
+	// Per-pair status: 0 = untouched, 1 = explicitly included, -1 = excluded.
+	MatStatus.Initialize( Dat.GetGenes( ) );
+	MatStatus.Clear( );
+	fDefaultExclude = false;
+	for( iArg = 0; iArg < sArgs.inputs_num; ++iArg ) {
+		szArg = sArgs.inputs[iArg];
+		cCommand = szArg[0];
+		// Validate before parsing: an empty argument has nothing after its
+		// terminator, and a bare command letter carries no range to tokenize.
+		if( ( ( cCommand != 'i' ) && ( cCommand != 'x' ) ) || !szArg[1] ) {
+			cerr << "Unrecognized command: " << szArg << endl;
+			return 1; }
+		vecstrTokens.clear( );
+		CMeta::Tokenize( szArg + 1, vecstrTokens, "-", true );
+		// A range such as "-" may yield no tokens at all; never index an empty vector.
+		if( vecstrTokens.empty( ) ) {
+			cerr << "Unrecognized command: " << szArg << endl;
+			return 1; }
+
+		dBound = (float)atof( vecstrTokens[0].c_str( ) );
+		if( vecstrTokens.size( ) == 1 ) {
+			// A leading '-' marks the single number as an upper bound.
+			if( szArg[1] == '-' ) {
+				dMin = -FLT_MAX;
+				dMax = dBound; }
+			else {
+				dMin = dBound;
+				dMax = FLT_MAX; } }
+		else {
+			dMin = dBound;
+			dMax = (float)atof( vecstrTokens[1].c_str( ) ); }
+
+		fExclude = ( cCommand == 'x' );
+		// Any include command switches the default for unmatched pairs to "remove".
+		if( !fExclude )
+			fDefaultExclude = true;
+		for( i = 0; i < Dat.GetGenes( ); ++i )
+			for( j = ( i + 1 ); j < Dat.GetGenes( ); ++j ) {
+				if( !( !CMeta::IsNaN( d = Dat.Get( i, j ) ) && ( d <= dMax ) && ( d >= dMin ) ) )
+					continue;
+				if( fExclude )
+					MatStatus.Set( i, j, -1 );
+				// Includes never override an earlier exclusion.
+				else if( MatStatus.Get( i, j ) == 0 )
+					MatStatus.Set( i, j, 1 ); } }
+
+	for( i = 0; i < Dat.GetGenes( ); ++i )
+		for( j = ( i + 1 ); j < Dat.GetGenes( ); ++j ) {
+			cStatus = MatStatus.Get( i, j );
+			if( ( cStatus < 0 ) || ( fDefaultExclude && ( cStatus < 1 ) ) )
+				Dat.Set( i, j, CMeta::GetNaN( ) ); }
+	Dat.Save( sArgs.output_arg );
+
+	return 0; }

tools/Filterer/Filterer.ggo

+package	"Filterer"
+version	"1.0"
+purpose	"Flexible DAT/DAB data filterer"
+
+section "Main"
+option	"input"			i	"Input DAT/DAB file"
+							string	typestr="filename"
+option	"output"		o	"Output DAT/DAB file"
+							string	typestr="filename"
+
+section "Optional"
+option	"memmap"		m	"Memory map input/output"
+							flag	off
+option	"verbosity"		v	"Message verbosity"
+							int	default="5"

tools/Filterer/stdafx.cpp

+/*****************************************************************************
+* This file is provided under the Creative Commons Attribution 3.0 license.
+*
+* You are free to share, copy, distribute, transmit, or adapt this work
+* PROVIDED THAT you attribute the work to the authors listed below.
+* For more information, please see the following web page:
+* http://creativecommons.org/licenses/by/3.0/
+*
+* This file is a component of the Sleipnir library for functional genomics,
+* authored by:
+* Curtis Huttenhower (chuttenh@princeton.edu)
+* Mark Schroeder
+* Maria D. Chikina
+* Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
+*
+* If you use this library, the included executable tools, or any related
+* code in your work, please cite the following publication:
+* Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
+* Olga G. Troyanskaya.
+* "The Sleipnir library for computational functional genomics"
+*****************************************************************************/
+#include "stdafx.h"
+
+/*!
+ * \page Filterer Filterer
+ * 
+ * Filterer reads a DAT/DAB file, removes gene pairs according to a list of value range commands given
+ * as positional arguments, and saves the result.  Removed pairs are replaced with missing values.
+ * 
+ * \section sec_usage Usage
+ * 
+ * \subsection ssec_usage_basic Basic Usage
+ * 
+ * \code
+ * Filterer -i <input.dab> -o <output.dab> <commands>*
+ * \endcode
+ * 
+ * Each command is a single letter followed by an inclusive range of values:
+ * - \c i<min>-<max> includes pairs with values between \c min and \c max.
+ * - \c i<min> includes pairs with values of at least \c min.
+ * - \c i-<max> includes pairs with values of at most \c max.
+ * - \c x<min>-<max>, \c x<min>, and \c x-<max> exclude pairs with values in the same ranges.
+ * 
+ * If at least one \c i command is given, every pair not matched by some \c i range is removed.  A pair
+ * matched by any \c x range is always removed, regardless of the order of the commands.  Pairs that are
+ * already missing in the input are never matched and stay missing.
+ * 
+ * \code
+ * Filterer -i <input.dab> -o <output.dab> i0.5-1
+ * \endcode
+ * 
+ * Keep only the pairs of \c input.dab with values between 0.5 and 1 and save them in \c output.dab.
+ * 
+ * \code
+ * Filterer -i <input.dab> -o <output.dab> x-0.1 x0.9
+ * \endcode
+ * 
+ * Remove the pairs of \c input.dab with values of at most 0.1 or at least 0.9 and save the remainder in
+ * \c output.dab.
+ * 
+ * \note Since \c - separates the two ends of a range, negative bounds can probably not be expressed;
+ * confirm against Sleipnir::CMeta::Tokenize before relying on them.
+ * 
+ * \subsection ssec_usage_detailed Detailed Usage
+ * 
+ * \include Filterer/Filterer.ggo
+ * 
+ * <table><tr>
+ *	<th>Flag</th>
+ *	<th>Default</th>
+ *	<th>Type</th>
+ *	<th>Description</th>
+ * </tr><tr>
+ *	<td>None</td>
+ *	<td>None</td>
+ *	<td>Commands</td>
+ *	<td>Include (\c i) and exclude (\c x) range commands as described above.</td>
+ * </tr><tr>
+ *	<td>-i</td>
+ *	<td>None</td>
+ *	<td>DAT/DAB file</td>
+ *	<td>Input DAT/DAB file.</td>
+ * </tr><tr>
+ *	<td>-o</td>
+ *	<td>None</td>
+ *	<td>DAT/DAB file</td>
+ *	<td>Output DAT/DAB file.</td>
+ * </tr><tr>
+ *	<td>-m</td>
+ *	<td>off</td>
+ *	<td>Flag</td>
+ *	<td>Memory map input/output.  Declared in the option file; Filterer.cpp does not currently consult
+ *		it.</td>
+ * </tr><tr>
+ *	<td>-v</td>
+ *	<td>5</td>
+ *	<td>Integer</td>
+ *	<td>Message verbosity.</td>
+ * </tr></table>
+ */

tools/Filterer/stdafx.h

+/*****************************************************************************
+* This file is provided under the Creative Commons Attribution 3.0 license.
+*
+* You are free to share, copy, distribute, transmit, or adapt this work
+* PROVIDED THAT you attribute the work to the authors listed below.
+* For more information, please see the following web page:
+* http://creativecommons.org/licenses/by/3.0/
+*
+* This file is a component of the Sleipnir library for functional genomics,
+* authored by:
+* Curtis Huttenhower (chuttenh@princeton.edu)
+* Mark Schroeder
+* Maria D. Chikina
+* Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
+*
+* If you use this library, the included executable tools, or any related
+* code in your work, please cite the following publication:
+* Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
+* Olga G. Troyanskaya.
+* "The Sleipnir library for computational functional genomics"
+*****************************************************************************/
+#ifndef STDAFX_H
+#define STDAFX_H
+
+#include <time.h>
+
+#include <fstream>
+using namespace std;
+
+#include "dat.h"
+#include "genome.h"
+#include "meta.h"
+using namespace Sleipnir;
+
+#endif // STDAFX_H

tools/Makefile.am

 	  DataDumper \
 	  Distancer \
 	  Explainer \
+	  Filterer \
 	  Funcaeologist \
 	  Funcifier \
 	  Funcographer \