Commits

Qian Zhu committed 4ecf0d2

Update SeekIterative

Comments (0)

Files changed (4)

tools/SeekIterative/SeekIterative.cpp

 bool weight(vector<char> &is_query, vector<float> &d1,
 CSeekIntIntMap *geneMap, float rbp_p, float &w){
 	vector<AResultFloat> ar;
-	ar.resize(geneMap->GetSize());
+	ar.resize(geneMap->GetNumSet());
 	utype i;
 	const vector<utype> &allGenes = geneMap->GetAllReverse();
 	for(i=0; i<geneMap->GetNumSet(); i++){
 		ar[i].i = gi;
 		ar[i].f = d1[gi];
 	}
-	int MAX = 5000;
+	int MAX = 1000;
 	nth_element(ar.begin(), ar.begin()+MAX, ar.end());
 	sort(ar.begin(), ar.begin()+MAX);
 	w = 0;
 			if(i==j) continue;
 			gene_score[query[j]] = 0;
 		}
-		weight_fast(is_query, gene_score, geneMap, w);
+		//weight_fast(is_query, gene_score, geneMap, w);
+		weight(is_query, gene_score, geneMap, rbp_p, w);
 		tot_w += w;
 	}
 	tot_w /= query.size();
 		vector<float> gene_score;
 		get_score(gene_score, mat, geneMap, q_weight);
 		gene_score[query[i]] = 0;
-		//weight(is_query, gene_score, geneMap, rbp_p, w);
-		weight_fast(is_query, gene_score, geneMap, w);
+		weight(is_query, gene_score, geneMap, rbp_p, w);
+		//fprintf(stderr, "Q%d %.3f\n", i, w);
+		//weight_fast(is_query, gene_score, geneMap, w);
 		tot_w += w;
 	}
 	tot_w /= query.size();
 		CSeekTools::ReadListOneColumn(sArgs.tdab_list_arg, dab_list);
 
 		fprintf(stderr, "Finished reading dablist\n");
-		//reading query
+		//preparing query
 		vector<vector<float> > q_weight;
 		q_weight.resize(vecstrAllQuery.size());
 		for(i=0; i<vecstrAllQuery.size(); i++){
 			for(j=0; j<vecstrAllQuery[i].size(); j++)
 				q_weight[i][mapstriGenes[vecstrAllQuery[i][j]]] = 1;
 		}
+
+		//preparing query2
+		vector<vector<unsigned int> > qq;
+		qq.resize(vecstrAllQuery.size());
+		for(i=0; i<vecstrAllQuery.size(); i++){
+			qq[i] = vector<unsigned int>();
+			for(j=0; j<vecstrAllQuery[i].size(); j++)
+				qq[i].push_back(mapstriGenes[vecstrAllQuery[i][j]]);
+		}
+
+		//selected datasets for each query
+		vector<vector<char> > selectedDataset;
+		selectedDataset.resize(vecstrAllQuery.size());
+		for(i=0; i<vecstrAllQuery.size(); i++)
+			CSeekTools::InitVector(selectedDataset[i], dab_list.size(), (char)0);
 		
 		fprintf(stderr, "Reading DAB\n");
 		vector<vector<float> > final_score, count, dweight;
 			CSeekTools::InitVector(dweight[j], dab_list.size(), (float) 0);
 		}
 
+		float threshold_g = sArgs.threshold_g_arg;
+		float threshold_q = sArgs.threshold_q_arg;
+		bool DEBUG = !!sArgs.debug_flag;
+
 		size_t l;
 		for(l=0; l<dab_list.size(); l++){
 			CDat Dat;
 			}
 
 			fprintf(stderr, "Finished copying matrix\n");
+
 			for(j=0; j<vecstrAllQuery.size(); j++){
+				int numPresent = 0;
+				for(k=0; k<qq[j].size(); k++){
+					if(m.GetForward(qq[j][k])==(unsigned int)-1) continue;
+					numPresent++;
+				}
+				if(search_mode=="eq" && numPresent==0){
+					continue;
+				}else if(search_mode=="cv_loi" && numPresent<=1){
+					continue;
+				}
+				int numThreshold = (int) (threshold_q * qq[j].size());
+				if(numPresent>numThreshold)
+					selectedDataset[j][l] = 1;
+			}
+
+			for(j=0; j<vecstrAllQuery.size(); j++){
+				//not enough query genes present
+				if(selectedDataset[j][l]==0) continue;
+
 				float dw = 1.0;
 				if(search_mode=="eq"){
 					dw = 1.0;
 					freq[j][gi]++;
 				}	
 			}
+
 		}
-		int minRequired = (int) ((float) dab_list.size() * 0.50);
+
 		for(j=0; j<vecstrAllQuery.size(); j++){
+
+			int countSelected = 0;
+			for(k=0; k<selectedDataset[j].size(); k++)
+				countSelected+=selectedDataset[j][k];
+
+			//int minRequired = (int) ((float) dab_list.size() * 0.50);
+			int minRequired = (int) ((float) countSelected * threshold_g);
+
+			if(DEBUG){
+				int nG = 0;
+				for(k=0; k<final_score[j].size(); k++){
+					if(freq[j][k]>=minRequired){
+						nG++;
+					}
+				}
+				fprintf(stderr, "Query %d numSelectedDataset %d numGenesIntegrated %d\n", j, countSelected, nG);
+			}
+
 			for(k=0; k<final_score[j].size(); k++){
 				if(freq[j][k]<minRequired){
 					final_score[j][k] = -320;
 
 	//DAB mode (sparse DAB: .2.dab)
 	if(sArgs.dab_flag==1){
+		float threshold_g = sArgs.threshold_g_arg;
+		float threshold_q = sArgs.threshold_q_arg;
+		bool DEBUG = !!sArgs.debug_flag;
+
+		string search_mode = sArgs.tsearch_mode_arg;
+		if(search_mode=="NA"){
+			fprintf(stderr, "Please specify a search mode!\n");
+			return 1;
+		}
+
 		string norm_mode = sArgs.norm_mode_arg;
 		if(sArgs.default_type_arg!=0 && sArgs.default_type_arg!=1){
 			fprintf(stderr, "Error, invalid type!\n");
 				fprintf(stderr, "Invalid exponent!\n");
 				return -1;
 			}
+			if(sArgs.rbp_p_arg==-1){
+				fprintf(stderr, "Error, please supply the rbp_p flag.\n");
+				return -1;
+			}
 		}
 
 		vector<float> score_cutoff;
 		CSeekTools::ReadListOneColumn(sArgs.dab_list_arg, dab_list);
 		vector<CSeekIntIntMap*> dm;
 		dm.resize(dab_list.size());
+
+		//selected datasets for each query
+		vector<vector<char> > selectedDataset;
+		selectedDataset.resize(vecstrAllQuery.size());
+		for(i=0; i<vecstrAllQuery.size(); i++)
+			CSeekTools::InitVector(selectedDataset[i], dab_list.size(), (char)0);
 		
 		//MODE 1 - Normal search:
+		//preparing query
 		vector<vector<float> > q_weight;
 		q_weight.resize(vecstrAllQuery.size());
 		for(i=0; i<vecstrAllQuery.size(); i++){
 			for(j=0; j<vecstrAllQuery[i].size(); j++)
 				q_weight[i][mapstriGenes[vecstrAllQuery[i][j]]] = 1;
 		}
+
+		//preparing query2
+		vector<vector<unsigned int> > qq;
+		qq.resize(vecstrAllQuery.size());
+		for(i=0; i<vecstrAllQuery.size(); i++){
+			qq[i] = vector<unsigned int>();
+			for(j=0; j<vecstrAllQuery[i].size(); j++)
+				qq[i].push_back(mapstriGenes[vecstrAllQuery[i][j]]);
+		}
 		
 		fprintf(stderr, "Reading sparse DAB\n");
 		vector<vector<float> > final_score, count;
 			CSeekTools::InitVector(freq[j], vecstrGenes.size(), (int) 0);
 			CSeekTools::InitVector(dweight[j], dab_list.size(), (float) 0);
 		}
-		if(bDatasetCutoff){
-			fprintf(stderr, "Dataset cutoff is on!\n");
-		}else{
-			fprintf(stderr, "Dataset cutoff is off!\n");
-		}
 
+		if(bDatasetCutoff)	fprintf(stderr, "Dataset cutoff is on!\n");
+		else	fprintf(stderr, "Dataset cutoff is off!\n");
+		
 
 		for(i=0; i<dab_list.size(); i++){
 			fprintf(stderr, "Reading %d: %s\n", i, dab_list[i].c_str());
 			}
 			const vector<utype> &allGenes = d1.GetAllReverse();
 
+			for(j=0; j<vecstrAllQuery.size(); j++){
+				int numPresent = 0;
+				for(k=0; k<qq[j].size(); k++){
+					if(d1.GetForward(qq[j][k])==(unsigned int)-1) continue;
+					numPresent++;
+				}
+				if(search_mode=="eq" && numPresent==0){
+					continue;
+				}else if(search_mode=="cv_loi" && numPresent<=1){
+					continue;
+				}
+				int numThreshold = (int) (threshold_q * qq[j].size());
+				if(numPresent>numThreshold)
+					selectedDataset[j][i] = 1;
+			}
+
 			#pragma omp parallel for \
 			shared(qu, sm, d1, dweight, final_score, count, freq, score_cutoff) \
-			private(j, k) firstprivate(bDatasetCutoff) schedule(dynamic)
+			private(j, k) firstprivate(bDatasetCutoff, i) schedule(dynamic)
 			for(j=0; j<vecstrAllQuery.size(); j++){
+				//not enough query genes present
+				if(selectedDataset[j][i]==0) continue;
+
 				float dw = 1.0;
-				//cv_weight_LOO(qu[j], sm, &d1, rbp_p, dw);
-				cv_weight(qu[j], sm, &d1, rbp_p, dw);
+				if(search_mode=="eq"){
+					dw = 1.0;
+				}else{
+					//cv_weight_LOO(qu[j], sm, &d1, rbp_p, dw);
+					cv_weight(qu[j], sm, &d1, rbp_p, dw);
+				}
+
 				if(bDatasetCutoff){
 					if(score_cutoff[i]>dw)
 						dw = 0;
 					final_score[j][gi] += tmp_score[gi] * dw;
 					count[j][gi]+=dw;
 					freq[j][gi]++;
-				}	
+				}
 			}
 		}
 
 		res.Save(ofsm, true);
 		*/
 		
-		int minRequired = (int) ((float) dab_list.size() * 0.50);
 		for(j=0; j<vecstrAllQuery.size(); j++){
+			int countSelected = 0;
+			for(k=0; k<selectedDataset[j].size(); k++)
+				countSelected+=selectedDataset[j][k];
+
+			int minRequired = (int) ((float) countSelected * threshold_g);
+
+			if(DEBUG){
+				int nG = 0;
+				for(k=0; k<final_score[j].size(); k++){
+					if(freq[j][k]>=minRequired){
+						nG++;
+					}
+				}
+				fprintf(stderr, "Query %d numSelectedDataset %d numGenesIntegrated %d\n", j, countSelected, nG);
+			}
+
 			for(k=0; k<final_score[j].size(); k++){
 				if(freq[j][k]<minRequired){
 					final_score[j][k] = -320;
 				}
 				final_score[j][k] /= count[j][k];
 			}
+
 			sprintf(acBuffer, "%s/%d.query", output_dir.c_str(), j);
 			CSeekTools::WriteArrayText(acBuffer, vecstrAllQuery[j]);
 			sprintf(acBuffer, "%s/%d.gscore", output_dir.c_str(), j);

tools/SeekIterative/SeekIterative.ggo

 option	"genome"			G	"Genome mapping file"
 								string typestr="filename"
 
-section "Traditional DAB mode"
+section "Traditional DAB mode (see also: -Z, -Y, -S)"
 option	"tdab_list"			J	"DAB list"
 								string typestr="filename"
-option	"tsearch_mode"		S	"Search mode: equal weighted (eq) or CV LOI (cv_loi) (Applicable if DAB list contains more than 1 dataset"
-								values="eq","cv_loi","NA" default="NA"
 
-section "Sparse DAB mode"
+section "Sparse DAB mode (see also: -Z, -Y, -S)"
 option	"dab_list"			V	"DAB list"
 								string typestr="filename"
 option	"num_iter"			I	"Number of iterations"
 								string typestr="directory"	yes
 option	"not_query"			Q	"NOT Query file (optional, for combined-DAB)"
 								string typestr="filename" default="NA"
+option	"threshold_q"		Z	"Fraction of query genes need to be present in a dataset"
+								float default="0"
+option	"threshold_g"		Y	"Fraction of datasets that must contain a gene to put it in ranking (important if individual datasets have very different gene coverage, and for datasets with small gene-size)"
+								float default="0.50"
+option	"tsearch_mode"		S	"Search mode: equal weighted (eq) or CV LOI (cv_loi) (Applicable if DAB list contains more than 1 dataset). (Required for --tdab and --dab modes)"
+								values="eq","cv_loi","NA" default="NA"
+option	"debug"				x	"Debug mode"
+								flag off
 
 section "Output"
 option	"dir_out"			D	"Output directory"

tools/SeekIterative/cmdline.c

 /*
-  File autogenerated by gengetopt version 2.22.5
+  File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /usr/bin/gengetopt -iSeekIterative.ggo --default-optional -u -N -e 
+  /memex/qzhu/usr/bin/gengetopt -iSeekIterative.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:
 #include <stdlib.h>
 #include <string.h>
 
-#ifndef FIX_UNUSED
-#define FIX_UNUSED(X) (void) (X) /* avoid warnings for unused params */
-#endif
-
-#include <getopt.h>
+#include "getopt.h"
 
 #include "cmdline.h"
 
   "\nVisualization mode:",
   "  -c, --cutoff=FLOAT            Cutoff value  (default=`0.0001')",
   "  -G, --genome=filename         Genome mapping file",
-  "\nTraditional DAB mode:",
+  "\nTraditional DAB mode (see also: -Z, -Y, -S):",
   "  -J, --tdab_list=filename      DAB list",
-  "  -S, --tsearch_mode=STRING     Search mode: equal weighted (eq) or CV LOI \n                                  (cv_loi) (Applicable if DAB list contains \n                                  more than 1 dataset  (possible values=\"eq\", \n                                  \"cv_loi\", \"NA\" default=`NA')",
-  "\nSparse DAB mode:",
+  "\nSparse DAB mode (see also: -Z, -Y, -S):",
   "  -V, --dab_list=filename       DAB list",
   "  -I, --num_iter=INT            Number of iterations  (default=`0')",
   "  -T, --default_type=INT        Default gene index type (choose unsigned short \n                                  for genes, or unsigned int (32-bit) for \n                                  transcripts) (required for DAB mode) (0 - \n                                  unsigned int, 1 - unsigned short)  \n                                  (default=`-1')",
   "  -q, --query=filename          Query file",
   "  -F, --dab_dir=directory       DAB directory",
   "  -Q, --not_query=filename      NOT Query file (optional, for combined-DAB)  \n                                  (default=`NA')",
+  "  -Z, --threshold_q=FLOAT       Fraction of query genes need to be present in a \n                                  dataset  (default=`0')",
+  "  -Y, --threshold_g=FLOAT       Fraction of datasets that must contain a gene \n                                  to put it in ranking (important if individual \n                                  datasets have very different gene coverage, \n                                  and for datasets with small gene-size)  \n                                  (default=`0.50')",
+  "  -S, --tsearch_mode=STRING     Search mode: equal weighted (eq) or CV LOI \n                                  (cv_loi) (Applicable if DAB list contains \n                                  more than 1 dataset). (Required for --tdab \n                                  and --dab modes)  (possible values=\"eq\", \n                                  \"cv_loi\", \"NA\" default=`NA')",
+  "  -x, --debug                   Debug mode  (default=off)",
   "\nOutput:",
   "  -D, --dir_out=directory       Output directory",
     0
 void clear_args (struct gengetopt_args_info *args_info);
 
 static int
-cmdline_parser_internal (int argc, char **argv, struct gengetopt_args_info *args_info,
+cmdline_parser_internal (int argc, char * const *argv, struct gengetopt_args_info *args_info,
                         struct cmdline_parser_params *params, const char *additional_error);
 
 static int
 cmdline_parser_required2 (struct gengetopt_args_info *args_info, const char *prog_name, const char *additional_error);
 
-const char *cmdline_parser_tsearch_mode_values[] = {"eq", "cv_loi", "NA", 0}; /*< Possible values for tsearch_mode. */
-const char *cmdline_parser_norm_mode_values[] = {"rank", "subtract_z", "NA", 0}; /*< Possible values for norm_mode. */
+char *cmdline_parser_norm_mode_values[] = {"rank", "subtract_z", "NA", 0} ;	/* Possible values for norm_mode.  */
+char *cmdline_parser_tsearch_mode_values[] = {"eq", "cv_loi", "NA", 0} ;	/* Possible values for tsearch_mode.  */
 
 static char *
 gengetopt_strdup (const char *s);
   args_info->cutoff_given = 0 ;
   args_info->genome_given = 0 ;
   args_info->tdab_list_given = 0 ;
-  args_info->tsearch_mode_given = 0 ;
   args_info->dab_list_given = 0 ;
   args_info->num_iter_given = 0 ;
   args_info->default_type_given = 0 ;
   args_info->query_given = 0 ;
   args_info->dab_dir_given = 0 ;
   args_info->not_query_given = 0 ;
+  args_info->threshold_q_given = 0 ;
+  args_info->threshold_g_given = 0 ;
+  args_info->tsearch_mode_given = 0 ;
+  args_info->debug_given = 0 ;
   args_info->dir_out_given = 0 ;
 }
 
 static
 void clear_args (struct gengetopt_args_info *args_info)
 {
-  FIX_UNUSED (args_info);
   args_info->tdab_flag = 0;
   args_info->dab_flag = 0;
   args_info->combined_flag = 0;
   args_info->genome_orig = NULL;
   args_info->tdab_list_arg = NULL;
   args_info->tdab_list_orig = NULL;
-  args_info->tsearch_mode_arg = gengetopt_strdup ("NA");
-  args_info->tsearch_mode_orig = NULL;
   args_info->dab_list_arg = NULL;
   args_info->dab_list_orig = NULL;
   args_info->num_iter_arg = 0;
   args_info->dab_dir_orig = NULL;
   args_info->not_query_arg = gengetopt_strdup ("NA");
   args_info->not_query_orig = NULL;
+  args_info->threshold_q_arg = 0;
+  args_info->threshold_q_orig = NULL;
+  args_info->threshold_g_arg = 0.50;
+  args_info->threshold_g_orig = NULL;
+  args_info->tsearch_mode_arg = gengetopt_strdup ("NA");
+  args_info->tsearch_mode_orig = NULL;
+  args_info->debug_flag = 0;
   args_info->dir_out_arg = NULL;
   args_info->dir_out_orig = NULL;
   
   args_info->cutoff_help = gengetopt_args_info_help[16] ;
   args_info->genome_help = gengetopt_args_info_help[17] ;
   args_info->tdab_list_help = gengetopt_args_info_help[19] ;
-  args_info->tsearch_mode_help = gengetopt_args_info_help[20] ;
-  args_info->dab_list_help = gengetopt_args_info_help[22] ;
-  args_info->num_iter_help = gengetopt_args_info_help[23] ;
-  args_info->default_type_help = gengetopt_args_info_help[24] ;
-  args_info->rbp_p_help = gengetopt_args_info_help[25] ;
-  args_info->max_rank_help = gengetopt_args_info_help[26] ;
-  args_info->dset_cutoff_file_help = gengetopt_args_info_help[27] ;
-  args_info->norm_mode_help = gengetopt_args_info_help[28] ;
-  args_info->exp_help = gengetopt_args_info_help[29] ;
-  args_info->input_help = gengetopt_args_info_help[31] ;
-  args_info->query_help = gengetopt_args_info_help[32] ;
-  args_info->dab_dir_help = gengetopt_args_info_help[33] ;
-  args_info->not_query_help = gengetopt_args_info_help[34] ;
-  args_info->dir_out_help = gengetopt_args_info_help[36] ;
+  args_info->dab_list_help = gengetopt_args_info_help[21] ;
+  args_info->num_iter_help = gengetopt_args_info_help[22] ;
+  args_info->default_type_help = gengetopt_args_info_help[23] ;
+  args_info->rbp_p_help = gengetopt_args_info_help[24] ;
+  args_info->max_rank_help = gengetopt_args_info_help[25] ;
+  args_info->dset_cutoff_file_help = gengetopt_args_info_help[26] ;
+  args_info->norm_mode_help = gengetopt_args_info_help[27] ;
+  args_info->exp_help = gengetopt_args_info_help[28] ;
+  args_info->input_help = gengetopt_args_info_help[30] ;
+  args_info->query_help = gengetopt_args_info_help[31] ;
+  args_info->dab_dir_help = gengetopt_args_info_help[32] ;
+  args_info->not_query_help = gengetopt_args_info_help[33] ;
+  args_info->threshold_q_help = gengetopt_args_info_help[34] ;
+  args_info->threshold_g_help = gengetopt_args_info_help[35] ;
+  args_info->tsearch_mode_help = gengetopt_args_info_help[36] ;
+  args_info->debug_help = gengetopt_args_info_help[37] ;
+  args_info->dir_out_help = gengetopt_args_info_help[39] ;
   
 }
 
 void
 cmdline_parser_print_version (void)
 {
-  printf ("%s %s\n",
-     (strlen(CMDLINE_PARSER_PACKAGE_NAME) ? CMDLINE_PARSER_PACKAGE_NAME : CMDLINE_PARSER_PACKAGE),
-     CMDLINE_PARSER_VERSION);
+  printf ("%s %s\n", CMDLINE_PARSER_PACKAGE, CMDLINE_PARSER_VERSION);
 }
 
 static void print_help_common(void) {
   printf("\n");
 
   if (strlen(gengetopt_args_info_description) > 0)
-    printf("%s\n\n", gengetopt_args_info_description);
+    printf("%s\n", gengetopt_args_info_description);
 }
 
 void
   clear_args (args_info);
   init_args_info (args_info);
 
-  args_info->inputs = 0;
+  args_info->inputs = NULL;
   args_info->inputs_num = 0;
 }
 
   free_string_field (&(args_info->genome_orig));
   free_string_field (&(args_info->tdab_list_arg));
   free_string_field (&(args_info->tdab_list_orig));
-  free_string_field (&(args_info->tsearch_mode_arg));
-  free_string_field (&(args_info->tsearch_mode_orig));
   free_string_field (&(args_info->dab_list_arg));
   free_string_field (&(args_info->dab_list_orig));
   free_string_field (&(args_info->num_iter_orig));
   free_string_field (&(args_info->dab_dir_orig));
   free_string_field (&(args_info->not_query_arg));
   free_string_field (&(args_info->not_query_orig));
+  free_string_field (&(args_info->threshold_q_orig));
+  free_string_field (&(args_info->threshold_g_orig));
+  free_string_field (&(args_info->tsearch_mode_arg));
+  free_string_field (&(args_info->tsearch_mode_orig));
   free_string_field (&(args_info->dir_out_arg));
   free_string_field (&(args_info->dir_out_orig));
   
  * -2 if more than one value has matched
  */
 static int
-check_possible_values(const char *val, const char *values[])
+check_possible_values(const char *val, char *values[])
 {
   int i, found, last;
   size_t len;
 
 
 static void
-write_into_file(FILE *outfile, const char *opt, const char *arg, const char *values[])
+write_into_file(FILE *outfile, const char *opt, const char *arg, char *values[])
 {
   int found = -1;
   if (arg) {
     write_into_file(outfile, "genome", args_info->genome_orig, 0);
   if (args_info->tdab_list_given)
     write_into_file(outfile, "tdab_list", args_info->tdab_list_orig, 0);
-  if (args_info->tsearch_mode_given)
-    write_into_file(outfile, "tsearch_mode", args_info->tsearch_mode_orig, cmdline_parser_tsearch_mode_values);
   if (args_info->dab_list_given)
     write_into_file(outfile, "dab_list", args_info->dab_list_orig, 0);
   if (args_info->num_iter_given)
     write_into_file(outfile, "dab_dir", args_info->dab_dir_orig, 0);
   if (args_info->not_query_given)
     write_into_file(outfile, "not_query", args_info->not_query_orig, 0);
+  if (args_info->threshold_q_given)
+    write_into_file(outfile, "threshold_q", args_info->threshold_q_orig, 0);
+  if (args_info->threshold_g_given)
+    write_into_file(outfile, "threshold_g", args_info->threshold_g_orig, 0);
+  if (args_info->tsearch_mode_given)
+    write_into_file(outfile, "tsearch_mode", args_info->tsearch_mode_orig, cmdline_parser_tsearch_mode_values);
+  if (args_info->debug_given)
+    write_into_file(outfile, "debug", 0, 0 );
   if (args_info->dir_out_given)
     write_into_file(outfile, "dir_out", args_info->dir_out_orig, 0);
   
 char *
 gengetopt_strdup (const char *s)
 {
-  char *result = 0;
+  char *result = NULL;
   if (!s)
     return result;
 
 }
 
 int
-cmdline_parser (int argc, char **argv, struct gengetopt_args_info *args_info)
+cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info)
 {
   return cmdline_parser2 (argc, argv, args_info, 0, 1, 1);
 }
 
 int
-cmdline_parser_ext (int argc, char **argv, struct gengetopt_args_info *args_info,
+cmdline_parser_ext (int argc, char * const *argv, struct gengetopt_args_info *args_info,
                    struct cmdline_parser_params *params)
 {
   int result;
-  result = cmdline_parser_internal (argc, argv, args_info, params, 0);
+  result = cmdline_parser_internal (argc, argv, args_info, params, NULL);
 
   return result;
 }
 
 int
-cmdline_parser2 (int argc, char **argv, struct gengetopt_args_info *args_info, int override, int initialize, int check_required)
+cmdline_parser2 (int argc, char * const *argv, struct gengetopt_args_info *args_info, int override, int initialize, int check_required)
 {
   int result;
   struct cmdline_parser_params params;
   params.check_ambiguity = 0;
   params.print_errors = 1;
 
-  result = cmdline_parser_internal (argc, argv, args_info, &params, 0);
+  result = cmdline_parser_internal (argc, argv, args_info, &params, NULL);
 
   return result;
 }
 {
   int result = EXIT_SUCCESS;
 
-  if (cmdline_parser_required2(args_info, prog_name, 0) > 0)
+  if (cmdline_parser_required2(args_info, prog_name, NULL) > 0)
     result = EXIT_FAILURE;
 
   return result;
 cmdline_parser_required2 (struct gengetopt_args_info *args_info, const char *prog_name, const char *additional_error)
 {
   int error = 0;
-  FIX_UNUSED (additional_error);
 
   /* checks for required options */
   if (! args_info->input_given)
 static
 int update_arg(void *field, char **orig_field,
                unsigned int *field_given, unsigned int *prev_given, 
-               char *value, const char *possible_values[],
-               const char *default_value,
+               char *value, char *possible_values[], const char *default_value,
                cmdline_parser_arg_type arg_type,
                int check_ambiguity, int override,
                int no_free, int multiple_option,
   const char *val = value;
   int found;
   char **string_field;
-  FIX_UNUSED (field);
 
   stop_char = 0;
   found = 0;
 
 
 int
-cmdline_parser_internal (
-  int argc, char **argv, struct gengetopt_args_info *args_info,
+cmdline_parser_internal (int argc, char * const *argv, struct gengetopt_args_info *args_info,
                         struct cmdline_parser_params *params, const char *additional_error)
 {
   int c;	/* Character of the parsed option.  */
         { "cutoff",	1, NULL, 'c' },
         { "genome",	1, NULL, 'G' },
         { "tdab_list",	1, NULL, 'J' },
-        { "tsearch_mode",	1, NULL, 'S' },
         { "dab_list",	1, NULL, 'V' },
         { "num_iter",	1, NULL, 'I' },
         { "default_type",	1, NULL, 'T' },
         { "query",	1, NULL, 'q' },
         { "dab_dir",	1, NULL, 'F' },
         { "not_query",	1, NULL, 'Q' },
+        { "threshold_q",	1, NULL, 'Z' },
+        { "threshold_g",	1, NULL, 'Y' },
+        { "tsearch_mode",	1, NULL, 'S' },
+        { "debug",	0, NULL, 'x' },
         { "dir_out",	1, NULL, 'D' },
-        { 0,  0, 0, 0 }
+        { NULL,	0, NULL, 0 }
       };
 
-      c = getopt_long (argc, argv, "jdefghvb:t:EPc:G:J:S:V:I:T:R:M:H:n:N:i:q:F:Q:D:", long_options, &option_index);
+      c = getopt_long (argc, argv, "jdefghvb:t:EPc:G:J:V:I:T:R:M:H:n:N:i:q:F:Q:Z:Y:S:xD:", long_options, &option_index);
 
       if (c == -1) break;	/* Exit from `while (1)' loop.  */
 
             goto failure;
         
           break;
-        case 'S':	/* Search mode: equal weighted (eq) or CV LOI (cv_loi) (Applicable if DAB list contains more than 1 dataset.  */
-        
-        
-          if (update_arg( (void *)&(args_info->tsearch_mode_arg), 
-               &(args_info->tsearch_mode_orig), &(args_info->tsearch_mode_given),
-              &(local_args_info.tsearch_mode_given), optarg, cmdline_parser_tsearch_mode_values, "NA", ARG_STRING,
-              check_ambiguity, override, 0, 0,
-              "tsearch_mode", 'S',
-              additional_error))
-            goto failure;
-        
-          break;
         case 'V':	/* DAB list.  */
         
         
             goto failure;
         
           break;
+        case 'Z':	/* Fraction of query genes need to be present in a dataset.  */
+        
+        
+          if (update_arg( (void *)&(args_info->threshold_q_arg), 
+               &(args_info->threshold_q_orig), &(args_info->threshold_q_given),
+              &(local_args_info.threshold_q_given), optarg, 0, "0", ARG_FLOAT,
+              check_ambiguity, override, 0, 0,
+              "threshold_q", 'Z',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'Y':	/* Fraction of datasets that must contain a gene to put it in ranking (important if individual datasets have very different gene coverage, and for datasets with small gene-size).  */
+        
+        
+          if (update_arg( (void *)&(args_info->threshold_g_arg), 
+               &(args_info->threshold_g_orig), &(args_info->threshold_g_given),
+              &(local_args_info.threshold_g_given), optarg, 0, "0.50", ARG_FLOAT,
+              check_ambiguity, override, 0, 0,
+              "threshold_g", 'Y',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'S':	/* Search mode: equal weighted (eq) or CV LOI (cv_loi) (Applicable if DAB list contains more than 1 dataset). (Required for --tdab and --dab modes).  */
+        
+        
+          if (update_arg( (void *)&(args_info->tsearch_mode_arg), 
+               &(args_info->tsearch_mode_orig), &(args_info->tsearch_mode_given),
+              &(local_args_info.tsearch_mode_given), optarg, cmdline_parser_tsearch_mode_values, "NA", ARG_STRING,
+              check_ambiguity, override, 0, 0,
+              "tsearch_mode", 'S',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'x':	/* Debug mode.  */
+        
+        
+          if (update_arg((void *)&(args_info->debug_flag), 0, &(args_info->debug_given),
+              &(local_args_info.debug_given), optarg, 0, 0, ARG_FLAG,
+              check_ambiguity, override, 1, 0, "debug", 'x',
+              additional_error))
+            goto failure;
+        
+          break;
         case 'D':	/* Output directory.  */
         
         

tools/SeekIterative/cmdline.h

 /** @file cmdline.h
  *  @brief The header file for the command line option parser
- *  generated by GNU Gengetopt version 2.22.5
+ *  generated by GNU Gengetopt version 2.22
  *  http://www.gnu.org/software/gengetopt.
  *  DO NOT modify this file, since it can be overwritten
  *  @author GNU Gengetopt by Lorenzo Bettini */
 #endif /* __cplusplus */
 
 #ifndef CMDLINE_PARSER_PACKAGE
-/** @brief the program name (used for printing errors) */
+/** @brief the program name */
 #define CMDLINE_PARSER_PACKAGE "SeekIterative"
 #endif
 
-#ifndef CMDLINE_PARSER_PACKAGE_NAME
-/** @brief the complete program name (used for help and version) */
-#define CMDLINE_PARSER_PACKAGE_NAME "SeekIterative"
-#endif
-
 #ifndef CMDLINE_PARSER_VERSION
 /** @brief the program version */
 #define CMDLINE_PARSER_VERSION "1.0"
   char * tdab_list_arg;	/**< @brief DAB list.  */
   char * tdab_list_orig;	/**< @brief DAB list original value given at command line.  */
   const char *tdab_list_help; /**< @brief DAB list help description.  */
-  char * tsearch_mode_arg;	/**< @brief Search mode: equal weighted (eq) or CV LOI (cv_loi) (Applicable if DAB list contains more than 1 dataset (default='NA').  */
-  char * tsearch_mode_orig;	/**< @brief Search mode: equal weighted (eq) or CV LOI (cv_loi) (Applicable if DAB list contains more than 1 dataset original value given at command line.  */
-  const char *tsearch_mode_help; /**< @brief Search mode: equal weighted (eq) or CV LOI (cv_loi) (Applicable if DAB list contains more than 1 dataset help description.  */
   char * dab_list_arg;	/**< @brief DAB list.  */
   char * dab_list_orig;	/**< @brief DAB list original value given at command line.  */
   const char *dab_list_help; /**< @brief DAB list help description.  */
   char * not_query_arg;	/**< @brief NOT Query file (optional, for combined-DAB) (default='NA').  */
   char * not_query_orig;	/**< @brief NOT Query file (optional, for combined-DAB) original value given at command line.  */
   const char *not_query_help; /**< @brief NOT Query file (optional, for combined-DAB) help description.  */
+  float threshold_q_arg;	/**< @brief Fraction of query genes need to be present in a dataset (default='0').  */
+  char * threshold_q_orig;	/**< @brief Fraction of query genes need to be present in a dataset original value given at command line.  */
+  const char *threshold_q_help; /**< @brief Fraction of query genes need to be present in a dataset help description.  */
+  float threshold_g_arg;	/**< @brief Fraction of datasets that must contain a gene to put it in ranking (important if individual datasets have very different gene coverage, and for datasets with small gene-size) (default='0.50').  */
+  char * threshold_g_orig;	/**< @brief Fraction of datasets that must contain a gene to put it in ranking (important if individual datasets have very different gene coverage, and for datasets with small gene-size) original value given at command line.  */
+  const char *threshold_g_help; /**< @brief Fraction of datasets that must contain a gene to put it in ranking (important if individual datasets have very different gene coverage, and for datasets with small gene-size) help description.  */
+  char * tsearch_mode_arg;	/**< @brief Search mode: equal weighted (eq) or CV LOI (cv_loi) (Applicable if DAB list contains more than 1 dataset). (Required for --tdab and --dab modes) (default='NA').  */
+  char * tsearch_mode_orig;	/**< @brief Search mode: equal weighted (eq) or CV LOI (cv_loi) (Applicable if DAB list contains more than 1 dataset). (Required for --tdab and --dab modes) original value given at command line.  */
+  const char *tsearch_mode_help; /**< @brief Search mode: equal weighted (eq) or CV LOI (cv_loi) (Applicable if DAB list contains more than 1 dataset). (Required for --tdab and --dab modes) help description.  */
+  int debug_flag;	/**< @brief Debug mode (default=off).  */
+  const char *debug_help; /**< @brief Debug mode help description.  */
   char * dir_out_arg;	/**< @brief Output directory.  */
   char * dir_out_orig;	/**< @brief Output directory original value given at command line.  */
   const char *dir_out_help; /**< @brief Output directory help description.  */
   unsigned int cutoff_given ;	/**< @brief Whether cutoff was given.  */
   unsigned int genome_given ;	/**< @brief Whether genome was given.  */
   unsigned int tdab_list_given ;	/**< @brief Whether tdab_list was given.  */
-  unsigned int tsearch_mode_given ;	/**< @brief Whether tsearch_mode was given.  */
   unsigned int dab_list_given ;	/**< @brief Whether dab_list was given.  */
   unsigned int num_iter_given ;	/**< @brief Whether num_iter was given.  */
   unsigned int default_type_given ;	/**< @brief Whether default_type was given.  */
   unsigned int query_given ;	/**< @brief Whether query was given.  */
   unsigned int dab_dir_given ;	/**< @brief Whether dab_dir was given.  */
   unsigned int not_query_given ;	/**< @brief Whether not_query was given.  */
+  unsigned int threshold_q_given ;	/**< @brief Whether threshold_q was given.  */
+  unsigned int threshold_g_given ;	/**< @brief Whether threshold_g was given.  */
+  unsigned int tsearch_mode_given ;	/**< @brief Whether tsearch_mode was given.  */
+  unsigned int debug_given ;	/**< @brief Whether debug was given.  */
   unsigned int dir_out_given ;	/**< @brief Whether dir_out was given.  */
 
   char **inputs ; /**< @brief unamed options (options without names) */
  * @param args_info the structure where option information will be stored
  * @return 0 if everything went fine, NON 0 if an error took place
  */
-int cmdline_parser (int argc, char **argv,
+int cmdline_parser (int argc, char * const *argv,
   struct gengetopt_args_info *args_info);
 
 /**
  * @return 0 if everything went fine, NON 0 if an error took place
  * @deprecated use cmdline_parser_ext() instead
  */
-int cmdline_parser2 (int argc, char **argv,
+int cmdline_parser2 (int argc, char * const *argv,
   struct gengetopt_args_info *args_info,
   int override, int initialize, int check_required);
 
  * @param params additional parameters for the parser
  * @return 0 if everything went fine, NON 0 if an error took place
  */
-int cmdline_parser_ext (int argc, char **argv,
+int cmdline_parser_ext (int argc, char * const *argv,
   struct gengetopt_args_info *args_info,
   struct cmdline_parser_params *params);
 
 int cmdline_parser_required (struct gengetopt_args_info *args_info,
   const char *prog_name);
 
-extern const char *cmdline_parser_tsearch_mode_values[];  /**< @brief Possible values for tsearch_mode. */
-extern const char *cmdline_parser_norm_mode_values[];  /**< @brief Possible values for norm_mode. */
+extern char *cmdline_parser_norm_mode_values[] ;	/**< @brief Possible values for norm_mode.  */
+extern char *cmdline_parser_tsearch_mode_values[] ;	/**< @brief Possible values for tsearch_mode.  */
 
 
 #ifdef __cplusplus