Commits

Qian Zhu  committed 0085c48

Make SeekPValue serve dataset p-values

  • Participants
  • Parent commits f798172

Comments (0)

Files changed (4)

File tools/SeekPValue/SeekPValue.cpp

 
 pthread_mutex_t mutexGet;
 
+// Reap every child process that has already exited, without ever blocking.
+// Presumably installed as the SIGCHLD handler (registration is not shown here);
+// the signal number argument is not used.
+void sigchld_handler(int s){
+    for(;;){
+        // WNOHANG: return at once; a result <= 0 means nothing is left to reap
+        if(waitpid(-1, NULL, WNOHANG) <= 0)
+            break;
+    }
+}
+// Return a pointer to the raw address field inside a generic socket address:
+// sin_addr when the family is AF_INET, sin6_addr for every other family
+// (i.e. anything that is not IPv4 is treated as IPv6).
+void *get_in_addr(struct sockaddr *sa){
+    const bool isIPv4 = (sa->sa_family == AF_INET);
+    if (!isIPv4){
+        struct sockaddr_in6 *v6 = (struct sockaddr_in6*)sa;
+        return &(v6->sin6_addr);
+    }
+    struct sockaddr_in *v4 = (struct sockaddr_in*)sa;
+    return &(v4->sin_addr);
+}
+
+#define NUM_THREADS 8
+char THREAD_OCCUPIED[NUM_THREADS];
+
+//mode is genes============================================
 map<string, int> mapstrintGene;
 vector<string> vecstrGenes;
 vector<string> vecstrGeneID;
 vector<vector<int> > randomRank;
 vector<vector<float> > randomSc;
 vector<int> querySize;
-
 int numGenes;
-
-void sigchld_handler(int s){
-    while(waitpid(-1, NULL, WNOHANG) > 0);
-}
-
-// get sockaddr, IPv4 or IPv6:
-void *get_in_addr(struct sockaddr *sa){
-    if (sa->sa_family == AF_INET) {
-        return &(((struct sockaddr_in*)sa)->sin_addr);
-    }
-    return &(((struct sockaddr_in6*)sa)->sin6_addr);
-}
-
-#define NUM_THREADS 8
-char THREAD_OCCUPIED[NUM_THREADS];
+//=========================================================
+struct parameter{ //one row of a dataset's .param file, as filled in by ReadParameter()
+	int size; //column 0 of the row; presumably the query size the row was fitted for -- TODO confirm
+	double scale; //column 5; scale of the tail model applied to log-scores above `threshold`
+	double shape; //column 6; shape of that tail model (shape==0 selects the exponential form)
+	double threshold; //column 3; log-scores at or below this use the quantile table instead of the tail model
+	double portion; //column 2; multiplies the tail survival probability (1 - cdf) to give the p-value
+	vector<double> quantile; //columns 8 onward; nearest entry k to the log-score yields p = 1 - (0.05 + 0.05*k)
+};
+map<string, int> mapstrintDataset;
+vector<string> vecstrDataset;
+vector<vector<struct parameter> > dsetScore; //co-expression score for dataset i, query-size j
+//=========================================================
 
 struct thread_data{ //per-connection request handed to a worker thread through pthread_create
+	int section; //0 - genes, 1 - datasets (which mode to turn on)
+	//Section "genes"
 	vector<string> query; //query gene names; the worker maps them to indices through mapstrintGene
 	vector<float> gene_score; //one score per gene, received from the client
-
 	int mode; //0 - p-value on rank, 1 - p-value on score
 	float nan; //sentinel score: assigned to the query genes and used as the stop marker after sorting
+	//Section "datasets"
+	vector<string> dset; //dataset names, looked up in mapstrintDataset by the worker
+	vector<float> dset_score; //scores to test
+	vector<int> dset_qsize; //number of genes for which coexpression score is calculated, for all dset
+	//============================================
     int threadid; //slot in THREAD_OCCUPIED; the worker resets it to 0 when it finishes
     int new_fd; //client socket: the worker sends the p-values on it and then closes it
 };
 	int threadid = my->threadid;
 	float nan = my->nan;
 	int mode = my->mode;
+	int section = my->section;
+
+	vector<string> dset = my->dset;
+	vector<float> dset_score = my->dset_score;
+	vector<int> dset_qsize = my->dset_qsize;
 
 	vector<string> queryGenes = my->query;
 	vector<float> geneScores = my->gene_score;
 
 	size_t i, j, jj, k;
-	//nan = sArgs.nan_arg;
-	vector<AResultFloat> sortedGenes;
-	sortedGenes.resize(geneScores.size());
-	for(i=0; i<sortedGenes.size(); i++){
-		sortedGenes[i].i = i;
-		sortedGenes[i].f = geneScores[i];
+
+	if(section==1){ //dataset
+		vector<float> pval;
+		for(i=0; i<dset.size(); i++){
+			if(mapstrintDataset.find(dset[i])==mapstrintDataset.end()){
+				fprintf(stderr, "Error: cannot find dataset %s\n", dset[i].c_str());
+				pval.push_back(-1);
+				continue;
+			}
+			if(dset_qsize[i]<2){
+				pval.push_back(0.99);
+				continue;
+			}
+			if(dset_score[i]<=0){
+				pval.push_back(0.99);
+				continue;
+			}
+			int pi = mapstrintDataset[dset[i]];
+			float sc = log(dset_score[i]);
+			int qsize = dset_qsize[i];
+			struct parameter &par = dsetScore[pi][qsize-2];
+			vector<double> &quantile = par.quantile;
+			//fprintf(stderr, "threshold %.2e, scale %.2e, shape %.2e, qsize %d\n", par.threshold, par.scale, par.shape, qsize);
+
+			if((double) sc <= par.threshold){
+				double min_diff = 9999;
+				int min_diff_index = -1;
+				for(k=0; k<quantile.size(); k++){
+					double diff = fabs(quantile[k] - (double) sc);
+					if(diff < min_diff){
+						min_diff = diff;
+						min_diff_index = k;
+					}
+					//fprintf(stderr, "Diff %d %.2f %.2f %d\n", k, quantile[k], diff, qsize);
+				}
+				if(min_diff_index==-1){
+					//fprintf(stderr, "Error, negative one!\n");
+				}
+				float pv = 1.0 - (0.05 + (float)min_diff_index*0.05);
+				pval.push_back(pv);
+				//fprintf(stderr, "original %.2e pval %.2e\n", sc, pv);
+				continue;
+			}
+			double diff = (double) sc - par.threshold;
+			double cdf = 0;
+			if(par.shape==0){
+				cdf = 1.0 - exp(-1.0 * diff / par.scale);
+			}else{
+				cdf = 1.0 - pow(1.0 + par.shape * diff / par.scale, -1.0 / par.shape);
+			}
+			double pv = par.portion * (1.0 - cdf);
+			//fprintf(stderr, "before original %.2e pval %.2e\n", sc, pv);
+			if(isnan(pv)){
+				pv = 2.0e-5;
+			}
+			pval.push_back((float) pv);
+			//fprintf(stderr, "original %.2e pval %.2e\n", sc, pv);
+		}
+
+		if(CSeekNetwork::Send(new_fd, pval)==-1){
+			fprintf(stderr, "Error sending message to client!\n");
+		}
+
+
 	}
+	else if(section==0){ //genes
+		vector<AResultFloat> sortedGenes;
+		sortedGenes.resize(geneScores.size());
+		for(i=0; i<sortedGenes.size(); i++){
+			sortedGenes[i].i = i;
+			sortedGenes[i].f = geneScores[i];
+		}
 
-	vector<int> queryGeneID;
-	for(i=0; i<queryGenes.size(); i++)
-		queryGeneID.push_back(mapstrintGene[queryGenes[i]]);
-	//Query genes themselves have lowest score, to prevent
-	//them from being counted in PR
-	for(i=0; i<queryGeneID.size(); i++)
-		sortedGenes[queryGeneID[i]].f = nan;
+		vector<int> queryGeneID;
+		for(i=0; i<queryGenes.size(); i++)
+			queryGeneID.push_back(mapstrintGene[queryGenes[i]]);
+		//Query genes themselves have lowest score, to prevent
+		//them from being counted in PR
+		for(i=0; i<queryGeneID.size(); i++)
+			sortedGenes[queryGeneID[i]].f = nan;
 
-	sort(sortedGenes.begin(), sortedGenes.end());
+		sort(sortedGenes.begin(), sortedGenes.end());
 
-	//comparison
-	vector<int> geneRank;
-	geneRank.resize(numGenes);
-	for(jj=0; jj<numGenes; jj++){
-		geneRank[sortedGenes[jj].i] = jj;
-	}
+		//comparison
+		vector<int> geneRank;
+		geneRank.resize(numGenes);
+		for(jj=0; jj<numGenes; jj++){
+			geneRank[sortedGenes[jj].i] = jj;
+		}
 
-	vector<float> pval;
-	CSeekTools::InitVector(pval, geneScores.size(), (float) nan);	
-
-	for(jj=0; jj<geneScores.size(); jj++){
-		int gene = sortedGenes[jj].i;
-		int gene_rank = jj;
-		float gene_score = sortedGenes[jj].f;
-		if(gene_score==nan) break;
-		//if(gene_score<0) 
-		//	continue;
-		vector<int> &rR = randomRank[gene];
-		vector<float> &rF = randomSc[gene];
-		int kk = 0;
-		if(mode==1){
-			if(gene_score>=0){
-				for(kk=0; kk<rF.size(); kk++){
-					if(gene_score>=rF[kk] || kk==rF.size()-1)
-						pval[gene] = (float) kk / (float) rF.size();
-						//fprintf(stderr, "%s\t%d\t%d\t%.5e\t%.5e\n", vecstrGenes[gene].c_str(),
-						//gene_rank, kk, gene_score, randomSc[gene][kk]);
-					if(gene_score>=rF[kk])
-						break;
+		vector<float> pval;
+		CSeekTools::InitVector(pval, geneScores.size(), (float) nan);	
+	
+		for(jj=0; jj<geneScores.size(); jj++){
+			int gene = sortedGenes[jj].i;
+			int gene_rank = jj;
+			float gene_score = sortedGenes[jj].f;
+			if(gene_score==nan) break;
+			//if(gene_score<0) 
+			//	continue;
+			vector<int> &rR = randomRank[gene];
+			vector<float> &rF = randomSc[gene];
+			int kk = 0;
+			if(mode==1){
+				if(gene_score>=0){
+					for(kk=0; kk<rF.size(); kk++){
+						if(gene_score>=rF[kk] || kk==rF.size()-1)
+							pval[gene] = (float) kk / (float) rF.size();
+							//fprintf(stderr, "%s\t%d\t%d\t%.5e\t%.5e\n", vecstrGenes[gene].c_str(),
+							//gene_rank, kk, gene_score, randomSc[gene][kk]);
+						if(gene_score>=rF[kk])
+							break;
+					}
+				}else{
+					for(kk=rF.size()-1; kk>=0; kk--){
+						if(gene_score<=rF[kk] || kk==0)
+							pval[gene] = (float) (rF.size()-1-kk) / (float) rF.size();
+							//fprintf(stderr, "%s\t%d\t%d\t%.5e\t%.5e\n", vecstrGenes[gene].c_str(),
+							//gene_rank, rF.size()-1-kk, gene_score, randomSc[gene][kk]);
+						if(gene_score<=rF[kk])
+							break;
+					}
 				}
-			}else{
-				for(kk=rF.size()-1; kk>=0; kk--){
-					if(gene_score<=rF[kk] || kk==0)
-						pval[gene] = (float) (rF.size()-1-kk) / (float) rF.size();
-						//fprintf(stderr, "%s\t%d\t%d\t%.5e\t%.5e\n", vecstrGenes[gene].c_str(),
-						//gene_rank, rF.size()-1-kk, gene_score, randomSc[gene][kk]);
-					if(gene_score<=rF[kk])
-						break;
-				}
-			}
-		}else if(mode==0){
-			if(gene_rank<17600/2){
-				for(kk=0; kk<rR.size(); kk++){
-					if(gene_rank<=rR[kk] || kk==rR.size()-1)
-						pval[gene] = (float) kk / (float) rR.size();
-						//fprintf(stderr, "%s\t%d\t%d\t%.5e\t%.5e\n", vecstrGenes[gene].c_str(),
-						//gene_rank, kk, gene_score, randomSc[gene][kk]);
-					if(gene_rank<=rR[kk])
-						break;
-				}
-			}else{
-				for(kk=rR.size()-1; kk>=0; kk--){
-					if(gene_rank>=rR[kk] || kk==0)
-						pval[gene] = (float) (rR.size()-1-kk) / (float) rF.size();
-						//fprintf(stderr, "%s\t%d\t%d\t%.5e\t%.5e\n", vecstrGenes[gene].c_str(),
-						//gene_rank, rR.size()-1-kk, gene_score, randomSc[gene][kk]);
-					if(gene_rank>=rR[kk])
-						break;
+			}else if(mode==0){
+				if(gene_rank<17600/2){
+					for(kk=0; kk<rR.size(); kk++){
+						if(gene_rank<=rR[kk] || kk==rR.size()-1)
+							pval[gene] = (float) kk / (float) rR.size();
+							//fprintf(stderr, "%s\t%d\t%d\t%.5e\t%.5e\n", vecstrGenes[gene].c_str(),
+							//gene_rank, kk, gene_score, randomSc[gene][kk]);
+						if(gene_rank<=rR[kk])
+							break;
+					}
+				}else{
+					for(kk=rR.size()-1; kk>=0; kk--){
+						if(gene_rank>=rR[kk] || kk==0)
+							pval[gene] = (float) (rR.size()-1-kk) / (float) rF.size();
+							//fprintf(stderr, "%s\t%d\t%d\t%.5e\t%.5e\n", vecstrGenes[gene].c_str(),
+							//gene_rank, rR.size()-1-kk, gene_score, randomSc[gene][kk]);
+						if(gene_rank>=rR[kk])
+							break;
+					}
 				}
 			}
 		}
+	
+		if(CSeekNetwork::Send(new_fd, pval)==-1){
+			fprintf(stderr, "Error sending message to client!\n");
+		}
 	}
 
-	if(CSeekNetwork::Send(new_fd, pval)==-1){
-		fprintf(stderr, "Error sending message to client!\n");
-	}
 
 	pthread_mutex_lock(&mutexGet);
 	close(new_fd);
 	THREAD_OCCUPIED[threadid] = 0;
 	pthread_mutex_unlock(&mutexGet);
-
 	int ret = 0;
 	pthread_exit((void*)ret);
 }
+//mode ends=========================================================
+
+/**
+ * Load the p-value model parameters of one dataset from a text file.
+ *
+ * Each non-empty line becomes one `struct parameter`, appended to v in file
+ * order. After tokenizing with CMeta::Tokenize (default separator), the
+ * columns are read as:
+ *   0 -> size, 2 -> portion, 3 -> threshold, 5 -> scale, 6 -> shape,
+ *   8 and onward -> quantile[]. Columns 1, 4 and 7 are ignored.
+ * Reading stops at the first empty line or at end of file.
+ *
+ * @param param_file path of the parameter file
+ * @param v          output; cleared once the file has been opened, then
+ *                   filled with one entry per parsed line
+ * @return true when the file was opened and every line read had at least
+ *         the 7 directly addressed columns; false when the file cannot be
+ *         opened (v untouched) or a line is too short (v keeps the rows
+ *         parsed before the bad line). An error is printed to stderr in
+ *         both failure cases.
+ */
+bool ReadParameter(string param_file, vector<struct parameter> &v){
+	//the stream closes itself on every return path
+	ifstream ifsm(param_file.c_str());
+	if(!ifsm.is_open()){
+		fprintf(stderr, "Error opening file %s\n", param_file.c_str());
+		return false;
+	}
+	//columns 0..6 are indexed directly below, so a row needs at least 7 tokens
+	const size_t minTokens = 7;
+	const size_t firstQuantile = 8;
+	v.clear();
+
+	string line;
+	//getline() itself is the loop condition: a failed read ends the loop,
+	//and lines are no longer limited by a fixed-size buffer
+	while(getline(ifsm, line)){
+		if(line.empty()) break; //an empty line terminates the table
+		vector<string> tok;
+		CMeta::Tokenize(line.c_str(), tok);
+		if(tok.size() < minTokens){
+			//indexing tok[2..6] on a short row would read out of bounds
+			fprintf(stderr, "Error: malformed line in %s (%lu columns, expected at least %lu)\n",
+				param_file.c_str(), (unsigned long) tok.size(), (unsigned long) minTokens);
+			return false;
+		}
+		struct parameter par;
+		par.size = atoi(tok[0].c_str());
+		par.portion = atof(tok[2].c_str());
+		par.threshold = atof(tok[3].c_str());
+		par.scale = atof(tok[5].c_str());
+		par.shape = atof(tok[6].c_str());
+		par.quantile = vector<double>();
+		for(size_t k=firstQuantile; k<tok.size(); k++){
+			par.quantile.push_back(atof(tok[k].c_str()));
+		}
+		v.push_back(par);
+	}
+	return true;
+}
+
 
 int main( int iArgs, char** aszArgs ) {
 	static const size_t	c_iBuffer	= 1024;
 	}
 
 	PORT = sArgs.port_arg;
-	float nan = sArgs.nan_arg;
+	string strMode = sArgs.mode_arg;
+	float nan = sArgs.nan_arg; //only used for strMode=="genes"
 
-	if(!CSeekTools::ReadListTwoColumns(sArgs.input_arg, vecstrGeneID, vecstrGenes))
-		return false;
-	for(i=0; i<vecstrGenes.size(); i++)
-		mapstrintGene[vecstrGenes[i]] = (int) i;
+	//preparation=====================================================
+	if(strMode=="genes"){
+		if(!CSeekTools::ReadListTwoColumns(sArgs.input_arg, vecstrGeneID, vecstrGenes))
+			return false;
+		for(i=0; i<vecstrGenes.size(); i++)
+			mapstrintGene[vecstrGenes[i]] = (int) i;
 
-	numGenes = vecstrGenes.size();
+		numGenes = vecstrGenes.size();
 	
-	string random_directory = sArgs.random_dir_arg;
-	int num_random = sArgs.random_num_arg;
-	int ii, jj;
-	char ac[256];
+		string random_directory = sArgs.random_dir_arg;
+		int num_random = sArgs.random_num_arg;
+		int ii, jj;
+		char ac[256];
 
-	randomRank.resize(numGenes);
-	randomSc.resize(numGenes);
-	for(ii=0; ii<numGenes; ii++){
-		randomRank[ii].resize(num_random);
-		randomSc[ii].resize(num_random);
+		randomRank.resize(numGenes);
+		randomSc.resize(numGenes);
+		for(ii=0; ii<numGenes; ii++){
+			randomRank[ii].resize(num_random);
+			randomSc[ii].resize(num_random);
+		}
+
+		for(ii=0; ii<num_random; ii++){
+			vector<float> randomScores;
+			sprintf(ac, "%s/%d.gscore", random_directory.c_str(), ii);
+			CSeekTools::ReadArray(ac, randomScores);
+			/*vector<string> queryGenes;
+			sprintf(ac, "%s/%d.query", random_directory.c_str(), ii);
+			CSeekTools::ReadMultiGeneOneLine(ac, queryGenes);
+			querySize.push_back(queryGenes.size());
+			*/
+			vector<AResultFloat> sortedRandom;
+			sortedRandom.resize(randomScores.size());
+			for(jj=0; jj<randomScores.size(); jj++){
+				sortedRandom[jj].i = jj;
+				sortedRandom[jj].f = randomScores[jj];
+			}
+			sort(sortedRandom.begin(), sortedRandom.end());
+			for(jj=0; jj<randomScores.size(); jj++){
+				randomRank[sortedRandom[jj].i][ii] = jj;
+				randomSc[sortedRandom[jj].i][ii] = sortedRandom[jj].f;
+			}
+		}
+
+		for(jj=0; jj<numGenes; jj++){
+			sort(randomRank[jj].begin(), randomRank[jj].end());
+			sort(randomSc[jj].begin(), randomSc[jj].end(), std::greater<float>());
+		}
 	}
-
-	for(ii=0; ii<num_random; ii++){
-		vector<float> randomScores;
-		sprintf(ac, "%s/%d.gscore", random_directory.c_str(), ii);
-		CSeekTools::ReadArray(ac, randomScores);
-		/*vector<string> queryGenes;
-		sprintf(ac, "%s/%d.query", random_directory.c_str(), ii);
-		CSeekTools::ReadMultiGeneOneLine(ac, queryGenes);
-		querySize.push_back(queryGenes.size());
-		*/
-		vector<AResultFloat> sortedRandom;
-		sortedRandom.resize(randomScores.size());
-		for(jj=0; jj<randomScores.size(); jj++){
-			sortedRandom[jj].i = jj;
-			sortedRandom[jj].f = randomScores[jj];
+	else if(strMode=="datasets"){
+		vector<string> vD, vDP;
+		if(!CSeekTools::ReadListTwoColumns(sArgs.dset_platform_arg, vD, vDP))
+			return false;
+		for(i=0; i<vD.size(); i++){
+			vecstrDataset.push_back(vD[i]);
 		}
-		sort(sortedRandom.begin(), sortedRandom.end());
-		for(jj=0; jj<randomScores.size(); jj++){
-			randomRank[sortedRandom[jj].i][ii] = jj;
-			randomSc[sortedRandom[jj].i][ii] = sortedRandom[jj].f;
+		for(i=0; i<vecstrDataset.size(); i++){
+			mapstrintDataset[vecstrDataset[i]] = i;
+		}
+		string param_dir = sArgs.param_dir_arg;
+		dsetScore.resize(vecstrDataset.size());
+		for(i=0; i<vecstrDataset.size(); i++){
+			string param_file = param_dir + "/" + vecstrDataset[i] + ".param";
+			dsetScore[i] = vector<struct parameter>();
+			ReadParameter(param_file, dsetScore[i]);
 		}
 	}
 
-	for(jj=0; jj<numGenes; jj++){
-		sort(randomRank[jj].begin(), randomRank[jj].end());
-		sort(randomSc[jj].begin(), randomSc[jj].end(), std::greater<float>());
-	}
-
+	//find a free port and attempt binding to the port
 	int sockfd, new_fd;
 	struct addrinfo hints, *servinfo, *p;
 	struct sockaddr_storage their_addr;
 		THREAD_OCCUPIED[d] = 1;
 		pthread_mutex_unlock(&mutexGet);
 
-		string strQuery;
-		vector<float> vf;
-		vector<string> query;
-		string strMode;
-		int mode;
+		thread_arg[d].threadid = d;
+		thread_arg[d].new_fd = new_fd;
 
-		if(CSeekNetwork::Receive(new_fd, strMode)==-1){
-			fprintf(stderr, "Error receiving from client\n");
+		if(strMode=="genes"){
+			string strQuery;
+			vector<float> vf;
+			vector<string> query;
+			string sMode;
+			int mode;
+
+			if(CSeekNetwork::Receive(new_fd, sMode)==-1){
+				fprintf(stderr, "Error receiving from client\n");
+			}
+
+			if(sMode=="rank") 
+				mode = 0;
+			else if(sMode=="score")
+				mode = 1;
+	
+			if(CSeekNetwork::Receive(new_fd, strQuery)==-1){
+				fprintf(stderr, "Error receiving from client!\n");
+			}
+
+			if(CSeekNetwork::Receive(new_fd, vf)==-1){
+				fprintf(stderr, "Error receiving from client!\n");
+			}
+
+			CMeta::Tokenize(strQuery.c_str(), query, " ");
+			//=========================================================
+			thread_arg[d].section = 0; //genes section
+			thread_arg[d].query = query;
+			thread_arg[d].gene_score = vf;
+			thread_arg[d].nan = nan;
+			thread_arg[d].mode = mode;
+		}
+		else if(strMode=="datasets"){
+			string strDataset;
+			vector<string> dataset;
+			vector<float> qsize;
+			vector<float> vf;
+			if(CSeekNetwork::Receive(new_fd, strDataset)==-1){
+				fprintf(stderr, "Error receiving from client!\n");
+			}
+			if(CSeekNetwork::Receive(new_fd, vf)==-1){
+				fprintf(stderr, "Error receiving from client!\n");
+			}
+			if(CSeekNetwork::Receive(new_fd, qsize)==-1){
+				fprintf(stderr, "Error receiving from client!\n");
+			}
+			vector<int> vi;
+			vi.resize(qsize.size());
+			for(int ki=0; ki<qsize.size(); ki++)
+				vi[ki] = (int) qsize[ki];
+
+			CMeta::Tokenize(strDataset.c_str(), dataset, " ");
+			//========================================================
+			thread_arg[d].dset = dataset;
+			thread_arg[d].dset_score = vf;
+			thread_arg[d].dset_qsize = vi;
+			thread_arg[d].section = 1;
 		}
 
-		if(strMode=="rank") 
-			mode = 0;
-		else if(strMode=="score")
-			mode = 1;
-
-		if(CSeekNetwork::Receive(new_fd, strQuery)==-1){
-			fprintf(stderr, "Error receiving from client!\n");
-		}
-
-		if(CSeekNetwork::Receive(new_fd, vf)==-1){
-			fprintf(stderr, "Error receiving from client!\n");
-		}
-
-		CMeta::Tokenize(strQuery.c_str(), query, " ");
-
-		//=========================================================
-
-		thread_arg[d].threadid = d;
-		thread_arg[d].new_fd = new_fd;
-		thread_arg[d].query = query;
-		thread_arg[d].gene_score = vf;
-		thread_arg[d].nan = nan;
-		thread_arg[d].mode = mode;
 		int ret;
 		pthread_create(&th[d], NULL, do_query, (void*) &thread_arg[d]);
-
 	}
 
 #ifdef WIN32

File tools/SeekPValue/SeekPValue.ggo

 purpose	"Estimates P-Value of the retrieved genes based on a background of random queries"
 
 section "Main"
+option	"mode"				m	"Mode (datasets or genes)"
+								values="datasets","genes" default="genes"
 option	"port"				t	"Port"
 								string default="9005"
+
+section "Dataset mode"
+option	"dset_platform"		d	"Dataset platform file"
+								string typestr="filename"
+option	"param_dir"			p	"Parameter directory"
+								string typestr="directory"
+
+section "Gene mode"
 option	"random_dir"		R	"Random directory"
-								string typestr="directory"	yes
+								string typestr="directory"
 option	"random_num"		N	"Number of random trials"
-								int 	default="100"	yes
+								int 	default="100"
 option	"input"				i	"Gene mapping file"
-								string typestr="filename" yes
+								string typestr="filename"
 option	"nan"				n	"Define NaN score"
 								float	default="-320"

File tools/SeekPValue/cmdline.c

 const char *gengetopt_args_info_description = "";
 
 const char *gengetopt_args_info_help[] = {
-  "  -h, --help                  Print help and exit",
-  "  -V, --version               Print version and exit",
+  "  -h, --help                    Print help and exit",
+  "  -V, --version                 Print version and exit",
   "\nMain:",
-  "  -t, --port=STRING           Port  (default=`9005')",
-  "  -R, --random_dir=directory  Random directory",
-  "  -N, --random_num=INT        Number of random trials  (default=`100')",
-  "  -i, --input=filename        Gene mapping file",
-  "  -n, --nan=FLOAT             Define NaN score  (default=`-320')",
+  "  -m, --mode=STRING             Mode (datasets or genes)  (possible \n                                  values=\"datasets\", \"genes\" \n                                  default=`genes')",
+  "  -t, --port=STRING             Port  (default=`9005')",
+  "\nDataset mode:",
+  "  -d, --dset_platform=filename  Dataset platform file",
+  "  -p, --param_dir=directory     Parameter directory",
+  "\nGene mode:",
+  "  -R, --random_dir=directory    Random directory",
+  "  -N, --random_num=INT          Number of random trials  (default=`100')",
+  "  -i, --input=filename          Gene mapping file",
+  "  -n, --nan=FLOAT               Define NaN score  (default=`-320')",
     0
 };
 
 cmdline_parser_internal (int argc, char **argv, struct gengetopt_args_info *args_info,
                         struct cmdline_parser_params *params, const char *additional_error);
 
-static int
-cmdline_parser_required2 (struct gengetopt_args_info *args_info, const char *prog_name, const char *additional_error);
+
+const char *cmdline_parser_mode_values[] = {"datasets", "genes", 0}; /*< Possible values for mode. */
 
 static char *
 gengetopt_strdup (const char *s);
 {
   args_info->help_given = 0 ;
   args_info->version_given = 0 ;
+  args_info->mode_given = 0 ;
   args_info->port_given = 0 ;
+  args_info->dset_platform_given = 0 ;
+  args_info->param_dir_given = 0 ;
   args_info->random_dir_given = 0 ;
   args_info->random_num_given = 0 ;
   args_info->input_given = 0 ;
 void clear_args (struct gengetopt_args_info *args_info)
 {
   FIX_UNUSED (args_info);
+  args_info->mode_arg = gengetopt_strdup ("genes");
+  args_info->mode_orig = NULL;
   args_info->port_arg = gengetopt_strdup ("9005");
   args_info->port_orig = NULL;
+  args_info->dset_platform_arg = NULL;
+  args_info->dset_platform_orig = NULL;
+  args_info->param_dir_arg = NULL;
+  args_info->param_dir_orig = NULL;
   args_info->random_dir_arg = NULL;
   args_info->random_dir_orig = NULL;
   args_info->random_num_arg = 100;
 
   args_info->help_help = gengetopt_args_info_help[0] ;
   args_info->version_help = gengetopt_args_info_help[1] ;
-  args_info->port_help = gengetopt_args_info_help[3] ;
-  args_info->random_dir_help = gengetopt_args_info_help[4] ;
-  args_info->random_num_help = gengetopt_args_info_help[5] ;
-  args_info->input_help = gengetopt_args_info_help[6] ;
-  args_info->nan_help = gengetopt_args_info_help[7] ;
+  args_info->mode_help = gengetopt_args_info_help[3] ;
+  args_info->port_help = gengetopt_args_info_help[4] ;
+  args_info->dset_platform_help = gengetopt_args_info_help[6] ;
+  args_info->param_dir_help = gengetopt_args_info_help[7] ;
+  args_info->random_dir_help = gengetopt_args_info_help[9] ;
+  args_info->random_num_help = gengetopt_args_info_help[10] ;
+  args_info->input_help = gengetopt_args_info_help[11] ;
+  args_info->nan_help = gengetopt_args_info_help[12] ;
   
 }
 
 cmdline_parser_release (struct gengetopt_args_info *args_info)
 {
   unsigned int i;
+  free_string_field (&(args_info->mode_arg));
+  free_string_field (&(args_info->mode_orig));
   free_string_field (&(args_info->port_arg));
   free_string_field (&(args_info->port_orig));
+  free_string_field (&(args_info->dset_platform_arg));
+  free_string_field (&(args_info->dset_platform_orig));
+  free_string_field (&(args_info->param_dir_arg));
+  free_string_field (&(args_info->param_dir_orig));
   free_string_field (&(args_info->random_dir_arg));
   free_string_field (&(args_info->random_dir_orig));
   free_string_field (&(args_info->random_num_orig));
   clear_given (args_info);
 }
 
+/**
+ * Look up val in a list of allowed option values. A value matches when val
+ * is a prefix of it (strncmp over strlen(val) characters), so abbreviations
+ * are accepted as long as they are unambiguous.
+ * @param val the value to check
+ * @param values the possible values; the scan stops at the first null entry
+ * @return the index of the matched value (an exact-length match is returned
+ * immediately, otherwise the index of the single prefix match):
+ * -1 if val is null or no value matched,
+ * -2 if more than one value has matched
+ */
+static int
+check_possible_values(const char *val, const char *values[])
+{
+  int i, found, last;
+  size_t len;
+
+  if (!val)   /* otherwise strlen() crashes below */
+    return -1; /* -1 means no argument for the option */
+
+  found = last = 0;
+
+  for (i = 0, len = strlen(val); values[i]; ++i)
+    {
+      if (strncmp(val, values[i], len) == 0)
+        {
+          ++found;
+          last = i;
+          if (strlen(values[i]) == len)
+            return i; /* exact match, no need to check more */
+        }
+    }
+
+  if (found == 1) /* one match: OK */
+    return last;
+
+  return (found ? -2 : -1); /* return many values or none matched */
+}
+
 
 static void
 write_into_file(FILE *outfile, const char *opt, const char *arg, const char *values[])
 {
-  FIX_UNUSED (values);
+  int found = -1;
   if (arg) {
-    fprintf(outfile, "%s=\"%s\"\n", opt, arg);
+    if (values) {
+      found = check_possible_values(arg, values);      
+    }
+    if (found >= 0)
+      fprintf(outfile, "%s=\"%s\" # %s\n", opt, arg, values[found]);
+    else
+      fprintf(outfile, "%s=\"%s\"\n", opt, arg);
   } else {
     fprintf(outfile, "%s\n", opt);
   }
     write_into_file(outfile, "help", 0, 0 );
   if (args_info->version_given)
     write_into_file(outfile, "version", 0, 0 );
+  if (args_info->mode_given)
+    write_into_file(outfile, "mode", args_info->mode_orig, cmdline_parser_mode_values);
   if (args_info->port_given)
     write_into_file(outfile, "port", args_info->port_orig, 0);
+  if (args_info->dset_platform_given)
+    write_into_file(outfile, "dset_platform", args_info->dset_platform_orig, 0);
+  if (args_info->param_dir_given)
+    write_into_file(outfile, "param_dir", args_info->param_dir_orig, 0);
   if (args_info->random_dir_given)
     write_into_file(outfile, "random_dir", args_info->random_dir_orig, 0);
   if (args_info->random_num_given)
 int
 cmdline_parser_required (struct gengetopt_args_info *args_info, const char *prog_name)
 {
-  int result = EXIT_SUCCESS;
-
-  if (cmdline_parser_required2(args_info, prog_name, 0) > 0)
-    result = EXIT_FAILURE;
-
-  return result;
-}
-
-int
-cmdline_parser_required2 (struct gengetopt_args_info *args_info, const char *prog_name, const char *additional_error)
-{
-  int error = 0;
-  FIX_UNUSED (additional_error);
-
-  /* checks for required options */
-  if (! args_info->random_dir_given)
-    {
-      fprintf (stderr, "%s: '--random_dir' ('-R') option required%s\n", prog_name, (additional_error ? additional_error : ""));
-      error = 1;
-    }
-  
-  if (! args_info->random_num_given)
-    {
-      fprintf (stderr, "%s: '--random_num' ('-N') option required%s\n", prog_name, (additional_error ? additional_error : ""));
-      error = 1;
-    }
-  
-  if (! args_info->input_given)
-    {
-      fprintf (stderr, "%s: '--input' ('-i') option required%s\n", prog_name, (additional_error ? additional_error : ""));
-      error = 1;
-    }
-  
-  
-  /* checks for dependences among options */
-
-  return error;
+  FIX_UNUSED (args_info);
+  FIX_UNUSED (prog_name);
+  return EXIT_SUCCESS;
 }
 
 
       return 1; /* failure */
     }
 
-  FIX_UNUSED (default_value);
+  if (possible_values && (found = check_possible_values((value ? value : default_value), possible_values)) < 0)
+    {
+      if (short_opt != '-')
+        fprintf (stderr, "%s: %s argument, \"%s\", for option `--%s' (`-%c')%s\n", 
+          package_name, (found == -2) ? "ambiguous" : "invalid", value, long_opt, short_opt,
+          (additional_error ? additional_error : ""));
+      else
+        fprintf (stderr, "%s: %s argument, \"%s\", for option `--%s'%s\n", 
+          package_name, (found == -2) ? "ambiguous" : "invalid", value, long_opt,
+          (additional_error ? additional_error : ""));
+      return 1; /* failure */
+    }
     
   if (field_given && *field_given && ! override)
     return 0;
       static struct option long_options[] = {
         { "help",	0, NULL, 'h' },
         { "version",	0, NULL, 'V' },
+        { "mode",	1, NULL, 'm' },
         { "port",	1, NULL, 't' },
+        { "dset_platform",	1, NULL, 'd' },
+        { "param_dir",	1, NULL, 'p' },
         { "random_dir",	1, NULL, 'R' },
         { "random_num",	1, NULL, 'N' },
         { "input",	1, NULL, 'i' },
         { 0,  0, 0, 0 }
       };
 
-      c = getopt_long (argc, argv, "hVt:R:N:i:n:", long_options, &option_index);
+      c = getopt_long (argc, argv, "hVm:t:d:p:R:N:i:n:", long_options, &option_index);
 
       if (c == -1) break;	/* Exit from `while (1)' loop.  */
 
           return 0;
         
           break;
+        case 'm':	/* Mode (datasets or genes).  */
+        
+        
+          if (update_arg( (void *)&(args_info->mode_arg), 
+               &(args_info->mode_orig), &(args_info->mode_given),
+              &(local_args_info.mode_given), optarg, cmdline_parser_mode_values, "genes", ARG_STRING,
+              check_ambiguity, override, 0, 0,
+              "mode", 'm',
+              additional_error))
+            goto failure;
+        
+          break;
         case 't':	/* Port.  */
         
         
             goto failure;
         
           break;
+        case 'd':	/* Dataset platform file.  */
+        
+        
+          if (update_arg( (void *)&(args_info->dset_platform_arg), 
+               &(args_info->dset_platform_orig), &(args_info->dset_platform_given),
+              &(local_args_info.dset_platform_given), optarg, 0, 0, ARG_STRING,
+              check_ambiguity, override, 0, 0,
+              "dset_platform", 'd',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'p':	/* Parameter directory.  */
+        
+        
+          if (update_arg( (void *)&(args_info->param_dir_arg), 
+               &(args_info->param_dir_orig), &(args_info->param_dir_given),
+              &(local_args_info.param_dir_given), optarg, 0, 0, ARG_STRING,
+              check_ambiguity, override, 0, 0,
+              "param_dir", 'p',
+              additional_error))
+            goto failure;
+        
+          break;
         case 'R':	/* Random directory.  */
         
         
 
 
 
-  if (check_required)
-    {
-      error += cmdline_parser_required2 (args_info, argv[0], additional_error);
-    }
 
   cmdline_parser_release (&local_args_info);
 

File tools/SeekPValue/cmdline.h

 {
   const char *help_help; /**< @brief Print help and exit help description.  */
   const char *version_help; /**< @brief Print version and exit help description.  */
+  char * mode_arg;	/**< @brief Mode (datasets or genes) (default='genes').  */
+  char * mode_orig;	/**< @brief Mode (datasets or genes) original value given at command line.  */
+  const char *mode_help; /**< @brief Mode (datasets or genes) help description.  */
   char * port_arg;	/**< @brief Port (default='9005').  */
   char * port_orig;	/**< @brief Port original value given at command line.  */
   const char *port_help; /**< @brief Port help description.  */
+  char * dset_platform_arg;	/**< @brief Dataset platform file.  */
+  char * dset_platform_orig;	/**< @brief Dataset platform file original value given at command line.  */
+  const char *dset_platform_help; /**< @brief Dataset platform file help description.  */
+  char * param_dir_arg;	/**< @brief Parameter directory.  */
+  char * param_dir_orig;	/**< @brief Parameter directory original value given at command line.  */
+  const char *param_dir_help; /**< @brief Parameter directory help description.  */
   char * random_dir_arg;	/**< @brief Random directory.  */
   char * random_dir_orig;	/**< @brief Random directory original value given at command line.  */
   const char *random_dir_help; /**< @brief Random directory help description.  */
   
   unsigned int help_given ;	/**< @brief Whether help was given.  */
   unsigned int version_given ;	/**< @brief Whether version was given.  */
+  unsigned int mode_given ;	/**< @brief Whether mode was given.  */
   unsigned int port_given ;	/**< @brief Whether port was given.  */
+  unsigned int dset_platform_given ;	/**< @brief Whether dset_platform was given.  */
+  unsigned int param_dir_given ;	/**< @brief Whether param_dir was given.  */
   unsigned int random_dir_given ;	/**< @brief Whether random_dir was given.  */
   unsigned int random_num_given ;	/**< @brief Whether random_num was given.  */
   unsigned int input_given ;	/**< @brief Whether input was given.  */
 int cmdline_parser_required (struct gengetopt_args_info *args_info,
   const char *prog_name);
 
+extern const char *cmdline_parser_mode_values[];  /**< @brief Possible values for mode. */
+
 
 #ifdef __cplusplus
 }