Commits

Qian Zhu committed 0555932

Fixed SeekServer to allow multiple users to perform queries simultaneously
SeekAggregatedDataset: added command-line options

Comments (0)

Files changed (14)

src/seekcentral.cpp

 	m_bSimulateWeight = false;
 	m_bOutputText = false;
 	m_bSquareZ = false;
-	m_bSharedDB = false;
+	//m_bSharedDB = false;
+
+	m_vecDBSetting.clear();
+	m_useNibble = false;
 
 	DEBUG = false;
 	m_output_dir = "";
 	m_vecstrPlatform.clear();
 
 	if(m_vecDB.size()!=0){
-		if(!m_bSharedDB){
-			for(i=0; i<m_vecDB.size(); i++)
-				delete m_vecDB[i];
+		//if(!m_bSharedDB){
+		for(i=0; i<m_vecDB.size(); i++){
+			delete m_vecDB[i];
+			m_vecDB[i] = NULL;
 		}
-		for(i=0; i<m_vecDB.size(); i++)
-			m_vecDB[i] = NULL;
+		//}
+		//for(i=0; i<m_vecDB.size(); i++)
+		//	m_vecDB[i] = NULL;
 		m_vecDB.clear();
 	}
 
 	m_mapLoadTime.clear();
 	m_output_dir = "";
 	DEBUG = false;
-	m_bSharedDB = false;
+	//m_bSharedDB = false;
+
+	for(i=0; i<m_vecDBSetting.size(); i++)
+		if(m_vecDBSetting[i]!= NULL)
+			delete m_vecDBSetting[i];
+
+	m_vecDBSetting.clear();
+	m_useNibble = false;
 }
 
 bool CSeekCentral::CalculateRestart(){
 
 	m_output_dir = output_dir; //LATER, TO BE DELETED
 	m_maxNumDB = src->m_maxNumDB;
-	m_bSharedDB = true;
+	//m_bSharedDB = true;
 	m_numThreads = src->m_numThreads;
 	m_fScoreCutOff = src->m_fScoreCutOff;
 	m_fPercentQueryAfterScoreCutOff = query_min_required;
 				m_mapstrintDataset[m_vecstrSearchDatasets[i][j]]);
 	}
 
-	m_vecDB.resize(src->m_vecDB.size());
-	for(i=0; i<m_vecDB.size(); i++)
-		m_vecDB[i] = src->m_vecDB[i];
-	//m_DB = src->m_DB; //shared DB
-
 	m_vecDBDataset.resize(src->m_vecDB.size());
-	for(i=0; i<m_vecDB.size(); i++){
+	for(i=0; i<src->m_vecDB.size(); i++){
 		m_vecDBDataset[i].resize(src->m_vecDBDataset[i].size());
 		copy(src->m_vecDBDataset[i].begin(), src->m_vecDBDataset[i].end(),
 		m_vecDBDataset[i].begin());
 	}
 
+	m_vecDBSetting.resize(src->m_vecDBSetting.size());
+	for(i=0; i<m_vecDBSetting.size(); i++)
+		m_vecDBSetting[i] = new CSeekDBSetting(src->m_vecDBSetting[i]);
+	m_useNibble = src->m_useNibble;
+
+	m_vecDB.resize(src->m_vecDB.size());
+	//commented out Jan 8, 2014	
+	//for(i=0; i<m_vecDB.size(); i++)
+	//	m_vecDB[i] = src->m_vecDB[i];
+	for(i=0; i<m_vecDB.size(); i++){
+		m_vecDB[i] = NULL;
+		m_vecDB[i] = new CDatabase(m_useNibble);
+		m_vecDB[i]->Open(m_vecDBSetting[i]->GetValue("db"),
+			m_vecstrGenes, m_vecDBDataset[i].size(), m_vecDBSetting[i]->GetNumDB());
+	}
+
 	CSeekTools::LoadDatabase(m_vecDB, m_iGenes, m_iDatasets,
 		m_vc, src->m_vc, m_vp, src->m_vp, m_vecstrDatasets,
 		m_mapstrstrDatasetPlatform, m_mapstriPlatform);
 	for(i=0; i<vecDBSetting.size(); i++)
 		m_vecDB[i] = NULL;
 
+	m_vecDBSetting.resize(vecDBSetting.size());
+	for(i=0; i<m_vecDBSetting.size(); i++)
+		m_vecDBSetting[i] = new CSeekDBSetting(vecDBSetting[i]);
+
+	m_useNibble = useNibble;
+
 	for(i=0; i<vecDBSetting.size(); i++){
 		if(dist_measure==CSeekDataset::CORRELATION &&
 		vecDBSetting[i]->GetValue("sinfo")=="NA"){

src/seekcentral.h

 	/* for network mode */
 	int m_iClient;
 	bool m_bEnableNetwork;
-	bool m_bSharedDB; //if m_DB is shared between multiple CSeekCentral instances
+	//bool m_bSharedDB; //if m_DB is shared between multiple CSeekCentral instances
+
+	vector<CSeekDBSetting*> m_vecDBSetting; //DBSetting
+	bool m_useNibble;
 };
 
 

src/seekdataset.h

 		m_numDB = numDB;
 	}
 
+	CSeekDBSetting(CSeekDBSetting const *g){ // builds a new setting by copying the nine members below from *g; g is dereferenced without a null check
+		m_gvarDirectory = g->m_gvarDirectory;
+		m_sinfoDirectory = g->m_sinfoDirectory;
+		m_platformDirectory = g->m_platformDirectory;
+		m_prepDirectory = g->m_prepDirectory;
+		m_dbDirectory = g->m_dbDirectory;
+		m_geneMapFile = g->m_geneMapFile;
+		m_quantFile = g->m_quantFile;
+		m_dsetFile = g->m_dsetFile;
+		m_numDB = g->m_numDB; // last of the members copied from the source object
+	}
+
 	~CSeekDBSetting(){ // empty body: no explicit cleanup is performed here
 	}
 

src/seekreader.cpp

 	ret = system("date +%s%N 1>&2");
 	if(bNetwork && CSeekNetwork::Send(iClient, "Reading " + 
 		CSeekTools::ConvertInt(allQ.size()) + 
-		" query genes' correlations")==-1){
+		" query genes' correlations (estimated time " + 
+		CSeekTools::ConvertInt(allQ.size() * 2) + 
+		"s; stay on this page)")==-1){
 		fprintf(stderr, "Error sending client message\n");
 		return false;
 	}
 	fprintf(stderr, "Finished reading query genes' correlations\n");
 	ret = system("date +%s%N 1>&2");
 	if(bNetwork && CSeekNetwork::Send(iClient, 
-		"Finished reading query genes' correlations")==-1){
+		"Finished reading. Now doing search (estimated time " +
+		CSeekTools::ConvertInt(allQ.size()) + 
+		"s; stay on this page)")==-1){
 		fprintf(stderr, "Error sending client message\n");
 		return false;
 	}
 }
  
 unsigned int CStrassen::nextPowerOfTwo(int n) { // evaluates 2 raised to ceil(log2(n)) using the floating-point pow/log2 routines
-	return pow(2, int(ceil(log2(n))));
+	return (unsigned int)pow(2, int(ceil(log2(n)))); // same expression as the removed line; the result of pow is now cast explicitly to unsigned int
 }
  
 void CStrassen::strassen(vector< vector<float> > &A,

tools/SeekAggregatedDataset/SeekAggregatedDataset.cpp

 	}else{
 		pistm = &cin;
 	}
-
-	fprintf(stderr, "Reading gene list\n");	
+	
 	map<string, size_t> mapstriGenes;
 	while( !pistm->eof( ) ) {
 		pistm->getline( acBuffer, c_iBuffer - 1 );
 			
 	//char acBuffer[1024];
 
-	fprintf(stderr, "Finished reading gene map\n");
 	if(sArgs.pcl_flag==1){
 		string pcl_dir = sArgs.pcl_dir_arg;
 		string output_dir = sArgs.dir_out_arg;
-		fprintf(stderr, "Arrived here\n");
 		vector<string> pcl_list;
-		vector< vector<string> > vecstrAllQuery;
 		int numGenes = vecstrGenes.size();
-	
-		fprintf(stderr, "Reading query\n");	
-		if(!CSeekTools::ReadMultipleQueries(sArgs.query_arg, vecstrAllQuery))
-			return -1;
-		
-		fprintf(stderr, "Reading pcl list\n");	
+			
 		CSeekTools::ReadListOneColumn(sArgs.pcl_list_arg, pcl_list);
-		vector< vector< vector<float> > > mat; //only needed for cor-calc
+		vector< vector< vector<float> > > mat;
 		vector<CSeekIntIntMap*> dm;
 		dm.resize(pcl_list.size());
-		mat.resize(pcl_list.size()); //only needed for cor-calc
+		mat.resize(pcl_list.size());
+		
 		for(i=0; i<pcl_list.size(); i++){
 			fprintf(stderr, "Reading %d: %s\n", i, pcl_list[i].c_str());
 			dm[i] = new CSeekIntIntMap(vecstrGenes.size());
 			CPCL pcl;
 			pcl.Open(pclfile.c_str());
 			int totNumExperiments = pcl.GetExperiments();
+
+			//fprintf(stderr, "Done %d\n", totNumExperiments);		
 			
 			vector<utype> presentIndex;
 			vector<string> presentGeneNames;
 				presentGeneNames.push_back(vecstrGenes[j]);
 				dm[i]->Add(j);
 			}
-			
-			//only needed for correlation calc
+
+			//fprintf(stderr, "Done 1 %d\n", presentGeneNames.size());
+	
 			mat[i].resize(presentIndex.size());
 			for(j=0; j<presentIndex.size(); j++)
 				mat[i][j].resize(totNumExperiments);
-			
+		
+			bool containsLoadingError = false;
 			for(j=0; j<presentIndex.size(); j++){
 				float *val = pcl.Get(presentIndex[j]);
 				for(k=0; k<pcl.GetExperiments(); k++){
 					mat[i][j][k] = val[k];	
 					if(isinf(val[k])||isnan(val[k])){
-						fprintf(stderr, "loading error: %d of %d\n", k, pcl.GetExperiments());
+						//fprintf(stderr, "loading error: %d of %d\n", k, pcl.GetExperiments());
+						containsLoadingError = true;
 					}
 				}
 			}
+			if(containsLoadingError)
+				fprintf(stderr, "Loading error!\n");
+			//fprintf(stderr, "Done 2\n");		
 
 		}
 	
 
 		int numActualGenes = geneMap->GetNumSet();
 
+		fprintf(stderr, "Number of actual genes %d\n", numActualGenes);
+
+
+		//generate pairs per machine, Step 1================================
+		if(sArgs.step_num_arg==1){
+			vector<utype> pairs;
+			int numP = numActualGenes*(numActualGenes-1);
+			pairs.resize(numP);
+			int ki = 0;
+			for(i=0; i<numActualGenes; i++){
+				for(j=i+1; j<numActualGenes; j++){
+					pairs[ki] = i; 
+					ki++;
+					pairs[ki] = j;
+					ki++;
+				}
+			}
+
+			if(ki!=numP){
+				fprintf(stderr, "Cannot continue!\n");
+				return -1;
+			}
+
+			int num_batches = sArgs.num_batch_arg;
+			int num_pairs_per_file = (numP / 2) / num_batches;
+	
+			ki = 0;
+			vector<utype> pp;
+			pp.resize(num_pairs_per_file*2);
+			char destfile[256];
+			int kj = 0;
+			int ii = 0;
+
+			fprintf(stderr, "Numpairs per file: %d\n", num_pairs_per_file);
+			for(ii=0; ii<numP/2; ii++){
+				string pairs_dir = sArgs.pairs_dir_arg;
+
+				if(ii%num_pairs_per_file==0 && ii>0){ //happen at the end of a cycle
+					sprintf(destfile, "%s/pairs_to_do.%d", pairs_dir.c_str(), ki);
+					CSeekTools::WriteArray(destfile, pp);
+					pp.clear();
+					pp.resize(num_pairs_per_file*2);
+					ki++;
+					kj = 0;
+				}
+				pp[kj] = pairs[ii*2];
+				pp[kj+1] = pairs[ii*2+1];
+				kj+=2;
+				if(ii==numP/2-1){
+					sprintf(destfile, "%s/pairs_to_do.%d", pairs_dir.c_str(), ki);
+					pp.resize(kj);
+					CSeekTools::WriteArray(destfile, pp);
+				}
+			}
+		
+			fprintf(stderr, "Finished\n");	
+			//getchar();
+			return 0;
+		}
+	
+		//=====================================================================
 		
 		//Do pair per machine, Step 2============================
-/*
-		vector<utype> pairs;
-		CSeekTools::ReadArray("/tmp/pairs_to_do", pairs);
-
-		int numP = pairs.size()/2;
-		int pi=0;
-		const vector<utype> &allGenes = geneMap->GetAllReverse();
-		vector<float> cor;
-		cor.resize(numP);
-
-		#pragma omp parallel for \
-		shared(allGenes, cor, mat, dm, pairs) \
-		private(pi) \
-		firstprivate(numP) schedule(dynamic)
-		for(pi=0; pi<numP; pi++){
-			utype g1 = allGenes[pairs[pi*2]];
-			utype g2 = allGenes[pairs[pi*2+1]];
-			if(g1==g2){
-				cor[pi] = -320;
-				continue;
-			}
-			calculate_correlation(mat, dm, g1, g2, cor[pi]);
-			if(pi%1000==0){
-				fprintf(stderr, "  %d of %d\n", pi, numP);
-			}
-		}
-
-		CSeekTools::WriteArray("/tmp/results_pairs", cor);		
-*/		
-		//=============================================================	
-		
-		
-	
-		vector< vector<float> > correlations;
-		correlations.resize(numActualGenes);
-		for(i=0; i<numActualGenes; i++){
-			correlations[i].resize(numActualGenes);
-		}
-
-		//combining pairs, Step 3=================================
-	/*
-		int max_i = 49;
-		char file[256];
-		const vector<utype> &allGenes = geneMap->GetAllReverse();
-
-		for(i=0; i<=max_i; i++){
+		if(sArgs.step_num_arg==2){
+			
 			vector<utype> pairs;
-			vector<float> cor;
-
-			sprintf(file, "/memex/qzhu/p1/pairs_to_do.%d", i);
-			CSeekTools::ReadArray(file, pairs);
-			sprintf(file, "/memex/qzhu/p1/oct6/pairs_to_do.%d_results", i);
-			CSeekTools::ReadArray(file, cor);
+			char pair_dir[256];
+			sprintf(pair_dir, "%s/pairs_to_do.%d", sArgs.pairs_dir_arg, sArgs.batch_num_arg);
+			CSeekTools::ReadArray(pair_dir, pairs);
 
 			int numP = pairs.size()/2;
 			int pi=0;
+			const vector<utype> &allGenes = geneMap->GetAllReverse();
+			vector<float> cor;
+			cor.resize(numP);
+		
+			omp_set_num_threads(4);
 
+			#pragma omp parallel for \
+			shared(allGenes, cor, mat, dm, pairs) \
+			private(pi) \
+			firstprivate(numP) schedule(dynamic)
 			for(pi=0; pi<numP; pi++){
-				correlations[pairs[pi*2]][pairs[pi*2+1]] = cor[pi];
-				correlations[pairs[pi*2+1]][pairs[pi*2]] = cor[pi];
+				utype g1 = allGenes[pairs[pi*2]];
+				utype g2 = allGenes[pairs[pi*2+1]];
+				if(g1==g2){
+					cor[pi] = -320;
+					continue;
+				}
+				calculate_correlation(mat, dm, g1, g2, cor[pi]);
+				if(pi%1000==0){
+					fprintf(stderr, "  %d of %d\n", pi, numP);
+				}
+				//fprintf(stderr, "%.5f\n", cor[pi]);
 			}
+
+			sprintf(pair_dir, "%s/results_pairs.%d", sArgs.pairs_dir_arg, sArgs.batch_num_arg);
+			CSeekTools::WriteArray(pair_dir, cor);		
+			return 0;
 		}
 
-		vector<float> correlation1D;
-		correlation1D.resize(numActualGenes*numActualGenes);
-		int kk=0;
-		for(i=0; i<numActualGenes; i++){
+		//=============================================================	
+		
+		
+		
+		
+
+		//combining pairs, Step 3=================================
+
+		if(sArgs.step_num_arg==3){
+			vector< vector<float> > correlations;
+			correlations.resize(numActualGenes);
+			for(i=0; i<numActualGenes; i++){
+				correlations[i].resize(numActualGenes);
+			}
+	
+			int max_i = sArgs.num_batch_arg;
+			char file[256];
+			const vector<utype> &allGenes = geneMap->GetAllReverse();
+
+			for(i=0; i<=max_i; i++){
+				vector<utype> pairs;
+				vector<float> cor;
+
+				sprintf(file, "%s/pairs_to_do.%d", sArgs.pairs_dir_arg, i);
+				CSeekTools::ReadArray(file, pairs);
+				sprintf(file, "%s/results_pairs.%d", sArgs.pairs_dir_arg, i);
+				CSeekTools::ReadArray(file, cor);
+
+				int numP = pairs.size()/2;
+				int pi=0;
+
+				for(pi=0; pi<numP; pi++){
+					correlations[pairs[pi*2]][pairs[pi*2+1]] = cor[pi];
+					correlations[pairs[pi*2+1]][pairs[pi*2]] = cor[pi];
+				}
+			}
+
+			/*vector<float> correlation1D;
+			correlation1D.resize(numActualGenes*numActualGenes);
+			int kk=0;
+			for(i=0; i<numActualGenes; i++){
+				for(j=0; j<numActualGenes; j++){
+					correlation1D[kk] = correlations[i][j];
+					kk++;
+				}
+			}
+			CSeekTools::WriteArray(output_dir + "/aggregated_dataset_correlation", correlation1D);
+			fprintf(stderr, "Finished creating file\n");
+			*/
+			//=============================================
+			//Load aggregated dataset, Step 4=====================================
+			/*	
+			vector<float> correlation1D;
+			CSeekTools::ReadArray("/r04/qzhu/Seek/aggregated_dataset_correlation", correlation1D);
+			int pi = 0;
+			for(i=0; i<numActualGenes; i++){
+				for(j=0; j<numActualGenes; j++){
+					correlations[i][j] = correlation1D[pi];
+					pi++;
+				}
+			}
+
+			int ss = sqrt(correlation1D.size());
+
+			fprintf(stderr, "Dimensions of matrix %d\n", ss);
+
+			correlation1D.clear();
+			*/
+
+			//CONVERT TO DAB
+			//START
+			//const vector<utype> &allGenes = geneMap->GetAllReverse();		
+			vector<string> vecstrActualGenes;
 			for(j=0; j<numActualGenes; j++){
-				correlation1D[kk] = correlations[i][j];
-				kk++;
+				utype gi = allGenes[j];
+				string str_gi = vecstrGenes[gi];
+				vecstrActualGenes.push_back(str_gi);
 			}
+
+			CDat CD;
+			CD.Open(vecstrActualGenes);
+			for(i=0; i<numActualGenes; i++){
+				for(j=i+1; j<numActualGenes; j++){
+					CD.Set(i, j, CMeta::GetNaN());
+				}
+			}
+
+			for(j=0; j<numActualGenes; j++){
+				utype gi = allGenes[j];
+				string str_gi = vecstrGenes[gi];
+				for(k=0; k<numActualGenes; k++){
+					utype gj = allGenes[k];
+					string str_gj = vecstrGenes[gj];
+					float cor = correlations[j][k];
+					if(cor==-320) continue;
+					CD.Set(j, k, cor);
+				}
+			}
+			string file_out = sArgs.dir_out_arg;
+			file_out = file_out +  "/" + "aggregated_dataset_correlation.dab";
+			CD.Save(file_out.c_str()); 
+			return 0;
 		}
-		CSeekTools::WriteArray("/memex/qzhu/p1/aggregated_dataset_correlation", correlation1D);
-
-		fprintf(stderr, "Finished creating file\n");
-		getchar();
-*/
-		//============================================================
+		//======================================================================
 
 
 		/*
-		//generate pairs per machine, Step 1================================
-		vector<utype> pairs;
-		int numP = numActualGenes*(numActualGenes-1);
-		pairs.resize(numP);
-		int ki = 0;
-		for(i=0; i<numActualGenes; i++){
-			for(j=i+1; j<numActualGenes; j++){
-				pairs[ki] = i; 
-				ki++;
-				pairs[ki] = j;
-				ki++;
-			}
-		}
-
-		if(ki!=numP){
-			fprintf(stderr, "Cannot continue!\n");
-			return -1;
-		}
-
-		int num_pairs_per_file = (numP / 2) / 49;
-		
-		ki = 0;
-		vector<utype> pp;
-		pp.resize(num_pairs_per_file*2);
-		char destfile[256];
-		int kj = 0;
-		int ii = 0;
-
-		fprintf(stderr, "Numpairs per file: %d\n", num_pairs_per_file);
-		for(ii=0; ii<numP/2; ii++){
-			if(ii%num_pairs_per_file==0 && ii>0){
-				sprintf(destfile, "/memex/qzhu/p1/pairs_to_do.%d", ki);
-				CSeekTools::WriteArray(destfile, pp);
-				pp.clear();
-				pp.resize(num_pairs_per_file*2);
-				ki++;
-				kj = 0;
-			}
-			pp[kj] = pairs[ii*2];
-			pp[kj+1] = pairs[ii*2+1];
-			kj+=2;
-			if(ii==numP/2-1){
-				sprintf(destfile, "/memex/qzhu/p1/pairs_to_do.%d", ki);
-				pp.resize(kj);
-				CSeekTools::WriteArray(destfile, pp);
-			}
-		}
-		
-
-		fprintf(stderr, "Finished\n");	
-		getchar();
-		//=====================================================================
-		*/
-
-		//Load aggregated dataset, Step 4=====================================
-
-		vector<float> correlation1D;
-		CSeekTools::ReadArray("/home/qzhu/Seek/aggregated_dataset_correlation", correlation1D);
-		int pi = 0;
-		for(i=0; i<numActualGenes; i++){
-			for(j=0; j<numActualGenes; j++){
-				correlations[i][j] = correlation1D[pi];
-				pi++;
-			}
-		}
-		correlation1D.clear();
-
-		//======================================================================
-
-
-		/*
-		const vector<utype> &allGenes = geneMap->GetAllReverse();
+		const vector<ushort> &allGenes = geneMap->GetAllReverse();
 		//start correlation calculations
 		for(i=0; i<numActualGenes; i++){
-			utype g1 = allGenes[i];
+			ushort g1 = allGenes[i];
 			fprintf(stderr, "Gene %d of %d: %d\n", i, numActualGenes, numActualGenes - (i+1));
 
 			#pragma omp parallel for \
 			private(j) \
 			firstprivate(numActualGenes, g1, i) schedule(dynamic)
 			for(j=i+1; j<numActualGenes; j++){
-				utype tid = omp_get_thread_num();
-				utype g2 = allGenes[j];
+				ushort tid = omp_get_thread_num();
+				ushort g2 = allGenes[j];
 				if(g1==g2){
 					correlations[i][j] = -320; //Null value
 					correlations[j][i] = -320;
 		CSeekTools::WriteArray("/memex/qzhu/p1/aggregated_dataset_correlation", correlation1D);
 		*/
 
+
+		
+
 		//Evaluation Last step==================================================
-		
+		//START
+		/*	
+		vector< vector<string> > vecstrAllQuery;
+		if(!CSeekTools::ReadMultipleQueries(sArgs.query_arg, vecstrAllQuery))
+			return -1;
 		const vector<utype> &allGenes = geneMap->GetAllReverse();		
 
 		for(i=0; i<vecstrAllQuery.size(); i++){
 			
 			//gs.clear();
 			//b.clear();		
-
         }
-	
-		
+		//END
+		//==============================================================
+		*/
 	}
 	
 	

tools/SeekAggregatedDataset/SeekAggregatedDataset.ggo

 								string typestr="filename"
 option	"pcl_dir"			F	"PCL directory"
 								string typestr="directory"
+option	"step_num"			S	"Step Number (4 steps) (1: separate pairs to batches, 2: calculate Pearson for pairs in each batch (need a batch number), 3: merge Pearson from all batches and output a DAB)"
+								int default="0"
 
 section "Input"
 option	"input"				i	"Gene mapping file"
 								string typestr="filename"	yes
 option	"query"				q	"Query file"
 								string typestr="filename"	yes
+option	"num_batch"			b	"Number of batches to split pairs to (for step 1)"
+								int default="10"
+option	"pairs_dir"			p	"Pairs directory (for steps 1, 2). Pearson for the pairs will also be stored here."
+								string typestr="filename"	yes
+option	"batch_num"			r	"Batch number (for step 2)"
+								int default="0" 
 
 section "Output"
-option	"dir_out"			D	"Output directory"
+option	"dir_out"			D	"DAB output directory (for step 3)"
 								string typestr="directory"
 

tools/SeekAggregatedDataset/cmdline.c

 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /memex/qzhu/usr/bin/gengetopt -iSeekAggregatedDataset.ggo --default-optional -u -N -e 
+  gengetopt -iSeekAggregatedDataset.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:
 const char *gengetopt_args_info_description = "";
 
 const char *gengetopt_args_info_help[] = {
-  "  -h, --help               Print help and exit",
-  "      --version            Print version and exit",
+  "  -h, --help                Print help and exit",
+  "      --version             Print version and exit",
   "\nMode:",
-  "  -e, --pcl                PCL mode, suitable for dataset gene variance \n                             calculation  (default=off)",
+  "  -e, --pcl                 PCL mode, suitable for dataset gene variance \n                              calculation  (default=off)",
   "\nPCL mode:",
-  "  -V, --pcl_list=filename  PCL list",
-  "  -F, --pcl_dir=directory  PCL directory",
+  "  -V, --pcl_list=filename   PCL list",
+  "  -F, --pcl_dir=directory   PCL directory",
+  "  -S, --step_num=INT        Step Number (4 steps) (1: separate pairs to \n                              batches, 2: calculate Pearson for pairs in each \n                              batch (need a batch number), 3: merge Pearson \n                              from all batches and output a DAB)  (default=`0')",
   "\nInput:",
-  "  -i, --input=filename     Gene mapping file",
-  "  -q, --query=filename     Query file",
+  "  -i, --input=filename      Gene mapping file",
+  "  -q, --query=filename      Query file",
+  "  -b, --num_batch=INT       Number of batches to split pairs to (for step 1)  \n                              (default=`10')",
+  "  -p, --pairs_dir=filename  Pairs directory (for steps 1, 2). Pearson for the \n                              pairs will also be stored here.",
+  "  -r, --batch_num=INT       Batch number (for step 2)  (default=`0')",
   "\nOutput:",
-  "  -D, --dir_out=directory  Output directory",
+  "  -D, --dir_out=directory   DAB output directory (for step 3)",
     0
 };
 
 typedef enum {ARG_NO
   , ARG_FLAG
   , ARG_STRING
+  , ARG_INT
 } cmdline_parser_arg_type;
 
 static
   args_info->pcl_given = 0 ;
   args_info->pcl_list_given = 0 ;
   args_info->pcl_dir_given = 0 ;
+  args_info->step_num_given = 0 ;
   args_info->input_given = 0 ;
   args_info->query_given = 0 ;
+  args_info->num_batch_given = 0 ;
+  args_info->pairs_dir_given = 0 ;
+  args_info->batch_num_given = 0 ;
   args_info->dir_out_given = 0 ;
 }
 
   args_info->pcl_list_orig = NULL;
   args_info->pcl_dir_arg = NULL;
   args_info->pcl_dir_orig = NULL;
+  args_info->step_num_arg = 0;
+  args_info->step_num_orig = NULL;
   args_info->input_arg = NULL;
   args_info->input_orig = NULL;
   args_info->query_arg = NULL;
   args_info->query_orig = NULL;
+  args_info->num_batch_arg = 10;
+  args_info->num_batch_orig = NULL;
+  args_info->pairs_dir_arg = NULL;
+  args_info->pairs_dir_orig = NULL;
+  args_info->batch_num_arg = 0;
+  args_info->batch_num_orig = NULL;
   args_info->dir_out_arg = NULL;
   args_info->dir_out_orig = NULL;
   
   args_info->pcl_help = gengetopt_args_info_help[3] ;
   args_info->pcl_list_help = gengetopt_args_info_help[5] ;
   args_info->pcl_dir_help = gengetopt_args_info_help[6] ;
-  args_info->input_help = gengetopt_args_info_help[8] ;
-  args_info->query_help = gengetopt_args_info_help[9] ;
-  args_info->dir_out_help = gengetopt_args_info_help[11] ;
+  args_info->step_num_help = gengetopt_args_info_help[7] ;
+  args_info->input_help = gengetopt_args_info_help[9] ;
+  args_info->query_help = gengetopt_args_info_help[10] ;
+  args_info->num_batch_help = gengetopt_args_info_help[11] ;
+  args_info->pairs_dir_help = gengetopt_args_info_help[12] ;
+  args_info->batch_num_help = gengetopt_args_info_help[13] ;
+  args_info->dir_out_help = gengetopt_args_info_help[15] ;
   
 }
 
   free_string_field (&(args_info->pcl_list_orig));
   free_string_field (&(args_info->pcl_dir_arg));
   free_string_field (&(args_info->pcl_dir_orig));
+  free_string_field (&(args_info->step_num_orig));
   free_string_field (&(args_info->input_arg));
   free_string_field (&(args_info->input_orig));
   free_string_field (&(args_info->query_arg));
   free_string_field (&(args_info->query_orig));
+  free_string_field (&(args_info->num_batch_orig));
+  free_string_field (&(args_info->pairs_dir_arg));
+  free_string_field (&(args_info->pairs_dir_orig));
+  free_string_field (&(args_info->batch_num_orig));
   free_string_field (&(args_info->dir_out_arg));
   free_string_field (&(args_info->dir_out_orig));
   
     write_into_file(outfile, "pcl_list", args_info->pcl_list_orig, 0);
   if (args_info->pcl_dir_given)
     write_into_file(outfile, "pcl_dir", args_info->pcl_dir_orig, 0);
+  if (args_info->step_num_given)
+    write_into_file(outfile, "step_num", args_info->step_num_orig, 0);
   if (args_info->input_given)
     write_into_file(outfile, "input", args_info->input_orig, 0);
   if (args_info->query_given)
     write_into_file(outfile, "query", args_info->query_orig, 0);
+  if (args_info->num_batch_given)
+    write_into_file(outfile, "num_batch", args_info->num_batch_orig, 0);
+  if (args_info->pairs_dir_given)
+    write_into_file(outfile, "pairs_dir", args_info->pairs_dir_orig, 0);
+  if (args_info->batch_num_given)
+    write_into_file(outfile, "batch_num", args_info->batch_num_orig, 0);
   if (args_info->dir_out_given)
     write_into_file(outfile, "dir_out", args_info->dir_out_orig, 0);
   
       error = 1;
     }
   
+  if (! args_info->pairs_dir_given)
+    {
+      fprintf (stderr, "%s: '--pairs_dir' ('-p') option required%s\n", prog_name, (additional_error ? additional_error : ""));
+      error = 1;
+    }
+  
   
   /* checks for dependences among options */
 
   case ARG_FLAG:
     *((int *)field) = !*((int *)field);
     break;
+  case ARG_INT:
+    if (val) *((int *)field) = strtol (val, &stop_char, 0);
+    break;
   case ARG_STRING:
     if (val) {
       string_field = (char **)field;
     break;
   };
 
+  /* check numeric conversion */
+  switch(arg_type) {
+  case ARG_INT:
+    if (val && !(stop_char && *stop_char == '\0')) {
+      fprintf(stderr, "%s: invalid numeric value: %s\n", package_name, val);
+      return 1; /* failure */
+    }
+    break;
+  default:
+    ;
+  };
 
   /* store the original value */
   switch(arg_type) {
         { "pcl",	0, NULL, 'e' },
         { "pcl_list",	1, NULL, 'V' },
         { "pcl_dir",	1, NULL, 'F' },
+        { "step_num",	1, NULL, 'S' },
         { "input",	1, NULL, 'i' },
         { "query",	1, NULL, 'q' },
+        { "num_batch",	1, NULL, 'b' },
+        { "pairs_dir",	1, NULL, 'p' },
+        { "batch_num",	1, NULL, 'r' },
         { "dir_out",	1, NULL, 'D' },
         { NULL,	0, NULL, 0 }
       };
 
-      c = getopt_long (argc, argv, "heV:F:i:q:D:", long_options, &option_index);
+      c = getopt_long (argc, argv, "heV:F:S:i:q:b:p:r:D:", long_options, &option_index);
 
       if (c == -1) break;	/* Exit from `while (1)' loop.  */
 
             goto failure;
         
           break;
+        case 'S':	/* Step Number (4 steps) (1: separate pairs to batches, 2: calculate Pearson for pairs in each batch (need a batch number), 3: merge Pearson from all batches and output a DAB).  */
+        
+        
+          if (update_arg( (void *)&(args_info->step_num_arg), 
+               &(args_info->step_num_orig), &(args_info->step_num_given),
+              &(local_args_info.step_num_given), optarg, 0, "0", ARG_INT,
+              check_ambiguity, override, 0, 0,
+              "step_num", 'S',
+              additional_error))
+            goto failure;
+        
+          break;
         case 'i':	/* Gene mapping file.  */
         
         
             goto failure;
         
           break;
-        case 'D':	/* Output directory.  */
+        case 'b':	/* Number of batches to split pairs to (for step 1).  */
+        
+        
+          if (update_arg( (void *)&(args_info->num_batch_arg), 
+               &(args_info->num_batch_orig), &(args_info->num_batch_given),
+              &(local_args_info.num_batch_given), optarg, 0, "10", ARG_INT,
+              check_ambiguity, override, 0, 0,
+              "num_batch", 'b',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'p':	/* Pairs directory (for steps 1, 2). Pearson for the pairs will also be stored here..  */
+        
+        
+          if (update_arg( (void *)&(args_info->pairs_dir_arg), 
+               &(args_info->pairs_dir_orig), &(args_info->pairs_dir_given),
+              &(local_args_info.pairs_dir_given), optarg, 0, 0, ARG_STRING,
+              check_ambiguity, override, 0, 0,
+              "pairs_dir", 'p',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'r':	/* Batch number (for step 2).  */
+        
+        
+          if (update_arg( (void *)&(args_info->batch_num_arg), 
+               &(args_info->batch_num_orig), &(args_info->batch_num_given),
+              &(local_args_info.batch_num_given), optarg, 0, "0", ARG_INT,
+              check_ambiguity, override, 0, 0,
+              "batch_num", 'r',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'D':	/* DAB output directory (for step 3).  */
         
         
           if (update_arg( (void *)&(args_info->dir_out_arg), 

tools/SeekAggregatedDataset/cmdline.h

   char * pcl_dir_arg;	/**< @brief PCL directory.  */
   char * pcl_dir_orig;	/**< @brief PCL directory original value given at command line.  */
   const char *pcl_dir_help; /**< @brief PCL directory help description.  */
+  int step_num_arg;	/**< @brief Step Number (4 steps) (1: separate pairs to batches, 2: calculate Pearson for pairs in each batch (need a batch number), 3: merge Pearson from all batches and output a DAB) (default='0').  */
+  char * step_num_orig;	/**< @brief Step Number (4 steps) (1: separate pairs to batches, 2: calculate Pearson for pairs in each batch (need a batch number), 3: merge Pearson from all batches and output a DAB) original value given at command line.  */
+  const char *step_num_help; /**< @brief Step Number (4 steps) (1: separate pairs to batches, 2: calculate Pearson for pairs in each batch (need a batch number), 3: merge Pearson from all batches and output a DAB) help description.  */
   char * input_arg;	/**< @brief Gene mapping file.  */
   char * input_orig;	/**< @brief Gene mapping file original value given at command line.  */
   const char *input_help; /**< @brief Gene mapping file help description.  */
   char * query_arg;	/**< @brief Query file.  */
   char * query_orig;	/**< @brief Query file original value given at command line.  */
   const char *query_help; /**< @brief Query file help description.  */
-  char * dir_out_arg;	/**< @brief Output directory.  */
-  char * dir_out_orig;	/**< @brief Output directory original value given at command line.  */
-  const char *dir_out_help; /**< @brief Output directory help description.  */
+  int num_batch_arg;	/**< @brief Number of batches to split pairs to (for step 1) (default='10').  */
+  char * num_batch_orig;	/**< @brief Number of batches to split pairs to (for step 1) original value given at command line.  */
+  const char *num_batch_help; /**< @brief Number of batches to split pairs to (for step 1) help description.  */
+  char * pairs_dir_arg;	/**< @brief Pairs directory (for steps 1, 2). Pearson for the pairs will also be stored here..  */
+  char * pairs_dir_orig;	/**< @brief Pairs directory (for steps 1, 2). Pearson for the pairs will also be stored here. original value given at command line.  */
+  const char *pairs_dir_help; /**< @brief Pairs directory (for steps 1, 2). Pearson for the pairs will also be stored here. help description.  */
+  int batch_num_arg;	/**< @brief Batch number (for step 2) (default='0').  */
+  char * batch_num_orig;	/**< @brief Batch number (for step 2) original value given at command line.  */
+  const char *batch_num_help; /**< @brief Batch number (for step 2) help description.  */
+  char * dir_out_arg;	/**< @brief DAB output directory (for step 3).  */
+  char * dir_out_orig;	/**< @brief DAB output directory (for step 3) original value given at command line.  */
+  const char *dir_out_help; /**< @brief DAB output directory (for step 3) help description.  */
   
   unsigned int help_given ;	/**< @brief Whether help was given.  */
   unsigned int version_given ;	/**< @brief Whether version was given.  */
   unsigned int pcl_given ;	/**< @brief Whether pcl was given.  */
   unsigned int pcl_list_given ;	/**< @brief Whether pcl_list was given.  */
   unsigned int pcl_dir_given ;	/**< @brief Whether pcl_dir was given.  */
+  unsigned int step_num_given ;	/**< @brief Whether step_num was given.  */
   unsigned int input_given ;	/**< @brief Whether input was given.  */
   unsigned int query_given ;	/**< @brief Whether query was given.  */
+  unsigned int num_batch_given ;	/**< @brief Whether num_batch was given.  */
+  unsigned int pairs_dir_given ;	/**< @brief Whether pairs_dir was given.  */
+  unsigned int batch_num_given ;	/**< @brief Whether batch_num was given.  */
   unsigned int dir_out_given ;	/**< @brief Whether dir_out was given.  */
 
   char **inputs ; /**< @brief unamed options (options without names) */

tools/SeekIterative/SeekIterative.cpp

 		q_weight.resize(vecstrAllQuery.size());
 		for(i=0; i<vecstrAllQuery.size(); i++){
 			CSeekTools::InitVector(q_weight[i], numGenes, (float) 0);
-			for(j=0; j<vecstrAllQuery[i].size(); j++)
-				q_weight[i][mapstriGenes[vecstrAllQuery[i][j]]] = 1;
+			for(j=0; j<vecstrAllQuery[i].size(); j++){
+				map<string, size_t>::iterator it = mapstriGenes.find(vecstrAllQuery[i][j]);
+				if(it!=mapstriGenes.end()){
+					q_weight[i][it->second] = 1;
+				}
+			}
 		}
 
 		//preparing query2
 		qq.resize(vecstrAllQuery.size());
 		for(i=0; i<vecstrAllQuery.size(); i++){
 			qq[i] = vector<unsigned int>();
-			for(j=0; j<vecstrAllQuery[i].size(); j++)
-				qq[i].push_back(mapstriGenes[vecstrAllQuery[i][j]]);
+			for(j=0; j<vecstrAllQuery[i].size(); j++){
+				map<string, size_t>::iterator it = mapstriGenes.find(vecstrAllQuery[i][j]);
+				if(it!=mapstriGenes.end()){
+					qq[i].push_back(it->second);
+				}
+			}
 		}
 
 		//selected datasets for each query

tools/SeekReader/SeekReader.cpp

 #include "stdafx.h"
 #include "cmdline.h"
 
+/**
+ * Arithmetic mean of the values in f.
+ *
+ * Values are summed front to back in single precision and divided by the
+ * element count.
+ *
+ * @param f  values to average; only read, never modified.
+ * @return   sum(f) / f.size(). For an empty vector this evaluates 0/0
+ *           (NaN on IEEE-754 platforms) rather than raising an error.
+ */
+float get_mean(const std::vector<float> &f){
+	float sum = 0;
+	// size_t index: f.size() is unsigned, so an int counter would mix
+	// signed and unsigned operands in the loop condition.
+	for(size_t i=0; i<f.size(); i++){
+		sum+=f[i];
+	}
+	return sum/(float)(f.size());
+}
+
+/**
+ * Standard deviation of the values in f around a caller-supplied mean.
+ *
+ * Squared deviations from mean are summed front to back in single precision;
+ * the sum is divided by the element count N (not N-1) before the square root.
+ *
+ * @param f     values to measure; only read, never modified.
+ * @param mean  centre to measure deviations from; it is used as given and is
+ *              not recomputed from f.
+ * @return      sqrt(sum((f[i]-mean)^2) / f.size()). For an empty vector this
+ *              evaluates sqrt(0/0) (NaN on IEEE-754 platforms).
+ */
+float get_stdev(const std::vector<float> &f, float mean){
+	float sum_dev = 0;
+	// size_t index: f.size() is unsigned, so an int counter would mix
+	// signed and unsigned operands in the loop condition.
+	for(size_t i=0; i<f.size(); i++){
+		const float diff = f[i] - mean;
+		sum_dev+= diff * diff;
+	}
+	return std::sqrt(sum_dev / (float) f.size());
+}
+
 
 int main( int iArgs, char** aszArgs ) {
 	static const size_t	c_iBuffer	= 1024;
 		return 0;
 	}
 
+	if(sArgs.combine_pcl_flag==1){
+		vector<string> vecstrPCL;
+		if(!CSeekTools::ReadListOneColumn(sArgs.pcl_list_arg, vecstrPCL))
+			return false;
+		vector<CPCL*> vc;
+		vc.resize(vecstrPCL.size());
+		utype i, j, k;
+
+		float **new_m;
+		int totExperiments = 0;
+		int totDatasets = vecstrPCL.size();
+		vector<int> geneFreq;
+		CSeekTools::InitVector(geneFreq, vecstrGenes.size(), (int) 0);
+		
+		for(i=0; i<vecstrPCL.size(); i++){
+			fprintf(stderr, "Reading %s...\n", vecstrPCL[i].c_str());	 
+			vc[i] = new CPCL();
+			CPCL *cp = vc[i];
+			cp->Open(vecstrPCL[i].c_str());
+			totExperiments+=cp->GetExperiments();
+			for(j=0; j<vecstrGenes.size(); j++){
+				int g = cp->GetGene(vecstrGenes[j]);
+				if(g==-1) continue;
+				geneFreq[j]++;
+			}
+		}
+
+		vector<string> pr; //all present genes
+		int totGenes = 0;
+		for(j=0; j<vecstrGenes.size(); j++){
+			if(geneFreq[j]==totDatasets){
+				totGenes++;
+				pr.push_back(vecstrGenes[j]);
+			}
+		}
+
+		//option 1 (Normalize within dataset)
+		vector<vector<vector<float> > > mm;
+		mm.resize(vc.size());	
+		for(i=0; i<vecstrPCL.size(); i++){
+			CPCL *cp = vc[i];
+			mm[i].resize(totGenes);
+
+			float **new_m = new float*[totGenes];
+			for(j=0; j<totGenes; j++)
+				new_m[j] = new float[cp->GetExperiments()];
+
+			vector<float> exp_avg;
+			vector<float> exp_stdev;
+			CSeekTools::InitVector(exp_avg, cp->GetExperiments(), (float)0);
+			CSeekTools::InitVector(exp_stdev, cp->GetExperiments(), (float)0);
+
+			for(j=0; j<pr.size(); j++){
+				int g = cp->GetGene(pr[j]);
+				float *vv = cp->Get(g);
+				for(k=0; k<cp->GetExperiments(); k++){
+					new_m[j][k] = vv[k];
+				}
+			}
+
+			set<int> badExperiments;
+			for(k=0; k<cp->GetExperiments(); k++){
+				vector<float> vf;
+				for(j=0; j<pr.size(); j++){
+					vf.push_back(new_m[j][k]);
+				}
+				exp_avg[k] = get_mean(vf);
+				exp_stdev[k] = get_stdev(vf, exp_avg[k]);
+				if(isinf(exp_avg[k]) || isnan(exp_avg[k])){
+					badExperiments.insert(k);
+					continue;
+				}
+			}
+
+			for(j=0; j<totGenes; j++){
+				vector<float> gv;
+				for(k=0; k<cp->GetExperiments(); k++){
+					if(badExperiments.find(k)==badExperiments.end()){
+						gv.push_back(new_m[j][k]);
+					}
+				}
+				float mean = get_mean(gv);
+				float stdev = get_stdev(gv, mean);
+				mm[i][j] = vector<float>();
+				for(k=0; k<cp->GetExperiments(); k++){
+					if(badExperiments.find(k)==badExperiments.end()){
+						mm[i][j].push_back((new_m[j][k] - mean) / stdev);
+					}
+				}	
+			}
+			for(j=0; j<totGenes; j++)
+				delete new_m[j];
+			delete new_m;
+		}
+
+		for(j=0; j<totGenes; j++){
+			vector<float> vv;
+			for(i=0; i<vecstrPCL.size(); i++){
+				for(k=0; k<mm[i][j].size(); k++){
+					vv.push_back(mm[i][j][k]);
+				}
+			}
+			for(i=0; i<vv.size(); i++){
+				int vi = 0;
+				if(vv[i]>=1.0)
+					vi = 1;
+				fprintf(stdout, "%d", vi);
+				if(i==vv.size()-1){
+					fprintf(stdout, "\n");
+				}else{
+					fprintf(stdout, "\t");
+				}
+			}
+		}
+		
+		//fprintf(stderr, "Number of datasets: %d. Number of genes with full coverage: %d.\n", 
+		//totDatasets, totGenes);
+
+		/*
+		new_m = new float*[totGenes];
+		for(i=0; i<totGenes; i++){
+			new_m[i] = new float[totExperiments];
+		}
+
+		int kk = 0;
+		for(i=0; i<vecstrPCL.size(); i++){
+			CPCL *cp = vc[i];
+			for(j=0; j<pr.size(); j++){
+				int g = cp->GetGene(pr[j]);
+				float *vv = cp->Get(g);
+				for(k=0; k<cp->GetExperiments(); k++){
+					new_m[j][kk+k] = vv[k];
+				}
+			}
+			kk+=cp->GetExperiments();
+		}
+
+		vector<float> allV;
+		allV.resize(totGenes);
+		set<int> badExperiments;
+		vector<float> averages;
+		averages.resize(totExperiments);
+		float mean_of_mean = 0;
+
+		for(i=0; i<totExperiments; i++){
+			for(j=0; j<totGenes; j++){
+				allV[j] = new_m[j][i];
+			}
+			float mean = get_mean(allV);
+			float stdev = get_stdev(allV, mean);
+			if(isinf(mean) || isnan(mean)){
+				badExperiments.insert(i);
+				continue;
+			}
+			averages[i] = mean;
+			mean_of_mean += mean;
+			//fprintf(stderr, "%d\t%.2f\t%.2f\n", i, mean, stdev);
+		}
+
+		mean_of_mean /= (float) (totExperiments - badExperiments.size());
+		//fprintf(stderr, "Mean of mean %.2f\n", mean_of_mean);
+
+		//adjustment by mean
+		vector<int> goodExperiments;
+		for(i=0; i<totExperiments; i++){
+			if(badExperiments.find(i)==badExperiments.end()){
+				float adj = averages[i] - mean_of_mean;
+				for(j=0; j<totGenes; j++){
+					new_m[j][i] -= adj;
+				}
+				goodExperiments.push_back(i);
+			}
+		}
+		fprintf(stderr, "Number of experiments: %d\n", goodExperiments.size());
+		*/
+
+		//Option 2 (normalize across all datasets)
+		/*
+		vector<float> mean_gene;
+		vector<float> stdev_gene;
+		CSeekTools::InitVector(mean_gene, pr.size(), (float) 0); //present genes
+		CSeekTools::InitVector(stdev_gene, pr.size(), (float) 0); //present genes
+		//calculate mean
+		for(i=0; i<goodExperiments.size(); i++){
+			int ii = goodExperiments[i];
+			for(j=0; j<totGenes; j++){
+				mean_gene[j] += new_m[j][ii];
+			}
+		}
+		//calculate mean and standard deviation
+		for(j=0; j<totGenes; j++)
+			mean_gene[j] /= (float) goodExperiments.size();
+		for(i=0; i<goodExperiments.size(); i++){
+			int ii = goodExperiments[i];
+			for(j=0; j<totGenes; j++){
+				float diff = new_m[j][ii] - mean_gene[j];
+				stdev_gene[j] += diff * diff;
+			}
+		}
+		for(j=0; j<totGenes; j++)
+			stdev_gene[j] = sqrt(stdev_gene[j] / (float) goodExperiments.size());
+		for(j=0; j<totGenes; j++){
+			for(i=0; i<goodExperiments.size(); i++){
+				float v = (new_m[j][goodExperiments[i]] - mean_gene[j]) / stdev_gene[j];
+				int vi = 0;
+				if(v>=1.0)
+					vi = 1;
+				fprintf(stdout, "%d", vi);
+				if(i==goodExperiments.size()-1){
+					fprintf(stdout, "\n");
+				}else{
+					fprintf(stdout, "\t");
+				}
+			}
+		}
+		*/
+
+		//original values
+		/*
+		for(j=0; j<totGenes; j++){
+			for(i=0; i<goodExperiments.size(); i++){
+				fprintf(stdout, "%d", (int) new_m[j][goodExperiments[i]]);
+				if(i==goodExperiments.size()-1){
+					fprintf(stdout, "\n");
+				}else{
+					fprintf(stdout, "\t");
+				}
+			}
+		}
+
+		for(i=0; i<totGenes; i++){
+			delete new_m[i];
+		}
+		delete new_m;
+		*/
+		for(i=0; i<vecstrPCL.size(); i++)
+			delete vc[i];
+		vc.clear();
+
+		return false;
+	}
+
 	if(sArgs.convert_aracne_flag==1){
 		int lineLen = 1024;
 		char *acBuffer = (char*)malloc(lineLen);

tools/SeekReader/SeekReader.ggo

 								flag	off
 option	"limit_hub"			Y	"Limit genes in the DAB to those that are hubby"
 								flag	off
+option	"combine_pcl"		B	"Combine PCL bin files"
+								flag	off
+
+section	"Combine PCL"
+option	"pcl_list"			u	"File containing a list of pcl bin files (including path)"
+								string typestr="filename" default="NA"
+option	"binarize"			b	"Binarize the output matrix"
+								flag	off
+option	"output_pcl"		V	"Output file"
+								string typestr="filename" default="NA"
 
 section "Limit Hub"
 option	"dabinput"			y	"DAB input file"

tools/SeekReader/cmdline.c

 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /memex/qzhu/usr/bin/gengetopt -iSeekReader.ggo --default-optional -u -N -e 
+  gengetopt -iSeekReader.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:
 
 const char *gengetopt_args_info_help[] = {
   "      --help                    Print help and exit",
-  "  -V, --version                 Print version and exit",
+  "      --version                 Print version and exit",
   "\nDiagnosis:",
   "  -D, --databaselet             Display values from databaselet(s)  \n                                  (default=off)",
   "  -A, --dataset                 Check which datasets contain query of interest, \n                                  based on .gpres file  (default=off)",
   "  -J, --convert_aracne          Convert Aracne output (.txt) to DAB file  \n                                  (default=off)",
   "  -k, --convert_dab             Convert DAB to matrix  (default=off)",
   "  -Y, --limit_hub               Limit genes in the DAB to those that are hubby  \n                                  (default=off)",
+  "  -B, --combine_pcl             Combine PCL bin files  (default=off)",
+  "\nCombine PCL:",
+  "  -u, --pcl_list=filename       File containing a list of pcl bin files \n                                  (including path)  (default=`NA')",
+  "  -b, --binarize                Binarize the output matrix  (default=off)",
+  "  -V, --output_pcl=filename     Output file  (default=`NA')",
   "\nLimit Hub:",
   "  -y, --dabinput=filename       DAB input file  (default=`NA')",
   "  -Z, --hub_dab_output=filename DAB output file  (default=`NA')",
   args_info->convert_aracne_given = 0 ;
   args_info->convert_dab_given = 0 ;
   args_info->limit_hub_given = 0 ;
+  args_info->combine_pcl_given = 0 ;
+  args_info->pcl_list_given = 0 ;
+  args_info->binarize_given = 0 ;
+  args_info->output_pcl_given = 0 ;
   args_info->dabinput_given = 0 ;
   args_info->hub_dab_output_given = 0 ;
   args_info->aracne_file_given = 0 ;
   args_info->convert_aracne_flag = 0;
   args_info->convert_dab_flag = 0;
   args_info->limit_hub_flag = 0;
+  args_info->combine_pcl_flag = 0;
+  args_info->pcl_list_arg = gengetopt_strdup ("NA");
+  args_info->pcl_list_orig = NULL;
+  args_info->binarize_flag = 0;
+  args_info->output_pcl_arg = gengetopt_strdup ("NA");
+  args_info->output_pcl_orig = NULL;
   args_info->dabinput_arg = gengetopt_strdup ("NA");
   args_info->dabinput_orig = NULL;
   args_info->hub_dab_output_arg = gengetopt_strdup ("NA");
   args_info->convert_aracne_help = gengetopt_args_info_help[8] ;
   args_info->convert_dab_help = gengetopt_args_info_help[9] ;
   args_info->limit_hub_help = gengetopt_args_info_help[10] ;
-  args_info->dabinput_help = gengetopt_args_info_help[12] ;
-  args_info->hub_dab_output_help = gengetopt_args_info_help[13] ;
-  args_info->aracne_file_help = gengetopt_args_info_help[15] ;
-  args_info->output_dab_file_help = gengetopt_args_info_help[16] ;
-  args_info->dab_file_help = gengetopt_args_info_help[18] ;
-  args_info->output_matrix_help = gengetopt_args_info_help[19] ;
-  args_info->dweight_dir_help = gengetopt_args_info_help[21] ;
-  args_info->dweight_num_help = gengetopt_args_info_help[22] ;
-  args_info->dweight_map_help = gengetopt_args_info_help[23] ;
-  args_info->dweight_test_dir_help = gengetopt_args_info_help[24] ;
-  args_info->dweight_test_num_help = gengetopt_args_info_help[25] ;
-  args_info->gscore_dir1_help = gengetopt_args_info_help[27] ;
-  args_info->gscore_dir2_help = gengetopt_args_info_help[28] ;
-  args_info->gscore_num1_help = gengetopt_args_info_help[29] ;
-  args_info->order_stat_single_gene_query_help = gengetopt_args_info_help[31] ;
-  args_info->db_help = gengetopt_args_info_help[32] ;
-  args_info->dset_list_help = gengetopt_args_info_help[33] ;
-  args_info->input_help = gengetopt_args_info_help[34] ;
-  args_info->single_query_help = gengetopt_args_info_help[35] ;
-  args_info->dir_in_help = gengetopt_args_info_help[36] ;
-  args_info->dir_prep_in_help = gengetopt_args_info_help[37] ;
-  args_info->dir_gvar_in_help = gengetopt_args_info_help[38] ;
-  args_info->dir_sinfo_in_help = gengetopt_args_info_help[39] ;
-  args_info->is_nibble_help = gengetopt_args_info_help[40] ;
-  args_info->platform_dir_help = gengetopt_args_info_help[41] ;
-  args_info->gvar_cutoff_help = gengetopt_args_info_help[42] ;
-  args_info->multi_query_help = gengetopt_args_info_help[43] ;
-  args_info->output_file_help = gengetopt_args_info_help[44] ;
+  args_info->combine_pcl_help = gengetopt_args_info_help[11] ;
+  args_info->pcl_list_help = gengetopt_args_info_help[13] ;
+  args_info->binarize_help = gengetopt_args_info_help[14] ;
+  args_info->output_pcl_help = gengetopt_args_info_help[15] ;
+  args_info->dabinput_help = gengetopt_args_info_help[17] ;
+  args_info->hub_dab_output_help = gengetopt_args_info_help[18] ;
+  args_info->aracne_file_help = gengetopt_args_info_help[20] ;
+  args_info->output_dab_file_help = gengetopt_args_info_help[21] ;
+  args_info->dab_file_help = gengetopt_args_info_help[23] ;
+  args_info->output_matrix_help = gengetopt_args_info_help[24] ;
+  args_info->dweight_dir_help = gengetopt_args_info_help[26] ;
+  args_info->dweight_num_help = gengetopt_args_info_help[27] ;
+  args_info->dweight_map_help = gengetopt_args_info_help[28] ;
+  args_info->dweight_test_dir_help = gengetopt_args_info_help[29] ;
+  args_info->dweight_test_num_help = gengetopt_args_info_help[30] ;
+  args_info->gscore_dir1_help = gengetopt_args_info_help[32] ;
+  args_info->gscore_dir2_help = gengetopt_args_info_help[33] ;
+  args_info->gscore_num1_help = gengetopt_args_info_help[34] ;
+  args_info->order_stat_single_gene_query_help = gengetopt_args_info_help[36] ;
+  args_info->db_help = gengetopt_args_info_help[37] ;
+  args_info->dset_list_help = gengetopt_args_info_help[38] ;
+  args_info->input_help = gengetopt_args_info_help[39] ;
+  args_info->single_query_help = gengetopt_args_info_help[40] ;
+  args_info->dir_in_help = gengetopt_args_info_help[41] ;
+  args_info->dir_prep_in_help = gengetopt_args_info_help[42] ;
+  args_info->dir_gvar_in_help = gengetopt_args_info_help[43] ;
+  args_info->dir_sinfo_in_help = gengetopt_args_info_help[44] ;
+  args_info->is_nibble_help = gengetopt_args_info_help[45] ;
+  args_info->platform_dir_help = gengetopt_args_info_help[46] ;
+  args_info->gvar_cutoff_help = gengetopt_args_info_help[47] ;
+  args_info->multi_query_help = gengetopt_args_info_help[48] ;
+  args_info->output_file_help = gengetopt_args_info_help[49] ;
   
 }
 
 cmdline_parser_release (struct gengetopt_args_info *args_info)
 {
   unsigned int i;
+  free_string_field (&(args_info->pcl_list_arg));
+  free_string_field (&(args_info->pcl_list_orig));
+  free_string_field (&(args_info->output_pcl_arg));
+  free_string_field (&(args_info->output_pcl_orig));
   free_string_field (&(args_info->dabinput_arg));
   free_string_field (&(args_info->dabinput_orig));
   free_string_field (&(args_info->hub_dab_output_arg));
     write_into_file(outfile, "convert_dab", 0, 0 );
   if (args_info->limit_hub_given)
     write_into_file(outfile, "limit_hub", 0, 0 );
+  if (args_info->combine_pcl_given)
+    write_into_file(outfile, "combine_pcl", 0, 0 );
+  if (args_info->pcl_list_given)
+    write_into_file(outfile, "pcl_list", args_info->pcl_list_orig, 0);
+  if (args_info->binarize_given)
+    write_into_file(outfile, "binarize", 0, 0 );
+  if (args_info->output_pcl_given)
+    write_into_file(outfile, "output_pcl", args_info->output_pcl_orig, 0);
   if (args_info->dabinput_given)
     write_into_file(outfile, "dabinput", args_info->dabinput_orig, 0);
   if (args_info->hub_dab_output_given)
 
       static struct option long_options[] = {
         { "help",	0, NULL, 0 },
-        { "version",	0, NULL, 'V' },
+        { "version",	0, NULL, 0 },
         { "databaselet",	0, NULL, 'D' },
         { "dataset",	0, NULL, 'A' },
         { "weight",	0, NULL, 'W' },
         { "convert_aracne",	0, NULL, 'J' },
         { "convert_dab",	0, NULL, 'k' },
         { "limit_hub",	0, NULL, 'Y' },
+        { "combine_pcl",	0, NULL, 'B' },
+        { "pcl_list",	1, NULL, 'u' },
+        { "binarize",	0, NULL, 'b' },
+        { "output_pcl",	1, NULL, 'V' },
         { "dabinput",	1, NULL, 'y' },
         { "hub_dab_output",	1, NULL, 'Z' },
         { "aracne_file",	1, NULL, 'K' },
         { NULL,	0, NULL, 0 }
       };
 
-      c = getopt_long (argc, argv, "VDAWUCJkYy:Z:K:L:f:m:E:n:M:F:G:H:h:I:Ox:X:i:q:d:p:r:s:NP:v:Q:o:", long_options, &option_index);
+      c = getopt_long (argc, argv, "DAWUCJkYBu:bV:y:Z:K:L:f:m:E:n:M:F:G:H:h:I:Ox:X:i:q:d:p:r:s:NP:v:Q:o:", long_options, &option_index);
 
       if (c == -1) break;	/* Exit from `while (1)' loop.  */
 
       switch (c)
         {
-        case 'V':	/* Print version and exit.  */
-        
-        
-          if (update_arg( 0 , 
-               0 , &(args_info->version_given),
-              &(local_args_info.version_given), optarg, 0, 0, ARG_NO,
-              check_ambiguity, override, 0, 0,
-              "version", 'V',
-              additional_error))
-            goto failure;
-          cmdline_parser_free (&local_args_info);
-          return 0;
-        
-          break;
         case 'D':	/* Display values from databaselet(s).  */
         
         
             goto failure;
         
           break;
+        case 'B':	/* Combine PCL bin files.  */
+        
+        
+          if (update_arg((void *)&(args_info->combine_pcl_flag), 0, &(args_info->combine_pcl_given),
+              &(local_args_info.combine_pcl_given), optarg, 0, 0, ARG_FLAG,
+              check_ambiguity, override, 1, 0, "combine_pcl", 'B',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'u':	/* File containing a list of pcl bin files (including path).  */
+        
+        
+          if (update_arg( (void *)&(args_info->pcl_list_arg), 
+               &(args_info->pcl_list_orig), &(args_info->pcl_list_given),
+              &(local_args_info.pcl_list_given), optarg, 0, "NA", ARG_STRING,
+              check_ambiguity, override, 0, 0,
+              "pcl_list", 'u',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'b':	/* Binarize the output matrix.  */
+        
+        
+          if (update_arg((void *)&(args_info->binarize_flag), 0, &(args_info->binarize_given),
+              &(local_args_info.binarize_given), optarg, 0, 0, ARG_FLAG,
+              check_ambiguity, override, 1, 0, "binarize", 'b',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'V':	/* Output file.  */
+        
+        
+          if (update_arg( (void *)&(args_info->output_pcl_arg), 
+               &(args_info->output_pcl_orig), &(args_info->output_pcl_given),
+              &(local_args_info.output_pcl_given), optarg, 0, "NA", ARG_STRING,
+              check_ambiguity, override, 0, 0,
+              "output_pcl", 'V',
+              additional_error))
+            goto failure;
+        
+          break;
         case 'y':	/* DAB input file.  */
         
         
             exit (EXIT_SUCCESS);
           }
 
+          /* Print version and exit.  */
+          if (strcmp (long_options[option_index].name, "version") == 0)
+          {
+          
+          
+            if (update_arg( 0 , 
+                 0 , &(args_info->version_given),
+                &(local_args_info.version_given), optarg, 0, 0, ARG_NO,
+                check_ambiguity, override, 0, 0,
+                "version", 'V',
+                additional_error))
+              goto failure;
+            cmdline_parser_free (&local_args_info);
+            return 0;
+          
+          }
+          
+          break;
         case '?':	/* Invalid option.  */
           /* `getopt_long' already printed an error message.  */
           goto failure;

tools/SeekReader/cmdline.h

   const char *convert_dab_help; /**< @brief Convert DAB to matrix help description.  */
   int limit_hub_flag;	/**< @brief Limit genes in the DAB to those that are hubby (default=off).  */
   const char *limit_hub_help; /**< @brief Limit genes in the DAB to those that are hubby help description.  */
+  int combine_pcl_flag;	/**< @brief Combine PCL bin files (default=off).  */
+  const char *combine_pcl_help; /**< @brief Combine PCL bin files help description.  */
+  char * pcl_list_arg;	/**< @brief File containing a list of pcl bin files (including path) (default='NA').  */
+  char * pcl_list_orig;	/**< @brief File containing a list of pcl bin files (including path) original value given at command line.  */
+  const char *pcl_list_help; /**< @brief File containing a list of pcl bin files (including path) help description.  */
+  int binarize_flag;	/**< @brief Binarize the output matrix (default=off).  */
+  const char *binarize_help; /**< @brief Binarize the output matrix help description.  */
+  char * output_pcl_arg;	/**< @brief Output file (default='NA').  */
+  char * output_pcl_orig;	/**< @brief Output file original value given at command line.  */
+  const char *output_pcl_help; /**< @brief Output file help description.  */
   char * dabinput_arg;	/**< @brief DAB input file (default='NA').  */
   char * dabinput_orig;	/**< @brief DAB input file original value given at command line.  */
   const char *dabinput_help; /**< @brief DAB input file help description.  */
   unsigned int convert_aracne_given ;	/**< @brief Whether convert_aracne was given.  */
   unsigned int convert_dab_given ;	/**< @brief Whether convert_dab was given.  */
   unsigned int limit_hub_given ;	/**< @brief Whether limit_hub was given.  */
+  unsigned int combine_pcl_given ;	/**< @brief Whether combine_pcl was given.  */
+  unsigned int pcl_list_given ;	/**< @brief Whether pcl_list was given.  */
+  unsigned int binarize_given ;	/**< @brief Whether binarize was given.  */
+  unsigned int output_pcl_given ;	/**< @brief Whether output_pcl was given.  */
   unsigned int dabinput_given ;	/**< @brief Whether dabinput was given.  */
   unsigned int hub_dab_output_given ;	/**< @brief Whether hub_dab_output was given.  */
   unsigned int aracne_file_given ;	/**< @brief Whether aracne_file was given.  */