Commits

Qian Zhu committed 13c34a8

Allowed SeekIterative to handle SEEK search integration from a collection of DAB files as
opposed to CDatabase
Added function to convert MI-based distance matrix generated by ARACNE to DAB (SeekReader)
Added function to limit genes in a DAB file to the top hubby genes (SeekReader)
Added topological overlap distance in seekwriter (to be eventually incorporated to Distancer)

Comments (0)

Files changed (18)

src/seekwriter.cpp

 	return true;
 }
 
+bool CSeekWriter::TopologicalOverlap(CDataPair &Dat,
+const vector<string> &vecstrGenes){
+	size_t i, j;
+	vector<unsigned int> veciGenes;
+	veciGenes.resize(vecstrGenes.size());
+	for(i=0; i<vecstrGenes.size(); i++)
+		veciGenes[i] = (unsigned int) Dat.GetGeneIndex(vecstrGenes[i]);
+
+	unsigned int s,t;
+	float d;
+	CSeekIntIntMap m(vecstrGenes.size());
+	for(i=0; i<vecstrGenes.size(); i++){
+		if((s=veciGenes[i])==(unsigned int)-1) continue;
+		m.Add(i);
+	}
+
+	size_t trueSize = m.GetNumSet();
+	float* fs = new float[trueSize*trueSize];
+	const vector<utype> &allRGenes = m.GetAllReverse();
+
+	for(i=0; i<m.GetNumSet(); i++){	
+		s = veciGenes[allRGenes[i]];
+		for(j=i+1; j<m.GetNumSet(); j++){
+			t = veciGenes[allRGenes[j]];
+			if(CMeta::IsNaN(d = Dat.Get(s,t))){
+				fs[i*trueSize + j] = 0;
+				fs[j*trueSize + i] = 0;
+				fprintf(stderr, "Warning i, j is NaN, set to 0!\n", i, j);
+			}else{
+				fs[i*trueSize + j] = d;
+				fs[j*trueSize + i] = d;
+			}
+		}
+		fs[i*trueSize+i] = 0;
+	}
+
+	//first step: transform z-scores back to correlation 
+	//(exp(z*2) = (1+r)/(1-r) -> exp(2z) - exp(2z)*r = 1+r -> exp(2z) - 1 = exp(2z)*r + r = (exp(2z) + 1)*r->
+	//(exp(2z) - 1) / (exp(2z) + 1) = r
+	//second step: transform by abs, then take it to the exponent 9
+	for(i=0; i<m.GetNumSet(); i++){	
+		for(j=i+1; j<m.GetNumSet(); j++){
+			if((d = fs[i*trueSize+j])==0) continue;
+			d = (expf(2.0*d) - 1.0) / (expf(2.0*d) + 1.0);
+			d = pow(abs(d), 9);
+			fs[i*trueSize+j] = d;
+			fs[j*trueSize+i] = d;
+			//fprintf(stderr, "%.3e\n", d);
+		}
+	}
+
+	fprintf(stderr, "Finished step 1: tranform z-score back to pearson\n");
+	vector<float> vecSum;
+	CSeekTools::InitVector(vecSum, trueSize, CMeta::GetNaN());
+	for(i=0; i<m.GetNumSet(); i++){	
+		vecSum[i] = 0;
+	}
+
+	for(i=0; i<m.GetNumSet(); i++){	
+		for(j=i+1; j<m.GetNumSet(); j++){
+			vecSum[i] += fs[i*trueSize+j];
+			vecSum[j] += fs[i*trueSize+j];
+		}
+	}
+
+	//temporary storage matrix
+	CHalfMatrix<float> res;
+	res.Initialize(trueSize);
+	for(i=0; i<m.GetNumSet(); i++){	
+		for(j=i+1; j<m.GetNumSet(); j++){
+			res.Set(i, j, 0);
+		}
+	}
+
+	size_t k;
+	unsigned int u;
+	//size_t ii = 0;
+	#pragma omp parallel for \
+	shared(m, fs, vecSum, res) \
+	firstprivate(trueSize) \
+	private(i, j, d) schedule(dynamic)
+	for(i=0; i<m.GetNumSet(); i++){	
+		for(j=i+1; j<m.GetNumSet(); j++){
+			double tsum = 0;
+			float *pi = &fs[i];
+			float *pj = &fs[j];
+			for(k=0; k<m.GetNumSet(); k++){
+				tsum += (double) (*pi) * (double) (*pj);
+				pi++;
+				pj++; 
+			}
+			tsum -= (double)fs[i*trueSize + i] * (double)fs[j*trueSize + i];
+			tsum -= (double)fs[i*trueSize + j] * (double)fs[j*trueSize + j];
+			double tmin = 0;
+			if(vecSum[i] < vecSum[j])
+				tmin = (double) vecSum[i];
+			else
+				tmin = (double) vecSum[j];
+			double to = (tsum + (double) d) / (tmin + 1.0 - (double) d);
+			res.Set(i, j, (float) to); //temporary matrix to store the results
+			//fprintf(stderr, "%.3e\n", to);	
+		}
+		if(i%100==0){
+			fprintf(stderr, "Doing topological overlap calculation, current %d\n", i);
+		}
+	}
+
+	fprintf(stderr, "Finished step 2: topological overlap calculation\n");
+
+	for(i=0; i<m.GetNumSet(); i++){	
+		s = veciGenes[allRGenes[i]];
+		for(j=i+1; j<m.GetNumSet(); j++){
+			t = veciGenes[allRGenes[j]];
+			Dat.Set(s, t, res.Get(i, j));
+		}
+		Dat.Set(s, s, 1.0);
+	}
+	delete[] fs;
+
+}
+
 
 bool CSeekWriter::NormalizeDAB(CDataPair &Dat,
 const vector<string> &vecstrGenes, 
 						}else
 							r=d/sqrtf(vecSum[i])/sqrtf(vecSum[j]);
 					}else if(subtractNorm){
-						r=d-vecSum[i]/vecNum[i]-vecSum[j]/vecNum[j];
+						r=d-0.5*(vecSum[i]/vecNum[i]+vecSum[j]/vecNum[j]);
 					}
 				}
 				Dat.Set(s, t, r);
 		vector<char> &vecResult);
 	static bool GetDatasetSinfo(CDataPair &Dat, float &mean,
 		float &stdev);
+
+	static bool TopologicalOverlap(CDataPair &Dat,
+		const vector<string> &vecstrGenes);
 };
 
 }

tools/SeekIterative/SeekIterative.cpp

 	return true;
 }
 
+bool get_score(vector<float> &gene_score, CFullMatrix<float> &mat,
+CSeekIntIntMap *geneMap, vector<float> &q_weight){
+	vector<float> gene_count;
+	int numGenes = geneMap->GetSize();
+	CSeekTools::InitVector(gene_score, numGenes, (float)CMeta::GetNaN());
+	CSeekTools::InitVector(gene_count, numGenes, (float)0);
+	int qi=0;
+	const vector<utype> &allGenes = geneMap->GetAllReverse();
+	utype kk, k;
+	for(kk=0; kk<geneMap->GetNumSet(); kk++){
+		utype gi = allGenes[kk];
+		gene_score[gi] = 0;
+	}
+	for(qi=0; qi<geneMap->GetNumSet(); qi++){
+		utype qq = allGenes[qi];
+		if(q_weight[qq]==0) 
+			continue;
+		for(kk=0; kk<geneMap->GetNumSet(); kk++){
+			utype gi = allGenes[kk];
+			float fl = mat.Get(qq, gi);
+			gene_score[gi] += fl * q_weight[qq];
+		}
+		for(kk=0; kk<geneMap->GetNumSet(); kk++){
+			utype gi = allGenes[kk];
+			gene_count[gi] += q_weight[qq];
+		}
+	}
+	for(k=0; k<geneMap->GetNumSet(); k++){
+		utype gi = allGenes[k];
+		gene_score[gi] /= gene_count[gi];
+		if(gene_count[gi] == 0)
+			gene_score[gi] = 0;
+	}
+	return true;
+}
+
 /*bool iterate(vector<float> &src, vector<float> &dest, 
 	vector<vector<float> > &tr, CSeekIntIntMap *geneMap,
 	float alpha, int numIterations){
 	return true;
 }*/
 
+//is_query is like a gold-standard gene list
 bool weight_fast(vector<char> &is_query, vector<float> &d1,
 CSeekIntIntMap *geneMap, float &w){
 	utype i;
 }
 
 bool weight(vector<char> &is_query, vector<float> &d1,
-	CSeekIntIntMap *geneMap, float rbp_p, float &w){
+CSeekIntIntMap *geneMap, float rbp_p, float &w){
 	vector<AResultFloat> ar;
 	ar.resize(geneMap->GetSize());
 	utype i;
-	
 	const vector<utype> &allGenes = geneMap->GetAllReverse();
 	for(i=0; i<geneMap->GetNumSet(); i++){
 		utype gi = allGenes[i];
 		ar[i].i = gi;
 		ar[i].f = d1[gi];
 	}
-	
 	int MAX = 5000;
 	nth_element(ar.begin(), ar.begin()+MAX, ar.end());
 	sort(ar.begin(), ar.begin()+MAX);
-	
 	w = 0;
 	for(i=0; i<MAX; i++)
 		if(is_query[ar[i].i]==1)
 	return true;
 }
 
+bool cv_weight_LOO(vector<utype> &query, CSparseFlatMatrix<float> &mat,
+	CSeekIntIntMap *geneMap, float rbp_p, float &tot_w){
+	//leave one out cross-validation
+	utype i, j;
+	int numGenes = geneMap->GetSize();
+	tot_w = 0;
+	
+	for(i=0; i<query.size(); i++){
+		vector<char> is_query;
+		vector<float> q_weight;
+		float w = 0;
+		CSeekTools::InitVector(is_query, numGenes, (char) 0);
+		CSeekTools::InitVector(q_weight, numGenes, (float) 0);
+		for(j=0; j<query.size(); j++){
+			if(i==j) continue;
+			q_weight[query[j]] = 1.0;
+		}
+		is_query[query[i]] = 1;
+		vector<float> gene_score;
+		get_score(gene_score, mat, geneMap, q_weight);
+		for(j=0; j<query.size(); j++){
+			if(i==j) continue;
+			gene_score[query[j]] = 0;
+		}
+		weight_fast(is_query, gene_score, geneMap, w);
+		tot_w += w;
+	}
+	tot_w /= query.size();
+	return true;
+}
+
 bool cv_weight(vector<utype> &query, CSparseFlatMatrix<float> &mat,
 	CSeekIntIntMap *geneMap, float rbp_p, float &tot_w){
 	//leave one in cross-validation
 	return true;	
 }
 
+//accepts fullmatrix, leave one in cross-validation
+bool cv_weight(vector<utype> &query, CFullMatrix<float> &mat,
+CSeekIntIntMap *geneMap, float rbp_p, float &tot_w){
+	utype i, j;
+	int numGenes = geneMap->GetSize();
+	tot_w = 0;
+	for(i=0; i<query.size(); i++){
+		vector<char> is_query;
+		vector<float> q_weight;
+		CSeekTools::InitVector(is_query, numGenes, (char) 0);
+		CSeekTools::InitVector(q_weight, numGenes, (float) 0);
+		q_weight[query[i]] = 1.0;
+		float w = 0;
+		for(j=0; j<query.size(); j++){
+			if(i==j) continue;
+			is_query[query[j]] = 1;
+		}
+		vector<float> gene_score;
+		get_score(gene_score, mat, geneMap, q_weight);
+		gene_score[query[i]] = 0;
+		weight(is_query, gene_score, geneMap, rbp_p, w);
+		tot_w += w;
+	}
+	tot_w /= query.size();
+	return true;	
+}
+
 int main(int iArgs, char **aszArgs){
 	static const size_t c_iBuffer   = 1024;
 	char acBuffer[c_iBuffer];
 			fprintf(stderr, "%s %s %.5f\n", s1.c_str(), r1[i].c_str(), CD.Get(i1, j1));
 		}
 	}
-	//DAB mode
+
+	//Traditional DAB mode (.dab file)
+	if(sArgs.tdab_flag==1){
+		string search_mode = sArgs.tsearch_mode_arg;
+		if(search_mode=="NA"){
+			fprintf(stderr, "Please specify a search mode!\n");
+			return 1;
+		}
+		vector<string> dab_list;
+		CSeekTools::ReadListOneColumn(sArgs.tdab_list_arg, dab_list);
+
+		fprintf(stderr, "Finished reading dablist\n");
+		//reading query
+		vector<vector<float> > q_weight;
+		q_weight.resize(vecstrAllQuery.size());
+		for(i=0; i<vecstrAllQuery.size(); i++){
+			CSeekTools::InitVector(q_weight[i], numGenes, (float) 0);
+			for(j=0; j<vecstrAllQuery[i].size(); j++)
+				q_weight[i][mapstriGenes[vecstrAllQuery[i][j]]] = 1;
+		}
+		
+		fprintf(stderr, "Reading DAB\n");
+		vector<vector<float> > final_score, count, dweight;
+		vector<vector<int> > freq;
+		final_score.resize(vecstrAllQuery.size());
+		count.resize(vecstrAllQuery.size());
+		freq.resize(vecstrAllQuery.size());
+		dweight.resize(vecstrAllQuery.size());
+		for(j=0; j<vecstrAllQuery.size(); j++){
+			CSeekTools::InitVector(final_score[j], vecstrGenes.size(), (float)CMeta::GetNaN());
+			CSeekTools::InitVector(count[j], vecstrGenes.size(), (float) 0);
+			CSeekTools::InitVector(freq[j], vecstrGenes.size(), (int) 0);
+			CSeekTools::InitVector(dweight[j], dab_list.size(), (float) 0);
+		}
+
+		size_t l;
+		for(l=0; l<dab_list.size(); l++){
+			CDat Dat;
+			fprintf(stderr, "Reading %d: %s\n", l, dab_list[l].c_str());
+			string dabfile = dab_dir + "/" + dab_list[l];
+			Dat.Open(dabfile.c_str(), false, 2, false, false, false);
+
+			fprintf(stderr, "Finished reading DAB\n");
+			vector<unsigned int> veciGenes;
+			veciGenes.resize(vecstrGenes.size());
+			unsigned int ki;
+			for(ki=0; ki<vecstrGenes.size(); ki++)
+				veciGenes[ki] = (unsigned int) Dat.GetGeneIndex(vecstrGenes[ki]);
+			unsigned int s,t;
+			float d;
+			CSeekIntIntMap m(vecstrGenes.size());
+			for(i=0; i<vecstrGenes.size(); i++){
+				if((s=veciGenes[i])==(unsigned int)-1) continue;
+				m.Add(i);
+			}
+
+			//Copy to matrix sm
+			CFullMatrix<float> sm;
+			sm.Initialize(vecstrGenes.size(), vecstrGenes.size());
+			const vector<utype> &allRGenes = m.GetAllReverse();
+			for(i=0; i<m.GetNumSet(); i++){
+				unsigned int si = allRGenes[i];
+				s = veciGenes[si];
+				for(j=i+1; j<m.GetNumSet(); j++){
+					unsigned int tj = allRGenes[j];
+					t = veciGenes[allRGenes[j]];
+					if(CMeta::IsNaN(d = Dat.Get(s,t))){
+						sm.Set(si, tj, 0);
+						sm.Set(tj, si, 0);
+					}else{
+						sm.Set(si, tj, d);
+						sm.Set(tj, si, d);
+					}
+				}
+				sm.Set(si, si, 0);
+			}
+
+			fprintf(stderr, "Finished copying matrix\n");
+			for(j=0; j<vecstrAllQuery.size(); j++){
+				float dw = 1.0;
+				if(search_mode=="eq"){
+					dw = 1.0;
+				}else{ //cv_loi, rbp_p = 0.99
+					cv_weight(qu[j], sm, &m, 0.99, dw);
+				}
+				dweight[j][l] = dw;
+				vector<float> tmp_score;
+				get_score(tmp_score, sm, &m, q_weight[j]);
+				for(k=0; k<m.GetNumSet(); k++){
+					utype gi = allRGenes[k];
+					if(CMeta::IsNaN(final_score[j][gi]))
+						final_score[j][gi] = 0;
+					final_score[j][gi] += tmp_score[gi] * dw;
+					count[j][gi]+=dw;
+					freq[j][gi]++;
+				}	
+			}
+		}
+		int minRequired = (int) ((float) dab_list.size() * 0.50);
+		for(j=0; j<vecstrAllQuery.size(); j++){
+			for(k=0; k<final_score[j].size(); k++){
+				if(freq[j][k]<minRequired){
+					final_score[j][k] = -320;
+					continue;
+				}
+				if(CMeta::IsNaN(final_score[j][k])){
+					final_score[j][k] = -320;
+					continue;
+				}
+				final_score[j][k] /= count[j][k];
+			}
+			sprintf(acBuffer, "%s/%d.query", output_dir.c_str(), j);
+			CSeekTools::WriteArrayText(acBuffer, vecstrAllQuery[j]);
+			sprintf(acBuffer, "%s/%d.gscore", output_dir.c_str(), j);
+			CSeekTools::WriteArray(acBuffer, final_score[j]);
+			sprintf(acBuffer, "%s/%d.dweight", output_dir.c_str(), j);
+			CSeekTools::WriteArray(acBuffer, dweight[j]);
+		}
+		return 0;
+	}
+
+
+	//DAB mode (sparse DAB: .2.dab)
 	if(sArgs.dab_flag==1){
+		string norm_mode = sArgs.norm_mode_arg;
 		if(sArgs.default_type_arg!=0 && sArgs.default_type_arg!=1){
 			fprintf(stderr, "Error, invalid type!\n");
 			return -1;
 		}
 
-		if(sArgs.max_rank_arg==-1){
-			fprintf(stderr, "Error, please supply the max rank flag.\n");
+		if(norm_mode=="NA"){
+			fprintf(stderr, "Error, please specify a norm mode!\n");
 			return -1;
 		}
 
-		if(sArgs.rbp_p_arg==-1){
-			fprintf(stderr, "Error, please supply the rbp_p flag.\n");
-			return -1;
+		float exp = sArgs.exp_arg;
+		if(norm_mode=="rank"){
+			if(sArgs.max_rank_arg==-1){
+				fprintf(stderr, "Error, please supply the max rank flag.\n");
+				return -1;
+			}
+			if(sArgs.rbp_p_arg==-1){
+				fprintf(stderr, "Error, please supply the rbp_p flag.\n");
+				return -1;
+			}
+		}else if(norm_mode=="subtract_z"){
+			if(exp==-1){
+				fprintf(stderr, "Invalid exponent!\n");
+				return -1;
+			}
 		}
 
 		vector<float> score_cutoff;
 			CSeekTools::InitVector(freq[j], vecstrGenes.size(), (int) 0);
 			CSeekTools::InitVector(dweight[j], dab_list.size(), (float) 0);
 		}
+		if(bDatasetCutoff){
+			fprintf(stderr, "Dataset cutoff is on!\n");
+		}else{
+			fprintf(stderr, "Dataset cutoff is off!\n");
+		}
+
+
 		for(i=0; i<dab_list.size(); i++){
 			fprintf(stderr, "Reading %d: %s\n", i, dab_list[i].c_str());
 			CSeekIntIntMap d1(vecstrGenes.size());
 			string dabfile = dab_dir + "/" + dab_list[i];
 			CSparseFlatMatrix<float> sm (0);
 
-			if(sArgs.default_type_arg==0) //utype
-				CSeekWriter::ReadSeekSparseMatrix<utype>(dabfile.c_str(), sm, d1, 
-				max_rank, rbp_p, vecstrGenes);
-			else
-				CSeekWriter::ReadSeekSparseMatrix<unsigned short>(dabfile.c_str(), 
-				sm, d1, max_rank, rbp_p, vecstrGenes);
-			
+			if(norm_mode=="rank"){
+				if(sArgs.default_type_arg==0) //utype
+					CSeekWriter::ReadSeekSparseMatrix<utype>(dabfile.c_str(), sm, d1, 
+					max_rank, rbp_p, vecstrGenes);
+				else
+					CSeekWriter::ReadSeekSparseMatrix<unsigned short>(dabfile.c_str(), 
+					sm, d1, max_rank, rbp_p, vecstrGenes);
+			}else if(norm_mode=="subtract_z"){
+				if(sArgs.default_type_arg==0) //utype
+					CSeekWriter::ReadSeekSparseMatrix<utype>(dabfile.c_str(), sm, d1, 
+					vecstrGenes, 1000, exp);
+				else
+					CSeekWriter::ReadSeekSparseMatrix<unsigned short>(dabfile.c_str(), sm, d1, 
+					vecstrGenes, 1000, exp);
+			}
 			const vector<utype> &allGenes = d1.GetAllReverse();
 
 			#pragma omp parallel for \
 			private(j, k) firstprivate(bDatasetCutoff) schedule(dynamic)
 			for(j=0; j<vecstrAllQuery.size(); j++){
 				float dw = 1.0;
+				//cv_weight_LOO(qu[j], sm, &d1, rbp_p, dw);
 				cv_weight(qu[j], sm, &d1, rbp_p, dw);
 				if(bDatasetCutoff){
 					if(score_cutoff[i]>dw)

tools/SeekIterative/SeekIterative.ggo

 purpose	"Search based on iterative algorithm"
 
 section "Mode"
+option	"tdab"				j	"Traditional DAB mode"
+								flag off
 option	"dab"				d	"Sparse Dab mode"
 								flag off
 option	"combined"			e	"Combined-dab mode"
 option	"genome"			G	"Genome mapping file"
 								string typestr="filename"
 
+section "Traditional DAB mode"
+option	"tdab_list"			J	"DAB list"
+								string typestr="filename"
+option	"tsearch_mode"		S	"Search mode: equal weighted (eq) or CV LOI (cv_loi) (Applicable if DAB list contains more than 1 dataset"
+								values="eq","cv_loi","NA" default="NA"
+
 section "Sparse DAB mode"
 option	"dab_list"			V	"DAB list"
 								string typestr="filename"
 								int default="-1"
 option	"dset_cutoff_file"	H	"Dataset score cutoff file"
 								string typestr="filename" default="NA"
+option	"norm_mode"			n	"Normalization method: rank - rank-normalize matrix, subtract_z - subtract-z-normalize matrix"
+								values="rank","subtract_z","NA" default="NA"
+option	"exp"				N	"Raise the z-score to the power of this value (for --norm_mode=subtract_z)"
+								float	default="-1.0"
 
 section "Input"
 option	"input"				i	"Gene mapping file"

tools/SeekIterative/cmdline.c

   "      --help                    Print help and exit",
   "      --version                 Print version and exit",
   "\nMode:",
+  "  -j, --tdab                    Traditional DAB mode  (default=off)",
   "  -d, --dab                     Sparse Dab mode  (default=off)",
   "  -e, --combined                Combined-dab mode  (default=off)",
   "  -f, --test                    Test mode  (default=off)",
   "\nVisualization mode:",
   "  -c, --cutoff=FLOAT            Cutoff value  (default=`0.0001')",
   "  -G, --genome=filename         Genome mapping file",
+  "\nTraditional DAB mode:",
+  "  -J, --tdab_list=filename      DAB list",
+  "  -S, --tsearch_mode=STRING     Search mode: equal weighted (eq) or CV LOI \n                                  (cv_loi) (Applicable if DAB list contains \n                                  more than 1 dataset  (possible values=\"eq\", \n                                  \"cv_loi\", \"NA\" default=`NA')",
   "\nSparse DAB mode:",
   "  -V, --dab_list=filename       DAB list",
   "  -I, --num_iter=INT            Number of iterations  (default=`0')",
   "  -R, --rbp_p=FLOAT             RBP p parameter (must be specified) (p<1.0) \n                                  (recommended > 0.95)  (default=`-1')",
   "  -M, --max_rank=INT            Maximum rank number in the sparse DAB matrix \n                                  (must be specified)  (default=`-1')",
   "  -H, --dset_cutoff_file=filename\n                                Dataset score cutoff file  (default=`NA')",
+  "  -n, --norm_mode=STRING        Normalization method: rank - rank-normalize \n                                  matrix, subtract_z - subtract-z-normalize \n                                  matrix  (possible values=\"rank\", \n                                  \"subtract_z\", \"NA\" default=`NA')",
+  "  -N, --exp=FLOAT               Raise the z-score to the power of this value \n                                  (for --norm_mode=subtract_z)  \n                                  (default=`-1.0')",
   "\nInput:",
   "  -i, --input=filename          Gene mapping file",
   "  -q, --query=filename          Query file",
 static int
 cmdline_parser_required2 (struct gengetopt_args_info *args_info, const char *prog_name, const char *additional_error);
 
+const char *cmdline_parser_tsearch_mode_values[] = {"eq", "cv_loi", "NA", 0}; /*< Possible values for tsearch_mode. */
+const char *cmdline_parser_norm_mode_values[] = {"rank", "subtract_z", "NA", 0}; /*< Possible values for norm_mode. */
+
 static char *
 gengetopt_strdup (const char *s);
 
 {
   args_info->help_given = 0 ;
   args_info->version_given = 0 ;
+  args_info->tdab_given = 0 ;
   args_info->dab_given = 0 ;
   args_info->combined_given = 0 ;
   args_info->test_given = 0 ;
   args_info->print_distr_given = 0 ;
   args_info->cutoff_given = 0 ;
   args_info->genome_given = 0 ;
+  args_info->tdab_list_given = 0 ;
+  args_info->tsearch_mode_given = 0 ;
   args_info->dab_list_given = 0 ;
   args_info->num_iter_given = 0 ;
   args_info->default_type_given = 0 ;
   args_info->rbp_p_given = 0 ;
   args_info->max_rank_given = 0 ;
   args_info->dset_cutoff_file_given = 0 ;
+  args_info->norm_mode_given = 0 ;
+  args_info->exp_given = 0 ;
   args_info->input_given = 0 ;
   args_info->query_given = 0 ;
   args_info->dab_dir_given = 0 ;
 void clear_args (struct gengetopt_args_info *args_info)
 {
   FIX_UNUSED (args_info);
+  args_info->tdab_flag = 0;
   args_info->dab_flag = 0;
   args_info->combined_flag = 0;
   args_info->test_flag = 0;
   args_info->cutoff_orig = NULL;
   args_info->genome_arg = NULL;
   args_info->genome_orig = NULL;
+  args_info->tdab_list_arg = NULL;
+  args_info->tdab_list_orig = NULL;
+  args_info->tsearch_mode_arg = gengetopt_strdup ("NA");
+  args_info->tsearch_mode_orig = NULL;
   args_info->dab_list_arg = NULL;
   args_info->dab_list_orig = NULL;
   args_info->num_iter_arg = 0;
   args_info->max_rank_orig = NULL;
   args_info->dset_cutoff_file_arg = gengetopt_strdup ("NA");
   args_info->dset_cutoff_file_orig = NULL;
+  args_info->norm_mode_arg = gengetopt_strdup ("NA");
+  args_info->norm_mode_orig = NULL;
+  args_info->exp_arg = -1.0;
+  args_info->exp_orig = NULL;
   args_info->input_arg = NULL;
   args_info->input_orig = NULL;
   args_info->query_arg = NULL;
 
   args_info->help_help = gengetopt_args_info_help[0] ;
   args_info->version_help = gengetopt_args_info_help[1] ;
-  args_info->dab_help = gengetopt_args_info_help[3] ;
-  args_info->combined_help = gengetopt_args_info_help[4] ;
-  args_info->test_help = gengetopt_args_info_help[5] ;
-  args_info->testcount_help = gengetopt_args_info_help[6] ;
-  args_info->testcombined_help = gengetopt_args_info_help[7] ;
-  args_info->visualize_help = gengetopt_args_info_help[8] ;
-  args_info->dab_basename_help = gengetopt_args_info_help[10] ;
-  args_info->top_genes_help = gengetopt_args_info_help[11] ;
-  args_info->generate_dot_help = gengetopt_args_info_help[12] ;
-  args_info->print_distr_help = gengetopt_args_info_help[13] ;
-  args_info->cutoff_help = gengetopt_args_info_help[15] ;
-  args_info->genome_help = gengetopt_args_info_help[16] ;
-  args_info->dab_list_help = gengetopt_args_info_help[18] ;
-  args_info->num_iter_help = gengetopt_args_info_help[19] ;
-  args_info->default_type_help = gengetopt_args_info_help[20] ;
-  args_info->rbp_p_help = gengetopt_args_info_help[21] ;
-  args_info->max_rank_help = gengetopt_args_info_help[22] ;
-  args_info->dset_cutoff_file_help = gengetopt_args_info_help[23] ;
-  args_info->input_help = gengetopt_args_info_help[25] ;
-  args_info->query_help = gengetopt_args_info_help[26] ;
-  args_info->dab_dir_help = gengetopt_args_info_help[27] ;
-  args_info->not_query_help = gengetopt_args_info_help[28] ;
-  args_info->dir_out_help = gengetopt_args_info_help[30] ;
+  args_info->tdab_help = gengetopt_args_info_help[3] ;
+  args_info->dab_help = gengetopt_args_info_help[4] ;
+  args_info->combined_help = gengetopt_args_info_help[5] ;
+  args_info->test_help = gengetopt_args_info_help[6] ;
+  args_info->testcount_help = gengetopt_args_info_help[7] ;
+  args_info->testcombined_help = gengetopt_args_info_help[8] ;
+  args_info->visualize_help = gengetopt_args_info_help[9] ;
+  args_info->dab_basename_help = gengetopt_args_info_help[11] ;
+  args_info->top_genes_help = gengetopt_args_info_help[12] ;
+  args_info->generate_dot_help = gengetopt_args_info_help[13] ;
+  args_info->print_distr_help = gengetopt_args_info_help[14] ;
+  args_info->cutoff_help = gengetopt_args_info_help[16] ;
+  args_info->genome_help = gengetopt_args_info_help[17] ;
+  args_info->tdab_list_help = gengetopt_args_info_help[19] ;
+  args_info->tsearch_mode_help = gengetopt_args_info_help[20] ;
+  args_info->dab_list_help = gengetopt_args_info_help[22] ;
+  args_info->num_iter_help = gengetopt_args_info_help[23] ;
+  args_info->default_type_help = gengetopt_args_info_help[24] ;
+  args_info->rbp_p_help = gengetopt_args_info_help[25] ;
+  args_info->max_rank_help = gengetopt_args_info_help[26] ;
+  args_info->dset_cutoff_file_help = gengetopt_args_info_help[27] ;
+  args_info->norm_mode_help = gengetopt_args_info_help[28] ;
+  args_info->exp_help = gengetopt_args_info_help[29] ;
+  args_info->input_help = gengetopt_args_info_help[31] ;
+  args_info->query_help = gengetopt_args_info_help[32] ;
+  args_info->dab_dir_help = gengetopt_args_info_help[33] ;
+  args_info->not_query_help = gengetopt_args_info_help[34] ;
+  args_info->dir_out_help = gengetopt_args_info_help[36] ;
   
 }
 
   free_string_field (&(args_info->cutoff_orig));
   free_string_field (&(args_info->genome_arg));
   free_string_field (&(args_info->genome_orig));
+  free_string_field (&(args_info->tdab_list_arg));
+  free_string_field (&(args_info->tdab_list_orig));
+  free_string_field (&(args_info->tsearch_mode_arg));
+  free_string_field (&(args_info->tsearch_mode_orig));
   free_string_field (&(args_info->dab_list_arg));
   free_string_field (&(args_info->dab_list_orig));
   free_string_field (&(args_info->num_iter_orig));
   free_string_field (&(args_info->max_rank_orig));
   free_string_field (&(args_info->dset_cutoff_file_arg));
   free_string_field (&(args_info->dset_cutoff_file_orig));
+  free_string_field (&(args_info->norm_mode_arg));
+  free_string_field (&(args_info->norm_mode_orig));
+  free_string_field (&(args_info->exp_orig));
   free_string_field (&(args_info->input_arg));
   free_string_field (&(args_info->input_orig));
   free_string_field (&(args_info->query_arg));
   clear_given (args_info);
 }
 
+/**
+ * @param val the value to check
+ * @param values the possible values
+ * @return the index of the matched value:
+ * -1 if no value matched,
+ * -2 if more than one value has matched
+ */
+static int
+check_possible_values(const char *val, const char *values[])
+{
+  int i, found, last;
+  size_t len;
+
+  if (!val)   /* otherwise strlen() crashes below */
+    return -1; /* -1 means no argument for the option */
+
+  found = last = 0;
+
+  for (i = 0, len = strlen(val); values[i]; ++i)
+    {
+      if (strncmp(val, values[i], len) == 0)
+        {
+          ++found;
+          last = i;
+          if (strlen(values[i]) == len)
+            return i; /* exact macth no need to check more */
+        }
+    }
+
+  if (found == 1) /* one match: OK */
+    return last;
+
+  return (found ? -2 : -1); /* return many values or none matched */
+}
+
 
 static void
 write_into_file(FILE *outfile, const char *opt, const char *arg, const char *values[])
 {
-  FIX_UNUSED (values);
+  int found = -1;
   if (arg) {
-    fprintf(outfile, "%s=\"%s\"\n", opt, arg);
+    if (values) {
+      found = check_possible_values(arg, values);      
+    }
+    if (found >= 0)
+      fprintf(outfile, "%s=\"%s\" # %s\n", opt, arg, values[found]);
+    else
+      fprintf(outfile, "%s=\"%s\"\n", opt, arg);
   } else {
     fprintf(outfile, "%s\n", opt);
   }
     write_into_file(outfile, "help", 0, 0 );
   if (args_info->version_given)
     write_into_file(outfile, "version", 0, 0 );
+  if (args_info->tdab_given)
+    write_into_file(outfile, "tdab", 0, 0 );
   if (args_info->dab_given)
     write_into_file(outfile, "dab", 0, 0 );
   if (args_info->combined_given)
     write_into_file(outfile, "cutoff", args_info->cutoff_orig, 0);
   if (args_info->genome_given)
     write_into_file(outfile, "genome", args_info->genome_orig, 0);
+  if (args_info->tdab_list_given)
+    write_into_file(outfile, "tdab_list", args_info->tdab_list_orig, 0);
+  if (args_info->tsearch_mode_given)
+    write_into_file(outfile, "tsearch_mode", args_info->tsearch_mode_orig, cmdline_parser_tsearch_mode_values);
   if (args_info->dab_list_given)
     write_into_file(outfile, "dab_list", args_info->dab_list_orig, 0);
   if (args_info->num_iter_given)
     write_into_file(outfile, "max_rank", args_info->max_rank_orig, 0);
   if (args_info->dset_cutoff_file_given)
     write_into_file(outfile, "dset_cutoff_file", args_info->dset_cutoff_file_orig, 0);
+  if (args_info->norm_mode_given)
+    write_into_file(outfile, "norm_mode", args_info->norm_mode_orig, cmdline_parser_norm_mode_values);
+  if (args_info->exp_given)
+    write_into_file(outfile, "exp", args_info->exp_orig, 0);
   if (args_info->input_given)
     write_into_file(outfile, "input", args_info->input_orig, 0);
   if (args_info->query_given)
       return 1; /* failure */
     }
 
-  FIX_UNUSED (default_value);
+  if (possible_values && (found = check_possible_values((value ? value : default_value), possible_values)) < 0)
+    {
+      if (short_opt != '-')
+        fprintf (stderr, "%s: %s argument, \"%s\", for option `--%s' (`-%c')%s\n", 
+          package_name, (found == -2) ? "ambiguous" : "invalid", value, long_opt, short_opt,
+          (additional_error ? additional_error : ""));
+      else
+        fprintf (stderr, "%s: %s argument, \"%s\", for option `--%s'%s\n", 
+          package_name, (found == -2) ? "ambiguous" : "invalid", value, long_opt,
+          (additional_error ? additional_error : ""));
+      return 1; /* failure */
+    }
     
   if (field_given && *field_given && ! override)
     return 0;
       static struct option long_options[] = {
         { "help",	0, NULL, 0 },
         { "version",	0, NULL, 0 },
+        { "tdab",	0, NULL, 'j' },
         { "dab",	0, NULL, 'd' },
         { "combined",	0, NULL, 'e' },
         { "test",	0, NULL, 'f' },
         { "print_distr",	0, NULL, 'P' },
         { "cutoff",	1, NULL, 'c' },
         { "genome",	1, NULL, 'G' },
+        { "tdab_list",	1, NULL, 'J' },
+        { "tsearch_mode",	1, NULL, 'S' },
         { "dab_list",	1, NULL, 'V' },
         { "num_iter",	1, NULL, 'I' },
         { "default_type",	1, NULL, 'T' },
         { "rbp_p",	1, NULL, 'R' },
         { "max_rank",	1, NULL, 'M' },
         { "dset_cutoff_file",	1, NULL, 'H' },
+        { "norm_mode",	1, NULL, 'n' },
+        { "exp",	1, NULL, 'N' },
         { "input",	1, NULL, 'i' },
         { "query",	1, NULL, 'q' },
         { "dab_dir",	1, NULL, 'F' },
         { 0,  0, 0, 0 }
       };
 
-      c = getopt_long (argc, argv, "defghvb:t:EPc:G:V:I:T:R:M:H:i:q:F:Q:D:", long_options, &option_index);
+      c = getopt_long (argc, argv, "jdefghvb:t:EPc:G:J:S:V:I:T:R:M:H:n:N:i:q:F:Q:D:", long_options, &option_index);
 
       if (c == -1) break;	/* Exit from `while (1)' loop.  */
 
       switch (c)
         {
+        case 'j':	/* Traditional DAB mode.  */
+        
+        
+          if (update_arg((void *)&(args_info->tdab_flag), 0, &(args_info->tdab_given),
+              &(local_args_info.tdab_given), optarg, 0, 0, ARG_FLAG,
+              check_ambiguity, override, 1, 0, "tdab", 'j',
+              additional_error))
+            goto failure;
+        
+          break;
         case 'd':	/* Sparse Dab mode.  */
         
         
             goto failure;
         
           break;
+        case 'J':	/* DAB list.  */
+        
+        
+          if (update_arg( (void *)&(args_info->tdab_list_arg), 
+               &(args_info->tdab_list_orig), &(args_info->tdab_list_given),
+              &(local_args_info.tdab_list_given), optarg, 0, 0, ARG_STRING,
+              check_ambiguity, override, 0, 0,
+              "tdab_list", 'J',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'S':	/* Search mode: equal weighted (eq) or CV LOI (cv_loi) (Applicable if DAB list contains more than 1 dataset.  */
+        
+        
+          if (update_arg( (void *)&(args_info->tsearch_mode_arg), 
+               &(args_info->tsearch_mode_orig), &(args_info->tsearch_mode_given),
+              &(local_args_info.tsearch_mode_given), optarg, cmdline_parser_tsearch_mode_values, "NA", ARG_STRING,
+              check_ambiguity, override, 0, 0,
+              "tsearch_mode", 'S',
+              additional_error))
+            goto failure;
+        
+          break;
         case 'V':	/* DAB list.  */
         
         
             goto failure;
         
           break;
+        case 'n':	/* Normalization method: rank - rank-normalize matrix, subtract_z - subtract-z-normalize matrix.  */
+        
+        
+          if (update_arg( (void *)&(args_info->norm_mode_arg), 
+               &(args_info->norm_mode_orig), &(args_info->norm_mode_given),
+              &(local_args_info.norm_mode_given), optarg, cmdline_parser_norm_mode_values, "NA", ARG_STRING,
+              check_ambiguity, override, 0, 0,
+              "norm_mode", 'n',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'N':	/* Raise the z-score to the power of this value (for --norm_mode=subtract_z).  */
+        
+        
+          if (update_arg( (void *)&(args_info->exp_arg), 
+               &(args_info->exp_orig), &(args_info->exp_given),
+              &(local_args_info.exp_given), optarg, 0, "-1.0", ARG_FLOAT,
+              check_ambiguity, override, 0, 0,
+              "exp", 'N',
+              additional_error))
+            goto failure;
+        
+          break;
         case 'i':	/* Gene mapping file.  */
         
         

tools/SeekIterative/cmdline.h

 {
   const char *help_help; /**< @brief Print help and exit help description.  */
   const char *version_help; /**< @brief Print version and exit help description.  */
+  int tdab_flag;	/**< @brief Traditional DAB mode (default=off).  */
+  const char *tdab_help; /**< @brief Traditional DAB mode help description.  */
   int dab_flag;	/**< @brief Sparse Dab mode (default=off).  */
   const char *dab_help; /**< @brief Sparse Dab mode help description.  */
   int combined_flag;	/**< @brief Combined-dab mode (default=off).  */
   char * genome_arg;	/**< @brief Genome mapping file.  */
   char * genome_orig;	/**< @brief Genome mapping file original value given at command line.  */
   const char *genome_help; /**< @brief Genome mapping file help description.  */
+  char * tdab_list_arg;	/**< @brief DAB list.  */
+  char * tdab_list_orig;	/**< @brief DAB list original value given at command line.  */
+  const char *tdab_list_help; /**< @brief DAB list help description.  */
+  char * tsearch_mode_arg;	/**< @brief Search mode: equal weighted (eq) or CV LOI (cv_loi) (Applicable if DAB list contains more than 1 dataset (default='NA').  */
+  char * tsearch_mode_orig;	/**< @brief Search mode: equal weighted (eq) or CV LOI (cv_loi) (Applicable if DAB list contains more than 1 dataset original value given at command line.  */
+  const char *tsearch_mode_help; /**< @brief Search mode: equal weighted (eq) or CV LOI (cv_loi) (Applicable if DAB list contains more than 1 dataset help description.  */
   char * dab_list_arg;	/**< @brief DAB list.  */
   char * dab_list_orig;	/**< @brief DAB list original value given at command line.  */
   const char *dab_list_help; /**< @brief DAB list help description.  */
   char * dset_cutoff_file_arg;	/**< @brief Dataset score cutoff file (default='NA').  */
   char * dset_cutoff_file_orig;	/**< @brief Dataset score cutoff file original value given at command line.  */
   const char *dset_cutoff_file_help; /**< @brief Dataset score cutoff file help description.  */
+  char * norm_mode_arg;	/**< @brief Normalization method: rank - rank-normalize matrix, subtract_z - subtract-z-normalize matrix (default='NA').  */
+  char * norm_mode_orig;	/**< @brief Normalization method: rank - rank-normalize matrix, subtract_z - subtract-z-normalize matrix original value given at command line.  */
+  const char *norm_mode_help; /**< @brief Normalization method: rank - rank-normalize matrix, subtract_z - subtract-z-normalize matrix help description.  */
+  float exp_arg;	/**< @brief Raise the z-score to the power of this value (for --norm_mode=subtract_z) (default='-1.0').  */
+  char * exp_orig;	/**< @brief Raise the z-score to the power of this value (for --norm_mode=subtract_z) original value given at command line.  */
+  const char *exp_help; /**< @brief Raise the z-score to the power of this value (for --norm_mode=subtract_z) help description.  */
   char * input_arg;	/**< @brief Gene mapping file.  */
   char * input_orig;	/**< @brief Gene mapping file original value given at command line.  */
   const char *input_help; /**< @brief Gene mapping file help description.  */
   
   unsigned int help_given ;	/**< @brief Whether help was given.  */
   unsigned int version_given ;	/**< @brief Whether version was given.  */
+  unsigned int tdab_given ;	/**< @brief Whether tdab was given.  */
   unsigned int dab_given ;	/**< @brief Whether dab was given.  */
   unsigned int combined_given ;	/**< @brief Whether combined was given.  */
   unsigned int test_given ;	/**< @brief Whether test was given.  */
   unsigned int print_distr_given ;	/**< @brief Whether print_distr was given.  */
   unsigned int cutoff_given ;	/**< @brief Whether cutoff was given.  */
   unsigned int genome_given ;	/**< @brief Whether genome was given.  */
+  unsigned int tdab_list_given ;	/**< @brief Whether tdab_list was given.  */
+  unsigned int tsearch_mode_given ;	/**< @brief Whether tsearch_mode was given.  */
   unsigned int dab_list_given ;	/**< @brief Whether dab_list was given.  */
   unsigned int num_iter_given ;	/**< @brief Whether num_iter was given.  */
   unsigned int default_type_given ;	/**< @brief Whether default_type was given.  */
   unsigned int rbp_p_given ;	/**< @brief Whether rbp_p was given.  */
   unsigned int max_rank_given ;	/**< @brief Whether max_rank was given.  */
   unsigned int dset_cutoff_file_given ;	/**< @brief Whether dset_cutoff_file was given.  */
+  unsigned int norm_mode_given ;	/**< @brief Whether norm_mode was given.  */
+  unsigned int exp_given ;	/**< @brief Whether exp was given.  */
   unsigned int input_given ;	/**< @brief Whether input was given.  */
   unsigned int query_given ;	/**< @brief Whether query was given.  */
   unsigned int dab_dir_given ;	/**< @brief Whether dab_dir was given.  */
 int cmdline_parser_required (struct gengetopt_args_info *args_info,
   const char *prog_name);
 
+extern const char *cmdline_parser_tsearch_mode_values[];  /**< @brief Possible values for tsearch_mode. */
+extern const char *cmdline_parser_norm_mode_values[];  /**< @brief Possible values for norm_mode. */
+
 
 #ifdef __cplusplus
 }

tools/SeekPrep/SeekPrep.cpp

 		}
 
 		if(sArgs.view_flag==1){
-			fprintf(stderr, "Operation not implemented yet!\n");
+			//fprintf(stderr, "Operation not implemented yet!\n");
+			CDataPair Dat;
+			char outFile[125];
+			if(!Dat.Open(sArgs.dabinput_arg, false, false)){
+				cerr << "error opening file" << endl;
+				return 1;
+			}
+			vector<unsigned int> veciGenes;
+			veciGenes.resize(vecstrGenes.size());
+			for(i=0; i<vecstrGenes.size(); i++)
+				veciGenes[i] = (unsigned int) Dat.GetGeneIndex(vecstrGenes[i]);
+
+			unsigned int s,t;
+			vector<float> d;
+			for(i=0; i<vecstrGenes.size(); i++){
+				if((s=veciGenes[i])==(unsigned int)-1) continue;
+				for(j=0; j<vecstrGenes.size(); j++){
+					if((t=veciGenes[j])==(unsigned int) -1) continue;
+					d.push_back(Dat.Get(s, t));
+				}
+			}
+			sort(d.begin(), d.end());
+			float ff[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9};
+			for(i=0; i<9; i++){
+				size_t ind = (size_t) ((float) d.size() * ff[i]);
+				fprintf(stderr, "%.3f %.3e\n", ff[i], d[ind]);
+			}
 			return 1;
 		}
 
 			else{
 				fprintf(stderr, "Error, unsupported type --default_type_arg\n");
 				return 1;
+			}		
+		}
+
+		if(sArgs.norm_flag==1 && norm_mode=="topological_overlap"){
+			omp_set_num_threads(8);
+			CDataPair Dat;
+			char outFile[125];
+			if(!Dat.Open(sArgs.dabinput_arg, false, false)){
+				cerr << "error opening file" << endl;
+				return 1;
 			}
-				
+			fprintf(stderr, "Finished opening file\n");
+			string fileName = CMeta::Basename(sArgs.dabinput_arg);
+			string fileStem = CMeta::Deextension(fileName);
+			sprintf(outFile, "%s/%s.dab", sArgs.dir_out_arg, fileStem.c_str());
+			CSeekWriter::TopologicalOverlap(Dat, vecstrGenes);
+			Dat.Save(outFile);
 		}
 
 		if(sArgs.gavg_flag==1){

tools/SeekPrep/SeekPrep.ggo

 section "Misc"
 option	"default_type"		T	"Default gene index type (choose unsigned short for genes, or unsigned int (32-bit) for transcripts) (required for DAB set mode and if --norm is enabled in DAB mode) (0 - unsigned int, 1 - unsigned short)"
 								int default="-1"
-option	"norm_mode"			n	"Normalization method: rank - rank-normalize matrix, subtract_z - subtract-z-normalize matrix (required for DAB set mode and if --norm is enabled)"
-								values="rank","subtract_z","NA" default="NA"
+option	"norm_mode"			n	"Normalization method: rank - rank-normalize matrix, subtract_z - subtract-z-normalize matrix (required for DAB set mode and if --norm is enabled, topological_overlap - TO measure from Ravasz et al)"
+								values="rank","subtract_z","topological_overlap","NA" default="NA"
 option	"logit"				l	"For --gavg and --gplat, whether to take logit of the value first (useful if edge value is probability)"
 								flag	off
 option	"max_rank"			M	"Maximum rank value (for --norm_mode=rank)"

tools/SeekPrep/cmdline.c

 /*
-  File autogenerated by gengetopt version 2.22
+  File autogenerated by gengetopt version 2.22.5
   generated with the following command:
-  /memex/qzhu/usr/bin/gengetopt -iSeekPrep.ggo --default-optional -u -N -e 
+  /usr/bin/gengetopt -iSeekPrep.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:
 #include <stdlib.h>
 #include <string.h>
 
-#include "getopt.h"
+#ifndef FIX_UNUSED
+#define FIX_UNUSED(X) (void) (X) /* avoid warnings for unused params */
+#endif
+
+#include <getopt.h>
 
 #include "cmdline.h"
 
   "  -Q, --quant=filename         Quant file",
   "\nMisc:",
   "  -T, --default_type=INT       Default gene index type (choose unsigned short \n                                 for genes, or unsigned int (32-bit) for \n                                 transcripts) (required for DAB set mode and if \n                                 --norm is enabled in DAB mode) (0 - unsigned \n                                 int, 1 - unsigned short)  (default=`-1')",
-  "  -n, --norm_mode=STRING       Normalization method: rank - rank-normalize \n                                 matrix, subtract_z - subtract-z-normalize \n                                 matrix (required for DAB set mode and if \n                                 --norm is enabled)  (possible values=\"rank\", \n                                 \"subtract_z\", \"NA\" default=`NA')",
+  "  -n, --norm_mode=STRING       Normalization method: rank - rank-normalize \n                                 matrix, subtract_z - subtract-z-normalize \n                                 matrix (required for DAB set mode and if \n                                 --norm is enabled, topological_overlap - TO \n                                 measure from Ravasz et al)  (possible \n                                 values=\"rank\", \"subtract_z\", \n                                 \"topological_overlap\", \"NA\" default=`NA')",
   "  -l, --logit                  For --gavg and --gplat, whether to take logit of \n                                 the value first (useful if edge value is \n                                 probability)  (default=off)",
   "  -M, --max_rank=INT           Maximum rank value (for --norm_mode=rank)  \n                                 (default=`-1')",
   "  -R, --rbp_p=FLOAT            RBP p parameter (for --norm_mode=rank)  \n                                 (default=`-1')",
 void clear_args (struct gengetopt_args_info *args_info);
 
 static int
-cmdline_parser_internal (int argc, char * const *argv, struct gengetopt_args_info *args_info,
+cmdline_parser_internal (int argc, char **argv, struct gengetopt_args_info *args_info,
                         struct cmdline_parser_params *params, const char *additional_error);
 
 static int
 cmdline_parser_required2 (struct gengetopt_args_info *args_info, const char *prog_name, const char *additional_error);
 
-char *cmdline_parser_norm_mode_values[] = {"rank", "subtract_z", "NA", 0} ;	/* Possible values for norm_mode.  */
+const char *cmdline_parser_norm_mode_values[] = {"rank", "subtract_z", "topological_overlap", "NA", 0}; /*< Possible values for norm_mode. */
 
 static char *
 gengetopt_strdup (const char *s);
 static
 void clear_args (struct gengetopt_args_info *args_info)
 {
+  FIX_UNUSED (args_info);
   args_info->dab_flag = 0;
   args_info->pclbin_flag = 0;
   args_info->db_flag = 0;
 void
 cmdline_parser_print_version (void)
 {
-  printf ("%s %s\n", CMDLINE_PARSER_PACKAGE, CMDLINE_PARSER_VERSION);
+  printf ("%s %s\n",
+     (strlen(CMDLINE_PARSER_PACKAGE_NAME) ? CMDLINE_PARSER_PACKAGE_NAME : CMDLINE_PARSER_PACKAGE),
+     CMDLINE_PARSER_VERSION);
 }
 
 static void print_help_common(void) {
   printf("\n");
 
   if (strlen(gengetopt_args_info_description) > 0)
-    printf("%s\n", gengetopt_args_info_description);
+    printf("%s\n\n", gengetopt_args_info_description);
 }
 
 void
   clear_args (args_info);
   init_args_info (args_info);
 
-  args_info->inputs = NULL;
+  args_info->inputs = 0;
   args_info->inputs_num = 0;
 }
 
  * -2 if more than one value has matched
  */
 static int
-check_possible_values(const char *val, char *values[])
+check_possible_values(const char *val, const char *values[])
 {
   int i, found, last;
   size_t len;
 
 
 static void
-write_into_file(FILE *outfile, const char *opt, const char *arg, char *values[])
+write_into_file(FILE *outfile, const char *opt, const char *arg, const char *values[])
 {
   int found = -1;
   if (arg) {
 char *
 gengetopt_strdup (const char *s)
 {
-  char *result = NULL;
+  char *result = 0;
   if (!s)
     return result;
 
 }
 
 int
-cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info)
+cmdline_parser (int argc, char **argv, struct gengetopt_args_info *args_info)
 {
   return cmdline_parser2 (argc, argv, args_info, 0, 1, 1);
 }
 
 int
-cmdline_parser_ext (int argc, char * const *argv, struct gengetopt_args_info *args_info,
+cmdline_parser_ext (int argc, char **argv, struct gengetopt_args_info *args_info,
                    struct cmdline_parser_params *params)
 {
   int result;
-  result = cmdline_parser_internal (argc, argv, args_info, params, NULL);
+  result = cmdline_parser_internal (argc, argv, args_info, params, 0);
 
   return result;
 }
 
 int
-cmdline_parser2 (int argc, char * const *argv, struct gengetopt_args_info *args_info, int override, int initialize, int check_required)
+cmdline_parser2 (int argc, char **argv, struct gengetopt_args_info *args_info, int override, int initialize, int check_required)
 {
   int result;
   struct cmdline_parser_params params;
   params.check_ambiguity = 0;
   params.print_errors = 1;
 
-  result = cmdline_parser_internal (argc, argv, args_info, &params, NULL);
+  result = cmdline_parser_internal (argc, argv, args_info, &params, 0);
 
   return result;
 }
 {
   int result = EXIT_SUCCESS;
 
-  if (cmdline_parser_required2(args_info, prog_name, NULL) > 0)
+  if (cmdline_parser_required2(args_info, prog_name, 0) > 0)
     result = EXIT_FAILURE;
 
   return result;
 cmdline_parser_required2 (struct gengetopt_args_info *args_info, const char *prog_name, const char *additional_error)
 {
   int error = 0;
+  FIX_UNUSED (additional_error);
 
   /* checks for required options */
   if (! args_info->input_given)
 static
 int update_arg(void *field, char **orig_field,
                unsigned int *field_given, unsigned int *prev_given, 
-               char *value, char *possible_values[], const char *default_value,
+               char *value, const char *possible_values[],
+               const char *default_value,
                cmdline_parser_arg_type arg_type,
                int check_ambiguity, int override,
                int no_free, int multiple_option,
   const char *val = value;
   int found;
   char **string_field;
+  FIX_UNUSED (field);
 
   stop_char = 0;
   found = 0;
 
 
 int
-cmdline_parser_internal (int argc, char * const *argv, struct gengetopt_args_info *args_info,
+cmdline_parser_internal (
+  int argc, char **argv, struct gengetopt_args_info *args_info,
                         struct cmdline_parser_params *params, const char *additional_error)
 {
   int c;	/* Character of the parsed option.  */
         { "exp",	1, NULL, 'E' },
         { "input",	1, NULL, 'i' },
         { "dir_out",	1, NULL, 'D' },
-        { NULL,	0, NULL, 0 }
+        { 0,  0, 0, 0 }
       };
 
       c = getopt_long (argc, argv, "defghH:J:G:L:O:W:apB:C:FXV:vsPb:I:A:NQ:T:n:lM:R:U:E:i:D:", long_options, &option_index);
             goto failure;
         
           break;
-        case 'n':	/* Normalization method: rank - rank-normalize matrix, subtract_z - subtract-z-normalize matrix (required for DAB set mode and if --norm is enabled).  */
+        case 'n':	/* Normalization method: rank - rank-normalize matrix, subtract_z - subtract-z-normalize matrix (required for DAB set mode and if --norm is enabled, topological_overlap - TO measure from Ravasz et al).  */
         
         
           if (update_arg( (void *)&(args_info->norm_mode_arg), 

tools/SeekPrep/cmdline.h

 /** @file cmdline.h
  *  @brief The header file for the command line option parser
- *  generated by GNU Gengetopt version 2.22
+ *  generated by GNU Gengetopt version 2.22.5
  *  http://www.gnu.org/software/gengetopt.
  *  DO NOT modify this file, since it can be overwritten
  *  @author GNU Gengetopt by Lorenzo Bettini */
 #endif /* __cplusplus */
 
 #ifndef CMDLINE_PARSER_PACKAGE
-/** @brief the program name */
+/** @brief the program name (used for printing errors) */
 #define CMDLINE_PARSER_PACKAGE "SeekPrep"
 #endif
 
+#ifndef CMDLINE_PARSER_PACKAGE_NAME
+/** @brief the complete program name (used for help and version) */
+#define CMDLINE_PARSER_PACKAGE_NAME "SeekPrep"
+#endif
+
 #ifndef CMDLINE_PARSER_VERSION
 /** @brief the program version */
 #define CMDLINE_PARSER_VERSION "1.0"
   int default_type_arg;	/**< @brief Default gene index type (choose unsigned short for genes, or unsigned int (32-bit) for transcripts) (required for DAB set mode and if --norm is enabled in DAB mode) (0 - unsigned int, 1 - unsigned short) (default='-1').  */
   char * default_type_orig;	/**< @brief Default gene index type (choose unsigned short for genes, or unsigned int (32-bit) for transcripts) (required for DAB set mode and if --norm is enabled in DAB mode) (0 - unsigned int, 1 - unsigned short) original value given at command line.  */
   const char *default_type_help; /**< @brief Default gene index type (choose unsigned short for genes, or unsigned int (32-bit) for transcripts) (required for DAB set mode and if --norm is enabled in DAB mode) (0 - unsigned int, 1 - unsigned short) help description.  */
-  char * norm_mode_arg;	/**< @brief Normalization method: rank - rank-normalize matrix, subtract_z - subtract-z-normalize matrix (required for DAB set mode and if --norm is enabled) (default='NA').  */
-  char * norm_mode_orig;	/**< @brief Normalization method: rank - rank-normalize matrix, subtract_z - subtract-z-normalize matrix (required for DAB set mode and if --norm is enabled) original value given at command line.  */
-  const char *norm_mode_help; /**< @brief Normalization method: rank - rank-normalize matrix, subtract_z - subtract-z-normalize matrix (required for DAB set mode and if --norm is enabled) help description.  */
+  char * norm_mode_arg;	/**< @brief Normalization method: rank - rank-normalize matrix, subtract_z - subtract-z-normalize matrix (required for DAB set mode and if --norm is enabled, topological_overlap - TO measure from Ravasz et al) (default='NA').  */
+  char * norm_mode_orig;	/**< @brief Normalization method: rank - rank-normalize matrix, subtract_z - subtract-z-normalize matrix (required for DAB set mode and if --norm is enabled, topological_overlap - TO measure from Ravasz et al) original value given at command line.  */
+  const char *norm_mode_help; /**< @brief Normalization method: rank - rank-normalize matrix, subtract_z - subtract-z-normalize matrix (required for DAB set mode and if --norm is enabled, topological_overlap - TO measure from Ravasz et al) help description.  */
   int logit_flag;	/**< @brief For --gavg and --gplat, whether to take logit of the value first (useful if edge value is probability) (default=off).  */
   const char *logit_help; /**< @brief For --gavg and --gplat, whether to take logit of the value first (useful if edge value is probability) help description.  */
   int max_rank_arg;	/**< @brief Maximum rank value (for --norm_mode=rank) (default='-1').  */
  * @param args_info the structure where option information will be stored
  * @return 0 if everything went fine, NON 0 if an error took place
  */
-int cmdline_parser (int argc, char * const *argv,
+int cmdline_parser (int argc, char **argv,
   struct gengetopt_args_info *args_info);
 
 /**
  * @return 0 if everything went fine, NON 0 if an error took place
  * @deprecated use cmdline_parser_ext() instead
  */
-int cmdline_parser2 (int argc, char * const *argv,
+int cmdline_parser2 (int argc, char **argv,
   struct gengetopt_args_info *args_info,
   int override, int initialize, int check_required);
 
  * @param params additional parameters for the parser
  * @return 0 if everything went fine, NON 0 if an error took place
  */
-int cmdline_parser_ext (int argc, char * const *argv,
+int cmdline_parser_ext (int argc, char **argv,
   struct gengetopt_args_info *args_info,
   struct cmdline_parser_params *params);
 
 int cmdline_parser_required (struct gengetopt_args_info *args_info,
   const char *prog_name);
 
-extern char *cmdline_parser_norm_mode_values[] ;	/**< @brief Possible values for norm_mode.  */
+extern const char *cmdline_parser_norm_mode_values[];  /**< @brief Possible values for norm_mode. */
 
 
 #ifdef __cplusplus

tools/SeekReader/SeekReader.cpp

 		vector<string> vecstrDataset;
 		if(!CSeekTools::ReadListOneColumn(sArgs.dweight_map_arg, vecstrDataset))
 			return false;
-
-		vector<vector<float> > vec_score;
-		vector<vector<float> > orig_score;
+		vector<vector<float> > vec_score, orig_score;
 		utype i, j;
 		int num_query = sArgs.dweight_num_arg; //random query
 		orig_score.resize(num_query);
 			}
 		}
 		fprintf(stderr, "Done!\n");
+		return 0;
+	}
+
+	if(sArgs.convert_aracne_flag==1){
+		int lineLen = 1024;
+		char *acBuffer = (char*)malloc(lineLen);
+		FILE *infile;
+		if((infile=fopen(sArgs.aracne_file_arg, "r"))==NULL){
+			fprintf(stderr, "Error no such file %s\n", sArgs.aracne_file_arg);
+			return 1;
+		}
+		while(fgets(acBuffer, lineLen, infile)!=NULL){
+			while(strlen(acBuffer)==lineLen-1){
+				int len = strlen(acBuffer);
+				fseek(infile, -len, SEEK_CUR);
+				lineLen+=1024;
+				acBuffer = (char*)realloc(acBuffer, lineLen);
+				char *ret = fgets(acBuffer, lineLen, infile);
+			}
+		}
+		fclose(infile);
+		fprintf(stderr, "Max line length: %d\n", lineLen);
+		free(acBuffer);
+
+		acBuffer = (char*)malloc(lineLen);
+		ifstream ia;
+		ia.open(sArgs.aracne_file_arg);
+		if(!ia.is_open()){
+			fprintf(stderr, "Error opening file %s\n", sArgs.aracne_file_arg);
+			return 1;
+		}
+		set<string> allGenes;
+		size_t ci=0;
+		while(!ia.eof()){
+			ia.getline(acBuffer, lineLen-1);
+			if(acBuffer[0]==0) break;
+			acBuffer[lineLen-1] = 0;
+			if(acBuffer[0]=='>') continue;
+			vector<string> tok;
+			CMeta::Tokenize(acBuffer, tok);
+			allGenes.insert(tok[0]);
+			size_t si;
+			for(si=1; si<tok.size(); si+=2){
+				allGenes.insert(tok[si]);
+			}
+			if(ci%100==0){
+				fprintf(stderr, "Gene Number %d\n", ci);
+			}
+			ci++;
+		}
+		ia.close();
+
+		vector<string> vecGenes;
+		copy(allGenes.begin(), allGenes.end(), back_inserter(vecGenes));
+		map<string,size_t> mapiGenes;
+		for(i=0; i<vecGenes.size(); i++)
+			mapiGenes[vecGenes[i]] = i;
+
+		CDat Dat;
+		Dat.Open(vecGenes);
+		ci = 0;
+		ia.open(sArgs.aracne_file_arg);
+		while(!ia.eof()){
+			ia.getline(acBuffer, lineLen-1);
+			if(acBuffer[0]==0) break;
+			acBuffer[lineLen-1] = 0;
+			if(acBuffer[0]=='>') continue;
+			vector<string> tok;
+			CMeta::Tokenize(acBuffer, tok);
+			size_t tG = mapiGenes[tok[0]];
+			size_t si;
+			for(si=1; si<tok.size(); si+=2){
+				size_t oG = mapiGenes[tok[si]];
+				float fG = atof(tok[si+1].c_str());
+				Dat.Set(tG, oG, fG);
+			}
+			if(ci%100==0){
+				fprintf(stderr, "Gene Number %d\n", ci);
+			}
+			ci++;
+		}
+		ia.close();
+		free(acBuffer);
+		Dat.Save(sArgs.output_dab_file_arg);
+		return 0;
 	}
 
 	if(sArgs.weight_flag==1){
 				fprintf(stderr, "Not unique\n");
 		}
 		fprintf(stderr, "Done!\n");
+		return 0;
+	}
+
+	if(sArgs.limit_hub_flag==1){
+		float per = 0.30;
+		CDataPair Dat;
+		char outFile[1024];
+		fprintf(stderr, "Opening file...\n");
+		if(!Dat.Open(sArgs.dabinput_arg, false, false, 2, false, false)){
+			cerr << "error opening file" << endl;
+			return 1;
+		}
+		vector<unsigned int> veciGenes;
+		veciGenes.resize(vecstrGenes.size());
+		for(i=0; i<vecstrGenes.size(); i++)
+			veciGenes[i] = (unsigned int) Dat.GetGeneIndex(vecstrGenes[i]);
+
+		unsigned int s, t, j, ss, tt;
+		float d;
+		CSeekIntIntMap m(vecstrGenes.size());
+		for(i=0; i<vecstrGenes.size(); i++){
+			if((s=veciGenes[i])==(unsigned int)-1) continue;
+			m.Add(i);
+		}
+		vector<float> vecSum;
+		CSeekTools::InitVector(vecSum, vecstrGenes.size(),(float) 0);
+		const vector<utype> &allRGenes = m.GetAllReverse();
+		for(i=0; i<m.GetNumSet(); i++){	
+			s = veciGenes[allRGenes[i]];
+			for(j=i+1; j<m.GetNumSet(); j++){
+				t = veciGenes[allRGenes[j]];
+				if(CMeta::IsNaN(d = Dat.Get(s,t))) continue;
+				vecSum[allRGenes[i]]+=d;
+				vecSum[allRGenes[j]]+=d;
+			}
+		}
+		vector<float> backupSum;
+		for(i=0; i<vecSum.size(); i++)
+			backupSum.push_back(vecSum[i]);
+		sort(vecSum.begin(), vecSum.end(), greater<float>());
+		int index = (int) (per * (float) m.GetNumSet());
+		float percentile = vecSum[index];
+		vector<string> limitedGenes;
+		for(i=0; i<backupSum.size(); i++){
+			if(backupSum[i] >= percentile){
+				limitedGenes.push_back(vecstrGenes[i]);
+			}
+		}
+		fprintf(stderr, "%d / %d genes to be written!\n", limitedGenes.size(), m.GetNumSet());
+		CDat NewDat;
+		NewDat.Open(limitedGenes);
+		for(i=0; i<limitedGenes.size(); i++){	
+			s = (unsigned int) Dat.GetGeneIndex(limitedGenes[i]);
+			ss = NewDat.GetGeneIndex(limitedGenes[i]);
+			for(j=i+1; j<limitedGenes.size(); j++){
+				t = (unsigned int) Dat.GetGeneIndex(limitedGenes[j]);
+				tt = NewDat.GetGeneIndex(limitedGenes[j]);
+				d = Dat.Get(s, t);
+				NewDat.Set(ss, tt, d);
+			}
+		}
+		NewDat.Save(sArgs.hub_dab_output_arg);
+		return 0;
 	}
 
 	if(sArgs.comp_ranking_flag==1){
 			}
 			fprintf(stderr, "Query %d %d\n", i, count);
 		}
+		return 0;
 	}
+
 	if(sArgs.dataset_flag==1){
 		string db = sArgs.db_arg;
 		string dset_list = sArgs.dset_list_arg;
 		if(toOutput){
 			fclose(out);
 		}		
+		return 0;
+	}
 
-	}
-	else if(sArgs.databaselet_flag==1){
-
+	if(sArgs.databaselet_flag==1){
 		string db = sArgs.db_arg;
 		string dset_list = sArgs.dset_list_arg;
 		string dir_in = sArgs.dir_in_arg;

tools/SeekReader/SeekReader.ggo

 								flag	off
 option	"comp_ranking"		C	"Compare two rankings (*.gscore files)"
 								flag	off
+option	"convert_aracne"	J	"Convert Aracne output (.txt) to DAB file"
+								flag	off
+option	"limit_hub"			Y	"Limit genes in the DAB to those that are hubby"
+								flag	off
+
+section "Limit Hub"
+option	"dabinput"			y	"DAB input file"
+								string typestr="filename" default="NA"
+option	"hub_dab_output"	Z	"DAB output file"
+								string typestr="filename" default="NA"
+
+section	"Convert Aracne"
+option	"aracne_file"		K	"Aracne .txt output file"
+								string typestr="filename" default="NA"
+option	"output_dab_file"	L	"DAB file"
+								string typestr="filename" default="NA"
 
 section "Weight"
 option	"dweight_dir"		E	"Dataset weight directory"

tools/SeekReader/cmdline.c

   "  -W, --weight                  Test dataset weights  (default=off)",
   "  -U, --weight2                 Test dataset weights 2  (default=off)",
   "  -C, --comp_ranking            Compare two rankings (*.gscore files)  \n                                  (default=off)",
+  "  -J, --convert_aracne          Convert Aracne output (.txt) to DAB file  \n                                  (default=off)",
+  "  -Y, --limit_hub               Limit genes in the DAB to those that are hubby  \n                                  (default=off)",
+  "\nLimit Hub:",
+  "  -y, --dabinput=filename       DAB input file  (default=`NA')",
+  "  -Z, --hub_dab_output=filename DAB output file  (default=`NA')",
+  "\nConvert Aracne:",
+  "  -K, --aracne_file=filename    Aracne .txt output file  (default=`NA')",
+  "  -L, --output_dab_file=filename\n                                DAB file  (default=`NA')",
   "\nWeight:",
   "  -E, --dweight_dir=directory   Dataset weight directory  (default=`NA')",
   "  -n, --dweight_num=INT         Number of .dweight files  (default=`1000')",
   args_info->weight_given = 0 ;
   args_info->weight2_given = 0 ;
   args_info->comp_ranking_given = 0 ;
+  args_info->convert_aracne_given = 0 ;
+  args_info->limit_hub_given = 0 ;
+  args_info->dabinput_given = 0 ;
+  args_info->hub_dab_output_given = 0 ;
+  args_info->aracne_file_given = 0 ;
+  args_info->output_dab_file_given = 0 ;
   args_info->dweight_dir_given = 0 ;
   args_info->dweight_num_given = 0 ;
   args_info->dweight_map_given = 0 ;
   args_info->weight_flag = 0;
   args_info->weight2_flag = 0;
   args_info->comp_ranking_flag = 0;
+  args_info->convert_aracne_flag = 0;
+  args_info->limit_hub_flag = 0;
+  args_info->dabinput_arg = gengetopt_strdup ("NA");
+  args_info->dabinput_orig = NULL;
+  args_info->hub_dab_output_arg = gengetopt_strdup ("NA");
+  args_info->hub_dab_output_orig = NULL;
+  args_info->aracne_file_arg = gengetopt_strdup ("NA");
+  args_info->aracne_file_orig = NULL;
+  args_info->output_dab_file_arg = gengetopt_strdup ("NA");
+  args_info->output_dab_file_orig = NULL;
   args_info->dweight_dir_arg = gengetopt_strdup ("NA");
   args_info->dweight_dir_orig = NULL;
   args_info->dweight_num_arg = 1000;
   args_info->weight_help = gengetopt_args_info_help[5] ;
   args_info->weight2_help = gengetopt_args_info_help[6] ;
   args_info->comp_ranking_help = gengetopt_args_info_help[7] ;
-  args_info->dweight_dir_help = gengetopt_args_info_help[9] ;
-  args_info->dweight_num_help = gengetopt_args_info_help[10] ;
-  args_info->dweight_map_help = gengetopt_args_info_help[11] ;
-  args_info->dweight_test_dir_help = gengetopt_args_info_help[12] ;
-  args_info->dweight_test_num_help = gengetopt_args_info_help[13] ;
-  args_info->gscore_dir1_help = gengetopt_args_info_help[15] ;
-  args_info->gscore_dir2_help = gengetopt_args_info_help[16] ;
-  args_info->gscore_num1_help = gengetopt_args_info_help[17] ;
-  args_info->order_stat_single_gene_query_help = gengetopt_args_info_help[19] ;
-  args_info->db_help = gengetopt_args_info_help[20] ;
-  args_info->dset_list_help = gengetopt_args_info_help[21] ;
-  args_info->input_help = gengetopt_args_info_help[22] ;
-  args_info->single_query_help = gengetopt_args_info_help[23] ;
-  args_info->dir_in_help = gengetopt_args_info_help[24] ;
-  args_info->dir_prep_in_help = gengetopt_args_info_help[25] ;
-  args_info->dir_gvar_in_help = gengetopt_args_info_help[26] ;
-  args_info->dir_sinfo_in_help = gengetopt_args_info_help[27] ;
-  args_info->is_nibble_help = gengetopt_args_info_help[28] ;
-  args_info->platform_dir_help = gengetopt_args_info_help[29] ;
-  args_info->gvar_cutoff_help = gengetopt_args_info_help[30] ;
-  args_info->multi_query_help = gengetopt_args_info_help[31] ;
-  args_info->output_file_help = gengetopt_args_info_help[32] ;
+  args_info->convert_aracne_help = gengetopt_args_info_help[8] ;
+  args_info->limit_hub_help = gengetopt_args_info_help[9] ;
+  args_info->dabinput_help = gengetopt_args_info_help[11] ;
+  args_info->hub_dab_output_help = gengetopt_args_info_help[12] ;
+  args_info->aracne_file_help = gengetopt_args_info_help[14] ;
+  args_info->output_dab_file_help = gengetopt_args_info_help[15] ;
+  args_info->dweight_dir_help = gengetopt_args_info_help[17] ;
+  args_info->dweight_num_help = gengetopt_args_info_help[18] ;
+  args_info->dweight_map_help = gengetopt_args_info_help[19] ;
+  args_info->dweight_test_dir_help = gengetopt_args_info_help[20] ;
+  args_info->dweight_test_num_help = gengetopt_args_info_help[21] ;
+  args_info->gscore_dir1_help = gengetopt_args_info_help[23] ;
+  args_info->gscore_dir2_help = gengetopt_args_info_help[24] ;
+  args_info->gscore_num1_help = gengetopt_args_info_help[25] ;
+  args_info->order_stat_single_gene_query_help = gengetopt_args_info_help[27] ;
+  args_info->db_help = gengetopt_args_info_help[28] ;
+  args_info->dset_list_help = gengetopt_args_info_help[29] ;
+  args_info->input_help = gengetopt_args_info_help[30] ;
+  args_info->single_query_help = gengetopt_args_info_help[31] ;
+  args_info->dir_in_help = gengetopt_args_info_help[32] ;
+  args_info->dir_prep_in_help = gengetopt_args_info_help[33] ;
+  args_info->dir_gvar_in_help = gengetopt_args_info_help[34] ;
+  args_info->dir_sinfo_in_help = gengetopt_args_info_help[35] ;
+  args_info->is_nibble_help = gengetopt_args_info_help[36] ;
+  args_info->platform_dir_help = gengetopt_args_info_help[37] ;
+  args_info->gvar_cutoff_help = gengetopt_args_info_help[38] ;
+  args_info->multi_query_help = gengetopt_args_info_help[39] ;
+  args_info->output_file_help = gengetopt_args_info_help[40] ;
   
 }
 
 cmdline_parser_release (struct gengetopt_args_info *args_info)
 {
   unsigned int i;
+  free_string_field (&(args_info->dabinput_arg));
+  free_string_field (&(args_info->dabinput_orig));
+  free_string_field (&(args_info->hub_dab_output_arg));
+  free_string_field (&(args_info->hub_dab_output_orig));
+  free_string_field (&(args_info->aracne_file_arg));
+  free_string_field (&(args_info->aracne_file_orig));
+  free_string_field (&(args_info->output_dab_file_arg));
+  free_string_field (&(args_info->output_dab_file_orig));
   free_string_field (&(args_info->dweight_dir_arg));
   free_string_field (&(args_info->dweight_dir_orig));
   free_string_field (&(args_info->dweight_num_orig));
     write_into_file(outfile, "weight2", 0, 0 );
   if (args_info->comp_ranking_given)
     write_into_file(outfile, "comp_ranking", 0, 0 );
+  if (args_info->convert_aracne_given)
+    write_into_file(outfile, "convert_aracne", 0, 0 );
+  if (args_info->limit_hub_given)
+    write_into_file(outfile, "limit_hub", 0, 0 );
+  if (args_info->dabinput_given)
+    write_into_file(outfile, "dabinput", args_info->dabinput_orig, 0);
+  if (args_info->hub_dab_output_given)
+    write_into_file(outfile, "hub_dab_output", args_info->hub_dab_output_orig, 0);
+  if (args_info->aracne_file_given)
+    write_into_file(outfile, "aracne_file", args_info->aracne_file_orig, 0);
+  if (args_info->output_dab_file_given)
+    write_into_file(outfile, "output_dab_file", args_info->output_dab_file_orig, 0);
   if (args_info->dweight_dir_given)
     write_into_file(outfile, "dweight_dir", args_info->dweight_dir_orig, 0);
   if (args_info->dweight_num_given)
         { "weight",	0, NULL, 'W' },
         { "weight2",	0, NULL, 'U' },
         { "comp_ranking",	0, NULL, 'C' },
+        { "convert_aracne",	0, NULL, 'J' },
+        { "limit_hub",	0, NULL, 'Y' },
+        { "dabinput",	1, NULL, 'y' },
+        { "hub_dab_output",	1, NULL, 'Z' },
+        { "aracne_file",	1, NULL, 'K' },
+        { "output_dab_file",	1, NULL, 'L' },
         { "dweight_dir",	1, NULL, 'E' },
         { "dweight_num",	1, NULL, 'n' },
         { "dweight_map",	1, NULL, 'M' },
         { 0,  0, 0, 0 }
       };
 
-      c = getopt_long (argc, argv, "VDAWUCE:n:M:F:G:H:h:I:Ox:X:i:q:d:p:r:s:NP:v:Q:o:", long_options, &option_index);
+      c = getopt_long (argc, argv, "VDAWUCJYy:Z:K:L:E:n:M:F:G:H:h:I:Ox:X:i:q:d:p:r:s:NP:v:Q:o:", long_options, &option_index);
 
       if (c == -1) break;	/* Exit from `while (1)' loop.  */
 
             goto failure;
         
           break;
+        case 'J':	/* Convert Aracne output (.txt) to DAB file.  */
+        
+        
+          if (update_arg((void *)&(args_info->convert_aracne_flag), 0, &(args_info->convert_aracne_given),
+              &(local_args_info.convert_aracne_given), optarg, 0, 0, ARG_FLAG,
+              check_ambiguity, override, 1, 0, "convert_aracne", 'J',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'Y':	/* Limit genes in the DAB to those that are hubby.  */
+        
+        
+          if (update_arg((void *)&(args_info->limit_hub_flag), 0, &(args_info->limit_hub_given),
+              &(local_args_info.limit_hub_given), optarg, 0, 0, ARG_FLAG,
+              check_ambiguity, override, 1, 0, "limit_hub", 'Y',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'y':	/* DAB input file.  */
+        
+        
+          if (update_arg( (void *)&(args_info->dabinput_arg), 
+               &(args_info->dabinput_orig), &(args_info->dabinput_given),
+              &(local_args_info.dabinput_given), optarg, 0, "NA", ARG_STRING,
+              check_ambiguity, override, 0, 0,
+              "dabinput", 'y',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'Z':	/* DAB output file.  */
+        
+        
+          if (update_arg( (void *)&(args_info->hub_dab_output_arg), 
+               &(args_info->hub_dab_output_orig), &(args_info->hub_dab_output_given),
+              &(local_args_info.hub_dab_output_given), optarg, 0, "NA", ARG_STRING,
+              check_ambiguity, override, 0, 0,
+              "hub_dab_output", 'Z',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'K':	/* Aracne .txt output file.  */
+        
+        
+          if (update_arg( (void *)&(args_info->aracne_file_arg), 
+               &(args_info->aracne_file_orig), &(args_info->aracne_file_given),
+              &(local_args_info.aracne_file_given), optarg, 0, "NA", ARG_STRING,
+              check_ambiguity, override, 0, 0,
+              "aracne_file", 'K',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'L':	/* DAB file.  */
+        
+        
+          if (update_arg( (void *)&(args_info->output_dab_file_arg), 
+               &(args_info->output_dab_file_orig), &(args_info->output_dab_file_given),
+              &(local_args_info.output_dab_file_given), optarg, 0, "NA", ARG_STRING,
+              check_ambiguity, override, 0, 0,
+              "output_dab_file", 'L',
+              additional_error))
+            goto failure;
+        
+          break;
         case 'E':	/* Dataset weight directory.  */
         
         

tools/SeekReader/cmdline.h

   const char *weight2_help; /**< @brief Test dataset weights 2 help description.  */
   int comp_ranking_flag;	/**< @brief Compare two rankings (*.gscore files) (default=off).  */
   const char *comp_ranking_help; /**< @brief Compare two rankings (*.gscore files) help description.  */
+  int convert_aracne_flag;	/**< @brief Convert Aracne output (.txt) to DAB file (default=off).  */
+  const char *convert_aracne_help; /**< @brief Convert Aracne output (.txt) to DAB file help description.  */
+  int limit_hub_flag;	/**< @brief Limit genes in the DAB to those that are hubby (default=off).  */
+  const char *limit_hub_help; /**< @brief Limit genes in the DAB to those that are hubby help description.  */
+  char * dabinput_arg;	/**< @brief DAB input file (default='NA').  */
+  char * dabinput_orig;	/**< @brief DAB input file original value given at command line.  */
+  const char *dabinput_help; /**< @brief DAB input file help description.  */
+  char * hub_dab_output_arg;	/**< @brief DAB output file (default='NA').  */
+  char * hub_dab_output_orig;	/**< @brief DAB output file original value given at command line.  */
+  const char *hub_dab_output_help; /**< @brief DAB output file help description.  */
+  char * aracne_file_arg;	/**< @brief Aracne .txt output file (default='NA').  */
+  char * aracne_file_orig;	/**< @brief Aracne .txt output file original value given at command line.  */
+  const char *aracne_file_help; /**< @brief Aracne .txt output file help description.  */
+  char * output_dab_file_arg;	/**< @brief DAB file (default='NA').  */
+  char * output_dab_file_orig;	/**< @brief DAB file original value given at command line.  */
+  const char *output_dab_file_help; /**< @brief DAB file help description.  */
   char * dweight_dir_arg;	/**< @brief Dataset weight directory (default='NA').  */
   char * dweight_dir_orig;	/**< @brief Dataset weight directory original value given at command line.  */
   const char *dweight_dir_help; /**< @brief Dataset weight directory help description.  */
   unsigned int weight_given ;	/**< @brief Whether weight was given.  */
   unsigned int weight2_given ;	/**< @brief Whether weight2 was given.  */
   unsigned int comp_ranking_given ;	/**< @brief Whether comp_ranking was given.  */
+  unsigned int convert_aracne_given ;	/**< @brief Whether convert_aracne was given.  */
+  unsigned int limit_hub_given ;	/**< @brief Whether limit_hub was given.  */
+  unsigned int dabinput_given ;	/**< @brief Whether dabinput was given.  */
+  unsigned int hub_dab_output_given ;	/**< @brief Whether hub_dab_output was given.  */
+  unsigned int aracne_file_given ;	/**< @brief Whether aracne_file was given.  */
+  unsigned int output_dab_file_given ;	/**< @brief Whether output_dab_file was given.  */
   unsigned int dweight_dir_given ;	/**< @brief Whether dweight_dir was given.  */
   unsigned int dweight_num_given ;	/**< @brief Whether dweight_num was given.  */
   unsigned int dweight_map_given ;	/**< @brief Whether dweight_map was given.  */

tools/SeekTest/SeekTest.cpp

 			Qi.clear();
 		}
 
-		map<float,vector<int> > countPairs;
-		float point = 0.0;
-		while(point<=5.0){
-			countPairs[point] = vector<int>();
-			CSeekTools::InitVector(countPairs[point], iDatasets, (int)0);
-			point+=0.25;
-		}
-
-		for(k=0; k<iDatasets; k++){
-			CSeekIntIntMap *mapQ = vc[k]->GetDBMap();
-			CSeekIntIntMap *mapG = vc[k]->GetGeneMap();
-			if(mapQ==NULL) continue;
-			unsigned char **f = vc[k]->GetMatrix();
-
-			size_t qi, qj;
-			for(qi=0; qi<allQ.size(); qi++){
-				utype gene_qi = allQ[qi];
-				utype iQ = mapQ->GetForward(gene_qi);
-				if(CSeekTools::IsNaN(iQ)) continue;
-				for(qj=qi+1; qj<allQ.size(); qj++){
-					utype gene_qj = allQ[qj];
-					utype jQ = mapG->GetForward(gene_qj);
-					if(CSeekTools::IsNaN(jQ)) continue;
-					unsigned char uc = f[iQ][gene_qj];
-					if(uc==255) continue;
-					float vv = quant[uc];
-					point = 0.0;
-					while(point<=5.0){
-						if(vv>point)
-							countPairs[point][k]++;
-						point+=0.25;
+		if(!!sArgs.count_pair_flag){
+			map<float,vector<int> > countPairs;
+			float point = 0.0;
+			while(point<=5.0){
+				countPairs[point] = vector<int>();
+				CSeekTools::InitVector(countPairs[point], iDatasets, (int)0);
+				point+=0.25;
+			}
+			for(k=0; k<iDatasets; k++){
+				CSeekIntIntMap *mapQ = vc[k]->GetDBMap();
+				CSeekIntIntMap *mapG = vc[k]->GetGeneMap();
+				if(mapQ==NULL) continue;
+				unsigned char **f = vc[k]->GetMatrix();
+				size_t qi, qj;
+				for(qi=0; qi<allQ.size(); qi++){
+					utype gene_qi = allQ[qi];
+					utype iQ = mapQ->GetForward(gene_qi);
+					if(CSeekTools::IsNaN(iQ)) continue;
+					for(qj=qi+1; qj<allQ.size(); qj++){
+						utype gene_qj = allQ[qj];
+						utype jQ = mapG->GetForward(gene_qj);
+						if(CSeekTools::IsNaN(jQ)) continue;
+						unsigned char uc = f[iQ][gene_qj];
+						if(uc==255) continue;
+						float vv = quant[uc];
+						point = 0.0;
+						while(point<=5.0){
+							if(vv>point)
+								countPairs[point][k]++;
+							point+=0.25;
+						}
 					}
 				}
 			}
+			for(i=0; i<iDatasets; i++)
+				vc[i]->DeleteQueryBlock();
+			point = 0.0;			
+			while(point<=5.0){
+				sort(countPairs[point].begin(), countPairs[point].end(), greater<int>());
+				float tmp = 0;
+				for(i=0; i<10; i++)
+					tmp+=(float)countPairs[point][i];
+				tmp/=10.0;
+				fprintf(stderr, "%.2f\t%.1f pairs\n", point, tmp);
+				point+=0.25;
+			}
 		}
-		
-		for(i=0; i<iDatasets; i++)
-			vc[i]->DeleteQueryBlock();
 
-		point = 0.0;			
-		while(point<=5.0){
-			sort(countPairs[point].begin(), countPairs[point].end(), greater<int>());
-			float tmp = 0;
-			for(i=0; i<10; i++){
-				tmp+=(float)countPairs[point][i];
+		if(!!sArgs.histogram_flag){
+			srand(unsigned(time(0)));
+			vector<int> dID;
+			for(k=0; k<iDatasets; k++)
+				dID.push_back(k);
+			random_shuffle(dID.begin(), dID.end());
+			utype kk;
+			for(kk=0; kk<100; kk++){
+				k = dID[kk];
+				CSeekIntIntMap *mapQ = vc[k]->GetDBMap();
+				CSeekIntIntMap *mapG = vc[k]->GetGeneMap();
+				if(mapQ==NULL) continue;
+				unsigned char **f = vc[k]->GetMatrix();
+				size_t qi, ii;
+				for(qi=0; qi<allQ.size(); qi++){
+					utype gene_qi = allQ[qi];
+					utype iQ = mapQ->GetForward(gene_qi);
+					if(CSeekTools::IsNaN(iQ)) continue;
+					vector<float> z_score;
+					const vector<utype> &allRGenes = mapG->GetAllReverse();
+					for(ii=0; ii<mapG->GetNumSet(); ii++){
+						size_t ij = allRGenes[ii];
+						float a = vc[k]->GetGeneAverage(ij);
+						unsigned char x = f[iQ][ij];
+						if(x==255) continue;
+						//float xnew = quant[x] - a;
+						float xnew = quant[x];
+						z_score.push_back(xnew);
+					}
+					sort(z_score.begin(), z_score.end());
+					float pts[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9};
+					fprintf(stderr, "Query %s\tDataset %d\t", vecstrGenes[gene_qi].c_str(), k);
+					for(ii=0; ii<9; ii++)
+						fprintf(stderr, "%.2f\t", z_score[(int)(pts[ii]*z_score.size())]);
+					fprintf(stderr, "\n");
+				}
 			}
-			tmp/=10.0;
-			fprintf(stderr, "%.2f\t%.1f pairs\n", point, tmp);
-			point+=0.25;
+			return 0;
 		}
 
 	}

tools/SeekTest/SeekTest.ggo

 								string typestr="filename"
 option	"quant"				q	"Quant file"
 								string typestr="filename"
+option	"count_pair"		c	"Count number of z-scores exceeding a threshold"
+								flag	off
+option	"histogram"			h	"Get distribution of z-scores of given genes"
+								flag	off
 
 section "DAB mode"
 option	"gene_set_list"		g	"List of gene-set files"