Commits

Qian Zhu committed 16f7f78

Added options to allow integrations of many coexpression networks

  • Participants
  • Parent commits 10a20bd

Comments (0)

Files changed (14)

 const CColor	CColor::c_Red		= CColor( 0xFF, 0x00, 0x00 );
 const CColor	CColor::c_White		= CColor( 0xFF, 0xFF, 0xFF );
 const CColor	CColor::c_Yellow	= CColor( 0xFF, 0xFF, 0x00 );
+const CColor	CColor::c_Blue		= CColor( 0x00, 0x00, 0xFF );
+const CColor	CColor::c_DarkGreen	= CColor( 0x00, 0x64, 0x00 );
+const CColor	CColor::c_Orange	= CColor( 0xFF, 0xA5, 0x00 );
 
 /*!
  * \brief
 	 */
 	static const CColor	c_Yellow;
 
+	static const CColor	c_Blue;
+
+	static const CColor	c_DarkGreen;
+
+	static const CColor	c_Orange;
+
 	static CColor Interpolate( float dValue, const CColor& ColorMinimum, const CColor& ColorMedium,
 		const CColor& ColorMaximum );
 
 				d = 1.0f / ( 1 + exp( ( dAve - d ) / dStd ) );
 				ostm << vecstrNames[ i ] << " -- " << vecstrNames[ j ] << " [weight = " << d <<
 					", color = \"" << ( fHashes ? "#" : "" ) << CColor::Interpolate( d,
-					CColor::c_Green, CColor::c_Black, CColor::c_Red ).ToRGB( ) << "\"];" << endl; }
+					//CColor::c_Green, CColor::c_Black, CColor::c_Red ).ToRGB( ) 
+					CColor::c_Orange, CColor::c_DarkGreen, CColor::c_Blue ).ToRGB( ) 
+					<< "\"];" << endl; }
 
 	ostm << "}" << endl; }
 

src/seekwriter.cpp

 	return true;
 }
 
+
 bool CSeekWriter::NormalizeDAB(CDataPair &Dat,
-	const vector<string> &vecstrGenes,
-	bool cutoff, bool expTransform, bool divideNorm, bool subtractNorm){
+const vector<string> &vecstrGenes, 
+//bool cutoff, float cutoff_val,
+bool expTransform, bool divideNorm, bool subtractNorm){
+	//Normalizes the pairwise values of Dat in place, restricted to the genes
+	//listed in vecstrGenes. Genes that Dat.GetGeneIndex cannot resolve and
+	//pairs whose value is NaN are skipped.
+	//  expTransform: work with r = exp(-d*d/2) instead of the raw value d
+	//  divideNorm:   store r / sqrt(sum_i) / sqrt(sum_j)
+	//  subtractNorm: store r - mean_i - mean_j
+	//where sum/mean are taken per gene over all of its valid pairs.
+	//Exactly one of divideNorm / subtractNorm must be true, otherwise an
+	//error is printed and false is returned. Returns true on success.
+	//The former cutoff option is disabled (see the commented-out code).
 
-	utype i, j;
-	vector<utype> veciGenes;
+	size_t i, j;
+	vector<unsigned int> veciGenes;
 	veciGenes.clear();
 	veciGenes.resize(vecstrGenes.size());
-	for( i = 0; i < vecstrGenes.size( ); ++i )
-		veciGenes[ i ] = Dat.GetGene( vecstrGenes[i] );
+	for(i=0; i<vecstrGenes.size(); i++)
+		veciGenes[i] = (unsigned int) Dat.GetGeneIndex(vecstrGenes[i]);
 
 	vector<float> vecSum;
 	vector<int> vecNum;
 	CSeekTools::InitVector(vecSum, vecstrGenes.size(), CMeta::GetNaN());
 	CSeekTools::InitVector(vecNum, vecstrGenes.size(), (int)-9999);
 
+	unsigned int s,t;
 	for(i=0; i<vecstrGenes.size(); i++){
-		utype s = veciGenes[i];
-		if(CSeekTools::IsNaN(s)) continue;
-		float sum = 0;
-		int num = 0;
-		vector<float> all;
-		for(j=0; j<vecstrGenes.size(); j++){
-			utype t = veciGenes[j];
-			float d = Dat.Get(s,t);
-			if(CSeekTools::IsNaN(t)) continue;
-			if(CMeta::IsNaN(d)) continue;
-			if(cutoff){
-				if(d>0){
+		if((s=veciGenes[i])==(unsigned int)-1) continue;
+		vecSum[i] = 0;
+		vecNum[i] = 0;
+	}
+
+	if(divideNorm && subtractNorm){
+		fprintf(stderr, "Error: both divideNorm and subtractNorm are true\n");
+		return false;
+	}else if(!divideNorm && !subtractNorm){
+		fprintf(stderr, "Error: both divideNorm and subtractNorm are false\n");
+		return false;
+	}
+
+	float d = -1;
+	float r = -1;
+	//first pass: accumulate per-gene sums and counts, visiting each pair once
+	for(i=0; i<vecstrGenes.size(); i++){
+		if((s=veciGenes[i])==(unsigned int)-1) continue;
+		for(j=i+1; j<vecstrGenes.size(); j++){
+			if((t=veciGenes[j])==(unsigned int)-1) continue;
+			if(CMeta::IsNaN(d = Dat.Get(s,t))) continue;
+			/*if(cutoff){
+				if(d>cutoff_val){
 					if(expTransform)
-						all.push_back(expf(-1.0*d*d/2.0));
+						r = expf(-1.0*d*d/2.0);
 					else
-						all.push_back(d);
+						r = d;
+					vecSum[i] += r;
+					vecSum[j] += r;
+					vecNum[i]++;
+					vecNum[j]++;
 				}
 			}
-			else{
+			else{*/
 				//fprintf(stderr, "Warning: Negative Z-Scores");
 				if(expTransform)
-					all.push_back(expf(-1.0*d*d/2.0));
+					r = expf(-1.0*d*d/2.0);
 				else
-					all.push_back(d);
-			}	
+					r = d;
+				vecSum[i] += r;
+				vecSum[j] += r;
+				vecNum[i]++;
+				vecNum[j]++;
+			//}	
 		}
-
-		for(j=0; j<all.size(); j++){
-			sum+=all[j];
-			num++;
-		}
-		vecSum[i] = sum;
-		vecNum[i] = num;
 	}
 
+	//second pass: overwrite each pair with its normalized value
 	for(i=0; i<vecstrGenes.size(); i++){
-		utype s = veciGenes[i];
-		if(CSeekTools::IsNaN(s)) continue;
-		float *v = Dat.GetFullRow(s);
-
-		for(j=0; j<vecstrGenes.size(); j++){
-			utype t = veciGenes[j];
-			float d = v[t];
-			if(CSeekTools::IsNaN(t)) continue;
-			if(CMeta::IsNaN(d)) continue;
-			if(cutoff){
-				if(d>0){
+		if((s=veciGenes[i])==(unsigned int)-1) continue;
+		for(j=i+1; j<vecstrGenes.size(); j++){
+			if((t=veciGenes[j])==(unsigned int)-1) continue;
+			if(CMeta::IsNaN(d = Dat.Get(s,t))) continue;
+			/*if(cutoff){
+				if(d>cutoff_val){
 					if(expTransform){
-						if(divideNorm){
-							float r = expf(-1.0*d*d/2.0) / sqrtf(vecSum[i]) / sqrtf(vecSum[j]);
-							Dat.Set(s, t, r);
-						}else if(subtractNorm){
-							float r = expf(-1.0*d*d/2.0) - vecSum[i] / vecNum[i] - vecSum[j] / vecNum[j];
-							Dat.Set(s, t, r);
-						}
+						if(divideNorm)
+							r=expf(-1.0*d*d/2.0)/sqrtf(vecSum[i])/sqrtf(vecSum[j]);
+						else if(subtractNorm)
+							r=expf(-1.0*d*d/2.0)-vecSum[i]/vecNum[i]-vecSum[j]/vecNum[j];
 					}else{
-						if(divideNorm){
-							float r = d / sqrtf(vecSum[i]) / sqrtf(vecSum[j]);
-							Dat.Set(s, t, r);
-						}else if(subtractNorm){
-							float r = d - vecSum[i] / vecNum[i] - vecSum[j] / vecNum[j];
-							Dat.Set(s, t, r);
-						}
+						if(divideNorm)
+							r=d/sqrtf(vecSum[i])/sqrtf(vecSum[j]);
+						else if(subtractNorm)
+							r=d-vecSum[i]/vecNum[i]-vecSum[j]/vecNum[j];
 					}
 				}else{
-					Dat.Set(s, t, 0);
+					r=0; //default value
 				}
+				Dat.Set(s, t, r);
 			}
-			else{
+			else{*/
 				if(expTransform){
-					if(divideNorm){
-						float r = expf(-1.0*d*d/2.0) / sqrtf(vecSum[i]) / sqrtf(vecSum[j]);
-						Dat.Set(s, t, r);
-					}else if(subtractNorm){
-						float r = expf(-1.0*d*d/2.0) - vecSum[i] / vecNum[i] - vecSum[j] / vecNum[j];
-						Dat.Set(s, t, r);
-					}
+					if(divideNorm)
+						r=expf(-1.0*d*d/2.0)/sqrtf(vecSum[i])/sqrtf(vecSum[j]);
+					else if(subtractNorm)
+						r=expf(-1.0*d*d/2.0)-vecSum[i]/vecNum[i]-vecSum[j]/vecNum[j];
 				}else{
 					if(divideNorm){
-						float r = 0;
 						//DANGEROUS
-						if(vecSum[i]<=0){
-							fprintf(stderr, "Warning, Dangerous, divide sqrt(z), where z could be negative\n");
-							r = 0;
-						}else{
-							r = d / sqrtf(vecSum[i]) / sqrtf(vecSum[j]);
-						}
-						Dat.Set(s, t, r);
+						//a sum of raw values can be <=0 for either gene of the
+						//pair; sqrtf would then give NaN or a division by zero,
+						//so both sums are checked before dividing
+						if(vecSum[i]<=0 || vecSum[j]<=0){
+							fprintf(stderr, "Warning, divide sqrt(z), when z<=0\n");
+							r=0; //default value
+						}else
+							r=d/sqrtf(vecSum[i])/sqrtf(vecSum[j]);
 					}else if(subtractNorm){
-						float r = d - vecSum[i] / vecNum[i] - vecSum[j] / vecNum[j];
-						Dat.Set(s, t, r);
+						r=d-vecSum[i]/vecNum[i]-vecSum[j]/vecNum[j];
 					}
-
 				}
-			}
+				Dat.Set(s, t, r);
+			//}
 		}
-		free(v);
 	}
 
+	//Plot a distribution
+	/*vector<unsigned long> bins;
+	bins.resize(55);
+	float upper = 5.0; //assume z scores
+	float lower = -5.0;
+	float bin_size = (upper - lower) / 50;
+	for(i=0; i<55; i++)
+		bins[i] = 0;
+	for(i=0; i<Dat.GetGenes(); i++){
+		for(j=i+1; j<Dat.GetGenes(); j++){
+			d = Dat.Get(i,j);
+			if(CMeta::IsNaN(d)) continue;
+			int b = (int) ((d - lower) / bin_size);
+			if(b<0){
+				bins[0]++;
+				continue;
+			}
+			if(b>=55){
+				bins[54]++;
+				continue;
+			}
+			bins[b]++;
+		}
+	}
+	fprintf(stderr, 
+	"Distances: bin size: %.5f, num of bins: %d, min bin val: %.5f, max bin val: %.5f\n",
+	bin_size, 55, lower, upper);
+	for(i=0; i<55; i++){
+		fprintf(stderr, "%lu\t%lu\n", i, bins[i]);
+	}
+	*/
 	return true;
 }
 
 
 	//compatibility
 	template<class tType>
-	static bool WriteSparseMatrix(CDataPair &Dat, vector<map<tType,unsigned short> > &umat,
-	int maxRank, const vector<string> &vecstrGenes, const char *fileName){
+	static bool WriteSparseMatrix(CDataPair &Dat, 
+	vector<map<tType,unsigned short> > &umat, 
+	const vector<string> &vecstrGenes, const char *fileName){
 
 		FILE *f = fopen(fileName, "wb");
 		if(f==NULL){
 
 	//compatibility
 	template<class tType>
-	static bool GetSparseRankMatrix(CDataPair &Dat, vector<map<tType,unsigned short> > &umat, 
-	int maxRank, const vector<string> &vecstrGenes){
+	static bool GetSparseRankMatrix(CDataPair &Dat, 
+	vector<map<tType,unsigned short> > &umat, int maxRank, 
+	const vector<string> &vecstrGenes){
 	
 		size_t i, j;
 		vector<tType> veciGenes;
 		return true;
 	}
 
+	//To be used after NormalizeDAB.
+	//Collects every pair of genes from vecstrGenes whose value in Dat is
+	//greater than cutoff_val into umat, keyed by the genes' positions in
+	//vecstrGenes (umat[i][j] with i < j). Values are stored as
+	//(unsigned short)(value * 100), saturated to the range [0, 65535].
+	//Genes that Dat.GetGeneIndex cannot resolve and NaN pairs are skipped.
+	//umat is resized to vecstrGenes.size() and all of its rows are reset.
+	//Always returns true.
+	template<class tType>
+	static bool ConvertToSparseMatrix(CDataPair &Dat,
+	vector<map<tType,unsigned short> > &umat,
+	const vector<string> &vecstrGenes, const float cutoff_val){
+
+		size_t i, j;
+		vector<tType> veciGenes;
+		veciGenes.resize(vecstrGenes.size());
+		for( i = 0; i < vecstrGenes.size( ); ++i )
+			veciGenes[ i ] = (tType) Dat.GetGeneIndex( vecstrGenes[i] );
+		umat.resize(vecstrGenes.size());
+		for(i=0; i<vecstrGenes.size(); i++)
+			umat[i] = map<tType, unsigned short>();
+
+		tType s,t;
+		for(i=0; i<vecstrGenes.size(); i++){
+			if((s=veciGenes[i])==(tType)-1) continue;
+			//i is a size_t; cast explicitly so it matches the format specifier
+			if(i%1000==0)
+				fprintf(stderr, "Start reading gene %lu...\n", (unsigned long) i);
+
+			for(j=i+1; j<vecstrGenes.size(); j++){
+				if((t=veciGenes[j])==(tType)-1) continue;
+				float r = Dat.Get(s,t);
+				if(CMeta::IsNaN(r)) continue;
+				if(r > cutoff_val){
+					//converting an out-of-range floating value to an integer
+					//type is undefined, so saturate before the cast
+					double scaled = r * 100.0;
+					if(scaled < 0.0)
+						scaled = 0.0;
+					else if(scaled > 65535.0)
+						scaled = 65535.0;
+					umat[i][j] = (unsigned short) scaled;
+				}
+			}
+		}
+		fprintf(stderr, "Finished reading DAB\n");
+		return true;
+	}
+
+	//To be used for sparse matrices created from cutting-off z-scores.
+	//Reads fileName into mat and records the present gene IDs in m.
+	//Layout read here: the number of present genes followed by their IDs,
+	//then the number of rows; each row holds a gene ID, its entry count and
+	//(gene ID, unsigned short score) pairs. Each score is divided by 100 and,
+	//when exponent > 1.0, raised to that power; every entry is added to mat
+	//in both directions.
+	//Returns false if exponent < 1.0, if the file cannot be opened, or if the
+	//file ends before all expected fields were read (mat and m may then be
+	//partially filled). Returns true otherwise.
+	template<class tType>
+	static bool ReadSeekSparseMatrix(const char *fileName,
+	CSparseFlatMatrix<float> &mat, CSeekIntIntMap &m, 
+	const vector<string> &vecstrGenes, const int initialCapacity, 
+	const float exponent){
+	
+		if(exponent<1.0){
+			fprintf(stderr, "Exponent must be >=1.0\n");
+			return false;
+		}
+	
+		FILE *f = fopen(fileName, "rb");
+		if(f==NULL){
+			cerr << "File not found" << endl;
+			return false;
+		}
+
+		size_t i, j;
+		tType numGenes = 0, numPresent = 0, geneID = 0;
+		//turns false on the first short read; all loops stop as soon as it does
+		bool ok = true;
+
+		mat.Initialize(vecstrGenes.size());
+		ok = fread((char*) (&numPresent), 1, sizeof(numPresent), f)
+			== sizeof(numPresent);
+		for(j=0; ok && j<numPresent; j++){
+			ok = fread((char*)(&geneID), 1, sizeof(geneID), f)
+				== sizeof(geneID);
+			if(!ok) break;
+			m.Add((utype) geneID);
+			mat.InitializeRow(geneID, initialCapacity); //initial capacity
+		}
+		ok = ok && fread((char*) (&numGenes), 1, sizeof(numGenes), f)
+			== sizeof(numGenes);
+
+		for(i=0; ok && i<numGenes; i++){
+			tType id = 0, id2 = 0;  //gene ID
+			unsigned short numEntries = 0, score = 0; //z-scores * 100.0
+			ok = fread((char*)(&id), 1, sizeof(id), f) == sizeof(id)
+				&& fread((char*)(&numEntries), 1, sizeof(numEntries), f)
+					== sizeof(numEntries);
+			for(j=0; ok && j<numEntries; j++){
+				ok = fread((char*)(&id2), 1, sizeof(id2), f) == sizeof(id2)
+					&& fread((char*)(&score), 1, sizeof(score), f)
+						== sizeof(score);
+				if(!ok) break;
+				float fval = (float) score / 100.0;
+				if(exponent>1.0)
+					fval = pow(fval, exponent);
+				mat.Add(id, id2, fval);
+				mat.Add(id2, id, fval);
+			}
+		}
+		fclose(f);
+
+		if(!ok){
+			fprintf(stderr, "Error reading file, unexpected end of data\n");
+			return false;
+		}
+
+		mat.Organize();
+		return true;
+	}
+
 	//===============================================================
 	//not currently used
 	static bool ReadSparseMatrix(const char *fileName, 
 	static bool SumSparseMatrix(CSparseFlatMatrix<float> &mat1,
 		CHalfMatrix<float> &res, const CSeekIntIntMap &mi, const float w);
 
-	static bool NormalizeDAB(CDataPair &Dat,
-		const vector<string> &vecstrGenes,
-		bool cutoff, bool expTransform, bool divideNorm, bool subtractNorm);
+	static bool NormalizeDAB(CDataPair &Dat, const vector<string> &vecstrGenes,
+		//bool cutoff, float cutoff_val, 
+		bool expTransform, bool divideNorm, bool subtractNorm);
 
 	static bool GetGeneAverage(CDataPair &Dat,
 		const vector<string> &vecstrGenes,

tools/SeekIterative/SeekIterative.cpp

 			qu[i].push_back(mapstriGenes[vecstrAllQuery[i][j]]);
 	}
 
+	//Visualization mode: for every query, extract the sub-network of the
+	//query genes from the combined DAB, print how many edges survive a range
+	//of cutoffs, and save the sub-network as <output_dir>/<query index>.dot.
+	if(sArgs.visualize_flag==1){
+		string dab_base = sArgs.dab_basename_arg;
+		string file1 = dab_dir + "/" + dab_base + ".dab";
+		float cutoff_par = sArgs.cutoff_arg;
+		string genome = sArgs.genome_arg;
+		vector<string> s1, s2;
+		CSeekTools::ReadListTwoColumns(genome.c_str(), s1, s2);
+
+		//genome with the second column registered as synonyms of the first
+		CGenome g;
+		g.Open(s1);
+		for(i=0; i<s2.size(); i++){
+			CGene &g1 = g.GetGene(g.FindGene(s1[i]));
+			g.AddSynonym(g1, s2[i]);
+		}
+
+		CDat CD;
+		CD.Open(file1.c_str(), false, 2, false, false, false);
+
+		CSeekIntIntMap d1(CD.GetGenes());
+		vector<utype> indexConvReverse;
+		//indexed by CD gene index below, so it must hold CD.GetGenes() entries
+		//(sizing it by vecstrGenes.size() could be written out of bounds)
+		CSeekTools::InitVector(indexConvReverse, CD.GetGenes(), (utype) -1);
+		for(i=0; i<CD.GetGenes(); i++){
+			map<string,size_t>::iterator it = mapstriGenes.find(CD.GetGene(i));
+			if(it==mapstriGenes.end()) continue;
+			indexConvReverse[i] = it->second;
+			d1.Add(i);
+		}
+
+		//Visualize
+		for(j=0; j<vecstrAllQuery.size(); j++){
+			vector<string> vec_s;
+			vector<utype> vec_g;
+			//keep only the query genes that are present in the DAB
+			for(k=0; k<vecstrAllQuery[j].size(); k++){
+				size_t ind = CD.GetGeneIndex(vecstrAllQuery[j][k]);
+				if(ind==(size_t)-1) continue;
+				vec_g.push_back((utype) ind);
+				vec_s.push_back(vecstrAllQuery[j][k]);
+			}
+			CDat V;
+			V.Open(vec_s);
+			for(k=0; k<vec_s.size(); k++){
+				for(l=k+1; l<vec_s.size(); l++){
+					V.Set(k, l, CD.Get(vec_g[k], vec_g[l]));
+				}
+			}
+			fprintf(stderr, "Query %d\n", j);
+
+			/*float init = 0.00001;
+			float step = 0.00001;
+			float upper = 0.001;
+			*/
+			float init = 0.05;
+			float step = 0.05;
+			float upper = 3;
+
+			//report the number of edges at or above each cutoff in the sweep
+			float cutoff = init;
+			while(cutoff<upper){
+				int count = 0;
+				for(k=0; k<vec_s.size(); k++){
+					for(l=k+1; l<vec_s.size(); l++){
+						if(!CMeta::IsNaN(V.Get(k,l)) && V.Get(k,l)>=cutoff){
+							count++;
+						}
+					}
+				}
+				fprintf(stderr, "%.5f\t%d\n", cutoff, count);
+				cutoff+=step;
+			}
+
+			sprintf(acBuffer, "%s/%d.dot", output_dir.c_str(), j);			
+			ofstream ot(acBuffer);
+			V.SaveDOT(ot, cutoff_par, &g, false, true, NULL, NULL);
+		}
+
+	}
+
 	if(sArgs.combined_flag==1){
 		string dab_base = sArgs.dab_basename_arg;
 		string file1 = dab_dir + "/" + dab_base + ".dab";
+		float cutoff_par = sArgs.cutoff_arg;
+		string genome = sArgs.genome_arg;
+		vector<string> s1, s2;
+		CSeekTools::ReadListTwoColumns(genome.c_str(), s1, s2);
+
+		CGenome g;
+		g.Open(s1);
+		for(i=0; i<s2.size(); i++){
+			CGene &g1 = g.GetGene(g.FindGene(s1[i]));
+			g.AddSynonym(g1, s2[i]);
+		}
 
 		vector<vector<float> > q_weight;
 		q_weight.resize(vecstrAllQuery.size());
 			sort(ar.begin(), ar.end());
 			vector<utype> vec_g;
 			vector<string> vec_s;
-			int FIRST = 100;
+			int FIRST = 200;
 			for(k=0; k<FIRST; k++){
 				if(ar[k].f==-320) break;
 				vec_g.push_back(CD.GetGeneIndex(vecstrGenes[ar[k].i]));
 				}
 			}
 
+			fprintf(stderr, "Query %d\n", j);
+			float cutoff = 0.00001;
+			while(cutoff<0.001){
+				int count = 0;
+				for(k=0; k<vec_s.size(); k++){
+					for(l=k+1; l<vec_s.size(); l++){
+						if(!CMeta::IsNaN(V.Get(k,l)) && V.Get(k,l)>=cutoff){
+							count++;
+						}
+					}
+				}
+				fprintf(stderr, "%.5f\t%d\n", cutoff, count);
+				cutoff+=0.00001;
+			}
+
 			sprintf(acBuffer, "%s/%d.dot", output_dir.c_str(), j);			
 			ofstream ot(acBuffer);
-			V.SaveDOT(ot, 0.0001, NULL, true, false, NULL, NULL);
+			V.SaveDOT(ot, cutoff_par, &g, false, true, NULL, NULL);
+			//V.SaveDOT(ot, 0.0001, NULL, true, false, NULL, NULL);
 		}
 		
 

tools/SeekIterative/SeekIterative.ggo

 								flag off
 option	"testcombined"		h	"Test count mode"
 								flag off
+option	"visualize"			v	"Visualization mode"
+								flag off
 
-section "Combined-DAB mode"
+section "Combined-DAB / Visualization mode"
 option	"dab_basename"		b	"Combined-dab basename, also shared with Test Mode"
 								string typestr="filename"
 
+section "Visualization mode"
+option	"cutoff"			c	"Cutoff value"
+								float default="0.0001"
+option	"genome"			G	"Genome mapping file"
+								string typestr="filename"
+
 section "Sparse DAB mode"
 option	"dab_list"			V	"DAB list"
 								string typestr="filename"

tools/SeekIterative/cmdline.c

   "  -f, --test                   Test mode  (default=off)",
   "  -g, --testcount              Test count mode  (default=off)",
   "  -h, --testcombined           Test count mode  (default=off)",
-  "\nCombined-DAB mode:",
+  "  -v, --visualize              Visualization mode  (default=off)",
+  "\nCombined-DAB / Visualization mode:",
   "  -b, --dab_basename=filename  Combined-dab basename, also shared with Test \n                                 Mode",
+  "\nVisualization mode:",
+  "  -c, --cutoff=FLOAT           Cutoff value  (default=`0.0001')",
+  "  -G, --genome=filename        Genome mapping file",
   "\nSparse DAB mode:",
   "  -V, --dab_list=filename      DAB list",
   "  -I, --num_iter=INT           Number of iterations  (default=`0')",
   args_info->test_given = 0 ;
   args_info->testcount_given = 0 ;
   args_info->testcombined_given = 0 ;
+  args_info->visualize_given = 0 ;
   args_info->dab_basename_given = 0 ;
+  args_info->cutoff_given = 0 ;
+  args_info->genome_given = 0 ;
   args_info->dab_list_given = 0 ;
   args_info->num_iter_given = 0 ;
   args_info->default_type_given = 0 ;
   args_info->test_flag = 0;
   args_info->testcount_flag = 0;
   args_info->testcombined_flag = 0;
+  args_info->visualize_flag = 0;
   args_info->dab_basename_arg = NULL;
   args_info->dab_basename_orig = NULL;
+  args_info->cutoff_arg = 0.0001;
+  args_info->cutoff_orig = NULL;
+  args_info->genome_arg = NULL;
+  args_info->genome_orig = NULL;
   args_info->dab_list_arg = NULL;
   args_info->dab_list_orig = NULL;
   args_info->num_iter_arg = 0;
   args_info->test_help = gengetopt_args_info_help[5] ;
   args_info->testcount_help = gengetopt_args_info_help[6] ;
   args_info->testcombined_help = gengetopt_args_info_help[7] ;
-  args_info->dab_basename_help = gengetopt_args_info_help[9] ;
-  args_info->dab_list_help = gengetopt_args_info_help[11] ;
-  args_info->num_iter_help = gengetopt_args_info_help[12] ;
-  args_info->default_type_help = gengetopt_args_info_help[13] ;
-  args_info->rbp_p_help = gengetopt_args_info_help[14] ;
-  args_info->max_rank_help = gengetopt_args_info_help[15] ;
-  args_info->input_help = gengetopt_args_info_help[17] ;
-  args_info->query_help = gengetopt_args_info_help[18] ;
-  args_info->dab_dir_help = gengetopt_args_info_help[19] ;
-  args_info->dir_out_help = gengetopt_args_info_help[21] ;
+  args_info->visualize_help = gengetopt_args_info_help[8] ;
+  args_info->dab_basename_help = gengetopt_args_info_help[10] ;
+  args_info->cutoff_help = gengetopt_args_info_help[12] ;
+  args_info->genome_help = gengetopt_args_info_help[13] ;
+  args_info->dab_list_help = gengetopt_args_info_help[15] ;
+  args_info->num_iter_help = gengetopt_args_info_help[16] ;
+  args_info->default_type_help = gengetopt_args_info_help[17] ;
+  args_info->rbp_p_help = gengetopt_args_info_help[18] ;
+  args_info->max_rank_help = gengetopt_args_info_help[19] ;
+  args_info->input_help = gengetopt_args_info_help[21] ;
+  args_info->query_help = gengetopt_args_info_help[22] ;
+  args_info->dab_dir_help = gengetopt_args_info_help[23] ;
+  args_info->dir_out_help = gengetopt_args_info_help[25] ;
   
 }
 
   unsigned int i;
   free_string_field (&(args_info->dab_basename_arg));
   free_string_field (&(args_info->dab_basename_orig));
+  free_string_field (&(args_info->cutoff_orig));
+  free_string_field (&(args_info->genome_arg));
+  free_string_field (&(args_info->genome_orig));
   free_string_field (&(args_info->dab_list_arg));
   free_string_field (&(args_info->dab_list_orig));
   free_string_field (&(args_info->num_iter_orig));
     write_into_file(outfile, "testcount", 0, 0 );
   if (args_info->testcombined_given)
     write_into_file(outfile, "testcombined", 0, 0 );
+  if (args_info->visualize_given)
+    write_into_file(outfile, "visualize", 0, 0 );
   if (args_info->dab_basename_given)
     write_into_file(outfile, "dab_basename", args_info->dab_basename_orig, 0);
+  if (args_info->cutoff_given)
+    write_into_file(outfile, "cutoff", args_info->cutoff_orig, 0);
+  if (args_info->genome_given)
+    write_into_file(outfile, "genome", args_info->genome_orig, 0);
   if (args_info->dab_list_given)
     write_into_file(outfile, "dab_list", args_info->dab_list_orig, 0);
   if (args_info->num_iter_given)
         { "test",	0, NULL, 'f' },
         { "testcount",	0, NULL, 'g' },
         { "testcombined",	0, NULL, 'h' },
+        { "visualize",	0, NULL, 'v' },
         { "dab_basename",	1, NULL, 'b' },
+        { "cutoff",	1, NULL, 'c' },
+        { "genome",	1, NULL, 'G' },
         { "dab_list",	1, NULL, 'V' },
         { "num_iter",	1, NULL, 'I' },
         { "default_type",	1, NULL, 'T' },
         { NULL,	0, NULL, 0 }
       };
 
-      c = getopt_long (argc, argv, "defghb:V:I:T:R:M:i:q:F:D:", long_options, &option_index);
+      c = getopt_long (argc, argv, "defghvb:c:G:V:I:T:R:M:i:q:F:D:", long_options, &option_index);
 
       if (c == -1) break;	/* Exit from `while (1)' loop.  */
 
             goto failure;
         
           break;
+        case 'v':	/* Visualization mode.  */
+        
+        
+          if (update_arg((void *)&(args_info->visualize_flag), 0, &(args_info->visualize_given),
+              &(local_args_info.visualize_given), optarg, 0, 0, ARG_FLAG,
+              check_ambiguity, override, 1, 0, "visualize", 'v',
+              additional_error))
+            goto failure;
+        
+          break;
         case 'b':	/* Combined-dab basename, also shared with Test Mode.  */
         
         
             goto failure;
         
           break;
+        case 'c':	/* Cutoff value.  */
+        
+        
+          if (update_arg( (void *)&(args_info->cutoff_arg), 
+               &(args_info->cutoff_orig), &(args_info->cutoff_given),
+              &(local_args_info.cutoff_given), optarg, 0, "0.0001", ARG_FLOAT,
+              check_ambiguity, override, 0, 0,
+              "cutoff", 'c',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'G':	/* Genome mapping file.  */
+        
+        
+          if (update_arg( (void *)&(args_info->genome_arg), 
+               &(args_info->genome_orig), &(args_info->genome_given),
+              &(local_args_info.genome_given), optarg, 0, 0, ARG_STRING,
+              check_ambiguity, override, 0, 0,
+              "genome", 'G',
+              additional_error))
+            goto failure;
+        
+          break;
         case 'V':	/* DAB list.  */
         
         

tools/SeekIterative/cmdline.h

   const char *testcount_help; /**< @brief Test count mode help description.  */
   int testcombined_flag;	/**< @brief Test count mode (default=off).  */
   const char *testcombined_help; /**< @brief Test count mode help description.  */
+  int visualize_flag;	/**< @brief Visualization mode (default=off).  */
+  const char *visualize_help; /**< @brief Visualization mode help description.  */
   char * dab_basename_arg;	/**< @brief Combined-dab basename, also shared with Test Mode.  */
   char * dab_basename_orig;	/**< @brief Combined-dab basename, also shared with Test Mode original value given at command line.  */
   const char *dab_basename_help; /**< @brief Combined-dab basename, also shared with Test Mode help description.  */
+  float cutoff_arg;	/**< @brief Cutoff value (default='0.0001').  */
+  char * cutoff_orig;	/**< @brief Cutoff value original value given at command line.  */
+  const char *cutoff_help; /**< @brief Cutoff value help description.  */
+  char * genome_arg;	/**< @brief Genome mapping file.  */
+  char * genome_orig;	/**< @brief Genome mapping file original value given at command line.  */
+  const char *genome_help; /**< @brief Genome mapping file help description.  */
   char * dab_list_arg;	/**< @brief DAB list.  */
   char * dab_list_orig;	/**< @brief DAB list original value given at command line.  */
   const char *dab_list_help; /**< @brief DAB list help description.  */
   unsigned int test_given ;	/**< @brief Whether test was given.  */
   unsigned int testcount_given ;	/**< @brief Whether testcount was given.  */
   unsigned int testcombined_given ;	/**< @brief Whether testcombined was given.  */
+  unsigned int visualize_given ;	/**< @brief Whether visualize was given.  */
   unsigned int dab_basename_given ;	/**< @brief Whether dab_basename was given.  */
+  unsigned int cutoff_given ;	/**< @brief Whether cutoff was given.  */
+  unsigned int genome_given ;	/**< @brief Whether genome was given.  */
   unsigned int dab_list_given ;	/**< @brief Whether dab_list was given.  */
   unsigned int num_iter_given ;	/**< @brief Whether num_iter was given.  */
   unsigned int default_type_given ;	/**< @brief Whether default_type was given.  */

tools/SeekIterative/stdafx.h

 using namespace std;
 
 #include <pthread.h>
-
+#include "genome.h"
 #include "seekmap.h"
 #include "seekreader.h"
 #include "seekevaluate.h"

tools/SeekPrep/SeekPrep.cpp

 #include "stdafx.h"
 #include "cmdline.h"
 
-enum ExportMode{ DISTANCE_MATRIX, COUNT_MATRIX, WEIGHTSUM_MATRIX, GENE_PRESENCE_VECTOR };
+enum ExportMode{ 
+	DISTANCE_MATRIX, COUNT_MATRIX, WEIGHTSUM_MATRIX, GENE_PRESENCE_VECTOR 
+};
+
+// Selects how CalculateMatrix reads each input sparse matrix: RANK_NORM uses
+// the reader that takes MAX_RANK and rbp_p, Z_NORM the reader that takes an
+// exponent (the subtract_z normalization).
+enum NormMode{
+	RANK_NORM, Z_NORM
+};
 
 //tType can only be unsigned short or utype
+//norm_mode = 0 (rank norm), 1 (subtract_z norm)
 template<class tType>
-bool CalculateMatrix(ExportMode e, const vector<string> &dab_list, 
+bool CalculateMatrix(const NormMode norm_mode, 
+	const ExportMode e, const vector<string> &dab_list, 
 	const string &dab_dir, const string &outdir, const string &outdab, 
 	const vector<string> &vecstrGenes, const vector<float> &w, const float rbp_p,
-	const int MAX_RANK){ //w=weight of dsets
+	const int MAX_RANK, const float exp){ //w=weight of dsets 
+	//rbp_p, MAX_RANK for rank-based normalize
+	//exp for value-based or subtract_z normalize
+	const int RANK_NORM = 0;
+	const int Z_NORM = 1;
 
 	size_t i, j, k;
 
 		vector<utype> gpres;
 		CSeekTools::InitVector(gpres, vecstrGenes.size(), (utype) 0);
 		for(i=0; i<dab_list.size(); i++){
-			fprintf(stderr, "Reading %d of %d: %s\n", i, dab_list.size(), dab_list[i].c_str());
+			fprintf(stderr, "Reading %d of %d: %s\n", i, dab_list.size(), 
+				dab_list[i].c_str());
 			CSeekIntIntMap d1(vecstrGenes.size());
 			string dabfile = dab_dir + "/" + dab_list[i];
 			CSeekWriter::ReadSeekSparseMatrixHeader<tType>(dabfile.c_str(), d1);
 				res.Set(i, j, 0);
 	
 		for(i=0; i<dab_list.size(); i++){
-			fprintf(stderr, "Reading %d of %d: %s\n", i, dab_list.size(), dab_list[i].c_str());
+			fprintf(stderr, "Reading %d of %d: %s\n", i, dab_list.size(), 
+				dab_list[i].c_str());
 			CSeekIntIntMap d1(vecstrGenes.size());
 			string dabfile = dab_dir + "/" + dab_list[i];
 			CSparseFlatMatrix<float> sm (0);
-			CSeekWriter::ReadSeekSparseMatrix<tType>(dabfile.c_str(), sm, d1, MAX_RANK, rbp_p, vecstrGenes);
+			if(norm_mode==RANK_NORM){
+				CSeekWriter::ReadSeekSparseMatrix<tType>(dabfile.c_str(), sm, d1, 
+					MAX_RANK, rbp_p, vecstrGenes);
+			}else if(norm_mode==Z_NORM){
+				CSeekWriter::ReadSeekSparseMatrix<tType>(dabfile.c_str(), sm, d1,
+					vecstrGenes, (int) (0.10*vecstrGenes.size()), exp);
+			}
 			fprintf(stderr, "Summing...\n");
 			CSeekWriter::SumSparseMatrix(sm, res, d1, w[i]);
 			fprintf(stderr, "Finished Summing...\n");
 				res.Set(i, j, 0);
 
 		for(i=0; i<dab_list.size(); i++){
-			fprintf(stderr, "Reading %d of %d: %s\n", i, dab_list.size(), dab_list[i].c_str());
+			fprintf(stderr, "Reading %d of %d: %s\n", i, dab_list.size(), 
+				dab_list[i].c_str());
 			CSeekIntIntMap d1(vecstrGenes.size());
 			string dabfile = dab_dir + "/" + dab_list[i];
 			CSeekWriter::ReadSeekSparseMatrixHeader<tType>(dabfile.c_str(), d1);
 				res.Set(i, j, 0);
 
 		for(i=0; i<dab_list.size(); i++){
-			fprintf(stderr, "Reading %d of %d: %s\n", i, dab_list.size(), dab_list[i].c_str());
+			fprintf(stderr, "Reading %d of %d: %s\n", i, dab_list.size(), 
+				dab_list[i].c_str());
 			CSeekIntIntMap d1(vecstrGenes.size());
 			string dabfile = dab_dir + "/" + dab_list[i];
 			CSeekWriter::ReadSeekSparseMatrixHeader<tType>(dabfile.c_str(), d1);
 					v = vv - vc[k]->GetGeneAverage(j);
 					/*if(isnan(vv) || isinf(vv) || isnan(vc[k]->GetGeneAverage(j)) ||
 						isinf(vc[k]->GetGeneAverage(j))){
-						fprintf(stderr, "%d %.5f %.5f %.5f\n", (int) uc, quant[uc], vv, vc[k]->GetGeneAverage(j));
+						fprintf(stderr, "%d %.5f %.5f %.5f\n", (int) uc, quant[uc], 
+						vv, vc[k]->GetGeneAverage(j));
 					}*/
 					//v = quant[uc];
 					sum[platform_id] += v;
 			fprintf(stderr, "Error, invalid default_type argument!\n");
 			return -1;
 		}
-
-		if(sArgs.max_rank_arg==-1){
-			fprintf(stderr, "Error, please supply the max rank flag.\n");
-			return -1;
-		}
-
-		if(sArgs.rbp_p_arg==-1){
-			fprintf(stderr, "Error, please supply the rbp_p flag.\n");
-			return -1;
-		}
 	}
 
 	/* PCL mode */
 
 			int totNumExperiments = pcl.GetExperiments();
 			if(totNumExperiments<=2){
-				fprintf(stderr, "This dataset is skipped because it contains <=2 columns\n");
+				fprintf(stderr, "This dataset is skipped because it has <=2 columns\n");
 				fprintf(stderr, "An empty vector will be returned\n");
 			}else{
 				for(j=0; j<vecstrGenes.size(); j++){
 			for(j=0; j<vecstrGenes.size(); j++){
 				utype g = pcl.GetGene(vecstrGenes[j]);
 				if(CSeekTools::IsNaN(g)) continue;
-				fprintf(stderr, "%s\t%0.4f\t%.4f\n", vecstrGenes[j].c_str(), var[j], avg[j]);
+				fprintf(stderr, "%s\t%0.4f\t%.4f\n", vecstrGenes[j].c_str(), 
+					var[j], avg[j]);
 			}
 			//fprintf(stderr, "G\n"); 
-			sprintf(outFile, "%s/%s.gexpvar", sArgs.dir_out_arg, fileStem.c_str());
+			sprintf(outFile, "%s/%s.gexpvar", sArgs.dir_out_arg, 
+				fileStem.c_str());
 			CSeekTools::WriteArray(outFile, var);
-			sprintf(outFile, "%s/%s.gexpmean", sArgs.dir_out_arg, fileStem.c_str());
+			sprintf(outFile, "%s/%s.gexpmean", sArgs.dir_out_arg, 
+				fileStem.c_str());
 			CSeekTools::WriteArray(outFile, avg);
 		}
 
 				//printf("Platform %s\n", mapistrPlatform[i].c_str());
 				/*for(j=0; j<vecstrQuery.size(); j++){
 					size_t iGene = mapstriGenes[vecstrQuery[j]];
-					printf("Gene %s %.5f %.5f\n", vecstrQuery[j].c_str(), platform_avg.Get(i, iGene),
-						platform_stdev.Get(i,iGene));
+					printf("Gene %s %.5f %.5f\n", vecstrQuery[j].c_str(), 
+						platform_avg.Get(i, iGene), platform_stdev.Get(i,iGene));
 				}*/
 			//}
 
 
 	} else if(sArgs.dab_flag==1){
 		
-		if(sArgs.norm_flag==1){
-
+		string norm_mode = sArgs.norm_mode_arg;
+		if(sArgs.norm_flag==1 && norm_mode=="rank"){
+			if(sArgs.default_type_arg==-1){
+				fprintf(stderr, "Please supply parameter --default_type\n");
+				return 1;
+			}
+			if(sArgs.max_rank_arg==-1){
+				fprintf(stderr, "Please supply parameter --max_rank\n");
+				return 1;
+			}
 
 			CDataPair Dat;
 			char outFile[1024];
 			sprintf(outFile, "%s/%s.2.dab", sArgs.dir_out_arg,
 				fileStem.c_str());
 			int max_rank = sArgs.max_rank_arg;
-			float rbp_p = sArgs.rbp_p_arg;
-			fprintf(stderr, "Using rbp_p: %.3f, max_rank: %d\n", rbp_p, max_rank);
+			fprintf(stderr, "Using max_rank: %d\n", max_rank);
 			//cutoff, expTransform, divideNorm, subtractNorm
 			//CSeekWriter::NormalizeDAB(Dat, vecstrGenes, true, false, true, false);
 			//CSeekWriter::RankNormalizeDAB(Dat, vecstrGenes, max_rank, rbp_p);
 			//Dat.Save(outFile);
 			if(sArgs.default_type_arg==0){
 				vector<map<utype,unsigned short> > umat;
-				CSeekWriter::GetSparseRankMatrix<utype>(Dat, umat, max_rank, vecstrGenes);
-				CSeekWriter::WriteSparseMatrix<utype>(Dat, umat, max_rank, vecstrGenes, outFile);
+				CSeekWriter::GetSparseRankMatrix<utype>(Dat, umat, max_rank, 
+					vecstrGenes);
+				CSeekWriter::WriteSparseMatrix<utype>(Dat, umat, 
+					vecstrGenes, outFile);
 			}else if(sArgs.default_type_arg==1){
 				vector<map<unsigned short,unsigned short> > umat;
-				CSeekWriter::GetSparseRankMatrix<unsigned short>(Dat, umat, max_rank, vecstrGenes);
-				CSeekWriter::WriteSparseMatrix<unsigned short>(Dat, umat, max_rank, vecstrGenes, outFile);
+				CSeekWriter::GetSparseRankMatrix<unsigned short>(Dat, umat, max_rank, 
+					vecstrGenes);
+				CSeekWriter::WriteSparseMatrix<unsigned short>(Dat, umat, 
+					vecstrGenes, outFile);
 			}else{
 				fprintf(stderr, "Invalid default type!\n");
 				return -1;
 			CSeekWriter::ReadSparseMatrix(l, mat, 0.99, vecstrGenes);*/
 		}
 
+		if(sArgs.view_flag==1){
+			fprintf(stderr, "Operation not implemented yet!\n");
+			return 1;
+		}
+
+		if(sArgs.norm_flag==1 && norm_mode=="subtract_z"){
+			if(sArgs.cutoff_value_arg==-1.0){
+				fprintf(stderr, "Please supply parameter --cutoff_value\n");
+				return 1;
+			}
+			if(sArgs.default_type_arg==-1){
+				fprintf(stderr, "Please supply parameter --default_type\n");
+				return 1;
+			}
+
+			CDataPair Dat;
+			char outFile[125];
+			if(!Dat.Open(sArgs.dabinput_arg, false, false)){
+				cerr << "error opening file" << endl;
+				return 1;
+			}
+			fprintf(stderr, "Finished opening file\n");
+			string fileName = CMeta::Basename(sArgs.dabinput_arg);
+			string fileStem = CMeta::Deextension(fileName);
+			sprintf(outFile, "%s/%s.2.dab", sArgs.dir_out_arg, fileStem.c_str());
+			//expTransform, divideNorm, subtractNorm
+			CSeekWriter::NormalizeDAB(Dat, vecstrGenes, false, false, true);
+			float cutoff = sArgs.cutoff_value_arg;
+			if(sArgs.default_type_arg==0){
+				//unsigned int
+				vector<map<utype,unsigned short> > umat;
+				CSeekWriter::ConvertToSparseMatrix<utype>(Dat, umat, vecstrGenes, 
+				cutoff);
+				CSeekWriter::WriteSparseMatrix<utype>(Dat, umat, vecstrGenes, 
+				outFile);
+			}else if(sArgs.default_type_arg==1){
+				//unsigned short
+				vector<map<unsigned short,unsigned short> > umat;
+				CSeekWriter::ConvertToSparseMatrix<unsigned short>(Dat, umat, vecstrGenes, 
+				cutoff);
+				CSeekWriter::WriteSparseMatrix<unsigned short>(Dat, umat, vecstrGenes, 
+				outFile);
+			}
+			else{
+				fprintf(stderr, "Error, unsupported type --default_type_arg\n");
+				return 1;
+			}
+				
+		}
+
 		if(sArgs.gavg_flag==1){
 			bool logit = false;
 			if(sArgs.logit_flag==1) logit = true;
 			string fileStem = CMeta::Deextension(fileName);
 			sprintf(outFile, "%s/%s.gavg", sArgs.dir_out_arg,
 				fileStem.c_str());
-			CSeekWriter::GetGeneAverage(Dat, vecstrGenes, vecGeneAvg, logit, sArgs.top_avg_percent_arg);
+			CSeekWriter::GetGeneAverage(Dat, vecstrGenes, vecGeneAvg, logit, 
+				sArgs.top_avg_percent_arg);
 
 			//DEBUGGING
 			for(i=0; i<vecGeneAvg.size(); i++){
 	}
 
 	if(sArgs.dabset_flag==1){
+		NormMode n;
+		string norm_mode = sArgs.norm_mode_arg;
+		if(norm_mode=="NA"){
+			fprintf(stderr, "Error, please supply --norm_mode\n");
+			return 1;
+		}
+		if(norm_mode=="subtract_z"){
+			n = Z_NORM;
+			if(sArgs.exp_arg==-1){
+				fprintf(stderr, "Error, please supply --exp\n");
+				return 1;
+			}
+		}else if(norm_mode=="rank"){
+			n = RANK_NORM;
+			if(sArgs.rbp_p_arg==-1 || sArgs.max_rank_arg==-1){
+				fprintf(stderr, "Error, Need both --rbp_p and --max_rank\n");
+				return 1;
+			}
+		}
+
 		vector<string> dab_list;
 		int numGenes = vecstrGenes.size();
 		string dab_dir = sArgs.dab_dir_arg;
 
 		float rbp_p = sArgs.rbp_p_arg;
 		int max_rank = sArgs.max_rank_arg;
-		fprintf(stderr, "Using rbp_p: %.3f, max_rank: %d\n", rbp_p, max_rank);
+		float exp = sArgs.exp_arg;
+		fprintf(stderr, "Using rbp_p: %.3f, max_rank: %d, exp: %.3f\n", rbp_p, max_rank, exp);
 
 		if(sArgs.default_type_arg==0){
-			CalculateMatrix<utype>(GENE_PRESENCE_VECTOR, dab_list, dab_dir, outdir, 
-				outdab, vecstrGenes, dweight, rbp_p, max_rank);	
-			CalculateMatrix<utype>(COUNT_MATRIX, dab_list, dab_dir, outdir, outdab,
-				vecstrGenes, dweight, rbp_p, max_rank);	
-			//CalculateMatrix<utype>(WEIGHTSUM_MATRIX, dab_list, dab_dir, outdir, outdab,
+			CalculateMatrix<utype>(n, GENE_PRESENCE_VECTOR, dab_list, dab_dir, outdir, 
+				outdab, vecstrGenes, dweight, rbp_p, max_rank, exp);	
+			CalculateMatrix<utype>(n, COUNT_MATRIX, dab_list, dab_dir, outdir, outdab,
+				vecstrGenes, dweight, rbp_p, max_rank, exp);	
+			//CalculateMatrix<utype>(n, WEIGHTSUM_MATRIX, dab_list, dab_dir, outdir, outdab,
+				//vecstrGenes, dweight, rbp_p, max_rank, exp);	
+			CalculateMatrix<utype>(n, DISTANCE_MATRIX, dab_list, dab_dir, outdir, outdab,
+				vecstrGenes, dweight, rbp_p, max_rank, exp);	
+		}else if(sArgs.default_type_arg==1){
+			CalculateMatrix<unsigned short>(n, GENE_PRESENCE_VECTOR, dab_list, dab_dir, outdir, 
+				outdab, vecstrGenes, dweight, rbp_p, max_rank, exp);	
+			CalculateMatrix<unsigned short>(n, COUNT_MATRIX, dab_list, dab_dir, outdir, outdab,
+				vecstrGenes, dweight, rbp_p, max_rank, exp);	
+			//CalculateMatrix<unsigned short>(n, WEIGHTSUM_MATRIX, dab_list, dab_dir, outdir, outdab,
 				//vecstrGenes, dweight, rbp_p, max_rank);	
-			CalculateMatrix<utype>(DISTANCE_MATRIX, dab_list, dab_dir, outdir, outdab,
-				vecstrGenes, dweight, rbp_p, max_rank);	
-		}else if(sArgs.default_type_arg==1){
-			CalculateMatrix<unsigned short>(GENE_PRESENCE_VECTOR, dab_list, dab_dir, outdir, 
-				outdab, vecstrGenes, dweight, rbp_p, max_rank);	
-			CalculateMatrix<unsigned short>(COUNT_MATRIX, dab_list, dab_dir, outdir, outdab,
-				vecstrGenes, dweight, rbp_p, max_rank);	
-			//CalculateMatrix<unsigned short>(WEIGHTSUM_MATRIX, dab_list, dab_dir, outdir, outdab,
-				//vecstrGenes, dweight, rbp_p, max_rank);	
-			CalculateMatrix<unsigned short>(DISTANCE_MATRIX, dab_list, dab_dir, outdir, outdab,
-				vecstrGenes, dweight, rbp_p, max_rank);	
+			CalculateMatrix<unsigned short>(n, DISTANCE_MATRIX, dab_list, dab_dir, outdir, outdab,
+				vecstrGenes, dweight, rbp_p, max_rank, exp);	
 		}else{
 			fprintf(stderr, "Invalid default type!\n");
 			return -1;

tools/SeekPrep/SeekPrep.ggo

 								flag	off
 option	"db"				f	"DB mode, suitable for platform wide gene average and stdev calculation"
 								flag	off
-option	"dabset"			g	"DAB set mode, sums a set of sparse rank-normalized DAB files, with weights or no weights"
+option	"dabset"			g	"DAB set mode, sums a set of sparse rank-normalized (or subtract-z-normalized) DAB files, with weights or no weights"
 								flag 	off
 option	"combined_dab"		h	"Combined DAB mode, divides a summed DAB file by total pair counts or dataset weights, generates a new normalized DAB file"
 								flag 	off
 
+section "Combined DAB mode"
+option	"dab_dir2"			H	"Directory containing the summed DAB file"
+								string typestr="directory" default="NA"
+option	"dab_basename"		J	"Summed DAB basename (ie without extension)"
+								string typestr="filename" default="NA"
+
 section "DAB set mode (also see Misc options)"
 option	"dab_dir"			G	"Directory containing the DAB files"
 								string typestr="directory" default="NA"
 								string typestr="filename" default="NA"
 option	"out_dab"			O	"Output DAB file basename (ie without extension)"
 								string typestr="filename" default="NA"
-option	"dataset_w"			W	"Dataset weights"
-								string typestr="filename" default="NA"
-
-section "Combined DAB mode"
-option	"dab_dir2"			H	"Directory containing the summed DAB file"
-								string typestr="directory" default="NA"
-option	"dab_basename"		J	"Summed DAB basename (ie without extension)"
+option	"dataset_w"			W	"Dataset weights (optional)"
 								string typestr="filename" default="NA"
 
 section "DAB mode"
 								string typestr="filename"
 option	"top_avg_percent"	C	"For gene average, top X percent of the values to take average (0 - 1.0)"
 								float	default="1.0"
-option	"norm"				n	"Rank-normalize matrix, also weight rank by RBP (see -M, -R additional options)"
+option	"norm"				F	"Normalize matrix then sparsify it (needs --norm_mode)"
+								flag	off
+option	"view"				X	"View distribution of values in the matrix"
 								flag	off
 
 section "PCL mode"
 								string typestr="filename"
 
 section "Misc"
+option	"default_type"		T	"Default gene index type (choose unsigned short for genes, or unsigned int (32-bit) for transcripts) (required for DAB set mode and if --norm is enabled in DAB mode) (0 - unsigned int, 1 - unsigned short)"
+								int default="-1"
+option	"norm_mode"			n	"Normalization method: rank - rank-normalize matrix, subtract_z - subtract-z-normalize matrix (required for DAB set mode and if --norm is enabled)"
+								values="rank","subtract_z","NA" default="NA"
 option	"logit"				l	"For --gavg and --gplat, whether to take logit of the value first (useful if edge value is probability)"
 								flag	off
-option	"max_rank"			M	"Maximum rank value (for DAB --norm, and DAB set mode)"
+option	"max_rank"			M	"Maximum rank value (for --norm_mode=rank)"
 								int default="-1"
-option	"rbp_p"				R	"RBP p parameter (for DAB --norm, and DAB set mode)"
+option	"rbp_p"				R	"RBP p parameter (for --norm_mode=rank)"
 								float default="-1"
-option	"default_type"		T	"Default gene index type (choose unsigned short for genes, or unsigned int (32-bit) for transcripts) (required for DAB --norm, and DAB set mode) (0 - unsigned int, 1 - unsigned short)"
-								int default="-1"
+option	"cutoff_value"		U	"The cutoff value (for --norm_mode=subtract_z)"
+								float	default="-1.0"
+option	"exp"				E	"Raise the z-score to the power of this value (for --norm_mode=subtract_z)"
+								float	default="-1.0"
 
 section "Input"
 option	"input"				i	"Gene mapping file"

tools/SeekPrep/cmdline.c

   "  -d, --dab                    DAB mode, suitable for dataset wide gene average \n                                 and stdev calculation  (default=off)",
   "  -e, --pclbin                 PCL BIN mode, suitable for dataset gene variance \n                                 calculation  (default=off)",
   "  -f, --db                     DB mode, suitable for platform wide gene average \n                                 and stdev calculation  (default=off)",
-  "  -g, --dabset                 DAB set mode, sums a set of sparse \n                                 rank-normalized DAB files, with weights or no \n                                 weights  (default=off)",
+  "  -g, --dabset                 DAB set mode, sums a set of sparse \n                                 rank-normalized (or subtract-z-normalized) DAB \n                                 files, with weights or no weights  \n                                 (default=off)",
   "  -h, --combined_dab           Combined DAB mode, divides a summed DAB file by \n                                 total pair counts or dataset weights, \n                                 generates a new normalized DAB file  \n                                 (default=off)",
+  "\nCombined DAB mode:",
+  "  -H, --dab_dir2=directory     Directory containing the summed DAB file  \n                                 (default=`NA')",
+  "  -J, --dab_basename=filename  Summed DAB basename (ie without extension)  \n                                 (default=`NA')",
   "\nDAB set mode (also see Misc options):",
   "  -G, --dab_dir=directory      Directory containing the DAB files  \n                                 (default=`NA')",
   "  -L, --dablist=filename       List of DAB files  (default=`NA')",
   "  -O, --out_dab=filename       Output DAB file basename (ie without extension)  \n                                 (default=`NA')",
-  "  -W, --dataset_w=filename     Dataset weights  (default=`NA')",
-  "\nCombined DAB mode:",
-  "  -H, --dab_dir2=directory     Directory containing the summed DAB file  \n                                 (default=`NA')",
-  "  -J, --dab_basename=filename  Summed DAB basename (ie without extension)  \n                                 (default=`NA')",
+  "  -W, --dataset_w=filename     Dataset weights (optional)  (default=`NA')",
   "\nDAB mode:",
   "  -a, --gavg                   Generates gene average file  (default=off)",
   "  -p, --gpres                  Generates gene presence file  (default=off)",
   "  -B, --dabinput=filename      DAB dataset file",
   "  -C, --top_avg_percent=FLOAT  For gene average, top X percent of the values to \n                                 take average (0 - 1.0)  (default=`1.0')",
-  "  -n, --norm                   Rank-normalize matrix, also weight rank by RBP \n                                 (see -M, -R additional options)  (default=off)",
+  "  -F, --norm                   Normalize matrix then sparsify it (needs \n                                 --norm_mode)  (default=off)",
+  "  -X, --view                   View distribution of values in the matrix  \n                                 (default=off)",
   "\nPCL mode:",
   "  -V, --pclinput=filename      PCL BIN file",
   "  -v, --gexpvarmean            Generates gene expression variance and mean \n                                 files (.gexpvar, .gexpmean)  (default=off)",
   "  -N, --useNibble              If the DB is nibble type  (default=off)",
   "  -Q, --quant=filename         Quant file",
   "\nMisc:",
+  "  -T, --default_type=INT       Default gene index type (choose unsigned short \n                                 for genes, or unsigned int (32-bit) for \n                                 transcripts) (required for DAB set mode and if \n                                 --norm is enabled in DAB mode) (0 - unsigned \n                                 int, 1 - unsigned short)  (default=`-1')",
+  "  -n, --norm_mode=STRING       Normalization method: rank - rank-normalize \n                                 matrix, subtract_z - subtract-z-normalize \n                                 matrix (required for DAB set mode and if \n                                 --norm is enabled)  (possible values=\"rank\", \n                                 \"subtract_z\", \"NA\" default=`NA')",
   "  -l, --logit                  For --gavg and --gplat, whether to take logit of \n                                 the value first (useful if edge value is \n                                 probability)  (default=off)",
-  "  -M, --max_rank=INT           Maximum rank value (for DAB --norm, and DAB set \n                                 mode)  (default=`-1')",
-  "  -R, --rbp_p=FLOAT            RBP p parameter (for DAB --norm, and DAB set \n                                 mode)  (default=`-1')",
-  "  -T, --default_type=INT       Default gene index type (choose unsigned short \n                                 for genes, or unsigned int (32-bit) for \n                                 transcripts) (required for DAB --norm, and DAB \n                                 set mode) (0 - unsigned int, 1 - unsigned \n                                 short)  (default=`-1')",
+  "  -M, --max_rank=INT           Maximum rank value (for --norm_mode=rank)  \n                                 (default=`-1')",
+  "  -R, --rbp_p=FLOAT            RBP p parameter (for --norm_mode=rank)  \n                                 (default=`-1')",
+  "  -U, --cutoff_value=FLOAT     The cutoff value (for --norm_mode=subtract_z)  \n                                 (default=`-1.0')",
+  "  -E, --exp=FLOAT              Raise the z-score to the power of this value \n                                 (for --norm_mode=subtract_z)  (default=`-1.0')",
   "\nInput:",
   "  -i, --input=filename         Gene mapping file",
   "\nOutput:",
 static int
 cmdline_parser_required2 (struct gengetopt_args_info *args_info, const char *prog_name, const char *additional_error);
 
+char *cmdline_parser_norm_mode_values[] = {"rank", "subtract_z", "NA", 0} ;	/* Possible values for norm_mode.  */
+
 static char *
 gengetopt_strdup (const char *s);
 
   args_info->db_given = 0 ;
   args_info->dabset_given = 0 ;
   args_info->combined_dab_given = 0 ;
+  args_info->dab_dir2_given = 0 ;
+  args_info->dab_basename_given = 0 ;
   args_info->dab_dir_given = 0 ;
   args_info->dablist_given = 0 ;
   args_info->out_dab_given = 0 ;
   args_info->dataset_w_given = 0 ;
-  args_info->dab_dir2_given = 0 ;
-  args_info->dab_basename_given = 0 ;
   args_info->gavg_given = 0 ;
   args_info->gpres_given = 0 ;
   args_info->dabinput_given = 0 ;
   args_info->top_avg_percent_given = 0 ;
   args_info->norm_given = 0 ;
+  args_info->view_given = 0 ;
   args_info->pclinput_given = 0 ;
   args_info->gexpvarmean_given = 0 ;
   args_info->sinfo_given = 0 ;
   args_info->dset_given = 0 ;
   args_info->useNibble_given = 0 ;
   args_info->quant_given = 0 ;
+  args_info->default_type_given = 0 ;
+  args_info->norm_mode_given = 0 ;
   args_info->logit_given = 0 ;
   args_info->max_rank_given = 0 ;
   args_info->rbp_p_given = 0 ;
-  args_info->default_type_given = 0 ;
+  args_info->cutoff_value_given = 0 ;
+  args_info->exp_given = 0 ;
   args_info->input_given = 0 ;
   args_info->dir_out_given = 0 ;
 }
   args_info->db_flag = 0;
   args_info->dabset_flag = 0;
   args_info->combined_dab_flag = 0;
+  args_info->dab_dir2_arg = gengetopt_strdup ("NA");
+  args_info->dab_dir2_orig = NULL;
+  args_info->dab_basename_arg = gengetopt_strdup ("NA");
+  args_info->dab_basename_orig = NULL;
   args_info->dab_dir_arg = gengetopt_strdup ("NA");
   args_info->dab_dir_orig = NULL;
   args_info->dablist_arg = gengetopt_strdup ("NA");
   args_info->out_dab_orig = NULL;
   args_info->dataset_w_arg = gengetopt_strdup ("NA");
   args_info->dataset_w_orig = NULL;
-  args_info->dab_dir2_arg = gengetopt_strdup ("NA");
-  args_info->dab_dir2_orig = NULL;
-  args_info->dab_basename_arg = gengetopt_strdup ("NA");
-  args_info->dab_basename_orig = NULL;
   args_info->gavg_flag = 0;
   args_info->gpres_flag = 0;
   args_info->dabinput_arg = NULL;
   args_info->top_avg_percent_arg = 1.0;
   args_info->top_avg_percent_orig = NULL;
   args_info->norm_flag = 0;
+  args_info->view_flag = 0;
   args_info->pclinput_arg = NULL;
   args_info->pclinput_orig = NULL;
   args_info->gexpvarmean_flag = 0;
   args_info->useNibble_flag = 0;
   args_info->quant_arg = NULL;
   args_info->quant_orig = NULL;
+  args_info->default_type_arg = -1;
+  args_info->default_type_orig = NULL;
+  args_info->norm_mode_arg = gengetopt_strdup ("NA");
+  args_info->norm_mode_orig = NULL;
   args_info->logit_flag = 0;
   args_info->max_rank_arg = -1;
   args_info->max_rank_orig = NULL;
   args_info->rbp_p_arg = -1;
   args_info->rbp_p_orig = NULL;
-  args_info->default_type_arg = -1;
-  args_info->default_type_orig = NULL;
+  args_info->cutoff_value_arg = -1.0;
+  args_info->cutoff_value_orig = NULL;
+  args_info->exp_arg = -1.0;
+  args_info->exp_orig = NULL;
   args_info->input_arg = NULL;
   args_info->input_orig = NULL;
   args_info->dir_out_arg = NULL;
   args_info->db_help = gengetopt_args_info_help[5] ;
   args_info->dabset_help = gengetopt_args_info_help[6] ;
   args_info->combined_dab_help = gengetopt_args_info_help[7] ;
-  args_info->dab_dir_help = gengetopt_args_info_help[9] ;
-  args_info->dablist_help = gengetopt_args_info_help[10] ;
-  args_info->out_dab_help = gengetopt_args_info_help[11] ;
-  args_info->dataset_w_help = gengetopt_args_info_help[12] ;
-  args_info->dab_dir2_help = gengetopt_args_info_help[14] ;
-  args_info->dab_basename_help = gengetopt_args_info_help[15] ;
+  args_info->dab_dir2_help = gengetopt_args_info_help[9] ;
+  args_info->dab_basename_help = gengetopt_args_info_help[10] ;
+  args_info->dab_dir_help = gengetopt_args_info_help[12] ;
+  args_info->dablist_help = gengetopt_args_info_help[13] ;
+  args_info->out_dab_help = gengetopt_args_info_help[14] ;
+  args_info->dataset_w_help = gengetopt_args_info_help[15] ;
   args_info->gavg_help = gengetopt_args_info_help[17] ;
   args_info->gpres_help = gengetopt_args_info_help[18] ;
   args_info->dabinput_help = gengetopt_args_info_help[19] ;
   args_info->top_avg_percent_help = gengetopt_args_info_help[20] ;
   args_info->norm_help = gengetopt_args_info_help[21] ;
-  args_info->pclinput_help = gengetopt_args_info_help[23] ;
-  args_info->gexpvarmean_help = gengetopt_args_info_help[24] ;
-  args_info->sinfo_help = gengetopt_args_info_help[25] ;
-  args_info->gplat_help = gengetopt_args_info_help[27] ;
-  args_info->dblist_help = gengetopt_args_info_help[28] ;
-  args_info->dir_prep_in_help = gengetopt_args_info_help[29] ;
-  args_info->dset_help = gengetopt_args_info_help[30] ;
-  args_info->useNibble_help = gengetopt_args_info_help[31] ;
-  args_info->quant_help = gengetopt_args_info_help[32] ;
-  args_info->logit_help = gengetopt_args_info_help[34] ;
-  args_info->max_rank_help = gengetopt_args_info_help[35] ;
-  args_info->rbp_p_help = gengetopt_args_info_help[36] ;
-  args_info->default_type_help = gengetopt_args_info_help[37] ;
-  args_info->input_help = gengetopt_args_info_help[39] ;
-  args_info->dir_out_help = gengetopt_args_info_help[41] ;
+  args_info->view_help = gengetopt_args_info_help[22] ;
+  args_info->pclinput_help = gengetopt_args_info_help[24] ;
+  args_info->gexpvarmean_help = gengetopt_args_info_help[25] ;
+  args_info->sinfo_help = gengetopt_args_info_help[26] ;
+  args_info->gplat_help = gengetopt_args_info_help[28] ;
+  args_info->dblist_help = gengetopt_args_info_help[29] ;
+  args_info->dir_prep_in_help = gengetopt_args_info_help[30] ;
+  args_info->dset_help = gengetopt_args_info_help[31] ;
+  args_info->useNibble_help = gengetopt_args_info_help[32] ;
+  args_info->quant_help = gengetopt_args_info_help[33] ;
+  args_info->default_type_help = gengetopt_args_info_help[35] ;
+  args_info->norm_mode_help = gengetopt_args_info_help[36] ;
+  args_info->logit_help = gengetopt_args_info_help[37] ;
+  args_info->max_rank_help = gengetopt_args_info_help[38] ;
+  args_info->rbp_p_help = gengetopt_args_info_help[39] ;
+  args_info->cutoff_value_help = gengetopt_args_info_help[40] ;
+  args_info->exp_help = gengetopt_args_info_help[41] ;
+  args_info->input_help = gengetopt_args_info_help[43] ;
+  args_info->dir_out_help = gengetopt_args_info_help[45] ;
   
 }
 
 cmdline_parser_release (struct gengetopt_args_info *args_info)
 {
   unsigned int i;
+  free_string_field (&(args_info->dab_dir2_arg));
+  free_string_field (&(args_info->dab_dir2_orig));
+  free_string_field (&(args_info->dab_basename_arg));
+  free_string_field (&(args_info->dab_basename_orig));
   free_string_field (&(args_info->dab_dir_arg));
   free_string_field (&(args_info->dab_dir_orig));
   free_string_field (&(args_info->dablist_arg));
   free_string_field (&(args_info->out_dab_orig));
   free_string_field (&(args_info->dataset_w_arg));
   free_string_field (&(args_info->dataset_w_orig));
-  free_string_field (&(args_info->dab_dir2_arg));
-  free_string_field (&(args_info->dab_dir2_orig));
-  free_string_field (&(args_info->dab_basename_arg));
-  free_string_field (&(args_info->dab_basename_orig));
   free_string_field (&(args_info->dabinput_arg));
   free_string_field (&(args_info->dabinput_orig));
   free_string_field (&(args_info->top_avg_percent_orig));
   free_string_field (&(args_info->dset_orig));
   free_string_field (&(args_info->quant_arg));
   free_string_field (&(args_info->quant_orig));
+  free_string_field (&(args_info->default_type_orig));
+  free_string_field (&(args_info->norm_mode_arg));
+  free_string_field (&(args_info->norm_mode_orig));
   free_string_field (&(args_info->max_rank_orig));
   free_string_field (&(args_info->rbp_p_orig));
-  free_string_field (&(args_info->default_type_orig));
+  free_string_field (&(args_info->cutoff_value_orig));
+  free_string_field (&(args_info->exp_orig));
   free_string_field (&(args_info->input_arg));
   free_string_field (&(args_info->input_orig));
   free_string_field (&(args_info->dir_out_arg));
   clear_given (args_info);
 }
 
+/**
+ * Look up an option argument in a list of permitted values, accepting
+ * unambiguous abbreviations.
+ *
+ * Each entry of values is compared against val over the first strlen(val)
+ * characters only, so val matches every entry that it is a prefix of. An
+ * entry whose length equals strlen(val) is an exact match and is returned
+ * immediately, even if val is also a prefix of other entries.
+ *
+ * @param val the value to check; may be NULL
+ * @param values the possible values; the scan stops at the first NULL entry
+ * @return the index of the matched value:
+ * -1 if no value matched (or val is NULL),
+ * -2 if more than one value has matched and none of them matched exactly
+ */
+static int
+check_possible_values(const char *val, char *values[])
+{
+  int i, found, last;
+  size_t len;
+
+  if (!val)   /* otherwise strlen() crashes below */
+    return -1; /* -1 means no argument for the option */
+
+  found = last = 0;
+
+  for (i = 0, len = strlen(val); values[i]; ++i) /* len is computed once, before the first iteration */
+    {
+      if (strncmp(val, values[i], len) == 0) /* val is a prefix of values[i] */
+        {
+          ++found;
+          last = i; /* remember the most recent prefix match */
+          if (strlen(values[i]) == len)
+            return i; /* exact match: no need to check more */
+        }
+    }
+
+  if (found == 1) /* exactly one prefix match: accept the abbreviation */
+    return last;
+
+  return (found ? -2 : -1); /* -2: several prefix matches (ambiguous); -1: none matched */
+}
+
 
 static void
 write_into_file(FILE *outfile, const char *opt, const char *arg, char *values[])
 {
+  int found = -1;
   if (arg) {
-    fprintf(outfile, "%s=\"%s\"\n", opt, arg);
+    if (values) {
+      found = check_possible_values(arg, values);      
+    }
+    if (found >= 0)
+      fprintf(outfile, "%s=\"%s\" # %s\n", opt, arg, values[found]);
+    else
+      fprintf(outfile, "%s=\"%s\"\n", opt, arg);
   } else {
     fprintf(outfile, "%s\n", opt);
   }
     write_into_file(outfile, "dabset", 0, 0 );
   if (args_info->combined_dab_given)
     write_into_file(outfile, "combined_dab", 0, 0 );
+  if (args_info->dab_dir2_given)
+    write_into_file(outfile, "dab_dir2", args_info->dab_dir2_orig, 0);
+  if (args_info->dab_basename_given)
+    write_into_file(outfile, "dab_basename", args_info->dab_basename_orig, 0);
   if (args_info->dab_dir_given)
     write_into_file(outfile, "dab_dir", args_info->dab_dir_orig, 0);
   if (args_info->dablist_given)
     write_into_file(outfile, "out_dab", args_info->out_dab_orig, 0);
   if (args_info->dataset_w_given)
     write_into_file(outfile, "dataset_w", args_info->dataset_w_orig, 0);
-  if (args_info->dab_dir2_given)
-    write_into_file(outfile, "dab_dir2", args_info->dab_dir2_orig, 0);
-  if (args_info->dab_basename_given)
-    write_into_file(outfile, "dab_basename", args_info->dab_basename_orig, 0);
   if (args_info->gavg_given)
     write_into_file(outfile, "gavg", 0, 0 );
   if (args_info->gpres_given)
     write_into_file(outfile, "top_avg_percent", args_info->top_avg_percent_orig, 0);
   if (args_info->norm_given)
     write_into_file(outfile, "norm", 0, 0 );
+  if (args_info->view_given)
+    write_into_file(outfile, "view", 0, 0 );
   if (args_info->pclinput_given)
     write_into_file(outfile, "pclinput", args_info->pclinput_orig, 0);
   if (args_info->gexpvarmean_given)
     write_into_file(outfile, "useNibble", 0, 0 );
   if (args_info->quant_given)
     write_into_file(outfile, "quant", args_info->quant_orig, 0);
+  if (args_info->default_type_given)
+    write_into_file(outfile, "default_type", args_info->default_type_orig, 0);
+  if (args_info->norm_mode_given)
+    write_into_file(outfile, "norm_mode", args_info->norm_mode_orig, cmdline_parser_norm_mode_values);
   if (args_info->logit_given)
     write_into_file(outfile, "logit", 0, 0 );
   if (args_info->max_rank_given)
     write_into_file(outfile, "max_rank", args_info->max_rank_orig, 0);
   if (args_info->rbp_p_given)
     write_into_file(outfile, "rbp_p", args_info->rbp_p_orig, 0);
-  if (args_info->default_type_given)
-    write_into_file(outfile, "default_type", args_info->default_type_orig, 0);
+  if (args_info->cutoff_value_given)
+    write_into_file(outfile, "cutoff_value", args_info->cutoff_value_orig, 0);
+  if (args_info->exp_given)
+    write_into_file(outfile, "exp", args_info->exp_orig, 0);
   if (args_info->input_given)
     write_into_file(outfile, "input", args_info->input_orig, 0);
   if (args_info->dir_out_given)
       return 1; /* failure */
     }
 
+  if (possible_values && (found = check_possible_values((value ? value : default_value), possible_values)) < 0)
+    {
+      if (short_opt != '-')
+        fprintf (stderr, "%s: %s argument, \"%s\", for option `--%s' (`-%c')%s\n", 
+          package_name, (found == -2) ? "ambiguous" : "invalid", value, long_opt, short_opt,
+          (additional_error ? additional_error : ""));
+      else
+        fprintf (stderr, "%s: %s argument, \"%s\", for option `--%s'%s\n", 
+          package_name, (found == -2) ? "ambiguous" : "invalid", value, long_opt,
+          (additional_error ? additional_error : ""));
+      return 1; /* failure */
+    }
     
   if (field_given && *field_given && ! override)
     return 0;
         { "db",	0, NULL, 'f' },
         { "dabset",	0, NULL, 'g' },
         { "combined_dab",	0, NULL, 'h' },
+        { "dab_dir2",	1, NULL, 'H' },
+        { "dab_basename",	1, NULL, 'J' },
         { "dab_dir",	1, NULL, 'G' },
         { "dablist",	1, NULL, 'L' },
         { "out_dab",	1, NULL, 'O' },
         { "dataset_w",	1, NULL, 'W' },
-        { "dab_dir2",	1, NULL, 'H' },
-        { "dab_basename",	1, NULL, 'J' },
         { "gavg",	0, NULL, 'a' },
         { "gpres",	0, NULL, 'p' },
         { "dabinput",	1, NULL, 'B' },
         { "top_avg_percent",	1, NULL, 'C' },
-        { "norm",	0, NULL, 'n' },
+        { "norm",	0, NULL, 'F' },
+        { "view",	0, NULL, 'X' },
         { "pclinput",	1, NULL, 'V' },
         { "gexpvarmean",	0, NULL, 'v' },
         { "sinfo",	0, NULL, 's' },
         { "dset",	1, NULL, 'A' },
         { "useNibble",	0, NULL, 'N' },
         { "quant",	1, NULL, 'Q' },
+        { "default_type",	1, NULL, 'T' },
+        { "norm_mode",	1, NULL, 'n' },
         { "logit",	0, NULL, 'l' },
         { "max_rank",	1, NULL, 'M' },
         { "rbp_p",	1, NULL, 'R' },
-        { "default_type",	1, NULL, 'T' },
+        { "cutoff_value",	1, NULL, 'U' },
+        { "exp",	1, NULL, 'E' },
         { "input",	1, NULL, 'i' },
         { "dir_out",	1, NULL, 'D' },
         { NULL,	0, NULL, 0 }
       };
 
-      c = getopt_long (argc, argv, "defghG:L:O:W:H:J:apB:C:nV:vsPb:I:A:NQ:lM:R:T:i:D:", long_options, &option_index);
+      c = getopt_long (argc, argv, "defghH:J:G:L:O:W:apB:C:FXV:vsPb:I:A:NQ:T:n:lM:R:U:E:i:D:", long_options, &option_index);
 
       if (c == -1) break;	/* Exit from `while (1)' loop.  */
 
             goto failure;
         
           break;
-        case 'g':	/* DAB set mode, sums a set of sparse rank-normalized DAB files, with weights or no weights.  */
+        case 'g':	/* DAB set mode, sums a set of sparse rank-normalized (or subtract-z-normalized) DAB files, with weights or no weights.  */
         
         
           if (update_arg((void *)&(args_info->dabset_flag), 0, &(args_info->dabset_given),
             goto failure;
         
           break;
+        case 'H':	/* Directory containing the summed DAB file.  */
+        
+        
+          if (update_arg( (void *)&(args_info->dab_dir2_arg), 
+               &(args_info->dab_dir2_orig), &(args_info->dab_dir2_given),
+              &(local_args_info.dab_dir2_given), optarg, 0, "NA", ARG_STRING,
+              check_ambiguity, override, 0, 0,
+              "dab_dir2", 'H',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'J':	/* Summed DAB basename (ie without extension).  */
+        
+        
+          if (update_arg( (void *)&(args_info->dab_basename_arg), 
+               &(args_info->dab_basename_orig), &(args_info->dab_basename_given),
+              &(local_args_info.dab_basename_given), optarg, 0, "NA", ARG_STRING,
+              check_ambiguity, override, 0, 0,
+              "dab_basename", 'J',
+              additional_error))
+            goto failure;
+        
+          break;
         case 'G':	/* Directory containing the DAB files.  */
         
         
             goto failure;
         
           break;
-        case 'W':	/* Dataset weights.  */
+        case 'W':	/* Dataset weights (optional).  */
         
         
           if (update_arg( (void *)&(args_info->dataset_w_arg), 
             goto failure;
         
           break;
-        case 'H':	/* Directory containing the summed DAB file.  */
-        
-        
-          if (update_arg( (void *)&(args_info->dab_dir2_arg), 
-               &(args_info->dab_dir2_orig), &(args_info->dab_dir2_given),
-              &(local_args_info.dab_dir2_given), optarg, 0, "NA", ARG_STRING,
-              check_ambiguity, override, 0, 0,
-              "dab_dir2", 'H',
-              additional_error))
-            goto failure;
-        
-          break;
-        case 'J':	/* Summed DAB basename (ie without extension).  */
-        
-        
-          if (update_arg( (void *)&(args_info->dab_basename_arg), 
-               &(args_info->dab_basename_orig), &(args_info->dab_basename_given),
-              &(local_args_info.dab_basename_given), optarg, 0, "NA", ARG_STRING,
-              check_ambiguity, override, 0, 0,
-              "dab_basename", 'J',
-              additional_error))
-            goto failure;
-        
-          break;
         case 'a':	/* Generates gene average file.  */
         
         
             goto failure;
         
           break;
-        case 'n':	/* Rank-normalize matrix, also weight rank by RBP (see -M, -R additional options).  */
+        case 'F':	/* Normalize matrix then sparsify it (needs --norm_mode).  */
         
         
           if (update_arg((void *)&(args_info->norm_flag), 0, &(args_info->norm_given),
               &(local_args_info.norm_given), optarg, 0, 0, ARG_FLAG,
-              check_ambiguity, override, 1, 0, "norm", 'n',
+              check_ambiguity, override, 1, 0, "norm", 'F',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'X':	/* View distribution of values in the matrix.  */
+        
+        
+          if (update_arg((void *)&(args_info->view_flag), 0, &(args_info->view_given),
+              &(local_args_info.view_given), optarg, 0, 0, ARG_FLAG,
+              check_ambiguity, override, 1, 0, "view", 'X',
               additional_error))
             goto failure;
         
             goto failure;
         
           break;
+        case 'T':	/* Default gene index type (choose unsigned short for genes, or unsigned int (32-bit) for transcripts) (required for DAB set mode and if --norm is enabled in DAB mode) (0 - unsigned int, 1 - unsigned short).  */
+        /* Handles short option 'T' (long name "default_type"). The value in optarg is */
+        /* handed to update_arg together with the arg/orig/given fields of default_type, the string "-1" and the ARG_INT type tag. */
+          if (update_arg( (void *)&(args_info->default_type_arg), 
+               &(args_info->default_type_orig), &(args_info->default_type_given),
+              &(local_args_info.default_type_given), optarg, 0, "-1", ARG_INT,
+              check_ambiguity, override, 0, 0,
+              "default_type", 'T',
+              additional_error))
+            goto failure;
+        /* Reached only when update_arg returned zero; a non-zero result jumps to the failure label above. */
+          break;
+        case 'n':	/* Normalization method: rank - rank-normalize matrix, subtract_z - subtract-z-normalize matrix (required for DAB set mode and if --norm is enabled).  */
+        /* Handles short option 'n' (long name "norm_mode"). The value in optarg is handed to update_arg together with the arg/orig/given fields of norm_mode, */
+        /* the cmdline_parser_norm_mode_values array, the string "NA" and the ARG_STRING type tag. NOTE(review): the array presumably restricts the accepted values - confirm in update_arg. */
+          if (update_arg( (void *)&(args_info->norm_mode_arg), 
+               &(args_info->norm_mode_orig), &(args_info->norm_mode_given),
+              &(local_args_info.norm_mode_given), optarg, cmdline_parser_norm_mode_values, "NA", ARG_STRING,
+              check_ambiguity, override, 0, 0,
+              "norm_mode", 'n',
+              additional_error))
+            goto failure;
+        /* Reached only when update_arg returned zero; a non-zero result jumps to the failure label above. */
+          break;
         case 'l':	/* For --gavg and --gplat, whether to take logit of the value first (useful if edge value is probability).  */
         
         
             goto failure;
         
           break;
-        case 'M':	/* Maximum rank value (for DAB --norm, and DAB set mode).  */
+        case 'M':	/* Maximum rank value (for --norm_mode=rank).  */
         
         
           if (update_arg( (void *)&(args_info->max_rank_arg), 
             goto failure;
         
           break;
-        case 'R':	/* RBP p parameter (for DAB --norm, and DAB set mode).  */
+        case 'R':	/* RBP p parameter (for --norm_mode=rank).  */
         
         
           if (update_arg( (void *)&(args_info->rbp_p_arg), 
             goto failure;
         
           break;
-        case 'T':	/* Default gene index type (choose unsigned short for genes, or unsigned int (32-bit) for transcripts) (required for DAB --norm, and DAB set mode) (0 - unsigned int, 1 - unsigned short).  */
+        case 'U':	/* The cutoff value (for --norm_mode=subtract_z).  */
         
         
-          if (update_arg( (void *)&(args_info->default_type_arg), 
-               &(args_info->default_type_orig), &(args_info->default_type_given),
-              &(local_args_info.default_type_given), optarg, 0, "-1", ARG_INT,
+          if (update_arg( (void *)&(args_info->cutoff_value_arg), 
+               &(args_info->cutoff_value_orig), &(args_info->cutoff_value_given),
+              &(local_args_info.cutoff_value_given), optarg, 0, "-1.0", ARG_FLOAT,
               check_ambiguity, override, 0, 0,
-              "default_type", 'T',
+              "cutoff_value", 'U',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'E':	/* Raise the z-score to the power of this value (for --norm_mode=subtract_z).  */
+        
+        
+          if (update_arg( (void *)&(args_info->exp_arg), 
+               &(args_info->exp_orig), &(args_info->exp_given),
+              &(local_args_info.exp_given), optarg, 0, "-1.0", ARG_FLOAT,
+              check_ambiguity, override, 0, 0,
+              "exp", 'E',
               additional_error))
             goto failure;
         

tools/SeekPrep/cmdline.h

   const char *pclbin_help; /**< @brief PCL BIN mode, suitable for dataset gene variance calculation help description.  */
   int db_flag;	/**< @brief DB mode, suitable for platform wide gene average and stdev calculation (default=off).  */
   const char *db_help; /**< @brief DB mode, suitable for platform wide gene average and stdev calculation help description.  */
-  int dabset_flag;	/**< @brief DAB set mode, sums a set of sparse rank-normalized DAB files, with weights or no weights (default=off).  */
-  const char *dabset_help; /**< @brief DAB set mode, sums a set of sparse rank-normalized DAB files, with weights or no weights help description.  */
+  int dabset_flag;	/**< @brief DAB set mode, sums a set of sparse rank-normalized (or subtract-z-normalized) DAB files, with weights or no weights (default=off).  */
+  const char *dabset_help; /**< @brief DAB set mode, sums a set of sparse rank-normalized (or subtract-z-normalized) DAB files, with weights or no weights help description.  */
   int combined_dab_flag;	/**< @brief Combined DAB mode, divides a summed DAB file by total pair counts or dataset weights, generates a new normalized DAB file (default=off).  */
   const char *combined_dab_help; /**< @brief Combined DAB mode, divides a summed DAB file by total pair counts or dataset weights, generates a new normalized DAB file help description.  */
+  char * dab_dir2_arg;	/**< @brief Directory containing the summed DAB file (default='NA').  */
+  char * dab_dir2_orig;	/**< @brief Directory containing the summed DAB file original value given at command line.  */
+  const char *dab_dir2_help; /**< @brief Directory containing the summed DAB file help description.  */
+  char * dab_basename_arg;	/**< @brief Summed DAB basename (ie without extension) (default='NA').  */
+  char * dab_basename_orig;	/**< @brief Summed DAB basename (ie without extension) original value given at command line.  */
+  const char *dab_basename_help; /**< @brief Summed DAB basename (ie without extension) help description.  */
   char * dab_dir_arg;	/**< @brief Directory containing the DAB files (default='NA').  */
   char * dab_dir_orig;	/**< @brief Directory containing the DAB files original value given at command line.  */
   const char *dab_dir_help; /**< @brief Directory containing the DAB files help description.  */
   char * out_dab_arg;	/**< @brief Output DAB file basename (ie without extension) (default='NA').  */
   char * out_dab_orig;	/**< @brief Output DAB file basename (ie without extension) original value given at command line.  */
   const char *out_dab_help; /**< @brief Output DAB file basename (ie without extension) help description.  */
-  char * dataset_w_arg;	/**< @brief Dataset weights (default='NA').  */
-  char * dataset_w_orig;	/**< @brief Dataset weights original value given at command line.  */
-  const char *dataset_w_help; /**< @brief Dataset weights help description.  */
-  char * dab_dir2_arg;	/**< @brief Directory containing the summed DAB file (default='NA').  */
-  char * dab_dir2_orig;	/**< @brief Directory containing the summed DAB file original value given at command line.  */
-  const char *dab_dir2_help; /**< @brief Directory containing the summed DAB file help description.  */
-  char * dab_basename_arg;	/**< @brief Summed DAB basename (ie without extension) (default='NA').  */
-  char * dab_basename_orig;	/**< @brief Summed DAB basename (ie without extension) original value given at command line.  */
-  const char *dab_basename_help; /**< @brief Summed DAB basename (ie without extension) help description.  */
+  char * dataset_w_arg;	/**< @brief Dataset weights (optional) (default='NA').  */
+  char * dataset_w_orig;	/**< @brief Dataset weights (optional) original value given at command line.  */
+  const char *dataset_w_help; /**< @brief Dataset weights (optional) help description.  */
   int gavg_flag;	/**< @brief Generates gene average file (default=off).  */
   const char *gavg_help; /**< @brief Generates gene average file help description.  */
   int gpres_flag;	/**< @brief Generates gene presence file (default=off).  */
   float top_avg_percent_arg;	/**< @brief For gene average, top X percent of the values to take average (0 - 1.0) (default='1.0').  */
   char * top_avg_percent_orig;	/**< @brief For gene average, top X percent of the values to take average (0 - 1.0) original value given at command line.  */
   const char *top_avg_percent_help; /**< @brief For gene average, top X percent of the values to take average (0 - 1.0) help description.  */
-  int norm_flag;	/**< @brief Rank-normalize matrix, also weight rank by RBP (see -M, -R additional options) (default=off).  */
-  const char *norm_help; /**< @brief Rank-normalize matrix, also weight rank by RBP (see -M, -R additional options) help description.  */
+  int norm_flag;	/**< @brief Normalize matrix then sparsify it (needs --norm_mode) (default=off).  */
+  const char *norm_help; /**< @brief Normalize matrix then sparsify it (needs --norm_mode) help description.  */
+  int view_flag;	/**< @brief View distribution of values in the matrix (default=off).  */
+  const char *view_help; /**< @brief View distribution of values in the matrix help description.  */
   char * pclinput_arg;	/**< @brief PCL BIN file.  */
   char * pclinput_orig;	/**< @brief PCL BIN file original value given at command line.  */
   const char *pclinput_help; /**< @brief PCL BIN file help description.  */
   char * quant_arg;	/**< @brief Quant file.  */
   char * quant_orig;	/**< @brief Quant file original value given at command line.  */
   const char *quant_help; /**< @brief Quant file help description.  */
+  int default_type_arg;	/**< @brief Default gene index type (choose unsigned short for genes, or unsigned int (32-bit) for transcripts) (required for DAB set mode and if --norm is enabled in DAB mode) (0 - unsigned int, 1 - unsigned short) (default='-1').  */
+  char * default_type_orig;	/**< @brief Default gene index type (choose unsigned short for genes, or unsigned int (32-bit) for transcripts) (required for DAB set mode and if --norm is enabled in DAB mode) (0 - unsigned int, 1 - unsigned short) original value given at command line.  */
+  const char *default_type_help; /**< @brief Default gene index type (choose unsigned short for genes, or unsigned int (32-bit) for transcripts) (required for DAB set mode and if --norm is enabled in DAB mode) (0 - unsigned int, 1 - unsigned short) help description.  */
+  char * norm_mode_arg;	/**< @brief Normalization method: rank - rank-normalize matrix, subtract_z - subtract-z-normalize matrix (required for DAB set mode and if --norm is enabled) (default='NA').  */
+  char * norm_mode_orig;	/**< @brief Normalization method: rank - rank-normalize matrix, subtract_z - subtract-z-normalize matrix (required for DAB set mode and if --norm is enabled) original value given at command line.  */
+  const char *norm_mode_help; /**< @brief Normalization method: rank - rank-normalize matrix, subtract_z - subtract-z-normalize matrix (required for DAB set mode and if --norm is enabled) help description.  */
   int logit_flag;	/**< @brief For --gavg and --gplat, whether to take logit of the value first (useful if edge value is probability) (default=off).  */
   const char *logit_help; /**< @brief For --gavg and --gplat, whether to take logit of the value first (useful if edge value is probability) help description.  */
-  int max_rank_arg;	/**< @brief Maximum rank value (for DAB --norm, and DAB set mode) (default='-1').  */
-  char * max_rank_orig;	/**< @brief Maximum rank value (for DAB --norm, and DAB set mode) original value given at command line.  */
-  const char *max_rank_help; /**< @brief Maximum rank value (for DAB --norm, and DAB set mode) help description.  */
-  float rbp_p_arg;	/**< @brief RBP p parameter (for DAB --norm, and DAB set mode) (default='-1').  */
-  char * rbp_p_orig;	/**< @brief RBP p parameter (for DAB --norm, and DAB set mode) original value given at command line.  */
-  const char *rbp_p_help; /**< @brief RBP p parameter (for DAB --norm, and DAB set mode) help description.  */
-  int default_type_arg;	/**< @brief Default gene index type (choose unsigned short for genes, or unsigned int (32-bit) for transcripts) (required for DAB --norm, and DAB set mode) (0 - unsigned int, 1 - unsigned short) (default='-1').  */
-  char * default_type_orig;	/**< @brief Default gene index type (choose unsigned short for genes, or unsigned int (32-bit) for transcripts) (required for DAB --norm, and DAB set mode) (0 - unsigned int, 1 - unsigned short) original value given at command line.  */
-  const char *default_type_help; /**< @brief Default gene index type (choose unsigned short for genes, or unsigned int (32-bit) for transcripts) (required for DAB --norm, and DAB set mode) (0 - unsigned int, 1 - unsigned short) help description.  */
+  int max_rank_arg;	/**< @brief Maximum rank value (for --norm_mode=rank) (default='-1').  */
+  char * max_rank_orig;	/**< @brief Maximum rank value (for --norm_mode=rank) original value given at command line.  */
+  const char *max_rank_help; /**< @brief Maximum rank value (for --norm_mode=rank) help description.  */
+  float rbp_p_arg;	/**< @brief RBP p parameter (for --norm_mode=rank) (default='-1').  */
+  char * rbp_p_orig;	/**< @brief RBP p parameter (for --norm_mode=rank) original value given at command line.  */
+  const char *rbp_p_help; /**< @brief RBP p parameter (for --norm_mode=rank) help description.  */
+  float cutoff_value_arg;	/**< @brief The cutoff value (for --norm_mode=subtract_z) (default='-1.0').  */
+  char * cutoff_value_orig;	/**< @brief The cutoff value (for --norm_mode=subtract_z) original value given at command line.  */
+  const char *cutoff_value_help; /**< @brief The cutoff value (for --norm_mode=subtract_z) help description.  */
+  float exp_arg;	/**< @brief Raise the z-score to the power of this value (for --norm_mode=subtract_z) (default='-1.0').  */
+  char * exp_orig;	/**< @brief Raise the z-score to the power of this value (for --norm_mode=subtract_z) original value given at command line.  */
+  const char *exp_help; /**< @brief Raise the z-score to the power of this value (for --norm_mode=subtract_z) help description.  */
   char * input_arg;	/**< @brief Gene mapping file.  */
   char * input_orig;	/**< @brief Gene mapping file original value given at command line.  */
   const char *input_help; /**< @brief Gene mapping file help description.  */
   unsigned int db_given ;	/**< @brief Whether db was given.  */
   unsigned int dabset_given ;	/**< @brief Whether dabset was given.  */
   unsigned int combined_dab_given ;	/**< @brief Whether combined_dab was given.  */
+  unsigned int dab_dir2_given ;	/**< @brief Whether dab_dir2 was given.  */
+  unsigned int dab_basename_given ;	/**< @brief Whether dab_basename was given.  */
   unsigned int dab_dir_given ;	/**< @brief Whether dab_dir was given.  */
   unsigned int dablist_given ;	/**< @brief Whether dablist was given.  */
   unsigned int out_dab_given ;	/**< @brief Whether out_dab was given.  */
   unsigned int dataset_w_given ;	/**< @brief Whether dataset_w was given.  */
-  unsigned int dab_dir2_given ;	/**< @brief Whether dab_dir2 was given.  */
-  unsigned int dab_basename_given ;	/**< @brief Whether dab_basename was given.  */
   unsigned int gavg_given ;	/**< @brief Whether gavg was given.  */
   unsigned int gpres_given ;	/**< @brief Whether gpres was given.  */
   unsigned int dabinput_given ;	/**< @brief Whether dabinput was given.  */
   unsigned int top_avg_percent_given ;	/**< @brief Whether top_avg_percent was given.  */
   unsigned int norm_given ;	/**< @brief Whether norm was given.  */
+  unsigned int view_given ;	/**< @brief Whether view was given.  */
   unsigned int pclinput_given ;	/**< @brief Whether pclinput was given.  */
   unsigned int gexpvarmean_given ;	/**< @brief Whether gexpvarmean was given.  */
   unsigned int sinfo_given ;	/**< @brief Whether sinfo was given.  */
   unsigned int dset_given ;	/**< @brief Whether dset was given.  */
   unsigned int useNibble_given ;	/**< @brief Whether useNibble was given.  */
   unsigned int quant_given ;	/**< @brief Whether quant was given.  */
+  unsigned int default_type_given ;	/**< @brief Whether default_type was given.  */
+  unsigned int norm_mode_given ;	/**< @brief Whether norm_mode was given.  */
   unsigned int logit_given ;	/**< @brief Whether logit was given.  */
   unsigned int max_rank_given ;	/**< @brief Whether max_rank was given.  */
   unsigned int rbp_p_given ;	/**< @brief Whether rbp_p was given.  */
-  unsigned int default_type_given ;	/**< @brief Whether default_type was given.  */
+  unsigned int cutoff_value_given ;	/**< @brief Whether cutoff_value was given.  */
+  unsigned int exp_given ;	/**< @brief Whether exp was given.  */
   unsigned int input_given ;	/**< @brief Whether input was given.  */
   unsigned int dir_out_given ;	/**< @brief Whether dir_out was given.  */
 
 int cmdline_parser_required (struct gengetopt_args_info *args_info,
   const char *prog_name);
 
+extern char *cmdline_parser_norm_mode_values[] ;	/**< @brief Possible values for norm_mode.  */
+
 
 #ifdef __cplusplus
 }