Qian Zhu avatar Qian Zhu committed d703cae

Added function to rank-normalize a distance matrix

Comments (0)

Files changed (6)

src/seekwriter.cpp

 
 namespace Sleipnir {
 
+/*
+ * Reads one fixed-size binary value from f into value.
+ * Returns false when fewer than sizeof(tType) bytes could be read.
+ */
+template<class tType>
+static bool ReadSparseValue(FILE *f, tType &value){
+	return fread((char*) (&value), 1, sizeof(value), f) == sizeof(value);
+}
+
+/*
+ * Parses an already opened sparse rank file into mat and m.
+ *
+ * Layout consumed, in order:
+ *   utype          numPresent
+ *   utype          gene index, repeated numPresent times (each is added to m)
+ *   utype          numGenes
+ *   then numGenes records of:
+ *     utype          row index
+ *     unsigned short numEntries
+ *     numEntries pairs of (utype column index, unsigned short rank)
+ *
+ * Each rank is translated through rbp_score and stored at mat[row][column].
+ * Returns false on a short read, or when a gene index falls outside mat or a
+ * rank falls outside rbp_score; mat and m may be partially filled in that case.
+ */
+static bool LoadSparseRanks(FILE *f, vector<vector<float> > &mat,
+	CSeekIntIntMap &m, const vector<float> &rbp_score){
+
+	const size_t iGenes = mat.size();
+	utype numPresent = 0;
+	utype numGenes = 0;
+	utype i, j;
+
+	if(!ReadSparseValue(f, numPresent)) return false;
+	for(j=0; j<numPresent; j++){
+		utype val;
+		//these indices are later used to address mat and the row sums
+		if(!ReadSparseValue(f, val) || val>=iGenes) return false;
+		m.Add(val);
+	}
+
+	if(!ReadSparseValue(f, numGenes)) return false;
+
+	fprintf(stderr, "Begin assigning rbp scores\n");
+
+	for(i=0; i<numGenes; i++){
+		utype id;
+		unsigned short numEntries;
+		if(!ReadSparseValue(f, id) || !ReadSparseValue(f, numEntries))
+			return false;
+		if(id>=iGenes) return false;
+		for(j=0; j<numEntries; j++){
+			utype id2;
+			unsigned short val;
+			if(!ReadSparseValue(f, id2) || !ReadSparseValue(f, val))
+				return false;
+			//a rank >= maxRank has no entry in the score table
+			if(id2>=iGenes || val>=rbp_score.size()) return false;
+			mat[id][id2] = rbp_score[val];
+		}
+	}
+	return true;
+}
+
+/*
+ * Loads a sparse rank matrix from a binary file, converts ranks into scores
+ * and normalizes the scores by the square roots of the row sums.
+ *
+ * fileName    : binary file to read (layout: see LoadSparseRanks)
+ * mat         : output; cleared and resized to genes x genes, initialized to 0
+ * m           : receives the gene indices listed as present in the file
+ * maxRank     : number of distinct ranks; rank r (0 <= r < maxRank) is scored
+ *               as (1 - rbp_p) * rbp_p^r
+ * rbp_p       : the p parameter of the score formula above
+ * vecstrGenes : gene list; only its size is used, as the matrix dimension
+ *
+ * Only the pairs (i, j) with i before j in m's reverse list are read for the
+ * row sums; after normalization the value is mirrored into mat[j][i].
+ *
+ * Returns false if the file cannot be opened, is truncated, or contains a
+ * gene index or rank that is out of range. Previously such input was read
+ * unchecked and indexed past the end of mat / the score table.
+ */
+bool CSeekWriter::ReadSparseMatrix(const char *fileName,
+	vector<vector<float> > &mat, CSeekIntIntMap &m, 
+	const int maxRank, const float rbp_p,
+	const vector<string> &vecstrGenes){
+
+	FILE *f = fopen(fileName, "rb");
+	if(f==NULL){
+		cerr << "File not found" << endl;
+		return false;
+	}
+
+	utype i, j;
+	mat.clear();
+
+	mat.resize(vecstrGenes.size());
+	for(i=0; i<vecstrGenes.size(); i++)
+		CSeekTools::InitVector(mat[i], vecstrGenes.size(), (float) 0);
+
+	//score table indexed by rank; a non-positive maxRank yields an empty table
+	const size_t iRanks = maxRank>0 ? (size_t) maxRank : 0;
+	vector<float> rbp_score;
+	rbp_score.resize(iRanks);
+	for(i=0; i<iRanks; i++)
+		rbp_score[i] = (1.0 - rbp_p) * pow(rbp_p, i);
+
+	const bool loaded = LoadSparseRanks(f, mat, m, rbp_score);
+	fclose(f);
+	if(!loaded){
+		cerr << "Malformed or truncated sparse matrix file" << endl;
+		return false;
+	}
+
+	fprintf(stderr, "Filling zero rbp score\n");
+
+	utype ii, jj;
+	const vector<utype> &allRGenes = m.GetAllReverse();
+	fprintf(stderr, "Begin calculating row sum\n");
+	vector<float> vecSum;
+	CSeekTools::InitVector(vecSum, vecstrGenes.size(), (float) 0);
+	for(ii=0; ii<m.GetNumSet(); ii++){
+		i = allRGenes[ii];
+		for(jj=ii+1; jj<m.GetNumSet(); jj++){
+			j = allRGenes[jj];
+			if(mat[i][j]==0) continue;
+			//each stored pair contributes to the sums of both genes
+			vecSum[i] += mat[i][j];
+			vecSum[j] += mat[i][j];
+		}
+	}
+
+	vector<float> vecSqrtSum;
+	CSeekTools::InitVector(vecSqrtSum, vecstrGenes.size(), (float) 0);
+
+	for(ii=0; ii<m.GetNumSet(); ii++){
+		i = allRGenes[ii];
+		if(vecSum[i]==0) continue;
+		vecSqrtSum[i] = sqrtf(vecSum[i]);
+	}
+
+	fprintf(stderr, "Begin normalization using row sum\n");
+	for(ii=0; ii<m.GetNumSet(); ii++){
+		i = allRGenes[ii];
+		for(jj=ii+1; jj<m.GetNumSet(); jj++){
+			j = allRGenes[jj];
+			//zero scores and genes with a zero row sum are left untouched
+			if(mat[i][j]==0 || vecSqrtSum[i]==0 || vecSqrtSum[j]==0) continue;
+			mat[i][j] = mat[i][j] / vecSqrtSum[i] / vecSqrtSum[j];
+			mat[j][i] = mat[i][j];
+		}
+	}
+	return true;
+}
+
+/*
+ * Combines two score matrices into re by taking, for every gene pair, the
+ * square root of the product of the two scores (their geometric mean), then
+ * normalizes re by the square roots of its row sums.
+ *
+ * mat1, mat2 : input matrices; indexed with the same gene indices
+ *              (NOTE(review): mat2 is assumed to be at least as large as
+ *              mat1 -- this is not checked here)
+ * m1, m2     : presence maps of mat1 and mat2; only genes set in m1 whose
+ *              m2.GetForward() is not NaN take part
+ * re         : output; resized to mat1.size() rows, each initialized to 0
+ *
+ * Pairs where either input score is 0 stay 0 in re. Pairs whose gene has a
+ * zero row sum keep their un-normalized geometric mean.
+ * Always returns true.
+ */
+bool CSeekWriter::ProductNorm(const vector<vector<float> > &mat1,
+	const vector<vector<float> > &mat2, const CSeekIntIntMap &m1, 
+	const CSeekIntIntMap &m2, vector<vector<float> > &re){
+
+	utype ii, jj;
+	utype i, j;
+
+	re.resize(mat1.size());
+	for(i=0; i<mat1.size(); i++)
+		CSeekTools::InitVector(re[i], mat1.size(), (float)0);
+
+	//mi = genes present in both maps
+	const vector<utype> &allRGenes1 = m1.GetAllReverse();
+	CSeekIntIntMap mi(mat1.size());
+	for(ii=0; ii<m1.GetNumSet(); ii++){
+		i = allRGenes1[ii];
+		if(CSeekTools::IsNaN(m2.GetForward(i))) continue;
+		mi.Add(i);
+	}
+
+	const vector<utype> &allR = mi.GetAllReverse();
+	fprintf(stderr, "Begin calculating row sum\n");
+	vector<float> vecSum;
+	CSeekTools::InitVector(vecSum, mat1.size(), (float) 0);
+	for(ii=0; ii<mi.GetNumSet(); ii++){
+		i = allR[ii];
+		for(jj=ii+1; jj<mi.GetNumSet(); jj++){
+			j = allR[jj];
+			if(mat1[i][j]==0 || mat2[i][j]==0) continue;
+			re[i][j] = sqrtf(mat1[i][j] * mat2[i][j]);
+			re[j][i] = re[i][j];
+			//each pair contributes to the sums of both genes
+			vecSum[i] += re[i][j];
+			vecSum[j] += re[i][j];
+		}
+	}
+
+	vector<float> vecSqrtSum;
+	CSeekTools::InitVector(vecSqrtSum, mat1.size(), (float)0);
+	for(ii=0; ii<mi.GetNumSet(); ii++){
+		i = allR[ii];
+		if(vecSum[i]==0) continue;
+		vecSqrtSum[i] = sqrtf(vecSum[i]);
+	}
+
+	fprintf(stderr, "Begin normalization using row sum\n");
+	for(ii=0; ii<mi.GetNumSet(); ii++){
+		i = allR[ii];
+		for(jj=ii+1; jj<mi.GetNumSet(); jj++){
+			j = allR[jj];
+			if(mat1[i][j]==0 || mat2[i][j]==0) continue;
+			if(vecSqrtSum[i]==0 || vecSqrtSum[j]==0) continue;
+			re[i][j] = re[i][j] / vecSqrtSum[i] / vecSqrtSum[j];
+			re[j][i] = re[i][j];
+		}
+	}
+	return true;
+}
+
+/*
+ * Writes one fixed-size binary value to f.
+ * Returns false when fewer than sizeof(tType) bytes could be written.
+ */
+template<class tType>
+static bool WriteSparseValue(FILE *f, const tType &value){
+	return fwrite((const char*) (&value), 1, sizeof(value), f) == sizeof(value);
+}
+
+/*
+ * Writes the upper triangle of a rank matrix to a binary file, omitting
+ * entries that are NaN (per CSeekTools::IsNaN) or equal to maxRank.
+ *
+ * Layout produced, in order:
+ *   utype          numPresent (genes with at least one non-NaN value)
+ *   utype          gene index, repeated numPresent times
+ *   utype          numGenes (rows with at least one written entry)
+ *   then for each such row i:
+ *     utype          i
+ *     unsigned short numEntries
+ *     numEntries pairs of (utype j, unsigned short umat[i][j]), j > i
+ *
+ * umat        : genes x genes rank matrix (not modified)
+ * maxRank     : rank value treated as "not stored"
+ * vecstrGenes : gene list; only its size is used, as the matrix dimension
+ * fileName    : output path
+ *
+ * Returns false if a row has more entries than the 16-bit numEntries field
+ * can hold (checked before the file is opened, so nothing is overwritten),
+ * if the file cannot be opened, or if any write fails.
+ */
+bool CSeekWriter::WriteSparseMatrix(vector<vector<unsigned short> > &umat,
+	int maxRank, const vector<string> &vecstrGenes, const char *fileName){
+
+	utype i, j;
+	const size_t iGenes = vecstrGenes.size();
+
+	//genes that have at least one non-NaN value anywhere in their row
+	CSeekIntIntMap mm(vecstrGenes.size());
+	for(i=0; i<iGenes; i++){
+		for(j=0; j<iGenes; j++)
+			if(!CSeekTools::IsNaN(umat[i][j])) break;
+		if(j!=iGenes){
+			mm.Add(i);
+		}
+	}
+
+	//count the entries of every row once; the count is stored on disk as an
+	//unsigned short, so a larger row cannot be represented
+	const size_t maxEntries = (unsigned short) -1;
+	vector<size_t> vecEntries(iGenes, 0);
+	utype numGenes = 0;
+	for(i=0; i<iGenes; i++){
+		for(j=i+1; j<iGenes; j++){
+			if(CSeekTools::IsNaN(umat[i][j]) || umat[i][j]==maxRank)
+				continue;
+			vecEntries[i]++;
+		}
+		if(vecEntries[i]==0)
+			continue;
+		if(vecEntries[i]>maxEntries){
+			cerr << "Too many entries in one row for the sparse format!" << endl;
+			return false;
+		}
+		numGenes++;
+	}
+
+	FILE *f = fopen(fileName, "wb");
+	if(f==NULL){
+		cerr << "Could not open file for writing!" << endl;
+		return false;
+	}
+
+	const utype numPresent = mm.GetNumSet();
+	const vector<utype> &allR = mm.GetAllReverse();
+
+	//1 utype
+	bool ok = WriteSparseValue(f, numPresent);
+	//numPresent utype
+	for(i=0; ok && i<numPresent; i++)
+		ok = WriteSparseValue(f, allR[i]);
+	//1 utype
+	ok = ok && WriteSparseValue(f, numGenes);
+
+	for(i=0; ok && i<iGenes; i++){
+		if(vecEntries[i]==0)
+			continue;
+		const unsigned short numEntries = (unsigned short) vecEntries[i];
+		//1 utype, 1 unsigned short
+		ok = WriteSparseValue(f, i) && WriteSparseValue(f, numEntries);
+		for(j=i+1; ok && j<iGenes; j++){
+			if(CSeekTools::IsNaN(umat[i][j]) || umat[i][j]==maxRank)
+				continue;
+			//1 utype, 1 unsigned short
+			ok = WriteSparseValue(f, j) && WriteSparseValue(f, umat[i][j]);
+		}
+	}
+
+	//fclose flushes buffered data, so its result is part of the write status
+	if(fclose(f)!=0)
+		ok = false;
+	if(!ok){
+		cerr << "Error writing sparse matrix file!" << endl;
+		return false;
+	}
+	return true;
+}
+
+/*
+ * Converts the values in Dat into a symmetric matrix of per-row ranks.
+ *
+ * Dat         : source of pairwise values, read one row at a time
+ * umat        : output; genes x genes, every cell first set to nullValue
+ * nullValue   : value left in cells that receive no rank
+ * maxRank     : number of leading positions per row that get an individual
+ *               rank (0 .. maxRank-1); every other non-missing value in the
+ *               row is stored as maxRank
+ * vecstrGenes : genes defining the rows and columns of umat
+ *
+ * Rows are ordered with AResultFloat's operator< (defined outside this
+ * block). Finally each pair keeps the smaller of umat[i][j] and umat[j][i].
+ *
+ * maxRank is clamped to the number of genes: nth_element and sort require
+ * their pivot to lie inside the sequence, which was violated whenever
+ * maxRank exceeded vecstrGenes.size().
+ *
+ * Always returns true.
+ */
+bool CSeekWriter::GetSparseRankMatrix(CDat &Dat,
+	vector<vector<unsigned short> > &umat, const unsigned short nullValue,
+	int maxRank, //1000
+	const vector<string> &vecstrGenes){
+
+	utype i, j;
+	const size_t iGenes = vecstrGenes.size();
+	//number of individually ranked positions, limited to [0, iGenes]
+	const size_t iTop = maxRank<=0 ? 0 :
+		((size_t) maxRank<iGenes ? (size_t) maxRank : iGenes);
+
+	vector<utype> veciGenes;
+	veciGenes.clear();
+	veciGenes.resize(vecstrGenes.size());
+	for( i = 0; i < vecstrGenes.size( ); ++i )
+		veciGenes[ i ] = Dat.GetGene( vecstrGenes[i] );
+	umat.resize(vecstrGenes.size());
+
+	for(i=0; i<vecstrGenes.size(); i++){
+		CSeekTools::InitVector(umat[i], vecstrGenes.size(), nullValue);
+		utype s = veciGenes[i];
+		if(CSeekTools::IsNaN(s)) continue;
+
+		float *v = Dat.GetFullRow(s);
+		vector<AResultFloat> vv;
+		vv.resize(vecstrGenes.size());
+		for(j=0; j<vecstrGenes.size(); j++){
+			utype t = veciGenes[j];
+			vv[j].i = j;
+			if(CSeekTools::IsNaN(t) || CMeta::IsNaN(v[t])){
+				//sentinel marking a missing value
+				vv[j].f = -9999;
+				continue;
+			}
+			vv[j].f = v[t];
+		}
+		//only the first iTop positions need to be in sorted order
+		nth_element(vv.begin(), vv.begin()+iTop, vv.end());
+		sort(vv.begin(), vv.begin()+iTop);
+		for(j=0; j<vecstrGenes.size(); j++){
+			if(j<iTop){
+				//NOTE(review): a -9999 sentinel that lands among the first
+				//iTop positions (a row with fewer than iTop real values)
+				//is ranked like a real value -- confirm this is intended
+				umat[i][vv[j].i] = j;
+			}else if(vv[j].f!=-9999){
+				umat[i][vv[j].i] = maxRank;
+			}
+		}
+		//NOTE(review): presumably GetFullRow allocates with malloc -- confirm
+		free(v);
+	}
+	//symmetrize: a pair keeps the better (smaller) of its two ranks
+	for(i=0; i<vecstrGenes.size(); i++){
+		utype s = veciGenes[i];
+		if(CSeekTools::IsNaN(s)) continue;
+		for(j=i+1; j<vecstrGenes.size(); j++){
+			utype t = veciGenes[j];
+			if(CSeekTools::IsNaN(t)) continue;
+			umat[i][j] = std::min(umat[i][j], umat[j][i]);
+			umat[j][i] = umat[i][j];
+		}
+	}
+	return true;
+}
+
+/*
+ * Replaces the values in Dat by rank-based scores normalized by row sums.
+ *
+ * Dat         : pairwise data; read row by row, then overwritten in place
+ * vecstrGenes : genes to process
+ * max_rank    : number of leading positions per row that receive a score;
+ *               position r (0-based) is scored (1 - rbp_p) * rbp_p^r and
+ *               every other non-missing value is scored 0
+ * rbp_p       : the p parameter of the score formula above
+ *
+ * Steps: (1) score every row into a temporary dense matrix, (2) make it
+ * symmetric by keeping the larger score of each pair, (3) sum every row,
+ * (4) store score / sqrt(rowSum[i]) / sqrt(rowSum[j]) back into Dat.
+ *
+ * Rows are ordered with AResultFloat's operator< (defined outside this
+ * block). max_rank is clamped to the number of genes: nth_element and sort
+ * require their pivot to lie inside the sequence, which was violated
+ * whenever max_rank exceeded vecstrGenes.size().
+ *
+ * Always returns true.
+ */
+bool CSeekWriter::RankNormalizeDAB(CDat &Dat,
+	const vector<string> &vecstrGenes, int max_rank, float rbp_p){
+
+	utype i, j;
+	const size_t iGenes = vecstrGenes.size();
+	//number of individually scored positions, limited to [0, iGenes]
+	const size_t iTop = max_rank<=0 ? 0 :
+		((size_t) max_rank<iGenes ? (size_t) max_rank : iGenes);
+
+	vector<utype> veciGenes;
+	veciGenes.clear();
+	veciGenes.resize(vecstrGenes.size());
+	for( i = 0; i < vecstrGenes.size( ); ++i )
+		veciGenes[ i ] = Dat.GetGene( vecstrGenes[i] );
+
+	vector<float> vecSum;
+	CSeekTools::InitVector(vecSum, vecstrGenes.size(), CMeta::GetNaN());
+
+	vector<vector<float> > mat;
+	mat.resize(vecstrGenes.size());
+
+	//development switch: false selects plain descending ranks instead
+	bool expTransform = true;
+	for(i=0; i<vecstrGenes.size(); i++){
+		CSeekTools::InitVector(mat[i], vecstrGenes.size(), CMeta::GetNaN());
+
+		utype s = veciGenes[i];
+		if(CSeekTools::IsNaN(s)) continue;
+		float *v = Dat.GetFullRow(s);
+		vector<AResultFloat> vv;
+		vv.resize(vecstrGenes.size());
+		int numV = 0;		
+
+		for(j=0; j<vecstrGenes.size(); j++){
+			utype t = veciGenes[j];
+			vv[j].i = j;
+			if(CSeekTools::IsNaN(t) || CMeta::IsNaN(v[t])){
+				//sentinel marking a missing value
+				vv[j].f = -9999;
+				continue;
+			}
+			vv[j].f = v[t];
+			numV++;
+		}
+
+		if(expTransform){
+			//only the first iTop positions need to be in sorted order
+			nth_element(vv.begin(), vv.begin()+iTop, vv.end());
+			sort(vv.begin(), vv.begin()+iTop);
+			for(j=0; j<vecstrGenes.size(); j++){
+				if(j<iTop){
+					float rank = (1.0 - rbp_p) * pow(rbp_p, j);
+					mat[i][vv[j].i] = rank;
+				}else if(vv[j].f!=-9999){
+					mat[i][vv[j].i] = 0;
+				}
+			}
+		}else{
+			sort(vv.begin(), vv.end());
+			for(j=0; j<vecstrGenes.size(); j++){
+				if(vv[j].f!=-9999){
+					mat[i][vv[j].i] = numV - j;
+				}
+			}
+		}
+
+		//NOTE(review): presumably GetFullRow allocates with malloc -- confirm
+		free(v);
+	}
+
+	//symmetrize: a pair keeps the larger of its two scores
+	for(i=0; i<vecstrGenes.size(); i++){
+		utype s = veciGenes[i];
+		if(CSeekTools::IsNaN(s)) continue;
+		for(j=i+1; j<vecstrGenes.size(); j++){
+			utype t = veciGenes[j];
+			if(CSeekTools::IsNaN(t)) continue;
+			if(CMeta::IsNaN(mat[i][j]) || CMeta::IsNaN(mat[j][i])){
+				fprintf(stderr, "%.3e %.3e\n", mat[i][j], mat[j][i]);
+			}
+			mat[i][j] = std::max(mat[i][j], mat[j][i]);
+			mat[j][i] = mat[i][j];
+		}
+	}
+
+	//row sums over the non-NaN scores
+	for(i=0; i<vecstrGenes.size(); i++){
+		utype s = veciGenes[i];
+		if(CSeekTools::IsNaN(s)) continue;
+		vecSum[i] = 0;
+		for(j=0; j<vecstrGenes.size(); j++){
+			utype t = veciGenes[j];
+			if(CSeekTools::IsNaN(t)) continue;
+			if(CMeta::IsNaN(mat[i][j])) continue;
+			vecSum[i] += mat[i][j];
+		}
+	}
+
+	//NOTE(review): a zero row sum makes the quotient below non-finite;
+	//it is written to Dat unchanged, as before
+	for(i=0; i<vecstrGenes.size(); i++){
+		utype s = veciGenes[i];
+		if(CSeekTools::IsNaN(s)) continue;
+		for(j=0; j<vecstrGenes.size(); j++){
+			utype t = veciGenes[j];
+			if(CSeekTools::IsNaN(t)) continue;
+			float r = mat[i][j] / sqrtf(vecSum[i]) / sqrtf(vecSum[j]);
+			Dat.Set(s, t, r);
+		}
+	}
+	
+	return true;
+}
+
+
+
+/*
+ * Returns the value that takes part in normalization: exp(-value^2 / 2)
+ * when expTransform is set, the value itself otherwise.
+ */
+static float TransformDABValue(const float value, const bool expTransform){
+	if(expTransform)
+		return expf(-1.0*value*value/2.0);
+	return value;
+}
+
+/*
+ * Normalizes the values in Dat in place using per-gene sums.
+ *
+ * Dat          : pairwise data; read row by row and overwritten
+ * vecstrGenes  : genes to process
+ * cutoff       : when set, only values > 0 enter the sums, and every value
+ *                that is not > 0 is replaced by 0
+ * expTransform : when set, x = exp(-v^2 / 2) is used in place of v
+ * divideNorm   : store x / sqrt(sum[i]) / sqrt(sum[j])
+ * subtractNorm : (only if divideNorm is false) store
+ *                x - sum[i]/num[i] - sum[j]/num[j]
+ *
+ * If neither divideNorm nor subtractNorm is set, values that pass the
+ * cutoff are left unchanged.
+ *
+ * Each unordered gene pair is visited exactly once (j > i). The row is
+ * re-read from Dat while normalized values are being written back, so
+ * visiting a pair from both of its rows would normalize the already
+ * normalized value a second time.
+ * NOTE(review): this relies on Dat.Set(s, t, .) also changing what
+ * GetFullRow(t) returns at position s (symmetric storage) -- confirm.
+ *
+ * Without cutoff and expTransform the sums can be zero or negative; the
+ * division is skipped (result 0, with a warning) when either gene's sum is
+ * not positive. Previously only the first gene's sum was checked.
+ *
+ * Always returns true.
+ */
+bool CSeekWriter::NormalizeDAB(CDat &Dat,
+	const vector<string> &vecstrGenes,
+	bool cutoff, bool expTransform, bool divideNorm, bool subtractNorm){
+
+	utype i, j;
+	vector<utype> veciGenes;
+	veciGenes.clear();
+	veciGenes.resize(vecstrGenes.size());
+	for( i = 0; i < vecstrGenes.size( ); ++i )
+		veciGenes[ i ] = Dat.GetGene( vecstrGenes[i] );
+
+	vector<float> vecSum;
+	vector<int> vecNum;
+	CSeekTools::InitVector(vecSum, vecstrGenes.size(), CMeta::GetNaN());
+	CSeekTools::InitVector(vecNum, vecstrGenes.size(), (int)-9999);
+
+	//pass 1: per-gene sum and count of the (transformed) values; Dat is not
+	//modified here, so all sums are based on the original data
+	for(i=0; i<vecstrGenes.size(); i++){
+		utype s = veciGenes[i];
+		if(CSeekTools::IsNaN(s)) continue;
+		float *v = Dat.GetFullRow(s);
+		float sum = 0;
+		int num = 0;
+		for(j=0; j<vecstrGenes.size(); j++){
+			utype t = veciGenes[j];
+			if(CSeekTools::IsNaN(t)) continue;
+			if(CMeta::IsNaN(v[t])) continue;
+			if(cutoff && !(v[t]>0)) continue;
+			sum += TransformDABValue(v[t], expTransform);
+			num++;
+		}
+		vecSum[i] = sum;
+		vecNum[i] = num;
+		//NOTE(review): presumably GetFullRow allocates with malloc -- confirm
+		free(v);
+	}
+
+	//pass 2: rewrite every pair once
+	for(i=0; i<vecstrGenes.size(); i++){
+		utype s = veciGenes[i];
+		if(CSeekTools::IsNaN(s)) continue;
+		float *v = Dat.GetFullRow(s);
+
+		for(j=i+1; j<vecstrGenes.size(); j++){
+			utype t = veciGenes[j];
+			if(CSeekTools::IsNaN(t)) continue;
+			if(CMeta::IsNaN(v[t])) continue;
+
+			if(cutoff && !(v[t]>0)){
+				Dat.Set(s, t, 0);
+				continue;
+			}
+
+			const float x = TransformDABValue(v[t], expTransform);
+			if(divideNorm){
+				float r = 0;
+				//raw sums without cutoff may be <= 0; sqrtf would then
+				//yield NaN or a division by zero
+				if(!cutoff && !expTransform &&
+					(vecSum[i]<=0 || vecSum[j]<=0)){
+					fprintf(stderr, "Warning, Dangerous, divide sqrt(z), where z could be negative\n");
+				}else{
+					r = x / sqrtf(vecSum[i]) / sqrtf(vecSum[j]);
+				}
+				Dat.Set(s, t, r);
+			}else if(subtractNorm){
+				float r = x - vecSum[i] / vecNum[i] - vecSum[j] / vecNum[j];
+				Dat.Set(s, t, r);
+			}
+		}
+		free(v);
+	}
+
+	return true;
+}
+
 bool CSeekWriter::GetGeneAverage(CDataPair &Dat,
 	const vector<string> &vecstrGenes,
 	vector<float> &vecResult, bool logit, float top_percent){
 
 #include "seekbasic.h"
 #include "seekmap.h"
+#include "seekevaluate.h"
 #include "datapair.h"
 
 namespace Sleipnir {
 
 class CSeekWriter{
 public:
+	static bool ReadSparseMatrix(const char *fileName, 
+		vector<vector<float> > &mat, 
+		CSeekIntIntMap &m, const int maxRank, const float rbp_p,
+		const vector<string> &vecstrGenes);
+
+	static bool ProductNorm(const vector<vector<float> > &mat1,
+		const vector<vector<float> > &mat2, const CSeekIntIntMap &m1, 
+		const CSeekIntIntMap &m2, vector<vector<float> > &re);
+
+	static bool WriteSparseMatrix(vector<vector<unsigned short> > &umat,
+		int maxRank, const vector<string> &vecstrGenes, const char *fileName);
+
+	static bool GetSparseRankMatrix(CDat &Dat,
+		vector<vector<unsigned short> > &umat, const unsigned short nullValue,
+		int maxRank, const vector<string> &vecstrGenes);
+
+	static bool RankNormalizeDAB(CDat &Dat,
+		const vector<string> &vecstrGenes, int max_rank, float rbp_p);
+
+	static bool NormalizeDAB(CDat &Dat,
+		const vector<string> &vecstrGenes,
+		bool cutoff, bool expTransform, bool divideNorm, bool subtractNorm);
+
 	static bool GetGeneAverage(CDataPair &Dat,
 		const vector<string> &vecstrGenes,
 		vector<float> &vecResult, bool logit=false, float top_percent=1.0);

tools/SeekPrep/SeekPrep.cpp

 		}
 
 	} else if(sArgs.dab_flag==1){
+		
+		if(sArgs.norm_flag==1){
+			CDat Dat;
+			char outFile[1024];
+			if(!Dat.Open(sArgs.dabinput_arg, false, 2, false, false, false)){
+				cerr << "error opening file" << endl;
+				return 1;
+			}
+			string fileName = CMeta::Basename(sArgs.dabinput_arg);
+			string fileStem = CMeta::Deextension(fileName);
+			sprintf(outFile, "%s/%s.2.dab", sArgs.dir_out_arg,
+				fileStem.c_str());
+			int max_rank = 1000;
+			float rbp_p = 0.99;
+			//cutoff, expTransform, divideNorm, subtractNorm
+			//CSeekWriter::NormalizeDAB(Dat, vecstrGenes, true, false, true, false);
+			//CSeekWriter::RankNormalizeDAB(Dat, vecstrGenes, max_rank, rbp_p);
+			//Dat.Save(outFile);
+			vector<vector<unsigned short> > umat;
+			CSeekWriter::GetSparseRankMatrix(Dat, umat, 65535, max_rank, 
+				vecstrGenes);
+			CSeekWriter::WriteSparseMatrix(umat, max_rank, vecstrGenes, 
+				outFile);
+			/*fprintf(stderr, "Begin\n");
+			vector<unsigned short> l;
+			vector<vector<float> > mat;
+			CSeekWriter::ReadSparseMatrixAsArray(l, outFile);
+			fprintf(stderr, "Begin 2\n");
+			CSeekWriter::ReadSparseMatrix(l, mat, 0.99, vecstrGenes);*/
+		}
 
 		if(sArgs.gavg_flag==1){
 			bool logit = false;

tools/SeekPrep/SeekPrep.ggo

 								string typestr="filename"
 option	"top_avg_percent"	C	"For gene average, top X percent of the values to take average (0 - 1.0)"
 								float	default="1.0"
+option	"norm"				n	"Normalize matrix"
+								flag	off
 
 section "PCL mode"
 option	"pclinput"			V	"PCL BIN file"

tools/SeekPrep/cmdline.c

   "  -p, --gpres                  Generates gene presence file  (default=off)",
   "  -B, --dabinput=filename      DAB dataset file",
   "  -C, --top_avg_percent=FLOAT  For gene average, top X percent of the values to \n                                 take average (0 - 1.0)  (default=`1.0')",
+  "  -n, --norm                   Normalize matrix  (default=off)",
   "\nPCL mode:",
   "  -V, --pclinput=filename      PCL BIN file",
   "  -v, --gexpvarmean            Generates gene expression variance and mean \n                                 files (.gexpvar, .gexpmean)  (default=off)",
   args_info->gpres_given = 0 ;
   args_info->dabinput_given = 0 ;
   args_info->top_avg_percent_given = 0 ;
+  args_info->norm_given = 0 ;
   args_info->pclinput_given = 0 ;
   args_info->gexpvarmean_given = 0 ;
   args_info->sinfo_given = 0 ;
   args_info->dabinput_orig = NULL;
   args_info->top_avg_percent_arg = 1.0;
   args_info->top_avg_percent_orig = NULL;
+  args_info->norm_flag = 0;
   args_info->pclinput_arg = NULL;
   args_info->pclinput_orig = NULL;
   args_info->gexpvarmean_flag = 0;
   args_info->gpres_help = gengetopt_args_info_help[8] ;
   args_info->dabinput_help = gengetopt_args_info_help[9] ;
   args_info->top_avg_percent_help = gengetopt_args_info_help[10] ;
-  args_info->pclinput_help = gengetopt_args_info_help[12] ;
-  args_info->gexpvarmean_help = gengetopt_args_info_help[13] ;
-  args_info->sinfo_help = gengetopt_args_info_help[14] ;
-  args_info->gplat_help = gengetopt_args_info_help[16] ;
-  args_info->dblist_help = gengetopt_args_info_help[17] ;
-  args_info->dir_prep_in_help = gengetopt_args_info_help[18] ;
-  args_info->dset_help = gengetopt_args_info_help[19] ;
-  args_info->useNibble_help = gengetopt_args_info_help[20] ;
-  args_info->quant_help = gengetopt_args_info_help[21] ;
-  args_info->logit_help = gengetopt_args_info_help[23] ;
-  args_info->input_help = gengetopt_args_info_help[25] ;
-  args_info->dir_out_help = gengetopt_args_info_help[27] ;
+  args_info->norm_help = gengetopt_args_info_help[11] ;
+  args_info->pclinput_help = gengetopt_args_info_help[13] ;
+  args_info->gexpvarmean_help = gengetopt_args_info_help[14] ;
+  args_info->sinfo_help = gengetopt_args_info_help[15] ;
+  args_info->gplat_help = gengetopt_args_info_help[17] ;
+  args_info->dblist_help = gengetopt_args_info_help[18] ;
+  args_info->dir_prep_in_help = gengetopt_args_info_help[19] ;
+  args_info->dset_help = gengetopt_args_info_help[20] ;
+  args_info->useNibble_help = gengetopt_args_info_help[21] ;
+  args_info->quant_help = gengetopt_args_info_help[22] ;
+  args_info->logit_help = gengetopt_args_info_help[24] ;
+  args_info->input_help = gengetopt_args_info_help[26] ;
+  args_info->dir_out_help = gengetopt_args_info_help[28] ;
   
 }
 
     write_into_file(outfile, "dabinput", args_info->dabinput_orig, 0);
   if (args_info->top_avg_percent_given)
     write_into_file(outfile, "top_avg_percent", args_info->top_avg_percent_orig, 0);
+  if (args_info->norm_given)
+    write_into_file(outfile, "norm", 0, 0 );
   if (args_info->pclinput_given)
     write_into_file(outfile, "pclinput", args_info->pclinput_orig, 0);
   if (args_info->gexpvarmean_given)
         { "gpres",	0, NULL, 'p' },
         { "dabinput",	1, NULL, 'B' },
         { "top_avg_percent",	1, NULL, 'C' },
+        { "norm",	0, NULL, 'n' },
         { "pclinput",	1, NULL, 'V' },
         { "gexpvarmean",	0, NULL, 'v' },
         { "sinfo",	0, NULL, 's' },
         { 0,  0, 0, 0 }
       };
 
-      c = getopt_long (argc, argv, "hdefapB:C:V:vsPb:I:A:NQ:li:D:", long_options, &option_index);
+      c = getopt_long (argc, argv, "hdefapB:C:nV:vsPb:I:A:NQ:li:D:", long_options, &option_index);
 
       if (c == -1) break;	/* Exit from `while (1)' loop.  */
 
             goto failure;
         
           break;
+        case 'n':	/* Normalize matrix.  */
+        
+        
+          if (update_arg((void *)&(args_info->norm_flag), 0, &(args_info->norm_given),
+              &(local_args_info.norm_given), optarg, 0, 0, ARG_FLAG,
+              check_ambiguity, override, 1, 0, "norm", 'n',
+              additional_error))
+            goto failure;
+        
+          break;
         case 'V':	/* PCL BIN file.  */
         
         

tools/SeekPrep/cmdline.h

   float top_avg_percent_arg;	/**< @brief For gene average, top X percent of the values to take average (0 - 1.0) (default='1.0').  */
   char * top_avg_percent_orig;	/**< @brief For gene average, top X percent of the values to take average (0 - 1.0) original value given at command line.  */
   const char *top_avg_percent_help; /**< @brief For gene average, top X percent of the values to take average (0 - 1.0) help description.  */
+  int norm_flag;	/**< @brief Normalize matrix (default=off).  */
+  const char *norm_help; /**< @brief Normalize matrix help description.  */
   char * pclinput_arg;	/**< @brief PCL BIN file.  */
   char * pclinput_orig;	/**< @brief PCL BIN file original value given at command line.  */
   const char *pclinput_help; /**< @brief PCL BIN file help description.  */
   unsigned int gpres_given ;	/**< @brief Whether gpres was given.  */
   unsigned int dabinput_given ;	/**< @brief Whether dabinput was given.  */
   unsigned int top_avg_percent_given ;	/**< @brief Whether top_avg_percent was given.  */
+  unsigned int norm_given ;	/**< @brief Whether norm was given.  */
   unsigned int pclinput_given ;	/**< @brief Whether pclinput was given.  */
   unsigned int gexpvarmean_given ;	/**< @brief Whether gexpvarmean was given.  */
   unsigned int sinfo_given ;	/**< @brief Whether sinfo was given.  */
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.