Commits

Qian Zhu committed ae0bdad

Provide methods for efficiently storing, retrieving a sparse DAB matrix (seekwriter.cpp, sparsematrix.h)
Create wrapper for saving, reading sparse DAB's (SeekPrep)
Create new class for iterative search (currently disabled), and sequential search that supports multiple DAB's as input instead of a CDatabase collection

  • Participants
  • Parent commits 801666e

Comments (0)

Files changed (17)

File configure.ac

                  tools/SeekEvaluator/Makefile \
                  tools/SeekGeneRecommender/Makefile \
                  tools/SeekAggregatedDataset/Makefile \
+                 tools/SeekIterative/Makefile \
          tools/DBCombiner/Makefile \
                  tools/DSLConverter/Makefile \
                  tools/Dab2Dad/Makefile \

File gen_tools_am

                                 SeekPValue => ['GSL'],
                                 SeekGeneRecommender => ['GSL'],
                                 SeekAggregatedDataset => ['GSL'],
+                                SeekIterative => ['GSL'],
                                 PCLServer => ['GSL'],
                             Dab2Dad  => ['SMILE'],
                             Dab2DB  => ['SMILE'],
 	m_vecstrGenes.resize( vecstrGenes.size( ) );
 	copy( vecstrGenes.begin( ), vecstrGenes.end( ), m_vecstrGenes.begin( ) );
 
+	m_mapstrGenes.clear();
+	for( i = 0; i < vecstrGenes.size(); ++i )
+		m_mapstrGenes[vecstrGenes[i]] = i; 
+
 	if( szFile ) {
 		iSize = sizeof(uint32_t);
 		for( i = 0; i < GetGenes( ); ++i )
 	for( i = 0; i < vecstrGenes.size( ); ++i )
 		m_vecstrGenes.push_back( vecstrGenes[ i ] );
 
+	for( i = 0; i < vecstrGenes.size(); ++i )
+		m_mapstrGenes[vecstrGenes[i]] = i; 
+
 	m_Data.Initialize( MatScores );
 
 	return true; }

File src/seekwriter.cpp

 
 namespace Sleipnir {
 
+//mat a symmetric matrix
 bool CSeekWriter::ReadSparseMatrix(const char *fileName,
 	vector<map<utype,float> > &mat, CSeekIntIntMap &m, 
 	const int maxRank, const float rbp_p,
 	for(i=0; i<vecstrGenes.size(); i++)
 		mat[i] = map<utype,float>();
 
+	//m need to be initialized to size vecstrGenes.size() first!
 	ret = fread((char*) (&numPresent), 1, sizeof(numPresent), f);
 	for(j=0; j<numPresent; j++){
 		utype val;
 			j = it->first;
 			if(vecSqrtSum[i]==0 || vecSqrtSum[j]==0) continue;
 			it->second = it->second / vecSqrtSum[i] / vecSqrtSum[j];
+			//symmetric matrix
+			mat[j][i] = it->second;
 		}
 	}
 	return true;
 }
 
+//add this matrix with weight w
+bool CSeekWriter::SumSparseMatrix(CSparseFlatMatrix<float> &mat1,
+	CSparseFlatHalfMatrix<float> &res, const CSeekIntIntMap &mi, const float w){
+	utype i, j, ii, jj;
+	//suppose res is already initialized
+	const vector<utype> &allR = mi.GetAllReverse();
+	for(ii=0; ii<mi.GetNumSet(); ii++){
+		i = allR[ii];
+		vector<CPair<float> >::iterator row_it;
+		vector<CPair<float> > create;
+		for(row_it = mat1.RowBegin(i); row_it!=mat1.RowEnd(i); row_it++){
+			utype j = row_it->i;
+			float rv = row_it->v;
+			if(j<=i) continue; //only add if j is greater than i
+			CPair<float> *pp = res.GetElement(i,j);
+			if(pp==NULL){		
+				CPair<float> cp;
+				cp.i = j;
+				cp.v = rv;
+				create.push_back(cp);
+				continue;
+			}
+			pp->v += rv * w;
+		}
+		for(jj=0; jj<create.size(); jj++)
+			res.Add(i, create[jj].i, create[jj].v * w);
+		if(create.size()>0)
+			res.SortRow(i);
+	}
+	return true;
+}
+
+bool CSeekWriter::SumSparseMatrix(CSparseFlatMatrix<float> &mat1,
+	CHalfMatrix<float> &res, const CSeekIntIntMap &mi, const float w){
+	utype i, ii;
+	//suppose res is already initialized
+	const vector<utype> &allR = mi.GetAllReverse();
+	for(ii=0; ii<mi.GetNumSet(); ii++){
+		i = allR[ii];
+		vector<CPair<float> >::iterator row_it;
+		for(row_it = mat1.RowBegin(i); row_it!=mat1.RowEnd(i); row_it++){
+			utype j = row_it->i;
+			float rv = row_it->v;
+			if(j<=i) continue; //only add if j is greater than i
+			res.Set(i, j, res.Get(i, j) + rv * w);
+		}
+	}
+	return true;
+}
 //Calculate the similarity of two distance matrices
 //by simply taking product of two matrix for corresponding entries
 bool CSeekWriter::ProductNorm(const vector<map<utype,float> > &mat1,
 	return true;
 }
 
-bool CSeekWriter::WriteSparseMatrix(CDataPair &Dat,
-	vector< map<utype,unsigned short> > &umat, 
-	int maxRank, const vector<string> &vecstrGenes, const char *fileName){
-
-	FILE *f = fopen(fileName, "wb");
-	if(f==NULL){
-		cerr << "File not found!" << endl;
-		return false;
-	}
-	utype numGenes = 0;
-	utype i, j;
-
-	vector<utype> veciGenes;
-	veciGenes.clear();
-	veciGenes.resize(vecstrGenes.size());
-	for( i = 0; i < vecstrGenes.size( ); ++i )
-		veciGenes[ i ] = Dat.GetGeneIndex( vecstrGenes[i] );
-
-	CSeekIntIntMap mm(vecstrGenes.size());
-	for(i=0; i<vecstrGenes.size(); i++)
-		if(!CSeekTools::IsNaN(veciGenes[i]))
-			mm.Add(i);
-
-	utype numPresent = mm.GetNumSet();
-	//1 utype
-	fwrite((char*) (&numPresent), 1, sizeof(numPresent), f);
-	const vector<utype> &allR = mm.GetAllReverse();
-	//numPresent utype
-	for(i=0; i<numPresent; i++)
-		fwrite((char*) (&allR[i]), 1, sizeof(allR[i]), f);
-
-	for(i=0; i<vecstrGenes.size(); i++){
-		if(umat[i].size()==0) continue;
-		numGenes++;
-	}
-
-	//1 utype
-	fwrite((char*) (&numGenes), 1, sizeof(numGenes), f);
-
-	for(i=0; i<vecstrGenes.size(); i++){
-		unsigned short numEntries = umat[i].size(); //should be 1000
-		if(numEntries==0) 
-			continue;
-		//1 utype
-		fwrite((char*) (&i), 1, sizeof(i), f);
-		//1 unsigned short
-		fwrite((char*) (&numEntries), 1, sizeof(numEntries), f);
-		map<utype,unsigned short>::iterator it;
-		for(it=umat[i].begin(); it!=umat[i].end(); it++){
-			utype first = it->first;
-			unsigned short second = it->second;
-			//1 utype
-			fwrite((char*) (&first), 1, sizeof(first), f);
-			//1 unsigned short
-			fwrite((char*) (&second), 1, sizeof(second), f);
-		}
-	}
-	fclose(f);
-	return true;
-}
-
-bool CSeekWriter::GetSparseRankMatrix(CDataPair &Dat,
-	vector< map<utype,unsigned short> > &umat, 
-	int maxRank, //1000
-	const vector<string> &vecstrGenes){
-
-	utype i, j;
-	vector<utype> veciGenes;
-	veciGenes.clear();
-	veciGenes.resize(vecstrGenes.size());
-	for( i = 0; i < vecstrGenes.size( ); ++i )
-		veciGenes[ i ] = Dat.GetGeneIndex( vecstrGenes[i] );
-	umat.resize(vecstrGenes.size());
-	for(i=0; i<vecstrGenes.size(); i++){
-		umat[i] = map<utype, unsigned short>();
-	}
-
-	fprintf(stderr, "Start reading DAB...\n");
-	for(i=0; i<vecstrGenes.size(); i++){
-		utype s = veciGenes[i];
-		if(CSeekTools::IsNaN(s)) continue;
-		if(i%1000==0)
-			fprintf(stderr, "Start reading gene %d...\n", i);
-
-		//float *v = Dat.GetRowSeek(s);
-		float *v = Dat.GetFullRow(s);
-		vector<AResultFloat> vv;
-		vv.resize(vecstrGenes.size());
-
-		for(j=0; j<vecstrGenes.size(); j++){
-			utype t = veciGenes[j];
-			vv[j].i = j;
-			vv[j].f = -9999;
-			if(CSeekTools::IsNaN(t)) continue;
-			float d = v[t];
-			if(CMeta::IsNaN(d)) continue;
-			vv[j].f = d;
-		}
-
-		nth_element(vv.begin(), vv.begin()+maxRank, vv.end());
-		sort(vv.begin(), vv.begin()+maxRank);
-
-		for(j=0; j<vecstrGenes.size(); j++){
-			if(j<maxRank){
-				utype first = i;
-				utype second = vv[j].i;
-				if(i >= vv[j].i){
-					first = vv[j].i;
-					second = i;
-				}
-				map<utype,unsigned short>::iterator it;
-				if((it=umat[first].find(second))==umat[first].end())
-					umat[first][second] = (unsigned short) j;
-				else
-					umat[first][second] = std::min(it->second, (unsigned short) j);
-			}
-		}
-		free(v);
-	}
-	fprintf(stderr, "Finished reading DAB\n");
-	return true;
-}
-
 bool CSeekWriter::NormalizeDAB(CDataPair &Dat,
 	const vector<string> &vecstrGenes,
 	bool cutoff, bool expTransform, bool divideNorm, bool subtractNorm){

File src/seekwriter.h

 #include "seekbasic.h"
 #include "seekmap.h"
 #include "seekevaluate.h"
+#include "sparsematrix.h"
 #include "datapair.h"
+#include "seekreader.h"
 
 namespace Sleipnir {
 
 class CSeekWriter{
 public:
+
+	//should be either unsigned short or utype
+	template<class tType>
+	static bool ReadSeekSparseMatrixHeader(const char *fileName,
+	CSeekIntIntMap &m){
+		FILE *f = fopen(fileName, "rb");
+		if(f==NULL){
+			cerr << "File not found" << endl;
+			return false;
+		}
+		size_t j;
+		tType val, numPresent;
+		int ret;
+
+		//m need to be initialized to size vecstrGenes.size() first!
+		ret = fread((char*) (&numPresent), 1, sizeof(numPresent), f);
+		for(j=0; j<numPresent; j++){
+			ret = fread((char*)(&val), 1, sizeof(val), f); //val stores the present gene
+			m.Add((utype) val);
+		}
+		fclose(f);
+		return true;
+	}
+
+	//compatibility
+	template<class tType>
+	static bool ReadSeekSparseMatrix(const char *fileName,
+	CSparseFlatMatrix<float> &mat, CSeekIntIntMap &m, const int maxRank, 
+	const float rbp_p, const vector<string> &vecstrGenes){
+	
+		FILE *f = fopen(fileName, "rb");
+		if(f==NULL){
+			cerr << "File not found" << endl;
+			return false;
+		}
+
+		size_t i, j;
+		tType numGenes, numPresent, val;
+		int ret;
+
+		mat.Initialize(vecstrGenes.size());
+		ret = fread((char*) (&numPresent), 1, sizeof(numPresent), f);
+		for(j=0; j<numPresent; j++){
+			ret = fread((char*)(&val), 1, sizeof(val), f); //val = gene ID
+			m.Add((utype) val);
+			mat.InitializeRow(val, maxRank*2); //initial capacity
+		}
+		ret = fread((char*) (&numGenes), 1, sizeof(numGenes), f);
+
+		vector<float> rbp_score;
+		rbp_score.resize(maxRank);
+		for(i=0; i<maxRank; i++)
+			rbp_score[i] = (1.0 - rbp_p) * pow(rbp_p, i);
+
+		for(i=0; i<numGenes; i++){
+			tType id, id2;  //gene ID
+			unsigned short numEntries, val; //rank
+			ret = fread((char*)(&id), 1, sizeof(id), f);
+			ret = fread((char*)(&numEntries), 1, sizeof(numEntries), f);
+			for(j=0; j<numEntries; j++){
+				ret = fread((char*)(&id2),1,sizeof(id2),f);
+				ret = fread((char*)(&val),1,sizeof(val),f);
+				tType first = id;
+				tType second = id2;
+				mat.Add(first, second, rbp_score[val]);
+				mat.Add(second, first, rbp_score[val]);
+			}
+		}
+		fclose(f);
+
+		mat.Organize();
+		size_t ii, jj;
+		const vector<utype> &allRGenes = m.GetAllReverse();
+		fprintf(stderr, "Begin calculating row sum\n");
+
+		vector<float> vecSum;
+		CSeekTools::InitVector(vecSum, vecstrGenes.size(), (float) 0);
+		for(ii=0; ii<m.GetNumSet(); ii++){
+			i = (size_t) allRGenes[ii];
+			const vector<CPair<float> > &row = mat.GetRow(i);
+			for(jj=0; jj<row.size(); jj++)
+				vecSum[i] += row[jj].v;
+		}
+
+		vector<float> vecSqrtSum;
+		CSeekTools::InitVector(vecSqrtSum, vecstrGenes.size(), (float) 0);
+
+		for(ii=0; ii<m.GetNumSet(); ii++){
+			i = (size_t) allRGenes[ii];
+			if(vecSum[i]==0) continue;
+			vecSqrtSum[i] = sqrtf(vecSum[i]);
+		}
+
+		fprintf(stderr, "Begin normalization using row sum\n");
+		float rv;
+		for(ii=0; ii<m.GetNumSet(); ii++){
+			i = (size_t) allRGenes[ii];
+			vector<CPair<float> >::iterator row_it;
+			for(row_it = mat.RowBegin(i); row_it!=mat.RowEnd(i); row_it++){
+				j = (size_t) row_it->i;
+				rv = row_it->v;
+				if(vecSqrtSum[i]==0 || vecSqrtSum[j]==0) continue;
+				row_it->v = rv / vecSqrtSum[i] / vecSqrtSum[j];
+			}
+		}
+		return true;
+	}
+
+	//compatibility
+	template<class tType>
+	static bool WriteSparseMatrix(CDataPair &Dat, vector<map<tType,unsigned short> > &umat,
+	int maxRank, const vector<string> &vecstrGenes, const char *fileName){
+
+		FILE *f = fopen(fileName, "wb");
+		if(f==NULL){
+			fprintf(stderr, "File not found %s\n", fileName);
+			return false;
+		}
+
+		size_t i, j;
+		vector<tType> veciGenes;
+		veciGenes.resize(vecstrGenes.size());
+		for(i=0; i<vecstrGenes.size(); i++)
+			veciGenes[i] = (tType) Dat.GetGeneIndex(vecstrGenes[i]);
+
+		CSeekIntIntMap mm(vecstrGenes.size());
+		for(i=0; i<vecstrGenes.size(); i++)
+			if(veciGenes[i]!=(tType)-1)
+				mm.Add((utype)i);
+
+		tType numPresent = (tType) mm.GetNumSet();	
+		//1 tType
+		fwrite((char*)(&numPresent), 1, sizeof(numPresent), f);
+		const vector<utype> &allR = mm.GetAllReverse();
+		//numPresent tType
+		for(i=0; i<numPresent; i++){
+			tType pr = (tType) allR[i];
+			fwrite((char*)(&pr), 1, sizeof(pr), f);
+		}
+
+		tType numGenes = 0;
+		for(i=0; i<vecstrGenes.size(); i++){
+			if(umat[i].size()==0) continue;
+			numGenes++;
+		}
+		//1 tType
+		fwrite((char*) (&numGenes), 1, sizeof(numGenes), f);
+
+		for(i=0; i<vecstrGenes.size(); i++){
+			unsigned short numEntries = umat[i].size(); //should be 1000
+			if(numEntries==0) 
+				continue;
+			//1 tType
+			tType gi = (tType) i;
+			fwrite((char*) (&gi), 1, sizeof(gi), f);
+			//1 unsigned short
+			fwrite((char*) (&numEntries), 1, sizeof(numEntries), f);
+			typename map<tType,unsigned short>::iterator it;
+			for(it=umat[i].begin(); it!=umat[i].end(); it++){
+				tType first = it->first;
+				unsigned short second = it->second;
+				//1 tType
+				fwrite((char*) (&first), 1, sizeof(first), f);
+				//1 unsigned short
+				fwrite((char*) (&second), 1, sizeof(second), f);
+			}
+		}
+		fclose(f);
+		return true;
+	}
+
+	//compatiblity
+	template<class tType>
+	static bool GetSparseRankMatrix(CDataPair &Dat, vector<map<tType,unsigned short> > &umat, 
+	int maxRank, const vector<string> &vecstrGenes){
+	
+		size_t i, j;
+		vector<tType> veciGenes;
+		veciGenes.resize(vecstrGenes.size());
+		for( i = 0; i < vecstrGenes.size( ); ++i )
+			veciGenes[ i ] = (tType) Dat.GetGeneIndex( vecstrGenes[i] );
+		umat.resize(vecstrGenes.size());
+		for(i=0; i<vecstrGenes.size(); i++)
+			umat[i] = map<tType, unsigned short>();
+
+		fprintf(stderr, "Start reading DAB...\n");
+		tType s, t;
+		for(i=0; i<vecstrGenes.size(); i++){
+			if((s=veciGenes[i])==(tType)-1) continue;
+			if(i%1000==0) fprintf(stderr, "Start reading gene %d...\n", i);
+
+			float *v = Dat.GetFullRow(s);
+			vector<AResultFloat> vv;
+			vv.resize(vecstrGenes.size());
+
+			for(j=0; j<vecstrGenes.size(); j++){
+				vv[j].i = (utype) j;
+				vv[j].f = -9999;
+				if((t=veciGenes[j])==(tType)-1) continue;
+				if(CMeta::IsNaN(v[t])) continue;
+				vv[j].f = v[t];
+			}
+			nth_element(vv.begin(), vv.begin()+maxRank, vv.end());
+			sort(vv.begin(), vv.begin()+maxRank);
+
+			for(j=0; j<vecstrGenes.size(); j++){
+				if(j<maxRank){
+					tType first = (tType) i;
+					tType second = (tType) vv[j].i;
+					if((tType) i >= (tType) vv[j].i){
+						first = (tType) vv[j].i;
+						second = (tType) i;
+					}
+					typename map<tType,unsigned short>::iterator it;
+					if((it=umat[first].find(second))==umat[first].end())
+						umat[first][second] = (unsigned short) j;
+					else
+						umat[first][second] = std::min(it->second, (unsigned short) j);
+				}
+			}
+			delete[] v;
+		}
+		fprintf(stderr, "Finished reading DAB\n");
+		return true;
+	}
+
+	//===============================================================
+	//not currently used
 	static bool ReadSparseMatrix(const char *fileName, 
 		vector<map<utype,float> > &mat, 
 		CSeekIntIntMap &m, const int maxRank, const float rbp_p,
 		const vector<string> &vecstrGenes);
 
+	//not currently used
 	static bool ProductNorm(const vector<map<utype,float> > &mat1,
 		const vector<map<utype,float> > &mat2, const CSeekIntIntMap &m1, 
 		const CSeekIntIntMap &m2, vector<map<utype,float> > &re);
 
-	static bool WriteSparseMatrix(CDataPair &Dat,
-		vector<map<utype,unsigned short> > &umat,
-		int maxRank, const vector<string> &vecstrGenes, const char *fileName);
+	//================================================================
+	static bool SumSparseMatrix(CSparseFlatMatrix<float> &mat1,
+		CSparseFlatHalfMatrix<float> &res, const CSeekIntIntMap &mi, const float w);
 
-	static bool GetSparseRankMatrix(CDataPair &Dat,
-		vector<map<utype,unsigned short> > &umat,
-		int maxRank, const vector<string> &vecstrGenes);
+	static bool SumSparseMatrix(CSparseFlatMatrix<float> &mat1,
+		CHalfMatrix<float> &res, const CSeekIntIntMap &mi, const float w);
 
 	static bool NormalizeDAB(CDataPair &Dat,
 		const vector<string> &vecstrGenes,

File src/sparsematrix.h

 
 namespace Sleipnir {
 
+template <typename tType>
+struct CPair {
+	utype i;
+	tType v;
+};
+
+template <typename tType>
+struct CAscendingKey{
+	bool operator()(const CPair<tType>& lx, const CPair<tType>& rx) const{
+		if(lx.i < rx.i) return true;
+		if(lx.i > rx.i) return false;
+		if(lx.v < rx.v) return true;
+		return false;
+	}
+};
+
+template <typename tType>
+struct CAscendingValue{
+	bool operator()(const CPair<tType> &lx, const CPair<tType> &rx) const{
+		if(lx.v < rx.v) return true;
+		if(lx.v > rx.v) return false;
+		if(lx.i < rx.i) return true;
+		return false;
+	}
+};
+
+template <typename tType>
+struct CDescendingKey{
+	bool operator()(const CPair<tType>& lx, const CPair<tType>& rx) const{
+		if(lx.i < rx.i) return false;
+		if(lx.i > rx.i) return true;
+		if(lx.v < rx.v) return false;
+		return true;
+	}
+};
+
+template <typename tType>
+struct CDescendingValue{
+	bool operator()(const CPair<tType> &lx, const CPair<tType> &rx) const{
+		if(lx.v < rx.v) return false;
+		if(lx.v > rx.v) return true;
+		if(lx.i < rx.i) return false;
+		return true;
+	}
+};
+
+
+//A full matrix
+template<class tType>
+class CSparseFlatMatrix : protected CSparseMatrixImpl<tType> {
+public:
+	CSparseFlatMatrix(const tType& Default): CSparseMatrixImpl<tType>(Default){}
+	~CSparseFlatMatrix(){
+		Reset();
+	}
+	void Initialize(size_t iR){
+		Reset();
+		CSparseMatrixImpl<tType>::m_iR = iR;
+		m_vecData.resize(iR);
+		m_currentIndex.resize(iR);
+		m_bSorted.resize(iR);
+	}
+	//num is initial capacity
+	void InitializeRow(size_t rowID, size_t num){
+		m_vecData[rowID] = vector<CPair<tType> >();
+		m_vecData[rowID].reserve(num);
+		m_currentIndex[rowID] = 0;
+		m_bSorted[rowID] = false;
+	}
+	void Reset() {
+		size_t i;
+		for(i=0; i<CSparseMatrixImpl<tType>::m_iR; i++)
+			m_vecData[i].clear();
+		m_vecData.clear();
+		CSparseMatrixImpl<tType>::m_iR = 0; 
+	}
+	const tType& GetDefault() const {
+		return CSparseMatrixImpl<tType>::GetDefault();
+	}
+	size_t GetRows() const {
+		return CSparseMatrixImpl<tType>::GetRows(); 
+	}
+	//does not check if [iY][iX] already exists
+	//nor if the row m_vecData[iY] is already full
+	void Add(size_t iY, size_t iX, tType v){
+		CPair<tType> cp;
+		cp.i = (utype) iX;
+		cp.v = v;
+		m_vecData[iY].push_back(cp);
+		m_bSorted[iY] = false;
+		m_currentIndex[iY]++;
+	}
+	const vector<CPair<tType> >& GetRow(size_t iY) const{
+		return m_vecData[iY];
+	}
+	typename vector<CPair<tType> >::iterator RowBegin(size_t iY){
+		return m_vecData[iY].begin();
+	}
+	typename vector<CPair<tType> >::iterator RowEnd(size_t iY){
+		return m_vecData[iY].end();
+	}
+	void Shrink(){
+		size_t i;
+		for(i=0; i<m_vecData.size(); i++)
+			m_vecData[i].resize(m_currentIndex[i]);
+	}
+	void Organize(){
+		size_t i;
+		for(i=0; i<m_vecData.size(); i++){
+			sort(m_vecData[i].begin(), m_vecData[i].end(), CAscendingKey<tType>());	
+			m_bSorted[i] = true;
+		}
+	}
+	bool Check(size_t iY, size_t iX){
+		if(!m_bSorted[iY])
+			SortRow(iY);
+		size_t ind = GetIndex(iY, iX);
+		if(ind!=(size_t) -1) 
+			return true;
+		return false;
+	}
+	//assume element at this coordinate exists
+	tType Get(size_t iY, size_t iX) const {
+		if(!m_bSorted[iY])
+			SortRow(iY);
+		size_t ind = GetIndex(iY, iX);
+		return m_vecData[iY][ind].v;
+	}
+	//assume element at this coordinate exists
+	void Set(size_t iY, size_t iX, tType v) {
+		if(!m_bSorted[iY])
+			SortRow(iY);
+		size_t ind = GetIndex(iY, iX);
+		m_vecData[iY][ind].v = v;
+	}
+	void Increment(size_t iY, size_t iX, tType v){
+		if(!m_bSorted[iY])
+			SortRow(iY);
+		size_t ind = GetIndex(iY, iX);
+		m_vecData[iY][ind].v += v;
+	}
+
+private:
+	vector<vector<CPair<tType> > > m_vecData;
+	//parent class contains m_iR and m_Default
+	vector<bool> m_bSorted;
+	vector<size_t> m_currentIndex;
+
+	void SortRow(size_t iY){
+		sort(m_vecData[iY].begin(), m_vecData[iY].end(), CAscendingKey<tType>());
+		m_bSorted[iY] = true;
+	}
+	size_t GetIndex(size_t iY, size_t iX) {
+		//suppose m_bSorted[iY] is true
+		return BinarySearch(m_vecData[iY], iX);
+	}
+	size_t BinarySearch(vector<CPair<tType> > &A, size_t iX){
+		int iMax = A.size()-1;
+		int iMin = 0;
+		while(iMax>=iMin){
+			int iMid = (iMin + iMax) / 2;
+			if(A[iMid].i < iX)
+				iMin = iMid + 1;
+			else if(A[iMid].i > iX)
+				iMax = iMid - 1;
+			else
+				return (size_t) iMid;
+		}
+		//fprintf(stderr, "Element %d is not found!\n", iX);
+		return (size_t) -1;
+	}
+};
+
+//A half-matrix
+template<class tType>
+class CSparseFlatHalfMatrix : protected CSparseMatrixImpl<tType> {
+public:
+	CSparseFlatHalfMatrix(const tType& Default): CSparseMatrixImpl<tType>(Default){}
+	~CSparseFlatHalfMatrix(){
+		Reset();
+	}
+	void Copy(const CSparseFlatMatrix<tType> &cf){ //a full matrix (symmetric)
+		CSparseMatrixImpl<tType>::m_Default = cf.GetDefault();
+		Initialize(cf.GetRows());
+		size_t i,j;
+		for(i=0; i<CSparseMatrixImpl<tType>::m_iR; i++){
+			const vector<CPair<tType> > &allR = cf.GetRow(i);
+			InitializeRow(i, allR.size());
+			for(j=0; j<allR.size(); j++)
+				if(allR[j].i > i)
+					Add(i, allR[j].i, allR[j].v);
+		}
+		Organize();
+	}
+	void Initialize(size_t iR){
+		Reset();
+		CSparseMatrixImpl<tType>::m_iR = iR;
+		m_vecData.resize(iR);
+		m_currentIndex.resize(iR);
+		m_bSorted.resize(iR);
+	}
+	//num is initial capacity
+	void InitializeRow(size_t rowID, size_t num){
+		m_vecData[rowID] = vector<CPair<tType> >();
+		m_vecData[rowID].reserve(num);
+		m_currentIndex[rowID] = 0;
+		m_bSorted[rowID] = false;
+	}
+	void Reset() {
+		size_t i;
+		for(i=0; i<CSparseMatrixImpl<tType>::m_iR; i++)
+			m_vecData[i].clear();
+		m_vecData.clear();
+		CSparseMatrixImpl<tType>::m_iR = 0; 
+	}
+	const tType& GetDefault() const {
+		return CSparseMatrixImpl<tType>::GetDefault();
+	}
+	size_t GetRows() const {
+		return CSparseMatrixImpl<tType>::GetRows(); 
+	}
+	//does not check if [iY][iX] already exists
+	//nor if the row m_vecData[iY] is already full
+	void Add(size_t iY, size_t iX, tType v){
+		AdjustCoord(iY, iX);
+		CPair<tType> cp;
+		cp.i = (utype) iX;
+		cp.v = v;
+		m_vecData[iY].push_back(cp);
+		m_bSorted[iY] = false;
+		m_currentIndex[iY]++;
+	}
+	const vector<CPair<tType> >& GetRow(size_t iY) const{
+		return m_vecData[iY];
+	}
+	typename vector<CPair<tType> >::iterator RowBegin(size_t iY){
+		return m_vecData[iY].begin();
+	}
+	typename vector<CPair<tType> >::iterator RowEnd(size_t iY){
+		return m_vecData[iY].end();
+	}
+	void Shrink(){
+		size_t i;
+		for(i=0; i<m_vecData.size(); i++)
+			m_vecData[i].resize(m_currentIndex[i]);
+	}
+	void SortRow(size_t iY){
+		sort(m_vecData[iY].begin(), m_vecData[iY].end(), CAscendingKey<tType>());
+		m_bSorted[iY] = true;
+	}
+	void Organize(){
+		size_t i;
+		for(i=0; i<m_vecData.size(); i++)
+			SortRow(i);
+	}
+	void AdjustCoord(size_t &iY, size_t &iX){
+		if(iY>=iX){ //second must be the greater
+			size_t tmp = iY;
+			iY = iX;
+			iX = tmp;
+		}
+	}
+	bool Check(size_t iY, size_t iX){
+		AdjustCoord(iY, iX);
+		if(!m_bSorted[iY]) SortRow(iY);
+		size_t ind = GetIndex(iY, iX);
+		if(ind!=(size_t) -1) return true;
+		return false;
+	}
+	CPair<tType>* GetElement(size_t iY, size_t iX){
+		AdjustCoord(iY, iX);
+		if(!m_bSorted[iY]) SortRow(iY);
+		size_t ind = GetIndex(iY, iX);
+		if(ind==(size_t)-1) return NULL;
+		return &m_vecData[iY][ind];
+	}
+	//assume element at this coordinate exists
+	tType Get(size_t iY, size_t iX) const {
+		AdjustCoord(iY, iX);
+		if(!m_bSorted[iY]) SortRow(iY);
+		size_t ind = GetIndex(iY, iX);
+		return m_vecData[iY][ind].v;
+	}
+	//assume element at this coordinate exists
+	void Set(size_t iY, size_t iX, tType v) {
+		AdjustCoord(iY, iX);
+		if(!m_bSorted[iY]) SortRow(iY);
+		size_t ind = GetIndex(iY, iX);
+		m_vecData[iY][ind].v = v;
+	}
+	void Increment(size_t iY, size_t iX, tType v){
+		AdjustCoord(iY, iX);
+		if(!m_bSorted[iY]) SortRow(iY);
+		size_t ind = GetIndex(iY, iX);
+		m_vecData[iY][ind].v += v;
+	}
+
+private:
+	//parent class contains m_iR and m_Default
+	vector<vector<CPair<tType> > > m_vecData;
+	vector<bool> m_bSorted;
+	vector<size_t> m_currentIndex;
+
+	size_t GetIndex(size_t iY, size_t iX) {
+		return BinarySearch(m_vecData[iY], iX); //suppose m_bSorted[iY] = true
+	}
+	size_t BinarySearch(vector<CPair<tType> > &A, size_t iX){
+		int iMax = A.size()-1;
+		int iMin = 0;
+		while(iMax>=iMin){
+			int iMid = (iMin + iMax) / 2;
+			if(A[iMid].i < iX)
+				iMin = iMid + 1;
+			else if(A[iMid].i > iX)
+				iMax = iMid - 1;
+			else
+				return (size_t) iMid;
+		}
+		return (size_t) -1;
+	}
+};
 /*!
  * \brief
  * An asymmetric two-dimensional sparse matrix using maps for each row.

File tools/Makefile.am

 	  SeekGeneRecommender \
 	  SeekAggregatedDataset \
       SeekPValue \
+      SeekIterative \
 	  PCLServer
 endif
 

File tools/SeekIterative/SeekIterative.cpp

+#include "stdafx.h"
+#include "cmdline.h"
+
+bool transfer(CDat &Dat, vector<vector<float> > &mat, CSeekIntIntMap *geneMap,
+	vector<string> &vecstrGenes){
+
+	int i, j;
+	vector<utype> veciGenes;
+	veciGenes.clear();
+	veciGenes.resize(vecstrGenes.size());
+	for( i = 0; i < vecstrGenes.size( ); ++i )
+		veciGenes[ i ] = Dat.GetGene( vecstrGenes[i] );
+	
+	mat.resize(vecstrGenes.size());
+	for(i=0; i<vecstrGenes.size(); i++)
+		CSeekTools::InitVector(mat[i], vecstrGenes.size(), CMeta::GetNaN());
+
+	for(i=0; i<vecstrGenes.size(); i++){
+		utype s = veciGenes[i];
+		if(CSeekTools::IsNaN(s)) continue;
+		geneMap->Add(i);
+		float *v = Dat.GetFullRow(s);
+		for(j=0; j<vecstrGenes.size(); j++){
+			utype t = veciGenes[j];
+			if(CSeekTools::IsNaN(t)) continue;
+			if(CMeta::IsNaN(v[t])) continue;
+			mat[i][j] = Dat.Get(s, t);
+		}
+		free(v);
+	}
+	return true;
+}
+
+//Add scores from two vectors and integrate them using alpha
+bool integrate(vector<float> &d1, vector<float> &d2, 
+	vector<float> &dest, CSeekIntIntMap *geneMap, int k, float alpha){
+	utype i;
+	CSeekTools::InitVector(dest, geneMap->GetSize(), (float) CMeta::GetNaN());
+	const vector<utype> &allGenes = geneMap->GetAllReverse();
+	for(i=0; i<geneMap->GetNumSet(); i++){
+		utype gi = allGenes[i];
+		dest[gi] = (1.0 - alpha) * d1[gi] + alpha / k * d2[gi];
+	}
+	return true;
+}
+
+//initialize score vector
+bool init_score(vector<float> &dest, CSeekIntIntMap *geneMap){
+	CSeekTools::InitVector(dest, geneMap->GetSize(), (float) CMeta::GetNaN());
+	utype i;	
+	const vector<utype> &allGenes = geneMap->GetAllReverse();
+	for(i=0; i<geneMap->GetNumSet(); i++)
+		dest[allGenes[i]] = 0;
+	return true;
+}
+
+bool add_score(vector<float> &src, vector<float> &dest, CSeekIntIntMap *geneMap){
+	const vector<utype> &allGenes = geneMap->GetAllReverse();
+	utype i, j;
+	for(i=0; i<geneMap->GetNumSet(); i++){
+		utype gi = allGenes[i];
+		dest[gi] += src[gi];
+	}
+	return true;
+}
+
+bool search_one_dab(vector<float> &gene_score, 
+	CDat &mat, const size_t numGenes,
+	CSeekIntIntMap &d1,
+	vector<utype> &indexConvReverse,
+	vector<float> &q_weight){ 
+	//q_weight is query presence - 1.0 if gene at the index i is a query gene
+
+	vector<float> gene_count;
+	CSeekTools::InitVector(gene_score, numGenes, (float)CMeta::GetNaN());
+	CSeekTools::InitVector(gene_count, numGenes, (float)0);
+	utype qqi;
+	utype kk, k;
+
+	const vector<utype> &allGenes = d1.GetAllReverse();	
+	for(kk=0; kk<d1.GetNumSet(); kk++){
+		utype k = allGenes[kk];
+		utype gi = indexConvReverse[k];
+		gene_score[gi] = 0;
+	}
+
+	for(qqi=0; qqi<d1.GetNumSet(); qqi++){
+		utype qi = allGenes[qqi];
+		utype qq = indexConvReverse[qi];
+		if(q_weight[qq]==0) //not a query gene
+			continue;
+		//now a query gene
+		float *vc = mat.GetFullRow(qi);
+		for(kk=0; kk<d1.GetNumSet(); kk++){
+			utype k = allGenes[kk];
+			utype gi = indexConvReverse[k];
+			float fl = vc[k];
+			gene_score[gi] += fl;
+			gene_count[gi] += 1.0;
+		}
+		delete[] vc;
+	}
+
+	for(kk=0; kk<d1.GetNumSet(); kk++){
+		utype k = allGenes[kk];
+		utype gi = indexConvReverse[k];
+		gene_score[gi] /= gene_count[gi];
+	}
+
+	return true;
+}
+
+bool get_score(vector<float> &gene_score, 
+	//vector<vector<float> > &mat, 
+	CSparseFlatMatrix<float> &mat,
+	CSeekIntIntMap *geneMap, vector<float> &q_weight){
+	vector<float> gene_count;
+	int numGenes = geneMap->GetSize();
+	CSeekTools::InitVector(gene_score, numGenes, (float)CMeta::GetNaN());
+	CSeekTools::InitVector(gene_count, numGenes, (float)0);
+	int qi=0;
+	const vector<utype> &allGenes = geneMap->GetAllReverse();
+	utype kk, k;
+	
+	for(kk=0; kk<geneMap->GetNumSet(); kk++){
+		utype gi = allGenes[kk];
+		gene_score[gi] = 0;
+	}
+	for(qi=0; qi<geneMap->GetNumSet(); qi++){
+		utype qq = allGenes[qi];
+		if(q_weight[qq]==0) 
+			continue;
+		const vector<CPair<float> > &vc = mat.GetRow(qq);
+		for(kk=0; kk<vc.size(); kk++){
+			float fl = vc[kk].v;
+			utype gi = vc[kk].i;
+			gene_score[gi] += fl * q_weight[qq];
+		}
+		for(kk=0; kk<geneMap->GetNumSet(); kk++){
+			utype gi = allGenes[kk];
+			gene_count[gi] += q_weight[qq];
+		}
+		/*for(kk=0; kk<geneMap->GetNumSet(); kk++){
+			utype gi = allGenes[kk];
+			map<utype,float>::iterator it;
+			float fl = 0;
+			if((it=mat[qq].find(gi))==mat[qq].end()){
+				fl = 0;
+			}else{
+				fl = it->second;
+			}
+			//float fl = mat[qq][gi];
+			gene_score[gi] += fl * q_weight[qq];
+			gene_count[gi] += q_weight[qq];
+		}*/
+	}
+
+	for(k=0; k<geneMap->GetNumSet(); k++){
+		utype gi = allGenes[k];
+		gene_score[gi] /= gene_count[gi];
+	}
+	return true;
+}
+
+/*bool iterate(vector<float> &src, vector<float> &dest, 
+	vector<vector<float> > &tr, CSeekIntIntMap *geneMap,
+	float alpha, int numIterations){
+	
+	const vector<utype> &allGenes = geneMap->GetAllReverse();
+	dest.resize(src.size());
+	vector<float> backup;
+	CSeekTools::InitVector(backup, src.size(), CMeta::GetNaN());
+	int i, j, k;
+	for(i=0; i<dest.size(); i++)
+		dest[i] = src[i];	
+		
+	for(i=0; i<numIterations; i++){
+		for(j=0; j<geneMap->GetNumSet(); j++){
+			utype gi = allGenes[j];
+			backup[gi] = dest[gi];
+		}
+		get_score(dest, tr, geneMap, backup);
+		for(j=0; j<geneMap->GetNumSet(); j++){
+			utype gi = allGenes[j];
+			dest[gi] = (1.0 - alpha) * src[j] + alpha * dest[gi];
+		}
+	}
+	return true;
+}*/
+
+bool weight(vector<char> &is_query, vector<float> &d1,
+	CSeekIntIntMap *geneMap, float rbp_p, float &w){
+	vector<AResultFloat> ar;
+	ar.resize(geneMap->GetSize());
+	utype i;
+	for(i=0; i<ar.size(); i++)
+		ar[i].i = i;
+	
+	const vector<utype> &allGenes = geneMap->GetAllReverse();
+	for(i=0; i<geneMap->GetNumSet(); i++){
+		utype gi = allGenes[i];
+		ar[gi].f = d1[gi];
+	}
+	
+	int MAX = 5000;
+	nth_element(ar.begin(), ar.begin()+MAX, ar.end());
+	sort(ar.begin(), ar.begin()+MAX);
+	
+	w = 0;
+	for(i=0; i<MAX; i++)
+		if(is_query[ar[i].i]==1)
+			w += pow(rbp_p, i);
+	w *= (1.0 - rbp_p);	
+	return true;
+}
+
+bool cv_weight(vector<utype> &query, 
+	//vector<vector<float> > &mat,
+	CSparseFlatMatrix<float> &mat,
+	CSeekIntIntMap *geneMap, float rbp_p, float &tot_w){
+	//leave one in cross-validation
+	utype i, j;
+	int numGenes = geneMap->GetSize();
+	tot_w = 0;
+	
+	for(i=0; i<query.size(); i++){
+		vector<char> is_query;
+		vector<float> q_weight;
+		CSeekTools::InitVector(is_query, numGenes, (char) 0);
+		CSeekTools::InitVector(q_weight, numGenes, (float) 0);
+		q_weight[query[i]] = 1.0;
+		float w = 0;
+		
+		for(j=0; j<query.size(); j++){
+			if(i==j) continue;
+			is_query[query[j]] = 1;
+		}
+		vector<float> gene_score;
+		get_score(gene_score, mat, geneMap, q_weight);
+		gene_score[query[i]] = 0;
+		weight(is_query, gene_score, geneMap, rbp_p, w);
+		tot_w += w;
+	}
+	tot_w /= query.size();
+	
+	return true;	
+}
+
+int main(int iArgs, char **aszArgs){
+	static const size_t c_iBuffer   = 1024;
+	char acBuffer[c_iBuffer];
+	
+	gengetopt_args_info sArgs;
+	ifstream ifsm;
+	istream *pistm;
+	vector<string> vecstrLine, vecstrGenes, vecstrDBs, vecstrQuery;
+	utype i, qi, j, k, l, kk;
+	
+	if(cmdline_parser(iArgs, aszArgs, &sArgs)){
+		cmdline_parser_print_help();
+		return 1;
+	}
+
+	if( sArgs.input_arg ) {
+		ifsm.open( sArgs.input_arg );
+		pistm = &ifsm; 
+	}else
+		pistm = &cin;
+	
+	map<string, size_t> mapstriGenes;
+	while( !pistm->eof( ) ) {
+		pistm->getline( acBuffer, c_iBuffer - 1 );
+		acBuffer[ c_iBuffer - 1 ] = 0;
+		vecstrLine.clear( );
+		CMeta::Tokenize( acBuffer, vecstrLine );
+		if( vecstrLine.size( ) < 2 ) {
+			//cerr << "Ignoring line: " << acBuffer << endl;
+			continue;
+		}
+		if( !( i = atoi( vecstrLine[ 0 ].c_str( ) ) ) ) {
+			cerr << "Illegal gene ID: " << vecstrLine[ 0 ] << " for "
+				<< vecstrLine[ 1 ] << endl;
+			return 1;
+		}
+		i--;
+		if( vecstrGenes.size( ) <= i )
+			vecstrGenes.resize( i + 1 );
+		vecstrGenes[ i ] = vecstrLine[ 1 ];
+		mapstriGenes[vecstrGenes[i]] = i;
+	}
+
+	fprintf(stderr, "Finished reading gene map\n");
+
+	string dab_dir = sArgs.dab_dir_arg;
+	string output_dir = sArgs.dir_out_arg;
+	int numGenes = vecstrGenes.size();
+	vector< vector<string> > vecstrAllQuery;
+		
+	fprintf(stderr, "Reading queries\n");
+	if(!CSeekTools::ReadMultipleQueries(sArgs.query_arg, vecstrAllQuery))
+		return -1;
+
+	vector<vector<utype> > qu;
+	qu.resize(vecstrAllQuery.size());
+	for(i=0; i<vecstrAllQuery.size(); i++){
+		qu[i] = vector<utype>();
+		for(j=0; j<vecstrAllQuery[i].size(); j++)
+			qu[i].push_back(mapstriGenes[vecstrAllQuery[i][j]]);
+	}
+
+	if(sArgs.combined_flag==1){
+		string dab_base = sArgs.dab_basename_arg;
+		string file1 = dab_dir + "/" + dab_base + ".dab";
+
+		vector<vector<float> > q_weight;
+		q_weight.resize(vecstrAllQuery.size());
+		for(i=0; i<vecstrAllQuery.size(); i++){
+			CSeekTools::InitVector(q_weight[i], numGenes, (float) 0);
+			for(j=0; j<vecstrAllQuery[i].size(); j++)
+				q_weight[i][mapstriGenes[vecstrAllQuery[i][j]]] = 1;
+		}
+
+		CDat CD;
+		CD.Open(file1.c_str(), false, 2, false, false, false);
+
+		CSeekIntIntMap d1(CD.GetGenes());
+		vector<utype> indexConvReverse;
+		CSeekTools::InitVector(indexConvReverse, vecstrGenes.size(), (utype) -1);	
+		for(i=0; i<CD.GetGenes(); i++){
+			map<string,size_t>::iterator it = mapstriGenes.find(CD.GetGene(i));
+			if(it==mapstriGenes.end()) continue;
+			indexConvReverse[i] = it->second;
+			d1.Add(i);
+		}
+
+		vector<vector<float> > final_score;
+		final_score.resize(vecstrAllQuery.size());
+		for(j=0; j<vecstrAllQuery.size(); j++)
+			CSeekTools::InitVector(final_score[j], vecstrGenes.size(), 
+			(float)CMeta::GetNaN());
+
+		const vector<utype> &allGenes = d1.GetAllReverse();			
+		for(j=0; j<vecstrAllQuery.size(); j++){
+			vector<float> tmp_score;
+			search_one_dab(tmp_score, CD, vecstrGenes.size(), d1, indexConvReverse,
+			q_weight[j]);
+			for(kk=0; kk<d1.GetNumSet(); kk++){
+				utype k = allGenes[kk];
+				utype gi = indexConvReverse[k];
+				final_score[j][gi] = tmp_score[gi];
+			}
+		}
+
+		for(j=0; j<vecstrAllQuery.size(); j++){
+			for(k=0; k<final_score[j].size(); k++){
+				if(CMeta::IsNaN(final_score[j][k])){
+					final_score[j][k] = -320;
+					continue;
+				}
+			}
+			sprintf(acBuffer, "%s/%d.query", output_dir.c_str(), j);
+			CSeekTools::WriteArrayText(acBuffer, vecstrAllQuery[j]);
+			sprintf(acBuffer, "%s/%d.gscore", output_dir.c_str(), j);
+			CSeekTools::WriteArray(acBuffer, final_score[j]);
+		}
+
+		//Visualize
+		for(j=0; j<vecstrAllQuery.size(); j++){
+			vector<AResultFloat> ar;
+			ar.resize(final_score[j].size());
+			for(k=0; k<final_score[j].size(); k++){
+				ar[k].i = k;
+				ar[k].f = final_score[j][k];
+			}
+			sort(ar.begin(), ar.end());
+			vector<utype> vec_g;
+			vector<string> vec_s;
+			int FIRST = 100;
+			for(k=0; k<FIRST; k++){
+				if(ar[k].f==-320) break;
+				vec_g.push_back(CD.GetGeneIndex(vecstrGenes[ar[k].i]));
+				vec_s.push_back(vecstrGenes[ar[k].i]);
+			}
+			if(vec_g.size()!=FIRST) continue;
+			CDat V;
+			V.Open(vec_s);
+			for(k=0; k<vec_s.size(); k++){
+				for(l=k+1; l<vec_s.size(); l++){
+					V.Set(k, l, CD.Get(vec_g[k], vec_g[l]));
+				}
+			}
+
+			sprintf(acBuffer, "%s/%d.dot", output_dir.c_str(), j);			
+			ofstream ot(acBuffer);
+			V.SaveDOT(ot, 0.0001, NULL, true, false, NULL, NULL);
+		}
+		
+
+	}
+
+	if(sArgs.test_flag==1){
+		string dab_base = sArgs.dab_basename_arg;
+		string file1 = dab_dir + "/" + dab_base + ".half";
+
+		vector<vector<float> > q_weight;
+		q_weight.resize(vecstrAllQuery.size());
+		for(i=0; i<vecstrAllQuery.size(); i++){
+			CSeekTools::InitVector(q_weight[i], numGenes, (float) 0);
+			for(j=0; j<vecstrAllQuery[i].size(); j++)
+				q_weight[i][mapstriGenes[vecstrAllQuery[i][j]]] = 1;
+		}
+
+		CHalfMatrix<float> res;
+		res.Initialize(vecstrGenes.size());
+		ifstream istm1;
+		uint32_t dim;
+		istm1.open(file1.c_str(), ios_base::binary);
+		istm1.read((char*)(&dim), sizeof(dim));
+		float *adScores = new float[dim-1];
+		for(i=0; (i+1)<dim; i++){
+			istm1.read((char*)adScores, sizeof(*adScores) * (dim - i - 1));
+			res.Set(i, adScores);
+		}
+		delete[] adScores;
+		istm1.close();
+	
+		string s1 = "uc003vor.2";
+		vector<string> r1;
+		r1.push_back("uc003vos.2");
+		r1.push_back("uc003vop.1");
+		r1.push_back("uc011jwz.1");
+		r1.push_back("uc002rgw.1");
+
+		for(i=0; i<r1.size(); i++){
+			size_t i1 = mapstriGenes[s1];
+			size_t j1 = mapstriGenes[r1[i]];
+			fprintf(stderr, "%s %s %.5f\n", s1.c_str(), r1[i].c_str(), res.Get(i1, j1));
+		}
+	}
+
+	if(sArgs.testcount_flag==1){
+		string dab_base = sArgs.dab_basename_arg;
+		string file1 = dab_dir + "/" + dab_base + ".pair_count";
+
+		vector<vector<float> > q_weight;
+		q_weight.resize(vecstrAllQuery.size());
+		for(i=0; i<vecstrAllQuery.size(); i++){
+			CSeekTools::InitVector(q_weight[i], numGenes, (float) 0);
+			for(j=0; j<vecstrAllQuery[i].size(); j++)
+				q_weight[i][mapstriGenes[vecstrAllQuery[i][j]]] = 1;
+		}
+
+		CHalfMatrix<unsigned short> res;
+		res.Initialize(vecstrGenes.size());
+		ifstream istm1;
+		uint32_t dim;
+		istm1.open(file1.c_str(), ios_base::binary);
+		istm1.read((char*)(&dim), sizeof(dim));
+		unsigned short *adScores = new unsigned short[dim-1];
+		for(i=0; (i+1)<dim; i++){
+			istm1.read((char*)adScores, sizeof(*adScores) * (dim - i - 1));
+			res.Set(i, adScores);
+		}
+		delete[] adScores;
+		istm1.close();
+	
+		string s1 = "uc003vor.2";
+		vector<string> r1;
+		r1.push_back("uc003vos.2");
+		r1.push_back("uc003vop.1");
+		r1.push_back("uc011jwz.1");
+		r1.push_back("uc002rgw.1");
+
+		for(i=0; i<r1.size(); i++){
+			size_t i1 = mapstriGenes[s1];
+			size_t j1 = mapstriGenes[r1[i]];
+			fprintf(stderr, "%s %s %d\n", s1.c_str(), r1[i].c_str(), res.Get(i1, j1));
+		}
+	}
+
+	if(sArgs.testcombined_flag==1){
+		string dab_base = sArgs.dab_basename_arg;
+		string file3 = dab_dir + "/" + dab_base + ".gene_count";
+		string file2 = dab_dir + "/" + dab_base + ".pair_count";
+		string file1 = dab_dir + "/" + dab_base + ".half";
+
+		vector<utype> gene_count;
+		CSeekTools::ReadArray(file3.c_str(), gene_count);
+
+		float max_count = *max_element(gene_count.begin(), gene_count.end());
+		float min_count_required = max_count*0.50;
+		CSeekIntIntMap d1(vecstrGenes.size());
+		vector<string> validGenes;
+		for(i=0; i<vecstrGenes.size(); i++){
+			if(gene_count[i]<min_count_required) continue;
+			validGenes.push_back(vecstrGenes[i]);
+			d1.Add(i);
+		}
+		
+		CDat CD;
+		CD.Open(validGenes);
+		for(i=0; i<validGenes.size(); i++){
+			for(j=i+1; j<validGenes.size(); j++){
+				CD.Set(i,j,CMeta::GetNaN());
+			}
+		}
+		//CHalfMatrix<float> res;
+		//res.Initialize(vecstrGenes.size());
+		ifstream istm1, istm2;
+		uint32_t dim;
+		istm1.open(file1.c_str(), ios_base::binary);
+		istm1.read((char*)(&dim), sizeof(dim));
+		istm2.open(file2.c_str(), ios_base::binary);
+		istm2.read((char*)(&dim), sizeof(dim));
+		float *adScores = new float[dim-1];
+		unsigned short *adScores2 = new unsigned short[dim-1];
+		for(i=0; (i+1)<dim; i++){
+			istm1.read((char*)adScores, sizeof(*adScores) * (dim - i - 1));
+			istm2.read((char*)adScores2, sizeof(*adScores2) * (dim - i - 1));
+			if(i%2000==0)
+				fprintf(stderr, "%d Finished\n", i);
+			utype gi = d1.GetForward(i);
+			if(CSeekTools::IsNaN(gi)) continue;
+
+			for(j=0; j<dim-i-1; j++){
+				utype gj = d1.GetForward(j+i+1);
+				if(CSeekTools::IsNaN(gj) || 
+					(float) adScores2[j]<min_count_required) continue;
+				adScores[j] = adScores[j] / (float) adScores2[j];
+				CD.Set(gi, gj, adScores[j]);
+			}
+	
+			//res.Set(i, adScores);
+		}
+		delete[] adScores;
+		delete[] adScores2;
+		istm1.close();
+		istm2.close();
+	
+		string s1 = "uc003vor.2";
+		vector<string> r1;
+		r1.push_back("uc003vos.2");
+		r1.push_back("uc003vop.1");
+		r1.push_back("uc011jwz.1");
+		r1.push_back("uc002rgw.1");
+
+		for(i=0; i<r1.size(); i++){
+			//size_t i1 = mapstriGenes[s1];
+			//size_t j1 = mapstriGenes[r1[i]];
+			size_t i1 = CD.GetGeneIndex(s1);
+			size_t j1 = CD.GetGeneIndex(r1[i]);
+			fprintf(stderr, "%s %s %.5f\n", s1.c_str(), r1[i].c_str(), CD.Get(i1, j1));
+		}
+	}
+	//DAB mode
+	if(sArgs.dab_flag==1){
+		if(sArgs.default_type_arg!=0 && sArgs.default_type_arg!=1){
+			fprintf(stderr, "Error, invalid type!\n");
+			return -1;
+		}
+
+		if(sArgs.max_rank_arg==-1){
+			fprintf(stderr, "Error, please supply the max rank flag.\n");
+			return -1;
+		}
+
+		if(sArgs.rbp_p_arg==-1){
+			fprintf(stderr, "Error, please supply the rbp_p flag.\n");
+			return -1;
+		}
+
+		float rbp_p = sArgs.rbp_p_arg;
+		int max_rank = sArgs.max_rank_arg;
+
+		int num_iter = sArgs.num_iter_arg;
+		vector<string> dab_list;
+		CSeekTools::ReadListOneColumn(sArgs.dab_list_arg, dab_list);
+		vector<CSeekIntIntMap*> dm;
+		dm.resize(dab_list.size());
+		
+		//MODE 1 - Normal search:
+		vector<vector<float> > q_weight;
+		q_weight.resize(vecstrAllQuery.size());
+		for(i=0; i<vecstrAllQuery.size(); i++){
+			CSeekTools::InitVector(q_weight[i], numGenes, (float) 0);
+			for(j=0; j<vecstrAllQuery[i].size(); j++)
+				q_weight[i][mapstriGenes[vecstrAllQuery[i][j]]] = 1;
+		}
+		
+		
+		fprintf(stderr, "Reading sparse DAB\n");
+		vector<vector<float> > final_score, count;
+		vector<vector<int> > freq;
+		final_score.resize(vecstrAllQuery.size());
+		count.resize(vecstrAllQuery.size());
+		freq.resize(vecstrAllQuery.size());
+		for(j=0; j<vecstrAllQuery.size(); j++){
+			CSeekTools::InitVector(final_score[j], vecstrGenes.size(), (float)CMeta::GetNaN());
+			CSeekTools::InitVector(count[j], vecstrGenes.size(), (float) 0);
+			CSeekTools::InitVector(freq[j], vecstrGenes.size(), (int) 0);
+		}
+	
+		//CSparseFlatHalfMatrix<float> res(0);
+		/*CHalfMatrix<float> res;
+		res.Initialize(vecstrGenes.size());
+		for(i=0; i<vecstrGenes.size(); i++)
+			for(j=i+1; j<vecstrGenes.size(); j++)
+				res.Set(i, j, 0);
+		*/
+		for(i=0; i<dab_list.size(); i++){
+			fprintf(stderr, "Reading %d: %s\n", i, dab_list[i].c_str());
+			//vector<vector<float> > sm;
+			//vector<map<utype,float> > sm;
+			CSeekIntIntMap d1(vecstrGenes.size());
+			string dabfile = dab_dir + "/" + dab_list[i];
+			//CDat Dat;
+			//Dat.Open(dabfile.c_str(), false, 2, false, false, false);
+			//CSeekWriter::NormalizeDAB(Dat, vecstrGenes, false, false, false, true);
+			//transfer(Dat, sm, &d1, vecstrGenes);
+			
+			CSparseFlatMatrix<float> sm (0);
+			//CSeekWriter::ReadSparseMatrix(dabfile.c_str(), sm, d1, 3000, 0.997, vecstrGenes);
+
+			if(sArgs.default_type_arg==0) //utype
+				CSeekWriter::ReadSeekSparseMatrix<utype>(dabfile.c_str(), sm, d1, max_rank, rbp_p, vecstrGenes);
+			else
+				CSeekWriter::ReadSeekSparseMatrix<unsigned short>(dabfile.c_str(), sm, d1, max_rank, rbp_p, vecstrGenes);
+			/*if(i==0){
+				fprintf(stderr, "Summing...\n");
+				res.Copy(sm);
+				fprintf(stderr, "Finished Summing...\n");
+			}else{*/
+			//	fprintf(stderr, "Summing...\n");
+			//	CSeekWriter::SumSparseMatrix(sm, res, d1, 1.0);
+			//	fprintf(stderr, "Finished Summing...\n");
+			/*}
+			*/
+			//fprintf(stderr, "Finished!\n");	
+			
+			const vector<utype> &allGenes = d1.GetAllReverse();			
+			for(j=0; j<vecstrAllQuery.size(); j++){
+				float dw = 1.0;
+				//cv_weight(qu[j], sm, &d1, 0.99, dw);
+				//fprintf(stderr, "%.3e\n", dw);
+				vector<float> tmp_score;
+				get_score(tmp_score, sm, &d1, q_weight[j]);
+				for(k=0; k<d1.GetNumSet(); k++){
+					utype gi = allGenes[k];
+					if(CMeta::IsNaN(final_score[j][gi]))
+						final_score[j][gi] = 0;
+					final_score[j][gi] += tmp_score[gi] * dw;
+					count[j][gi]+=dw;
+					freq[j][gi]++;
+				}	
+			}
+		}
+
+		/*
+		ofstream ofsm;
+		ofsm.open("/memex/qzhu/p4/concatenate_tumor_network.half", ios_base::binary);
+		res.Save(ofsm, true);
+		*/
+		
+		int minRequired = (int) ((float) dab_list.size() * 0.50);
+		for(j=0; j<vecstrAllQuery.size(); j++){
+			for(k=0; k<final_score[j].size(); k++){
+				if(freq[j][k]<minRequired){
+					final_score[j][k] = -320;
+					continue;
+				}
+				if(CMeta::IsNaN(final_score[j][k])){
+					final_score[j][k] = -320;
+					continue;
+				}
+				final_score[j][k] /= count[j][k];
+			}
+			sprintf(acBuffer, "%s/%d.query", output_dir.c_str(), j);
+			CSeekTools::WriteArrayText(acBuffer, vecstrAllQuery[j]);
+			sprintf(acBuffer, "%s/%d.gscore", output_dir.c_str(), j);
+			CSeekTools::WriteArray(acBuffer, final_score[j]);
+		}
+	
+		//MODE 2
+		/*vector<vector<utype> > qu;
+		qu.resize(vecstrAllQuery.size());
+		for(i=0; i<vecstrAllQuery.size(); i++){
+			qu[i] = vector<utype>();
+			for(j=0; j<vecstrAllQuery[i].size(); j++)
+				qu[i].push_back(mapstriGenes[vecstrAllQuery[i][j]]);
+		}
+		vector<vector<float> > q_weight;
+		q_weight.resize(vecstrAllQuery.size());
+		for(i=0; i<vecstrAllQuery.size(); i++){
+			CSeekTools::InitVector(q_weight[i], numGenes, (float) 0);
+			for(j=0; j<vecstrAllQuery[i].size(); j++)
+				q_weight[i][mapstriGenes[vecstrAllQuery[i][j]]] = 1;
+		}
+
+		float alpha1 = 0.1; //inter - propagation
+		float alpha2 = 0.1; //intra - propagation
+		//float alpha = 0;		
+		bool label_propagate = false;
+
+		for(i=0; i<dab_list.size(); i++){
+			CSeekIntIntMap d1(vecstrGenes.size());
+			vector<vector<float> > sm1;
+			vector<utype> l1;
+			string dabfile1 = dab_dir + "/" + dab_list[i];
+			CSeekWriter::ReadSparseMatrixAsArray(l1, dabfile1.c_str());
+			CSeekWriter::ReadSparseMatrix(l1, sm1, d1, 1000, 0.99, vecstrGenes);
+
+			cerr << "1 " << dabfile1 << endl;
+			vector<vector<float> > final_score;
+			final_score.resize(vecstrAllQuery.size());
+			
+			//vector<vector<float> > tmp_score;
+			//tmp_score.resize(vecstrAllQuery.size());
+	
+			vector<vector<float> > tmp_score_2;
+			tmp_score_2.resize(vecstrAllQuery.size());
+
+			//this score
+			//component 1
+			for(j=0; j<vecstrAllQuery.size(); j++){
+				//get_score(tmp_score[j], sm1, &d1, q_weight[j]);
+				init_score(tmp_score_2[j], &d1);
+				init_score(final_score[j], &d1);
+			}
+
+			for(j=0; label_propagate && j<dab_list.size(); j++){
+				if(i==j) continue;
+				CSeekIntIntMap d2(vecstrGenes.size());
+				vector<vector<float> > sm2;
+				vector<utype> l2;
+				string dabfile2 = dab_dir + "/" + dab_list[j];
+				cerr << "2 " << dabfile2 << endl;
+				CSeekWriter::ReadSparseMatrixAsArray(l2, dabfile2.c_str());
+				CSeekWriter::ReadSparseMatrix(l2, sm2, d2, 1000, 0.99, vecstrGenes);
+
+				//similarity matrix
+				vector<vector<float> > sim;
+				CSeekWriter::ProductNorm(sm1, sm2, d1, d2, sim);
+
+				utype kj, kk;
+				for(k=0; k<vecstrAllQuery.size(); k++){
+					//component 2
+					cerr << k << " of " << vecstrAllQuery.size() << endl;
+					vector<float> intra;
+					vector<float> inter;
+					get_score(intra, sm2, &d2, q_weight[k]);
+					get_score(inter, sim, &d2, intra);
+					//get_score(inter, sim, &d2, q_weight[k]);
+					add_score(inter, tmp_score_2[k], &d2);
+				}
+			}
+
+			for(j=0; j<vecstrAllQuery.size(); j++){
+				if(label_propagate){
+					vector<float> tmp3;
+					integrate(q_weight[j], tmp_score_2[j], tmp3, &d1, dab_list.size()-1, alpha1);
+					iterate(tmp3, final_score[j], sm1, &d1, alpha2, 1);
+				}else{
+					//integrate(q_weight[j], tmp_score_2[j], tmp3, &d1, dab_list.size()-1, alpha1);
+					iterate(q_weight[j], final_score[j], sm1, &d1, alpha2, 1);
+				}
+
+				for(k=0; k<final_score[j].size(); k++){
+					if(CMeta::IsNaN(final_score[j][k])){
+						final_score[j][k] = -320;
+						continue;
+					}
+				}
+
+				for(k=0; k<qu[j].size(); k++){
+					final_score[j][qu[j][k]] = -320;
+				}
+
+				sprintf(acBuffer, "%s/%s/%d.query", output_dir.c_str(), dab_list[i].c_str(), j);
+				CSeekTools::WriteArrayText(acBuffer, vecstrAllQuery[j]);
+				sprintf(acBuffer, "%s/%s/%d.gscore", output_dir.c_str(), dab_list[i].c_str(), j);
+				CSeekTools::WriteArray(acBuffer, final_score[j]);
+			}
+		}*/
+		
+	}
+	
+}

File tools/SeekIterative/SeekIterative.ggo

+package	"SeekIterative"
+version	"1.0"
+purpose	"Search based on iterative algorithm"
+
+section "Mode"
+option	"dab"				d	"Sparse Dab mode"
+								flag off
+option	"combined"			e	"Combined-dab mode"
+								flag off
+option	"test"				f	"Test mode"
+								flag off
+option	"testcount"			g	"Test count mode"
+								flag off
+option	"testcombined"		h	"Test count mode"
+								flag off
+
+section "Combined-DAB mode"
+option	"dab_basename"		b	"Combined-dab basename, also shared with Test Mode"
+								string typestr="filename"
+
+section "Sparse DAB mode"
+option	"dab_list"			V	"DAB list"
+								string typestr="filename"
+option	"num_iter"			I	"Number of iterations"
+								int	default="0"
+option	"default_type"		T	"Default gene index type (choose unsigned short for genes, or unsigned int (32-bit) for transcripts) (required for DAB mode) (0 - unsigned int, 1 - unsigned short)"
+								int default="-1"
+option	"rbp_p"				R	"RBP p parameter (must be specified) (p<1.0) (recommended > 0.95)"
+								float default="-1"
+option	"max_rank"			M	"Maximum rank number in the sparse DAB matrix (must be specified)"
+								int default="-1"
+
+section "Input"
+option	"input"				i	"Gene mapping file"
+								string typestr="filename"	yes
+option	"query"				q	"Query file"
+								string typestr="filename"	yes
+option	"dab_dir"			F	"DAB directory"
+								string typestr="directory"	yes
+
+section "Output"
+option	"dir_out"			D	"Output directory"
+								string typestr="directory"
+

File tools/SeekIterative/cmdline.c

+/*
+  File autogenerated by gengetopt version 2.22
+  generated with the following command:
+  /memex/qzhu/usr/bin/gengetopt -iSeekIterative.ggo --default-optional -u -N -e 
+
+  The developers of gengetopt consider the fixed text that goes in all
+  gengetopt output files to be in the public domain:
+  we make no copyright claims on it.
+*/
+
+/* If we use autoconf.  */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "getopt.h"
+
+#include "cmdline.h"
+
+const char *gengetopt_args_info_purpose = "Search based on iterative algorithm";
+
+const char *gengetopt_args_info_usage = "Usage: SeekIterative [OPTIONS]... [FILES]...";
+
+const char *gengetopt_args_info_description = "";
+
+const char *gengetopt_args_info_help[] = {
+  "      --help                   Print help and exit",
+  "      --version                Print version and exit",
+  "\nMode:",
+  "  -d, --dab                    Sparse Dab mode  (default=off)",
+  "  -e, --combined               Combined-dab mode  (default=off)",
+  "  -f, --test                   Test mode  (default=off)",
+  "  -g, --testcount              Test count mode  (default=off)",
+  "  -h, --testcombined           Test count mode  (default=off)",
+  "\nCombined-DAB mode:",
+  "  -b, --dab_basename=filename  Combined-dab basename, also shared with Test \n                                 Mode",
+  "\nSparse DAB mode:",
+  "  -V, --dab_list=filename      DAB list",
+  "  -I, --num_iter=INT           Number of iterations  (default=`0')",
+  "  -T, --default_type=INT       Default gene index type (choose unsigned short \n                                 for genes, or unsigned int (32-bit) for \n                                 transcripts) (required for DAB mode) (0 - \n                                 unsigned int, 1 - unsigned short)  \n                                 (default=`-1')",
+  "  -R, --rbp_p=FLOAT            RBP p parameter (must be specified) (p<1.0) \n                                 (recommended > 0.95)  (default=`-1')",
+  "  -M, --max_rank=INT           Maximum rank number in the sparse DAB matrix \n                                 (must be specified)  (default=`-1')",
+  "\nInput:",
+  "  -i, --input=filename         Gene mapping file",
+  "  -q, --query=filename         Query file",
+  "  -F, --dab_dir=directory      DAB directory",
+  "\nOutput:",
+  "  -D, --dir_out=directory      Output directory",
+    0
+};
+
+typedef enum {ARG_NO
+  , ARG_FLAG
+  , ARG_STRING
+  , ARG_INT
+  , ARG_FLOAT
+} cmdline_parser_arg_type;
+
+static
+void clear_given (struct gengetopt_args_info *args_info);
+static
+void clear_args (struct gengetopt_args_info *args_info);
+
+static int
+cmdline_parser_internal (int argc, char * const *argv, struct gengetopt_args_info *args_info,
+                        struct cmdline_parser_params *params, const char *additional_error);
+
+static int
+cmdline_parser_required2 (struct gengetopt_args_info *args_info, const char *prog_name, const char *additional_error);
+
+static char *
+gengetopt_strdup (const char *s);
+
+static
+void clear_given (struct gengetopt_args_info *args_info)
+{
+  args_info->help_given = 0 ;
+  args_info->version_given = 0 ;
+  args_info->dab_given = 0 ;
+  args_info->combined_given = 0 ;
+  args_info->test_given = 0 ;
+  args_info->testcount_given = 0 ;
+  args_info->testcombined_given = 0 ;
+  args_info->dab_basename_given = 0 ;
+  args_info->dab_list_given = 0 ;
+  args_info->num_iter_given = 0 ;
+  args_info->default_type_given = 0 ;
+  args_info->rbp_p_given = 0 ;
+  args_info->max_rank_given = 0 ;
+  args_info->input_given = 0 ;
+  args_info->query_given = 0 ;
+  args_info->dab_dir_given = 0 ;
+  args_info->dir_out_given = 0 ;
+}
+
+static
+void clear_args (struct gengetopt_args_info *args_info)
+{
+  args_info->dab_flag = 0;
+  args_info->combined_flag = 0;
+  args_info->test_flag = 0;
+  args_info->testcount_flag = 0;
+  args_info->testcombined_flag = 0;
+  args_info->dab_basename_arg = NULL;
+  args_info->dab_basename_orig = NULL;
+  args_info->dab_list_arg = NULL;
+  args_info->dab_list_orig = NULL;
+  args_info->num_iter_arg = 0;
+  args_info->num_iter_orig = NULL;
+  args_info->default_type_arg = -1;
+  args_info->default_type_orig = NULL;
+  args_info->rbp_p_arg = -1;
+  args_info->rbp_p_orig = NULL;
+  args_info->max_rank_arg = -1;
+  args_info->max_rank_orig = NULL;
+  args_info->input_arg = NULL;
+  args_info->input_orig = NULL;
+  args_info->query_arg = NULL;
+  args_info->query_orig = NULL;
+  args_info->dab_dir_arg = NULL;
+  args_info->dab_dir_orig = NULL;
+  args_info->dir_out_arg = NULL;
+  args_info->dir_out_orig = NULL;
+  
+}
+
+static
+void init_args_info(struct gengetopt_args_info *args_info)
+{
+
+
+  args_info->help_help = gengetopt_args_info_help[0] ;
+  args_info->version_help = gengetopt_args_info_help[1] ;
+  args_info->dab_help = gengetopt_args_info_help[3] ;
+  args_info->combined_help = gengetopt_args_info_help[4] ;
+  args_info->test_help = gengetopt_args_info_help[5] ;
+  args_info->testcount_help = gengetopt_args_info_help[6] ;
+  args_info->testcombined_help = gengetopt_args_info_help[7] ;
+  args_info->dab_basename_help = gengetopt_args_info_help[9] ;
+  args_info->dab_list_help = gengetopt_args_info_help[11] ;
+  args_info->num_iter_help = gengetopt_args_info_help[12] ;
+  args_info->default_type_help = gengetopt_args_info_help[13] ;
+  args_info->rbp_p_help = gengetopt_args_info_help[14] ;
+  args_info->max_rank_help = gengetopt_args_info_help[15] ;
+  args_info->input_help = gengetopt_args_info_help[17] ;
+  args_info->query_help = gengetopt_args_info_help[18] ;
+  args_info->dab_dir_help = gengetopt_args_info_help[19] ;
+  args_info->dir_out_help = gengetopt_args_info_help[21] ;
+  
+}
+
+void
+cmdline_parser_print_version (void)
+{
+  printf ("%s %s\n", CMDLINE_PARSER_PACKAGE, CMDLINE_PARSER_VERSION);
+}
+
+static void print_help_common(void) {
+  cmdline_parser_print_version ();
+
+  if (strlen(gengetopt_args_info_purpose) > 0)
+    printf("\n%s\n", gengetopt_args_info_purpose);
+
+  if (strlen(gengetopt_args_info_usage) > 0)
+    printf("\n%s\n", gengetopt_args_info_usage);
+
+  printf("\n");
+
+  if (strlen(gengetopt_args_info_description) > 0)
+    printf("%s\n", gengetopt_args_info_description);
+}
+
+void
+cmdline_parser_print_help (void)
+{
+  int i = 0;
+  print_help_common();
+  while (gengetopt_args_info_help[i])
+    printf("%s\n", gengetopt_args_info_help[i++]);
+}
+
+void
+cmdline_parser_init (struct gengetopt_args_info *args_info)
+{
+  clear_given (args_info);
+  clear_args (args_info);
+  init_args_info (args_info);
+
+  args_info->inputs = NULL;
+  args_info->inputs_num = 0;
+}
+
+void
+cmdline_parser_params_init(struct cmdline_parser_params *params)
+{
+  if (params)
+    { 
+      params->override = 0;
+      params->initialize = 1;
+      params->check_required = 1;
+      params->check_ambiguity = 0;
+      params->print_errors = 1;
+    }
+}
+
+struct cmdline_parser_params *
+cmdline_parser_params_create(void)
+{
+  struct cmdline_parser_params *params = 
+    (struct cmdline_parser_params *)malloc(sizeof(struct cmdline_parser_params));
+  cmdline_parser_params_init(params);  
+  return params;
+}
+
+static void
+free_string_field (char **s)
+{
+  if (*s)
+    {
+      free (*s);
+      *s = 0;
+    }
+}
+
+
+static void
+cmdline_parser_release (struct gengetopt_args_info *args_info)
+{
+  unsigned int i;
+  free_string_field (&(args_info->dab_basename_arg));
+  free_string_field (&(args_info->dab_basename_orig));
+  free_string_field (&(args_info->dab_list_arg));
+  free_string_field (&(args_info->dab_list_orig));
+  free_string_field (&(args_info->num_iter_orig));
+  free_string_field (&(args_info->default_type_orig));
+  free_string_field (&(args_info->rbp_p_orig));
+  free_string_field (&(args_info->max_rank_orig));
+  free_string_field (&(args_info->input_arg));
+  free_string_field (&(args_info->input_orig));
+  free_string_field (&(args_info->query_arg));
+  free_string_field (&(args_info->query_orig));
+  free_string_field (&(args_info->dab_dir_arg));
+  free_string_field (&(args_info->dab_dir_orig));
+  free_string_field (&(args_info->dir_out_arg));
+  free_string_field (&(args_info->dir_out_orig));
+  
+  
+  for (i = 0; i < args_info->inputs_num; ++i)
+    free (args_info->inputs [i]);
+
+  if (args_info->inputs_num)
+    free (args_info->inputs);
+
+  clear_given (args_info);
+}
+
+
+static void
+write_into_file(FILE *outfile, const char *opt, const char *arg, char *values[])
+{
+  if (arg) {
+    fprintf(outfile, "%s=\"%s\"\n", opt, arg);
+  } else {
+    fprintf(outfile, "%s\n", opt);
+  }
+}
+
+
+int
+cmdline_parser_dump(FILE *outfile, struct gengetopt_args_info *args_info)
+{
+  int i = 0;
+
+  if (!outfile)
+    {
+      fprintf (stderr, "%s: cannot dump options to stream\n", CMDLINE_PARSER_PACKAGE);
+      return EXIT_FAILURE;
+    }
+
+  if (args_info->help_given)
+    write_into_file(outfile, "help", 0, 0 );
+  if (args_info->version_given)
+    write_into_file(outfile, "version", 0, 0 );
+  if (args_info->dab_given)
+    write_into_file(outfile, "dab", 0, 0 );
+  if (args_info->combined_given)
+    write_into_file(outfile, "combined", 0, 0 );
+  if (args_info->test_given)
+    write_into_file(outfile, "test", 0, 0 );
+  if (args_info->testcount_given)
+    write_into_file(outfile, "testcount", 0, 0 );
+  if (args_info->testcombined_given)
+    write_into_file(outfile, "testcombined", 0, 0 );
+  if (args_info->dab_basename_given)
+    write_into_file(outfile, "dab_basename", args_info->dab_basename_orig, 0);
+  if (args_info->dab_list_given)
+    write_into_file(outfile, "dab_list", args_info->dab_list_orig, 0);
+  if (args_info->num_iter_given)
+    write_into_file(outfile, "num_iter", args_info->num_iter_orig, 0);
+  if (args_info->default_type_given)
+    write_into_file(outfile, "default_type", args_info->default_type_orig, 0);
+  if (args_info->rbp_p_given)
+    write_into_file(outfile, "rbp_p", args_info->rbp_p_orig, 0);
+  if (args_info->max_rank_given)
+    write_into_file(outfile, "max_rank", args_info->max_rank_orig, 0);
+  if (args_info->input_given)
+    write_into_file(outfile, "input", args_info->input_orig, 0);
+  if (args_info->query_given)
+    write_into_file(outfile, "query", args_info->query_orig, 0);
+  if (args_info->dab_dir_given)
+    write_into_file(outfile, "dab_dir", args_info->dab_dir_orig, 0);
+  if (args_info->dir_out_given)
+    write_into_file(outfile, "dir_out", args_info->dir_out_orig, 0);
+  
+
+  i = EXIT_SUCCESS;
+  return i;
+}
+
+int
+cmdline_parser_file_save(const char *filename, struct gengetopt_args_info *args_info)
+{
+  FILE *outfile;
+  int i = 0;
+
+  outfile = fopen(filename, "w");
+
+  if (!outfile)
+    {
+      fprintf (stderr, "%s: cannot open file for writing: %s\n", CMDLINE_PARSER_PACKAGE, filename);
+      return EXIT_FAILURE;
+    }
+
+  i = cmdline_parser_dump(outfile, args_info);
+  fclose (outfile);
+
+  return i;
+}
+
+void
+cmdline_parser_free (struct gengetopt_args_info *args_info)
+{
+  cmdline_parser_release (args_info);
+}
+
+/** @brief replacement of strdup, which is not standard */
+char *
+gengetopt_strdup (const char *s)
+{
+  char *result = NULL;
+  if (!s)
+    return result;
+
+  result = (char*)malloc(strlen(s) + 1);
+  if (result == (char*)0)
+    return (char*)0;
+  strcpy(result, s);
+  return result;
+}
+
+int
+cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info)
+{
+  return cmdline_parser2 (argc, argv, args_info, 0, 1, 1);
+}
+
+int
+cmdline_parser_ext (int argc, char * const *argv, struct gengetopt_args_info *args_info,
+                   struct cmdline_parser_params *params)
+{
+  int result;
+  result = cmdline_parser_internal (argc, argv, args_info, params, NULL);
+
+  return result;
+}