Commits

Qian Zhu  committed fd1e64d

Merged (most of) the parallel_db branch into main.
Merged Chris's changes with mine (except tools/Makefile.am and configure.ac, which break gen_auto).

Things in parallel_db that were not brought into main include:
1) CDatabaselet::OpenFast() in database.cpp
2) class CUcharFullMatrix in compactmatrixi.h

  • Participants
  • Parent commits fd5f1cf

Comments (0)

Files changed (36)

File configure.ac

 #      AC_DEFINE([SMILEXML_LIB], [1])
 #      SMILEXML_LIB="-lsmilexml"
 #    fi
-         SMILE_CFLAGS="-I $SMILE_INCLUDE_DIR"
+         SMILE_CFLAGS="-I $SMILE_INCLUDE_DIR -fopenmp"
 #         SMILE_LIBS="-L $SMILE_LIB_DIR $SMILEXML_LIB -lsmile"
-         SMILE_LIBS="-L $SMILE_LIB_DIR -lsmile"
+         SMILE_LIBS="-L $SMILE_LIB_DIR -lsmile -fopenmp"
         ],                                                        dnl and found in specified path
 	[],                                                       dnl not found
         [smile_state=no],                                         dnl and not found installed
 		 tools/Combiner/Makefile \
 		 tools/NetworkCombiner/Makefile \
 		 tools/DChecker/Makefile \
-		 tools/PCLEvaluator/Makefile \
 		 tools/Dat2Dab/Makefile \
 		 tools/Dat2Graph/Makefile \
 		 tools/Data2Bnt/Makefile \
 		 tools/Contexter/Makefile \
 		 tools/Counter/Makefile \
 		 tools/Data2DB/Makefile \
+         tools/DBCombiner/Makefile \
 		 tools/DSLConverter/Makefile \
 		 tools/Dab2Dad/Makefile \
 		 tools/Edges2Posteriors/Makefile \

File gen_tools_am

 			    Counter  => ['SMILE'],
 			    DSLConverter  => ['SMILE'],
 			    Data2DB  => ['SMILE'],
+				DBCombiner => ['SMILE'],
 			    Dab2Dad  => ['SMILE'],
 			    Dab2DB  => ['SMILE'],
 			    Data2Svm => ['SVM_PERF'],

File src/compactmatrixi.h

 			cRet |= ( *( pi + 1 ) & ( SIZE_MAX >> ( ( 16 * sizeof(*m_aiData) ) - m_cBits -
 				cShift ) ) ) << ( ( 8 * sizeof(*m_aiData) ) - cShift );
 
-		return cRet; }
+		return cRet;
+	}
 
 	void Set( size_t iX, size_t iY, unsigned char cValue ) {
 		unsigned char	cShift;
 			pi++;
 			iMask = SIZE_MAX >> ( ( 16 * sizeof(*m_aiData) ) - m_cBits - cShift );
 			*pi = ( *pi & ~iMask ) |
-				( ( cValue >> ( ( 8 * sizeof(*m_aiData) ) - cShift ) ) & iMask ); } }
+				( ( cValue >> ( ( 8 * sizeof(*m_aiData) ) - cShift ) ) & iMask );
+		}
+	}
 
 	virtual size_t CountWords( ) const = 0;
 	virtual size_t* GetWord( size_t, size_t, unsigned char& ) const = 0;
 
 	size_t CountWords( ) const {
 		size_t	iRet;
-
 		iRet = m_iRows * m_iColumns;
-		return ( ( ( ( iRet * m_cBits ) - 1 ) / ( 8 * sizeof(*m_aiData) ) ) + 1 ); }
+		return ( ( ( ( iRet * m_cBits ) - 1 ) / ( 8 * sizeof(*m_aiData) ) ) + 1 );
+	}
 
 	size_t* GetWord( size_t iY, size_t iX, unsigned char& cShift ) const {
 		size_t	iIndex;
  * \param fZScore
  * If true and the given stream contains a PCL, z-score similarity measures after pairwise calculation.
  * 
+ * \param fSeek
+ * If true, read by seeking in the file, particularly useful if reading a few values, since there is no
+ * need to read the entire file. (for binary format only)
+ *
  * \returns
  * True if CDat was successfully opened.
  * 
  * Save | CPCL
  */
 bool CDat::Open( std::istream& istm, EFormat eFormat, float dDefault, bool fDuplicates, size_t iSkip,
-	bool fZScore ) {
+	bool fZScore, bool fSeek ) {
 
 	switch( eFormat ) {
 		case EFormatText:
 	
 		case EFormatQdab:
 			return OpenQdab( istm ); 		       
+
 	}
-	return OpenBinary( istm ); }
+
+	if(fSeek){
+		m_fSeek = true;
+	}
+
+	return OpenBinary( istm, fSeek );
+}
 
 bool CDatImpl::OpenPCL( std::istream& istm, size_t iSkip, bool fZScore ) {
 
 
 	return true; }
 
-bool CDatImpl::OpenBinary( std::istream& istm ) {
+bool CDatImpl::OpenBinary( std::istream& istm, bool fSeek ) {
 	size_t	i;
 	float*	adScores;
 
+	if(fSeek){
+		if(!OpenHeader(istm)){
+			cerr << "Error opening header" << endl;
+			return false;
+		}
+		return true;
+	}
+
 	if( !OpenGenes( istm, true, false ) )
 		return false;
 	m_Data.Initialize( GetGenes( ) );
 
 	return true; }
 
+/* still to be tested
+ * used for BINARY mode, and DAB file only, i.e. Float matrix
+ *
+ * Prepares for seek-based reading: loads the gene list with
+ * OpenGenes( istm, true, false ) and then calls EstimateSeekPositions( istm ).
+ * This function itself reads no scores from the stream.
+ *
+ * Returns false if OpenGenes fails, true otherwise.
+ *
+ * NOTE(review): if m_fSeek is not set, a message is printed to cerr but
+ * execution continues -- confirm whether this should fail instead.
+ * NOTE(review): EstimateSeekPositions is not visible here; presumably it
+ * fills m_iHeader and m_veciSeekPos, which GetRowSeek reads -- TODO confirm.
+ */
+bool CDatImpl::OpenHeader(std::istream& istm){
+	if(!m_fSeek){
+		cerr << "Don't know how you got here" << endl; // warning only; no early return
+	}
+
+	if(!OpenGenes(istm, true, false)){
+		return false;
+	}
+	EstimateSeekPositions(istm);
+	return true;
+}
+
+/* still to be tested
+ * used for BINARY mode, and DAB file only, i.e. Float matrix
+ *
+ * Builds the full row of scores for gene index ind by seeking in istm.
+ *
+ * istm: stream to seek in and read from.
+ * ind:  index of the requested gene.
+ *
+ * Returns a malloc'ed array of GetGenes( ) floats. Entry ind is set to
+ * CMeta::GetNaN( ); entries before ind are read one float at a time, and
+ * entries after ind are read with a single contiguous read. Nothing in this
+ * function frees the returned array, so the caller presumably has to free( ) it.
+ *
+ * NOTE(review): neither malloc result nor any seekg/read result is checked.
+ * NOTE(review): if m_fSeek is not set, only a message is printed.
+ * NOTE(review): ind is not range-checked against GetGenes( ).
+ * NOTE(review): iColumn is ind - 1 for every earlier row i; whether that is
+ * the right in-row offset depends on how m_veciSeekPos is built
+ * (EstimateSeekPositions is not visible here) -- TODO confirm.
+ */
+float* CDatImpl::GetRowSeek(std::istream& istm, size_t ind){
+	if(!m_fSeek){
+		cerr << "Don't know how you got here" << endl; // warning only; execution continues
+	}
+
+	size_t iRow, iColumn;
+	size_t i, iNumGenes;
+	iNumGenes = GetGenes();
+	float* adScores = (float*)malloc(iNumGenes * sizeof(float)); // returned to the caller; result not checked
+
+	size_t j; // not used in this function
+	for(i=0; i<ind; i++){ // entries before ind: one seek and one float read per earlier row
+		iRow = i;
+		iColumn = ind - 1;
+		size_t offset1 = m_iHeader + m_veciSeekPos[iRow] + iColumn * sizeof(float);
+		istm.seekg(offset1, ios_base::beg);
+		float v;
+		char *p = (char*) &v;
+		istm.read(p, sizeof(float)); // read result not checked
+		adScores[i] = v;
+	}
+
+	adScores[ind] = CMeta::GetNaN(); // entry at the requested index itself
+
+	int iSize = iNumGenes - (ind+1); // number of entries after ind
+	if(iSize==0){
+		return adScores;
+	}
+
+	float *v = (float*)malloc(iSize*sizeof(float)); // scratch buffer, freed below
+	char *p = (char*) v;
+	istm.seekg(m_iHeader+m_veciSeekPos[ind], ios_base::beg);
+	istm.read(p, iSize*sizeof(float)); // entries after ind come from one contiguous read
+	for(i=0; i<iSize; i++){ // NOTE(review): size_t i is compared with int iSize
+		adScores[i+ind+1] = v[i];
+	}
+	free(v);
+
+	return adScores;
+}
+
+/* still to be tested
+ * used for BINARY mode, and DAB file only, i.e. Float matrix
+ *
+ * Name-based variant: looks up strGene with GetGeneIndex and forwards to
+ * GetRowSeek( istm, ind ).
+ *
+ * Returns NULL when GetGeneIndex yields -1 (gene missing); otherwise returns
+ * whatever the index-based overload returns.
+ *
+ * NOTE(review): if m_fSeek is not set, only a message is printed.
+ */
+float* CDatImpl::GetRowSeek(std::istream& istm, std::string &strGene){
+	if(!m_fSeek){
+		cerr << "Don't know how you got here" << endl; // warning only; execution continues
+	}
+	size_t i; // not used in this function
+	size_t iNumGenes = GetGenes(); // value is not used afterwards
+	size_t ind;
+
+	if( (ind = GetGeneIndex(strGene) ) ==-1){ // -1 converts to the largest size_t value in this comparison
+		return NULL; //missing gene
+	}
+
+	return GetRowSeek(istm, ind);
+}
+
+
 bool CDatImpl::OpenQdab( std::istream& istm ) {
   size_t	iTotal, i, j, num_bins, num_bits, iPos;
 	float*	adScores;
 		m_vecstrGenes.resize( iCount );
 		for( i = 0; i < iCount; ++i ) {
 			DabGene( istm, acBuf );
-			m_vecstrGenes[ i ] = acBuf; } }
+			m_vecstrGenes[ i ] = acBuf;
+			m_mapstrGenes[ acBuf ] = i; } }
 	else {
 		set<string>					setstrGenes;
 		set<string>::const_iterator	iterGenes;
  * \see
  * Save | CPCL
  */
-bool CDat::Open( const char* szFile, bool fMemmap, size_t iSkip, bool fZScore, bool fDuplicates ) {
-	ifstream	ifsm;
+bool CDat::Open( const char* szFile, bool fMemmap, size_t iSkip, bool fZScore, bool fDuplicates, bool fSeek ) {
 	EFormat		eFormat;
 	size_t		i;
 
 			return false; }
 		return OpenHelper( ); }
 
-	ifsm.open( szFile, ( ( eFormat == EFormatText ) || ( eFormat == EFormatPCL ) ) ? ios_base::in :
+	m_ifsm.open( szFile, ( ( eFormat == EFormatText ) || ( eFormat == EFormatPCL ) ) ? ios_base::in :
 		ios_base::binary );
-	if( !ifsm.is_open( ) )
+	if( !m_ifsm.is_open( ) )
 		return false;
-	return Open( ifsm, eFormat, (float)HUGE_VAL, fDuplicates, iSkip, fZScore ); }
+
+	if(fSeek){
+		m_fSeek = true;
+	}
+
+	return Open( m_ifsm, eFormat, (float)HUGE_VAL, fDuplicates, iSkip, fZScore, fSeek ); }
 
 bool CDatImpl::OpenHelper( ) {
 	unsigned char*	pb;
 		 * \brief
 		 * Binary format listing null-terminated element name strings followed by bits representing the quantized bins.
 		 */		
-		EFormatQdab = EFormatSparse + 1 
+		EFormatQdab = EFormatSparse + 1
+
 	};
 
 	/*!
 		ENormalizePCC		= ENormalizeNormCDF + 1
 	};
 
+
 	bool Open( const char* szFile, bool fMemmap = false, size_t iSkip = 2, bool fZScore = false,
-		bool fDuplicates = false );
+		bool fDuplicates = false, bool fSeek = false );
 	bool Open( std::istream& istm, EFormat eFormat = EFormatBinary, float dDefault = HUGE_VAL,
-		bool fDuplicates = false, size_t iSkip = 2, bool fZScore = false );
+		bool fDuplicates = false, size_t iSkip = 2, bool fZScore = false, bool fSeek = false );
 	bool Open( const CSlim& Slim );
 	bool Open( const CSlim& SlimPositives, const CSlim& SlimNonnegatives );
 	bool Open( const std::vector<std::string>& vecstrGenes, bool fClear = true, const char* szFile = NULL );
 			  float dEdgeAggressiveness = 0.5, bool fAbsolute = false, const std::vector<float>* pvecdWeights = NULL );
 	void NormalizeQuantiles( size_t iQuantiles );
 
+	float* GetRowSeek(std::string &strGene){ // row lookup by gene name, reading through the member stream m_ifsm
+		return CDatImpl::GetRowSeek(m_ifsm, strGene); // yields NULL when the gene is not found
+	}
+	float* GetRowSeek(size_t &i){ // row lookup by gene index; NOTE(review): i is taken by non-const reference
+		return CDatImpl::GetRowSeek(m_ifsm, i); // forwards to the stream-based overload using m_ifsm
+	}
+
 	void Clear( float dValue ) {
 		size_t	i;
 
 
 		return CDatImpl::GetGene( strGene ); }
 
+	float* GetFullRow( size_t iY ) { // public forwarder for row iY; CDatabaseImpl::Open calls free( ) on the result
+		return CDatImpl::GetFullRow(iY);
+	}
+
+
 	/*!
 	 * \brief
 	 * Return the value at the requested CDat position.
 						break; }
 				Set( i, j, dTwo );
 				Set( iOne, iTwo, dOne ); } }
+
+
 };
 
 }

File src/database.cpp

 * Maria D. Chikina
 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
 *
+* Changes made Jun 2012 by Qian Zhu
+*
 * If you use this library, the included executable tools, or any related
 * code in your work, please cite the following publication:
 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
 // CDatabaselet
 ///////////////////////////////////////////////////////////////////////////////
 
-CDatabaselet::CDatabaselet( ) {
-
+CDatabaselet::CDatabaselet( bool useNibble) {
+	m_useNibble = useNibble;
 	m_pmutx = new pthread_mutex_t( );
 	pthread_mutex_init( m_pmutx, NULL );
 }
 	}
 }
 
+/* original method for initializing databaselets, including writing header + pre-allocation */
 bool CDatabaselet::Open( const std::string& strFile, const std::vector<std::string>& vecstrGenes,
 	uint32_t iGenes, uint32_t iDatasets ) {
 	uint32_t	iSize;
 	char*		acFiller;
 
 	m_fstm.clear( );
+	/* Open with overwriting */
 	m_fstm.open( strFile.c_str( ), ios_base::in | ios_base::out | ios_base::binary | ios_base::trunc );
 
 	if( !m_fstm.is_open( ) ) {
 	m_vecstrGenes.resize( vecstrGenes.size( ) );
 	copy( vecstrGenes.begin( ), vecstrGenes.end( ), m_vecstrGenes.begin( ) );
 
+	//allocate space
 	m_fstm.write( (char*)&m_iHeader, sizeof(m_iHeader) );
 	m_fstm.write( (char*)&m_iGenes, sizeof(m_iGenes) );
 	m_fstm.write( (char*)&m_iDatasets, sizeof(m_iDatasets) );
 
 	m_fstm.write((char*)&iSize, sizeof(iSize));
 
+	//write gene-name for only the genes in the databaselets
 	m_iHeader = sizeof(m_iHeader) + sizeof(m_iGenes) + sizeof(m_iDatasets) + sizeof(iSize);
 	for( i = 0; i < m_vecstrGenes.size( ); ++i ) {
 		m_fstm.write( m_vecstrGenes[ i ].c_str( ), m_vecstrGenes[ i ].size( ) + 1);
 		m_iHeader += m_vecstrGenes[ i ].size( ) + 1;
 	}
 
-	m_fstm.seekp( 0 );
+	m_fstm.seekp( 0, ios_base::beg);
 	m_fstm.write( (char*)&m_iHeader, sizeof(m_iHeader) );
 
-	m_fstm.seekp( m_iHeader );
+	//pre-allocations
+	m_fstm.seekp( m_iHeader, ios_base::beg );
 	acFiller = new char[ GetSizeGene( ) ];
 	memset( acFiller, -1, GetSizeGene( ) );
 	for( i = 0; i < m_vecstrGenes.size( ); ++i ){
 		m_fstm.write( acFiller, GetSizeGene( ) );
 	}
 	delete[] acFiller;
+	SetFile(strFile);
 
-	return true; }
+	return true;
+
+}
+
+/* simply opens the file without overwriting
+ *
+ * Clears the state of m_fstm and opens strFileName for binary reading and
+ * writing; ios_base::trunc is not passed, unlike in Open( strFile, ... ).
+ * Returns false (after logging through g_CatSleipnir( ).error) if the stream
+ * is not open afterwards, true otherwise.
+ *
+ * NOTE(review): strFileName is presumably the name stored by SetFile( ) -- TODO confirm.
+ * NOTE(review): the logged message names CDatabaselet::Open, not OpenNoOverwrite.
+ */
+bool CDatabaselet::OpenNoOverwrite() {
+	m_fstm.clear( );
+	/* open without overwriting */
+	m_fstm.open( strFileName.c_str( ), ios_base::in | ios_base::out | ios_base::binary);
+
+	if( !m_fstm.is_open( ) ) {
+		g_CatSleipnir( ).error( "CDatabaselet::Open( %s ) open failed", strFileName.c_str( ));
+		return false;
+	}
+	return true;
+}
 
 bool CDatabaselet::OpenWrite( unsigned char bValue, size_t iOffset, ENibbles eNibbles,
 	unsigned char* abImage ) {
 	unsigned char	b;
 
-#ifndef DATABASE_NIBBLES
-	eNibbles = ENibblesBoth;
-#endif // DATABASE_NIBBLES
+	if(m_useNibble==0){
+		eNibbles = ENibblesBoth;
+	}
 
 	if( abImage )
 		iOffset -= m_iHeader;
 		if( abImage )
 			b = abImage[ iOffset ];
 		else {
-			m_fstm.seekg( iOffset );
+			m_fstm.seekg( iOffset, ios_base::beg);
 			b = m_fstm.get( );
 		}
 	}
 			break;
 
 		case ENibblesBoth:
-			b = bValue; }
+			b = bValue;
+			break;
+	}
 	if( abImage )
 		abImage[ iOffset ] = b;
 	else {
-		m_fstm.seekp( iOffset );
+		m_fstm.seekp( iOffset, ios_base::beg );
 		m_fstm.put( b );
 	}
 
 	return true; }
 
-bool CDatabaselet::Open( const vector<CCompactFullMatrix>& vecData, size_t iBaseGenes, size_t iBaseDatasets,
-	bool fBuffer ) {
+/* original file writing method */
+bool CDatabaselet::Open( const vector<CCompactFullMatrix>& vecData, size_t iBaseGenes, size_t iBaseDatasets, bool fBuffer ) {
 	unsigned char*	abImage;
 	size_t			iSize, iDatum, iGeneOne, iGeneTwo;
 	unsigned char	bOne, bTwo;
 
 	if( fBuffer ) {
+		//iBaseGenes: gene id of first gene in each databaselet
+		//iDataset: dataset id
+		//printf("Number: %d %d %d %d\n", GetSizeGene(), GetSizePair(), iBaseGenes, iBaseDatasets);
 		abImage = new unsigned char[ iSize = ( GetSizeGene( ) * m_vecstrGenes.size( ) ) ];
-		m_fstm.seekg( m_iHeader );
+		m_fstm.seekg( m_iHeader, ios_base::beg );
 		m_fstm.read( (char*)abImage, iSize );
 	}
 	else
 		abImage = NULL;
-	if( iBaseDatasets % 2 )
-		for( iGeneOne = 0; iGeneOne < GetGenes( ); ++iGeneOne )
-			for( iGeneTwo = 0; iGeneTwo < vecData[ 0 ].GetColumns( ); ++iGeneTwo )
-				if( bOne = vecData[ 0 ].Get( iBaseGenes + iGeneOne, iGeneTwo ) )
-					OpenWrite( bOne - 1, GetOffset( iGeneOne, iGeneTwo, iBaseDatasets ), ENibblesHigh,
-						abImage );
-	for( iDatum = ( iBaseDatasets % 2 ); ( iDatum + 1 ) < vecData.size( ); iDatum += 2 )
-		for( iGeneOne = 0; iGeneOne < GetGenes( ); ++iGeneOne )
+
+	//vecData: # of genes in databaselet x # of genes user's list
+
+	//if this is not the first dataset in the dataset block
+	if( iBaseDatasets % 2 ){
+		//iGeneOne: iterate over all genes in this databaselet (# of genes in each databaselet)
+		for( iGeneOne = 0; iGeneOne < GetGenes( ); ++iGeneOne ){
+			//iGeneTwo: iterate over all genes in user's gene list
+			for( iGeneTwo = 0; iGeneTwo < vecData[ 0 ].GetColumns( ); ++iGeneTwo ){
+				//bOne, get the value of the gene located at the position (iBaseGene + iGeneOne, iGeneTwo)
+				if( bOne = vecData[ 0 ].Get( iBaseGenes + iGeneOne, iGeneTwo ) ){
+					//Offset is: m_iHeader + (GetSizeGene() * iOne) + (GetSizePair() * iTwo) + iDataset (for byte case)
+					OpenWrite( bOne - 1, GetOffset( iGeneOne, iGeneTwo, iBaseDatasets ), ENibblesHigh, abImage );
+				}
+			}
+		}
+	}
+
+	for( iDatum = ( iBaseDatasets % 2 ); ( iDatum + 1 ) < vecData.size( ); iDatum += 2 ){
+		for( iGeneOne = 0; iGeneOne < GetGenes( ); ++iGeneOne ){
 			for( iGeneTwo = 0; iGeneTwo < vecData[ iDatum ].GetColumns( ); ++iGeneTwo ) {
 				bOne = vecData[ iDatum ].Get( iBaseGenes + iGeneOne, iGeneTwo );
 				bTwo = vecData[ iDatum + 1 ].Get( iBaseGenes + iGeneOne, iGeneTwo );
 					continue;
 				bOne -= 1;
 				bTwo -= 1;
-#ifdef DATABASE_NIBBLES
-				OpenWrite( ( bOne & 0xF ) | ( bTwo << 4 ), GetOffset( iGeneOne, iGeneTwo, iBaseDatasets +
-					iDatum ), ENibblesBoth, abImage );
-#else // DATABASE_NIBBLES
-				OpenWrite( bOne, GetOffset( iGeneOne, iGeneTwo, iBaseDatasets + iDatum ), ENibblesBoth,
-					abImage );
-				OpenWrite( bTwo, GetOffset( iGeneOne, iGeneTwo, iBaseDatasets + iDatum + 1 ), ENibblesBoth,
-					abImage );
-#endif // DATABASE_NIBBLES
+				if(m_useNibble){
+					OpenWrite( ( bOne & 0xF ) | ( bTwo << 4 ), GetOffset( iGeneOne, iGeneTwo, iBaseDatasets +
+							iDatum ), ENibblesBoth, abImage );
+				}else{
+					OpenWrite( bOne, GetOffset( iGeneOne, iGeneTwo, iBaseDatasets + iDatum ), ENibblesBoth,
+							abImage );
+					OpenWrite( bTwo, GetOffset( iGeneOne, iGeneTwo, iBaseDatasets + iDatum + 1 ), ENibblesBoth,
+							abImage );
+				}
 			}
-	if( iDatum < vecData.size( ) )
-		for( iGeneOne = 0; iGeneOne < GetGenes( ); ++iGeneOne )
-			for( iGeneTwo = 0; iGeneTwo < vecData[ iDatum ].GetColumns( ); ++iGeneTwo )
-				if( bOne = vecData[ iDatum ].Get( iBaseGenes + iGeneOne, iGeneTwo ) )
+		}
+	}
+
+	if( iDatum < vecData.size( ) ){
+		for( iGeneOne = 0; iGeneOne < GetGenes( ); ++iGeneOne ){
+			for( iGeneTwo = 0; iGeneTwo < vecData[ iDatum ].GetColumns( ); ++iGeneTwo ){
+				if( bOne = vecData[ iDatum ].Get( iBaseGenes + iGeneOne, iGeneTwo ) ){
 					OpenWrite( bOne - 1, GetOffset( iGeneOne, iGeneTwo, iBaseDatasets + iDatum ), ENibblesLow,
 						abImage );
+				}
+			}
+		}
+	}
 	if( fBuffer ) {
-		m_fstm.seekp( m_iHeader );
+		m_fstm.seekp( m_iHeader, ios_base::beg );
 		m_fstm.write( (char*)abImage, iSize );
 		delete[] abImage;
 	}
 
 	return true; }
 
+bool CDatabaselet::Get( size_t iOne, size_t iTwo, // unpacks the values stored for pair (iOne, iTwo) from an in-memory image
+		vector<unsigned char>& vecbData, unsigned char *charImage){ // vecbData is cleared and refilled; always returns true
+	size_t	i;
+	size_t offset = GetOffset(iOne, iTwo) - m_iHeader; // index into charImage; presumably charImage starts right after the file header -- TODO confirm
+
+	if(this->m_useNibble==false){
+		// byte mode: copy GetSizePair( ) bytes unchanged
+		vecbData.clear();
+		vecbData.resize(GetSizePair());
+
+		for(i=0; i<vecbData.size(); i++){
+			vecbData[i] = charImage[offset + i];
+		}
+	}else{
+		// nibble mode: each stored byte carries two values, so the output has m_iDatasets entries
+		vecbData.clear();
+		vecbData.resize(m_iDatasets);
+
+
+		for(i=0; i<GetSizePair(); i++){
+			unsigned char b = charImage[offset + i];
+			unsigned char bValue = -1;
+			if( ( bValue = ( b & 0xF ) ) == 0xF ){ // low nibble; 0xF is widened to (unsigned char)-1
+				bValue = -1;
+			}
+			vecbData[ 2 * i ] = bValue;
+
+			if( ( bValue = ( ( b >> 4 ) & 0xF ) ) == 0xF ){ // high nibble, same widening
+				bValue = -1;
+			}
+
+			if((2 * i + 1)==m_iDatasets){ // odd dataset count: the final high nibble has no output slot
+				break;
+			}
+			vecbData[ (2 * i) + 1 ] = bValue;
+		}
+	}
+
+	return true;
+}
+
+/*	static function, combine multiple databaselets (that share the same genes, i.e. m_vecStrGenes),
+	and output result to a single file, or output one-gene per file (if databaselet contains multiple genes)
+ 	bSplit: whether or not to output one-gene per file
+	Works for both nibble and byte
+
+	vecDatabaselet: databaselets to combine; each must have its m_fstm open.
+	strOutDirectory: directory the output file(s) are created in.
+
+	All inputs must agree with the first one on GetGenes( ), on m_useNibble and on
+	every gene name in order; otherwise "Databaselets are not consistent!" is printed
+	and false is returned. The output dataset count is the sum of GetDatasets( ) over
+	all inputs, and for each gene pair the per-input values are laid out one input
+	after another in vecDatabaselet order.
+
+	Output naming: with bSplit each gene is written to strOutDirectory/<gene>.db;
+	otherwise one file is written, named after the last '/'-separated component of
+	the first input's strFileName.
+
+	Returns true on success, false on inconsistency or when a stream is not open.
+
+	NOTE(review): vecDatabaselet[0] is dereferenced without checking for an empty vector.
+	NOTE(review): every read below requests iImageSize bytes (the size of the whole
+	buffer) even though charImages[i] for i > 0 points into the middle of that
+	buffer; this presumably relies on the stream reaching end-of-file first -- TODO confirm.
+	NOTE(review): the results of malloc and of DBS.Open are not checked.
+*/
+bool CDatabaselet::Combine(std::vector<CDatabaselet*>& vecDatabaselet,
+		std::string strOutDirectory, bool bSplit){
+
+	/* for checking on consistency of databaselets */
+	bool bIsConsistent = true;
+	bool fUseNibble;
+
+	size_t i, j;
+	uint32_t iGenes, iDatasets;
+
+	CDatabaselet *first = vecDatabaselet[0];
+	fUseNibble = first->m_useNibble;
+
+	iGenes = first->GetGenes();
+	iDatasets = first->GetDatasets(); // running total; the remaining inputs are added in the loop below
+
+	vector<string> vecGenes;
+	vecGenes.resize(iGenes);
+
+	for(i=0; i<iGenes; i++){
+		vecGenes[i] = first->GetGene(i);
+	}
+
+	for(i=1; bIsConsistent && i<vecDatabaselet.size(); i++){
+		if(iGenes!=vecDatabaselet[i]->GetGenes() || fUseNibble!=vecDatabaselet[i]->m_useNibble){
+			bIsConsistent = false;
+			break;
+		}
+		for(j=0; j<iGenes; j++){
+			if(vecGenes[j]!=vecDatabaselet[i]->GetGene(j)){
+				bIsConsistent = false;
+				break; // leaves only the inner loop; the outer loop stops on its bIsConsistent test
+			}
+		}
+		iDatasets+=vecDatabaselet[i]->GetDatasets();
+	}
+
+	if(!bIsConsistent){
+		cerr << "Databaselets are not consistent!" << endl;
+		return false;
+	}
+
+	/* load all Databaselets into memory, for efficiency */
+	unsigned char **charImages =
+			(unsigned char**)malloc(vecDatabaselet.size()*sizeof(unsigned char*));
+	size_t iImageSize = iDatasets * iGenes * first->m_iGenes; // one byte per (dataset, gene, gene) triple
+	charImages[0] = (unsigned char*)malloc(iImageSize*sizeof(unsigned char)); // single allocation shared by all inputs
+	for(i=1; i<vecDatabaselet.size(); i++){
+		charImages[i] = charImages[i-1] + vecDatabaselet[i-1]->m_iDatasets * first->m_iGenes * iGenes; // sub-buffer i starts after input i-1's share
+	}
+
+	/* read databaselet into charImages */
+	for(i=0; i<vecDatabaselet.size(); i++){
+		CDatabaselet *current = vecDatabaselet[i];
+		if(current->m_fstm.is_open()){
+			current->m_fstm.seekg(current->m_iHeader, ios_base::beg);
+			current->m_fstm.read((char*) charImages[i], iImageSize); // NOTE(review): requested length is the whole buffer size, not this input's share
+		}else{
+			cerr << "CDatabaselet is not open." << endl;
+			free(charImages[0]);
+			free(charImages);
+			return false;
+		}
+	}
+
+	/* splitting to one gene per file after combine */
+	if(bSplit){
+
+		for(i=0; i<iGenes; i++){
+
+			/* open a new Databaselet containing only one gene */
+			string thisGene = first->GetGene(i);
+			string path = strOutDirectory + "/" + thisGene + ".db";
+			vector<string> vecstrThisGene;
+			vecstrThisGene.push_back(thisGene);
+
+			/* Create a new Databaselet */
+			size_t iSize;
+			CDatabaselet DBS(first->m_useNibble);
+			DBS.Open(path.c_str(), vecstrThisGene, first->m_iGenes, iDatasets); // return value not checked; the is_open test below catches a failed open
+			unsigned char *abImage = (unsigned char*)
+				malloc( iSize = (DBS.GetSizeGene( ) * DBS.m_vecstrGenes.size( ) ));
+			size_t iDatum;
+			size_t iGeneOne, iGeneTwo;
+			size_t offset2, offset3;
+			iGeneOne = i;
+
+			if(first->m_useNibble==false){
+				/* m_iGenes is all the genes in the genome */
+				for( iGeneTwo = 0; iGeneTwo < first->m_iGenes; ++iGeneTwo ){
+					offset2 = DBS.GetSizePair()*iGeneTwo;
+					int totalSum = 0; // bytes already written for this gene pair
+					for( iDatum = 0; iDatum  < vecDatabaselet.size(); iDatum ++ ){
+						vector<unsigned char> vc;
+						CDatabaselet *current = vecDatabaselet[iDatum];
+						current->Get( iGeneOne, iGeneTwo, vc, charImages[iDatum]);
+						offset3 = offset2 + totalSum;
+						for(j=0; j<vc.size(); j++){
+							abImage[offset3 + j] = vc[j];
+						}
+						totalSum+=vc.size();
+					}
+				}
+			}else{
+				size_t j; // shadows the function-level j
+				unsigned char *abImage2 = (unsigned char*)
+					malloc(iDatasets); // one unpacked value per output dataset for the current gene pair
+
+				/* m_iGenes is all the genes in the genome */
+				for( iGeneTwo = 0; iGeneTwo < first->m_iGenes; ++iGeneTwo ){
+					offset2 = DBS.GetSizePair() * iGeneTwo;
+					int totalSum = 0;
+					for( iDatum = 0; iDatum  < vecDatabaselet.size(); iDatum ++ ){
+						vector<unsigned char> vc;
+						CDatabaselet *current = vecDatabaselet[iDatum];
+						current->Get( iGeneOne, iGeneTwo, vc, charImages[iDatum]);
+						offset3 = totalSum;
+						for(j=0; j<vc.size(); j++){
+							abImage2[offset3+j] = vc[j];
+						}
+						totalSum+=vc.size();
+					}
+					for(j=0; j+1 < iDatasets; j+=2){ // repack two values per byte: even index in the low nibble, odd index in the high nibble
+						abImage[offset2 + j / 2] = (abImage2[j] & 0xF) | (abImage2[j+1] << 4);
+					}
+					if(j<iDatasets){ // odd dataset count: last value in the low nibble, high nibble set to 0xF
+						unsigned char bValue = abImage2[iDatasets - 1];
+						unsigned char b = 255;
+						abImage[offset2 + j / 2] = ( bValue & 0xF ) | ( b & 0xF0 );
+					}
+				}
+
+				free(abImage2);
+			}
+
+			/* close fstream */
+			if(DBS.m_fstm.is_open()){
+				DBS.m_fstm.seekp( DBS.m_iHeader, ios_base::beg );
+				DBS.m_fstm.write( (char*)abImage, iSize );
+				DBS.m_fstm.close();
+			}else{
+				cerr << "CDatabaselet is not opened." << endl;
+				free(abImage);
+				free(charImages[0]);
+				free(charImages);
+				return false;
+			}
+
+			free(abImage);
+
+		}
+
+	/* do not split, just combine into one file */
+	}else{
+
+		vector<string> strTok;
+		CMeta::Tokenize(first->strFileName.c_str(), strTok, "/");
+		string path = strOutDirectory + "/" + strTok[strTok.size()-1]; // reuse the first input's file name in the output directory
+
+		CDatabaselet DBS(first->m_useNibble);
+
+		DBS.Open(path.c_str(), first->m_vecstrGenes, first->m_iGenes, iDatasets); // return value not checked; the is_open test below catches a failed open
+
+		size_t iDatum;
+		size_t iSize;
+		unsigned char *abImage = (unsigned char*)
+				malloc( iSize = (DBS.GetSizeGene( ) * DBS.m_vecstrGenes.size( ) ) );
+		size_t iGeneOne, iGeneTwo;
+		size_t offset1, offset2, offset3;
+
+		if(first->m_useNibble==false){
+			for(iGeneOne = 0; iGeneOne < first->GetGenes(); ++iGeneOne){
+				offset1 = DBS.GetSizeGene() * iGeneOne;
+				for( iGeneTwo = 0; iGeneTwo < first->m_iGenes; ++iGeneTwo ){
+					offset2 = DBS.GetSizePair()*iGeneTwo;
+					int totalSum = 0; // bytes already written for this gene pair
+					for( iDatum = 0; iDatum  < vecDatabaselet.size(); iDatum ++ ){
+						vector<unsigned char> vc;
+						CDatabaselet *current = vecDatabaselet[iDatum];
+						current->Get( iGeneOne, iGeneTwo, vc, charImages[iDatum]);
+						offset3 = offset1 + offset2 + totalSum;
+						for(j=0; j<vc.size(); j++){
+							abImage[offset3 + j] = vc[j];
+						}
+						totalSum+=vc.size();
+					}
+				}
+			}
+		}else{
+			size_t j; // shadows the function-level j
+			unsigned char *abImage2 = (unsigned char*)
+				malloc(DBS.m_iDatasets);
+			/* m_iGenes is all the genes in the genome */
+			for(iGeneOne = 0; iGeneOne < first->GetGenes(); ++iGeneOne){
+				offset1 = DBS.GetSizeGene() * iGeneOne;
+				for( iGeneTwo = 0; iGeneTwo < first->m_iGenes; ++iGeneTwo ){
+					offset2 = DBS.GetSizePair()*iGeneTwo;
+					int totalSum = 0;
+					for( iDatum = 0; iDatum  < vecDatabaselet.size(); iDatum ++ ){
+						vector<unsigned char> vc;
+						CDatabaselet *current = vecDatabaselet[iDatum];
+						current->Get( iGeneOne, iGeneTwo, vc, charImages[iDatum]);
+						offset3 = totalSum;
+						for(j=0; j<vc.size(); j++){
+							abImage2[offset3 + j] = vc[j];
+						}
+						totalSum+=vc.size();
+					}
+					for(j=0; j+1 < iDatasets; j+=2){ // repack two values per byte: even index in the low nibble, odd index in the high nibble
+						abImage[offset1 + offset2 + j / 2] = (abImage2[j] & 0xF) | (abImage2[j+1] << 4);
+					}
+					if(j<iDatasets){ // odd dataset count: last value in the low nibble, high nibble set to 0xF
+						unsigned char bValue = abImage2[iDatasets - 1];
+						unsigned char b = 255;
+						abImage[offset1 + offset2 + j / 2] = ( bValue & 0xF ) | ( b & 0xF0 );
+					}
+				}
+			}
+			free(abImage2);
+		}
+
+		/* close the databaselet */
+		if(DBS.m_fstm.is_open()){
+			DBS.m_fstm.seekp( DBS.m_iHeader, ios_base::beg );
+			DBS.m_fstm.write( (char*)abImage, iSize );
+			DBS.m_fstm.close();
+		}else{
+			cerr << "CDatabaselet is not opened." << endl;
+			free(abImage);
+			free(charImages[0]);
+			free(charImages);
+			return false;
+		}
+
+		free(abImage);
+	}
+
+	free(charImages[0]);
+	free(charImages);
+
+	return true;
+}
+
 bool CDatabaselet::Get( size_t iOne, size_t iTwo, vector<unsigned char>& vecbData ) const {
 	size_t	i;
 
 		m_vecstrGenes[ i ] = pc;
 	delete[] acBuffer;
 
+	SetFile(strFile);
+
 	return true; }
 
 ///////////////////////////////////////////////////////////////////////////////
 ///////////////////////////////////////////////////////////////////////////////
 
 bool CDatabase::Open( const std::vector<std::string>& vecstrGenes, const std::string& strInputDirectory,
-	const IBayesNet* pBayesNet, const std::string& strOutputDirectory, size_t iFiles ) {
+	const IBayesNet* pBayesNet, const std::string& strOutputDirectory, size_t iFiles) {
 	vector<string>	vecstrNodes, vecstrSubset;
 	size_t			i, j;
 	char			acNumber[ 16 ];
 		vecstrNodes.resize( vecstrNodes.size( ) - 1 );
 	m_vecpDBs.resize( iFiles );
 	for( i = 0; i < m_vecpDBs.size( ); ++i ) {
-		m_vecpDBs[ i ] = new CDatabaselet( );
+		m_vecpDBs[ i ] = new CDatabaselet( m_useNibble);
 		vecstrSubset.clear( );
 		for( j = i; j < vecstrGenes.size( ); j += m_vecpDBs.size( ) )
 			vecstrSubset.push_back( vecstrGenes[ j ] );
 	for( i = 0; i < vecstrGenes.size( ); ++i )
 		m_mapstriGenes[ m_vecpDBs[ i % m_vecpDBs.size( ) ]->GetGene( i / m_vecpDBs.size( ) ) ] = i;
 
-	return CDatabaseImpl::Open( vecstrGenes, vecstrNodes ); }
+	return CDatabaseImpl::Open( vecstrGenes, vecstrNodes );
+}
 
+/* Version of Open() that takes a list of datasets as input. Key method */
 bool CDatabase::Open( const std::vector<std::string>& vecstrGenes, const std::vector<std::string>& vecstrDatasets,
 	const std::string& strInputDirectory, const std::string& strOutputDirectory, size_t iFiles){
 
 	}
 
 	m_vecpDBs.resize( iFiles );
-	for( i = 0; i < m_vecpDBs.size( ); ++i ) {	//block size, 1000
-		m_vecpDBs[ i ] = new CDatabaselet( );
+	int iNumFilesOpen = 1000;
+	for( i = 0; i < m_vecpDBs.size( ); ++i ) {
+		m_vecpDBs[ i ] = new CDatabaselet( m_useNibble );
+	}
+
+	size_t k;
+
+	for( i = 0; i < m_vecpDBs.size( ); ++i ) { //block size (such as 1000)
+		if(i%iNumFilesOpen==0 && i>0){
+			for(k=0; k<iNumFilesOpen; k++){
+				m_vecpDBs[i-k-1]->CloseFile();
+			}
+		}
 		vecstrSubset.clear( );
 		for( j = i; j < vecstrGenes.size( ); j += m_vecpDBs.size( ) )
 			vecstrSubset.push_back( vecstrGenes[ j ] ); //contains index for 1000, 2000, 3000th genes
 		sprintf( acNumber, "%08u", i );
-		strFile = strOutputDirectory + '/' + acNumber + c_acExtension;
+		if(iFiles>=vecstrGenes.size()){
+			//if one gene per file, let databaselet filename be gene-name
+			strFile = strOutputDirectory + '/' + vecstrSubset[0] + c_acExtension;
+		}else{
+			strFile = strOutputDirectory + '/' + acNumber + c_acExtension;
+		}
+
 		if( !( i % 100 ) )
 			g_CatSleipnir( ).notice( "CDatabase::Open( %s, %d ) initializing file %d/%d",
 				strOutputDirectory.c_str( ), iFiles, i, m_vecpDBs.size( ) );
 			g_CatSleipnir( ).error( "CDatabase::Open( %s, %d ) could not open file %s",
 				strOutputDirectory.c_str( ), iFiles, strFile.c_str( ) );
 			return false; } }
+
+	for( i = 0; i < m_vecpDBs.size( ); ++i ){
+		m_vecpDBs[i]->CloseFile();
+	}
+
 	for( i = 0; i < vecstrGenes.size( ); ++i )
 		m_mapstriGenes[ m_vecpDBs[ i % m_vecpDBs.size( ) ]->GetGene( i / m_vecpDBs.size( ) ) ] = i;
 
-	return CDatabaseImpl::Open( vecstrGenes, vecstrNodes ); }
+	return CDatabaseImpl::Open( vecstrGenes, vecstrNodes ); 
+}
 
-
+/* the key Open() method for Data2DB conversion */
 bool CDatabaseImpl::Open( const std::vector<std::string>& vecstrGenes,
 	const std::vector<std::string>& vecstrFiles ) {
-	size_t			i, j, iOne, iTwo, iOutBlock, iOutBase, iOutOffset, iInBlock, iInBase, iInOffset;
+	size_t			i, j, k, iOne, iTwo, iOutBlock, iOutBase, iOutOffset, iInBlock, iInBase, iInOffset;
 	vector<size_t>	veciGenes;
 	float			d;
 
+	/* define number of threads to concurrently process datasets */
+	omp_set_num_threads(4);
+
 	veciGenes.resize( vecstrGenes.size( ) );
 	iOutBlock = ( m_iBlockOut == -1 ) ? m_vecpDBs.size( ) : m_iBlockOut;
 	iInBlock = ( m_iBlockIn == -1 ) ? vecstrFiles.size( ) : m_iBlockIn;
+
+	int iNumFilesOpen = 1000;
+
+	/* blocking parameter, iOutBase: number of databaselets to process at a time */
 	for( iOutBase = 0; iOutBase < m_vecpDBs.size( ); iOutBase += iOutBlock ) {
 		vector<string>	vecstrMyGenes;
 		vector<size_t>	veciMyGenes;
 			const CDatabaselet&	DB	= *m_vecpDBs[ iOutBase + iOutOffset ];
 
 			for( i = 0; i < DB.GetGenes( ); ++i )
-				vecstrMyGenes.push_back( DB.GetGene( i ) ); }
+				vecstrMyGenes.push_back( DB.GetGene( i ) );
+		}
+
 		veciMyGenes.resize( vecstrMyGenes.size( ) );
 
 		for( iInBase = 0; iInBase < vecstrFiles.size( ); iInBase += iInBlock ) {
 			vector<CCompactFullMatrix>	vecData;
-
 			vecData.resize( ( ( iInBase + iInBlock ) > vecstrFiles.size( ) ) ?
 				( vecstrFiles.size( ) - iInBase ) : iInBlock );
 			for( iInOffset = 0; iInOffset < vecData.size( ); ++iInOffset ) {
 				CDataPair	Dat;
 
-				//printf("Reading dab\n");
-				if( !Dat.Open( (vecstrFiles[ iInBase + iInOffset ] + c_acDAB).c_str( ), false, m_fMemmap ) ) {
-					g_CatSleipnir( ).error( "CDatabaseImpl::Open( ) could not open %s",
-						(vecstrFiles[ iInBase + iInOffset ] + c_acDAB).c_str( ) );
+				if( !Dat.Open( (vecstrFiles[ iInBase + iInOffset ] + c_acDAB).c_str( ),
+						false, m_fMemmap) ) {
 				    if( !Dat.Open( (vecstrFiles[ iInBase + iInOffset ] + c_acQDAB).c_str( ), false, m_fMemmap ) ) {
+						g_CatSleipnir( ).error( "CDatabaseImpl::Open( ) could not open %s",
+							(vecstrFiles[ iInBase + iInOffset ] + c_acDAB).c_str( ) );
 				    	g_CatSleipnir( ).error( "CDatabaseImpl::Open( ) could not open %s",
-						(vecstrFiles[ iInBase + iInOffset ] + c_acQDAB).c_str( ) );
+							(vecstrFiles[ iInBase + iInOffset ] + c_acQDAB).c_str( ) );
 				    	return false;
 				    }
 				}
 
-				//printf("Finished reading dab\n");
 				for( i = 0; i < veciMyGenes.size( ); ++i )
 					veciMyGenes[ i ] = Dat.GetGene( vecstrMyGenes[ i ] );
 				for( i = 0; i < veciGenes.size( ); ++i )
 					veciGenes[ i ] = Dat.GetGene( vecstrGenes[ i ] );
-				vecData[ iInOffset ].Initialize( veciMyGenes.size( ), veciGenes.size( ), 16, true );
-				//printf("Done\n");
 
-				/*printf("veciMyGenes %d\n", veciMyGenes.size());
+				if(m_useNibble){
+					vecData[ iInOffset ].Initialize( veciMyGenes.size( ), veciGenes.size( ), 16, true );
+				}else{
+					vecData[ iInOffset ].Initialize( veciMyGenes.size( ), veciGenes.size( ), 256, true );
+				}
+
+				//#pragma omp parallel for \
+				shared(Dat, veciGenes, veciMyGenes, vecData) \
+				private(j, i) \
+				schedule(static)
 				for(i=0; i<veciMyGenes.size(); i++){
-					printf("%d ", veciMyGenes[i]);
+					size_t t = veciMyGenes[i];
+					if(t==-1) continue;
+					float *d_array = Dat.GetFullRow(t);
+					for(j=0; j<veciGenes.size(); j++){
+						size_t s = veciGenes[j];
+						if(s == -1) continue;
+						if(s == t) continue;
+						vecData[iInOffset].Set(i,j,Dat.Quantize(d_array[s])+1);
+					}
+					free(d_array);
 				}
-				printf("\n");*/
 
-				for( i = 0; i < veciMyGenes.size( ); ++i ) {
-					if( ( iOne = veciMyGenes[ i ] ) == -1 )
-						continue;
-					for( j = 0; j < veciGenes.size( ); ++j ){
-						if( ( ( iTwo = veciGenes[ j ] ) != -1 ) &&
-							!CMeta::IsNaN( d = Dat.Get( iOne, iTwo ) ) ){
-							vecData[ iInOffset ].Set( i, j, Dat.Quantize( d ) + 1 ); 
-						}
-					}
-				}
 
 			}
 
 				m_vecpDBs.size( ) ); ++iOutOffset ) {
 				CDatabaselet&	DB	= *m_vecpDBs[ iOutBase + iOutOffset ];
 
+				/* close files if too many file handles opened */
+				if(iOutOffset>0 && (iOutOffset%iNumFilesOpen==0 || 
+					(iOutBase + iOutOffset)==m_vecpDBs.size()-1)){
+					for(k=0; k<iNumFilesOpen; k++){
+						if(iOutOffset + iOutBase - 1 - k == 0){
+							break;
+						}
+						m_vecpDBs[iOutOffset + iOutBase - 1 - k]->CloseFile();
+					}
+				}
+
+				DB.OpenNoOverwrite();
+
 				if( !( iOutOffset % 100 ) )
 					cerr << "Processing offset " << iOutOffset << '/' << iOutBlock << endl;
-				if( !DB.Open( vecData, i, iInBase, m_fBuffer ) )
+				//iInBase, for B=2, iInBase = 0, 2, 4 (total 5 datasets)
+				//i = 0 18 36 ...
+				if( !DB.Open( vecData, i, iInBase, m_fBuffer ) ){
 					return false;
-				i += DB.GetGenes( ); } } }
+				}
 
-	return true; }
+				i += DB.GetGenes( );
+			}
+		}
+	}
+
+	return true;
+}
 
 bool CDatabase::Open( const std::string& strInputDirectory ) {
 	size_t			i, j;
 	Clear( );
 	m_vecpDBs.resize( vecstrFiles.size( ) );
 	for( i = 0; i < m_vecpDBs.size( ); ++i ) {
-		m_vecpDBs[ i ] = new CDatabaselet( );
+		m_vecpDBs[ i ] = new CDatabaselet( m_useNibble);
 		if( !m_vecpDBs[ i ]->Open( vecstrFiles[ i ] ) )
 			return false;
 		for( j = 0; j < m_vecpDBs[ i ]->GetGenes( ); ++j )
 			m_mapstriGenes[ m_vecpDBs[ i ]->GetGene( j ) ] = ( j * m_vecpDBs.size( ) ) + i; }
 
-	return true; }
+	return true;
+}
+
+
+
 
 }

File src/database.h

 	 * SetBlockIn | SetBlockOut
 	 */
 	bool Open( const std::vector<std::string>& vecstrGenes, const std::string& strInputDirectory,
-		const IBayesNet* pBayesNet, const std::string& strOutputDirectory, size_t iFiles );
+		const IBayesNet* pBayesNet, const std::string& strOutputDirectory, size_t iFiles);
+
 
 	//Qian
 	bool Open( const std::vector<std::string>& vecstrGenes, const std::vector<std::string>& vecstrDatasets,
 		const std::string& strInputDirectory, const std::string& strOutputDirectory, size_t iFiles);
 
+	/*!
+	 * \brief
+	 * Construct a database, forwarding the nibble flag to CDatabaseImpl. (Qian)
+	 *
+	 * \param isNibble
+	 * Passed unchanged to the CDatabaseImpl constructor.
+	 */
+	CDatabase( bool isNibble ) : CDatabaseImpl( isNibble ) { }
+
 	/*!
 	 * \brief
 	 * Open an existing database from subfiles in the given directory.
 	 */
 	bool Open( const std::string& strInputDirectory );
 
+
 	/*!
 	 * \brief
 	 * Retrieve data values from all datasets for a given gene pair.

File src/databasei.h

 #ifndef DATABASEI_H
 #define DATABASEI_H
 
-#define DATABASE_NIBBLES
-
 #include <fstream>
 #include <map>
 #include <vector>
 		ENibblesBoth
 	};
 
-	CDatabaselet( );
+	CDatabaselet( bool );
 	~CDatabaselet( );
 
 	bool Open( const std::string&, const std::vector<std::string>&, uint32_t, uint32_t );
 	bool Open( const std::string& );
 	bool Open( const std::vector<CCompactFullMatrix>&, size_t, size_t, bool );
+
+	bool OpenNoOverwrite();
+
 	bool OpenWrite( unsigned char, size_t, ENibbles, unsigned char* );
+
+	/* Get pair by referring to memory cache (ie charImage) of the db file */
+	bool Get( size_t iOne, size_t iTwo, vector<unsigned char>& vecbData, unsigned char *charImage);
+
+	/* Get pair by seeking in db file */
 	bool Get( size_t, size_t, std::vector<unsigned char>& ) const;
 	bool Get( size_t, std::vector<unsigned char>&, bool ) const;
 	bool Get( size_t, const std::vector<size_t>&, std::vector<unsigned char>&, bool ) const;
 
+	static bool Combine(std::vector<CDatabaselet*>& vecDatabaselet,
+			std::string strOutDirectory, bool bSplit = true);
+
 	size_t GetGenes( ) const {
 
 		return m_vecstrGenes.size( ); }
 		std::streamoff	iOffset;
 
 		iOffset = (std::streamoff)GetOffset( iOne, iTwo, iDataset );
-#ifdef DATABASE_NIBBLES
-		if( !fBoth ) {
-			unsigned char	b;
 
-			m_fstm.seekg( iOffset );
-			b = m_fstm.get( );
-			bValue = ( iDataset % 2 ) ? ( ( b & 0xF ) | ( bValue << 4 ) ) :
-				( ( b & 0xF0 ) | ( bValue & 0xF ) ); }
-#endif // DATABASE_NIBBLES
+		if(m_useNibble){
+			if( !fBoth ) {
+				unsigned char	b;
+				m_fstm.seekg( iOffset );
+				b = m_fstm.get( );
+				bValue = ( iDataset % 2 ) ? ( ( b & 0xF ) | ( bValue << 4 ) ) :
+						( ( b & 0xF0 ) | ( bValue & 0xF ) ); 
+				}
+		}
+
 		m_fstm.seekp( iOffset );
 		m_fstm.put( bValue );
 	}
 
 		return m_iDatasets; }
 
+	/* Close m_fstm, but only when it is currently open. */
+	void CloseFile(){
+		if(!m_fstm.is_open()){
+			return;
+		}
+		m_fstm.close();
+	}
+
+	/* Store the given string in strFileName.
+	 * The argument is taken by const reference and no longer shadows the
+	 * std namespace, so the type can be spelled std::string in this header. */
+	void SetFile(const std::string& strFile){
+		strFileName = strFile;
+	}
+
 private:
 
 	size_t GetOffsetDataset( size_t iDataset ) const {
-
-		return ( iDataset
-#ifdef DATABASE_NIBBLES
-			/ 2
-#endif // DATABASE_NIBBLES
-			); }
+		if(m_useNibble){
+			return (iDataset / 2);
+		}else{
+			return iDataset;
+		}
+	}
 
 	size_t GetSizePair( ) const {
 
-		return ( ( m_iDatasets
-#ifdef DATABASE_NIBBLES
-			+ 1 ) / 2
-#else // DATABASE_NIBBLES
-			)
-#endif // DATABASE_NIBBLES
-			); }
+		if(m_useNibble){
+			return (m_iDatasets + 1) / 2;
+		}else{
+			return m_iDatasets;
+		}
+
+	}
 
 	size_t GetSizeGenes( ) const {
 
 	uint32_t					m_iGenes;
 	uint32_t					m_iDatasets;
 	std::vector<std::string>	m_vecstrGenes;
+	std::string					strFileName;
 
+	bool						m_useNibble;
 	mutable std::fstream		m_fstm;
 	mutable pthread_mutex_t*	m_pmutx;
 };
 	static const char	c_acQDAB[];
 	static const char	c_acExtension[];
 
-	CDatabaseImpl( ) : m_fMemmap(false), m_iBlockIn(-1), m_iBlockOut(-1), m_fBuffer(false) { }
+	/* useNibble is stored in m_useNibble; the remaining settings start out as
+	 * false (m_fMemmap, m_fBuffer) and -1 (m_iBlockIn, m_iBlockOut). */
+	CDatabaseImpl(bool useNibble) : m_fMemmap(false), m_iBlockIn(-1), m_iBlockOut(-1),
+		m_fBuffer(false), m_useNibble(useNibble) { }
 
 	~CDatabaseImpl( ) {
 
 
 	void Clear( ) {
 		size_t	i;
-
 		m_mapstriGenes.clear( );
 		for( i = 0; i < m_vecpDBs.size( ); ++i )
 			delete m_vecpDBs[ i ];
 	size_t							m_iBlockOut;
 	std::vector<CDatabaselet*>		m_vecpDBs;
 	std::map<std::string, size_t>	m_mapstriGenes;
+	/* defines whether the CDatabaselet is nibble type. If false, it is byte by default.*/
+	bool							m_useNibble;
 };
 
 }

File src/datapair.cpp

  * CDat::Open
  */
 bool CDataPair::Open( const char* szDatafile, bool fContinuous, bool fMemmap, size_t iSkip,
-	bool fZScore ) {
+	bool fZScore, bool fSeek ) {
 
 
 	g_CatSleipnir( ).notice( "CDataPair::Open( %s, %d )", szDatafile, fContinuous );
 	  return OpenQdab( szDatafile );
 	}
 	else{
-	  if( !CDat::Open( szDatafile, fMemmap, iSkip, fZScore ) )
+	  if( !CDat::Open( szDatafile, fMemmap, iSkip, fZScore, false, fSeek ) )
 	    return false;
 	  return ( m_fContinuous ? true : OpenQuants( szDatafile ) ); 	  
 	}

File src/datapair.h

 class CDataPair : public CDataPairImpl {
 public:
 	bool Open( const char* szDatafile, bool fContinuous, bool fMemmap = false, size_t iSkip = 2,
-		bool fZScore = false );
+		bool fZScore = false, bool fSeek = false );
 	bool Open( const CSlim& Slim );
 	bool Open( const CDat& dat );
 	bool OpenQuants( const char* szDatafile );
 	  SetQuants(adBinEdges, iBins );
 	}
 	void SetQuants( const std::vector<float>& vecdBinEdges );
+	/* Return a copy of the values held in m_vecdQuant, in the same order.
+	 * The copy is built in one step from the iterator range instead of
+	 * growing element by element. */
+	std::vector<float> GetQuants(){
+		return std::vector<float>( m_vecdQuant.begin( ), m_vecdQuant.end( ) );
+	}
+
 	size_t Quantize( float dValue ) const;
 	void Quantize( );
-        size_t Quantize( size_t iY, size_t iX, size_t iZero ) const;
+	size_t Quantize( size_t iY, size_t iX, size_t iZero ) const;
 
 	void Save( const char* szFile ) const;
 	
 		return ( m_pFilter ? m_pFilter->Quantize( dValue ) : ( m_pDat ? m_pDat->Quantize( dValue ) : -1 ) ); }
 
 
-        size_t Quantize( size_t iY, size_t iX, size_t iZero ) const {
-            float d;
-            if( iY == -1 || iX == -1 ) {
-                return -1;
-            }
-            else if( CMeta::IsNaN( (d = Get( iY, iX )) ) ) {
-                return iZero;
-            }
-            else {
-                return Quantize(d); 
-            }
-        }
+	size_t Quantize( size_t iY, size_t iX, size_t iZero ) const {
+		float d;
+		if( iY == -1 || iX == -1 ) {
+			return -1;
+		}else if( CMeta::IsNaN( (d = Get( iY, iX )) ) ) {
+			return iZero;
+		}else {
+			return Quantize(d);
+		}
+	}
 
 
 

File src/datapairi.h

 	static const char  c_acQdab[];
 	bool OpenQdab( const char* szDatafile );
 	void SetQuants( const float* adBinEdges, size_t iBins );
+	std::vector<float> GetQuants();
 };
 
 class CPCLPairImpl : protected CPairImpl, public CPCL {
 	void Reset( );
 	bool OpenPCL( std::istream&, size_t, bool );
 	bool OpenText( std::istream&, float, bool );
-	bool OpenBinary( std::istream& );
+	bool OpenBinary( std::istream&, bool = false );
 	bool OpenSparse( std::istream& );
 	bool OpenQdab( std::istream& );
 	bool OpenGenes( std::istream&, bool, bool );
 	bool OpenMemmap( const unsigned char* );
 	void FilterGenesGraph( const CGenes&, std::vector<bool>&, size_t, float, bool, bool, const std::vector<float>* );
 
+	/* Fetch row iY from m_Data.GetFullRow and overwrite the entry at index iY
+	 * with NaN before returning the array to the caller.
+	 * NOTE(review): if m_Data.GetFullRow allocates with new[] (as the
+	 * GetFullRow in halfmatrix.h does), the caller has to release the result
+	 * with delete[] rather than free() -- confirm. */
+	float* GetFullRow(size_t iY){
+		float* const adRow = m_Data.GetFullRow(iY);
+
+		adRow[iY] = CMeta::GetNaN();
+		return adRow;
+	}
+
 	float& Get( size_t iX, size_t iY ) const {
 		static float	s_dRet;
 
 
 		return ( m_pPCL ? m_pPCL->GetGenes( ) : m_vecstrGenes.size( ) ); }
 
+	/* Look strGene up in m_mapstrGenes. Returns the mapped index, or -1
+	 * (converted to size_t) when the gene is not in the map. */
+	size_t GetGeneIndex(std::string &strGene){
+		const std::map<std::string, size_t>::const_iterator	iterFound =
+			m_mapstrGenes.find( strGene );
+
+		if( iterFound == m_mapstrGenes.end( ) )
+			return -1;
+		return iterFound->second;
+	}
+
 	std::string GetGene( size_t iGene ) const {
 
 		return ( m_pPCL ? m_pPCL->GetGene( iGene ) : m_vecstrGenes[ iGene ] ); }
 
 		return ( m_pMeasure ? m_pPCL->GetGeneNames( ) : m_vecstrGenes ); }
 
+	/* Record the current position of istm in m_iHeader and fill m_veciSeekPos
+	 * (one slot per gene) with cumulative byte offsets starting at 0.
+	 * The loop stops before the last slot, so that slot is not written here. */
+	void EstimateSeekPositions(istream &istm){
+		const size_t iGenes = m_vecstrGenes.size();
+		size_t i;
+
+		m_iHeader = istm.tellg();
+		m_veciSeekPos.resize(iGenes);
+		// Without genes there is no slot 0 to write, and iGenes-1 would wrap
+		// around to a huge loop bound, so stop here.
+		if(iGenes == 0){
+			return;
+		}
+		m_veciSeekPos[0] = 0;
+		// NOTE(review): the step for slot i is (iGenes-1-i) floats; if rows are
+		// stored as a strict upper triangle, row i-1 would hold (iGenes-i)
+		// floats -- confirm against the file layout.
+		for(i=1; i<iGenes-1; i++){
+			m_veciSeekPos[i] = m_veciSeekPos[i-1] +
+				(sizeof(float)*(iGenes-1 - i));
+		}
+	}
+
+	float* GetRowSeek(std::istream& istm, std::string &strGene);
+	float* GetRowSeek(std::istream& istm, size_t ind);
+	bool OpenHeader(std::istream& istm);
+
+
 	CDistanceMatrix	m_Data;
 	TVecStr			m_vecstrGenes;
+	std::map<std::string, size_t> m_mapstrGenes;
 // PCL back end
 	CPCL*			m_pPCL;
 	bool			m_fPCLMemory;
 	size_t			m_iData;
 	HANDLE			m_hndlData;
 	float**			m_aadData;
+// Seek positions
+	std::vector<size_t>	m_veciSeekPos;
+	size_t			m_iHeader;
+	bool			m_fSeek;
+	/* handle used to open this file
+	 * used for reading sparse number of values
+	 * without reading the entire file
+	 */
+	ifstream	m_ifsm;
+
 };
 
 }

File src/halfmatrix.h

 		m_iSize = 0;
 		m_aaData = NULL; }
 
+	/* Build a dense copy of row iY with m_iSize entries: columns below iY are
+	 * read from m_aaData[column], columns above iY from m_aaData[iY], and the
+	 * entry at iY itself is set to 0.
+	 * The array is allocated with new[]; the caller is responsible for
+	 * releasing it and must use delete[]. */
+	tType* GetFullRow( size_t iY ) {
+		tType* const aRow = new tType[m_iSize];
+		size_t iCol;
+
+		// Pairs (iCol, iY) with iCol < iY are stored in row iCol.
+		for(iCol=0; iCol<iY && iCol<m_iSize; iCol++){
+			aRow[iCol] = m_aaData[iCol][iY-iCol-1];
+		}
+		if(iY<m_iSize){
+			// The element paired with itself has no stored value.
+			aRow[iY] = 0;
+			// Pairs (iY, iCol) with iCol > iY are stored in row iY.
+			for(iCol=iY+1; iCol<m_iSize; iCol++){
+				aRow[iCol] = m_aaData[iY][iCol-iY-1];
+			}
+		}
+		return aRow;
+	}
+
 	/*!
 	 * \brief
 	 * Return a single row of the matrix.
 	 */
 	template <class tType>
 	static size_t Quantize( tType Value, const std::vector<tType>& vecQuants ) {
-		size_t	i;
+
 
 		if( IsNaN( Value ) )
 			return -1;
 
+		/*size_t i;
 		for( i = 0; i < vecQuants.size( ); ++i )
 			if( Value <= vecQuants[ i ] )
 				break;
+		size_t r = min(i, vecQuants.size()-1);
+		return r;
+		 */
 
-		return min( i, vecQuants.size( ) - 1 ); }
+
+		size_t mid = vecQuants.size() / 2;
+		int i = mid;
+
+		if(Value <= vecQuants[i]){
+			i--;
+			//LEFT direction
+			while(i>=0){
+				if(Value <= vecQuants[i]){
+					i--;
+				}else{
+					i++;
+					break;
+				}
+			}
+			if(i==-1){
+				i = 0;
+			}
+		}else{
+			i++;
+			//RIGHT direction
+			while(i<vecQuants.size()){
+				if(Value > vecQuants[i]){
+					i++;
+				}else{
+					break;
+				}
+			}
+			if(i==vecQuants.size()){
+				i = vecQuants.size() - 1;
+			}
+		}
+
+		size_t ii = i;
+
+		return ii;
+	}
 
 	/*!
 	 * \brief

File src/stdafx.h

 #include <sys/mman.h>
 #include <sys/socket.h>
 #include <sys/time.h>
+#include <omp.h>
 
 #define _isnan				isnan
 #define _lseek				lseek

File src/svmperf.h

 #include "dat.h"
 
 #include <stdio.h>
-#include <execinfo.h>
+
+/* removed to support cygwin */
+//#include <execinfo.h>
 
 namespace SVMLight {
 extern "C" {

File tools/BNServer/BNServer.cpp

 	vector<float>				vecdPriors;
 	CBayesNetMinimal			BNDefault;
 	vector<CBayesNetMinimal>	vecBNs;
-	CDatabase					Database;
+
+	bool isNibble = true;
+	if(sArgs.is_nibble_arg==0){
+		isNibble = false;
+	}
+	CDatabase Database(isNibble);
 	CDataMatrix					MatBackgrounds, MatParameters, MatWithinC, MatWithinD;
 	CDataMatrix					MatBetweenCC, MatBetweenDD, MatBetweenDC;
 	vector<vector<size_t> >		vecveciDiseases, vecveciContexts;

File tools/BNServer/BNServer.ggo

 							string	typestr="filename"	yes
 option	"diseases"		s	"Disease/gene mapping"
 							string	typestr="filename"
+option	"is_nibble"		N	"Specify whether the database is nibble type"
+							int default="1"
 
 section "Bayes nets"
 option	"networks"		n	"Bayes net directory"

File tools/BNServer/cmdline.c

 /*
-  File autogenerated by gengetopt version 2.22
+  File autogenerated by gengetopt version 2.22.5
   generated with the following command:
-  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iBNServer.ggo --default-optional -C -N -e 
+  gengetopt -iBNServer.ggo --default-optional -C -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:
 #include <stdlib.h>
 #include <string.h>
 
-#include "getopt.h"
+#ifndef FIX_UNUSED
+#define FIX_UNUSED(X) (void) (X) /* avoid warnings for unused params */
+#endif
+
+#include <getopt.h>
 
 #include "cmdline.h"
 
   "  -i, --input=filename          Context IDs and names",
   "  -c, --contexts=filename       Context/gene mapping",
   "  -s, --diseases=filename       Disease/gene mapping",
+  "  -N, --is_nibble=INT           Specify whether the database is nibble type  \n                                  (default=`1')",
   "\nBayes nets:",
   "  -n, --networks=directory      Bayes net directory  (default=`.')",
   "  -b, --default=filename        Bayes net for no context",
 void clear_args (struct gengetopt_args_info *args_info);
 
 static int
-cmdline_parser_internal (int argc, char * const *argv, struct gengetopt_args_info *args_info,
+cmdline_parser_internal (int argc, char **argv, struct gengetopt_args_info *args_info,
                         struct cmdline_parser_params *params, const char *additional_error);
 
 static int
   args_info->input_given = 0 ;
   args_info->contexts_given = 0 ;
   args_info->diseases_given = 0 ;
+  args_info->is_nibble_given = 0 ;
   args_info->networks_given = 0 ;
   args_info->default_given = 0 ;
   args_info->xdsl_given = 0 ;
 static
 void clear_args (struct gengetopt_args_info *args_info)
 {
+  FIX_UNUSED (args_info);
   args_info->database_arg = gengetopt_strdup (".");
   args_info->database_orig = NULL;
   args_info->input_arg = NULL;
   args_info->contexts_orig = NULL;
   args_info->diseases_arg = NULL;
   args_info->diseases_orig = NULL;
+  args_info->is_nibble_arg = 1;
+  args_info->is_nibble_orig = NULL;
   args_info->networks_arg = gengetopt_strdup (".");
   args_info->networks_orig = NULL;
   args_info->default_arg = NULL;
   args_info->input_help = gengetopt_args_info_help[4] ;
   args_info->contexts_help = gengetopt_args_info_help[5] ;
   args_info->diseases_help = gengetopt_args_info_help[6] ;
-  args_info->networks_help = gengetopt_args_info_help[8] ;
-  args_info->default_help = gengetopt_args_info_help[9] ;
-  args_info->xdsl_help = gengetopt_args_info_help[10] ;
-  args_info->minimal_in_help = gengetopt_args_info_help[11] ;
-  args_info->minimal_out_help = gengetopt_args_info_help[12] ;
-  args_info->global_help = gengetopt_args_info_help[14] ;
-  args_info->within_c_help = gengetopt_args_info_help[15] ;
-  args_info->within_d_help = gengetopt_args_info_help[16] ;
-  args_info->between_cc_help = gengetopt_args_info_help[17] ;
-  args_info->between_dd_help = gengetopt_args_info_help[18] ;
-  args_info->between_dc_help = gengetopt_args_info_help[19] ;
-  args_info->backgrounds_help = gengetopt_args_info_help[20] ;
-  args_info->go_onto_help = gengetopt_args_info_help[22] ;
-  args_info->go_anno_help = gengetopt_args_info_help[23] ;
-  args_info->kegg_help = gengetopt_args_info_help[24] ;
-  args_info->kegg_org_help = gengetopt_args_info_help[25] ;
-  args_info->port_help = gengetopt_args_info_help[27] ;
-  args_info->timeout_help = gengetopt_args_info_help[28] ;
-  args_info->networklets_help = gengetopt_args_info_help[30] ;
-  args_info->assoc_diseases_help = gengetopt_args_info_help[31] ;
-  args_info->assoc_context_help = gengetopt_args_info_help[32] ;
-  args_info->limit_help = gengetopt_args_info_help[34] ;
-  args_info->files_help = gengetopt_args_info_help[35] ;
-  args_info->graphviz_help = gengetopt_args_info_help[36] ;
-  args_info->config_help = gengetopt_args_info_help[37] ;
-  args_info->verbosity_help = gengetopt_args_info_help[38] ;
+  args_info->is_nibble_help = gengetopt_args_info_help[7] ;
+  args_info->networks_help = gengetopt_args_info_help[9] ;
+  args_info->default_help = gengetopt_args_info_help[10] ;
+  args_info->xdsl_help = gengetopt_args_info_help[11] ;
+  args_info->minimal_in_help = gengetopt_args_info_help[12] ;
+  args_info->minimal_out_help = gengetopt_args_info_help[13] ;
+  args_info->global_help = gengetopt_args_info_help[15] ;
+  args_info->within_c_help = gengetopt_args_info_help[16] ;
+  args_info->within_d_help = gengetopt_args_info_help[17] ;
+  args_info->between_cc_help = gengetopt_args_info_help[18] ;
+  args_info->between_dd_help = gengetopt_args_info_help[19] ;
+  args_info->between_dc_help = gengetopt_args_info_help[20] ;
+  args_info->backgrounds_help = gengetopt_args_info_help[21] ;
+  args_info->go_onto_help = gengetopt_args_info_help[23] ;
+  args_info->go_anno_help = gengetopt_args_info_help[24] ;
+  args_info->kegg_help = gengetopt_args_info_help[25] ;
+  args_info->kegg_org_help = gengetopt_args_info_help[26] ;
+  args_info->port_help = gengetopt_args_info_help[28] ;
+  args_info->timeout_help = gengetopt_args_info_help[29] ;
+  args_info->networklets_help = gengetopt_args_info_help[31] ;
+  args_info->assoc_diseases_help = gengetopt_args_info_help[32] ;
+  args_info->assoc_context_help = gengetopt_args_info_help[33] ;
+  args_info->limit_help = gengetopt_args_info_help[35] ;
+  args_info->files_help = gengetopt_args_info_help[36] ;
+  args_info->graphviz_help = gengetopt_args_info_help[37] ;
+  args_info->config_help = gengetopt_args_info_help[38] ;
+  args_info->verbosity_help = gengetopt_args_info_help[39] ;
   
 }
 
 void
 cmdline_parser_print_version (void)
 {
-  printf ("%s %s\n", CMDLINE_PARSER_PACKAGE, CMDLINE_PARSER_VERSION);
+  printf ("%s %s\n",
+     (strlen(CMDLINE_PARSER_PACKAGE_NAME) ? CMDLINE_PARSER_PACKAGE_NAME : CMDLINE_PARSER_PACKAGE),
+     CMDLINE_PARSER_VERSION);
 }
 
 static void print_help_common(void) {
   printf("\n");
 
   if (strlen(gengetopt_args_info_description) > 0)
-    printf("%s\n", gengetopt_args_info_description);
+    printf("%s\n\n", gengetopt_args_info_description);
 }
 
 void
   free_string_field (&(args_info->contexts_orig));
   free_string_field (&(args_info->diseases_arg));
   free_string_field (&(args_info->diseases_orig));
+  free_string_field (&(args_info->is_nibble_orig));
   free_string_field (&(args_info->networks_arg));
   free_string_field (&(args_info->networks_orig));
   free_string_field (&(args_info->default_arg));
 
 
 static void
-write_into_file(FILE *outfile, const char *opt, const char *arg, char *values[])
+write_into_file(FILE *outfile, const char *opt, const char *arg, const char *values[])
 {
+  FIX_UNUSED (values);
   if (arg) {
     fprintf(outfile, "%s=\"%s\"\n", opt, arg);
   } else {
     write_into_file(outfile, "contexts", args_info->contexts_orig, 0);
   if (args_info->diseases_given)
     write_into_file(outfile, "diseases", args_info->diseases_orig, 0);
+  if (args_info->is_nibble_given)
+    write_into_file(outfile, "is_nibble", args_info->is_nibble_orig, 0);
   if (args_info->networks_given)
     write_into_file(outfile, "networks", args_info->networks_orig, 0);
   if (args_info->default_given)
 char *
 gengetopt_strdup (const char *s)
 {
-  char *result = NULL;
+  char *result = 0;
   if (!s)
     return result;
 
 }
 
 int
-cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info)
+cmdline_parser (int argc, char **argv, struct gengetopt_args_info *args_info)
 {
   return cmdline_parser2 (argc, argv, args_info, 0, 1, 1);
 }
 
 int
-cmdline_parser_ext (int argc, char * const *argv, struct gengetopt_args_info *args_info,
+cmdline_parser_ext (int argc, char **argv, struct gengetopt_args_info *args_info,
                    struct cmdline_parser_params *params)
 {
   int result;
-  result = cmdline_parser_internal (argc, argv, args_info, params, NULL);
+  result = cmdline_parser_internal (argc, argv, args_info, params, 0);
 
   return result;
 }
 
 int
-cmdline_parser2 (int argc, char * const *argv, struct gengetopt_args_info *args_info, int override, int initialize, int check_required)
+cmdline_parser2 (int argc, char **argv, struct gengetopt_args_info *args_info, int override, int initialize, int check_required)
 {
   int result;
   struct cmdline_parser_params params;
   params.check_ambiguity = 0;
   params.print_errors = 1;
 
-  result = cmdline_parser_internal (argc, argv, args_info, &params, NULL);
+  result = cmdline_parser_internal (argc, argv, args_info, &params, 0);
 
   return result;
 }
 {
   int result = EXIT_SUCCESS;
 
-  if (cmdline_parser_required2(args_info, prog_name, NULL) > 0)
+  if (cmdline_parser_required2(args_info, prog_name, 0) > 0)
     result = EXIT_FAILURE;
 
   return result;
 cmdline_parser_required2 (struct gengetopt_args_info *args_info, const char *prog_name, const char *additional_error)
 {
   int error = 0;
+  FIX_UNUSED (additional_error);
 
   /* checks for required options */
   if (! args_info->contexts_given)
 static
 int update_arg(void *field, char **orig_field,
                unsigned int *field_given, unsigned int *prev_given, 
-               char *value, char *possible_values[], const char *default_value,
+               char *value, const char *possible_values[],
+               const char *default_value,
                cmdline_parser_arg_type arg_type,
                int check_ambiguity, int override,
                int no_free, int multiple_option,
   const char *val = value;
   int found;
   char **string_field;
+  FIX_UNUSED (field);
 
   stop_char = 0;
   found = 0;
       return 1; /* failure */
     }
 
+  FIX_UNUSED (default_value);
     
   if (field_given && *field_given && ! override)
     return 0;
 
 
 int
-cmdline_parser_internal (int argc, char * const *argv, struct gengetopt_args_info *args_info,
+cmdline_parser_internal (
+  int argc, char **argv, struct gengetopt_args_info *args_info,
                         struct cmdline_parser_params *params, const char *additional_error)
 {
   int c;	/* Character of the parsed option.  */
         { "input",	1, NULL, 'i' },
         { "contexts",	1, NULL, 'c' },
         { "diseases",	1, NULL, 's' },
+        { "is_nibble",	1, NULL, 'N' },
         { "networks",	1, NULL, 'n' },
         { "default",	1, NULL, 'b' },
         { "xdsl",	0, NULL, 'x' },
         { "graphviz",	1, NULL, 'z' },
         { "config",	1, NULL, 'C' },
         { "verbosity",	1, NULL, 'v' },
-        { NULL,	0, NULL, 0 }
+        { 0,  0, 0, 0 }
       };
 
-      c = getopt_long (argc, argv, "hVd:i:c:s:n:b:xmM:P:w:W:e:E:B:a:g:G:k:K:p:t:lr:R:L:f:z:C:v:", long_options, &option_index);
+      c = getopt_long (argc, argv, "hVd:i:c:s:N:n:b:xmM:P:w:W:e:E:B:a:g:G:k:K:p:t:lr:R:L:f:z:C:v:", long_options, &option_index);
 
       if (c == -1) break;	/* Exit from `while (1)' loop.  */
 
             goto failure;
         
           break;
+        case 'N':	/* Specify whether the database is nibble type.  */
+        
+        
+          if (update_arg( (void *)&(args_info->is_nibble_arg), 
+               &(args_info->is_nibble_orig), &(args_info->is_nibble_given),
+              &(local_args_info.is_nibble_given), optarg, 0, "1", ARG_INT,
+              check_ambiguity, override, 0, 0,
+              "is_nibble", 'N',
+              additional_error))
+            goto failure;
+        
+          break;
         case 'n':	/* Bayes net directory.  */
         
         
 /* 3 is for "--" and "=" */
 
 static int
-_cmdline_parser_configfile (char * const filename, int *my_argc)
+_cmdline_parser_configfile (const char *filename, int *my_argc)
 {
   FILE* file;
   char my_argv[CONFIG_FILE_LINE_BUFFER_SIZE+1];
   size_t len, next_token;
   char delimiter;
 
-  if ((file = fopen(filename, "r")) == NULL)
+  if ((file = fopen(filename, "r")) == 0)
     {
       fprintf (stderr, "%s: Error opening configuration file '%s'\n",
                CMDLINE_PARSER_PACKAGE, filename);
       return EXIT_FAILURE;
     }
 
-  while ((fgets(linebuf, CONFIG_FILE_LINE_SIZE, file)) != NULL)
+  while ((fgets(linebuf, CONFIG_FILE_LINE_SIZE, file)) != 0)
     {
       ++line_num;
       my_argv[0] = '\0';
 
       if (fopt[next_token] == '\0') /* the line is over */
         {
-          farg  = NULL;
+          farg  = 0;
           equal = 0;
           goto noarg;
         }
 }
 
 int
-cmdline_parser_configfile (char * const filename,
+cmdline_parser_configfile (
+  const char *filename,
                            struct gengetopt_args_info *args_info,
                            int override, int initialize, int check_required)
 {
 }
 
 int
-cmdline_parser_config_file (char * const filename,
+cmdline_parser_config_file (const char *filename,
                            struct gengetopt_args_info *args_info,
                            struct cmdline_parser_params *params)
 {

File tools/BNServer/cmdline.h

 /** @file cmdline.h
  *  @brief The header file for the command line option parser
- *  generated by GNU Gengetopt version 2.22
+ *  generated by GNU Gengetopt version 2.22.5
  *  http://www.gnu.org/software/gengetopt.
  *  DO NOT modify this file, since it can be overwritten
  *  @author GNU Gengetopt by Lorenzo Bettini */
 #endif /* __cplusplus */
 
 #ifndef CMDLINE_PARSER_PACKAGE
-/** @brief the program name */
+/** @brief the program name (used for printing errors) */
 #define CMDLINE_PARSER_PACKAGE "BNServer"
 #endif
 
+#ifndef CMDLINE_PARSER_PACKAGE_NAME
+/** @brief the complete program name (used for help and version) */
+#define CMDLINE_PARSER_PACKAGE_NAME "BNServer"
+#endif
+
 #ifndef CMDLINE_PARSER_VERSION
 /** @brief the program version */
 #define CMDLINE_PARSER_VERSION "1.0"
   char * diseases_arg;	/**< @brief Disease/gene mapping.  */
   char * diseases_orig;	/**< @brief Disease/gene mapping original value given at command line.  */
   const char *diseases_help; /**< @brief Disease/gene mapping help description.  */
+  int is_nibble_arg;	/**< @brief Specify whether the database is nibble type (default='1').  */
+  char * is_nibble_orig;	/**< @brief Specify whether the database is nibble type original value given at command line.  */
+  const char *is_nibble_help; /**< @brief Specify whether the database is nibble type help description.  */
   char * networks_arg;	/**< @brief Bayes net directory (default='.').  */
   char * networks_orig;	/**< @brief Bayes net directory original value given at command line.  */
   const char *networks_help; /**< @brief Bayes net directory help description.  */
   unsigned int input_given ;	/**< @brief Whether input was given.  */
   unsigned int contexts_given ;	/**< @brief Whether contexts was given.  */
   unsigned int diseases_given ;	/**< @brief Whether diseases was given.  */
+  unsigned int is_nibble_given ;	/**< @brief Whether is_nibble was given.  */
   unsigned int networks_given ;	/**< @brief Whether networks was given.  */
   unsigned int default_given ;	/**< @brief Whether default was given.  */
   unsigned int xdsl_given ;	/**< @brief Whether xdsl was given.  */
  * @param args_info the structure where option information will be stored
  * @return 0 if everything went fine, NON 0 if an error took place
  */
-int cmdline_parser (int argc, char * const *argv,
+int cmdline_parser (int argc, char **argv,
   struct gengetopt_args_info *args_info);
 
 /**
  * @return 0 if everything went fine, NON 0 if an error took place
  * @deprecated use cmdline_parser_ext() instead
  */
-int cmdline_parser2 (int argc, char * const *argv,
+int cmdline_parser2 (int argc, char **argv,
   struct gengetopt_args_info *args_info,
   int override, int initialize, int check_required);
 
  * @param params additional parameters for the parser
  * @return 0 if everything went fine, NON 0 if an error took place
  */
-int cmdline_parser_ext (int argc, char * const *argv,
+int cmdline_parser_ext (int argc, char **argv,
   struct gengetopt_args_info *args_info,
   struct cmdline_parser_params *params);
 
  * @return 0 if everything went fine, NON 0 if an error took place
  * @deprecated use cmdline_parser_config_file() instead
  */
-int cmdline_parser_configfile (char * const filename,
+int cmdline_parser_configfile (const char *filename,
   struct gengetopt_args_info *args_info,
   int override, int initialize, int check_required);
 
  * @param params additional parameters for the parser
  * @return 0 if everything went fine, NON 0 if an error took place
  */
-int cmdline_parser_config_file (char * const filename,
+int cmdline_parser_config_file (const char *filename,
   struct gengetopt_args_info *args_info,
   struct cmdline_parser_params *params);
 

File tools/Contexter/Contexter.cpp

 	map<size_t, string>					mapistrBNs;
 	map<size_t, string>::const_iterator	iterBN;
 	size_t								i, iMax, iGene;
-	CDatabase							Database;
+	//CDatabase							Database;
 	uint32_t							iSize;
 	float*								adGenes;
 	CDat								Dat;
 	CCompactFullMatrix					MatContexts;
 	vector<string>						vecstrLine;
 
+	bool isNibble = true;
+	if(sArgs.is_nibble_arg==0){
+		isNibble = false;
+	}
+	CDatabase Database(isNibble);
+
 	if( !Database.Open( sArgs.database_arg ) ) {
 		cerr << "Could not open: " << sArgs.database_arg << endl;
 		return 1; }

File tools/Contexter/Contexter.ggo

 							int	yes
 option	"genes"			g	"Gene ID to name mapping"
 							string	typestr="filename"
+option	"is_nibble"		N	"Define whether the database is nibble type."
+							int	default="1"
 
 section "Bayes nets"
 option	"networks"		n	"Bayes net directory"

File tools/Contexter/cmdline.c

 /*
-  File autogenerated by gengetopt version 2.22
+  File autogenerated by gengetopt version 2.22.5
   generated with the following command:
-  /home/chuttenh/hg/sleipnir/trunk/../extlib/gengetopt-2.22/bin/gengetopt -iContexter.ggo --default-optional -u -N -e 
+  gengetopt -iContexter.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:
 #include <stdlib.h>
 #include <string.h>
 
-#include "getopt.h"
+#ifndef FIX_UNUSED
+#define FIX_UNUSED(X) (void) (X) /* avoid warnings for unused params */
+#endif
+
+#include <getopt.h>
 
 #include "cmdline.h"
 
   "  -c, --contexts=filename     Context/gene mapping",
   "  -e, --context=INT           Context ID to process",
   "  -g, --genes=filename        Gene ID to name mapping",
+  "  -N, --is_nibble=INT         Define whether the database is nibble type.  \n                                (default=`1')",