1. libsleipnir
  2. sleipnir

Commits

Qian Zhu  committed 10dee50 Merge

more merge

  • Participants
  • Parent commits 2dee8f4, 99fc1f5
  • Branches search_project

Comments (0)

Files changed (19)

File configure.ac

  • Ignore whitespace
File contents unchanged.

File src/compactmatrix.cpp

View file
  • Ignore whitespace
 	m_fMemory = true;
 	for( m_cBits = 0,--cValues; cValues; ++m_cBits,cValues >>= 1 );
 	m_aiData = new size_t[ iWords = CountWords( ) ];
+	//printf("size %d\n", iWords);
 	if( fClear )
 		memset( m_aiData, 0, iWords * sizeof(*m_aiData) ); }
 

File src/compactmatrixi.h

View file
  • Ignore whitespace
 
 namespace Sleipnir {
 
/*!
 * \brief
 * Dense, row-major matrix of unsigned char cells with an attached gene-name to index map.
 *
 * \remarks
 * All cells live in one contiguous allocation and m_aiData holds one pointer per row into
 * that allocation.  The object owns the storage, so copy construction and assignment make
 * deep copies (the class is stored by value in containers).  Get and Set perform no bounds
 * checking.  A matrix with zero rows holds no storage at all.
 */
class CUcharFullMatrix {	//unsigned char full matrix
public:
	/*! \brief Construct an empty (0 x 0) matrix with no storage. */
	CUcharFullMatrix( ) : m_fMemory(true), m_cBits(0), m_aiData(NULL), m_iRows(0), m_iColumns(0) { }

	/*!
	 * \brief Deep-copy constructor: duplicates the cells and the gene map of Other.
	 *
	 * m_aiData starts out NULL so nothing leaks if the allocation in the body throws.
	 */
	CUcharFullMatrix( const CUcharFullMatrix& Other ) : m_fMemory(Other.m_fMemory), m_cBits(Other.m_cBits),
		m_aiData(NULL), m_iRows(0), m_iColumns(0), m_mapstriGenes(Other.m_mapstriGenes) {
		const size_t	cCells = Other.m_iRows * Other.m_iColumns;
		size_t			i;

		m_aiData = Allocate( Other.m_iRows, Other.m_iColumns );
		m_iRows = Other.m_iRows;
		m_iColumns = Other.m_iColumns;
		if( m_aiData )
			for( i = 0; i < cCells; ++i )
				m_aiData[ 0 ][ i ] = Other.m_aiData[ 0 ][ i ];
	}

	/*! \brief Deep-copy assignment (copy first, then swap, so a failed copy leaves *this intact). */
	CUcharFullMatrix& operator=( const CUcharFullMatrix& Other ) {
		if( this != &Other ) {
			CUcharFullMatrix	Copy( Other );

			Swap( Copy );
		}
		return *this;
	}

	~CUcharFullMatrix( ) {
		Release( );
	}

	/*!
	 * \brief (Re)allocate the matrix and set every cell to Value.
	 *
	 * \param numRows		Number of rows; zero yields a matrix without storage.
	 * \param numColumns	Number of columns.
	 * \param Value			Initial value stored in every cell.
	 *
	 * \remarks
	 * Any previously held storage is released.  The gene map is left untouched.
	 * Allocation failure is reported by the exception thrown from operator new[].
	 */
	void Initialize(size_t numRows, size_t numColumns, unsigned char Value){
		const size_t	cCells = numRows * numColumns;
		unsigned char**	apRows = Allocate( numRows, numColumns );
		size_t			i;

		if( apRows )
			for( i = 0; i < cCells; ++i )
				apRows[ 0 ][ i ] = Value;

		// Only drop the old storage once the new one has been obtained.
		Release( );
		m_aiData = apRows;
		m_iRows = numRows;
		m_iColumns = numColumns;
	}

	/*! \brief Return the cell at (iRow, iColumn); indices are not range checked. */
	unsigned char Get(size_t iRow, size_t iColumn) const{
		return m_aiData[iRow][iColumn];
	}

	/*! \brief Store cValue at (iRow, iColumn); indices are not range checked. */
	void Set(size_t iRow, size_t iColumn, unsigned char cValue){
		m_aiData[iRow][iColumn] = cValue;
	}

	size_t GetRows() const{
		return m_iRows;
	}

	size_t GetColumns() const{
		return m_iColumns;
	}

	/*! \brief Associate gene name s with index i, replacing any earlier index for s. */
	void AddGeneMap(size_t i, const std::string& s){
		m_mapstriGenes[s] = i;
	}

	/*! \brief Return the index registered for strGene, or (size_t)-1 if it was never added. */
	size_t GetGeneIndex(const std::string& strGene) const{
		const std::map<std::string, size_t>::const_iterator	iterGene = m_mapstriGenes.find( strGene );

		return ( iterGene == m_mapstriGenes.end( ) ) ? static_cast<size_t>( -1 ) : iterGene->second;
	}

private:
	/*!
	 * \brief Allocate one contiguous cell buffer plus a table of row pointers into it.
	 * \returns The row table, or NULL when iRows is zero.  Cells are left uninitialized.
	 */
	static unsigned char** Allocate( size_t iRows, size_t iColumns ) {
		unsigned char*	acCells;
		unsigned char**	apRows;
		size_t			i;

		if( !iRows )
			return NULL;
		acCells = new unsigned char[ iRows * iColumns ];
		try {
			apRows = new unsigned char*[ iRows ]; }
		catch( ... ) {
			delete[] acCells;
			throw; }
		apRows[ 0 ] = acCells;
		for( i = 1; i < iRows; ++i )
			apRows[ i ] = apRows[ i - 1 ] + iColumns;
		return apRows;
	}

	/*! \brief Free the cell buffer and row table (if any) and reset the dimensions to 0 x 0. */
	void Release( ) {
		if( m_aiData ) {
			delete[] m_aiData[ 0 ];
			delete[] m_aiData;
			m_aiData = NULL;
		}
		m_iRows = m_iColumns = 0;
	}

	/*! \brief Exchange the complete state of *this and Other without allocating cells. */
	void Swap( CUcharFullMatrix& Other ) {
		const bool				fMemory = m_fMemory;
		const unsigned char		cBits = m_cBits;
		unsigned char** const	aiData = m_aiData;
		const size_t			iRows = m_iRows;
		const size_t			iColumns = m_iColumns;

		m_fMemory = Other.m_fMemory;
		m_cBits = Other.m_cBits;
		m_aiData = Other.m_aiData;
		m_iRows = Other.m_iRows;
		m_iColumns = Other.m_iColumns;
		Other.m_fMemory = fMemory;
		Other.m_cBits = cBits;
		Other.m_aiData = aiData;
		Other.m_iRows = iRows;
		Other.m_iColumns = iColumns;
		m_mapstriGenes.swap( Other.m_mapstriGenes );
	}

	bool			m_fMemory; //so far does not work
	unsigned char	m_cBits; //so far does not work
	unsigned char**	m_aiData; // row pointers into one contiguous cell buffer; NULL when empty
	size_t		m_iRows;
	size_t		m_iColumns;
	std::map<std::string, size_t>	m_mapstriGenes; // gene name -> index
};
+
+
 class CCompactMatrixBase {
 protected:
 	CCompactMatrixBase( ) : m_cBits(0), m_aiData(NULL), m_fMemory(true) { }

File src/dat.cpp

  • Ignore whitespace
File contents unchanged.

File src/dat.h

  • Ignore whitespace
File contents unchanged.

File src/database.cpp

View file
  • Ignore whitespace
 	if( fBuffer ) {
 		//iBaseGenes: gene id of first gene in each databaselet
 		//iDataset: dataset id
-		//printf("Number: %d %d %d %d\n", GetSizeGene(), GetSizePair(), iBaseGenes, iBaseDatasets);
 		abImage = new unsigned char[ iSize = ( GetSizeGene( ) * m_vecstrGenes.size( ) ) ];
 		m_fstm.seekg( m_iHeader, ios_base::beg );
 		m_fstm.read( (char*)abImage, iSize );
 
 	return true; }
 
+/* 	A faster and simpler writing method for the matrix.
+	takes UcharFullMatrix
+	and requires buffering to be enabled, and works only with byte output
+*/
+bool CDatabaselet::OpenFast( const vector<CUcharFullMatrix>& vecData, size_t iBaseGenes, size_t iBaseDatasets, bool fBuffer) {
+	if(fBuffer){
+		cerr << "Requires buferring to be enabled." << endl;
+		return false;
+	}
+	if(m_useNibble){
+		cerr << "Requires byte." << endl;
+		return false;
+	}
+
+	unsigned char*	abImage;
+	size_t			iSize, iDatum, iGeneOne, iGeneTwo;
+	unsigned char	bOne, bTwo;
+
+	abImage = new unsigned char[ iSize = ( GetSizeGene( ) * m_vecstrGenes.size( ) ) ];
+	m_fstm.seekg( m_iHeader );
+	m_fstm.read( (char*)abImage, iSize );
+
+	for( iDatum = 0; iDatum  < vecData.size( ); iDatum ++ ){
+		for( iGeneOne = 0; iGeneOne < GetGenes( ); ++iGeneOne ){
+			size_t index = vecData[iDatum].GetGeneIndex(GetGene(iGeneOne));
+			size_t iOffset = (GetSizeGene() * iGeneOne) + iBaseDatasets + iDatum;
+
+			for( iGeneTwo = 0; iGeneTwo < vecData[iDatum].GetColumns(); ++iGeneTwo ){
+				if( bOne = vecData[ iDatum].Get(index, iGeneTwo ) ){
+					abImage[ iOffset + GetSizePair() * iGeneTwo ] = bOne - 1;
+				}
+			}
+		}
+	}
+
+	m_fstm.seekp( m_iHeader );
+	m_fstm.write( (char*)abImage, iSize );
+	delete[] abImage;
+
+	return true;
+}
+
 bool CDatabaselet::Get( size_t iOne, size_t iTwo,
 		vector<unsigned char>& vecbData, unsigned char *charImage){
 	size_t	i;
 	float			d;
 
 	/* define number of threads to concurrently process datasets */
-	omp_set_num_threads(4);
+	//omp_set_num_threads(4);
 
 	veciGenes.resize( vecstrGenes.size( ) );
 	iOutBlock = ( m_iBlockOut == -1 ) ? m_vecpDBs.size( ) : m_iBlockOut;
 
 		for( iInBase = 0; iInBase < vecstrFiles.size( ); iInBase += iInBlock ) {
 			vector<CCompactFullMatrix>	vecData;
+
 			vecData.resize( ( ( iInBase + iInBlock ) > vecstrFiles.size( ) ) ?
 				( vecstrFiles.size( ) - iInBase ) : iInBlock );
 			for( iInOffset = 0; iInOffset < vecData.size( ); ++iInOffset ) {

File src/database.h

View file
  • Ignore whitespace
  */
 class CDatabase : CDatabaseImpl {
 public:
+
+	struct ArrayULInt{
+		size_t iX;
+		unsigned long int v;
+	};
+
+	int ULIntComp(const void * a, const void* b){
+		if ( *(unsigned long int*) a > *(unsigned long int*) b){
+			return(1);
+		}
+		if ( *(unsigned long int*) a < *(unsigned long int*) b){
+			return(-1);
+		}
+		return(0);
+	}
+
 	/*!
 	 * \brief
 	 * Construct a new database over the given genes from the given datasets and Bayes net.

File src/databasei.h

View file
  • Ignore whitespace
 
 	void Clear( ) {
 		size_t	i;
+
 		m_mapstriGenes.clear( );
 		for( i = 0; i < m_vecpDBs.size( ); ++i )
 			delete m_vecpDBs[ i ];

File src/datapair.h

  • Ignore whitespace
File contents unchanged.

File src/dati.h

  • Ignore whitespace
File contents unchanged.

File src/meta.h

View file
  • Ignore whitespace
 	template <class tType>
 	static size_t Quantize( tType Value, const std::vector<tType>& vecQuants ) {
 
-
 		if( IsNaN( Value ) )
 			return -1;
 

File tools/DBCombiner/DBCombiner.ggo

  • Ignore whitespace
File contents unchanged.

File tools/DBCombiner/cmdline.c

  • Ignore whitespace
File contents unchanged.

File tools/DBCombiner/cmdline.h

  • Ignore whitespace
File contents unchanged.

File tools/Data2DB/Data2DB.cpp

  • Ignore whitespace
File contents unchanged.

File tools/Data2DB/Data2DB.ggo

  • Ignore whitespace
File contents unchanged.

File tools/Data2DB/cmdline.c

  • Ignore whitespace
File contents unchanged.

File tools/Data2DB/cmdline.h

  • Ignore whitespace
File contents unchanged.

File tools/Makefile.am

View file
  • Ignore whitespace
 	  Contexter \
 	  Counter \
 	  Data2DB \
-	  DBCombiner \
+      DBCombiner \
 	  DSLConverter \
 	  Dab2Dad \
 	  Edges2Posteriors \