Commits

Qian Zhu committed b434c8c

added map and contiguous matrix data structures

Comments (0)

Files changed (3)

+/*****************************************************************************
+* This file is provided under the Creative Commons Attribution 3.0 license.
+*
+* You are free to share, copy, distribute, transmit, or adapt this work
+* PROVIDED THAT you attribute the work to the authors listed below.
+* For more information, please see the following web page:
+* http://creativecommons.org/licenses/by/3.0/
+*
+* This file is a component of the Sleipnir library for functional genomics,
+* authored by:
+* Curtis Huttenhower (chuttenh@princeton.edu)
+* Mark Schroeder
+* Maria D. Chikina
+* Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
+*
+* If you use this library, the included executable tools, or any related
+* code in your work, please cite the following publication:
+* Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
+* Olga G. Troyanskaya.
+* "The Sleipnir library for computational functional genomics"
+*****************************************************************************/
+#include "stdafx.h"
+#include "seekmap.h"
+
+namespace Sleipnir {
+
+/*
+ * Presence Data Structure
+ */
+/*
+ * Builds a presence vector of i one-byte flags, all initially cleared.
+ * If the allocation fails, p is NULL and the recorded size is 0.
+ */
+CSeekPresence::CSeekPresence(size_t i){
+	// calloc returns zero-filled storage in one call, so a failed
+	// allocation is never handed to memset.
+	p = (char*)calloc(i, sizeof(char));
+	iSize = (p!=NULL) ? i : 0;
+}
+
+/*
+ * Builds a presence vector that holds a private copy of the first i bytes
+ * found at cP.
+ */
+CSeekPresence::CSeekPresence(char *cP, size_t i){
+	char *pCopy = (char*)malloc(i);
+	memcpy(pCopy, cP, i);
+	p = pCopy;
+	iSize = i;
+}
+
+/*
+ * Copy constructor: duplicates the flag bytes of cP into fresh storage.
+ */
+CSeekPresence::CSeekPresence(CSeekPresence& cP){
+	iSize = cP.iSize;
+	p = (char*)malloc(iSize);
+	memcpy(p, cP.p, iSize);
+}
+
+/*
+ * Releases the flag storage and zeroes the recorded size.
+ */
+CSeekPresence::~CSeekPresence(){
+	iSize = 0;
+	free(p);
+}
+
+/*
+ * Returns true when flag i holds a non-zero value. No bounds check is made.
+ */
+bool CSeekPresence::Check(size_t i){
+	return p[i]!=0;
+}
+
+/*
+ * Marks flag i as present (stores 1). No bounds check is made.
+ */
+void CSeekPresence::Set(size_t i){
+	char &cFlag = p[i];
+	cFlag = 1;
+}
+
+/*
+ * Marks flag i as absent (stores 0). No bounds check is made.
+ */
+void CSeekPresence::Clear(size_t i){
+	char &cFlag = p[i];
+	cFlag = 0;
+}
+
+/*
+ * Marks every flag as absent.
+ */
+void CSeekPresence::Clear(){
+	int k;
+	for(k=0; k<iSize; k++){
+		p[k] = 0;
+	}
+}
+
+/*
+ * Returns the number of flags held by this object.
+ */
+size_t CSeekPresence::GetSize(){
+	const size_t iCount = iSize;
+	return iCount;
+}
+
+/*
+ * IntIntMap Data Structure
+ */
+/*
+ * Builds an empty map with room for iSize entries in each direction.
+ */
+CSeekIntIntMap::CSeekIntIntMap(size_t iSize){
+	const size_t iBytes = iSize * sizeof(int);
+	m_iSize = iSize;
+	m_iF = (int*)malloc(iBytes);
+	m_iR = (int*)malloc(iBytes);
+	// Clear() marks every slot as unset (-1) and zeroes the entry count.
+	Clear();
+}
+
+/*
+ * Builds a map sized like cP and fills it from cP's flags: set flags are
+ * added when bReverse is false, cleared flags when bReverse is true.
+ */
+CSeekIntIntMap::CSeekIntIntMap(CSeekPresence &cP, bool bReverse){
+	m_iSize = cP.GetSize();
+	const size_t iBytes = m_iSize * sizeof(int);
+	m_iF = (int*)malloc(iBytes);
+	m_iR = (int*)malloc(iBytes);
+	// Start from an all-unset state before adding the selected indices.
+	Clear();
+	Reset(cP, bReverse);
+}
+
+/*
+ * Releases both lookup arrays and zeroes the bookkeeping counters.
+ */
+CSeekIntIntMap::~CSeekIntIntMap(){
+	m_iNumSet = 0;
+	m_iSize = 0;
+	free(m_iR);
+	free(m_iF);
+}
+
+/*
+ * Returns the forward-table entry for i: the position assigned by Add(i),
+ * or -1 if i has not been added since the last Clear(). No bounds check.
+ */
+int CSeekIntIntMap::GetForward(int i){
+	const int iMapped = m_iF[i];
+	return iMapped;
+}
+
+/*
+ * Returns the reverse-table entry for i: the index that was added at
+ * position i, or -1 if that position is unused. No bounds check.
+ */
+int CSeekIntIntMap::GetReverse(int i){
+	const int iMapped = m_iR[i];
+	return iMapped;
+}
+
+/*
+ * Assigns index i the next free position and records the pairing in both
+ * tables. No bounds or duplicate check is made.
+ */
+void CSeekIntIntMap::Add(int i){
+	m_iF[i] = m_iNumSet;
+	m_iR[m_iNumSet] = i;
+	++m_iNumSet;
+}
+
+/*
+ * Marks every slot of both tables as unset (-1) and resets the entry count.
+ */
+void CSeekIntIntMap::Clear(){
+	m_iNumSet = 0;
+	int *pForward = m_iF;
+	int *pReverse = m_iR;
+	for(int k=0; k<m_iSize; ++k, ++pForward, ++pReverse){
+		*pForward = -1;
+		*pReverse = -1;
+	}
+}
+
+/*
+ * Adds to the map every index in [0, m_iSize) selected by cP: indices whose
+ * flag is set when bReverse is false, indices whose flag is cleared when
+ * bReverse is true.
+ *
+ * Existing entries are NOT removed first; new entries are appended after
+ * them. Call Clear() beforehand (as the constructor does) to rebuild the
+ * map from scratch.
+ * NOTE(review): cP is read at every index below m_iSize, so it presumably
+ * must hold at least m_iSize flags -- confirm against callers.
+ */
+void CSeekIntIntMap::Reset(CSeekPresence &cP, bool bReverse){
+	int i;
+	for(i=0; i<m_iSize; i++){
+		// Check() differs from bReverse exactly for the wanted indices:
+		// set flags in normal mode, cleared flags in reverse mode.
+		if(cP.Check(i)!=bReverse){
+			Add(i);
+		}
+	}
+}
+
+/*
+ * StrIntMap Data Structure
+ */
+/*
+ * Builds an empty bidirectional string <-> integer map.
+ */
+CSeekStrIntMap::CSeekStrIntMap(){
+	m_mapintstr.clear();
+	m_mapstrint.clear();
+}
+
+CSeekStrIntMap::~CSeekStrIntMap(){
+	// Nothing to release by hand: both maps clean up after themselves.
+}
+
+/*
+ * Records the pairing s <-> i in both directions, overwriting whatever was
+ * stored under key s in the string table and under key i in the integer
+ * table.
+ */
+void CSeekStrIntMap::Set(string s, int i){
+	m_mapintstr[i] = s;
+	m_mapstrint[s] = i;
+}
+
+/*
+ * Returns the integer paired with s, or 0 when s is unknown.
+ *
+ * The lookup uses find() rather than operator[]: operator[] would insert a
+ * default entry for every unknown key, leaving the string table larger
+ * than the integer table. The value returned for a miss (0) is the same
+ * one operator[] produced.
+ */
+int CSeekStrIntMap::Get(string s){
+	map<string, int>::iterator iter = m_mapstrint.find(s);
+	if(iter==m_mapstrint.end()){
+		return 0;
+	}
+	return iter->second;
+}
+
+/*
+ * Returns the string paired with i, or an empty string when i is unknown.
+ *
+ * The lookup uses find() rather than operator[]: operator[] would insert
+ * an empty entry for every unknown key and so inflate GetSize(). The value
+ * returned for a miss (an empty string) is the same one operator[]
+ * produced.
+ */
+string CSeekStrIntMap::Get(int i){
+	map<int, string>::iterator iter = m_mapintstr.find(i);
+	if(iter==m_mapintstr.end()){
+		return string();
+	}
+	return iter->second;
+}
+
+/*
+ * Returns the number of entries in the integer -> string table.
+ */
+size_t CSeekStrIntMap::GetSize(){
+	const size_t iCount = m_mapintstr.size();
+	return iCount;
+}
+
+/*
+ * Returns every string key, in ascending key order.
+ *
+ * The declared return type is a reference, so the result cannot live in an
+ * ordinary local (that would dangle as soon as the function returns). It is
+ * kept in a function-local static instead. That buffer is shared by all
+ * CSeekStrIntMap objects and is overwritten by the next call, so copy the
+ * result if it must outlive that call. Not safe for concurrent callers.
+ */
+vector<string>& CSeekStrIntMap::GetAllString(){
+	static vector<string> vecStr;
+	vecStr.clear();
+	// Size from the table that is actually iterated, so the two can never
+	// disagree.
+	vecStr.reserve(m_mapstrint.size());
+	map<string, int>::iterator	iter;
+	for(iter = m_mapstrint.begin(); iter!=m_mapstrint.end(); ++iter){
+		vecStr.push_back(iter->first);
+	}
+	return vecStr;
+}
+
+/*
+ * Returns every integer key, in ascending order.
+ *
+ * The declared return type is a reference, so the result cannot live in an
+ * ordinary local (that would dangle as soon as the function returns). It is
+ * kept in a function-local static instead. That buffer is shared by all
+ * CSeekStrIntMap objects and is overwritten by the next call, so copy the
+ * result if it must outlive that call. Not safe for concurrent callers.
+ */
+vector<int>& CSeekStrIntMap::GetAllInteger(){
+	static vector<int> vecInt;
+	vecInt.clear();
+	// Size from the table that is actually iterated, so the two can never
+	// disagree.
+	vecInt.reserve(m_mapintstr.size());
+	map<int, string>::iterator	iter;
+	for(iter = m_mapintstr.begin(); iter!=m_mapintstr.end(); ++iter){
+		vecInt.push_back(iter->first);
+	}
+	return vecInt;
+}
+
+/*****************************************************************************
+* This file is provided under the Creative Commons Attribution 3.0 license.
+*
+* You are free to share, copy, distribute, transmit, or adapt this work
+* PROVIDED THAT you attribute the work to the authors listed below.
+* For more information, please see the following web page:
+* http://creativecommons.org/licenses/by/3.0/
+*
+* This file is a component of the Sleipnir library for functional genomics,
+* authored by:
+* Curtis Huttenhower (chuttenh@princeton.edu)
+* Mark Schroeder
+* Maria D. Chikina
+* Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
+*
+* If you use this library, the included executable tools, or any related
+* code in your work, please cite the following publication:
+* Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
+* Olga G. Troyanskaya.
+* "The Sleipnir library for computational functional genomics"
+*****************************************************************************/
+#ifndef SEEKMAP_H
+#define SEEKMAP_H
+
+#include "stdafx.h"
+
+namespace Sleipnir {
+
+class CSeekPresence{ // Fixed-length array of one-byte presence flags (0 = absent, non-zero = present).
+public:
+	CSeekPresence(size_t); // allocates that many flags, all cleared
+	CSeekPresence(char*, size_t); // copies the given number of flag bytes from the buffer
+	CSeekPresence(CSeekPresence&); // deep copy of another presence vector
+	~CSeekPresence(); // frees the flag storage
+	void Clear(); // clears every flag
+	bool Check(size_t); // true if the flag at the index is non-zero; no bounds check
+	void Set(size_t); // stores 1 at the index; no bounds check
+	void Clear(size_t); // stores 0 at the index; no bounds check
+	size_t GetSize(); // number of flags
+private:
+	char *p; // malloc-owned flag bytes; NOTE(review): no operator= is declared, so assignment would share this pointer between two owners
+	int iSize; // number of flags in p; NOTE(review): int here, size_t everywhere in the public interface
+};
+
+class CSeekIntIntMap{ // Two-way table between indices and the consecutive positions (0, 1, 2, ...) handed out by Add().
+public:
+	CSeekIntIntMap(size_t); // empty map with capacity for the given number of indices
+	CSeekIntIntMap(CSeekPresence&, bool=false); // sized like the presence vector and filled from it (see Reset)
+	~CSeekIntIntMap(); // frees both tables
+	int GetForward(int); // index -> position, -1 if unset; no bounds check
+	int GetReverse(int); // position -> index, -1 if unused; no bounds check
+	void Add(int); // gives the index the next free position; no bounds or duplicate check
+	void Clear(); // sets every slot to -1 and the entry count to 0
+	void Reset(CSeekPresence&, bool=false); // adds each index whose flag is set (or cleared, when the bool is true); does not call Clear() itself
+
+private:
+	int *m_iF; // forward table: index -> position
+	int *m_iR; // reverse table: position -> index
+	int m_iSize; // capacity of both tables
+	int m_iNumSet; // number of entries added since the last Clear()
+};
+
+class CSeekStrIntMap{ // Two-way lookup between strings and integers, kept in a pair of std::map tables.
+public:
+	CSeekStrIntMap(); // starts with both tables empty
+	~CSeekStrIntMap(); // nothing to release by hand
+	void Set(string, int); // stores the pairing in both tables
+	int Get(string); // integer stored for the string
+	size_t GetSize(); // number of entries in the integer -> string table
+	string Get(int); // string stored for the integer
+	vector<string>& GetAllString(); // NOTE(review): returns by reference, so the result cannot safely be a plain local in the definition
+	vector<int>& GetAllInteger(); // NOTE(review): same by-reference return concern as GetAllString
+private:
+	map<string, int> m_mapstrint; // string -> integer
+	map<int, string> m_mapintstr; // integer -> string
+};
+
+}
+#endif
+/*****************************************************************************
+* This file is provided under the Creative Commons Attribution 3.0 license.
+*
+* You are free to share, copy, distribute, transmit, or adapt this work
+* PROVIDED THAT you attribute the work to the authors listed below.
+* For more information, please see the following web page:
+* http://creativecommons.org/licenses/by/3.0/
+*
+* This file is a component of the Sleipnir library for functional genomics,
+* authored by:
+* Curtis Huttenhower (chuttenh@princeton.edu)
+* Mark Schroeder
+* Maria D. Chikina
+* Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
+*
+* If you use this library, the included executable tools, or any related
+* code in your work, please cite the following publication:
+* Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
+* Olga G. Troyanskaya.
+* "The Sleipnir library for computational functional genomics"
+*****************************************************************************/
+#ifndef SEEKMATRIX_H
+#define SEEKMATRIX_H
+
+#include "stdafx.h"
+
+namespace Sleipnir {
+
+/*
+ * Helpers for building and releasing a row-pointer matrix whose elements
+ * all live in one contiguous buffer.
+ */
+class CSeekMatrixTools {
+public:
+	/*
+	 * Allocates an iRow x iColumn matrix with every element set to tValue.
+	 * All elements live in a single buffer reachable through row 0; the
+	 * other row pointers point into that buffer. Storage comes from malloc
+	 * and elements are written by plain assignment, so no constructors run.
+	 *
+	 * Returns NULL if an allocation fails. Release the result with Free().
+	 */
+	template<class tType>
+	static tType** Init(size_t iRow, size_t iColumn, tType tValue){
+		// Always reserve at least one row slot: both this function and
+		// Free() access m_Data[0], even when iRow is 0.
+		const size_t iSlots = (iRow==0) ? 1 : iRow;
+		tType **m_Data = (tType**)malloc(iSlots*sizeof(tType*));
+		if(m_Data==NULL){
+			return NULL;
+		}
+		const size_t iTotal = iRow*iColumn;
+		m_Data[0] = (tType*)malloc(iTotal*sizeof(tType));
+		// malloc(0) may legitimately return NULL, so only a non-empty
+		// request counts as a failure.
+		if(m_Data[0]==NULL && iTotal!=0){
+			free(m_Data);
+			return NULL;
+		}
+		size_t i;
+		for(i=1; i<iRow; i++){
+			m_Data[i] = m_Data[i-1] + iColumn;
+		}
+		// The rows are contiguous, so one pass over the buffer fills them all.
+		for(i=0; i<iTotal; i++){
+			m_Data[0][i] = tValue;
+		}
+		return m_Data;
+	}
+
+	/*
+	 * Releases a matrix obtained from Init(). Passing NULL is a no-op.
+	 */
+	template<class tType>
+	static void Free(tType **m_Data){
+		if(m_Data==NULL){
+			return;
+		}
+		free(m_Data[0]);
+		free(m_Data);
+	}
+
+};
+
+/*
+ * Two-dimensional array of tType with two storage layouts:
+ *  - contiguous: every row lives in one shared buffer. This is the layout
+ *    produced by the (iRow, iColumn, tValue) constructor and by Compact().
+ *  - jagged: every initialized row owns its own buffer. This is the layout
+ *    produced by the (iRow) constructor plus InitializeRow().
+ *
+ * Storage comes from malloc and elements are written by plain assignment,
+ * so no constructors or destructors of tType are run.
+ * NOTE(review): no copy constructor or assignment operator is defined;
+ * copying an instance would leave two objects freeing the same buffers.
+ */
+template<class tType>
+class CSeekMatrix{
+public:
+	/*
+	 * Builds a contiguous iRow x iColumn matrix filled with tValue.
+	 */
+	CSeekMatrix(size_t iRow, size_t iColumn, tType tValue){
+		m_iRow = iRow;
+		m_iColumn = iColumn;
+		m_cIsMatrix = true;
+		m_cIsCompacted = true;
+		// Record every row's width so GetRowSize(), GetElements() and
+		// Clear() also work for this layout.
+		m_veciRowSize.assign(iRow, iColumn);
+		m_Data = (tType**)malloc(iRow*sizeof(tType*));
+		if(iRow==0){
+			// No row slot exists, so there is nothing to point at.
+			return;
+		}
+		m_Data[0] = (tType*)malloc(iRow*iColumn*sizeof(tType));
+		size_t i, j;
+		for(i=1; i<iRow; i++){
+			m_Data[i] = m_Data[i-1] + iColumn;
+		}
+		for(i=0; i<iRow; i++){
+			for(j=0; j<iColumn; j++){
+				m_Data[i][j] = tValue;
+			}
+		}
+	}
+
+	/*
+	 * Builds a jagged matrix of iRow rows, none of them initialized yet.
+	 * Use InitializeRow() to give a row its elements.
+	 */
+	CSeekMatrix(size_t iRow){
+		m_iRow = iRow;
+		// There is no common width in this layout; keep the member defined.
+		m_iColumn = 0;
+		m_cIsMatrix = false;
+		m_cIsCompacted = false;
+		m_veciRowSize.assign(iRow, 0);
+		m_Data = (tType**)malloc(iRow*sizeof(tType*));
+		size_t i;
+		for(i=0; i<iRow; i++){
+			m_Data[i] = NULL;
+		}
+	}
+
+	/*
+	 * Gives row atX a fresh buffer of iSize elements, each set to tValue.
+	 * Any previous contents of that row are released. If the matrix is
+	 * currently contiguous it is first converted to the jagged layout.
+	 * Does nothing if atX is out of range or an allocation fails.
+	 */
+	void InitializeRow(size_t atX, size_t iSize, tType tValue){
+		if(atX>=m_iRow){
+			return;
+		}
+		// Rows inside a shared buffer cannot be replaced one at a time.
+		if(Detach()==false){
+			return;
+		}
+		tType *pRow = (tType*)malloc(iSize*sizeof(tType));
+		if(pRow==NULL && iSize!=0){
+			return;
+		}
+		size_t i;
+		for(i=0; i<iSize; i++){
+			pRow[i] = tValue;
+		}
+		// Release the row being replaced (free(NULL) is a no-op).
+		free(m_Data[atX]);
+		m_Data[atX] = pRow;
+		m_veciRowSize[atX] = iSize;
+	}
+
+	/*
+	 * Returns the total number of elements over all rows.
+	 */
+	size_t GetElements(){
+		size_t i;
+		size_t iTot = 0;
+		for(i=0; i<m_iRow; i++){
+			iTot+=m_veciRowSize[i];
+		}
+		return iTot;
+	}
+
+	/*
+	 * Sets every element of every initialized row to 0.
+	 */
+	void Clear(){
+		size_t i,j;
+		for(i=0; i<m_iRow; i++){
+			if(m_Data[i]==NULL) continue;
+			for(j=0; j<m_veciRowSize[i]; j++){
+				m_Data[i][j] = 0;
+			}
+		}
+	}
+
+	/*
+	 * Moves the rows of a jagged matrix into one shared buffer, in row
+	 * order. Returns true on success, or if the matrix is already
+	 * contiguous. Returns false, leaving the matrix unchanged, if an
+	 * allocation fails.
+	 */
+	bool Compact(){
+		if(IsContiguous()){
+			return true;
+		}
+
+		size_t i,j;
+
+		const size_t iTotal = GetElements();
+		tType **m2 = (tType**)malloc(m_iRow*sizeof(tType*));
+		if(m2==NULL && m_iRow!=0){
+			return false;
+		}
+		tType *pBlock = (tType*)malloc(iTotal*sizeof(tType));
+		if(pBlock==NULL && iTotal!=0){
+			free(m2);
+			return false;
+		}
+
+		tType *pNext = pBlock;
+		bool bBlockUsed = false;
+		for(i=0; i<m_iRow; i++){
+			if(m_Data[i]==NULL){
+				m2[i] = NULL;
+				continue;
+			}
+			// The first initialized row starts at the base of the block;
+			// Free() relies on that to find the pointer to release.
+			m2[i] = pNext;
+			bBlockUsed = true;
+			for(j=0; j<m_veciRowSize[i]; j++){
+				m2[i][j] = m_Data[i][j];
+			}
+			pNext += m_veciRowSize[i];
+			free(m_Data[i]);
+		}
+		if(bBlockUsed==false){
+			// No row refers to the block, so nothing would ever free it.
+			free(pBlock);
+		}
+		free(m_Data);
+
+		m_Data = m2;
+		m_cIsCompacted = true;
+
+		return true;
+	}
+
+	/*
+	 * Releases all storage and leaves the matrix with zero rows. Safe to
+	 * call more than once; the destructor calls it as well.
+	 */
+	void Free(){
+		if(m_Data!=NULL){
+			FreeRows();
+			free(m_Data);
+			m_Data = NULL;
+		}
+		m_iRow = 0;
+		m_veciRowSize.clear();
+	}
+
+	~CSeekMatrix(){
+		Free();
+	}
+
+	/* Returns the element at (iRow, iColumn). No bounds check. */
+	tType Get(size_t iRow, size_t iColumn){
+		return m_Data[iRow][iColumn];
+	}
+
+	/* Returns the start of row iRow, or NULL if it was never initialized. */
+	tType* GetRow(size_t iRow){
+		return m_Data[iRow];
+	}
+
+	/* Returns the number of elements in row iRow. */
+	size_t GetRowSize(size_t iRow){
+		return m_veciRowSize[iRow];
+	}
+
+	/* Stores tValue at (iRow, iColumn). No bounds check. */
+	void Set(size_t iRow, size_t iColumn, tType tValue){
+		m_Data[iRow][iColumn] = tValue;
+	}
+
+private:
+	/* True when all rows share one buffer (matrix or compacted layout). */
+	bool IsContiguous() const{
+		return m_cIsMatrix==true || m_cIsCompacted==true;
+	}
+
+	/*
+	 * Releases the element storage but not the row-pointer table.
+	 */
+	void FreeRows(){
+		size_t i;
+		if(IsContiguous()){
+			// The first non-NULL row pointer is the base of the shared
+			// buffer; every later row points inside that same buffer.
+			for(i=0; i<m_iRow; i++){
+				if(m_Data[i]==NULL) continue;
+				free(m_Data[i]);
+				break;
+			}
+		}else{
+			for(i=0; i<m_iRow; i++){
+				free(m_Data[i]);
+			}
+		}
+	}
+
+	/*
+	 * Converts a contiguous matrix to the jagged layout by giving each
+	 * initialized row its own copy. Returns false, leaving the matrix
+	 * unchanged, if an allocation fails.
+	 */
+	bool Detach(){
+		if(IsContiguous()==false){
+			return true;
+		}
+		size_t i, j;
+		// Build all copies first so a failure leaves the matrix untouched.
+		vector<tType*> vecRows(m_iRow, (tType*)NULL);
+		for(i=0; i<m_iRow; i++){
+			if(m_Data[i]==NULL) continue;
+			vecRows[i] = (tType*)malloc(m_veciRowSize[i]*sizeof(tType));
+			if(vecRows[i]==NULL && m_veciRowSize[i]!=0){
+				for(j=0; j<i; j++){
+					free(vecRows[j]);
+				}
+				return false;
+			}
+			for(j=0; j<m_veciRowSize[i]; j++){
+				vecRows[i][j] = m_Data[i][j];
+			}
+		}
+		FreeRows();
+		for(i=0; i<m_iRow; i++){
+			m_Data[i] = vecRows[i];
+		}
+		m_cIsMatrix = false;
+		m_cIsCompacted = false;
+		return true;
+	}
+
+	tType **m_Data;
+	size_t m_iRow;
+	size_t m_iColumn;
+	bool m_cIsMatrix;
+	bool m_cIsCompacted;
+	vector<size_t> m_veciRowSize;
+};
+
+
+}
+
+#endif // SEEKMATRIX_H