1. libsleipnir
  2. sleipnir

Commits

Qian Zhu  committed 9215274

Added DBCombiner, a tool for combining multiple CDatabaselets (made from the same CDatabase).
Added nibble support to DBCombiner.

  • Participants
  • Parent commits fc224ce
  • Branches parallel_data2db

Comments (0)

Files changed (15)

File configure.ac

View file
 		 tools/Contexter/Makefile \
 		 tools/Counter/Makefile \
 		 tools/Data2DB/Makefile \
+         tools/DBCombiner/Makefile \
 		 tools/DSLConverter/Makefile \
 		 tools/Dab2Dad/Makefile \
 		 tools/Edges2Posteriors/Makefile \

File gen_tools_am

View file
 			    Counter  => ['SMILE'],
 			    DSLConverter  => ['SMILE'],
 			    Data2DB  => ['SMILE'],
+				DBCombiner => ['SMILE'],
 			    Dab2Dad  => ['SMILE'],
 			    Dab2DB  => ['SMILE'],
 			    Data2Svm => ['SVM_PERF'],

File src/database.cpp

View file
 		m_fstm.write( acFiller, GetSizeGene( ) );
 	}
 	delete[] acFiller;
+	SetFile(strFile);
 
 	return true;
 
 		}
 	}
 
-
-	if( iDatum < vecData.size( ) )
-		for( iGeneOne = 0; iGeneOne < GetGenes( ); ++iGeneOne )
-			for( iGeneTwo = 0; iGeneTwo < vecData[ iDatum ].GetColumns( ); ++iGeneTwo )
-				if( bOne = vecData[ iDatum ].Get( iBaseGenes + iGeneOne, iGeneTwo ) )
+	if( iDatum < vecData.size( ) ){
+		for( iGeneOne = 0; iGeneOne < GetGenes( ); ++iGeneOne ){
+			for( iGeneTwo = 0; iGeneTwo < vecData[ iDatum ].GetColumns( ); ++iGeneTwo ){
+				if( bOne = vecData[ iDatum ].Get( iBaseGenes + iGeneOne, iGeneTwo ) ){
 					OpenWrite( bOne - 1, GetOffset( iGeneOne, iGeneTwo, iBaseDatasets + iDatum ), ENibblesLow,
 						abImage );
+				}
+			}
+		}
+	}
 	if( fBuffer ) {
 		m_fstm.seekp( m_iHeader, ios_base::beg );
 		m_fstm.write( (char*)abImage, iSize );
 bool CDatabaselet::Get( size_t iOne, size_t iTwo,
 		vector<unsigned char>& vecbData, unsigned char *charImage){
 	size_t	i;
-	vecbData.clear();
-	vecbData.resize(GetSizePair());
+	size_t offset = GetOffset(iOne, iTwo) - m_iHeader;
 
-	size_t offset = GetOffset(iOne, iTwo) - m_iHeader;
-	for(i=0; i<vecbData.size(); i++){
-		vecbData[i] = charImage[offset + i];
+	if(this->m_useNibble==false){
+		vecbData.clear();
+		vecbData.resize(GetSizePair());
+
+		for(i=0; i<vecbData.size(); i++){
+			vecbData[i] = charImage[offset + i];
+		}
+	}else{
+		vecbData.clear();
+		vecbData.resize(m_iDatasets);
+
+
+		for(i=0; i<GetSizePair(); i++){
+			unsigned char b = charImage[offset + i];
+			unsigned char bValue = -1;
+			if( ( bValue = ( b & 0xF ) ) == 0xF ){
+				bValue = -1;
+			}
+			vecbData[ 2 * i ] = bValue;
+
+			if( ( bValue = ( ( b >> 4 ) & 0xF ) ) == 0xF ){
+				bValue = -1;
+			}
+
+			if((2 * i + 1)==m_iDatasets){
+				break;
+			}
+			vecbData[ (2 * i) + 1 ] = bValue;
+		}
 	}
 
 	return true;
 /*	static function, combine multiple databaselets (that share the same genes, ie m_vecStrGenes),
 	and output result to a single file, or output one-gene per file (if databaselet contains multiple genes)
  	bSplit: whether or not to output one-gene per file
-	WORKS FOR BYTE ONLY
+	Works for both nibble and byte
 */
-bool CDatabaselet::Combine(std::vector<CDatabaselet>& vecDatabaselet,
+bool CDatabaselet::Combine(std::vector<CDatabaselet*>& vecDatabaselet,
 		std::string strOutDirectory, bool bSplit){
 
 	/* for checking on consistency of databaselets */
-	bool bIsConsistent = false;
+	bool bIsConsistent = true;
+	bool fUseNibble;
 
 	size_t i, j;
 	uint32_t iGenes, iDatasets;
 
-	CDatabaselet *first = &vecDatabaselet[0];
+	CDatabaselet *first = vecDatabaselet[0];
+	fUseNibble = first->m_useNibble;
 
 	iGenes = first->GetGenes();
 	iDatasets = first->GetDatasets();
 	}
 
 	for(i=1; bIsConsistent && i<vecDatabaselet.size(); i++){
-		if(iGenes!=vecDatabaselet[i].GetGenes()){
+		if(iGenes!=vecDatabaselet[i]->GetGenes() || fUseNibble!=vecDatabaselet[i]->m_useNibble){
 			bIsConsistent = false;
 			break;
 		}
 		for(j=0; j<iGenes; j++){
-			if(vecGenes[j]!=vecDatabaselet[i].GetGene(j)){
+			if(vecGenes[j]!=vecDatabaselet[i]->GetGene(j)){
 				bIsConsistent = false;
 				break;
 			}
 		}
-		iDatasets+=vecDatabaselet[i].GetDatasets();
+		iDatasets+=vecDatabaselet[i]->GetDatasets();
 	}
 
 	if(!bIsConsistent){
 	/* load all Databaselets into memory, for efficiency */
 	unsigned char **charImages =
 			(unsigned char**)malloc(vecDatabaselet.size()*sizeof(unsigned char*));
-	size_t iImageSize = first->GetSizeGenes();
-	charImages[0] = (unsigned char*)malloc(vecDatabaselet.size()*iImageSize*sizeof(unsigned char));
+	size_t iImageSize = iDatasets * iGenes * first->m_iGenes;
+	charImages[0] = (unsigned char*)malloc(iImageSize*sizeof(unsigned char));
 	for(i=1; i<vecDatabaselet.size(); i++){
-		charImages[i] = charImages[i-1] + iImageSize;
+		charImages[i] = charImages[i-1] + vecDatabaselet[i-1]->m_iDatasets * first->m_iGenes * iGenes;
 	}
 
-
 	/* read databaselet into charImages */
 	for(i=0; i<vecDatabaselet.size(); i++){
-		CDatabaselet *current = &vecDatabaselet[i];
+		CDatabaselet *current = vecDatabaselet[i];
 		if(current->m_fstm.is_open()){
 			current->m_fstm.seekg(current->m_iHeader, ios_base::beg);
 			current->m_fstm.read((char*) charImages[i], iImageSize);
 			string path = strOutDirectory + "/" + thisGene + ".db";
 			vector<string> vecstrThisGene;
 			vecstrThisGene.push_back(thisGene);
-			CDatabaselet DBS(false);
+
+			/* Create a new Databaselet */
+			size_t iSize;
+			CDatabaselet DBS(first->m_useNibble);
 			DBS.Open(path.c_str(), vecstrThisGene, first->m_iGenes, iDatasets);
+			unsigned char *abImage = (unsigned char*)
+				malloc( iSize = (DBS.GetSizeGene( ) * DBS.m_vecstrGenes.size( ) ));
+			size_t iDatum;
+			size_t iGeneOne, iGeneTwo;
+			size_t offset2, offset3;
+			iGeneOne = i;
 
+			if(first->m_useNibble==false){
+				/* m_iGenes is all the genes in the genome */
+				for( iGeneTwo = 0; iGeneTwo < first->m_iGenes; ++iGeneTwo ){
+					offset2 = DBS.GetSizePair()*iGeneTwo;
+					int totalSum = 0;
+					for( iDatum = 0; iDatum  < vecDatabaselet.size(); iDatum ++ ){
+						vector<unsigned char> vc;
+						CDatabaselet *current = vecDatabaselet[iDatum];
+						current->Get( iGeneOne, iGeneTwo, vc, charImages[iDatum]);
+						offset3 = offset2 + totalSum;
+						for(j=0; j<vc.size(); j++){
+							abImage[offset3 + j] = vc[j];
+						}
+						totalSum+=vc.size();
+					}
+				}
+			}else{
+				size_t j;
+				unsigned char *abImage2 = (unsigned char*)
+					malloc(iDatasets);
 
-			size_t iDatum;
-			size_t iSize;
+				/* m_iGenes is all the genes in the genome */
+				for( iGeneTwo = 0; iGeneTwo < first->m_iGenes; ++iGeneTwo ){
+					offset2 = DBS.GetSizePair() * iGeneTwo;
+					int totalSum = 0;
+					for( iDatum = 0; iDatum  < vecDatabaselet.size(); iDatum ++ ){
+						vector<unsigned char> vc;
+						CDatabaselet *current = vecDatabaselet[iDatum];
+						current->Get( iGeneOne, iGeneTwo, vc, charImages[iDatum]);
+						offset3 = totalSum;
+						for(j=0; j<vc.size(); j++){
+							abImage2[offset3+j] = vc[j];
+						}
+						totalSum+=vc.size();
+					}
+					for(j=0; j+1 < iDatasets; j+=2){
+						abImage[offset2 + j / 2] = (abImage2[j] & 0xF) | (abImage2[j+1] << 4);
+					}
+					if(j<iDatasets){
+						unsigned char bValue = abImage2[iDatasets - 1];
+						unsigned char b = 255;
+						abImage[offset2 + j / 2] = ( bValue & 0xF ) | ( b & 0xF0 );
+					}
+				}
 
-			unsigned char *abImage = (unsigned char*)
-					malloc( iSize = (DBS.GetSizeGene( ) * DBS.m_vecstrGenes.size( ) ));
-
-			size_t iGeneOne, iGeneTwo;
-			size_t offset1, offset2, offset3, offset4;
-
-			iGeneOne = i;
-			offset1 = 0;
-
-			/* m_iGenes is all the genes in the genome */
-			for( iGeneTwo = 0; iGeneTwo < first->m_iGenes; ++iGeneTwo ){
-				offset2 = DBS.GetSizePair()*iGeneTwo;
-				int totalSum = 0;
-				for( iDatum = 0; iDatum  < vecDatabaselet.size(); iDatum ++ ){
-					vector<unsigned char> vc;
-					CDatabaselet *current = &vecDatabaselet[iDatum];
-					current->Get( iGeneOne, iGeneTwo, vc, charImages[iDatum]);
-
-					offset3 = offset1 + offset2 + totalSum;
-
-					for(j=0; j<vc.size(); j++){
-						abImage[offset3 + j] = vc[j];
-					}
-
-					totalSum+=vc.size();
-				}
+				free(abImage2);
 			}
 
 			/* close fstream */
 		CMeta::Tokenize(first->strFileName.c_str(), strTok, "/");
 		string path = strOutDirectory + "/" + strTok[strTok.size()-1];
 
-		CDatabaselet DBS(false);
+		CDatabaselet DBS(first->m_useNibble);
 
 		DBS.Open(path.c_str(), first->m_vecstrGenes, first->m_iGenes, iDatasets);
+
 		size_t iDatum;
 		size_t iSize;
-
 		unsigned char *abImage = (unsigned char*)
 				malloc( iSize = (DBS.GetSizeGene( ) * DBS.m_vecstrGenes.size( ) ) );
 		size_t iGeneOne, iGeneTwo;
-		size_t offset1, offset2, offset3, offset4;
+		size_t offset1, offset2, offset3;
 
-		for(iGeneOne = 0; iGeneOne < first->GetGenes(); ++iGeneOne){
-			offset1 = DBS.GetSizeGene() * iGeneOne;
-			for( iGeneTwo = 0; iGeneTwo < first->m_iGenes; ++iGeneTwo ){
-				offset2 = DBS.GetSizePair()*iGeneTwo;
-				int totalSum = 0;
-				for( iDatum = 0; iDatum  < vecDatabaselet.size(); iDatum ++ ){
-					vector<unsigned char> vc;
-					CDatabaselet *current = &vecDatabaselet[iDatum];
-					current->Get( iGeneOne, iGeneTwo, vc, charImages[iDatum]);
-
-					offset3 = offset1 + offset2 + totalSum;
-
-					for(j=0; j<vc.size(); j++){
-						abImage[offset3 + j] = vc[j];
+		if(first->m_useNibble==false){
+			for(iGeneOne = 0; iGeneOne < first->GetGenes(); ++iGeneOne){
+				offset1 = DBS.GetSizeGene() * iGeneOne;
+				for( iGeneTwo = 0; iGeneTwo < first->m_iGenes; ++iGeneTwo ){
+					offset2 = DBS.GetSizePair()*iGeneTwo;
+					int totalSum = 0;
+					for( iDatum = 0; iDatum  < vecDatabaselet.size(); iDatum ++ ){
+						vector<unsigned char> vc;
+						CDatabaselet *current = vecDatabaselet[iDatum];
+						current->Get( iGeneOne, iGeneTwo, vc, charImages[iDatum]);
+						offset3 = offset1 + offset2 + totalSum;
+						for(j=0; j<vc.size(); j++){
+							abImage[offset3 + j] = vc[j];
+						}
+						totalSum+=vc.size();
 					}
-
-					totalSum+=vc.size();
 				}
 			}
+		}else{
+			size_t j;
+			unsigned char *abImage2 = (unsigned char*)
+				malloc(DBS.m_iDatasets);
+			/* m_iGenes is all the genes in the genome */
+			for(iGeneOne = 0; iGeneOne < first->GetGenes(); ++iGeneOne){
+				offset1 = DBS.GetSizeGene() * iGeneOne;
+				for( iGeneTwo = 0; iGeneTwo < first->m_iGenes; ++iGeneTwo ){
+					offset2 = DBS.GetSizePair()*iGeneTwo;
+					int totalSum = 0;
+					for( iDatum = 0; iDatum  < vecDatabaselet.size(); iDatum ++ ){
+						vector<unsigned char> vc;
+						CDatabaselet *current = vecDatabaselet[iDatum];
+						current->Get( iGeneOne, iGeneTwo, vc, charImages[iDatum]);
+						offset3 = totalSum;
+						for(j=0; j<vc.size(); j++){
+							abImage2[offset3 + j] = vc[j];
+						}
+						totalSum+=vc.size();
+					}
+					for(j=0; j+1 < iDatasets; j+=2){
+						abImage[offset1 + offset2 + j / 2] = (abImage2[j] & 0xF) | (abImage2[j+1] << 4);
+					}
+					if(j<iDatasets){
+						unsigned char bValue = abImage2[iDatasets - 1];
+						unsigned char b = 255;
+						abImage[offset1 + offset2 + j / 2] = ( bValue & 0xF ) | ( b & 0xF0 );
+					}
+				}
+			}
+			free(abImage2);
 		}
 
 		/* close the databaselet */
 		m_vecstrGenes[ i ] = pc;
 	delete[] acBuffer;
 
+	SetFile(strFile);
+
 	return true; }
 
 ///////////////////////////////////////////////////////////////////////////////
 		}
 
 		/* save the file name */
-		m_vecpDBs[i]->SetFile(strFile);
+		//m_vecpDBs[i]->SetFile(strFile);
 
 		if( !( i % 100 ) )
 			g_CatSleipnir( ).notice( "CDatabase::Open( %s, %d ) initializing file %d/%d",

File src/databasei.h

View file
 	bool Get( size_t, std::vector<unsigned char>&, bool ) const;
 	bool Get( size_t, const std::vector<size_t>&, std::vector<unsigned char>&, bool ) const;
 
-	static bool Combine(std::vector<CDatabaselet>& vecDatabaselet,
+	static bool Combine(std::vector<CDatabaselet*>& vecDatabaselet,
 			std::string strOutDirectory, bool bSplit = true);
 
 	size_t GetGenes( ) const {

File tools/DBCombiner/DBCombiner.cpp

View file
+/*****************************************************************************
+* This file is provided under the Creative Commons Attribution 3.0 license.
+*
+* You are free to share, copy, distribute, transmit, or adapt this work
+* PROVIDED THAT you attribute the work to the authors listed below.
+* For more information, please see the following web page:
+* http://creativecommons.org/licenses/by/3.0/
+*
+* This file is a component of the Sleipnir library for functional genomics,
+* authored by:
+* Curtis Huttenhower (chuttenh@princeton.edu)
+* Mark Schroeder
+* Maria D. Chikina
+* Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
+*
+* If you use this library, the included executable tools, or any related
+* code in your work, please cite the following publication:
+* Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
+* Olga G. Troyanskaya.
+* "The Sleipnir library for computational functional genomics"
+*****************************************************************************/
+#include "stdafx.h"
+#include "cmdline.h"
+
+
+/**
+ * DBCombiner entry point.
+ *
+ * 1. Parses the command line (see DBCombiner.ggo).
+ * 2. Reads the gene mapping ("<1-based id> <gene name>" per line) from -i, or
+ *    from standard input when -i is not given.
+ * 3. Reads the list of databaselet files named by -x, one path per line,
+ *    stopping at the first empty line.
+ * 4. Opens every listed databaselet and hands them to CDatabaselet::Combine,
+ *    which writes the combined result into the -D directory.
+ *
+ * Returns 0 on success and 1 on any argument, I/O or combination failure.
+ */
+int main( int iArgs, char** aszArgs ) {
+	static const size_t	c_iBuffer	= 1024;
+#ifdef WIN32
+	pthread_win32_process_attach_np( );
+#endif // WIN32
+	gengetopt_args_info		sArgs;
+	ifstream				ifsm;
+	istream*				pistm;
+	vector<string>			vecstrLine, vecstrGenes, vecstrDBs;
+	vector<CDatabaselet*>	vecpDBs;
+	char					acBuffer[ c_iBuffer ];
+	size_t					i;
+	bool					fOK;
+
+	if( cmdline_parser( iArgs, aszArgs, &sArgs ) ) {
+		cmdline_parser_print_help( );
+		return 1; }
+
+	/* Gene mapping: from the -i file if given, otherwise from stdin. */
+	if( sArgs.input_arg ) {
+		ifsm.open( sArgs.input_arg );
+		if( !ifsm.is_open( ) ) {
+			/* Without this check the eof() loop below would never terminate. */
+			cerr << "Could not open: " << sArgs.input_arg << endl;
+			return 1; }
+		pistm = &ifsm; }
+	else
+		pistm = &cin;
+	while( !pistm->eof( ) ) {
+		pistm->getline( acBuffer, c_iBuffer - 1 );
+		acBuffer[ c_iBuffer - 1 ] = 0;
+		vecstrLine.clear( );
+		CMeta::Tokenize( acBuffer, vecstrLine );
+		if( vecstrLine.size( ) < 2 ) {
+			cerr << "Ignoring line: " << acBuffer << endl;
+			continue; }
+		if( !( i = atoi( vecstrLine[ 0 ].c_str( ) ) ) ) {
+			cerr << "Illegal gene ID: " << vecstrLine[ 0 ] << " for " << vecstrLine[ 1 ] << endl;
+			return 1; }
+		/* IDs in the mapping file are 1-based. */
+		i--;
+		if( vecstrGenes.size( ) <= i )
+			vecstrGenes.resize( i + 1 );
+		vecstrGenes[ i ] = vecstrLine[ 1 ]; }
+	if( sArgs.input_arg )
+		ifsm.close( );
+
+	const bool useNibble = ( sArgs.is_nibble_flag == 1 );
+	const bool fSplit = ( sArgs.split_flag == 1 );
+
+	/* NOTE(review): DB is never used below; kept only in case its construction matters. */
+	CDatabase DB(useNibble);
+
+	if( !sArgs.db_arg ) {
+		cerr << "Must give a db list." << endl;
+		return 1; }
+
+	/* Read the databaselet list directly from its own file.  The stream is
+	 * reused, so reset any eof/fail state left over from the gene mapping. */
+	ifsm.clear( );
+	ifsm.open( sArgs.db_arg );
+	if( !ifsm.is_open( ) ) {
+		cerr << "Could not open: " << sArgs.db_arg << endl;
+		return 1; }
+	while( ifsm.getline( acBuffer, c_iBuffer - 1 ) ) {
+		if( !acBuffer[ 0 ] )
+			break;
+		vecstrDBs.push_back( acBuffer ); }
+	ifsm.close( );
+	if( vecstrDBs.empty( ) ) {
+		/* Combine indexes element 0 unconditionally, so refuse an empty list. */
+		cerr << "No databaselets listed in: " << sArgs.db_arg << endl;
+		return 1; }
+
+	fOK = true;
+	vecpDBs.reserve( vecstrDBs.size( ) );
+	for( i = 0; fOK && ( i < vecstrDBs.size( ) ); ++i ) {
+		vecpDBs.push_back( new CDatabaselet( useNibble ) );
+		if( !vecpDBs.back( )->Open( vecstrDBs[ i ] ) ) {
+			cerr << "Could not open databaselet: " << vecstrDBs[ i ] << endl;
+			fOK = false; } }
+
+	if( fOK && !CDatabaselet::Combine( vecpDBs, sArgs.dir_out_arg, fSplit ) ) {
+		cerr << "Could not combine databaselets into: " << sArgs.dir_out_arg << endl;
+		fOK = false; }
+
+	/* Objects were created with new, so they must be released with delete. */
+	for( i = 0; i < vecpDBs.size( ); ++i )
+		delete vecpDBs[ i ];
+
+#ifdef WIN32
+	pthread_win32_process_detach_np( );
+#endif // WIN32
+	return ( fOK ? 0 : 1 ); }

File tools/DBCombiner/DBCombiner.ggo

View file
+package	"DBCombiner"
+version	"1.0"
+purpose	"Combines a list of DB files with the same gene content"
+
+section "Main"
+option	"db"				x	"Input a set of databaselet filenames"
+								string typestr="filename"
+option	"input"				i	"Input gene mapping"
+								string	typestr="filename"	
+option	"dir_in"			d	"Data directory"
+								string	typestr="directory"	default="."
+option	"dir_out"			D	"Database directory"
+								string	typestr="directory"	default="."
+option	"is_nibble"			N	"Whether the input DB is nibble type"
+								flag	off
+option	"split"				s	"Split to one-gene per file"
+								flag	off

File tools/DBCombiner/cmdline.c

View file
+/*
+  File autogenerated by gengetopt version 2.22.5
+  generated with the following command:
+  gengetopt -iDBCombiner.ggo --default-optional -u -N -e 
+
+  The developers of gengetopt consider the fixed text that goes in all
+  gengetopt output files to be in the public domain:
+  we make no copyright claims on it.
+*/
+
+/* If we use autoconf.  */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifndef FIX_UNUSED
+#define FIX_UNUSED(X) (void) (X) /* avoid warnings for unused params */
+#endif
+
+#include <getopt.h>
+
+#include "cmdline.h"
+
+const char *gengetopt_args_info_purpose = "Combines a list of DB files with the same gene content";
+
+const char *gengetopt_args_info_usage = "Usage: DBCombiner [OPTIONS]... [FILES]...";
+
+const char *gengetopt_args_info_description = "";
+
+const char *gengetopt_args_info_help[] = {
+  "  -h, --help               Print help and exit",
+  "  -V, --version            Print version and exit",
+  "\nMain:",
+  "  -x, --db=filename        Input a set of databaselet filenames",
+  "  -i, --input=filename     Input gene mapping",
+  "  -d, --dir_in=directory   Data directory  (default=`.')",
+  "  -D, --dir_out=directory  Database directory  (default=`.')",
+  "  -N, --is_nibble          Whether the input DB is nibble type  (default=off)",
+  "  -s, --split              Split to one-gene per file  (default=off)",
+    0
+};
+
+typedef enum {ARG_NO
+  , ARG_FLAG
+  , ARG_STRING
+} cmdline_parser_arg_type;
+
+static
+void clear_given (struct gengetopt_args_info *args_info);
+static
+void clear_args (struct gengetopt_args_info *args_info);
+
+static int
+cmdline_parser_internal (int argc, char **argv, struct gengetopt_args_info *args_info,
+                        struct cmdline_parser_params *params, const char *additional_error);
+
+
+static char *
+gengetopt_strdup (const char *s);
+
+/* Reset the "given" occurrence counter of every option to 0. */
+static
+void clear_given (struct gengetopt_args_info *args_info)
+{
+  args_info->help_given = 0 ;
+  args_info->version_given = 0 ;
+  args_info->db_given = 0 ;
+  args_info->input_given = 0 ;
+  args_info->dir_in_given = 0 ;
+  args_info->dir_out_given = 0 ;
+  args_info->is_nibble_given = 0 ;
+  args_info->split_given = 0 ;
+}
+
+static
+void clear_args (struct gengetopt_args_info *args_info)
+{
+  FIX_UNUSED (args_info);
+  args_info->db_arg = NULL;
+  args_info->db_orig = NULL;
+  args_info->input_arg = NULL;
+  args_info->input_orig = NULL;
+  args_info->dir_in_arg = gengetopt_strdup (".");
+  args_info->dir_in_orig = NULL;
+  args_info->dir_out_arg = gengetopt_strdup (".");
+  args_info->dir_out_orig = NULL;
+  args_info->is_nibble_flag = 0;
+  args_info->split_flag = 0;
+  
+}
+
+static
+void init_args_info(struct gengetopt_args_info *args_info)
+{
+
+
+  args_info->help_help = gengetopt_args_info_help[0] ;
+  args_info->version_help = gengetopt_args_info_help[1] ;
+  args_info->db_help = gengetopt_args_info_help[3] ;
+  args_info->input_help = gengetopt_args_info_help[4] ;
+  args_info->dir_in_help = gengetopt_args_info_help[5] ;
+  args_info->dir_out_help = gengetopt_args_info_help[6] ;
+  args_info->is_nibble_help = gengetopt_args_info_help[7] ;
+  args_info->split_help = gengetopt_args_info_help[8] ;
+  
+}
+
+void
+cmdline_parser_print_version (void)
+{
+  printf ("%s %s\n",
+     (strlen(CMDLINE_PARSER_PACKAGE_NAME) ? CMDLINE_PARSER_PACKAGE_NAME : CMDLINE_PARSER_PACKAGE),
+     CMDLINE_PARSER_VERSION);
+}
+
+static void print_help_common(void) {
+  cmdline_parser_print_version ();
+
+  if (strlen(gengetopt_args_info_purpose) > 0)
+    printf("\n%s\n", gengetopt_args_info_purpose);
+
+  if (strlen(gengetopt_args_info_usage) > 0)
+    printf("\n%s\n", gengetopt_args_info_usage);
+
+  printf("\n");
+
+  if (strlen(gengetopt_args_info_description) > 0)
+    printf("%s\n\n", gengetopt_args_info_description);
+}
+
+/* Print the common header (version, purpose, usage, description) followed by
+   every entry of gengetopt_args_info_help up to its terminating null entry. */
+void
+cmdline_parser_print_help (void)
+{
+  int i = 0;
+  print_help_common();
+  while (gengetopt_args_info_help[i])
+    printf("%s\n", gengetopt_args_info_help[i++]);
+}
+
+void
+cmdline_parser_init (struct gengetopt_args_info *args_info)
+{
+  clear_given (args_info);
+  clear_args (args_info);
+  init_args_info (args_info);
+
+  args_info->inputs = 0;
+  args_info->inputs_num = 0;
+}
+
+void
+cmdline_parser_params_init(struct cmdline_parser_params *params)
+{
+  if (params)
+    { 
+      params->override = 0;
+      params->initialize = 1;
+      params->check_required = 1;
+      params->check_ambiguity = 0;
+      params->print_errors = 1;
+    }
+}
+
+struct cmdline_parser_params *
+cmdline_parser_params_create(void)
+{
+  struct cmdline_parser_params *params = 
+    (struct cmdline_parser_params *)malloc(sizeof(struct cmdline_parser_params));
+  cmdline_parser_params_init(params);  
+  return params;
+}
+
+/* Free the string that *s points to, if any, and set *s to null so that a
+   second call on the same field is a no-op. */
+static void
+free_string_field (char **s)
+{
+  if (*s)
+    {
+      free (*s);
+      *s = 0;
+    }
+}
+
+
+static void
+cmdline_parser_release (struct gengetopt_args_info *args_info)
+{
+  unsigned int i;
+  free_string_field (&(args_info->db_arg));
+  free_string_field (&(args_info->db_orig));
+  free_string_field (&(args_info->input_arg));
+  free_string_field (&(args_info->input_orig));
+  free_string_field (&(args_info->dir_in_arg));
+  free_string_field (&(args_info->dir_in_orig));
+  free_string_field (&(args_info->dir_out_arg));
+  free_string_field (&(args_info->dir_out_orig));
+  
+  
+  for (i = 0; i < args_info->inputs_num; ++i)
+    free (args_info->inputs [i]);
+
+  if (args_info->inputs_num)
+    free (args_info->inputs);
+
+  clear_given (args_info);
+}
+
+
+/* Write one option to outfile: `opt="arg"` when arg is non-null, otherwise
+   just `opt`, each followed by a newline.  `values` is accepted but unused. */
+static void
+write_into_file(FILE *outfile, const char *opt, const char *arg, const char *values[])
+{
+  FIX_UNUSED (values);
+  if (arg) {
+    fprintf(outfile, "%s=\"%s\"\n", opt, arg);
+  } else {
+    fprintf(outfile, "%s\n", opt);
+  }
+}
+
+
+int
+cmdline_parser_dump(FILE *outfile, struct gengetopt_args_info *args_info)
+{
+  int i = 0;
+
+  if (!outfile)
+    {
+      fprintf (stderr, "%s: cannot dump options to stream\n", CMDLINE_PARSER_PACKAGE);
+      return EXIT_FAILURE;
+    }
+
+  if (args_info->help_given)
+    write_into_file(outfile, "help", 0, 0 );
+  if (args_info->version_given)
+    write_into_file(outfile, "version", 0, 0 );
+  if (args_info->db_given)
+    write_into_file(outfile, "db", args_info->db_orig, 0);
+  if (args_info->input_given)
+    write_into_file(outfile, "input", args_info->input_orig, 0);
+  if (args_info->dir_in_given)
+    write_into_file(outfile, "dir_in", args_info->dir_in_orig, 0);
+  if (args_info->dir_out_given)
+    write_into_file(outfile, "dir_out", args_info->dir_out_orig, 0);
+  if (args_info->is_nibble_given)
+    write_into_file(outfile, "is_nibble", 0, 0 );
+  if (args_info->split_given)
+    write_into_file(outfile, "split", 0, 0 );
+  
+
+  i = EXIT_SUCCESS;
+  return i;
+}
+
+/* Save the given options to the file `filename` (opened for writing, so any
+   existing content is replaced) by delegating to cmdline_parser_dump.
+   Returns EXIT_FAILURE if the file cannot be opened, otherwise the result of
+   cmdline_parser_dump. */
+int
+cmdline_parser_file_save(const char *filename, struct gengetopt_args_info *args_info)
+{
+  FILE *outfile;
+  int i = 0;
+
+  outfile = fopen(filename, "w");
+
+  if (!outfile)
+    {
+      fprintf (stderr, "%s: cannot open file for writing: %s\n", CMDLINE_PARSER_PACKAGE, filename);
+      return EXIT_FAILURE;
+    }
+
+  i = cmdline_parser_dump(outfile, args_info);
+  fclose (outfile);
+
+  return i;
+}
+
+void
+cmdline_parser_free (struct gengetopt_args_info *args_info)
+{
+  cmdline_parser_release (args_info);
+}
+
+/** @brief replacement of strdup, which is not standard
+ *  @param s the string to copy (may be null)
+ *  @return a newly malloc'ed copy of s that the caller must free, or null
+ *          when s is null or the allocation fails
+ */
+char *
+gengetopt_strdup (const char *s)
+{
+  char *result = 0;
+  if (!s)
+    return result;
+
+  result = (char*)malloc(strlen(s) + 1);
+  if (result == (char*)0)
+    return (char*)0;
+  strcpy(result, s);
+  return result;
+}
+
+int
+cmdline_parser (int argc, char **argv, struct gengetopt_args_info *args_info)
+{
+  return cmdline_parser2 (argc, argv, args_info, 0, 1, 1);
+}
+
+int
+cmdline_parser_ext (int argc, char **argv, struct gengetopt_args_info *args_info,
+                   struct cmdline_parser_params *params)
+{
+  int result;
+  result = cmdline_parser_internal (argc, argv, args_info, params, 0);
+
+  return result;
+}
+
+int
+cmdline_parser2 (int argc, char **argv, struct gengetopt_args_info *args_info, int override, int initialize, int check_required)
+{
+  int result;
+  struct cmdline_parser_params params;
+  
+  params.override = override;
+  params.initialize = initialize;
+  params.check_required = check_required;
+  params.check_ambiguity = 0;
+  params.print_errors = 1;
+
+  result = cmdline_parser_internal (argc, argv, args_info, &params, 0);
+
+  return result;
+}
+
+int
+cmdline_parser_required (struct gengetopt_args_info *args_info, const char *prog_name)
+{
+  FIX_UNUSED (args_info);
+  FIX_UNUSED (prog_name);
+  return EXIT_SUCCESS;
+}
+
+
+static char *package_name = 0;
+
+/**
+ * @brief updates an option
+ * @param field the generic pointer to the field to update
+ * @param orig_field the pointer to the orig field
+ * @param field_given the pointer to the number of occurrence of this option
+ * @param prev_given the pointer to the number of occurrence already seen
+ * @param value the argument for this option (if null no arg was specified)
+ * @param possible_values the possible values for this option (if specified)
+ * @param default_value the default value (in case the option only accepts fixed values)
+ * @param arg_type the type of this option
+ * @param check_ambiguity @see cmdline_parser_params.check_ambiguity
+ * @param override @see cmdline_parser_params.override
+ * @param no_free whether to free a possible previous value
+ * @param multiple_option whether this is a multiple option
+ * @param long_opt the corresponding long option
+ * @param short_opt the corresponding short option (or '-' if none)
+ * @param additional_error possible further error specification
+ */
+static
+int update_arg(void *field, char **orig_field,
+               unsigned int *field_given, unsigned int *prev_given, 
+               char *value, const char *possible_values[],
+               const char *default_value,
+               cmdline_parser_arg_type arg_type,
+               int check_ambiguity, int override,
+               int no_free, int multiple_option,
+               const char *long_opt, char short_opt,
+               const char *additional_error)
+{
+  char *stop_char = 0;
+  const char *val = value;
+  int found;
+  char **string_field;
+  FIX_UNUSED (field);
+
+  stop_char = 0;
+  found = 0;
+
+  if (!multiple_option && prev_given && (*prev_given || (check_ambiguity && *field_given)))
+    {
+      if (short_opt != '-')
+        fprintf (stderr, "%s: `--%s' (`-%c') option given more than once%s\n", 
+               package_name, long_opt, short_opt,
+               (additional_error ? additional_error : ""));
+      else
+        fprintf (stderr, "%s: `--%s' option given more than once%s\n", 
+               package_name, long_opt,
+               (additional_error ? additional_error : ""));
+      return 1; /* failure */
+    }
+
+  FIX_UNUSED (default_value);
+    
+  if (field_given && *field_given && ! override)
+    return 0;
+  if (prev_given)
+    (*prev_given)++;
+  if (field_given)
+    (*field_given)++;
+  if (possible_values)
+    val = possible_values[found];
+
+  switch(arg_type) {
+  case ARG_FLAG:
+    *((int *)field) = !*((int *)field);
+    break;
+  case ARG_STRING:
+    if (val) {
+      string_field = (char **)field;
+      if (!no_free && *string_field)
+        free (*string_field); /* free previous string */
+      *string_field = gengetopt_strdup (val);
+    }
+    break;
+  default:
+    break;
+  };
+
+
+  /* store the original value */
+  switch(arg_type) {
+  case ARG_NO:
+  case ARG_FLAG:
+    break;
+  default:
+    if (value && orig_field) {
+      if (no_free) {
+        *orig_field = value;
+      } else {
+        if (*orig_field)
+          free (*orig_field); /* free previous string */
+        *orig_field = gengetopt_strdup (value);
+      }
+    }
+  };
+
+  return 0; /* OK */
+}
+
+
+int
+cmdline_parser_internal (
+  int argc, char **argv, struct gengetopt_args_info *args_info,
+                        struct cmdline_parser_params *params, const char *additional_error)
+{
+  int c;	/* Character of the parsed option.  */
+
+  int error = 0;
+  struct gengetopt_args_info local_args_info;
+  
+  int override;
+  int initialize;
+  int check_required;
+  int check_ambiguity;
+  
+  package_name = argv[0];
+  
+  override = params->override;
+  initialize = params->initialize;
+  check_required = params->check_required;
+  check_ambiguity = params->check_ambiguity;
+
+  if (initialize)
+    cmdline_parser_init (args_info);
+
+  cmdline_parser_init (&local_args_info);
+
+  optarg = 0;
+  optind = 0;
+  opterr = params->print_errors;
+  optopt = '?';
+
+  while (1)
+    {
+      int option_index = 0;
+
+      static struct option long_options[] = {
+        { "help",	0, NULL, 'h' },
+        { "version",	0, NULL, 'V' },
+        { "db",	1, NULL, 'x' },
+        { "input",	1, NULL, 'i' },
+        { "dir_in",	1, NULL, 'd' },
+        { "dir_out",	1, NULL, 'D' },
+        { "is_nibble",	0, NULL, 'N' },
+        { "split",	0, NULL, 's' },
+        { 0,  0, 0, 0 }
+      };
+
+      c = getopt_long (argc, argv, "hVx:i:d:D:Ns", long_options, &option_index);
+
+      if (c == -1) break;	/* Exit from `while (1)' loop.  */
+
+      switch (c)
+        {
+        case 'h':	/* Print help and exit.  */
+          cmdline_parser_print_help ();
+          cmdline_parser_free (&local_args_info);
+          exit (EXIT_SUCCESS);
+
+        case 'V':	/* Print version and exit.  */
+        
+        
+          if (update_arg( 0 , 
+               0 , &(args_info->version_given),
+              &(local_args_info.version_given), optarg, 0, 0, ARG_NO,
+              check_ambiguity, override, 0, 0,
+              "version", 'V',
+              additional_error))
+            goto failure;
+          cmdline_parser_free (&local_args_info);
+          return 0;
+        
+          break;
+        case 'x':	/* Input a set of databaselet filenames.  */
+        
+        
+          if (update_arg( (void *)&(args_info->db_arg), 
+               &(args_info->db_orig), &(args_info->db_given),
+              &(local_args_info.db_given), optarg, 0, 0, ARG_STRING,
+              check_ambiguity, override, 0, 0,
+              "db", 'x',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'i':	/* Input gene mapping.  */
+        
+        
+          if (update_arg( (void *)&(args_info->input_arg), 
+               &(args_info->input_orig), &(args_info->input_given),
+              &(local_args_info.input_given), optarg, 0, 0, ARG_STRING,
+              check_ambiguity, override, 0, 0,
+              "input", 'i',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'd':	/* Data directory.  */
+        
+        
+          if (update_arg( (void *)&(args_info->dir_in_arg), 
+               &(args_info->dir_in_orig), &(args_info->dir_in_given),
+              &(local_args_info.dir_in_given), optarg, 0, ".", ARG_STRING,
+              check_ambiguity, override, 0, 0,
+              "dir_in", 'd',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'D':	/* Database directory.  */
+        
+        
+          if (update_arg( (void *)&(args_info->dir_out_arg), 
+               &(args_info->dir_out_orig), &(args_info->dir_out_given),
+              &(local_args_info.dir_out_given), optarg, 0, ".", ARG_STRING,
+              check_ambiguity, override, 0, 0,
+              "dir_out", 'D',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'N':	/* Whether the input DB is nibble type.  */
+        
+        
+          if (update_arg((void *)&(args_info->is_nibble_flag), 0, &(args_info->is_nibble_given),
+              &(local_args_info.is_nibble_given), optarg, 0, 0, ARG_FLAG,
+              check_ambiguity, override, 1, 0, "is_nibble", 'N',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 's':	/* Split to one-gene per file.  */
+        
+        
+          if (update_arg((void *)&(args_info->split_flag), 0, &(args_info->split_given),
+              &(local_args_info.split_given), optarg, 0, 0, ARG_FLAG,
+              check_ambiguity, override, 1, 0, "split", 's',
+              additional_error))
+            goto failure;
+        
+          break;
+
+        case 0:	/* Long option with no short option */
+        case '?':	/* Invalid option.  */
+          /* `getopt_long' already printed an error message.  */
+          goto failure;
+
+        default:	/* bug: option not considered.  */
+          fprintf (stderr, "%s: option unknown: %c%s\n", CMDLINE_PARSER_PACKAGE, c, (additional_error ? additional_error : ""));
+          abort ();
+        } /* switch */
+    } /* while */
+
+
+
+
+  cmdline_parser_release (&local_args_info);
+
+  if ( error )
+    return (EXIT_FAILURE);
+
+  if (optind < argc)
+    {
+      int i = 0 ;
+      int found_prog_name = 0;
+      /* whether program name, i.e., argv[0], is in the remaining args
+         (this may happen with some implementations of getopt,
+          but surely not with the one included by gengetopt) */
+
+      i = optind;
+      while (i < argc)
+        if (argv[i++] == argv[0]) {
+          found_prog_name = 1;
+          break;
+        }
+      i = 0;
+
+      args_info->inputs_num = argc - optind - found_prog_name;
+      args_info->inputs =
+        (char **)(malloc ((args_info->inputs_num)*sizeof(char *))) ;
+      while (optind < argc)
+        if (argv[optind++] != argv[0])
+          args_info->inputs[ i++ ] = gengetopt_strdup (argv[optind-1]) ;
+    }
+
+  return 0;
+
+failure:
+  
+  cmdline_parser_release (&local_args_info);
+  return (EXIT_FAILURE);
+}

File tools/DBCombiner/cmdline.h

View file
+/** @file cmdline.h
+ *  @brief The header file for the command line option parser
+ *  generated by GNU Gengetopt version 2.22.5
+ *  http://www.gnu.org/software/gengetopt.
+ *  DO NOT modify this file, since it can be overwritten
+ *  @author GNU Gengetopt by Lorenzo Bettini */
+
+#ifndef CMDLINE_H
+#define CMDLINE_H
+
+/* If we use autoconf.  */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h> /* for FILE */
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#ifndef CMDLINE_PARSER_PACKAGE
+/** @brief the program name (used for printing errors) */
+#define CMDLINE_PARSER_PACKAGE "DBCombiner"
+#endif
+
+#ifndef CMDLINE_PARSER_PACKAGE_NAME
+/** @brief the complete program name (used for help and version) */
+#define CMDLINE_PARSER_PACKAGE_NAME "DBCombiner"
+#endif
+
+#ifndef CMDLINE_PARSER_VERSION
+/** @brief the program version */
+#define CMDLINE_PARSER_VERSION "1.0"
+#endif
+
+/** @brief Where the command line options are stored
+ *
+ *  For each option the struct holds, where applicable, the parsed value
+ *  (`*_arg` or `*_flag`), the original command-line text (`*_orig`), the
+ *  help description (`*_help`), and whether it was given (`*_given`).
+ *  Arguments without an option name are collected in `inputs`. */
+struct gengetopt_args_info
+{
+  const char *help_help; /**< @brief Print help and exit help description.  */
+  const char *version_help; /**< @brief Print version and exit help description.  */
+  char * db_arg;	/**< @brief Input a set of databaselet filenames.  */
+  char * db_orig;	/**< @brief Input a set of databaselet filenames original value given at command line.  */
+  const char *db_help; /**< @brief Input a set of databaselet filenames help description.  */
+  char * input_arg;	/**< @brief Input gene mapping.  */
+  char * input_orig;	/**< @brief Input gene mapping original value given at command line.  */
+  const char *input_help; /**< @brief Input gene mapping help description.  */
+  char * dir_in_arg;	/**< @brief Data directory (default='.').  */
+  char * dir_in_orig;	/**< @brief Data directory original value given at command line.  */
+  const char *dir_in_help; /**< @brief Data directory help description.  */
+  char * dir_out_arg;	/**< @brief Database directory (default='.').  */
+  char * dir_out_orig;	/**< @brief Database directory original value given at command line.  */
+  const char *dir_out_help; /**< @brief Database directory help description.  */
+  int is_nibble_flag;	/**< @brief Whether the input DB is nibble type (default=off).  */
+  const char *is_nibble_help; /**< @brief Whether the input DB is nibble type help description.  */
+  int split_flag;	/**< @brief Split to one-gene per file (default=off).  */
+  const char *split_help; /**< @brief Split to one-gene per file help description.  */
+  
+  unsigned int help_given ;	/**< @brief Whether help was given.  */
+  unsigned int version_given ;	/**< @brief Whether version was given.  */
+  unsigned int db_given ;	/**< @brief Whether db was given.  */
+  unsigned int input_given ;	/**< @brief Whether input was given.  */
+  unsigned int dir_in_given ;	/**< @brief Whether dir_in was given.  */
+  unsigned int dir_out_given ;	/**< @brief Whether dir_out was given.  */
+  unsigned int is_nibble_given ;	/**< @brief Whether is_nibble was given.  */
+  unsigned int split_given ;	/**< @brief Whether split was given.  */
+
+  char **inputs ; /**< @brief unnamed options (options without names) */
+  unsigned inputs_num ; /**< @brief number of unnamed options */
+} ;
+
+/** @brief The additional parameters to pass to parser functions */
+struct cmdline_parser_params
+{
+  int override; /**< @brief whether to override possibly already present options (default 0) */
+  int initialize; /**< @brief whether to initialize the option structure gengetopt_args_info (default 1) */
+  int check_required; /**< @brief whether to check that all required options were provided (default 1) */
+  int check_ambiguity; /**< @brief whether to check for options already specified in the option structure gengetopt_args_info (default 0) */
+  int print_errors; /**< @brief whether getopt_long should print an error message for a bad option (default 1) */
+} ;
+
+/** @brief the purpose string of the program */
+extern const char *gengetopt_args_info_purpose;
+/** @brief the usage string of the program */
+extern const char *gengetopt_args_info_usage;
+/** @brief all the lines making the help output */
+extern const char *gengetopt_args_info_help[];
+
+/**
+ * The command line parser
+ * @param argc the number of command line options
+ * @param argv the command line options
+ * @param args_info the structure where option information will be stored
+ * @return 0 if everything went fine, NON 0 if an error took place
+ */
+int cmdline_parser (int argc, char **argv,
+  struct gengetopt_args_info *args_info);
+
+/**
+ * The command line parser (version with additional parameters - deprecated)
+ * @param argc the number of command line options
+ * @param argv the command line options
+ * @param args_info the structure where option information will be stored
+ * @param override whether to override possibly already present options
+ * @param initialize whether to initialize the option structure my_args_info
+ * @param check_required whether to check that all required options were provided
+ * @return 0 if everything went fine, NON 0 if an error took place
+ * @deprecated use cmdline_parser_ext() instead
+ */
+int cmdline_parser2 (int argc, char **argv,
+  struct gengetopt_args_info *args_info,
+  int override, int initialize, int check_required);
+
+/**
+ * The command line parser (version with additional parameters)
+ * @param argc the number of command line options
+ * @param argv the command line options
+ * @param args_info the structure where option information will be stored
+ * @param params additional parameters for the parser
+ * @return 0 if everything went fine, NON 0 if an error took place
+ */
+int cmdline_parser_ext (int argc, char **argv,
+  struct gengetopt_args_info *args_info,
+  struct cmdline_parser_params *params);
+
+/**
+ * Save the contents of the option struct into an already open FILE stream.
+ * @param outfile the stream where to dump options
+ * @param args_info the option struct to dump
+ * @return 0 if everything went fine, NON 0 if an error took place
+ */
+int cmdline_parser_dump(FILE *outfile,
+  struct gengetopt_args_info *args_info);
+
+/**
+ * Save the contents of the option struct into a (text) file.
+ * This file can be read by the config file parser (if generated by gengetopt)
+ * @param filename the file where to save
+ * @param args_info the option struct to save
+ * @return 0 if everything went fine, NON 0 if an error took place
+ */
+int cmdline_parser_file_save(const char *filename,
+  struct gengetopt_args_info *args_info);
+
+/**
+ * Print the help
+ */
+void cmdline_parser_print_help(void);
+/**
+ * Print the version
+ */
+void cmdline_parser_print_version(void);
+
+/**
+ * Initializes all the fields a cmdline_parser_params structure 
+ * to their default values
+ * @param params the structure to initialize
+ */
+void cmdline_parser_params_init(struct cmdline_parser_params *params);
+
+/**
+ * Allocates dynamically a cmdline_parser_params structure and initializes
+ * all its fields to their default values
+ * @return the created and initialized cmdline_parser_params structure
+ */
+struct cmdline_parser_params *cmdline_parser_params_create(void);
+
+/**
+ * Initializes the passed gengetopt_args_info structure's fields
+ * (also set default values for options that have a default)
+ * @param args_info the structure to initialize
+ */
+void cmdline_parser_init (struct gengetopt_args_info *args_info);
+/**
+ * Deallocates the string fields of the gengetopt_args_info structure
+ * (but does not deallocate the structure itself)
+ * @param args_info the structure to deallocate
+ */
+void cmdline_parser_free (struct gengetopt_args_info *args_info);
+
+/**
+ * Checks that all the required options were specified
+ * @param args_info the structure to check
+ * @param prog_name the name of the program that will be used to print
+ *   possible errors
+ * @return
+ */
+int cmdline_parser_required (struct gengetopt_args_info *args_info,
+  const char *prog_name);
+
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+#endif /* CMDLINE_H */

File tools/DBCombiner/stdafx.cpp

View file
+/*****************************************************************************
+* This file is provided under the Creative Commons Attribution 3.0 license.
+*
+* You are free to share, copy, distribute, transmit, or adapt this work
+* PROVIDED THAT you attribute the work to the authors listed below.
+* For more information, please see the following web page:
+* http://creativecommons.org/licenses/by/3.0/
+*
+* This file is a component of the Sleipnir library for functional genomics,
+* authored by:
+* Curtis Huttenhower (chuttenh@princeton.edu)
+* Mark Schroeder
+* Maria D. Chikina
+* Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
+*
+* If you use this library, the included executable tools, or any related
+* code in your work, please cite the following publication:
+* Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
+* Olga G. Troyanskaya.
+* "The Sleipnir library for computational functional genomics"
+*****************************************************************************/
+#include "stdafx.h"
+
+/*!
+ * \page DBCombiner DBCombiner
+ * 
+ * 
+ * \section sec_usage Usage
+ * 
+ * \subsection ssec_usage_basic Basic Usage
+ * 
+ * \code
+ * DBCombiner -i <genes.txt> -x <db list> -d <input directory> -D <output_dir>
+ * \endcode
+ * 
+ * 
+ * \subsection ssec_usage_detailed Detailed Usage
+ * 
+ * \include DBCombiner/DBCombiner.ggo
+ * 
+ * <table><tr>
+ *	<th>Flag</th>
+ *	<th>Default</th>
+ *	<th>Type</th>
+ *	<th>Description</th>
+ * </tr><tr>
+ *	<td>-i</td>
+ *	<td>stdin</td>
+ *	<td>Text file</td>
+ *	<td>Tab-delimited text file containing two columns, numerical gene IDs (one-based) and unique gene
+ *		names (matching those in the input DAT/DAB files).</td>
+ * </tr><tr>
+ *	<td>-d</td>
+ *	<td>.</td>
+ *	<td>Directory</td>
+ *	<td>Input directory containing DB files</td>
+ * </tr><tr>
+ *	<td>-D</td>
+ *	<td>.</td>
+ *	<td>Directory</td>
+ *	<td>Output directory in which database files will be stored.</td>
+ * </tr><tr>
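+ *	<td>-x</td>
+ *	<td>None</td>
+ *	<td>Text file</td>
+ *	<td>Input file containing the list of CDatabaselets to combine.</td>
+ * </tr><tr>
+ *	<td>-N</td>
+ *	<td>off</td>
+ *	<td>Flag</td>
+ *	<td>If on, the input DB files are treated as nibble type.</td>
+ * </tr><tr>
+ *	<td>-s</td>
+ *	<td>off</td>
+ *	<td>Flag</td>
+ *	<td>If on, split the output to one gene per file.</td>
+ * </tr></table>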
+ */

File tools/DBCombiner/stdafx.h

View file
+/*****************************************************************************
+* This file is provided under the Creative Commons Attribution 3.0 license.
+*
+* You are free to share, copy, distribute, transmit, or adapt this work
+* PROVIDED THAT you attribute the work to the authors listed below.
+* For more information, please see the following web page:
+* http://creativecommons.org/licenses/by/3.0/
+*
+* This file is a component of the Sleipnir library for functional genomics,
+* authored by:
+* Curtis Huttenhower (chuttenh@princeton.edu)
+* Mark Schroeder
+* Maria D. Chikina
+* Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
+*
+* If you use this library, the included executable tools, or any related
+* code in your work, please cite the following publication:
+* Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
+* Olga G. Troyanskaya.
+* "The Sleipnir library for computational functional genomics"
+*****************************************************************************/
+#ifndef STDAFX_H
+#define STDAFX_H
+
+#define __STDC_LIMIT_MACROS
+
+#include <fstream>
+#include <iostream>
+#include <map>
+#include <string>
+#include <omp.h>
+using namespace std;
+
+#include <pthread.h>
+
+#include "bayesnet.h"
+#include "database.h"
+#include "meta.h"
+using namespace Sleipnir;
+
+#endif // STDAFX_H

File tools/Data2DB/Data2DB.cpp

View file
 		}
 
 	}else if(sArgs.dataset_arg){
-		if(sArgs.fast_flag==1){
-			if(sArgs.buffer_flag!=1){
-				cerr << "-u must be included." << endl;
-				return 1;
-			}
-			if(sArgs.use_nibble_flag==1){
-				cerr << "-N must be removed." << endl;
-				return 1;
-			}
-			if(sArgs.block_files_arg!=-1){
-				cerr << "-b must be removed." << endl;
-				return 1;
-			}
-		}
 
 		ifsm.open(sArgs.dataset_arg);
-
 		while(!pistm->eof()){
 			pistm->getline(acBuffer, c_iBuffer -1);
 			if(acBuffer[0]==0){
 		vecstrDatasets.resize(vecstrDatasets.size());
 		ifsm.close();
 
-		if(sArgs.fast_flag==1){
-			if( !DB.OpenFast( vecstrGenes, vecstrDatasets, sArgs.dir_in_arg, sArgs.dir_out_arg, min((size_t)sArgs.files_arg,
-				vecstrGenes.size( )) ) ) {
-				cerr << "Could not open data" << endl;
-				return 1;
-			}
-		}else{
-			if( !DB.Open( vecstrGenes, vecstrDatasets, sArgs.dir_in_arg, sArgs.dir_out_arg, min((size_t)sArgs.files_arg,
-				vecstrGenes.size( ))) ) {
-				cerr << "Could not open data" << endl;
-				return 1;
-			}
+		if( !DB.Open( vecstrGenes, vecstrDatasets, sArgs.dir_in_arg, sArgs.dir_out_arg, min((size_t)sArgs.files_arg,
+			vecstrGenes.size( ))) ) {
+			cerr << "Could not open data" << endl;
+			return 1;
 		}
 
 	}else{

File tools/Data2DB/Data2DB.ggo

View file
 								int	default="-1"
 option	"use_nibble"		N	"Use nibble for compact storage"
 								flag	off
-option	"fast"				F	"Fast processing. Requires -u, -x. The options -n, -N, -b are not supported."
-								flag	off	dependon="buffer"	dependon="dataset"
 
 section "Optional"
 option	"buffer"			u	"Memory buffer disk writes"

File tools/Data2DB/cmdline.c

View file
 /*
   File autogenerated by gengetopt version 2.22.5
   generated with the following command:
-  /usr/bin/gengetopt -iData2DB.ggo --default-optional -u -N -e 
+  gengetopt -iData2DB.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:
   "  -b, --block_files=INT     Number of database files per block  (default=`-1')",
   "  -B, --block_datasets=INT  Number of datasets per block  (default=`-1')",
   "  -N, --use_nibble          Use nibble for compact storage  (default=off)",
-  "  -F, --fast                Fast processing. Requires -u, -x. The options -n, \n                              -N, -b are not supported.  (default=off)",
   "\nOptional:",
   "  -u, --buffer              Memory buffer disk writes  (default=off)",
   "  -m, --memmap              Memory map input/output  (default=off)",
 cmdline_parser_internal (int argc, char **argv, struct gengetopt_args_info *args_info,
                         struct cmdline_parser_params *params, const char *additional_error);
 
-static int
-cmdline_parser_required2 (struct gengetopt_args_info *args_info, const char *prog_name, const char *additional_error);
 
 static char *
 gengetopt_strdup (const char *s);
   args_info->block_files_given = 0 ;
   args_info->block_datasets_given = 0 ;
   args_info->use_nibble_given = 0 ;
-  args_info->fast_given = 0 ;
   args_info->buffer_given = 0 ;
   args_info->memmap_given = 0 ;
   args_info->verbosity_given = 0 ;
   args_info->block_datasets_arg = -1;
   args_info->block_datasets_orig = NULL;
   args_info->use_nibble_flag = 0;
-  args_info->fast_flag = 0;
   args_info->buffer_flag = 0;
   args_info->memmap_flag = 0;
   args_info->verbosity_arg = 5;
   args_info->block_files_help = gengetopt_args_info_help[10] ;
   args_info->block_datasets_help = gengetopt_args_info_help[11] ;
   args_info->use_nibble_help = gengetopt_args_info_help[12] ;
-  args_info->fast_help = gengetopt_args_info_help[13] ;
-  args_info->buffer_help = gengetopt_args_info_help[15] ;
-  args_info->memmap_help = gengetopt_args_info_help[16] ;
-  args_info->verbosity_help = gengetopt_args_info_help[17] ;
+  args_info->buffer_help = gengetopt_args_info_help[14] ;
+  args_info->memmap_help = gengetopt_args_info_help[15] ;
+  args_info->verbosity_help = gengetopt_args_info_help[16] ;
   
 }
 
     write_into_file(outfile, "block_datasets", args_info->block_datasets_orig, 0);
   if (args_info->use_nibble_given)
     write_into_file(outfile, "use_nibble", 0, 0 );
-  if (args_info->fast_given)
-    write_into_file(outfile, "fast", 0, 0 );
   if (args_info->buffer_given)
     write_into_file(outfile, "buffer", 0, 0 );
   if (args_info->memmap_given)
 int
 cmdline_parser_required (struct gengetopt_args_info *args_info, const char *prog_name)
 {
-  int result = EXIT_SUCCESS;
-
-  if (cmdline_parser_required2(args_info, prog_name, 0) > 0)
-    result = EXIT_FAILURE;
-
-  return result;
-}
-
-int
-cmdline_parser_required2 (struct gengetopt_args_info *args_info, const char *prog_name, const char *additional_error)
-{
-  int error = 0;
-  FIX_UNUSED (additional_error);
-
-  /* checks for required options */
-  
-  /* checks for dependences among options */
-  if (args_info->fast_given && ! args_info->dataset_given)
-    {
-      fprintf (stderr, "%s: '--fast' ('-F') option depends on option 'dataset'%s\n", prog_name, (additional_error ? additional_error : ""));
-      error = 1;
-    }
-
-  return error;
+  FIX_UNUSED (args_info);
+  FIX_UNUSED (prog_name);
+  return EXIT_SUCCESS;
 }
 
 
         { "block_files",	1, NULL, 'b' },
         { "block_datasets",	1, NULL, 'B' },
         { "use_nibble",	0, NULL, 'N' },
-        { "fast",	0, NULL, 'F' },
         { "buffer",	0, NULL, 'u' },
         { "memmap",	0, NULL, 'm' },
         { "verbosity",	1, NULL, 'v' },
         { 0,  0, 0, 0 }
       };
 
-      c = getopt_long (argc, argv, "hVx:n:i:d:D:f:b:B:NFumv:", long_options, &option_index);
+      c = getopt_long (argc, argv, "hVx:n:i:d:D:f:b:B:Numv:", long_options, &option_index);
 
       if (c == -1) break;	/* Exit from `while (1)' loop.  */
 
             goto failure;
         
           break;
-        case 'F':	/* Fast processing. Requires -u, -x. The options -n, -N, -b are not supported..  */
-        
-        
-          if (update_arg((void *)&(args_info->fast_flag), 0, &(args_info->fast_given),
-              &(local_args_info.fast_given), optarg, 0, 0, ARG_FLAG,
-              check_ambiguity, override, 1, 0, "fast", 'F',
-              additional_error))
-            goto failure;
-        
-          break;
         case 'u':	/* Memory buffer disk writes.  */
         
         
 
 
 
-  if (check_required)
-    {
-      error += cmdline_parser_required2 (args_info, argv[0], additional_error);
-    }
 
   cmdline_parser_release (&local_args_info);
 

File tools/Data2DB/cmdline.h

View file
   const char *block_datasets_help; /**< @brief Number of datasets per block help description.  */
   int use_nibble_flag;	/**< @brief Use nibble for compact storage (default=off).  */
   const char *use_nibble_help; /**< @brief Use nibble for compact storage help description.  */
-  int fast_flag;	/**< @brief Fast processing. Requires -u, -x. The options -n, -N, -b are not supported. (default=off).  */
-  const char *fast_help; /**< @brief Fast processing. Requires -u, -x. The options -n, -N, -b are not supported. help description.  */
   int buffer_flag;	/**< @brief Memory buffer disk writes (default=off).  */
   const char *buffer_help; /**< @brief Memory buffer disk writes help description.  */
   int memmap_flag;	/**< @brief Memory map input/output (default=off).  */
   unsigned int block_files_given ;	/**< @brief Whether block_files was given.  */
   unsigned int block_datasets_given ;	/**< @brief Whether block_datasets was given.  */
   unsigned int use_nibble_given ;	/**< @brief Whether use_nibble was given.  */
-  unsigned int fast_given ;	/**< @brief Whether fast was given.  */
   unsigned int buffer_given ;	/**< @brief Whether buffer was given.  */
   unsigned int memmap_given ;	/**< @brief Whether memmap was given.  */
   unsigned int verbosity_given ;	/**< @brief Whether verbosity was given.  */

File tools/Makefile.am

View file
 	  Contexter \
 	  Counter \
 	  Data2DB \
+      DBCombiner \
 	  DSLConverter \
 	  Dab2Dad \
 	  Edges2Posteriors \