Commits

Chris Park committed 0cb5f19

merged qdab support and additional functionalities for Dat2Dab

Comments (0)

Files changed (10)

 #include "statistics.h"
 #include "annotation.h"
 #include "color.h"
+#include "meta.h"
 
 namespace Sleipnir {
 
 	{"dat",	CDat::EFormatText},
 	{"das",	CDat::EFormatSparse},
 	{"pcl",	CDat::EFormatPCL},
+	{"qdab",CDat::EFormatQdab},
 	{NULL,	CDat::EFormatBinary}
 };
 
 			return OpenPCL( istm, iSkip, fZScore );
 
 		case EFormatSparse:
-			return OpenSparse( istm ); }
-
+			return OpenSparse( istm ); 
+	
+		case EFormatQdab:
+			return OpenQdab( istm ); 		       
+	}
 	return OpenBinary( istm ); }
 
 bool CDatImpl::OpenPCL( std::istream& istm, size_t iSkip, bool fZScore ) {
 
 	return true; }
 
+/*!
+ * \brief
+ * Read a quantized binary (QDAB) stream into this CDat.
+ *
+ * \param istm
+ * Binary stream holding gene names (read by OpenGenes), a one-byte bin count, that many
+ * float bin edges, and then one bit-packed bin index per gene pair.
+ *
+ * \returns
+ * True if the header and every packed value were read; false if the stream is malformed
+ * or ends early.
+ *
+ * \remarks
+ * Values are packed most significant bit first. Each value is wide enough that the all-ones
+ * code is never a bin index; that code is decoded as NaN. The bin edges are read and discarded.
+ */
+bool CDatImpl::OpenQdab( std::istream& istm ) {
+	size_t			i, j, iGenes, iBins, iBits, iNeed, iTake, iLeft;
+	unsigned int	iValue, iNaN;
+	unsigned char	bBins, bCur;
+	float			adEdges[ 256 ];
+	float*			adScores;
+
+	if( !OpenGenes( istm, true, false ) )
+		return false;
+	m_Data.Initialize( GetGenes( ) );
+
+	// The bin count is a single unsigned byte; reading it as a signed char would turn
+	// counts above 127 into a huge size_t.
+	bBins = 0;
+	istm.read( (char*)&bBins, sizeof(bBins) );
+	if( !istm || !bBins )
+		return false;
+	iBins = bBins;
+
+	// The edges are not kept here, but they must be consumed to reach the packed data.
+	// One byte of bin count means at most 255 edges, so the scratch buffer cannot overflow.
+	istm.read( (char*)adEdges, sizeof(float) * iBins );
+	if( !istm )
+		return false;
+
+	// Smallest width for which 2^bits - 1 exceeds every bin index, leaving that code for NaN.
+	iBits = 0;
+	while( ( (size_t)1 << iBits ) <= iBins )
+		++iBits;
+	iNaN = ( 1u << iBits ) - 1;
+
+	// Fewer than two genes means there are no pairs to decode.
+	if( ( iGenes = GetGenes( ) ) < 2 )
+		return true;
+
+	adScores = new float[ iGenes - 1 ];
+	bCur = 0;
+	iLeft = 0;	// unread bits remaining in bCur
+	for( i = 0; ( i + 1 ) < iGenes; ++i ) {
+		for( j = 0; j < ( iGenes - i - 1 ); ++j ) {
+			iValue = 0;
+			for( iNeed = iBits; iNeed; iNeed -= iTake ) {
+				if( !iLeft ) {
+					// Fetch a byte only when one is needed, so a short file is detected
+					// instead of silently reusing stale bits.
+					istm.read( (char*)&bCur, sizeof(bCur) );
+					if( !istm ) {
+						delete[] adScores;
+						return false; }
+					iLeft = 8; }
+				iTake = ( iNeed < iLeft ) ? iNeed : iLeft;
+				iLeft -= iTake;
+				iValue = ( iValue << iTake ) | ( ( bCur >> iLeft ) & ( ( 1u << iTake ) - 1 ) ); }
+			adScores[ j ] = ( iValue == iNaN ) ? CMeta::GetNaN( ) : (float)iValue; }
+		Set( i, adScores ); }
+
+	delete[] adScores;
+	return true; }
+
+
 bool CDatImpl::OpenSparse( std::istream& istm ) {
 	size_t		i;
 	uint32_t	j;
 		 * \brief
 		 * Binary format listing null-terminated element name strings followed by index/value pairs.
 		 */
-		EFormatSparse	= EFormatPCL + 1
+		EFormatSparse	= EFormatPCL + 1,
+
+		/*!
+		 * \brief
+		 * Binary format listing null-terminated element name strings followed by bits representing the quantized bins.
+		 */		
+		EFormatQdab = EFormatSparse + 1 
 	};
 
 	/*!
 #include "datapair.h"
 #include "meta.h"
 #include "genome.h"
+#include "math.h"
 
 namespace Sleipnir {
 
 const char	CPairImpl::c_szQuantExt[]	= ".quant";
+const char   CDataPairImpl::c_acQdab[]   = ".qdab";
 
 bool CPairImpl::Open( const char* szDatafile, std::ifstream& ifsm ) {
 	string		strToken;
  * CDat::Open
  */
 bool CDataPair::Open( const CSlim& Slim ) {
-
+	// Clear the quantized flag so that values opened from the CSlim are not
+	// treated as precomputed bin indices by Quantize.
+	m_fQuantized = false;
 	Reset( false );
 	return CDat::Open( Slim ); }
 
 bool CDataPair::Open( const char* szDatafile, bool fContinuous, bool fMemmap, size_t iSkip,
 	bool fZScore ) {
 
+
 	g_CatSleipnir( ).notice( "CDataPair::Open( %s, %d )", szDatafile, fContinuous );
+	
+	Reset( fContinuous );
+	m_fQuantized = false;
+	
+	// Route to the QDAB reader only when the name actually ends in the QDAB extension.
+	// A substring search would also match the extension inside a directory name and
+	// send an ordinary DAT/DAB file to the wrong parser.
+	const size_t	iLength	= strlen( szDatafile );
+	const size_t	iExt	= strlen( c_acQdab );
+	
+	if( ( iLength >= iExt ) && !strcmp( szDatafile + iLength - iExt, c_acQdab ) )
+		return OpenQdab( szDatafile );
 
-	Reset( fContinuous );
-	if( !CDat::Open( szDatafile, fMemmap, iSkip, fZScore ) )
-		return false;
-	return ( m_fContinuous ? true : OpenQuants( szDatafile ) ); }
+	if( !CDat::Open( szDatafile, fMemmap, iSkip, fZScore ) )
+		return false;
+	// Continuous data need no bin edges; discrete data load them from the QUANT file.
+	return ( m_fContinuous ? true : OpenQuants( szDatafile ) );
+}
+
+/*!
+ * \brief
+ * Open a quantized binary (QDAB) file, loading both its bin edges and its bin indices.
+ *
+ * \param szDatafile
+ * Path of the QDAB file: gene names (read by CDat::OpenGenes), a one-byte bin count, that
+ * many float bin edges, and then one bit-packed bin index per gene pair.
+ *
+ * \returns
+ * True if the whole file was read; false if it cannot be opened, is malformed, or ends early.
+ *
+ * \remarks
+ * Values are packed most significant bit first, and the all-ones code of each value is decoded
+ * as NaN. On success the pair is flagged as quantized, so Quantize( float ) returns stored
+ * values unchanged.
+ */
+bool CDataPairImpl::OpenQdab( const char* szDatafile ){
+	size_t			i, j, iGenes, iBins, iBits, iNeed, iTake, iLeft;
+	unsigned int	iValue, iNaN;
+	unsigned char	bBins, bCur;
+	float			adEdges[ 256 ];
+	float*			adScores;
+	ifstream		istm;
+
+	g_CatSleipnir( ).notice( "CDataPair::OpenQdab( %s )", szDatafile );
+
+	// The stream closes itself on every return path when it goes out of scope.
+	istm.open( szDatafile, ios_base::binary );
+	if( !istm.is_open( ) )
+		return false;
+
+	if( !CDat::OpenGenes( istm, true, false ) )
+		return false;
+	m_Data.Initialize( GetGenes( ) );
+
+	// The bin count is a single unsigned byte; reading it as a signed char would turn
+	// counts above 127 into a huge size_t.
+	bBins = 0;
+	istm.read( (char*)&bBins, sizeof(bBins) );
+	if( !istm || !bBins )
+		return false;
+	iBins = bBins;
+
+	// One byte of bin count means at most 255 edges, so the local buffer cannot overflow.
+	istm.read( (char*)adEdges, sizeof(float) * iBins );
+	if( !istm )
+		return false;
+	SetQuants( adEdges, iBins );
+
+	// Smallest width for which 2^bits - 1 exceeds every bin index, leaving that code for NaN.
+	iBits = 0;
+	while( ( (size_t)1 << iBits ) <= iBins )
+		++iBits;
+	iNaN = ( 1u << iBits ) - 1;
+
+	// Fewer than two genes means there are no pairs to decode.
+	if( ( iGenes = GetGenes( ) ) >= 2 ) {
+		adScores = new float[ iGenes - 1 ];
+		bCur = 0;
+		iLeft = 0;	// unread bits remaining in bCur
+		for( i = 0; ( i + 1 ) < iGenes; ++i ) {
+			for( j = 0; j < ( iGenes - i - 1 ); ++j ) {
+				iValue = 0;
+				for( iNeed = iBits; iNeed; iNeed -= iTake ) {
+					if( !iLeft ) {
+						// Fetch a byte only when one is needed, so a short file is
+						// detected instead of silently reusing stale bits.
+						istm.read( (char*)&bCur, sizeof(bCur) );
+						if( !istm ) {
+							delete[] adScores;
+							return false; }
+						iLeft = 8; }
+					iTake = ( iNeed < iLeft ) ? iNeed : iLeft;
+					iLeft -= iTake;
+					iValue = ( iValue << iTake ) | ( ( bCur >> iLeft ) & ( ( 1u << iTake ) - 1 ) ); }
+				adScores[ j ] = ( iValue == iNaN ) ? CMeta::GetNaN( ) : (float)iValue; }
+			CDat::Set( i, adScores ); }
+		delete[] adScores; }
+
+	m_fQuantized = true;
+
+	return true;
+}
+  
+  
+/*!
+ * \brief
+ * Open this data pair from an existing CDat, treating its values as continuous.
+ *
+ * \param dat
+ * Source CDat handed to CDat::Open.
+ *
+ * \returns
+ * The result of CDat::Open( dat ).
+ */
+bool CDataPair::Open( const CDat& dat ) {
+	m_fContinuous = true;
+	// Match the other Open overloads: newly opened values are not yet bin indices.
+	m_fQuantized = false;
+	Reset( true );
+	// Return on every path; the success path previously fell off the end of the function.
+	return CDat::Open( dat );
+}
 
 /*!
  * \brief
  * SetQuants | CMeta::Quantize
  */
 size_t CDataPair::Quantize( float dValue ) const {
+	if ( m_fQuantized ) 
+		return (size_t)dValue;
+	else
+		return CMeta::Quantize( dValue, m_vecdQuant ); }
 
-	return CMeta::Quantize( dValue, m_vecdQuant ); }
+
+/*!
+ * \brief
+ * Replace every non-missing pairwise value with its bin index.
+ *
+ * \remarks
+ * Does nothing when the pair is already flagged as quantized; NaN values are left untouched.
+ */
+void CDataPair::Quantize() {
+	size_t	iOne, iTwo;
+	float	dCur;
+
+	if( !m_fQuantized ) {
+		for( iOne = 0; iOne < GetGenes( ); ++iOne )
+			for( iTwo = ( iOne + 1 ); iTwo < GetGenes( ); ++iTwo )
+				if( !CMeta::IsNaN( dCur = Get( iOne, iTwo ) ) )
+					Set( iOne, iTwo, Quantize( dCur ) );
+		m_fQuantized = true; }
+}
 
 void CDataPairImpl::Reset( bool fContinuous ) {
 
  * \see
  * GetValues | Quantize
  */
-void CDataPair::SetQuants( const float* adBinEdges, size_t iBins ) {
+void CDataPairImpl::SetQuants( const float* adBinEdges, size_t iBins ) {
 
 	// Reset( false ) runs first; the stored edges then become a copy of the caller's array.
 	Reset( false );
 	m_vecdQuant.assign( adBinEdges, adBinEdges + iBins ); }
 
+
 /*!
  * \brief
  * Set the data pair's bin edges.
 	m_vecdQuant.resize( vecdBinEdges.size( ) );
 	copy( vecdBinEdges.begin( ), vecdBinEdges.end( ), m_vecdQuant.begin( ) ); }
 
+/*!
+ * \brief
+ * Save the data pair, as a bit-packed QDAB file when the name ends in "qdab" and through
+ * CDat::Save otherwise.
+ *
+ * \param szFile
+ * Output file name.
+ *
+ * \remarks
+ * The QDAB output holds the gene names (CDat::SaveGenes), a one-byte bin count, the float bin
+ * edges, and then one value per gene pair packed most significant bit first. Stored values are
+ * written as they are, cast to a byte, so the data should already be quantized. NaN is written
+ * as the all-ones code. Nothing is written, and an error is logged, when there are no bin edges,
+ * more than 255 of them, or the file cannot be opened.
+ */
+void CDataPair::Save( const char* szFile ) const {
+	static const char	c_szSuffix[]	= "qdab";
+	const size_t		iLength			= strlen( szFile );
+	const size_t		iSuffix			= strlen( c_szSuffix );
+	size_t				i, j, iBits, iOffset;
+	unsigned char		bBins, bCur, bNaN, bValue;
+	float				d;
+	ofstream			ofsm;
+
+	// Guard the length so that a short name never indexes before the start of the string.
+	if( ( iLength < iSuffix ) || strcmp( szFile + iLength - iSuffix, c_szSuffix ) ) {
+		CDat::Save( szFile );
+		return; }
+
+	g_CatSleipnir().info( "CDataPair::Save( ) qdab file: %s", szFile );
+
+	// The bin count is stored in one byte, and zero bins leave no valid code width.
+	if( m_vecdQuant.empty( ) || ( m_vecdQuant.size( ) > 255 ) ) {
+		g_CatSleipnir( ).error( "CDataPair::Save( %s ) cannot store %d bins in a qdab file",
+			szFile, (int)m_vecdQuant.size( ) );
+		return; }
+
+	ofsm.open( szFile, ios_base::binary );
+	if( !ofsm.is_open( ) ) {
+		g_CatSleipnir( ).error( "CDataPair::Save( %s ) could not open file", szFile );
+		return; }
+	CDat::SaveGenes( ofsm );
+
+	// Smallest width for which 2^bits - 1 exceeds every bin index; that code is kept for NaN.
+	bBins = (unsigned char)m_vecdQuant.size( );
+	iBits = 0;
+	while( ( (size_t)1 << iBits ) <= bBins )
+		++iBits;
+	bNaN = (unsigned char)( ( 1u << iBits ) - 1 );
+
+	// Header: total bins followed by the bin boundaries.
+	ofsm.write( (char*)&bBins, sizeof(bBins) );
+	for( i = 0; i < m_vecdQuant.size( ); ++i ) {
+		d = m_vecdQuant[ i ];
+		ofsm.write( (char*)&d, sizeof(d) ); }
+
+	bCur = 0;
+	iOffset = 0;	// bits already occupied in bCur
+	for( i = 0; i < GetGenes( ); ++i )
+		for( j = ( i + 1 ); j < GetGenes( ); ++j ) {
+			// Test for NaN before narrowing; converting NaN to an integer type is undefined.
+			d = Get( i, j );
+			bValue = CMeta::IsNaN( d ) ? bNaN : (unsigned char)d;
+			if( ( iOffset + iBits ) <= 8 ) {
+				// The value fits in the current byte.
+				bCur |= bValue << ( 8 - iBits - iOffset );
+				if( ( iOffset += iBits ) == 8 ) {
+					ofsm.write( (char*)&bCur, 1 );
+					bCur = 0;
+					iOffset = 0; } }
+			else {
+				// The value spans a byte boundary: finish this byte with the high bits,
+				// then start the next byte with the remaining low bits.
+				bCur |= bValue >> ( iOffset + iBits - 8 );
+				ofsm.write( (char*)&bCur, 1 );
+				bCur = (unsigned char)( bValue << ( 16 - iOffset - iBits ) );
+				iOffset = iOffset + iBits - 8; } }
+
+	// Flush a final, partially filled byte.
+	if( iOffset )
+		ofsm.write( (char*)&bCur, 1 );
+}
+
 /*!
  * \brief
  * Open the given data file as a PCL and load discretization bin edges from an accompanying QUANT file.
 	size_t		i;
 
 	g_CatSleipnir( ).notice( "CPCLPair::Open( %s )", szDatafile );
-
-	ifsm.open( szDatafile );
-	if( !CPCL::Open( ifsm, iSkip ) )
+	
+	if( !CPCL::Open( szDatafile, iSkip ) )
 		return false;
-	ifsm.close( );
-
-	ifsm.clear( );
+	
 	if( !CPairImpl::Open( szDatafile, ifsm ) )
 		return false;
 
 
 	return CMeta::Quantize( dValue, m_vecvecdQuants[ iExperiment ] ); }
 
+/*!
+ * \brief
+ * Replace every non-missing PCL value with its bin index for the corresponding experiment.
+ *
+ * \remarks
+ * NaN values are left untouched.
+ */
+void CPCLPair::Quantize() {
+	size_t	iGene, iExp;
+	float	dCur;
+
+	for( iGene = 0; iGene < GetGenes( ); ++iGene )
+		for( iExp = 0; iExp < GetExperiments( ); ++iExp )
+			if( !CMeta::IsNaN( dCur = Get( iGene, iExp ) ) )
+				Set( iGene, iExp, Quantize( dCur, iExp ) );
+}
+
 bool CDatFilterImpl::Attach( const CDataPair* pDat, const CDatFilter* pFilter, const CGenes* pGenes,
 	CDat::EFilter eFilter, const CDat* pAnswers ) {
 	size_t	i;
 	bool Open( const char* szDatafile, bool fContinuous, bool fMemmap = false, size_t iSkip = 2,
 		bool fZScore = false );
 	bool Open( const CSlim& Slim );
+	bool Open( const CDat& dat );
 	bool OpenQuants( const char* szDatafile );
-	void SetQuants( const float* adBinEdges, size_t iBins );
+	// Forward to the implementation-class overload. The call must be qualified: an
+	// unqualified SetQuants here resolves to this same member and recurses without end.
+	void SetQuants( const float* adBinEdges, size_t iBins ){
+	  CDataPairImpl::SetQuants( adBinEdges, iBins );
+	}
 	void SetQuants( const std::vector<float>& vecdBinEdges );
 	size_t Quantize( float dValue ) const;
-
+	void Quantize( );
+	void Save( const char* szFile ) const;
+	
+	
 	/*!
 	 * \brief
 	 * Returns the number of discrete values taken by this data pair.
 public:
 	bool Open( const char* szDatafile, size_t iSkip );
 	size_t Quantize( float dValue, size_t iExperiment ) const;
+	void Quantize( );
+
+
+	/*!
+	 * \brief
+	 * Returns how many discrete values one experiment of this PCL pair can take.
+	 * 
+	 * \param iExperiment
+	 * Index of the experiment whose bin edges are counted.
+	 * 
+	 * \returns
+	 * Number of bin edges stored for the given experiment, narrowed to an unsigned char.
+	 * 
+	 * \remarks
+	 * Equivalent to number of bins in the PCL pair and number of bin edges in the QUANT file.
+	 * 
+	 * \see
+	 * SetQuants | Quantize
+	 */
+	unsigned char GetValues( size_t iExperiment ) const {
+	  const size_t	iEdges	= m_vecvecdQuants[ iExperiment ].size( );
+
+	  return (unsigned char)iEdges; }
+	
+
 };
 
 /*!
 
 class CDataPairImpl : protected CPairImpl, public CDat {
 protected:
+	// Start with both flags cleared so that neither is read uninitialized before
+	// one of the Open overloads assigns it.
+  	CDataPairImpl( ) : m_fContinuous(false), m_fQuantized(false) {}
 	void Reset( bool );
+	// When true, Open does not load bin edges from an accompanying QUANT file.
+	bool				m_fContinuous;
+	// When true, stored values are bin indices and Quantize( float ) returns them unchanged.
+	bool				m_fQuantized;
+	// Bin edges used to discretize values.
+	std::vector<float>	m_vecdQuant;
 
-	bool				m_fContinuous;
-	std::vector<float>	m_vecdQuant;
+	// File extension that routes Open to the quantized binary (QDAB) reader.
+	static const char  c_acQdab[];
+	// Reads a QDAB file, including its bin edges.
+	bool OpenQdab( const char* szDatafile );
+	// Replaces the stored bin edges with a copy of the given array.
+	void SetQuants( const float* adBinEdges, size_t iBins );
 };
 
 class CPCLPairImpl : protected CPairImpl, public CPCL {
 	bool OpenText( std::istream&, float, bool );
 	bool OpenBinary( std::istream& );
 	bool OpenSparse( std::istream& );
+	bool OpenQdab( std::istream& );
 	bool OpenGenes( std::istream&, bool, bool );
 	void SaveText( std::ostream& ) const;
 	void SaveBinary( std::ostream& ) const;

tools/Counter/Counter.cpp

 class CRegularize;
 
 static const char	c_acDab[]	= ".dab";
+static const char	c_acQDab[]	= ".qdab";
 static const char	c_acQuant[]	= ".quant";
 static const char	c_acTxt[]	= ".txt";
 
 			pthread_join( vecpthdThreads[ iTerm + i ], NULL ); }
 
 	FOR_EACH_DIRECTORY_FILE((string)sArgs.directory_arg, strFile)
-		string					strName;
+		string					strName, strNameQ;
 		vector<CCountMatrix*>*	pvecpMatCounts;
 
 		if( !CMeta::IsExtension( strFile, c_acQuant ) )
 
 		i = strFile.rfind( '.' );
 		strName = (string)sArgs.directory_arg + "/" + strFile.substr( 0, i ) + c_acDab;
-		if( !Dat.Open( strName.c_str( ), false, !!sArgs.memmap_flag ) ) {
-			cerr << "Couldn't open: " << strName << endl;
+		strNameQ = (string)sArgs.directory_arg + "/" + strFile.substr( 0, i ) + c_acQDab;
+		if( !( Dat.Open( strName.c_str( ), false, !!sArgs.memmap_flag ) ||  
+					Dat.Open( strNameQ.c_str(), false, !!sArgs.memmap_flag ) ) ) {
+			cerr << "Couldn't open: " << strName << ", " << strNameQ << endl;
 			return 1; }
 		cerr << "Processing: " << strName << endl;
 		strName = CMeta::Filename( CMeta::Deextension( CMeta::Basename( strName.c_str( ) ) ) );
 		string		strFile;
 
 		if( !Dat.Open( ( strFile = ( (string)sArgs.directory_arg + '/' + iterDataset->first +
-			c_acDab ) ).c_str( ), false, !!sArgs.memmap_flag ) ) {
+			c_acDab ) ).c_str( ), false, !!sArgs.memmap_flag ) && !Dat.Open( ( strFile = ( (string)sArgs.directory_arg + '/' + iterDataset->first +
+                        c_acQDab ) ).c_str( ), false, !!sArgs.memmap_flag ) ) {
 			cerr << "Couldn't open: " << strFile << endl;
 			return 1; }
 		cerr << "Processing: " << strFile << endl;

tools/Counter/Counter.ggo

 purpose	"Pre-Bayesian learning tool; counts distributions of values in data"
 
 defgroup "Mode"	yes
-groupoption	"answers"	w	"Answer file"
+groupoption	"answers"	w	"Answer file (-w triggers counts mode)"
 							string	typestr="filename"	group="Mode"
-groupoption	"counts"	k	"Directory containing count files"
+groupoption	"counts"	k	"Directory containing count files (-k triggers learning mode)"
 							string	typestr="directory"	group="Mode"
-groupoption	"networks"	n	"Bayes nets"
+groupoption	"networks"	n	"Bayes nets (-n triggers inference mode)"
 							string	typestr="filename"	group="Mode"
 
 section "Main"

tools/Dat2Dab/Dat2Dab.cpp

 #include "cmdline.h"
 
 #include "statistics.h"
+#include "datapair.h"
 
 int main( int iArgs, char** aszArgs ) {
 	gengetopt_args_info	sArgs;
 	if( cmdline_parser( iArgs, aszArgs, &sArgs ) ) {
 		cmdline_parser_print_help( );
 		return 1; }
-	CMeta Meta( sArgs.verbosity_arg );
 
+	CMeta Meta( sArgs.verbosity_arg, sArgs.random_arg );
+	
 	if( sArgs.genes_arg ) {
 		ifsm.open( sArgs.genes_arg );
 		if( !Genes.Open( ifsm ) ) {
 
 	fModified = sArgs.normalize_flag || sArgs.subsample_arg;
 	if( sArgs.input_arg ) {
-		if( !Dat.Open( sArgs.input_arg, sArgs.memmap_flag && !fModified ) ) {
-			cerr << "Could not open: " << sArgs.input_arg << endl;
-			return 1; } }
+	  if( !Dat.Open( sArgs.input_arg, sArgs.memmap_flag && !fModified,
+			 sArgs.skip_arg, !!sArgs.zscore_flag, !!sArgs.duplicates_flag ) ) {
+	    cerr << "Could not open: " << sArgs.input_arg << endl;
+	    return 1; } }	
 	else if( !Dat.Open( cin, CDat::EFormatText, (float)HUGE_VAL, !!sArgs.duplicates_flag ) ) {
 		cerr << "Could not open input" << endl;
 		return 1; }
 					return 1; }
 				Dat.SetGene( i, iterName->second ); } }
 
+	// should I add random noise from standard Normal?
+	if( sArgs.noise_flag ){
+	  float d;
+	  for( i = 0; i < Dat.GetGenes( ); ++i )
+	    for( j = ( i + 1 ); j < Dat.GetGenes( ); ++j )
+	      if( !CMeta::IsNaN( d = Dat.Get( i, j ) ) ){
+		Dat.Set( i, j, d + CStatistics::SampleNormalStandard() );		  
+	      }	
+	}      
 	if( sArgs.randomize_flag )
 		Dat.Randomize( );
 	if( sArgs.rank_flag )
 		Dat.Rank( );
 	if( sArgs.normalize_flag || sArgs.zscore_flag )
 		Dat.Normalize( sArgs.zscore_flag ? CDat::ENormalizeZScore : CDat::ENormalizeMinMax );
-	if( sArgs.zero_flag )
-		for( i = 0; i < Dat.GetGenes( ); ++i )
-			for( j = ( i + 1 ); j < Dat.GetGenes( ); ++j )
-				if( CMeta::IsNaN( Dat.Get( i, j ) ) )
-					Dat.Set( i, j, 0 );
+	if( sArgs.zero_flag || sArgs.dmissing_arg )
+	  for( i = 0; i < Dat.GetGenes( ); ++i )
+	    for( j = ( i + 1 ); j < Dat.GetGenes( ); ++j )
+	      if( CMeta::IsNaN( Dat.Get( i, j ) ) ){
+		if ( sArgs.zero_flag ){
+		  Dat.Set( i, j, 0 );
+		}
+		else{
+		  Dat.Set( i, j, sArgs.dmissing_arg );
+		}
+	      }
 	if( sArgs.flip_flag )
 		Dat.Invert( );
 	if( Genes.GetGenes( ) )
 				if( ( ( iTwo = veciGenesOne[ j ] ) == -1 ) ||
 					CMeta::IsNaN( DatLk1.Get( iOne, iTwo ) ) )
 					Dat.Set( i, j, CMeta::GetNaN( ) ); } }
+	
+	if( sArgs.exedges_arg ) {
+		CDat			DatLk1;
+		vector<size_t>	veciGenesOne;
+		size_t			iOne, iTwo;
+
+		if( !DatLk1.Open( sArgs.exedges_arg ) ) {
+			cerr << "Could not open: " << sArgs.exedges_arg << endl;
+			return 1; }
+		
+		veciGenesOne.resize( Dat.GetGenes( ) );
+		
+		for( i = 0; i < veciGenesOne.size( ); ++i )
+			veciGenesOne[ i ] = DatLk1.GetGene( Dat.GetGene( i ) );
+		
+		for( i = 0; i < Dat.GetGenes( ); ++i ) {
+		  if( ( iOne = veciGenesOne[ i ] ) == -1 ) 
+			  continue;
+			for( j = ( i + 1 ); j < Dat.GetGenes( ); ++j ){
+			  if( (( iTwo = veciGenesOne[ j ] ) == -1 ) ||
+			      CMeta::IsNaN( DatLk1.Get( iOne, iTwo )))
+			    continue;
+			  Dat.Set( i, j, CMeta::GetNaN( ) ); 
+			}			
+		} 
+	}
+
 	if( sArgs.lookups1_arg ) {
 		CGenes			GenesLk1( Genome );
 		vector<size_t>	veciGenesOne;
 				cout << '\t';
 				if( !CMeta::IsNaN( d = Dat.Get( i, j ) ) )
 					cout << d; }
-			cout << endl; } }
-	else if( sArgs.output_arg )
-		Dat.Save( sArgs.output_arg );
+			cout << endl; } }	
+	else if( sArgs.output_arg ) {
+		CDataPair datOut;
+		datOut.Open( Dat );
+		if( sArgs.quant_arg ) {
+			datOut.OpenQuants( sArgs.quant_arg );
+			datOut.Quantize();
+		}
+		datOut.Save( sArgs.output_arg );  
+	}
 	else {
 		Dat.Save( cout, CDat::EFormatText );
 		cout.flush( ); }

tools/Dat2Dab/Dat2Dab.ggo

 							string	typestr="filename"
 option	"output"		o	"Output DAT/DAB file"
 							string	typestr="filename"
+option	"quant"			q	"Input Quant file"
+							string	typestr="filename"
 
 section "Preprocessing"
 option	"flip"			f	"Calculate one minus values"
 							string	typestr="filename"
 option	"edges"			e	"Process only edges from the given DAT/DAB"
 							string	typestr="filename"
+option	"exedges"		x	"Exclude edges from the given DAT/DAB"
+							string	typestr="filename"
 option	"cutoff"		c	"Exclude edges below cutoff"
 							double
 option	"zero"			Z	"Zero missing values"
 							flag	off
+option	"dmissing"		D	"set missing values to a set default value"
+							float	
 option	"duplicates"	d	"Allow dissimilar duplicate values"
 							flag	off
 option	"subsample"		u	"Fraction of output to randomly subsample"
 							int	default="2"
 option	"memmap"		m	"Memory map input/output"
 							flag	off
+option	"random"		R	"Seed random generator (default -1 uses current time)"
+							int	default="-1"
+option	"noise"			N	"Add noise from standard Normal to all non-missing values"
+							flag	off
 option	"verbosity"		v	"Message verbosity"
 							int	default="5"