Commits

Qian Zhu  committed d1e3f1e

DBCombiner: Add support for CDatabase reorganization (e.g., transforming a 21000-file CDatabase into a 1000-file CDatabase with more genes per file)
PCL2Bin: Fixed Bin-to-PCL conversion header bug

  • Participants
  • Parent commits 27cadb6

Comments (0)

Files changed (9)

File src/database.cpp

 
 }
 
+/* Writes iSize raw bytes from Qt directly into the databaselet file,
+ * starting at byte position (m_iHeader + offset) from the beginning of
+ * the stream.
+ * Returns true if the stream reports no failure after the seek and write,
+ * false otherwise. */
+bool CDatabaselet::Write(char* Qt, const size_t &iSize, const size_t offset){
+	// This is an output operation, so position the put pointer.
+	m_fstm.seekp(m_iHeader + offset, ios_base::beg);
+	m_fstm.write(Qt, iSize);
+	// Report the stream state so callers can detect a failed seek/write.
+	return !m_fstm.fail();
+}
+
 /* simply opens the file without overwriting */
 bool CDatabaselet::OpenNoOverwrite() {
 	m_fstm.clear( );
 	return true;
 }
 
-
 /*	static function, combine multiple databaselets (that share the same genes, ie m_vecStrGenes),
 	and output result to a single file, or output one-gene per file (if databaselet contains multiple genes)
  	bSplit: whether or not to output one-gene per file
 	return true;
 }
 
+/* Creates a copy of the current CDatabase collection that is spread over
+ * num_db CDatabaselet files, written to dest_db_dir as "<8-digit index>.db".
+ * Genes are dealt out round-robin by gene index: file i receives the genes
+ * with indices i, i + num_db, i + 2*num_db, ...
+ *
+ * dest_db_dir: destination directory (must not be null)
+ * num_db:      number of destination files (must be non-zero)
+ *
+ * Returns false if the arguments are invalid, the gene map is inconsistent,
+ * a gene cannot be read, a gene's data does not fit the expected size, or a
+ * destination file cannot be opened; true otherwise. Files already written
+ * before a failure are left in place. */
+bool CDatabase::Reorganize(const char *dest_db_dir, const size_t &num_db){
+	size_t i, j, l;
+	const char c_acExtension[] = ".db";
+	// Large enough for any unsigned long printed with "%08lu".
+	char acNumber[32];
+
+	if(!dest_db_dir || num_db == 0){
+		cerr << "Reorganize: invalid destination directory or number of DBs" << endl;
+		return false;
+	}
+
+	// Invert the gene-name -> index map into an index-ordered list of names.
+	vector<string> vecstrG;
+	vecstrG.resize(m_mapstriGenes.size());
+	for(map<string,size_t>::iterator iter=m_mapstriGenes.begin();
+		iter!=m_mapstriGenes.end(); ++iter){
+		if(iter->second >= vecstrG.size()){
+			cerr << "Reorganize: gene index out of range: " << iter->first << endl;
+			return false;
+		}
+		vecstrG[iter->second] = iter->first;
+	}
+
+	// Bytes reserved per gene in a destination file.
+	const size_t iGeneBytes = GetGenes() * GetDatasets();
+
+	for(i=0; i<num_db; i++){
+		vector<string> vecstrSubset;
+		for(j=i; j<vecstrG.size(); j+=num_db)
+			vecstrSubset.push_back(vecstrG[j]);
+		//size of this db
+		const size_t iSize = iGeneBytes * vecstrSubset.size();
+
+		// Zero-initialised and owned by the vector, so it is released on every
+		// exit path. One spare element keeps &vecQt[0] valid when iSize is 0.
+		vector<unsigned char> vecQt(iSize + 1, 0);
+		size_t tot = 0;
+		for(j=0; j<vecstrSubset.size(); j++){
+			map<string,size_t>::iterator iterGene = m_mapstriGenes.find(vecstrSubset[j]);
+			if(iterGene == m_mapstriGenes.end()){
+				cerr << "Reorganize: unknown gene: " << vecstrSubset[j] << endl;
+				return false;
+			}
+			vector<unsigned char> Qi;
+			// Skipping a gene would shift every later gene in the file, so
+			// a read failure aborts instead of writing misaligned data.
+			if(!GetGene(iterGene->second, Qi)){
+				cerr << "Gene error" << endl;
+				return false;
+			}
+			// Never copy past the end of the buffer.
+			if(Qi.size() > iSize - tot){
+				cerr << "Reorganize: gene data larger than expected: " << vecstrSubset[j] << endl;
+				return false;
+			}
+			for(l=0; l<Qi.size(); l++)
+				vecQt[tot+l] = Qi[l];
+			tot+=Qi.size();
+		}
+		// size_t is not necessarily unsigned long; cast to match "%lu".
+		sprintf(acNumber, "%08lu", (unsigned long)i);
+		string dest_dir = dest_db_dir;
+		string strFile = dest_dir + "/" + acNumber + c_acExtension;
+
+		CDatabaselet DBS(false);
+		if(!DBS.Open(strFile.c_str(), vecstrSubset, vecstrG.size(),
+			GetDatasets())){
+			cerr << "Reorganize: cannot open " << strFile << endl;
+			return false;
+		}
+		DBS.Write((char*) &vecQt[0], iSize, 0);
+	}
+	return true;
+}
+
 //For SeekMiner
 bool CDatabase::Open(const string &strDBDirectory,
 		const vector<string> &vecstrGenes, const size_t &iDatasets, const size_t &iNumDBs){

File src/database.h

 	CDatabase(bool isNibble) : CDatabaseImpl(isNibble){
 	}
 
+	bool Reorganize(const char*, const size_t&);
+
+
 	bool GetGene(const string &, vector<unsigned char>&) const;
 	bool GetGene(const size_t &, vector<unsigned char>&) const;
 

File src/databasei.h

 
 	bool OpenNoOverwrite();
 
+	//directly write bytes to disk
+	bool Write(char* data, const size_t& iSize, const size_t offset = 0);
+
 	bool OpenWrite( unsigned char, size_t, ENibbles, unsigned char* );
 
 	/* Get pair by referring to memory cache (ie charImage) of the db file */
 		return;
 
 /*	if (fCDT)
-		ostm << c_szGID << '\t';
-	ostm << m_vecstrFeatures[0];*/
-	for (i = 1; i < m_vecstrFeatures.size(); ++i)
-		ostm << '\t' << m_vecstrFeatures[i];
+		ostm << c_szGID << '\t'; */
+	ostm << m_vecstrFeatures[0]; //Gene name
+//	for (i = 1; i < m_vecstrFeatures.size(); ++i)
+//		ostm << '\t' << m_vecstrFeatures[i];
 	for (i = 0; i < m_vecstrExperiments.size(); ++i)
 		ostm << '\t' << m_vecstrExperiments[i];
 	ostm << endl;

File tools/DBCombiner/DBCombiner.cpp

 		useNibble = true;
 	}
 
-	CDatabase DB(useNibble);
+	if(sArgs.reorganize_flag==1){
+		// Reorganize mode uses every option of the "Reorganize Mode" section;
+		// the string options default to NULL and the int options carry no
+		// default, so refuse to continue unless all of them were supplied.
+		if(!sArgs.dataset_given || !sArgs.db_dir_given ||
+			!sArgs.src_db_num_given || !sArgs.dest_db_num_given ||
+			!sArgs.dest_db_dir_given){
+			cerr << "Reorganize mode requires dataset, db_dir, src_db_num, dest_db_num and dest_db_dir." << endl;
+			return 1;
+		}
+		if(sArgs.src_db_num_arg <= 0 || sArgs.dest_db_num_arg <= 0){
+			cerr << "src_db_num and dest_db_num must be positive." << endl;
+			return 1;
+		}
+
+		// Read the dataset list: the first token of each line is the dataset
+		// name; reading stops at the first empty line or at end of file.
+		vector<string> vecstrDataset;
+		ifstream ifsm2;
+		ifsm2.open(sArgs.dataset_arg);
+		if(!ifsm2.is_open()){
+			cerr << "Could not open: " << sArgs.dataset_arg << endl;
+			return 1;
+		}
+		// Loop on the read itself: testing eof() before reading never ends
+		// when the stream is in a failed state.
+		while(ifsm2.getline(acBuffer, c_iBuffer-1)){
+			if(acBuffer[0]==0) break;
+			vector<string> vecstrLine;
+			CMeta::Tokenize(acBuffer, vecstrLine);
+			// Guard against a line that yields no tokens.
+			if(vecstrLine.empty()) continue;
+			vecstrDataset.push_back(vecstrLine[0]);
+		}
+		ifsm2.close();
 
-	bool fSplit = false;
-	if(sArgs.split_flag==1){
-		fSplit = true;
+		CDatabase db(false);
+		db.Open(sArgs.db_dir_arg, vecstrGenes, vecstrDataset.size(), 
+			sArgs.src_db_num_arg);
+		db.Reorganize(sArgs.dest_db_dir_arg, sArgs.dest_db_num_arg);
+		return 0;
 	}
 
-	if(sArgs.db_arg){
-		ifsm.open(sArgs.db_arg);
-		while(!pistm->eof()){
-			pistm->getline(acBuffer, c_iBuffer -1);
-			if(acBuffer[0]==0){
-				break;
+	if(sArgs.combine_flag==1){
+		CDatabase DB(useNibble);
+
+		bool fSplit = false;
+		if(sArgs.split_flag==1){
+			fSplit = true;
+		}
+
+		if(sArgs.db_arg){
+			ifsm.open(sArgs.db_arg);
+			while(!pistm->eof()){
+				pistm->getline(acBuffer, c_iBuffer -1);
+				if(acBuffer[0]==0){
+					break;
+				}
+				acBuffer[c_iBuffer-1] = 0;
+				vecstrDBs.push_back(acBuffer);
 			}
-			acBuffer[c_iBuffer-1] = 0;
-			vecstrDBs.push_back(acBuffer);
+			vecstrDBs.resize(vecstrDBs.size());
+			ifsm.close();
+
+			//printf("Reading DBS"); getchar();
+			// Open one databaselet per listed file.
+			vector<CDatabaselet*> DBS;
+			DBS.resize(vecstrDBs.size());
+			for(i=0; i<vecstrDBs.size(); i++){
+				DBS[i] = new CDatabaselet(useNibble);
+				DBS[i]->Open(vecstrDBs[i]);
+			}
+			//printf("Finished reading DBS"); getchar();
+
+			CDatabaselet::Combine(DBS, sArgs.dir_out_arg, vecstrGenes, fSplit);
+			// Objects created with new must be released with delete so that
+			// their destructors run; free() is a mismatched deallocation.
+			for(i=0; i<vecstrDBs.size(); i++){
+				delete DBS[i];
+				DBS[i] = 0;
+			}
+
+		}else{
+			cerr << "Must give a db list." << endl;
+			return 1;
+
 		}
-		vecstrDBs.resize(vecstrDBs.size());
-		ifsm.close();
-
-		//printf("Reading DBS"); getchar();
-		vector<CDatabaselet*> DBS;
-		DBS.resize(vecstrDBs.size());
-		for(i=0; i<vecstrDBs.size(); i++){
-	    	DBS[i] = new CDatabaselet(useNibble);
-	    	DBS[i]->Open(vecstrDBs[i]);
-	    }
-		//printf("Finished reading DBS"); getchar();
-
-	    CDatabaselet::Combine(DBS, sArgs.dir_out_arg, vecstrGenes, fSplit);
-	    for(i=0; i<vecstrDBs.size(); i++){
-	    	free(DBS[i]);
-	    }
-
-	}else{
-		cerr << "Must give a db list." << endl;
-		return 1;
-
 	}
-
 #ifdef WIN32
 	pthread_win32_process_detach_np( );
 #endif // WIN32

File tools/DBCombiner/DBCombiner.ggo

 version	"1.0"
 purpose	"Combines a list of DB files with the same gene content"
 
+section "Mode"
+option	"combine"			C	"Combine a set of DB's, each coming from a different dataset subset"
+								flag	off
+option	"reorganize"		R	"Reorganize a set of DB's, such as from 21000 DB files to 1000 DB files, ie expanding/shrinking the number of genes a DB contains"
+								flag	off
+
 section "Main"
+option	"input"				i	"Input gene mapping"
+								string	typestr="filename"	yes	
+
+section "Combine Mode"
 option	"db"				x	"Input a set of databaselet filenames (including path)"
 								string typestr="filename"
-option	"input"				i	"Input gene mapping"
-								string	typestr="filename"	
-option	"dir_out"			D	"Database directory"
+option	"dir_out"			D	"Output database directory"
 								string	typestr="directory"	default="."
 option	"is_nibble"			N	"Whether the input DB is nibble type"
 								flag	off
 option	"split"				s	"Split to one-gene per file"
-								flag	off
+								flag	off
+
+section "Reorganize Mode"
+option	"dataset"			A	"Dataset-platform mapping file"
+								string typestr="filename"
+option	"db_dir"			d	"Source DB collection directory"
+								string typestr="directory"
+option	"src_db_num"		n	"Source DB number of files"
+								int
+option	"dest_db_num"		b	"Destination DB number of files"
+								int
+option	"dest_db_dir"		B	"Destination DB directory"
+								string typestr="directory"

File tools/DBCombiner/cmdline.c

 /*
   File autogenerated by gengetopt version 2.22
   generated with the following command:
-  gengetopt -iDBCombiner.ggo --default-optional -u -N -e 
+  /memex/qzhu/usr/bin/gengetopt -iDBCombiner.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:
 const char *gengetopt_args_info_description = "";
 
 const char *gengetopt_args_info_help[] = {
-  "  -h, --help               Print help and exit",
-  "  -V, --version            Print version and exit",
+  "  -h, --help                   Print help and exit",
+  "  -V, --version                Print version and exit",
+  "\nMode:",
+  "  -C, --combine                Combine a set of DB's, each coming from a \n                                 different dataset subset  (default=off)",
+  "  -R, --reorganize             Reorganize a set of DB's, such as from 21000 DB \n                                 files to 1000 DB files, ie expanding/shrinking \n                                 the number of genes a DB contains  \n                                 (default=off)",
   "\nMain:",
-  "  -x, --db=filename        Input a set of databaselet filenames (including \n                             path)",
-  "  -i, --input=filename     Input gene mapping",
-  "  -D, --dir_out=directory  Database directory  (default=`.')",
-  "  -N, --is_nibble          Whether the input DB is nibble type  (default=off)",
-  "  -s, --split              Split to one-gene per file  (default=off)",
+  "  -i, --input=filename         Input gene mapping",
+  "\nCombine Mode:",
+  "  -x, --db=filename            Input a set of databaselet filenames (including \n                                 path)",
+  "  -D, --dir_out=directory      Output database directory  (default=`.')",
+  "  -N, --is_nibble              Whether the input DB is nibble type  \n                                 (default=off)",
+  "  -s, --split                  Split to one-gene per file  (default=off)",
+  "\nReorganize Mode:",
+  "  -A, --dataset=filename       Dataset-platform mapping file",
+  "  -d, --db_dir=directory       Source DB collection directory",
+  "  -n, --src_db_num=INT         Source DB number of files",
+  "  -b, --dest_db_num=INT        Destination DB number of files",
+  "  -B, --dest_db_dir=directory  Destination DB directory",
     0
 };
 
 typedef enum {ARG_NO
   , ARG_FLAG
   , ARG_STRING
+  , ARG_INT
 } cmdline_parser_arg_type;
 
 static
 cmdline_parser_internal (int argc, char * const *argv, struct gengetopt_args_info *args_info,
                         struct cmdline_parser_params *params, const char *additional_error);
 
+static int
+cmdline_parser_required2 (struct gengetopt_args_info *args_info, const char *prog_name, const char *additional_error);
 
 static char *
 gengetopt_strdup (const char *s);
 {
   args_info->help_given = 0 ;
   args_info->version_given = 0 ;
+  args_info->combine_given = 0 ;
+  args_info->reorganize_given = 0 ;
+  args_info->input_given = 0 ;
   args_info->db_given = 0 ;
-  args_info->input_given = 0 ;
   args_info->dir_out_given = 0 ;
   args_info->is_nibble_given = 0 ;
   args_info->split_given = 0 ;
+  args_info->dataset_given = 0 ;
+  args_info->db_dir_given = 0 ;
+  args_info->src_db_num_given = 0 ;
+  args_info->dest_db_num_given = 0 ;
+  args_info->dest_db_dir_given = 0 ;
 }
 
 static
 void clear_args (struct gengetopt_args_info *args_info)
 {
+  args_info->combine_flag = 0;
+  args_info->reorganize_flag = 0;
+  args_info->input_arg = NULL;
+  args_info->input_orig = NULL;
   args_info->db_arg = NULL;
   args_info->db_orig = NULL;
-  args_info->input_arg = NULL;
-  args_info->input_orig = NULL;
   args_info->dir_out_arg = gengetopt_strdup (".");
   args_info->dir_out_orig = NULL;
   args_info->is_nibble_flag = 0;
   args_info->split_flag = 0;
+  args_info->dataset_arg = NULL;
+  args_info->dataset_orig = NULL;
+  args_info->db_dir_arg = NULL;
+  args_info->db_dir_orig = NULL;
+  args_info->src_db_num_orig = NULL;
+  args_info->dest_db_num_orig = NULL;
+  args_info->dest_db_dir_arg = NULL;
+  args_info->dest_db_dir_orig = NULL;
   
 }
 
 
   args_info->help_help = gengetopt_args_info_help[0] ;
   args_info->version_help = gengetopt_args_info_help[1] ;
-  args_info->db_help = gengetopt_args_info_help[3] ;
-  args_info->input_help = gengetopt_args_info_help[4] ;
-  args_info->dir_out_help = gengetopt_args_info_help[5] ;
-  args_info->is_nibble_help = gengetopt_args_info_help[6] ;
-  args_info->split_help = gengetopt_args_info_help[7] ;
+  args_info->combine_help = gengetopt_args_info_help[3] ;
+  args_info->reorganize_help = gengetopt_args_info_help[4] ;
+  args_info->input_help = gengetopt_args_info_help[6] ;
+  args_info->db_help = gengetopt_args_info_help[8] ;
+  args_info->dir_out_help = gengetopt_args_info_help[9] ;
+  args_info->is_nibble_help = gengetopt_args_info_help[10] ;
+  args_info->split_help = gengetopt_args_info_help[11] ;
+  args_info->dataset_help = gengetopt_args_info_help[13] ;
+  args_info->db_dir_help = gengetopt_args_info_help[14] ;
+  args_info->src_db_num_help = gengetopt_args_info_help[15] ;
+  args_info->dest_db_num_help = gengetopt_args_info_help[16] ;
+  args_info->dest_db_dir_help = gengetopt_args_info_help[17] ;
   
 }
 
 cmdline_parser_release (struct gengetopt_args_info *args_info)
 {
   unsigned int i;
+  free_string_field (&(args_info->input_arg));
+  free_string_field (&(args_info->input_orig));
   free_string_field (&(args_info->db_arg));
   free_string_field (&(args_info->db_orig));
-  free_string_field (&(args_info->input_arg));
-  free_string_field (&(args_info->input_orig));
   free_string_field (&(args_info->dir_out_arg));
   free_string_field (&(args_info->dir_out_orig));
+  free_string_field (&(args_info->dataset_arg));
+  free_string_field (&(args_info->dataset_orig));
+  free_string_field (&(args_info->db_dir_arg));
+  free_string_field (&(args_info->db_dir_orig));
+  free_string_field (&(args_info->src_db_num_orig));
+  free_string_field (&(args_info->dest_db_num_orig));
+  free_string_field (&(args_info->dest_db_dir_arg));
+  free_string_field (&(args_info->dest_db_dir_orig));
   
   
   for (i = 0; i < args_info->inputs_num; ++i)
     write_into_file(outfile, "help", 0, 0 );
   if (args_info->version_given)
     write_into_file(outfile, "version", 0, 0 );
+  if (args_info->combine_given)
+    write_into_file(outfile, "combine", 0, 0 );
+  if (args_info->reorganize_given)
+    write_into_file(outfile, "reorganize", 0, 0 );
+  if (args_info->input_given)
+    write_into_file(outfile, "input", args_info->input_orig, 0);
   if (args_info->db_given)
     write_into_file(outfile, "db", args_info->db_orig, 0);
-  if (args_info->input_given)
-    write_into_file(outfile, "input", args_info->input_orig, 0);
   if (args_info->dir_out_given)
     write_into_file(outfile, "dir_out", args_info->dir_out_orig, 0);
   if (args_info->is_nibble_given)
     write_into_file(outfile, "is_nibble", 0, 0 );
   if (args_info->split_given)
     write_into_file(outfile, "split", 0, 0 );
+  if (args_info->dataset_given)
+    write_into_file(outfile, "dataset", args_info->dataset_orig, 0);
+  if (args_info->db_dir_given)
+    write_into_file(outfile, "db_dir", args_info->db_dir_orig, 0);
+  if (args_info->src_db_num_given)
+    write_into_file(outfile, "src_db_num", args_info->src_db_num_orig, 0);
+  if (args_info->dest_db_num_given)
+    write_into_file(outfile, "dest_db_num", args_info->dest_db_num_orig, 0);
+  if (args_info->dest_db_dir_given)
+    write_into_file(outfile, "dest_db_dir", args_info->dest_db_dir_orig, 0);
   
 
   i = EXIT_SUCCESS;
 int
 cmdline_parser_required (struct gengetopt_args_info *args_info, const char *prog_name)
 {
-  return EXIT_SUCCESS;
+  int result = EXIT_SUCCESS;
+
+  if (cmdline_parser_required2(args_info, prog_name, NULL) > 0)
+    result = EXIT_FAILURE;
+
+  return result;
+}
+
+int
+cmdline_parser_required2 (struct gengetopt_args_info *args_info, const char *prog_name, const char *additional_error)
+{
+  int error = 0;
+
+  /* checks for required options */
+  if (! args_info->input_given)
+    {
+      fprintf (stderr, "%s: '--input' ('-i') option required%s\n", prog_name, (additional_error ? additional_error : ""));
+      error = 1;
+    }
+  
+  
+  /* checks for dependences among options */
+
+  return error;
 }
 
 
   case ARG_FLAG:
     *((int *)field) = !*((int *)field);
     break;
+  case ARG_INT:
+    if (val) *((int *)field) = strtol (val, &stop_char, 0);
+    break;
   case ARG_STRING:
     if (val) {
       string_field = (char **)field;
     break;
   };
 
+  /* check numeric conversion */
+  switch(arg_type) {
+  case ARG_INT:
+    if (val && !(stop_char && *stop_char == '\0')) {
+      fprintf(stderr, "%s: invalid numeric value: %s\n", package_name, val);
+      return 1; /* failure */
+    }
+    break;
+  default:
+    ;
+  };
 
   /* store the original value */
   switch(arg_type) {
       static struct option long_options[] = {
         { "help",	0, NULL, 'h' },
         { "version",	0, NULL, 'V' },
+        { "combine",	0, NULL, 'C' },
+        { "reorganize",	0, NULL, 'R' },
+        { "input",	1, NULL, 'i' },
         { "db",	1, NULL, 'x' },
-        { "input",	1, NULL, 'i' },
         { "dir_out",	1, NULL, 'D' },
         { "is_nibble",	0, NULL, 'N' },
         { "split",	0, NULL, 's' },
+        { "dataset",	1, NULL, 'A' },
+        { "db_dir",	1, NULL, 'd' },
+        { "src_db_num",	1, NULL, 'n' },
+        { "dest_db_num",	1, NULL, 'b' },
+        { "dest_db_dir",	1, NULL, 'B' },
         { NULL,	0, NULL, 0 }
       };
 
-      c = getopt_long (argc, argv, "hVx:i:D:Ns", long_options, &option_index);
+      c = getopt_long (argc, argv, "hVCRi:x:D:NsA:d:n:b:B:", long_options, &option_index);
 
       if (c == -1) break;	/* Exit from `while (1)' loop.  */
 
           return 0;
         
           break;
+        case 'C':	/* Combine a set of DB's, each coming from a different dataset subset.  */
+        
+        
+          if (update_arg((void *)&(args_info->combine_flag), 0, &(args_info->combine_given),
+              &(local_args_info.combine_given), optarg, 0, 0, ARG_FLAG,
+              check_ambiguity, override, 1, 0, "combine", 'C',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'R':	/* Reorganize a set of DB's, such as from 21000 DB files to 1000 DB files, ie expanding/shrinking the number of genes a DB contains.  */
+        
+        
+          if (update_arg((void *)&(args_info->reorganize_flag), 0, &(args_info->reorganize_given),
+              &(local_args_info.reorganize_given), optarg, 0, 0, ARG_FLAG,
+              check_ambiguity, override, 1, 0, "reorganize", 'R',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'i':	/* Input gene mapping.  */
+        
+        
+          if (update_arg( (void *)&(args_info->input_arg), 
+               &(args_info->input_orig), &(args_info->input_given),
+              &(local_args_info.input_given), optarg, 0, 0, ARG_STRING,
+              check_ambiguity, override, 0, 0,
+              "input", 'i',
+              additional_error))
+            goto failure;
+        
+          break;
         case 'x':	/* Input a set of databaselet filenames (including path).  */
         
         
             goto failure;
         
           break;
-        case 'i':	/* Input gene mapping.  */
-        
-        
-          if (update_arg( (void *)&(args_info->input_arg), 
-               &(args_info->input_orig), &(args_info->input_given),
-              &(local_args_info.input_given), optarg, 0, 0, ARG_STRING,
-              check_ambiguity, override, 0, 0,
-              "input", 'i',
-              additional_error))
-            goto failure;
-        
-          break;
-        case 'D':	/* Database directory.  */
+        case 'D':	/* Output database directory.  */
         
         
           if (update_arg( (void *)&(args_info->dir_out_arg), 
             goto failure;
         
           break;
+        case 'A':	/* Dataset-platform mapping file.  */
+        
+        
+          if (update_arg( (void *)&(args_info->dataset_arg), 
+               &(args_info->dataset_orig), &(args_info->dataset_given),
+              &(local_args_info.dataset_given), optarg, 0, 0, ARG_STRING,
+              check_ambiguity, override, 0, 0,
+              "dataset", 'A',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'd':	/* Source DB collection directory.  */
+        
+        
+          if (update_arg( (void *)&(args_info->db_dir_arg), 
+               &(args_info->db_dir_orig), &(args_info->db_dir_given),
+              &(local_args_info.db_dir_given), optarg, 0, 0, ARG_STRING,
+              check_ambiguity, override, 0, 0,
+              "db_dir", 'd',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'n':	/* Source DB number of files.  */
+        
+        
+          if (update_arg( (void *)&(args_info->src_db_num_arg), 
+               &(args_info->src_db_num_orig), &(args_info->src_db_num_given),
+              &(local_args_info.src_db_num_given), optarg, 0, 0, ARG_INT,
+              check_ambiguity, override, 0, 0,
+              "src_db_num", 'n',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'b':	/* Destination DB number of files.  */
+        
+        
+          if (update_arg( (void *)&(args_info->dest_db_num_arg), 
+               &(args_info->dest_db_num_orig), &(args_info->dest_db_num_given),
+              &(local_args_info.dest_db_num_given), optarg, 0, 0, ARG_INT,
+              check_ambiguity, override, 0, 0,
+              "dest_db_num", 'b',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'B':	/* Destination DB directory.  */
+        
+        
+          if (update_arg( (void *)&(args_info->dest_db_dir_arg), 
+               &(args_info->dest_db_dir_orig), &(args_info->dest_db_dir_given),
+              &(local_args_info.dest_db_dir_given), optarg, 0, 0, ARG_STRING,
+              check_ambiguity, override, 0, 0,
+              "dest_db_dir", 'B',
+              additional_error))
+            goto failure;
+        
+          break;
 
         case 0:	/* Long option with no short option */
         case '?':	/* Invalid option.  */
 
 
 
+  if (check_required)
+    {
+      error += cmdline_parser_required2 (args_info, argv[0], additional_error);
+    }
 
   cmdline_parser_release (&local_args_info);
 

File tools/DBCombiner/cmdline.h

 {
   const char *help_help; /**< @brief Print help and exit help description.  */
   const char *version_help; /**< @brief Print version and exit help description.  */
+  int combine_flag;	/**< @brief Combine a set of DB's, each coming from a different dataset subset (default=off).  */
+  const char *combine_help; /**< @brief Combine a set of DB's, each coming from a different dataset subset help description.  */
+  int reorganize_flag;	/**< @brief Reorganize a set of DB's, such as from 21000 DB files to 1000 DB files, ie expanding/shrinking the number of genes a DB contains (default=off).  */
+  const char *reorganize_help; /**< @brief Reorganize a set of DB's, such as from 21000 DB files to 1000 DB files, ie expanding/shrinking the number of genes a DB contains help description.  */
+  char * input_arg;	/**< @brief Input gene mapping.  */
+  char * input_orig;	/**< @brief Input gene mapping original value given at command line.  */
+  const char *input_help; /**< @brief Input gene mapping help description.  */
   char * db_arg;	/**< @brief Input a set of databaselet filenames (including path).  */
   char * db_orig;	/**< @brief Input a set of databaselet filenames (including path) original value given at command line.  */
   const char *db_help; /**< @brief Input a set of databaselet filenames (including path) help description.  */
-  char * input_arg;	/**< @brief Input gene mapping.  */
-  char * input_orig;	/**< @brief Input gene mapping original value given at command line.  */
-  const char *input_help; /**< @brief Input gene mapping help description.  */
-  char * dir_out_arg;	/**< @brief Database directory (default='.').  */
-  char * dir_out_orig;	/**< @brief Database directory original value given at command line.  */
-  const char *dir_out_help; /**< @brief Database directory help description.  */
+  char * dir_out_arg;	/**< @brief Output database directory (default='.').  */
+  char * dir_out_orig;	/**< @brief Output database directory original value given at command line.  */
+  const char *dir_out_help; /**< @brief Output database directory help description.  */
   int is_nibble_flag;	/**< @brief Whether the input DB is nibble type (default=off).  */
   const char *is_nibble_help; /**< @brief Whether the input DB is nibble type help description.  */
   int split_flag;	/**< @brief Split to one-gene per file (default=off).  */
   const char *split_help; /**< @brief Split to one-gene per file help description.  */
+  char * dataset_arg;	/**< @brief Dataset-platform mapping file.  */
+  char * dataset_orig;	/**< @brief Dataset-platform mapping file original value given at command line.  */
+  const char *dataset_help; /**< @brief Dataset-platform mapping file help description.  */
+  char * db_dir_arg;	/**< @brief Source DB collection directory.  */
+  char * db_dir_orig;	/**< @brief Source DB collection directory original value given at command line.  */
+  const char *db_dir_help; /**< @brief Source DB collection directory help description.  */
+  int src_db_num_arg;	/**< @brief Source DB number of files.  */
+  char * src_db_num_orig;	/**< @brief Source DB number of files original value given at command line.  */
+  const char *src_db_num_help; /**< @brief Source DB number of files help description.  */
+  int dest_db_num_arg;	/**< @brief Destination DB number of files.  */
+  char * dest_db_num_orig;	/**< @brief Destination DB number of files original value given at command line.  */
+  const char *dest_db_num_help; /**< @brief Destination DB number of files help description.  */
+  char * dest_db_dir_arg;	/**< @brief Destination DB directory.  */
+  char * dest_db_dir_orig;	/**< @brief Destination DB directory original value given at command line.  */
+  const char *dest_db_dir_help; /**< @brief Destination DB directory help description.  */
   
   unsigned int help_given ;	/**< @brief Whether help was given.  */
   unsigned int version_given ;	/**< @brief Whether version was given.  */
+  unsigned int combine_given ;	/**< @brief Whether combine was given.  */
+  unsigned int reorganize_given ;	/**< @brief Whether reorganize was given.  */
+  unsigned int input_given ;	/**< @brief Whether input was given.  */
   unsigned int db_given ;	/**< @brief Whether db was given.  */
-  unsigned int input_given ;	/**< @brief Whether input was given.  */
   unsigned int dir_out_given ;	/**< @brief Whether dir_out was given.  */
   unsigned int is_nibble_given ;	/**< @brief Whether is_nibble was given.  */
   unsigned int split_given ;	/**< @brief Whether split was given.  */
+  unsigned int dataset_given ;	/**< @brief Whether dataset was given.  */
+  unsigned int db_dir_given ;	/**< @brief Whether db_dir was given.  */
+  unsigned int src_db_num_given ;	/**< @brief Whether src_db_num was given.  */
+  unsigned int dest_db_num_given ;	/**< @brief Whether dest_db_num was given.  */
+  unsigned int dest_db_dir_given ;	/**< @brief Whether dest_db_dir was given.  */
 
   char **inputs ; /**< @brief unamed options (options without names) */
   unsigned inputs_num ; /**< @brief unamed options number */

File tools/SeekPrep/SeekPrep.cpp

 			for (k = 0; k < Dat.GetGenes(); ++k)
 				for (j = (k + 1); j < Dat.GetGenes(); ++j)
 					Dat.Set(k, j, CMeta::GetNaN());
+
+			omp_set_num_threads(8);
+
+			#pragma omp parallel for \
+			shared(pcl, Dat, veciGenes, pn) \
+			private(k,iOne,j,iTwo) \
+			firstprivate(numG) \
+			schedule(dynamic)
 			for (k = 0; k < numG; ++k) {
 				if ((iOne = veciGenes[k]) == -1)
 					continue;
 				sArgs.top_avg_percent_arg);
 
 			//DEBUGGING
-			for(i=0; i<vecGeneAvg.size(); i++){
-				fprintf(stderr, "%s\t%.3f\n", vecstrGenes[i].c_str(), vecGeneAvg[i]);
-			}
+			//for(i=0; i<vecGeneAvg.size(); i++){
+			//	fprintf(stderr, "%s\t%.3f\n", vecstrGenes[i].c_str(), vecGeneAvg[i]);
+			//}
 
 			CSeekTools::WriteArray(outFile, vecGeneAvg);
 		}
 			CSeekWriter::GetGenePresence(Dat, vecstrGenes, vecGenePresence);
 
 			//DEBUGGING
-			for(i=0; i<vecGenePresence.size(); i++){
-				fprintf(stderr, "%s\t%d\n", vecstrGenes[i].c_str(), vecGenePresence[i]);
-			}
+			//for(i=0; i<vecGenePresence.size(); i++){
+			//	fprintf(stderr, "%s\t%d\n", vecstrGenes[i].c_str(), vecGenePresence[i]);
+			//}
 
 			CSeekTools::WriteArray(outFile, vecGenePresence);
 		}