Commits

Qian Zhu  committed 65ac4e5

Fixed a bug in SeekServer
Report an error if the search dataset file is not in the right format

  • Participants
  • Parent commits 784d471

Comments (0)

Files changed (8)

File src/seekcentral.cpp

 	m_bNormPlatform = bNormPlatform;
 	m_bLogit = src->m_bLogit;
 	m_eDistMeasure = eDistMeasure;
-	m_vecstrGenes.resize(src->m_vecstrGenes.size());
 
 	m_bOutputWeightComponent = src->m_bOutputWeightComponent;
 	m_bSimulateWeight = src->m_bSimulateWeight;
 	m_iNumRandom = 1;
 	m_randRandom = NULL;	
 
+	m_vecstrGenes.resize(src->m_vecstrGenes.size());
 	copy(src->m_vecstrGenes.begin(), src->m_vecstrGenes.end(), m_vecstrGenes.begin());
 
 	m_vecstrDatasets.resize(src->m_vecstrDatasets.size());
 	//m_DB = src->m_DB; //shared DB
 
 	m_vecDBDataset.resize(src->m_vecDB.size());
-	for(i=0; i<m_vecDB.size(); i++)
+	for(i=0; i<m_vecDB.size(); i++){
+		m_vecDBDataset[i].resize(src->m_vecDBDataset[i].size());
 		copy(src->m_vecDBDataset[i].begin(), src->m_vecDBDataset[i].end(),
 		m_vecDBDataset[i].begin());
+	}
 
 	CSeekTools::LoadDatabase(m_vecDB, m_iGenes, m_iDatasets,
 		m_vc, src->m_vc, m_vp, src->m_vp, m_vecstrDatasets,
 			}
 		}
 	}else{
-		if(!CSeekTools::ReadMultipleQueries(search_dset, m_vecstrSearchDatasets))
+		if(!CSeekTools::ReadMultipleQueries(search_dset, m_vecstrSearchDatasets)){
+			fprintf(stderr, "Error reading search datasets\n");
 			return false;
+		}
+		if(m_vecstrSearchDatasets.size()!=m_vecstrAllQuery.size()){
+			fprintf(stderr, "Search_dset file doesn't have enough lines. Remember 1 line / query!\n");
+			return false;
+		}
 	}
 
 	m_searchdsetMap.resize(m_vecstrAllQuery.size());
 	for(i=0; i<m_vecstrAllQuery.size(); i++){
 		m_searchdsetMap[i] = new CSeekIntIntMap(m_vecstrDatasets.size());
-		for(j=0; j<m_vecstrSearchDatasets[i].size(); j++)
+		for(j=0; j<m_vecstrSearchDatasets[i].size(); j++){
 			m_searchdsetMap[i]->Add(
 				m_mapstrintDataset[m_vecstrSearchDatasets[i][j]]);
+		}
 	}
 
 	if(!CalculateRestart()) return false;

File src/seekdataset.cpp

 			float vv = 0;
 			unsigned char x = 0;
 			ushort iGeneMapSize = geneMap->GetNumSet();
-			if(logit){
+			if(logit){ //NOT checked
 				for(ii=0; ii<iGeneMapSize; ii++){
 					for(j=0, i = allRGenes[ii], a=GetGeneAverage(i);
 						j<iNumQueries; j++){
 						}
 					}
 				}
-			}else{
+			}else{ //if just normal score
 				for(ii=0; ii<iGeneMapSize; ii++){
 					for(j=0, i = allRGenes[ii], a=GetGeneAverage(i);
 						j<iNumQueries; j++){
 						if((x = r[queryIndex[j]][i])==255) continue;
-						//fprintf(stderr, "Correlation %d %d %.5f %.5f %.5f %.5f\n", queryIndex[j], i, quant[x], a, platform_avg[j], platform_stdev[j]);
 						vv = (quant[x] - a - platform_avg[j])
 							/ platform_stdev[j];
 						vv = max((float) min(vv, (float)3.2), (float)-3.2);
-
 						if(vv>cutoff){
 							rData[i][j]= (ushort) (vv*100.0) + 320;
 							//fprintf(stderr, "r %.2f\n", quant[x]);
 			}
 		}
 	}else if(bCorrelation){ //correlation mode
+		//assumed to be from -1 to +1
 
 		for(ii=0; ii<iNumGenes; ii++){
 			unsigned char x;
 			for(i = geneMap->GetReverse(ii), j=0; j<iNumQueries; j++){
 				if((x = r[queryIndex[j]][i])==255) continue;
 				float vv = quant[x];
+
+				//for functional network=================================
+				//vv = vv * 6.0 - 3.0; //transform values from 0-1 to values from -3 to +3
+
 				vv = max((float) min(vv, (float)3.2), (float)-3.2);
 				if(vv>cutoff){
 					rData[i][j] = (ushort) (vv*100.0) + 320;

File tools/SeekEvaluator/SeekEvaluator.cpp

 		return true;
 	}
 	else if(met==AVGP || met==PR){
-		vector<float> *vf = Precision(sortedGenes, goldstdGenePresence,
-			nan);
+		vector<float> *vf = Precision(sortedGenes, goldstdGenePresence, nan);
 		/*if(met==PR_ALL){
 			for(i=0; i<vf->size()-1; i++){
 				fprintf(stdout, "%.5f ", vf->at(i));

File tools/SeekMiner/stdafx.cpp

  * The main challenge in performing the user's query is finding the right datasets.
  * As not all microarrays are relevant to exploring the query's coexpression,
  * SeekMiner particularly favors those datasets where the query genes are highly
- * correlated among each other. As we would expect, the query gene coregulation would suggest that
+ * correlated among each other. The intuition is based on the observation that the coregulation
+ * between the query genes would suggest that they participate in the same biological process,
  * and that the biological process involving these genes is highly active. So datasets that meet this criterion would
  * be very informative to the search process.
  *

File tools/SeekServer/SeekServer.cpp

 		bNormPlatform = true;
 	}
 
+	//fprintf(stderr, "%s\n%s\n%s\n%.2f\n", strOutputDir.c_str(), strQuery.c_str(), strSearchDatasets.c_str(), query_fraction_required);
+
 	bool r = csu->Initialize(strOutputDir, strQuery, strSearchDatasets, csfinal,
 		new_fd, query_fraction_required, eDM, bSubtractGeneAvg,
 		bNormPlatform);
 		CSeekDataset::Z_SCORE, //to be overwritten by individual search instance's setting
 		bSubtractAvg, bNormPlatform, //to be overwritten by individual search instance's settings
 		bLogit, //always false
-		sArgs.score_cutoff_arg, sArgs.per_q_required_arg, !!sArgs.square_z_flag, //default
+		sArgs.score_cutoff_arg, 
+		0.0, //min query fraction (to be overwritten)
+		!!sArgs.square_z_flag, //default
 		false, 1, NULL, useNibble)) //default
 		return -1;
 

File tools/SeekServer/SeekServer.ggo

 section "Optional - Parameter tweaking"
 option	"score_cutoff"		c	"Cutoff on the gene-gene score before adding, default: no cutoff"
 								float default="-9999"
-option	"per_q_required"	C	"Fraction (max 1.0) of query required to correlate with a gene, in order to count the gene's query score. A gene may not correlate with a query gene if it is absent, or its correlation with query does not pass cut-off (specified by --score_cutoff). Use this with caution. Be careful if using with --score_cutoff."
-								float default="0.0"
 option	"square_z"			e	"If using z-score, square-transform z-scores. Usually used in conjunction with --score-cutoff"							
 								flag	off
 

File tools/SeekServer/cmdline.c

   "  -n, --num_db=INT              Number of databaselets in database  \n                                  (default=`1000')",
   "\nOptional - Parameter tweaking:",
   "  -c, --score_cutoff=FLOAT      Cutoff on the gene-gene score before adding, \n                                  default: no cutoff  (default=`-9999')",
-  "  -C, --per_q_required=FLOAT    Fraction (max 1.0) of query required to \n                                  correlate with a gene, in order to count the \n                                  gene's query score. A gene may not correlate \n                                  with a query gene if it is absent, or its \n                                  correlation with query does not pass cut-off \n                                  (specified by --score_cutoff). Use this with \n                                  caution. Be careful if using with \n                                  --score_cutoff.  (default=`0.0')",
   "  -e, --square_z                If using z-score, square-transform z-scores. \n                                  Usually used in conjunction with \n                                  --score-cutoff  (default=off)",
   "\nMISC:",
   "  -N, --is_nibble               If true, the input DB is nibble type  \n                                  (default=off)",
   args_info->quant_given = 0 ;
   args_info->num_db_given = 0 ;
   args_info->score_cutoff_given = 0 ;
-  args_info->per_q_required_given = 0 ;
   args_info->square_z_given = 0 ;
   args_info->is_nibble_given = 0 ;
   args_info->buffer_given = 0 ;
   args_info->num_db_orig = NULL;
   args_info->score_cutoff_arg = -9999;
   args_info->score_cutoff_orig = NULL;
-  args_info->per_q_required_arg = 0.0;
-  args_info->per_q_required_orig = NULL;
   args_info->square_z_flag = 0;
   args_info->is_nibble_flag = 0;
   args_info->buffer_arg = 20;
   args_info->quant_help = gengetopt_args_info_help[11] ;
   args_info->num_db_help = gengetopt_args_info_help[12] ;
   args_info->score_cutoff_help = gengetopt_args_info_help[14] ;
-  args_info->per_q_required_help = gengetopt_args_info_help[15] ;
-  args_info->square_z_help = gengetopt_args_info_help[16] ;
-  args_info->is_nibble_help = gengetopt_args_info_help[18] ;
-  args_info->buffer_help = gengetopt_args_info_help[19] ;
-  args_info->output_text_help = gengetopt_args_info_help[20] ;
-  args_info->additional_db_help = gengetopt_args_info_help[21] ;
+  args_info->square_z_help = gengetopt_args_info_help[15] ;
+  args_info->is_nibble_help = gengetopt_args_info_help[17] ;
+  args_info->buffer_help = gengetopt_args_info_help[18] ;
+  args_info->output_text_help = gengetopt_args_info_help[19] ;
+  args_info->additional_db_help = gengetopt_args_info_help[20] ;
   
 }
 
   free_string_field (&(args_info->quant_orig));
   free_string_field (&(args_info->num_db_orig));
   free_string_field (&(args_info->score_cutoff_orig));
-  free_string_field (&(args_info->per_q_required_orig));
   free_string_field (&(args_info->buffer_orig));
   free_string_field (&(args_info->additional_db_arg));
   free_string_field (&(args_info->additional_db_orig));
     write_into_file(outfile, "num_db", args_info->num_db_orig, 0);
   if (args_info->score_cutoff_given)
     write_into_file(outfile, "score_cutoff", args_info->score_cutoff_orig, 0);
-  if (args_info->per_q_required_given)
-    write_into_file(outfile, "per_q_required", args_info->per_q_required_orig, 0);
   if (args_info->square_z_given)
     write_into_file(outfile, "square_z", 0, 0 );
   if (args_info->is_nibble_given)
         { "quant",	1, NULL, 'Q' },
         { "num_db",	1, NULL, 'n' },
         { "score_cutoff",	1, NULL, 'c' },
-        { "per_q_required",	1, NULL, 'C' },
         { "square_z",	0, NULL, 'e' },
         { "is_nibble",	0, NULL, 'N' },
         { "buffer",	1, NULL, 'b' },
         { 0,  0, 0, 0 }
       };
 
-      c = getopt_long (argc, argv, "hVt:x:i:d:p:P:u:U:Q:n:c:C:eNb:OB:", long_options, &option_index);
+      c = getopt_long (argc, argv, "hVt:x:i:d:p:P:u:U:Q:n:c:eNb:OB:", long_options, &option_index);
 
       if (c == -1) break;	/* Exit from `while (1)' loop.  */
 
             goto failure;
         
           break;
-        case 'C':	/* Fraction (max 1.0) of query required to correlate with a gene, in order to count the gene's query score. A gene may not correlate with a query gene if it is absent, or its correlation with query does not pass cut-off (specified by --score_cutoff). Use this with caution. Be careful if using with --score_cutoff..  */
-        
-        
-          if (update_arg( (void *)&(args_info->per_q_required_arg), 
-               &(args_info->per_q_required_orig), &(args_info->per_q_required_given),
-              &(local_args_info.per_q_required_given), optarg, 0, "0.0", ARG_FLOAT,
-              check_ambiguity, override, 0, 0,
-              "per_q_required", 'C',
-              additional_error))
-            goto failure;
-        
-          break;
         case 'e':	/* If using z-score, square-transform z-scores. Usually used in conjunction with --score-cutoff.  */
         
         

File tools/SeekServer/cmdline.h

   float score_cutoff_arg;	/**< @brief Cutoff on the gene-gene score before adding, default: no cutoff (default='-9999').  */
   char * score_cutoff_orig;	/**< @brief Cutoff on the gene-gene score before adding, default: no cutoff original value given at command line.  */
   const char *score_cutoff_help; /**< @brief Cutoff on the gene-gene score before adding, default: no cutoff help description.  */
-  float per_q_required_arg;	/**< @brief Fraction (max 1.0) of query required to correlate with a gene, in order to count the gene's query score. A gene may not correlate with a query gene if it is absent, or its correlation with query does not pass cut-off (specified by --score_cutoff). Use this with caution. Be careful if using with --score_cutoff. (default='0.0').  */
-  char * per_q_required_orig;	/**< @brief Fraction (max 1.0) of query required to correlate with a gene, in order to count the gene's query score. A gene may not correlate with a query gene if it is absent, or its correlation with query does not pass cut-off (specified by --score_cutoff). Use this with caution. Be careful if using with --score_cutoff. original value given at command line.  */
-  const char *per_q_required_help; /**< @brief Fraction (max 1.0) of query required to correlate with a gene, in order to count the gene's query score. A gene may not correlate with a query gene if it is absent, or its correlation with query does not pass cut-off (specified by --score_cutoff). Use this with caution. Be careful if using with --score_cutoff. help description.  */
   int square_z_flag;	/**< @brief If using z-score, square-transform z-scores. Usually used in conjunction with --score-cutoff (default=off).  */
   const char *square_z_help; /**< @brief If using z-score, square-transform z-scores. Usually used in conjunction with --score-cutoff help description.  */
   int is_nibble_flag;	/**< @brief If true, the input DB is nibble type (default=off).  */
   unsigned int quant_given ;	/**< @brief Whether quant was given.  */
   unsigned int num_db_given ;	/**< @brief Whether num_db was given.  */
   unsigned int score_cutoff_given ;	/**< @brief Whether score_cutoff was given.  */
-  unsigned int per_q_required_given ;	/**< @brief Whether per_q_required was given.  */
   unsigned int square_z_given ;	/**< @brief Whether square_z was given.  */
   unsigned int is_nibble_given ;	/**< @brief Whether is_nibble was given.  */
   unsigned int buffer_given ;	/**< @brief Whether buffer was given.  */