Commits

Jian Zhou committed ebcac0f

Make SVMperfer accept test labels; set bias_featurenum = 0 when initializing the SVM model; add an option to write the full SVM-Light format model.

  • Participants
  • Parent commits 9944767

Comments (0)

Files changed (6)

File src/svmperf.cpp

 	struct_parm.newconstretrain = 100;
 	struct_parm.ccache_size = 5;
 	struct_parm.batch_size = 100;
+	struct_parm.bias_featurenum = 0;
 
 	//Learn_parms
 	//strcpy (learn_parm.predfile, "trans_predictions");

File src/svmperf.h

 	}
 
 	void ReadModel(char* model_file) {
-		FreeModel();
+		//FreeModel();
 		structmodel = read_struct_model(model_file, &struct_parm);
 	}
 
-	void WriteModel(char* model_file) {
-		if (kernel_parm.kernel_type == LINEAR) {
+	void WriteModel(char* model_file, int simple_model_flag) {
+		if (kernel_parm.kernel_type == LINEAR && simple_model_flag) {
 			ofstream ofsm;
 			ofsm.open(model_file);
 			for (size_t i = 0; i < structmodel.sizePsi; i++) {

File tools/SVMperfer/SVMperfer.cpp

 	if (sArgs.model_given && sArgs.labels_given) { //learn once and write to file
 		pTrainSample = CSVMPERF::CreateSample(PCL, vecLabels);
 		SVM.Learn(*pTrainSample);
-		SVM.WriteModel(sArgs.model_arg);
+		SVM.WriteModel(sArgs.model_arg, sArgs.simple_model_flag);
 	} else if (sArgs.model_given && sArgs.output_given) { //read model and classify all
 		vector<SVMLabel> vecAllLabels;
 

File tools/SVMperfer/SVMperfer.ggo

 										string	typestr="filename"  yes
 option	"model"					m	"Model file"
 										string	typestr="filename"  no
+option	"test_labels"			T	"Test Labels file"
+										string	typestr="filename"	no
 option  "all"       			a   "Always classify all genes in PCLs"  
 										flag off
 
 										float default="0.5" no
 option "tradeoff"    			t   "SVM tradeoff constant C"
 										float default="1" no
+option "simple_model"			A   "Write model files with only linear weights"
+										flag	on
 option "params"   				p   "Parameter file"
 										string  typestr="filename"   no
 option	"mmap"					M	"Memory map binary input"

File tools/SVMperfer/cmdline.c

 /*
-  File autogenerated by gengetopt version 2.22.4
+  File autogenerated by gengetopt version 2.22
   generated with the following command:
-  /usr/bin/gengetopt -iSVMperfer.ggo --default-optional -u -N -e 
+  /Genomics/grid/users/jzthree/bin/gengetopt -iSVMperfer.ggo --default-optional -u -N -e 
 
   The developers of gengetopt consider the fixed text that goes in all
   gengetopt output files to be in the public domain:
 #include <stdlib.h>
 #include <string.h>
 
-#ifndef FIX_UNUSED
-#define FIX_UNUSED(X) (void) (X) /* avoid warnings for unused params */
-#endif
-
-#include <getopt.h>
+#include "getopt.h"
 
 #include "cmdline.h"
 
   "  -o, --output=filename       Output file ",
   "  -i, --input=filename        Input PCL file ",
   "  -m, --model=filename        Model file",
+  "  -T, --test_labels=filename  Test Labels file",
   "  -a, --all                   Always classify all genes in PCLs  (default=off)",
   "  -S, --slack                 Use slack rescaling (not implemented for ROC \n                                loss)  (default=off)",
   "\nOptions:",
   "  -e, --error_function=INT    Sets the loss function for SVM learning: Choice \n                                of:\n\n                                0\tZero/one loss: 1 if vector of predictions \n                                contains error, 0 otherwise.\n\n                                1\tF1: 100 minus the F1-score in percent.\n\n                                2\tErrorrate: Percentage of errors in \n                                prediction vector.\n\n                                3\tPrec/Rec Breakeven: 100 minus PRBEP in \n                                percent.\n\n                                4\tPrec@k: 100 minus precision at k in percent.\n\n                                5\tRec@k: 100 minus recall at k in percent.\n\n                                10\tROCArea: Percentage of swapped pos/neg \n                                pairs (i.e. 100 - ROCArea).\n                                  (default=`10')",
   "  -k, --k_value=FLOAT         Value of k parameter used for Prec@k and Rec@k in \n                                (0,1)  (default=`0.5')",
   "  -t, --tradeoff=FLOAT        SVM tradeoff constant C  (default=`1')",
+  "  -A, --simple_model          Write model files with only linear weights  \n                                (default=on)",
   "  -p, --params=filename       Parameter file",
   "  -M, --mmap                  Memory map binary input  (default=off)",
     0
 void clear_args (struct gengetopt_args_info *args_info);
 
 static int
-cmdline_parser_internal (int argc, char **argv, struct gengetopt_args_info *args_info,
+cmdline_parser_internal (int argc, char * const *argv, struct gengetopt_args_info *args_info,
                         struct cmdline_parser_params *params, const char *additional_error);
 
 static int
   args_info->output_given = 0 ;
   args_info->input_given = 0 ;
   args_info->model_given = 0 ;
+  args_info->test_labels_given = 0 ;
   args_info->all_given = 0 ;
   args_info->slack_given = 0 ;
   args_info->verbosity_given = 0 ;
   args_info->error_function_given = 0 ;
   args_info->k_value_given = 0 ;
   args_info->tradeoff_given = 0 ;
+  args_info->simple_model_given = 0 ;
   args_info->params_given = 0 ;
   args_info->mmap_given = 0 ;
 }
 static
 void clear_args (struct gengetopt_args_info *args_info)
 {
-  FIX_UNUSED (args_info);
   args_info->labels_arg = NULL;
   args_info->labels_orig = NULL;
   args_info->output_arg = NULL;
   args_info->input_orig = NULL;
   args_info->model_arg = NULL;
   args_info->model_orig = NULL;
+  args_info->test_labels_arg = NULL;
+  args_info->test_labels_orig = NULL;
   args_info->all_flag = 0;
   args_info->slack_flag = 0;
   args_info->verbosity_arg = 0;
   args_info->k_value_orig = NULL;
   args_info->tradeoff_arg = 1;
   args_info->tradeoff_orig = NULL;
+  args_info->simple_model_flag = 1;
   args_info->params_arg = NULL;
   args_info->params_orig = NULL;
   args_info->mmap_flag = 0;
   args_info->output_help = gengetopt_args_info_help[4] ;
   args_info->input_help = gengetopt_args_info_help[5] ;
   args_info->model_help = gengetopt_args_info_help[6] ;
-  args_info->all_help = gengetopt_args_info_help[7] ;
-  args_info->slack_help = gengetopt_args_info_help[8] ;
-  args_info->verbosity_help = gengetopt_args_info_help[10] ;
-  args_info->skip_help = gengetopt_args_info_help[11] ;
-  args_info->normalize_help = gengetopt_args_info_help[12] ;
-  args_info->cross_validation_help = gengetopt_args_info_help[13] ;
-  args_info->error_function_help = gengetopt_args_info_help[14] ;
-  args_info->k_value_help = gengetopt_args_info_help[15] ;
-  args_info->tradeoff_help = gengetopt_args_info_help[16] ;
-  args_info->params_help = gengetopt_args_info_help[17] ;
-  args_info->mmap_help = gengetopt_args_info_help[18] ;
+  args_info->test_labels_help = gengetopt_args_info_help[7] ;
+  args_info->all_help = gengetopt_args_info_help[8] ;
+  args_info->slack_help = gengetopt_args_info_help[9] ;
+  args_info->verbosity_help = gengetopt_args_info_help[11] ;
+  args_info->skip_help = gengetopt_args_info_help[12] ;
+  args_info->normalize_help = gengetopt_args_info_help[13] ;
+  args_info->cross_validation_help = gengetopt_args_info_help[14] ;
+  args_info->error_function_help = gengetopt_args_info_help[15] ;
+  args_info->k_value_help = gengetopt_args_info_help[16] ;
+  args_info->tradeoff_help = gengetopt_args_info_help[17] ;
+  args_info->simple_model_help = gengetopt_args_info_help[18] ;
+  args_info->params_help = gengetopt_args_info_help[19] ;
+  args_info->mmap_help = gengetopt_args_info_help[20] ;
   
 }
 
 void
 cmdline_parser_print_version (void)
 {
-  printf ("%s %s\n",
-     (strlen(CMDLINE_PARSER_PACKAGE_NAME) ? CMDLINE_PARSER_PACKAGE_NAME : CMDLINE_PARSER_PACKAGE),
-     CMDLINE_PARSER_VERSION);
+  printf ("%s %s\n", CMDLINE_PARSER_PACKAGE, CMDLINE_PARSER_VERSION);
 }
 
 static void print_help_common(void) {
   printf("\n");
 
   if (strlen(gengetopt_args_info_description) > 0)
-    printf("%s\n\n", gengetopt_args_info_description);
+    printf("%s\n", gengetopt_args_info_description);
 }
 
 void
   clear_args (args_info);
   init_args_info (args_info);
 
-  args_info->inputs = 0;
+  args_info->inputs = NULL;
   args_info->inputs_num = 0;
 }
 
   free_string_field (&(args_info->input_orig));
   free_string_field (&(args_info->model_arg));
   free_string_field (&(args_info->model_orig));
+  free_string_field (&(args_info->test_labels_arg));
+  free_string_field (&(args_info->test_labels_orig));
   free_string_field (&(args_info->verbosity_orig));
   free_string_field (&(args_info->skip_orig));
   free_string_field (&(args_info->cross_validation_orig));
 
 
 static void
-write_into_file(FILE *outfile, const char *opt, const char *arg, const char *values[])
+write_into_file(FILE *outfile, const char *opt, const char *arg, char *values[])
 {
-  FIX_UNUSED (values);
   if (arg) {
     fprintf(outfile, "%s=\"%s\"\n", opt, arg);
   } else {
     write_into_file(outfile, "input", args_info->input_orig, 0);
   if (args_info->model_given)
     write_into_file(outfile, "model", args_info->model_orig, 0);
+  if (args_info->test_labels_given)
+    write_into_file(outfile, "test_labels", args_info->test_labels_orig, 0);
   if (args_info->all_given)
     write_into_file(outfile, "all", 0, 0 );
   if (args_info->slack_given)
     write_into_file(outfile, "k_value", args_info->k_value_orig, 0);
   if (args_info->tradeoff_given)
     write_into_file(outfile, "tradeoff", args_info->tradeoff_orig, 0);
+  if (args_info->simple_model_given)
+    write_into_file(outfile, "simple_model", 0, 0 );
   if (args_info->params_given)
     write_into_file(outfile, "params", args_info->params_orig, 0);
   if (args_info->mmap_given)
 char *
 gengetopt_strdup (const char *s)
 {
-  char *result = 0;
+  char *result = NULL;
   if (!s)
     return result;
 
 }
 
 int
-cmdline_parser (int argc, char **argv, struct gengetopt_args_info *args_info)
+cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info)
 {
   return cmdline_parser2 (argc, argv, args_info, 0, 1, 1);
 }
 
 int
-cmdline_parser_ext (int argc, char **argv, struct gengetopt_args_info *args_info,
+cmdline_parser_ext (int argc, char * const *argv, struct gengetopt_args_info *args_info,
                    struct cmdline_parser_params *params)
 {
   int result;
-  result = cmdline_parser_internal (argc, argv, args_info, params, 0);
+  result = cmdline_parser_internal (argc, argv, args_info, params, NULL);
 
   return result;
 }
 
 int
-cmdline_parser2 (int argc, char **argv, struct gengetopt_args_info *args_info, int override, int initialize, int check_required)
+cmdline_parser2 (int argc, char * const *argv, struct gengetopt_args_info *args_info, int override, int initialize, int check_required)
 {
   int result;
   struct cmdline_parser_params params;
   params.check_ambiguity = 0;
   params.print_errors = 1;
 
-  result = cmdline_parser_internal (argc, argv, args_info, &params, 0);
+  result = cmdline_parser_internal (argc, argv, args_info, &params, NULL);
 
   return result;
 }
 {
   int result = EXIT_SUCCESS;
 
-  if (cmdline_parser_required2(args_info, prog_name, 0) > 0)
+  if (cmdline_parser_required2(args_info, prog_name, NULL) > 0)
     result = EXIT_FAILURE;
 
   return result;
 cmdline_parser_required2 (struct gengetopt_args_info *args_info, const char *prog_name, const char *additional_error)
 {
   int error = 0;
-  FIX_UNUSED (additional_error);
 
   /* checks for required options */
   if (! args_info->input_given)
 static
 int update_arg(void *field, char **orig_field,
                unsigned int *field_given, unsigned int *prev_given, 
-               char *value, const char *possible_values[],
-               const char *default_value,
+               char *value, char *possible_values[], const char *default_value,
                cmdline_parser_arg_type arg_type,
                int check_ambiguity, int override,
                int no_free, int multiple_option,
   const char *val = value;
   int found;
   char **string_field;
-  FIX_UNUSED (field);
 
   stop_char = 0;
   found = 0;
       return 1; /* failure */
     }
 
-  FIX_UNUSED (default_value);
     
   if (field_given && *field_given && ! override)
     return 0;
 
 
 int
-cmdline_parser_internal (
-  int argc, char **argv, struct gengetopt_args_info *args_info,
+cmdline_parser_internal (int argc, char * const *argv, struct gengetopt_args_info *args_info,
                         struct cmdline_parser_params *params, const char *additional_error)
 {
   int c;	/* Character of the parsed option.  */
         { "output",	1, NULL, 'o' },
         { "input",	1, NULL, 'i' },
         { "model",	1, NULL, 'm' },
+        { "test_labels",	1, NULL, 'T' },
         { "all",	0, NULL, 'a' },
         { "slack",	0, NULL, 'S' },
         { "verbosity",	1, NULL, 'v' },
         { "error_function",	1, NULL, 'e' },
         { "k_value",	1, NULL, 'k' },
         { "tradeoff",	1, NULL, 't' },
+        { "simple_model",	0, NULL, 'A' },
         { "params",	1, NULL, 'p' },
         { "mmap",	0, NULL, 'M' },
-        { 0,  0, 0, 0 }
+        { NULL,	0, NULL, 0 }
       };
 
-      c = getopt_long (argc, argv, "hVl:o:i:m:aSv:s:nc:e:k:t:p:M", long_options, &option_index);
+      c = getopt_long (argc, argv, "hVl:o:i:m:T:aSv:s:nc:e:k:t:Ap:M", long_options, &option_index);
 
       if (c == -1) break;	/* Exit from `while (1)' loop.  */
 
             goto failure;
         
           break;
+        case 'T':	/* Test Labels file.  */
+        
+        
+          if (update_arg( (void *)&(args_info->test_labels_arg), 
+               &(args_info->test_labels_orig), &(args_info->test_labels_given),
+              &(local_args_info.test_labels_given), optarg, 0, 0, ARG_STRING,
+              check_ambiguity, override, 0, 0,
+              "test_labels", 'T',
+              additional_error))
+            goto failure;
+        
+          break;
         case 'a':	/* Always classify all genes in PCLs.  */
         
         
             goto failure;
         
           break;
+        case 'A':	/* Write model files with only linear weights.  */
+        
+        
+          if (update_arg((void *)&(args_info->simple_model_flag), 0, &(args_info->simple_model_given),
+              &(local_args_info.simple_model_given), optarg, 0, 0, ARG_FLAG,
+              check_ambiguity, override, 1, 0, "simple_model", 'A',
+              additional_error))
+            goto failure;
+        
+          break;
         case 'p':	/* Parameter file.  */
         
         

File tools/SVMperfer/cmdline.h

 /** @file cmdline.h
  *  @brief The header file for the command line option parser
- *  generated by GNU Gengetopt version 2.22.4
+ *  generated by GNU Gengetopt version 2.22
  *  http://www.gnu.org/software/gengetopt.
  *  DO NOT modify this file, since it can be overwritten
  *  @author GNU Gengetopt by Lorenzo Bettini */
 #endif /* __cplusplus */
 
 #ifndef CMDLINE_PARSER_PACKAGE
-/** @brief the program name (used for printing errors) */
+/** @brief the program name */
 #define CMDLINE_PARSER_PACKAGE "SVMperfer"
 #endif
 
-#ifndef CMDLINE_PARSER_PACKAGE_NAME
-/** @brief the complete program name (used for help and version) */
-#define CMDLINE_PARSER_PACKAGE_NAME "SVMperfer"
-#endif
-
 #ifndef CMDLINE_PARSER_VERSION
 /** @brief the program version */
 #define CMDLINE_PARSER_VERSION "1.0"
   char * model_arg;	/**< @brief Model file.  */
   char * model_orig;	/**< @brief Model file original value given at command line.  */
   const char *model_help; /**< @brief Model file help description.  */
+  char * test_labels_arg;	/**< @brief Test Labels file.  */
+  char * test_labels_orig;	/**< @brief Test Labels file original value given at command line.  */
+  const char *test_labels_help; /**< @brief Test Labels file help description.  */
   int all_flag;	/**< @brief Always classify all genes in PCLs (default=off).  */
   const char *all_help; /**< @brief Always classify all genes in PCLs help description.  */
   int slack_flag;	/**< @brief Use slack rescaling (not implemented for ROC loss) (default=off).  */
   float tradeoff_arg;	/**< @brief SVM tradeoff constant C (default='1').  */
   char * tradeoff_orig;	/**< @brief SVM tradeoff constant C original value given at command line.  */
   const char *tradeoff_help; /**< @brief SVM tradeoff constant C help description.  */
+  int simple_model_flag;	/**< @brief Write model files with only linear weights (default=on).  */
+  const char *simple_model_help; /**< @brief Write model files with only linear weights help description.  */
   char * params_arg;	/**< @brief Parameter file.  */
   char * params_orig;	/**< @brief Parameter file original value given at command line.  */
   const char *params_help; /**< @brief Parameter file help description.  */
   unsigned int output_given ;	/**< @brief Whether output was given.  */
   unsigned int input_given ;	/**< @brief Whether input was given.  */
   unsigned int model_given ;	/**< @brief Whether model was given.  */
+  unsigned int test_labels_given ;	/**< @brief Whether test_labels was given.  */
   unsigned int all_given ;	/**< @brief Whether all was given.  */
   unsigned int slack_given ;	/**< @brief Whether slack was given.  */
   unsigned int verbosity_given ;	/**< @brief Whether verbosity was given.  */
   unsigned int error_function_given ;	/**< @brief Whether error_function was given.  */
   unsigned int k_value_given ;	/**< @brief Whether k_value was given.  */
   unsigned int tradeoff_given ;	/**< @brief Whether tradeoff was given.  */
+  unsigned int simple_model_given ;	/**< @brief Whether simple_model was given.  */
   unsigned int params_given ;	/**< @brief Whether params was given.  */
   unsigned int mmap_given ;	/**< @brief Whether mmap was given.  */
 
  * @param args_info the structure where option information will be stored
  * @return 0 if everything went fine, NON 0 if an error took place
  */
-int cmdline_parser (int argc, char **argv,
+int cmdline_parser (int argc, char * const *argv,
   struct gengetopt_args_info *args_info);
 
 /**
  * @return 0 if everything went fine, NON 0 if an error took place
  * @deprecated use cmdline_parser_ext() instead
  */
-int cmdline_parser2 (int argc, char **argv,
+int cmdline_parser2 (int argc, char * const *argv,
   struct gengetopt_args_info *args_info,
   int override, int initialize, int check_required);
 
  * @param params additional parameters for the parser
  * @return 0 if everything went fine, NON 0 if an error took place
  */
-int cmdline_parser_ext (int argc, char **argv,
+int cmdline_parser_ext (int argc, char * const *argv,
   struct gengetopt_args_info *args_info,
   struct cmdline_parser_params *params);