Commits

YOUNG-SUK LEE  committed a9f7727

polish code

  • Participants
  • Parent commits 50da58b

Comments (0)

Files changed (6)

File src/libsvm.cpp

 namespace LIBSVM {
 
 #include "libsvm.h"
-struct svm_node* CLIBSVM::x_space = NULL;
+  struct svm_node* CLIBSVM::x_space = NULL;
 
-bool CLIBSVM::initialize() {
+  bool CLIBSVM::initialize() {
 
-	/* set default */
+    /* set default */
 
-        parm.cache_size = 100;
-        parm.C = 0.01;
-	parm.eps = 1e-3;
-        parm.svm_type = C_SVC;
-        parm.p = 0.1;
-        parm.shrinking = 1;
-        parm.nr_weight = 0;
-        parm.weight_label = NULL;
-        parm.weight = NULL;
-        parm.probability = 0;
-        parm.nu = 0.5;
-        parm.coef0 = 0;
-        parm.gamma = 0;
-        parm.degree = 3;
-        parm.kernel_type = LINEAR;
-        
-        model = NULL;
+    parm.cache_size = 100;
+    parm.C = 0.01;
+    parm.eps = 1e-3;
+    parm.svm_type = C_SVC;
+    parm.p = 0.1;
+    parm.shrinking = 1;
+    parm.nr_weight = 0;
+    parm.weight_label = NULL;
+    parm.weight = NULL;
+    parm.probability = 0;
+    parm.nu = 0.5;
+    parm.coef0 = 0;
+    parm.gamma = 0;
+    parm.degree = 3;
+    parm.kernel_type = LINEAR;
 
-        balance = 0;//Off
+    model = NULL;
 
-	return true;
-}
+    balance = 0;//Off
 
-bool CLIBSVM::parms_check() {
-	if (parm.C < 0) {
-	  fprintf(
-	    stderr,
-	    "\nTrade-off between training error and margin is not set (C<0)!\nC value will be set to default value. Clight = Cpef * 100 / n \n");
-	  fprintf(stderr, "be less than 1.0 !!!\n\n");
-	  return false;
-	}
-	if (parm.eps <= 0) {
-	  fprintf(stderr,
-		"\nThe epsilon parameter must be greater than zero!\n\n");
-	  return false;
-	}
+    return true;
+  }
 
-        if (parm.nu < 0 | parm.nu > 1) {
-            fprintf(stderr, "nu parameter must be between 0 and 1");
-            return false;
+  bool CLIBSVM::parms_check() {
+    if (parm.C < 0) {
+      fprintf(
+          stderr,
+          "\nTrade-off between training error and margin is not set (C<0)!\nC value will be set to default value. Clight = Cpef * 100 / n \n");
+      fprintf(stderr, "be less than 1.0 !!!\n\n");
+      return false;
+    }
+    if (parm.eps <= 0) {
+      fprintf(stderr,
+          "\nThe epsilon parameter must be greater than zero!\n\n");
+      return false;
+    }
+
+    if (parm.nu < 0 | parm.nu > 1) {
+      fprintf(stderr, "nu parameter must be between 0 and 1");
+      return false;
+    }
+
+    //TODO: add more parameter checks 
+
+    return true;
+  }
+
+  void CLIBSVM::SetXSpace(Sleipnir::CPCL& PCL) {
+    size_t j, k, iGene, numFeatures, numLabels;
+    std::vector<std::string> vecGeneNames;
+    string GeneName;
+    float d;
+
+    numFeatures = PCL.GetExperiments();
+    numLabels = PCL.GetGenes();
+    vecGeneNames = PCL.GetGeneNames();
+
+    cerr << "total data" << endl;
+    cerr << "number of features (columns) used: " << numFeatures << endl;
+    cerr << "number of labels in data: " << numLabels << endl;
+    cerr << "number of gene (rows) names: " << vecGeneNames.size() << endl;
+
+    x_space = Malloc(struct svm_node, (1+numFeatures) * numLabels);
+
+    j = 0;//element index
+
+    for ( std::vector<std::string>::iterator it = vecGeneNames.begin(); it != vecGeneNames.end(); ++it) {
+      GeneName = *it;
+      iGene = PCL.GetGene(GeneName); 
+
+      for ( k = 0; k < numFeatures; k++){
+        x_space[j].index = k;
+        if (!Sleipnir::CMeta::IsNaN(d = PCL.Get(iGene, k))) {
+          x_space[j].value = d;
+        }else{
+          // impute 0 for missing values
+          x_space[j].value = 0;
         }
+        j++;
+      }
 
-        //TODO: add more parameter checks 
+      x_space[j].index = -1;
+      j++;
+    }
 
-	return true;
-}
 
-void CLIBSVM::SetXSpace(Sleipnir::CPCL& PCL) {
-	size_t j, k, iGene, numFeatures, numLabels;
-        std::vector<std::string> vecGeneNames;
-        string GeneName;
-        float d;
+  }
 
-        numFeatures = PCL.GetExperiments();
-        numLabels = PCL.GetGenes();
-        vecGeneNames = PCL.GetGeneNames();
-        
-cerr << "number of features used: " << numFeatures << endl;
-cerr << "number of labels in data: " << numLabels << endl;
-cerr << "number of gene names: " << vecGeneNames.size() << endl;
+  SAMPLE * CLIBSVM::CreateSample(Sleipnir::CPCL& PCL, vector<SVMLabel> SVMLabels) {
+    size_t i, j, k, s, iGene, iProblem, numFeatures, numLabels, max_index;
+    float d;
 
-        x_space = Malloc(struct svm_node, (1+numFeatures) * numLabels);
+    struct svm_problem* prob;
 
-        j = 0;//element index
-        
-        for ( std::vector<std::string>::iterator it = vecGeneNames.begin(); it != vecGeneNames.end(); ++it) {
-          GeneName = *it;
-          iGene = PCL.GetGene(GeneName); 
+    prob = Malloc(struct svm_problem,1);
 
-          for ( k = 0; k < numFeatures; k++){
-            x_space[j].index = k;
-            if (!Sleipnir::CMeta::IsNaN(d = PCL.Get(iGene, k))) {
-              x_space[j].value = d;
-            }else{
-              // impute 0 for missing values
-              x_space[j].value = 0;
-            }
-            j++;
-          }
+    numFeatures = PCL.GetExperiments();
+    numLabels = 0;
 
-          x_space[j].index = -1;
-          j++;
-        }
+    iProblem = 0;
 
+    for (i = 0; i < SVMLabels.size(); i++) {
+      if (!SVMLabels[i].hasIndex){
+        SVMLabels[i].SetIndex(PCL.GetGene(SVMLabels[i].GeneName));
+      }
+      iGene = SVMLabels[i].index;
+      if (iGene != -1) {
+        numLabels++;
+      }
+    }
 
-}
+    cerr << "sampled data" << endl;
+    cerr << "number of features used: " << numFeatures << endl;
+    cerr << "number of labels given: " << SVMLabels.size() << endl;
+    cerr << "number of labels in data: " << numLabels << endl;
 
-SAMPLE * CLIBSVM::CreateSample(Sleipnir::CPCL& PCL, vector<SVMLabel> SVMLabels) {
-	size_t i, j, k, s, iGene, iProblem, numFeatures, numLabels, max_index;
-        float d;
+    prob->l = numLabels;
+    prob->y = Malloc(double,numLabels);
+    prob->x = Malloc(struct svm_node *, numLabels);
 
-        struct svm_problem* prob;
+    if(x_space == NULL) {
+      SetXSpace(PCL);
+    }
 
-        prob = Malloc(struct svm_problem,1);
+    max_index = numFeatures;
 
-        numFeatures = PCL.GetExperiments();
-        numLabels = 0;
-        
-        iProblem = 0;
+    s = 0;//sample index
+    for (i = 0; i < SVMLabels.size(); i++) {
+      iGene = SVMLabels[i].index;
 
-	for (i = 0; i < SVMLabels.size(); i++) {
-                if (!SVMLabels[i].hasIndex){
-                  SVMLabels[i].SetIndex(PCL.GetGene(SVMLabels[i].GeneName));
-                }
-		iGene = SVMLabels[i].index;
-		if (iGene != -1) {
-                  numLabels++;
-		}
-	}
+      if (iGene != -1){
+        (prob->x)[s] = &x_space[iGene*(1+numFeatures)];
+        (prob->y)[s] = SVMLabels[i].Target;
+        s++;
+      }
+    }
 
-cerr << "number of features used: " << numFeatures << endl;
-cerr << "number of labels given: " << SVMLabels.size() << endl;
-cerr << "number of labels in data: " << numLabels << endl;
+    SAMPLE* pSample = new SAMPLE;
 
-        prob->l = numLabels;
-        prob->y = Malloc(double,numLabels);
-        prob->x = Malloc(struct svm_node *, numLabels);
+    pSample->n = prob->l;//number of labels
+    pSample->problems = prob;
+    pSample->numFeatures = numFeatures;
+    return pSample;
+  }
 
-        if(x_space == NULL) {
-          SetXSpace(PCL);
-        }
+  //TODO: create sample for dab/dat files
+  //
 
-        max_index = numFeatures;
-
-        s = 0;//sample index
-        for (i = 0; i < SVMLabels.size(); i++) {
-            iGene = SVMLabels[i].index;
-
-            if (iGene != -1){
-              (prob->x)[s] = &x_space[iGene*(1+numFeatures)];
-              (prob->y)[s] = SVMLabels[i].Target;
-              s++;
-            }
-        }
-
-        SAMPLE* pSample = new SAMPLE;
-
-        pSample->n = prob->l;//number of labels
-        pSample->problems = prob;
-        pSample->numFeatures = numFeatures;
-	return pSample;
-}
-
-//TODO: create sample for dab/dat files
-//
-
-vector<Result> CLIBSVM::Classify(Sleipnir::CPCL &PCL,
-        vector<SVMLabel> SVMLabels) {
+  vector<Result> CLIBSVM::Classify(Sleipnir::CPCL &PCL,
+      vector<SVMLabel> SVMLabels) {
     size_t i, j, iGene;
     double predict_label;
     double* dec_values;
         predict_label = svm_predict_values(model,x, dec_values);
        dec_value = dec_values[0]; //assume that positive class is the first class TODO: currently hacky
 
-//cerr << dec_value << endl;
-
         vecResult[j].GeneName = SVMLabels[i].GeneName;
         vecResult[j].Target = SVMLabels[i].Target;
         vecResult[j].Value = dec_value;
-        
+
         j++;
 
       }
     free(pSample);
     //delete pSample ;
     free(dec_values);
-//    x = NULL;
-//    dec_values = NULL;
-//    pSample = NULL;
+    return vecResult;
+  }
 
-    return vecResult;
-}
 
-    
-//TODO: classify for dab/dat files
-//
+  //TODO: classify for dab/dat files
+  //
 
 }

File src/libsvm.h

 namespace LIBSVM {
 
 
-extern "C" {
+  extern "C" {
 #define class Class2
 #include <libsvm/svm.h>
 #undef class
 
-}
-
-typedef struct sample { /* a sample is a set of examples */
-   size_t     n;            /* n is the total number of examples */
-   size_t  numFeatures; 
-   struct svm_problem *problems;
-//   struct svm_node *x_space;
-   sample() {
-     n = 0;
-     numFeatures = 0;
-     problems = NULL;
-//     x_space = NULL;
-   }
-   
-   ~sample(){
-     //no destructor for problem struct
-     free(problems->y);
-     free(problems->x);
-//     free(x_space);
-     problems = NULL;
-   }
-} SAMPLE;
- 
-
-class SVMLabel {
-public:
-	string GeneName;
-	double Target;
-	size_t index;
-	bool hasIndex;
-        
-	SVMLabel(std::string name, double target) {
-		GeneName = name;
-		Target = target;
-		hasIndex = false;
-		index = -1;
-	}
-
-	SVMLabel() {
-		GeneName = "";
-		Target = 0;
-	}
-	void SetIndex(size_t i) {
-		index = i;
-		hasIndex = true;
-	}
-};
-
-class Result {
-public:
-	std::string GeneName;
-	double Target;
-	double Value;
-	int CVround;
-	int Rank;
-	Result() {
-		GeneName = "";
-		Target = 0;
-		Value = Sleipnir::CMeta::GetNaN();
-	}
-
-	Result(std::string name, int cv = -1) {
-		GeneName = name;
-		Target = 0;
-		Value = 0;
-		CVround = cv;
-		Rank = -1;
-	}
-	string toString() {
-		stringstream ss;
-		ss << GeneName << '\t' << Target << '\t' << Value << '\t' << "CV"
-				<< CVround;
-		if (Rank != -1) {
-			ss << '\t' << Rank;
-		}
-		return ss.str();
-	}
-
-};
-
-enum EFilter {
-	EFilterInclude = 0, EFilterExclude = EFilterInclude + 1,
-};
-
-//this class encapsulates the model and parameters and has no associated data
-
-class CLIBSVM {
-public:
-  //struct svm_parameter parm;
-  struct svm_model* model;
-  struct svm_parameter parm;
-  int balance;
-
-  static struct svm_node *x_space;
-
-  CLIBSVM() {
-    initialize();
   }
 
-  ~CLIBSVM() {
-    svm_free_and_destroy_model( &model );
-    model = NULL;
-  }
-
-  void SetBalance(int bal){
-    balance = bal;
-  }
-
-  void SetSVMType(int type) {
-    parm.svm_type = type;
-  }
-
-  void SetTradeoff(double tradeoff) {
-    parm.C = tradeoff; //TODO: only applicable for vanilla svm
-  }
-
-  void SetKernel(int K) {
-    parm.kernel_type = K;
-  }
-
-  void SetPolyD(int D) {
-    parm.degree = D;
-  }
-
-  void SetRBFGamma(double g) {
-    parm.gamma = g;
-  }
-
-  void SetNu(double nu) {
-    parm.nu = nu;
-  }
-
-  void ReadModel(char* model_file) {
-    FreeModel();
-    model = svm_load_model(model_file); 
-  }
-
-  void FreeModel() {
-    svm_free_and_destroy_model(&model);
-  }
-
-  void WriteModel(char* model_file) {
-    svm_save_model(model_file, model);
-  }
-  
-
-  //static members process data
-  //
-  
-  static void SetXSpace(Sleipnir::CPCL& PCL);
-
-  //
-  //single gene predictions
-
-  //TODO: add functions to handle PCL files
-  //creates a svm_problem for a given gene index in a microarray set
-  //static svm_problem* CreateProblem(Sleipnir::CPCLSet &PCLSet, size_t iGene, size_t iProblem);
-
-  //creates a svm_problem for a given gene in a Dat file using all other genes as features
-  //static svm_problem* CreateProblem(Sleipnir::CDat& Dat, size_t iGene, size_t iProblem);
-
-  //Creates a sample using a PCLset and SVMlabels Looks up genes by name.
-  //static SAMPLE* CreateSample(Sleipnir::CPCLSet &PCLSet,
-  //			vector<SVMLabel> SVMLabels);
-  
-  //Creates a sample of svm_problems using a single PCL and SVMlabels Looks up genes by name.
-  static SAMPLE* CreateSample(Sleipnir::CPCL &PCL, vector<SVMLabel> SVMLabels);
-
-  //Same as above except creates bootstrap samples and does not duplicate data
-  //static SAMPLE** CreateSampleBootStrap(Sleipnir::CPCL &PCL,
-  //	vector<SVMLabel>& SVMLabels, 
-  //      vector<vector<size_t> > vecvecIndex);
-
-  //Creates a sample using a Dat and SVMlabels. Looks up genes by name
-  static SAMPLE* CreateSample(Sleipnir::CDat& CDat,
-			vector<SVMLabel> SMVLabels);
-
-  //Classify single genes
-  vector<Result> Classify(Sleipnir::CPCL& PCL, vector<SVMLabel> SVMLabels);
-  
-  //vector<Result> Classify(Sleipnir::CPCLSet& PCLSet,
-  //			vector<SVMLabel> SVMLabels);
-  //vector<Result> Classify(Sleipnir::CDat& Dat, vector<SVMLabel> SVMLabels);
-
-  //MEMBER functions wraps learning
-  void Learn(SAMPLE &sample) {
-    //only L2 for LibSVM
-    //cerr << "SLACK NORM =" << struct_parm.slack_norm << endl;
-    //slack_norm = type of regularization
-
-    //Take care of the labels here
-    size_t i;
-    size_t numn, nump;
-
-    struct svm_problem* prob = sample.problems;
-
-    numn = nump = 0;
-
-    for(i = 0; i < sample.n; i++){
-      if (((*prob).y)[i] > 0){
-        nump ++;
-      }else{
-        numn ++;
-      }
+  typedef struct sample { /* a sample is a set of examples */
+    size_t     n;            /* n is the total number of examples */
+    size_t  numFeatures; 
+    struct svm_problem *problems;
+    sample() {
+      n = 0;
+      numFeatures = 0;
+      problems = NULL;
     }
 
-    if (balance) {
-cerr << "balancing the weights between postivies and negatives. " << endl;
-      parm.nr_weight = 2;
-      parm.weight_label = (int *) realloc(parm.weight_label, sizeof(int)*parm.nr_weight);
-      parm.weight = (double *) realloc(parm.weight, sizeof(double)*parm.nr_weight);
-      parm.weight_label[0] = 1;
-      parm.weight[0] = numn;
-      parm.weight_label[1] = -1;
-      parm.weight[1] = nump;
+    ~sample(){
+      //no destructor for problem struct
+      free(problems->y);
+      free(problems->x);
+      problems = NULL;
     }
+  } SAMPLE;
 
-    if(parms_check()){
-      model = svm_train(prob,&parm);
-    }else{
-    }
-    prob = NULL;
 
-  }
-/*
-  static void FreeSample(SAMPLE s){
-    FreeProblem(s.problems);
-    free(s.x_space);
-  }
+  class SVMLabel {
+    public:
+      string GeneName;
+      double Target;
+      size_t index;
+      bool hasIndex;
 
-  static void FreeProblem(svm_problem *prob){
-    int i ;
-    free(prob->y);
-    free(prob->x);
-    return;
-  }
-*/
-  static void PrintSample(SAMPLE s){
-    PrintProblem(s.problems);
-  }
+      SVMLabel(std::string name, double target) {
+        GeneName = name;
+        Target = target;
+        hasIndex = false;
+        index = -1;
+      }
 
-  static void PrintProblem(svm_problem *prob){
-    size_t i, j ;
-    i = j = 0;
+      SVMLabel() {
+        GeneName = "";
+        Target = 0;
+      }
+      void SetIndex(size_t i) {
+        index = i;
+        hasIndex = true;
+      }
+  };
 
-    for(i = 0 ; i < 3 ; i++){
-      for(j = 0 ; j < 2 ; j ++){
-        PrintNode((prob->x)[i][j]);
+  class Result {
+    public:
+      std::string GeneName;
+      double Target;
+      double Value;
+      int CVround;
+      int Rank;
+      Result() {
+        GeneName = "";
+        Target = 0;
+        Value = Sleipnir::CMeta::GetNaN();
       }
-    }
 
-    return;
-  }
+      Result(std::string name, int cv = -1) {
+        GeneName = name;
+        Target = 0;
+        Value = 0;
+        CVround = cv;
+        Rank = -1;
+      }
+      string toString() {
+        stringstream ss;
+        ss << GeneName << '\t' << Target << '\t' << Value << '\t' << "CV"
+          << CVround;
+        if (Rank != -1) {
+          ss << '\t' << Rank;
+        }
+        return ss.str();
+      }
 
-  static void PrintNode(svm_node node){
-    cerr << "index: " << node.index << endl;
-    cerr << "value: " << node.value << endl;
-  }
+  };
 
+  enum EFilter {
+    EFilterInclude = 0, EFilterExclude = EFilterInclude + 1,
+  };
 
-  //no pairwise learning for libSVM wrapper
+  //this class encapsulates the model and parameters and has no associated data
 
-  bool parms_check();
-  bool initialize();
-	
-  // functions to convert probablity
-  //void sigmoid_train(Sleipnir::CDat& Results, vector<SVMLabelPair*>& SVMLabels, float& A, float& B);
-  //void sigmoid_predict(Sleipnir::CDat& Results, vector<SVMLabelPair*>& SVMLabels, float A, float B);
-        
-        //not sure exactly what this does in svmperf compare to just ReadModel
-	// read in a SVM model file that's only has the w vector written out for linear kernel
-/*	void ReadModelLinear(char* model_file) {
-	  FreeModel();
-	  structmodel = read_struct_model_w_linear(model_file, &struct_parm);
-	}*/
-	
-	//STRUCTMODEL read_struct_model_w_linear(char *file, STRUCT_LEARN_PARM *sparm);
-};
+  class CLIBSVM {
+    public:
+      struct svm_model* model;
+      struct svm_parameter parm;
+      int balance;
+
+      static struct svm_node *x_space;
+
+      CLIBSVM() {
+        initialize();
+      }
+
+      ~CLIBSVM() {
+        svm_free_and_destroy_model( &model );
+        model = NULL;
+      }
+
+      void SetBalance(int bal){
+        balance = bal;
+      }
+
+      void SetSVMType(int type) {
+        parm.svm_type = type;
+      }
+
+      void SetTradeoff(double tradeoff) {
+        parm.C = tradeoff; //TODO: only applicable for vanilla svm
+      }
+
+      void SetKernel(int K) {
+        parm.kernel_type = K;
+      }
+
+      void SetPolyD(int D) {
+        parm.degree = D;
+      }
+
+      void SetRBFGamma(double g) {
+        parm.gamma = g;
+      }
+
+      void SetNu(double nu) {
+        parm.nu = nu;
+      }
+
+      void ReadModel(char* model_file) {
+        FreeModel();
+        model = svm_load_model(model_file); 
+      }
+
+      void FreeModel() {
+        svm_free_and_destroy_model(&model);
+      }
+
+      void WriteModel(char* model_file) {
+        svm_save_model(model_file, model);
+      }
+
+
+      //static members process data
+      //
+
+      static void SetXSpace(Sleipnir::CPCL& PCL);
+
+      //single gene predictions
+
+      //TODO: add functions to handle PCL files
+
+      //Creates a sample of svm_problems using a single PCL and SVMLabels. Looks up genes by name.
+      static SAMPLE* CreateSample(Sleipnir::CPCL &PCL, vector<SVMLabel> SVMLabels);
+
+      //TODO: Same as above except creates bootstrap samples and does not duplicate data
+
+      //Creates a sample using a Dat and SVMLabels. Looks up genes by name.
+      static SAMPLE* CreateSample(Sleipnir::CDat& CDat,
+          vector<SVMLabel> SMVLabels);
+
+      //Classify single genes
+      vector<Result> Classify(Sleipnir::CPCL& PCL, vector<SVMLabel> SVMLabels);
+
+
+      //MEMBER functions wraps learning
+      void Learn(SAMPLE &sample) {
+        //only L2 for LibSVM
+        //cerr << "SLACK NORM =" << struct_parm.slack_norm << endl;
+        //slack_norm = type of regularization
+
+        //Take care of the labels here
+        size_t i;
+        size_t numn, nump;
+
+        struct svm_problem* prob = sample.problems;
+
+        numn = nump = 0;
+
+        for(i = 0; i < sample.n; i++){
+          if (((*prob).y)[i] > 0){
+            nump ++;
+          }else{
+            numn ++;
+          }
+        }
+
+        if (balance) {
+          cerr << "balancing the weights between postivies and negatives. " << endl;
+          parm.nr_weight = 2;
+          parm.weight_label = (int *) realloc(parm.weight_label, sizeof(int)*parm.nr_weight);
+          parm.weight = (double *) realloc(parm.weight, sizeof(double)*parm.nr_weight);
+          parm.weight_label[0] = 1;
+          parm.weight[0] = numn;
+          parm.weight_label[1] = -1;
+          parm.weight[1] = nump;
+        }
+
+        if(parms_check()){
+          model = svm_train(prob,&parm);
+        }else{
+        }
+        prob = NULL;
+
+      }
+
+      static void PrintSample(SAMPLE s){
+        PrintProblem(s.problems);
+      }
+
+      static void PrintProblem(svm_problem *prob){
+        size_t i, j ;
+        i = j = 0;
+
+        for(i = 0 ; i < 3 ; i++){
+          for(j = 0 ; j < 2 ; j ++){
+            PrintNode((prob->x)[i][j]);
+          }
+        }
+
+        return;
+      }
+
+      static void PrintNode(svm_node node){
+        cerr << "index: " << node.index << endl;
+        cerr << "value: " << node.value << endl;
+      }
+
+
+      //no pairwise learning for libSVM wrapper
+
+      bool parms_check();
+      bool initialize();
+
+      //TODO: functions to convert probability
+
+  };
 }
 
 #endif // NO_SVM_LIBSVM

File tools/LibSVMer/LibSVMer.cpp

 
 using namespace LIBSVM;
 
-/*
-vector<LIBSVM::SVMLabel>* Subsampling( vector<LIBSVM::SVMLabel>* pTrainVector, size_t num, size_t numSample) {
-  size_t iSample, iSubsample, numPos, index, len;
-  size_t i;
+vector<LIBSVM::SVMLabel> ReadLabels(ifstream & ifsm) {
 
-cerr << "subsampling: " << num << endl;
+  static const size_t c_iBuffer = 1024;
+  char acBuffer[c_iBuffer];
+  vector<string> vecstrTokens;
+  vector<LIBSVM::SVMLabel> vecLabels;
+  size_t numPositives, numNegatives;
+  numPositives = numNegatives = 0;
+  while (!ifsm.eof()) {
+    ifsm.getline(acBuffer, c_iBuffer - 1);
+    acBuffer[c_iBuffer - 1] = 0;
+    vecstrTokens.clear();
+    CMeta::Tokenize(acBuffer, vecstrTokens);
+    if (vecstrTokens.empty())
+      continue;
+    if (vecstrTokens.size() != 2) {
+      cerr << "Illegal label line (" << vecstrTokens.size() << "): "
+        << acBuffer << endl;
+      continue;
+    }
+    vecLabels.push_back(LIBSVM::SVMLabel(vecstrTokens[0], atof(
+            vecstrTokens[1].c_str())));
+    if (vecLabels.back().Target > 0)
+      numPositives++;
+    else
+      numNegatives++;
+  }
+  return vecLabels;
+}
 
-  len = numSample;
-  
-cerr << "number of samples: " << len << endl;
 
-  vector<LIBSVM::SVMLabel>* ppTmpTrain[len * num];
+struct SortResults {
 
-  vector<LIBSVM::SVMLabel> Negatives;
-  vector<LIBSVM::SVMLabel> Positives;
-  
-  for( iSample = 0 ; iSample < len ; iSample ++ ) {
-    numPos = 0;
-    Negatives.empty();
-    Positives.empty();
-    
-    for(vector<LIBSVM::SVMLabel>::iterator it = pTrainVector[iSample].begin() ;
-        it != pTrainVector[iSample].end(); it++){
-      if ( (*it).Target == 1 ) { // if positive
-        numPos ++;
-        Positives.push_back(*it);
-      }else if ( (*it).Target == -1 )
-        Negatives.push_back(*it);
-    }
+  bool operator()(const LIBSVM::Result& rOne, const LIBSVM::Result & rTwo) const {
+    return (rOne.Value > rTwo.Value);
+  }
+};
 
 
-    for( iSubsample = 0 ; iSubsample < num ; iSubsample ++ ) {
-      index = num * iSample + iSubsample;
-      (*ppTmpTrain[ index ]).reserve((size_t) (numPos * 10));
-//pTmpTrain[ index ] = new vector<LIBSVM::SVMLabel>;
-      //copy( Positives.begin( ), Positives.end( ), pTmpTrain[ index ].begin( ) ); doesn't work..
-      for( i = 0 ; i < numPos ; i ++ ) {
-        (*ppTmpTrain)[ index ].push_back(Positives.at( i ) );
-        (*ppTmpTrain)[ index ].push_back(Negatives.at( rand() % Negatives.size() )) ; //with replacement!!
-      }
+size_t PrintResults(vector<LIBSVM::Result> vecResults, ofstream & ofsm) {
+  sort(vecResults.begin(), vecResults.end(), SortResults());
+  int LabelVal;
+  for (size_t i = 0; i < vecResults.size(); i++) {
+    ofsm << vecResults[i].GeneName << '\t' << vecResults[i].Target << '\t'
+      << vecResults[i].Value << endl;
+  }
+};
 
-cerr << "blah" << endl;
-cerr << (*ppTmpTrain[ index ]).size() << endl;
+struct ParamStruct {
+  vector<float> vecK, vecTradeoff;
+  vector<size_t> vecLoss;
+  vector<char*> vecNames;
+};
+
+int main(int iArgs, char** aszArgs) {
+
+  gengetopt_args_info sArgs;
+
+  CPCL PCL;//data
+  LIBSVM::CLIBSVM SVM;//model
+
+  size_t i, j, iGene, jGene;
+  ifstream ifsm;
+
+  bool added;
+  added = false;
+
+  if (cmdline_parser(iArgs, aszArgs, &sArgs)) {
+    cmdline_parser_print_help();
+    return 1;
+  }
+
+
+  //Set model parameters
+
+  if (sArgs.cross_validation_arg < 1){
+    cerr << "cross_valid is <1. Must be set at least 1" << endl;
+    return 1;
+  }
+  else if(sArgs.cross_validation_arg < 2){
+    cerr << "cross_valid is set to 1. No cross validation holdouts will be run." << endl;
+  }
+
+  if (sArgs.num_cv_runs_arg < 1){
+    cerr << "number of cv runs is < 1. Must be set at least 1" << endl;
+    return 1;
+  }
+
+  SVM.SetTradeoff(sArgs.tradeoff_arg);
+  SVM.SetNu(sArgs.nu_arg);
+  SVM.SetSVMType(sArgs.svm_type_arg);
+  SVM.SetBalance(sArgs.balance_flag);
+
+  if (!SVM.parms_check()) {
+    cerr << "Sanity check failed, see above errors" << endl;
+    return 1;
+  }
+
+  //TODO: allow multiple PCL files
+  //size_t iFile; //TODO
+  // vector<string> PCLs; //TODO
+
+  //check data file
+  if (sArgs.input_given) {
+    if (!PCL.Open(sArgs.input_arg, sArgs.skip_arg, sArgs.mmap_flag)) {
+      cerr << "Could not open input PCL" << endl;
+      return 1;
     }
   }
 
-  return &ppTmpTrain;
-//  pTmpTest
-}*/
+  //read label files
+  vector<LIBSVM::SVMLabel> vecLabels;
+  set<string> setLabeledGenes;
+  if (sArgs.labels_given) {
+    ifsm.clear();
+    ifsm.open(sArgs.labels_arg);
+    if (ifsm.is_open())
+      vecLabels = ReadLabels(ifsm);
+    else {
+      cerr << "Could not read label file" << endl;
+      return 1;
+    }
+    for (i = 0; i < vecLabels.size(); i++)
+      setLabeledGenes.insert(vecLabels[i].GeneName);
+  }
 
-vector<LIBSVM::SVMLabel> ReadLabels(ifstream & ifsm) {
+  LIBSVM::SAMPLE* pTrainSample;
 
-	static const size_t c_iBuffer = 1024;
-	char acBuffer[c_iBuffer];
-	vector<string> vecstrTokens;
-	vector<LIBSVM::SVMLabel> vecLabels;
-	size_t numPositives, numNegatives;
-	numPositives = numNegatives = 0;
-	while (!ifsm.eof()) {
-		ifsm.getline(acBuffer, c_iBuffer - 1);
-		acBuffer[c_iBuffer - 1] = 0;
-		vecstrTokens.clear();
-		CMeta::Tokenize(acBuffer, vecstrTokens);
-		if (vecstrTokens.empty())
-			continue;
-		if (vecstrTokens.size() != 2) {
-			cerr << "Illegal label line (" << vecstrTokens.size() << "): "
-					<< acBuffer << endl;
-			continue;
-		}
-		vecLabels.push_back(LIBSVM::SVMLabel(vecstrTokens[0], atof(
-				vecstrTokens[1].c_str())));
-		if (vecLabels.back().Target > 0)
-			numPositives++;
-		else
-			numNegatives++;
-	}
-	return vecLabels;
-}
+  size_t numSample;
+  numSample = sArgs.cross_validation_arg * sArgs.num_cv_runs_arg;
+  vector<LIBSVM::SVMLabel> pTrainVector[numSample];
+  vector<LIBSVM::SVMLabel> pTestVector[numSample];
+  vector<LIBSVM::Result> AllResults;
+  vector<LIBSVM::Result> tmpAllResults;
 
+  if (sArgs.model_given && sArgs.labels_given) { //learn once and write to file
+    //TODO
+    /*
+    pTrainSample = CLIBSVM::CreateSample(PCL, vecLabels);
+    SVM.Learn(*pTrainSample);
+    SVM.WriteModel(sArgs.model_arg);
+    */
 
-struct SortResults {
+  } else if (sArgs.model_given && sArgs.output_given) { //read model and classify all
+    //TODO: test
+    /*
+    vector<SVMLabel> vecAllLabels;
+    for (size_t i = 0; i < PCL.GetGenes(); i++)
+      vecAllLabels.push_back(SVMLabel(PCL.GetGene(i), 0));
 
-	bool operator()(const LIBSVM::Result& rOne, const LIBSVM::Result & rTwo) const {
-		return (rOne.Value > rTwo.Value);
-	}
-};
+    SVM.ReadModel(sArgs.model_arg);
+    AllResults = SVM.Classify(PCL, vecAllLabels);
+    ofstream ofsm;
+    ofsm.open(sArgs.output_arg);
+    if (ofsm.is_open())
+      PrintResults(AllResults, ofsm);
+    else {
+      cerr << "Could not open output file" << endl;
+    }
+    */
 
+  } else if (sArgs.output_given && sArgs.labels_given) {
+    size_t ii, index;
+    //do learning and classifying with cross validation
+    if( sArgs.cross_validation_arg > 1 && sArgs.num_cv_runs_arg >= 1 ){
+      for (ii = 0; ii < sArgs.num_cv_runs_arg; ii++) {                    
+        std::random_shuffle(vecLabels.begin(), vecLabels.end());
 
-size_t PrintResults(vector<LIBSVM::Result> vecResults, ofstream & ofsm) {
-	sort(vecResults.begin(), vecResults.end(), SortResults());
-	int LabelVal;
-	for (size_t i = 0; i < vecResults.size(); i++) {
-		ofsm << vecResults[i].GeneName << '\t' << vecResults[i].Target << '\t'
-				<< vecResults[i].Value << endl;
-	}
-};
-
-struct ParamStruct {
-	vector<float> vecK, vecTradeoff;
-	vector<size_t> vecLoss;
-	vector<char*> vecNames;
-};
-
-ParamStruct ReadParamsFromFile(ifstream& ifsm, string outFile) {
-	static const size_t c_iBuffer = 1024;
-	char acBuffer[c_iBuffer];
-	char* nameBuffer;
-	vector<string> vecstrTokens;
-	size_t extPlace;
-	string Ext, FileName;
-	if ((extPlace = outFile.find_first_of(".")) != string::npos) {
-		FileName = outFile.substr(0, extPlace);
-		Ext = outFile.substr(extPlace, outFile.size());
-	} else {
-		FileName = outFile;
-		Ext = "";
-	}
-	ParamStruct PStruct;
-	size_t index = 0;
-	while (!ifsm.eof()) {
-		ifsm.getline(acBuffer, c_iBuffer - 1);
-		acBuffer[c_iBuffer - 1] = 0;
-		vecstrTokens.clear();
-		CMeta::Tokenize(acBuffer, vecstrTokens);
-		if (vecstrTokens.empty())
-			continue;
-		if (vecstrTokens.size() != 3) {
-			cerr << "Illegal params line (" << vecstrTokens.size() << "): "
-					<< acBuffer << endl;
-			continue;
-		}
-		if (acBuffer[0] == '#') {
-			cerr << "skipping " << acBuffer << endl;
-		} else {
-			PStruct.vecLoss.push_back(atoi(vecstrTokens[0].c_str()));
-			PStruct.vecTradeoff.push_back(atof(vecstrTokens[1].c_str()));
-			PStruct.vecK.push_back(atof(vecstrTokens[2].c_str()));
-			PStruct.vecNames.push_back(new char[c_iBuffer]);
-			if (PStruct.vecLoss[index] == 4 || PStruct.vecLoss[index] == 5)
-				sprintf(PStruct.vecNames[index], "%s_l%d_c%4.6f_k%4.3f%s",
-						FileName.c_str(), PStruct.vecLoss[index],
-						PStruct.vecTradeoff[index], PStruct.vecK[index],
-						Ext.c_str());
-			else
-				sprintf(PStruct.vecNames[index], "%s_l%d_c%4.6f%s",
-						FileName.c_str(), PStruct.vecLoss[index],
-						PStruct.vecTradeoff[index], Ext.c_str());
-			index++;
-		}
-
-	}
-	return PStruct;
-}
-
-int main(int iArgs, char** aszArgs) {
-
-	gengetopt_args_info sArgs;
-
-	CPCL PCL;
-	LIBSVM::CLIBSVM SVM;
-
-	size_t i, j, iGene, jGene;
-	ifstream ifsm;
-
-        bool added;
-        added = false;
-
-	if (cmdline_parser(iArgs, aszArgs, &sArgs)) {
-		cmdline_parser_print_help();
-		return 1;
-	}
-
-        //TODO: update documentation and cmdline .. doesn't use most parameters
-	//SVM.SetVerbosity(sArgs.verbosity_arg); // no verbosity param for libsvm TODO: update documentation
-	//SVM.SetLossFunction(sArgs.error_function_arg); //libsvm only has one loss function TODO: update documentation
-        
-	
-	if (sArgs.cross_validation_arg < 1){
-	  cerr << "cross_valid is <1. Must be set at least 1" << endl;
-	  return 1;
-	}
-	else if(sArgs.cross_validation_arg < 2){
-	  cerr << "cross_valid is set to 1. No cross validation holdouts will be run." << endl;
-	}
-
-        if (sArgs.num_cv_runs_arg < 1){
-          cerr << "number of cv runs is < 1. Must be set at least 1" << endl;
-          return 1;
+        for (i = 0; i < sArgs.cross_validation_arg; i++) {                  
+          index = sArgs.cross_validation_arg * ii + i;
+          pTestVector[index].reserve((size_t) vecLabels.size()
+              / sArgs.cross_validation_arg + sArgs.cross_validation_arg);
+          pTrainVector[index].reserve((size_t) vecLabels.size()
+              / (sArgs.cross_validation_arg)
+              * (sArgs.cross_validation_arg - 1)
+              + sArgs.cross_validation_arg);
+          for (j = 0; j < vecLabels.size(); j++) {
+            if (j % sArgs.cross_validation_arg == i) {
+              pTestVector[index].push_back(vecLabels[j]);
+            } else {
+              pTrainVector[index].push_back(vecLabels[j]);
+            }
+          }
         }
 
-        if (sArgs.negative_subsamples_arg < 0){
-          cerr << "number of negative subsample runs is < 0. Must be non-negative" << endl;
-          return 1;
+      }
+    }  
+    else{ // with fewer than 2 cross-validation folds, no cross-validation is done; all training genes are used and predicted
+
+      // no holdout, so the training gene set is identical to the test gene set
+      pTestVector[0].reserve((size_t) vecLabels.size() + sArgs.cross_validation_arg);
+      pTrainVector[0].reserve((size_t) vecLabels.size() + sArgs.cross_validation_arg);
+
+      for (j = 0; j < vecLabels.size(); j++) {
+        pTestVector[0].push_back(vecLabels[j]);		      
+        pTrainVector[0].push_back(vecLabels[j]);		    
+      }
+    }
+
+
+    vector<SVMLabel> vec_allUnlabeledLabels;
+    vector<Result> vec_allUnlabeledResults;
+    vector<Result> vec_tmpUnlabeledResults;
+    if (sArgs.all_flag) {
+      vec_allUnlabeledLabels.reserve(PCL.GetGenes());
+      vec_allUnlabeledResults.reserve(PCL.GetGenes());
+      for (i = 0; i < PCL.GetGenes(); i++) {
+        if (setLabeledGenes.find(PCL.GetGene(i))
+            == setLabeledGenes.end()) {
+          vec_allUnlabeledLabels.push_back(
+              SVMLabel(PCL.GetGene(i), 0));
+          vec_allUnlabeledResults.push_back(Result(PCL.GetGene(i)));
+        }
+      }
+    }
+
+
+    for (i = 0; i < sArgs.cross_validation_arg * sArgs.num_cv_runs_arg; i++) {
+      pTrainSample = LIBSVM::CLIBSVM::CreateSample(PCL, //TODO: make more efficient
+          pTrainVector[i]);
+
+      cerr << "Cross Validation Trial " << i << endl;
+
+      SVM.Learn(*pTrainSample);
+      cerr << "Learned" << endl;
+
+
+      tmpAllResults = SVM.Classify(PCL, pTestVector[i]);
+      cerr << "Classified " << tmpAllResults.size() << " examples" << endl;
+      for(std::vector<LIBSVM::Result>::iterator it = tmpAllResults.begin() ; it != tmpAllResults.end() ; it ++){
+        added = false;
+        for(std::vector<LIBSVM::Result>::iterator ita = AllResults.begin() ; ita != AllResults.end() ; ita ++){
+          if ( (*it).GeneName.compare((*ita).GeneName) == 0 ){
+            (*ita).Value += (*it).Value;
+            added = true;
+            break;
+          }
+
         }
 
-        if ( (sArgs.negative_subsamples_arg > 0 && sArgs.num_cv_runs_arg > 1) ) {
-          cerr << "negative subsamping for multiple cv runs has yet been implemented." << endl;
-          return 1;
-        }
-      
-        SVM.SetTradeoff(sArgs.tradeoff_arg);
-        SVM.SetNu(sArgs.nu_arg);
-        SVM.SetSVMType(sArgs.svm_type_arg);
-        CLIBSVM temp;
-        
-        SVM.SetBalance(sArgs.balance_flag);
-//cerr << SVM.posFeatOnly << endl;
+        if(!added)
+          AllResults.push_back((*it));
 
-	if (!SVM.parms_check()) {
-		cerr << "Sanity check failed, see above errors" << endl;
-		return 1;
-	}
+      }
+      tmpAllResults.resize(0);
+      if (sArgs.all_flag) {
+        vec_tmpUnlabeledResults = SVM.Classify(
+            PCL, vec_allUnlabeledLabels);
+        for (j = 0; j < vec_tmpUnlabeledResults.size(); j++)
+          vec_allUnlabeledResults[j].Value
+            += vec_tmpUnlabeledResults[j].Value;
 
-	size_t iFile;
-	vector<string> PCLs;
-	if (sArgs.input_given) {
-		if (!PCL.Open(sArgs.input_arg, sArgs.skip_arg, sArgs.mmap_flag)) {
-			cerr << "Could not open input PCL" << endl;
-			return 1;
-		}
-	}
+      }
+      LIBSVM::CLIBSVM::PrintSample(*pTrainSample);
 
-	vector<LIBSVM::SVMLabel> vecLabels;
-	set<string> setLabeledGenes;
-	if (sArgs.labels_given) {
-		ifsm.clear();
-		ifsm.open(sArgs.labels_arg);
-		if (ifsm.is_open())
-			vecLabels = ReadLabels(ifsm);
-		else {
-			cerr << "Could not read label file" << endl;
-			return 1;
-		}
-		for (i = 0; i < vecLabels.size(); i++)
-			setLabeledGenes.insert(vecLabels[i].GeneName);
-	}
+      size_t mem = CMeta::GetMemoryUsage();
+      cerr << "before free: " << mem << endl;
 
-	LIBSVM::SAMPLE* pTrainSample;
+      if (i > 0) {
+        //LIBSVM::CLIBSVM::FreeSample(*pTrainSample);
+        free(pTrainSample);
+      }
 
-        size_t numSample;
-        if(sArgs.negative_subsamples_arg > 0)
-          numSample = sArgs.cross_validation_arg * sArgs.num_cv_runs_arg * sArgs.negative_subsamples_arg;
-        else
-          numSample = sArgs.cross_validation_arg * sArgs.num_cv_runs_arg;
-	vector<LIBSVM::SVMLabel> pTrainVector[numSample];
-	vector<LIBSVM::SVMLabel> pTestVector[numSample];
-	vector<LIBSVM::Result> AllResults;
-	vector<LIBSVM::Result> tmpAllResults;
-
-	if (sArgs.model_given && sArgs.labels_given) { //learn once and write to file
-		pTrainSample = CLIBSVM::CreateSample(PCL, vecLabels);
-		SVM.Learn(*pTrainSample);
-		SVM.WriteModel(sArgs.model_arg);
-	} else if (sArgs.model_given && sArgs.output_given) { //read model and classify all
-		vector<SVMLabel> vecAllLabels;
-
-		for (size_t i = 0; i < PCL.GetGenes(); i++)
-			vecAllLabels.push_back(SVMLabel(PCL.GetGene(i), 0));
-
-		SVM.ReadModel(sArgs.model_arg);
-		AllResults = SVM.Classify(PCL, vecAllLabels);
-		ofstream ofsm;
-		ofsm.open(sArgs.output_arg);
-		if (ofsm.is_open())
-			PrintResults(AllResults, ofsm);
-		else {
-			cerr << "Could not open output file" << endl;
-		}
-	} else if (sArgs.output_given && sArgs.labels_given) {
-                size_t ii, index;
-		//do learning and classifying with cross validation
-//                if( sArgs.cross_validation_arg > 1 && sArgs.bagging )
-                if( sArgs.cross_validation_arg > 1 && sArgs.negative_subsamples_arg > 0){
-cerr << "negative subsampling" << endl;
-        	  vector<LIBSVM::SVMLabel> pTmpTrain[sArgs.cross_validation_arg * sArgs.num_cv_runs_arg];
-        	  vector<LIBSVM::SVMLabel> pTmpTest[sArgs.cross_validation_arg * sArgs.num_cv_runs_arg];
-          
-                  for(i = 0; i < sArgs.cross_validation_arg; i++) {
-                    index = i;
-                      
-                    pTmpTest[index].reserve((size_t) vecLabels.size()
-  			   / sArgs.cross_validation_arg + sArgs.cross_validation_arg);
-                    pTmpTrain[index].reserve((size_t) vecLabels.size()
-			    / (sArgs.cross_validation_arg)
-			    * (sArgs.cross_validation_arg - 1)
-			    + sArgs.cross_validation_arg);
-                    for (j = 0; j < vecLabels.size(); j++) {
-//cerr << vecLabels[j].GeneName << endl;
-		      if (j % sArgs.cross_validation_arg == i) {
-		        pTmpTest[index].push_back(vecLabels[j]);
-		      } else {
-		        pTmpTrain[index].push_back(vecLabels[j]);
-		      }
-		    }
-                  }
-                
-size_t iSample, iSubsample, numPos;
-size_t len, num;
-num = sArgs.negative_subsamples_arg;
-cerr << "subsampling: " << num << endl;
-len = sArgs.cross_validation_arg;
-cerr << "number of samples: " << len << endl;
-
-vector<LIBSVM::SVMLabel> Negatives;
-vector<LIBSVM::SVMLabel> Positives;
-  
-for( iSample = 0 ; iSample < len ; iSample ++ ) {
-    numPos = 0;
-    Negatives.empty();
-    Positives.empty();
-    
-    for(vector<LIBSVM::SVMLabel>::iterator it = pTmpTrain[iSample].begin() ;
-        it != pTmpTrain[iSample].end(); it++){
-      if ( (*it).Target == 1 ) { // if positive
-        numPos ++;
-        Positives.push_back(*it);
-      }else if ( (*it).Target == -1 )
-        Negatives.push_back(*it);
+      mem = CMeta::GetMemoryUsage();
+      cerr << "after free: " << mem << endl;
+      cerr << "end of a cv run" << endl;
     }
 
+    for(std::vector<LIBSVM::Result>::iterator it = AllResults.begin();
+        it != AllResults.end(); ++ it){
+      (*it).Value /= sArgs.num_cv_runs_arg;
 
-    for( iSubsample = 0 ; iSubsample < num ; iSubsample ++ ) {
-      index = num * iSample + iSubsample;
-//      pTmpTrain[ index ].reserve((size_t) (numPos * 10));
-      for( i = 0 ; i < numPos ; i ++ ) {
-        pTrainVector[ index ].push_back(Positives.at( i ) );
-        pTrainVector[ index ].push_back(Negatives.at( rand() % Negatives.size() )) ; //with replacement!!
-      }
+    }
 
-cerr << "blah" << endl;
-cerr << pTrainVector[ index ].size() << endl;
-      pTestVector[ index ] = pTmpTest[ iSample ] ;
+
+
+    if (sArgs.all_flag) { //add the unlabeled results
+      for (j = 0; j < vec_allUnlabeledResults.size(); j++)
+        vec_allUnlabeledResults[j].Value
+          /= (sArgs.cross_validation_arg * sArgs.num_cv_runs_arg);
+      AllResults.insert(AllResults.end(),
+          vec_allUnlabeledResults.begin(),
+          vec_allUnlabeledResults.end());
+    }
+
+    ofstream ofsm;
+    ofsm.clear();
+    ofsm.open(sArgs.output_arg);
+    PrintResults(AllResults, ofsm);
+    return 0;
+
+  } else {
+    cerr << "More options are needed" << endl;
   }
-}
-
-                }
-                else if( sArgs.cross_validation_arg > 1 && sArgs.num_cv_runs_arg >= 1 ){
-//                  size_t ii, index;
-                  for (ii = 0; ii < sArgs.num_cv_runs_arg; ii++) {                    
-                    std::random_shuffle(vecLabels.begin(), vecLabels.end());
-
-                  for (i = 0; i < sArgs.cross_validation_arg; i++) {                  
-                    index = sArgs.cross_validation_arg * ii + i;
-//cerr << index << endl;                    
-  		    pTestVector[index].reserve((size_t) vecLabels.size()
-  					   / sArgs.cross_validation_arg + sArgs.cross_validation_arg);
-		    pTrainVector[index].reserve((size_t) vecLabels.size()
-					    / (sArgs.cross_validation_arg)
-					    * (sArgs.cross_validation_arg - 1)
-					    + sArgs.cross_validation_arg);
-		    for (j = 0; j < vecLabels.size(); j++) {
-//cerr << vecLabels[j].GeneName << endl;
-		      if (j % sArgs.cross_validation_arg == i) {
-			pTestVector[index].push_back(vecLabels[j]);
-		      } else {
-			pTrainVector[index].push_back(vecLabels[j]);
-		      }
-		    }
-		  }
-
-                  }
-                }  
-		else{ // if you have less than 2 fold cross, no cross validation is done, all train genes are used and predicted
-		  
-		  // no holdout so train is the same as test gene set
-		  pTestVector[0].reserve((size_t) vecLabels.size() + sArgs.cross_validation_arg);
-		  pTrainVector[0].reserve((size_t) vecLabels.size() + sArgs.cross_validation_arg);
-		  
-		  for (j = 0; j < vecLabels.size(); j++) {
-		    pTestVector[0].push_back(vecLabels[j]);		      
-		    pTrainVector[0].push_back(vecLabels[j]);		    
-		  }
-		}
-		
-		
-		vector<SVMLabel> vec_allUnlabeledLabels;
-		vector<Result> vec_allUnlabeledResults;
-		vector<Result> vec_tmpUnlabeledResults;
-		if (sArgs.all_flag) {
-			vec_allUnlabeledLabels.reserve(PCL.GetGenes());
-			vec_allUnlabeledResults.reserve(PCL.GetGenes());
-			for (i = 0; i < PCL.GetGenes(); i++) {
-				if (setLabeledGenes.find(PCL.GetGene(i))
-						== setLabeledGenes.end()) {
-					vec_allUnlabeledLabels.push_back(
-							SVMLabel(PCL.GetGene(i), 0));
-					vec_allUnlabeledResults.push_back(Result(PCL.GetGene(i)));
-				}
-			}
-		}
-
-		if (sArgs.params_given) { //reading paramters from file //TODO??? figure out how this code works
-			ifsm.close();
-			ifsm.clear();
-			ifsm.open(sArgs.params_arg);
-			if (!ifsm.is_open()) {
-				cerr << "Could not open: " << sArgs.params_arg << endl;
-				return 1;
-			}
-			ParamStruct PStruct;
-			string outFile(sArgs.output_arg);
-			PStruct = ReadParamsFromFile(ifsm, outFile);
-
-			size_t iParams;
-			ofstream ofsm;
-			LIBSVM::SAMPLE * ppTrainSample[sArgs.cross_validation_arg];
-			
-			//build all the samples since they are being reused
-			for (i = 0; i < sArgs.cross_validation_arg; i++)
-				ppTrainSample[i] = LIBSVM::CLIBSVM::CreateSample(PCL,
-						pTrainVector[i]);
-			
-			for (iParams = 0; iParams < PStruct.vecTradeoff.size(); iParams++) {
-			//	SVM.SetLossFunction(PStruct.vecLoss[iParams]);
-				SVM.SetTradeoff(PStruct.vecTradeoff[iParams]);
-			//	SVM.SetPrecisionFraction(PStruct.vecK[iParams]);
-				for (j = 0; j < vec_allUnlabeledResults.size(); j++)
-					vec_allUnlabeledResults[j].Value = 0;
-				for (i = 0; i < sArgs.cross_validation_arg; i++) {
-					cerr << "Cross Validation Trial " << i << endl;
-					SVM.Learn(*ppTrainSample[i]);
-					
-					cerr << "Learned" << endl;					
-					
-					tmpAllResults = SVM.Classify(PCL, pTestVector[i]);
-					cerr << "Classified " << tmpAllResults.size()
-							<< " examples" << endl;
-					AllResults.insert(AllResults.end(), tmpAllResults.begin(),
-							tmpAllResults.end());
-					tmpAllResults.resize(0);
-					if (sArgs.all_flag && vec_allUnlabeledLabels.size() > 0) {
-						vec_tmpUnlabeledResults = SVM.Classify(PCL,
-								vec_allUnlabeledLabels);
-						for (j = 0; j < vec_tmpUnlabeledResults.size(); j++)
-							vec_allUnlabeledResults[j].Value
-									+= vec_tmpUnlabeledResults[j].Value;
-					}
-
-				}
-
-
-				ofsm.open(PStruct.vecNames[iParams]);
-				if (sArgs.all_flag) { //add the unlabeled results
-					for (j = 0; j < vec_tmpUnlabeledResults.size(); j++)
-						vec_allUnlabeledResults[j].Value
-								/= sArgs.cross_validation_arg;
-					AllResults.insert(AllResults.end(),
-							vec_allUnlabeledResults.begin(),
-							vec_allUnlabeledResults.end());
-				}
-
-				PrintResults(AllResults, ofsm);
-				ofsm.close();
-				ofsm.clear();
-				if (i > 0 || iParams > 0)
-					SVM.FreeModel();
-				AllResults.resize(0);
-			}
-		} else { //run once
-			for (i = 0; i < sArgs.cross_validation_arg * sArgs.num_cv_runs_arg; i++) {
-				pTrainSample = LIBSVM::CLIBSVM::CreateSample(PCL, //TODO: make more efficient
-						pTrainVector[i]);
-
-				cerr << "Cross Validation Trial " << i << endl;
-
-				SVM.Learn(*pTrainSample);
-				cerr << "Learned" << endl;
-
-
-				tmpAllResults = SVM.Classify(PCL,
-						pTestVector[i]);
-				cerr << "Classified " << tmpAllResults.size() << " examples"
-						<< endl;
-                                for(std::vector<LIBSVM::Result>::iterator it = tmpAllResults.begin() ; it != tmpAllResults.end() ; it ++){
-                                  added = false;
-                                  for(std::vector<LIBSVM::Result>::iterator ita = AllResults.begin() ; ita != AllResults.end() ; ita ++){
-                                    if ( (*it).GeneName.compare((*ita).GeneName) == 0 ){
-                                      (*ita).Value += (*it).Value;
-                                      added = true;
-                                      break;
-                                    }
-
-                                  }
-
-                                  if(!added)
-                                    AllResults.push_back((*it));
-
-//				AllResults.insert(AllResults.end(), tmpAllResults.begin(),
-//						tmpAllResults.end());
-//
-                                }
-				tmpAllResults.resize(0);
-				if (sArgs.all_flag) {
-					vec_tmpUnlabeledResults = SVM.Classify(
-							PCL, vec_allUnlabeledLabels);
-					for (j = 0; j < vec_tmpUnlabeledResults.size(); j++)
-						vec_allUnlabeledResults[j].Value
-								+= vec_tmpUnlabeledResults[j].Value;
-
-				}
-cerr << "blah" << endl;
-                                LIBSVM::CLIBSVM::PrintSample(*pTrainSample);
-
-                                size_t mem = CMeta::GetMemoryUsage();
-                                cerr << "before free: " << mem << endl;
-
-				if (i > 0) {
-					//LIBSVM::CLIBSVM::FreeSample(*pTrainSample);
-                                        free(pTrainSample);
-				}
-
-                                mem = CMeta::GetMemoryUsage();
-                                cerr << "after free: " << mem << endl;
-                                cerr << "end of a cv run" << endl;
-			}
-
-                        for(std::vector<LIBSVM::Result>::iterator it = AllResults.begin();
-                            it != AllResults.end(); ++ it){
-                          (*it).Value /= sArgs.num_cv_runs_arg;
-
-                        }
-
-
-
-			if (sArgs.all_flag) { //add the unlabeled results
-				for (j = 0; j < vec_allUnlabeledResults.size(); j++)
-					vec_allUnlabeledResults[j].Value
-							/= (sArgs.cross_validation_arg * sArgs.num_cv_runs_arg);
-				AllResults.insert(AllResults.end(),
-						vec_allUnlabeledResults.begin(),
-						vec_allUnlabeledResults.end());
-			}
-
-//                        tmpAllResults.clear();
-                        
-
-			ofstream ofsm;
-			ofsm.clear();
-			ofsm.open(sArgs.output_arg);
-			PrintResults(AllResults, ofsm);
-			return 0;
-		}
-	} else {
-		cerr << "More options are needed" << endl;
-	}
 
 }
 

File tools/LibSVMer/LibSVMer.ggo

 										int default="5" no
 option  "num_cv_runs"                   r       "Number of cross-validation runs"
                                                                                 int default="1" no
-option  "negative_subsamples"            g       "Number of subsample runs"
-                                                                                int default="0" no
 option "svm_type"                       v       "Sets type of SVM (default 0)
 0\tC-SVC
 1\tnu-SVC
 										float default="1" no
 option "nu"                             u   "nu parameter of nu-SVC, one-class SVM"
                                                                                 float default="0.5" no
-option "params"   				p   "Parameter file"
-										string  typestr="filename"   no
-
 option	"mmap"					M	"Memory map binary input"
 										flag	off

File tools/LibSVMer/cmdline.c

 const char *gengetopt_args_info_description = "";
 
 const char *gengetopt_args_info_help[] = {
-  "  -h, --help                    Print help and exit",
-  "  -V, --version                 Print version and exit",
+  "  -h, --help                  Print help and exit",
+  "  -V, --version               Print version and exit",
   "\nMain:",
-  "  -l, --labels=filename         Labels file",
-  "  -o, --output=filename         Output file ",
-  "  -i, --input=filename          Input PCL file ",
-  "  -m, --model=filename          Model file",
-  "  -a, --all                     Always classify all genes in PCLs  \n                                  (default=off)",
+  "  -l, --labels=filename       Labels file",
+  "  -o, --output=filename       Output file ",
+  "  -i, --input=filename        Input PCL file ",
+  "  -m, --model=filename        Model file",
+  "  -a, --all                   Always classify all genes in PCLs  (default=off)",
   "\nOptions:",
-  "  -s, --skip=INT                Number of columns to skip in input pcls  \n                                  (default=`2')",
-  "  -n, --normalize               Normalize PCLS to 0 mean 1 variance  \n                                  (default=off)",
-  "  -c, --cross_validation=INT    Number of cross-validation sets ( arg of 1 will \n                                  turn off cross-validation )  (default=`5')",
-  "  -r, --num_cv_runs=INT         Number of cross-validation runs  (default=`1')",
-  "  -g, --negative_subsamples=INT Number of subsample runs  (default=`0')",
-  "  -v, --svm_type=INT            Sets type of SVM (default 0)\n\n                                  0\tC-SVC\n\n                                  1\tnu-SVC\n\n                                  2\tone-class SVM\n                                    (default=`0')",
-  "  -b, --balance                 weight classes such that C_P * n_P = C_N * n_N  \n                                  (default=off)",
-  "  -t, --tradeoff=FLOAT          SVM tradeoff constant C of C-SVC  (default=`1')",
-  "  -u, --nu=FLOAT                nu parameter of nu-SVC, one-class SVM  \n                                  (default=`0.5')",
-  "  -p, --params=filename         Parameter file",
-  "  -M, --mmap                    Memory map binary input  (default=off)",
+  "  -s, --skip=INT              Number of columns to skip in input pcls  \n                                (default=`2')",
+  "  -n, --normalize             Normalize PCLS to 0 mean 1 variance  \n                                (default=off)",
+  "  -c, --cross_validation=INT  Number of cross-validation sets ( arg of 1 will \n                                turn off cross-validation )  (default=`5')",
+  "  -r, --num_cv_runs=INT       Number of cross-validation runs  (default=`1')",
+  "  -v, --svm_type=INT          Sets type of SVM (default 0)\n\n                                0\tC-SVC\n\n                                1\tnu-SVC\n\n                                2\tone-class SVM\n                                  (default=`0')",
+  "  -b, --balance               weight classes such that C_P * n_P = C_N * n_N  \n                                (default=off)",
+  "  -t, --tradeoff=FLOAT        SVM tradeoff constant C of C-SVC  (default=`1')",
+  "  -u, --nu=FLOAT              nu parameter of nu-SVC, one-class SVM  \n                                (default=`0.5')",
+  "  -M, --mmap                  Memory map binary input  (default=off)",
     0
 };
 
   args_info->normalize_given = 0 ;
   args_info->cross_validation_given = 0 ;
   args_info->num_cv_runs_given = 0 ;
-  args_info->negative_subsamples_given = 0 ;
   args_info->svm_type_given = 0 ;
   args_info->balance_given = 0 ;
   args_info->tradeoff_given = 0 ;
   args_info->nu_given = 0 ;
-  args_info->params_given = 0 ;
   args_info->mmap_given = 0 ;
 }
 
   args_info->cross_validation_orig = NULL;
   args_info->num_cv_runs_arg = 1;
   args_info->num_cv_runs_orig = NULL;
-  args_info->negative_subsamples_arg = 0;
-  args_info->negative_subsamples_orig = NULL;
   args_info->svm_type_arg = 0;
   args_info->svm_type_orig = NULL;
   args_info->balance_flag = 0;
   args_info->tradeoff_orig = NULL;
   args_info->nu_arg = 0.5;
   args_info->nu_orig = NULL;
-  args_info->params_arg = NULL;
-  args_info->params_orig = NULL;
   args_info->mmap_flag = 0;
   
 }
   args_info->normalize_help = gengetopt_args_info_help[10] ;
   args_info->cross_validation_help = gengetopt_args_info_help[11] ;
   args_info->num_cv_runs_help = gengetopt_args_info_help[12] ;
-  args_info->negative_subsamples_help = gengetopt_args_info_help[13] ;
-  args_info->svm_type_help = gengetopt_args_info_help[14] ;
-  args_info->balance_help = gengetopt_args_info_help[15] ;
-  args_info->tradeoff_help = gengetopt_args_info_help[16] ;
-  args_info->nu_help = gengetopt_args_info_help[17] ;
-  args_info->params_help = gengetopt_args_info_help[18] ;
-  args_info->mmap_help = gengetopt_args_info_help[19] ;
+  args_info->svm_type_help = gengetopt_args_info_help[13] ;
+  args_info->balance_help = gengetopt_args_info_help[14] ;
+  args_info->tradeoff_help = gengetopt_args_info_help[15] ;
+  args_info->nu_help = gengetopt_args_info_help[16] ;
+  args_info->mmap_help = gengetopt_args_info_help[17] ;
   
 }
 
   free_string_field (&(args_info->skip_orig));
   free_string_field (&(args_info->cross_validation_orig));
   free_string_field (&(args_info->num_cv_runs_orig));
-  free_string_field (&(args_info->negative_subsamples_orig));
   free_string_field (&(args_info->svm_type_orig));
   free_string_field (&(args_info->tradeoff_orig));
   free_string_field (&(args_info->nu_orig));
-  free_string_field (&(args_info->params_arg));
-  free_string_field (&(args_info->params_orig));
   
   
 
     write_into_file(outfile, "cross_validation", args_info->cross_validation_orig, 0);
   if (args_info->num_cv_runs_given)
     write_into_file(outfile, "num_cv_runs", args_info->num_cv_runs_orig, 0);
-  if (args_info->negative_subsamples_given)
-    write_into_file(outfile, "negative_subsamples", args_info->negative_subsamples_orig, 0);
   if (args_info->svm_type_given)
     write_into_file(outfile, "svm_type", args_info->svm_type_orig, 0);
   if (args_info->balance_given)
     write_into_file(outfile, "tradeoff", args_info->tradeoff_orig, 0);
   if (args_info->nu_given)
     write_into_file(outfile, "nu", args_info->nu_orig, 0);
-  if (args_info->params_given)
-    write_into_file(outfile, "params", args_info->params_orig, 0);
   if (args_info->mmap_given)
     write_into_file(outfile, "mmap", 0, 0 );
   
         { "normalize",	0, NULL, 'n' },
         { "cross_validation",	1, NULL, 'c' },
         { "num_cv_runs",	1, NULL, 'r' },
-        { "negative_subsamples",	1, NULL, 'g' },
         { "svm_type",	1, NULL, 'v' },
         { "balance",	0, NULL, 'b' },
         { "tradeoff",	1, NULL, 't' },
         { "nu",	1, NULL, 'u' },
-        { "params",	1, NULL, 'p' },
         { "mmap",	0, NULL, 'M' },
         { NULL,	0, NULL, 0 }
       };
 
-      c = getopt_long (argc, argv, "hVl:o:i:m:as:nc:r:g:v:bt:u:p:M", long_options, &option_index);
+      c = getopt_long (argc, argv, "hVl:o:i:m:as:nc:r:v:bt:u:M", long_options, &option_index);
 
       if (c == -1) break;	/* Exit from `while (1)' loop.  */
 
             goto failure;
         
           break;
-        case 'g':	/* Number of subsample runs.  */
-        
-        
-          if (update_arg( (void *)&(args_info->negative_subsamples_arg), 
-               &(args_info->negative_subsamples_orig), &(args_info->negative_subsamples_given),
-              &(local_args_info.negative_subsamples_given), optarg, 0, "0", ARG_INT,
-              check_ambiguity, override, 0, 0,
-              "negative_subsamples", 'g',
-              additional_error))
-            goto failure;
-        
-          break;
         case 'v':	/* Sets type of SVM (default 0)
         0\tC-SVC
         1\tnu-SVC
             goto failure;
         
           break;
-        case 'p':	/* Parameter file.  */
-        
-        
-          if (update_arg( (void *)&(args_info->params_arg), 
-               &(args_info->params_orig), &(args_info->params_given),
-              &(local_args_info.params_given), optarg, 0, 0, ARG_STRING,
-              check_ambiguity, override, 0, 0,
-              "params", 'p',
-              additional_error))
-            goto failure;
-        
-          break;
         case 'M':	/* Memory map binary input.  */
         
         

File tools/LibSVMer/cmdline.h

   int num_cv_runs_arg;	/**< @brief Number of cross-validation runs (default='1').  */
   char * num_cv_runs_orig;	/**< @brief Number of cross-validation runs original value given at command line.  */
   const char *num_cv_runs_help; /**< @brief Number of cross-validation runs help description.  */
-  int negative_subsamples_arg;	/**< @brief Number of subsample runs (default='0').  */
-  char * negative_subsamples_orig;	/**< @brief Number of subsample runs original value given at command line.  */
-  const char *negative_subsamples_help; /**< @brief Number of subsample runs help description.  */
   int svm_type_arg;	/**< @brief Sets type of SVM (default 0)
   0\tC-SVC
   1\tnu-SVC
   float nu_arg;	/**< @brief nu parameter of nu-SVC, one-class SVM (default='0.5').  */
   char * nu_orig;	/**< @brief nu parameter of nu-SVC, one-class SVM original value given at command line.  */
   const char *nu_help; /**< @brief nu parameter of nu-SVC, one-class SVM help description.  */
-  char * params_arg;	/**< @brief Parameter file.  */
-  char * params_orig;	/**< @brief Parameter file original value given at command line.  */
-  const char *params_help; /**< @brief Parameter file help description.  */
   int mmap_flag;	/**< @brief Memory map binary input (default=off).  */
   const char *mmap_help; /**< @brief Memory map binary input help description.  */
   
   unsigned int normalize_given ;	/**< @brief Whether normalize was given.  */
   unsigned int cross_validation_given ;	/**< @brief Whether cross_validation was given.  */
   unsigned int num_cv_runs_given ;	/**< @brief Whether num_cv_runs was given.  */
-  unsigned int negative_subsamples_given ;	/**< @brief Whether negative_subsamples was given.  */
   unsigned int svm_type_given ;	/**< @brief Whether svm_type was given.  */
   unsigned int balance_given ;	/**< @brief Whether balance was given.  */
   unsigned int tradeoff_given ;	/**< @brief Whether tradeoff was given.  */
   unsigned int nu_given ;	/**< @brief Whether nu was given.  */
-  unsigned int params_given ;	/**< @brief Whether params was given.  */
   unsigned int mmap_given ;	/**< @brief Whether mmap was given.  */
 
 } ;