Commits

Jian Zhou committed 36982d5

Added SVM multiclass

Comments (0)

Files changed (8)

src/svmstruct.cpp

+/*****************************************************************************
+* This file is provided under the Creative Commons Attribution 3.0 license.
+*
+* You are free to share, copy, distribute, transmit, or adapt this work
+* PROVIDED THAT you attribute the work to the authors listed below.
+* For more information, please see the following web page:
+* http://creativecommons.org/licenses/by/3.0/
+*
+* This file is a component of the Sleipnir library for functional genomics,
+* authored by:
+* Curtis Huttenhower (chuttenh@princeton.edu)
+* Mark Schroeder
+* Maria D. Chikina
+* Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
+*
+* If you use this library, the included executable tools, or any related
+* code in your work, please cite the following publication:
+* Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
+* Olga G. Troyanskaya.
+* "The Sleipnir library for computational functional genomics"
+*****************************************************************************/
+#include "stdafx.h"
+#include "svmstruct.h"
+#include "pclset.h"
+#include "dataset.h"
+#include "meta.h"
+#include "genome.h"
+#include "compactmatrix.h"
+#include <vector>
+#include <set>
+
+#define  SLACK_RESCALING    1
+#define  MARGIN_RESCALING   2
+
+namespace SVMArc {
+	extern "C" {
+		//    void free_struct_model(STRUCTMODEL sm);
+		void free_struct_sample(SAMPLE s);
+		//    void svm_learn_struct_joint_custom(SAMPLE sample,
+		//            STRUCT_LEARN_PARM *sparm,
+		//            LEARN_PARM *lparm, KERNEL_PARM *kparm,
+		//            STRUCTMODEL *sm);
+		//    SAMPLE read_struct_examples_sleipnir(DOC **all_docs, double*all_labels, int example_size, int total_features, STRUCT_LEARN_PARM *sparm);
+		//    void free_struct_model(STRUCTMODEL sm);
+		//    void free_struct_sample(SAMPLE s);
+		//    void set_struct_verbosity(long verb);
+		//    double estimate_r_delta_average(DOC **, long, KERNEL_PARM *);
+		//    MODEL *read_model(char *);
+		LABEL classify_struct_example(PATTERN x, STRUCTMODEL *sm,
+			STRUCT_LEARN_PARM *sparm);
+		DOC* create_example(long, long, long, double, SVECTOR *);
+		SVECTOR * create_svector(WORD *, char *, double);
+		void set_struct_verbosity(long verb);
+
+	}
+
+	// Forward the requested verbosity level to svm_struct's global flag.
+	void CSVMSTRUCTMC::SetVerbosity(size_t V) {
+		struct_verbosity = static_cast<long>(V);
+	}
+
+	/* Resets every learn/kernel/struct parameter to the svm_struct defaults.
+	 * Called from the constructor; the individual Set*() methods override
+	 * selected values afterwards.  Always returns true. */
+	bool CSVMSTRUCTMC::initialize() {
+
+		//set directionality
+
+
+		/* set default */
+		Alg = DEFAULT_ALG_TYPE;
+		// struct-level parameters: regularization, rescaling, constraint caching
+		struct_parm.C=0.01;
+		struct_parm.slack_norm=1;
+		struct_parm.epsilon=DEFAULT_EPS;
+		struct_parm.custom_argc=0;
+		struct_parm.loss_function=DEFAULT_LOSS_FCT;
+		struct_parm.loss_type=DEFAULT_RESCALING;
+		struct_parm.newconstretrain=100;
+		struct_parm.ccache_size=5;
+		struct_parm.batch_size=100;
+		// svm_light-level learning parameters
+		//strcpy (learn_parm.predfile, "trans_predictions");
+		strcpy(learn_parm.alphafile, "");
+		//verbosity=0;/*verbosity for svm_light*/
+		//struct_verbosity = 1; /*verbosity for struct learning portion*/
+		learn_parm.biased_hyperplane=1;
+		learn_parm.remove_inconsistent=0;
+		learn_parm.skip_final_opt_check=0;
+		learn_parm.svm_maxqpsize=10;
+		learn_parm.svm_newvarsinqp=0;
+		learn_parm.svm_iter_to_shrink=-9999;
+		learn_parm.maxiter=100000;
+		learn_parm.kernel_cache_size=40;
+		learn_parm.svm_c=99999999;  /* overridden by struct_parm.C */
+		learn_parm.eps=0.001;       /* overridden by struct_parm.epsilon */
+		learn_parm.transduction_posratio=-1.0;
+		learn_parm.svm_costratio=1.0;
+		learn_parm.svm_costratio_unlab=1.0;
+		learn_parm.svm_unlabbound=1E-5;
+		learn_parm.epsilon_crit=0.001;
+		learn_parm.epsilon_a=1E-10;  /* changed from 1e-15 */
+		learn_parm.compute_loo=0;
+		learn_parm.rho=1.0;
+		learn_parm.xa_depth=0;
+		// kernel defaults: linear (type 0)
+		kernel_parm.kernel_type=0;
+		kernel_parm.poly_degree=3;
+		kernel_parm.rbf_gamma=1.0;
+		kernel_parm.coef_lin=1;
+		kernel_parm.coef_const=1;
+		strcpy(kernel_parm.custom, "empty");
+
+		// -9999 is the "unset" sentinel; replace it with the real default
+		if (learn_parm.svm_iter_to_shrink == -9999) {
+			learn_parm.svm_iter_to_shrink = 100;
+		}
+
+		// skipping the final optimality check is pointless for linear kernels
+		if ((learn_parm.skip_final_opt_check)
+			&& (kernel_parm.kernel_type == LINEAR)) {
+				printf(
+					"\nIt does not make sense to skip the final optimality check for linear kernels.\n\n");
+				learn_parm.skip_final_opt_check = 0;
+		}
+
+		//struct parms
+
+		/* set number of features to -1, indicating that it will be computed
+		in init_struct_model() */
+		struct_parm.num_features = -1;
+
+		return true;
+	}
+
+	/* Validates the current learn/kernel/struct parameters.  Any fatal
+	 * inconsistency prints a diagnostic to stderr and returns false.
+	 * Note: a negative C only warns (svm_struct substitutes a default)
+	 * and does NOT fail the check. */
+	bool CSVMSTRUCTMC::parms_check() {
+		if ((learn_parm.skip_final_opt_check) && (learn_parm.remove_inconsistent)) {
+			fprintf(
+				stderr,
+				"\nIt is necessary to do the final optimality check when removing inconsistent \nexamples.\n");
+			return false;
+		}
+		if ((learn_parm.svm_maxqpsize < 2)) {
+			fprintf(
+				stderr,
+				"\nMaximum size of QP-subproblems not in valid range: %ld [2..]\n",
+				learn_parm.svm_maxqpsize);
+			return false;
+		}
+		if ((learn_parm.svm_maxqpsize < learn_parm.svm_newvarsinqp)) {
+			fprintf(
+				stderr,
+				"\nMaximum size of QP-subproblems [%ld] must be larger than the number of\n",
+				learn_parm.svm_maxqpsize);
+			fprintf(
+				stderr,
+				"new variables [%ld] entering the working set in each iteration.\n",
+				learn_parm.svm_newvarsinqp);
+			return false;
+		}
+		if (learn_parm.svm_iter_to_shrink < 1) {
+			fprintf(
+				stderr,
+				"\nMaximum number of iterations for shrinking not in valid range: %ld [1,..]\n",
+				learn_parm.svm_iter_to_shrink);
+			return false;
+		}
+		// warning only: execution falls through without returning false
+		if (struct_parm.C < 0) {
+			fprintf(
+				stderr,
+				"\nTrade-off between training error and margin is not set (C<0)!\nC value will be set to default value. Clight = Cpef * 100 / n \n");
+		}
+		if (learn_parm.transduction_posratio > 1) {
+			fprintf(stderr,
+				"\nThe fraction of unlabeled examples to classify as positives must\n");
+			fprintf(stderr, "be less than 1.0 !!!\n\n");
+			return false;
+		}
+		if (learn_parm.svm_costratio <= 0) {
+			fprintf(stderr,
+				"\nThe COSTRATIO parameter must be greater than zero!\n\n");
+			return false;
+		}
+		if (struct_parm.epsilon <= 0) {
+			fprintf(stderr,
+				"\nThe epsilon parameter must be greater than zero!\n\n");
+			return false;
+		}
+		if ((struct_parm.slack_norm < 1) || (struct_parm.slack_norm > 2)) {
+			fprintf(stderr,
+				"\nThe norm of the slacks must be either 1 (L1-norm) or 2 (L2-norm)!\n\n");
+			return false;
+		}
+
+		// loss_type must be one of the two rescaling modes defined above
+		if ((struct_parm.loss_type != SLACK_RESCALING) && (struct_parm.loss_type
+			!= MARGIN_RESCALING)) {
+				fprintf(
+					stderr,
+					"\nThe loss type must be either 1 (slack rescaling) or 2 (margin rescaling)!\n\n");
+				return false;
+		}
+
+		if (learn_parm.rho < 0) {
+			fprintf(stderr,
+				"\nThe parameter rho for xi/alpha-estimates and leave-one-out pruning must\n");
+			fprintf(stderr,
+				"be greater than zero (typically 1.0 or 2.0, see T. Joachims, Estimating the\n");
+			fprintf(stderr,
+				"Generalization Performance of an SVM Efficiently, ICML, 2000.)!\n\n");
+			return false;
+		}
+		if ((learn_parm.xa_depth < 0) || (learn_parm.xa_depth > 100)) {
+			fprintf(stderr,
+				"\nThe parameter depth for ext. xi/alpha-estimates must be in [0..100] (zero\n");
+			fprintf(stderr,
+				"for switching to the conventional xa/estimates described in T. Joachims,\n");
+			fprintf(
+				stderr,
+				"Estimating the Generalization Performance of an SVM Efficiently, ICML, 2000.)\n");
+			return false;
+		}
+
+
+
+		return true;
+	}
+
+
+
+	/* Builds an svm_light DOC (feature vector) for gene iGene of PCL.
+	 * Feature k (1-based wnum) carries experiment k's expression value;
+	 * missing values (NaN) are encoded as weight 0.  iDoc becomes the
+	 * DOC id.  Caller frees the result with FreeDoc(). */
+	DOC* CSVMSTRUCTMC::CreateDoc(Sleipnir::CPCL &PCL, size_t iGene, size_t iDoc) {
+		WORD* aWords;
+		size_t i, iWords;
+		float d;
+		DOC* pRet;
+		// (removed: the original dereferenced pRet->fvec here BEFORE pRet
+		// was ever assigned -- an uninitialized-pointer read, undefined
+		// behavior, and the expression result was discarded anyway)
+		//get number of features
+		iWords = PCL.GetExperiments();
+		// one extra WORD: svm_light word arrays are terminated by wnum == 0
+		aWords = new WORD[iWords + 1];
+		//set the words
+		for (i = 0; i < iWords; ++i) {
+			aWords[i].wnum = i + 1;
+			if (!Sleipnir::CMeta::IsNaN(d = PCL.Get(iGene, i)))
+				aWords[i].weight = d;
+			else
+				aWords[i].weight = 0;
+		}
+		aWords[i].wnum = 0;
+		// create_svector copies the word array, so aWords can be freed here
+		pRet = create_example(iDoc, 0, 0, 1, create_svector(aWords, "", 1));
+		delete[] aWords;
+		return pRet;
+	}
+
+
+
+
+
+	/* Packs PCL expression rows for the labeled genes in SVMLabels into a
+	 * freshly allocated svm_struct SAMPLE.  Genes not found in the PCL are
+	 * skipped.  Class labels must be positive integers; the largest label
+	 * seen defines the number of classes.  Caller owns the returned SAMPLE. */
+	SAMPLE* CSVMSTRUCTMC::CreateSample(Sleipnir::CPCL &PCL, vector<SVMLabel> SVMLabels) {
+		size_t i, iGene, iDoc;
+		int n;               /* number of examples actually created */
+		int *target;
+		long num_classes = 0;
+		SAMPLE* pSample = new SAMPLE;
+		EXAMPLE* examples;
+		DOC** docs;
+		vector<DOC*> vec_pDoc;
+		vec_pDoc.reserve(SVMLabels.size());
+		vector<int> vecClass;
+		vecClass.reserve(SVMLabels.size());
+
+		iDoc = 0;
+		for (i = 0; i < SVMLabels.size(); i++) {
+			if (!SVMLabels[i].hasIndex) {
+				SVMLabels[i].SetIndex(PCL.GetGene(SVMLabels[i].GeneName));
+			}
+			iGene = SVMLabels[i].index;
+			// PCL.GetGene yields (size_t)-1 when the gene name is unknown
+			if (iGene != -1) {
+				iDoc++;
+				vec_pDoc.push_back(CreateDoc(PCL, iGene, iDoc - 1));
+				vecClass.push_back(SVMLabels[i].Target);
+			}
+		}
+
+		// copy patterns and labels into plain arrays for the C API
+		docs = new DOC*[vec_pDoc.size()];
+		n = vec_pDoc.size();
+		copy(vec_pDoc.begin(), vec_pDoc.end(), docs);
+		vec_pDoc.clear();
+
+		target = new int[vecClass.size()];
+		copy(vecClass.begin(), vecClass.end(), target);
+		vecClass.clear();
+
+		examples = (EXAMPLE *) my_malloc(sizeof(EXAMPLE) * n);
+		for (i = 0; i < n; i++)     /* find highest class label */
+			if (num_classes < target[i])
+				num_classes = target[i];
+
+		for (i = 0; i < n; i++)     /* make sure all class labels are positive */
+			if (target[i] < 1) {
+				printf("\nERROR: The class label '%d' of example number %ld is not greater than '1'!\n", target[i], (long) (i + 1));
+				exit(1);
+			}
+		// (re-indented: the original nested the next loop under the error
+		// branch above, which misrepresented the control flow)
+		for (i = 0; i < n; i++) {   /* copy docs over into new datastructure */
+			examples[i].x.doc = docs[i];
+			examples[i].y.Class = target[i];
+			examples[i].y.scores = NULL;
+			examples[i].y.num_classes = num_classes;
+		}
+		// both arrays came from new[]; the original released them with
+		// free(), which is undefined behavior -- match the allocator
+		delete[] target;
+		delete[] docs;
+		pSample->n = n;
+		pSample->examples = examples;
+
+		if (struct_verbosity >= 0)
+			printf(" (%d examples) ", pSample->n);
+
+		return pSample;
+	}
+
+	//Single gene classification
+
+	/* Classifies each gene in SVMLabels against the trained structmodel.
+	 * Returns one Result per gene found in the PCL: the true label in
+	 * Target, the predicted class in Value, and one score per class in
+	 * Scores (0-based vector of num_classes entries). */
+	vector<Result> CSVMSTRUCTMC::Classify(Sleipnir::CPCL &PCL,
+		vector<SVMLabel> SVMLabels) {
+			size_t i, j,k, iGene, iDoc;
+			vector<int> vecClass;
+			vector<Result> vecResult;
+			iDoc = 0;
+			PATTERN pattern;
+			// classify one DOC at a time
+			pattern.totdoc = 1;
+			cerr << "CLASSIFY classifying " << endl;
+			LABEL label;
+			for (i = 0; i < SVMLabels.size(); i++) {
+				if (!SVMLabels[i].hasIndex) {
+					SVMLabels[i].SetIndex(PCL.GetGene(SVMLabels[i].GeneName));
+				}
+				iGene = SVMLabels[i].index;
+				   //cout << "CLASS gene=" << iGene << endl;
+				// (size_t)-1 means the gene is absent from the PCL; skip it
+				if (iGene != -1) {
+					iDoc++;
+
+					//cout << "CLASS iDOC=" << iDoc << endl;
+					// NOTE(review): doc id passed here is iDoc, while
+					// CreateSample uses iDoc - 1 -- confirm the id is not
+					// semantically significant to the library
+					pattern.doc = CreateDoc(PCL, iGene, iDoc);
+					//cerr<<"Doc Created"<<endl;
+					label	= classify_struct_example(pattern, &structmodel,
+						&struct_parm);
+					//cerr<<"CLASSIED"<<endl;
+					vecClass.push_back(SVMLabels[i].Target);
+					vecResult.resize(iDoc);
+					vecResult[iDoc - 1].GeneName = SVMLabels[i].GeneName;
+					vecResult[iDoc - 1].Target = SVMLabels[i].Target;
+					vecResult[iDoc - 1].Value = label.Class;
+					vecResult[iDoc - 1].num_class=struct_parm.num_classes;
+					//vecResult[iDoc - 1].Scores.reserve(label.num_classes);
+					// label.scores is assumed 1-based (1..num_classes), so
+					// Scores ends up 0-based -- TODO confirm against the
+					// svm_struct_api LABEL layout
+					for (k = 1; k <= struct_parm.num_classes; k++)
+								vecResult[iDoc - 1].Scores.push_back(label.scores[k]);
+					//cerr<<"CLASSIFY Called FreeDoc"<<endl;
+					// NOTE(review): only the DOC is released; label.scores is
+					// never freed here (free_label not called) -- possible
+					// leak, confirm who owns the scores array
+					FreeDoc(pattern.doc);
+					//cerr<<"CLASSIFY End FreeDoc"<<endl;
+				}
+			}
+
+			return vecResult;
+	}
+
+
+	/* Frees a SAMPLE's example array and labels.  The DOC structs are
+	 * free()d shallowly -- unlike FreeDoc/free_example, the feature
+	 * vectors inside each DOC are NOT released here.
+	 * NOTE(review): confirm this shallow free matches the "leave_Doc"
+	 * intent, i.e. that the feature vectors are owned/freed elsewhere. */
+	void CSVMSTRUCTMC::FreeSample_leave_Doc(SAMPLE s){
+		/* Frees the memory of sample s. */
+		int i;
+		for(i=0;i<s.n;i++) {
+			free(s.examples[i].x.doc);
+			free_label(s.examples[i].y);
+		}
+		free(s.examples);
+	}
+
+
+
+}
+
+/*****************************************************************************
+* This file is provided under the Creative Commons Attribution 3.0 license.
+*
+* You are free to share, copy, distribute, transmit, or adapt this work
+* PROVIDED THAT you attribute the work to the authors listed below.
+* For more information, please see the following web page:
+* http://creativecommons.org/licenses/by/3.0/
+*
+* This file is a component of the Sleipnir library for functional genomics,
+* authored by:
+* Curtis Huttenhower (chuttenh@princeton.edu)
+* Mark Schroeder
+* Maria D. Chikina
+* Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
+*
+* If you use this library, the included executable tools, or any related
+* code in your work, please cite the following publication:
+* Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
+* Olga G. Troyanskaya.
+* "The Sleipnir library for computational functional genomics"
+*****************************************************************************/
+
+#ifndef NO_SVM_STRUCT
+#ifndef SVMSTRUCTI_H
+#define SVMSTRUCTI_H
+#include "pclset.h"
+#include "meta.h"
+#include "dat.h"
+
+#include <stdio.h>
+
+/* removed to support cygwin */
+//#include <execinfo.h>
+
+namespace SVMArc {
+	extern "C" {
+
+#define class Class
+
+#include <svm_multiclass/svm_light/svm_common.h>
+#include <svm_multiclass/svm_light/svm_learn.h>
+#include <svm_multiclass/svm_struct_api_types.h>
+#include <svm_multiclass/svm_struct/svm_struct_common.h>
+#include <svm_multiclass/svm_struct_api.h>
+#include <svm_multiclass/svm_struct/svm_struct_learn.h>
+#undef class
+		//#include "svm_struct_api.h"
+
+	}
+
+	// Binds a gene name to a class label (Target) and, once resolved via
+	// SetIndex(), to the gene's row index in the PCL.
+	class SVMLabel {
+	public:
+		string GeneName;
+		size_t Target;    // class label; the multiclass code requires values >= 1
+		size_t index;     // PCL row index; (size_t)-1 until resolved
+		bool hasIndex;    // true once index has been looked up
+		SVMLabel(std::string name, size_t target) {
+			GeneName = name;
+			Target = target;
+			hasIndex = false;
+			index = -1;
+		}
+
+		// Default constructor.  The original left index/hasIndex
+		// uninitialized, yet CreateSample/Classify read hasIndex on every
+		// label -- initialize both explicitly.
+		SVMLabel() {
+			GeneName = "";
+			Target = 0;
+			hasIndex = false;
+			index = -1;
+		}
+		// Records the PCL row index for this gene.
+		void SetIndex(size_t i) {
+			index = i;
+			hasIndex = true;
+		}
+	};
+
+	// Holds one gene's prediction: true label (Target), predicted class
+	// (Value), per-class discriminant scores, and cross-validation
+	// bookkeeping (CVround, Rank).
+	class Result {
+	public:
+		std::string GeneName;
+		int Target;
+		int Value;              // predicted class label
+		vector<double> Scores;  // one score per class, filled by Classify
+		int num_class;
+		int CVround;
+		int Rank;
+		Result() {
+			GeneName = "";
+			Target = 0;
+			// NOTE(review): converting NaN to int is undefined behavior;
+			// kept for compatibility -- callers overwrite Value before use.
+			Value = Sleipnir::CMeta::GetNaN();
+			// The original left these three uninitialized even though
+			// toString() reads Rank; give them the same defaults as the
+			// named constructor.
+			num_class = 0;
+			CVround = -1;
+			Rank = -1;
+		}
+
+		Result(std::string name, int cv = -1) {
+			GeneName = name;
+			Target = 0;
+			Value = 0;
+			CVround = cv;
+			Rank = -1;
+			num_class = 0;
+		}
+		// Tab-separated summary: gene, label, prediction, CV fold, and the
+		// rank when one has been assigned (-1 means "no rank").
+		string toString() {
+			stringstream ss;
+			ss << GeneName << '\t' << Target << '\t' << Value << '\t' << "CV"
+				<< CVround;
+			if (Rank != -1) {
+				ss << '\t' << Rank;
+			}
+			return ss.str();
+		}
+
+	};
+
+	// Gene-filtering modes: keep only listed genes vs. drop listed genes.
+	enum EFilter {
+		EFilterInclude = 0,
+		EFilterExclude = 1,
+	};
+
+	//this class encapsulates the model and parameters and has no associated data
+
+
+	//class for SVMStruct
+	class CSVMSTRUCTMC {
+
+	public:
+		// svm_light / svm_struct parameter bundles and the trained model
+		LEARN_PARM learn_parm;
+		KERNEL_PARM kernel_parm;
+		STRUCT_LEARN_PARM struct_parm;
+		STRUCTMODEL structmodel;
+		int Alg;  // learning-algorithm selector; see Learn() for the mapping
+		CSVMSTRUCTMC() {
+			initialize();
+			//set_struct_verbosity(5);
+		}
+
+		// Selects the loss function used during structural learning.
+		void SetLossFunction(size_t loss_f) {
+			struct_parm.loss_function = loss_f;
+		}
+
+		// Sets the training-error / margin trade-off constant C.
+		void SetTradeoff(double tradeoff) {
+			struct_parm.C = tradeoff;
+		}
+		// Chooses the n-slack / 1-slack variant; see Learn().
+		void SetLearningAlgorithm(int alg) {
+			Alg = alg;
+		}
+		// Sets the svm_light kernel type (0 = linear).
+		void SetKernel(int K) {
+			kernel_parm.kernel_type = K;
+		}
+		// Degree for the polynomial kernel.
+		void SetPolyD(int D) {
+			kernel_parm.poly_degree = D;
+		}
+
+		//void UseCPSP() {
+		//	Alg = 9;
+		//	struct_parm.preimage_method = 2;
+		//	struct_parm.sparse_kernel_size = 500;
+		//	struct_parm.bias = 0;
+		//}
+
+		//void SetRBFGamma(double g) {
+		//	kernel_parm.rbf_gamma = g;
+		//	UseCPSP();
+		//}
+
+		// Loss type 1: slack rescaling.
+		void UseSlackRescaling() {
+			struct_parm.loss_type = SLACK_RESCALING;
+		}
+
+		// Loss type 2: margin rescaling.
+		void UseMarginRescaling() {
+			struct_parm.loss_type = MARGIN_RESCALING;
+		}
+
+
+
+		// Replaces the current model with one read from model_file.
+		void ReadModel(char* model_file) {
+			FreeModel();
+			structmodel = read_struct_model(model_file, &struct_parm);
+		}
+
+		// For linear kernels writes the raw weight vector, one weight per
+		// line; otherwise delegates to svm_struct's model writer.
+		// NOTE(review): the linear path writes w[1..sizePsi] while
+		// WriteWeights emits w[0..sizePsi] -- confirm which indexing any
+		// read-back expects.  The ofstream open() result is not checked.
+		void WriteModel(char* model_file) {
+			if (kernel_parm.kernel_type == LINEAR) {
+				ofstream ofsm;
+				ofsm.open(model_file);
+				for (size_t i = 0; i < structmodel.sizePsi; i++) {
+					ofsm << structmodel.w[i+1] << endl;
+				}
+			} else {
+				write_struct_model(model_file, &structmodel, &struct_parm);
+			}
+		}
+
+		// Dumps the full weight vector w[0..sizePsi], tab-separated.
+		void WriteWeights(ostream& osm) {
+			osm << structmodel.w[0];
+			for (size_t i = 1; i < structmodel.sizePsi + 1; i++)
+				osm << '\t' << structmodel.w[i];
+			osm << endl;
+		}
+
+		// Thin wrappers over the svm_struct C free routines.
+		static void FreePattern(pattern x) {
+			free_pattern(x);
+		}
+
+		static void FreeLabel(label y) {
+			free_label(y);
+		}
+
+		void FreeModel() {
+			free_struct_model(structmodel);
+		}
+
+		static void FreeSample(sample s) {
+			free_struct_sample(s);
+		}
+
+		// Deep free: releases the DOC and its contained feature vectors.
+		static void FreeDoc(DOC* pDoc) {
+			free_example(pDoc, true);
+		}
+		void SetVerbosity(size_t V);
+
+		//static members process data
+		//single gene predictions
+
+
+		//creates a Doc for a given gene index in a single microarray
+		static DOC* CreateDoc(Sleipnir::CPCL &PCL, size_t iGene, size_t iDoc);
+
+
+		//Creates a sample using a single PCL and SVMlabels Looks up genes by name.
+		static SAMPLE
+			* CreateSample(Sleipnir::CPCL &PCL, vector<SVMLabel> SVMLabels);
+
+		//Classify single genes
+		vector<Result> Classify(Sleipnir::CPCL& PCL, vector<SVMLabel> SVMLabels);
+
+		// Dispatches to the svm_struct learner selected by Alg:
+		// 0/1 = n-slack (plain / shrinking heuristic), 2/3/4 = one-slack
+		// (primal / dual / dual with constraint cache), 9 = joint custom;
+		// any other value aborts the process.
+		void Learn(SAMPLE &sample) {
+			cerr << "SLACK NORM =" << struct_parm.slack_norm << endl;
+			/*  if (kernel_parm.kernel_type==CUSTOM)
+			svm_learn_struct_joint_custom(sample, &struct_parm, &learn_parm, &kernel_parm, &structmodel);
+			else*/
+
+
+			cerr << "ALG=" << Alg << endl;
+
+			if(Alg == 0)
+				svm_learn_struct(sample,&struct_parm,&learn_parm,&kernel_parm,&structmodel,NSLACK_ALG);
+			else if(Alg == 1)
+				svm_learn_struct(sample,&struct_parm,&learn_parm,&kernel_parm,&structmodel,NSLACK_SHRINK_ALG);
+			else if(Alg == 2)
+				svm_learn_struct_joint(sample,&struct_parm,&learn_parm,&kernel_parm,&structmodel,ONESLACK_PRIMAL_ALG);
+			else if(Alg == 3)
+				svm_learn_struct_joint(sample,&struct_parm,&learn_parm,&kernel_parm,&structmodel,ONESLACK_DUAL_ALG);
+			else if(Alg == 4)
+				svm_learn_struct_joint(sample,&struct_parm,&learn_parm,&kernel_parm,&structmodel,ONESLACK_DUAL_CACHE_ALG);
+			else if(Alg == 9)
+				svm_learn_struct_joint_custom(sample,&struct_parm,&learn_parm,&kernel_parm,&structmodel);
+			else
+				exit(1);
+			//
+		}
+
+
+		bool parms_check();
+		bool initialize();
+
+
+
+		// free the sample but don't free the Docs
+		static void FreeSample_leave_Doc(SAMPLE s);
+
+
+
+		STRUCTMODEL read_struct_model_w_linear(char *file, STRUCT_LEARN_PARM *sparm);
+	};
+
+
+};
+
+
+#endif // NO_SVM_STRUCT
+#endif // SVMSTRUCTI_H

tools/SVMmulticlass/SVMmulti.cpp

+#include <fstream>
+#include <iostream>
+#include <iterator>
+#include <vector>
+#include <queue>
+
+/*****************************************************************************
+* This file is provided under the Creative Commons Attribution 3.0 license.
+*
+* You are free to share, copy, distribute, transmit, or adapt this work
+* PROVIDED THAT you attribute the work to the authors listed below.
+* For more information, please see the following web page:
+* http://creativecommons.org/licenses/by/3.0/
+*
+* This file is a component of the Sleipnir library for functional genomics,
+* authored by:
+* Curtis Huttenhower (chuttenh@princeton.edu)
+* Mark Schroeder
+* Maria D. Chikina
+* Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
+*
+* If you use this library, the included executable tools, or any related
+* code in your work, please cite the following publication:
+* Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
+* Olga G. Troyanskaya.
+* "The Sleipnir library for computational functional genomics"
+*****************************************************************************/
+#include "stdafx.h"
+#include "cmdline.h"
+#include "statistics.h"
+
+using namespace SVMArc;
+//#include "../../extlib/svm_light/svm_light/kernel.h"
+
+/* Parses a two-column (gene<TAB>label) file into SVMLabel records.
+ * Lines with any other token count are reported to stderr and skipped.
+ * Returns the parsed labels. */
+vector<SVMArc::SVMLabel> ReadLabels(ifstream & ifsm) {
+
+	static const size_t c_iBuffer = 1024;
+	char acBuffer[c_iBuffer];
+	vector<string> vecstrTokens;
+	vector<SVMArc::SVMLabel> vecLabels;
+	// Loop on the stream state rather than eof(): the original
+	// while(!ifsm.eof()) pattern loops forever if getline sets failbit
+	// (e.g. an over-long line) and can process the final line twice.
+	// (The unused positive/negative counters were also dropped.)
+	while (ifsm.getline(acBuffer, c_iBuffer - 1)) {
+		acBuffer[c_iBuffer - 1] = 0;
+		vecstrTokens.clear();
+		CMeta::Tokenize(acBuffer, vecstrTokens);
+		if (vecstrTokens.empty())
+			continue;
+		if (vecstrTokens.size() != 2) {
+			cerr << "Illegal label line (" << vecstrTokens.size() << "): "
+				<< acBuffer << endl;
+			continue;
+		}
+		vecLabels.push_back(SVMArc::SVMLabel(vecstrTokens[0], atoi(
+			vecstrTokens[1].c_str())));
+	}
+	return vecLabels;
+}
+
+// Strict-weak ordering functor: sorts Result records by ascending
+// predicted class (Value).
+struct SortResults {
+	bool operator()(const SVMArc::Result& a, const SVMArc::Result& b) const {
+		return a.Value < b.Value;
+	}
+};
+
+/* Writes the results to ofsm sorted by predicted class, one gene per
+ * line: name, true label, predicted class, then every per-class score.
+ * Returns the number of results written. */
+size_t PrintResults(vector<SVMArc::Result> vecResults, ofstream & ofsm) {
+	sort(vecResults.begin(), vecResults.end(), SortResults());
+	for (size_t i = 0; i < vecResults.size(); i++) {
+		ofsm << vecResults[i].GeneName << '\t' << vecResults[i].Target << '\t'
+			<< vecResults[i].Value << '\t';
+		// Scores is a 0-based vector with num_class entries (see Classify);
+		// the original indexed 1..num_class, skipping the first class and
+		// reading one element past the end of the vector.
+		for (size_t j = 0; j < (size_t) vecResults[i].num_class; j++)
+			ofsm << vecResults[i].Scores[j] << '\t';
+		ofsm << endl;
+	}
+	// the original declared size_t but fell off the end of the function
+	// (undefined behavior); return the count written instead
+	return vecResults.size();
+}
+
+
+/**
+ * Entry point for the SVM-multiclass wrapper.  Depending on the options:
+ *  - model + labels: train once on the label file and write the model;
+ *  - model + output: read the model and classify every gene in the PCL;
+ *  - output + labels: cross-validated train/classify, optionally also
+ *    scoring all unlabeled genes (--all) by averaging scores over folds.
+ */
+int main(int iArgs, char** aszArgs) {
+	gengetopt_args_info sArgs;
+
+	CPCL PCL;
+	SVMArc::CSVMSTRUCTMC SVM;
+
+	size_t i, j, k;
+	double bestscore;
+	ifstream ifsm;
+	if (cmdline_parser(iArgs, aszArgs, &sArgs)) {
+		cmdline_parser_print_help();
+		return 1;
+	}
+
+	//Set Parameters
+	SVM.SetLearningAlgorithm(sArgs.learning_algorithm_arg);
+	SVM.SetVerbosity(sArgs.verbosity_arg);
+	SVM.SetLossFunction(sArgs.loss_function_arg);
+
+	if (sArgs.cross_validation_arg < 1){
+		cerr << "cross_valid is <1. Must be set at least 1" << endl;
+		return 1;
+	}
+	else if(sArgs.cross_validation_arg < 2){
+		cerr << "cross_valid is set to 1. No cross validation holdouts will be run." << endl;
+	}
+
+	SVM.SetTradeoff(sArgs.tradeoff_arg);
+	if (sArgs.slack_flag)
+		SVM.UseSlackRescaling();
+	else
+		SVM.UseMarginRescaling();
+
+	if (!SVM.parms_check()) {
+		cerr << "Parameter check not passed, see above errors" << endl;
+		return 1;
+	}
+
+	if (sArgs.input_given) {
+		if (!PCL.Open(sArgs.input_arg, sArgs.skip_arg, sArgs.mmap_flag)) {
+			cerr << "Could not open input PCL" << endl;
+			return 1;
+		}
+	}
+
+	//Read labels from file
+	vector<SVMArc::SVMLabel> vecLabels;
+	set<string> setLabeledGenes;
+	if (sArgs.labels_given) {
+		ifsm.clear();
+		ifsm.open(sArgs.labels_arg);
+		if (ifsm.is_open())
+			vecLabels = ReadLabels(ifsm);
+		else {
+			cerr << "Could not read label file" << endl;
+			return 1;
+		}
+		for (i = 0; i < vecLabels.size(); i++)
+			setLabeledGenes.insert(vecLabels[i].GeneName);
+	}
+
+	//Training
+	SVMArc::SAMPLE* pTrainSample;
+	// standard containers instead of the original variable-length arrays
+	// (a GCC extension, not portable C++)
+	vector<vector<SVMArc::SVMLabel> > pTrainVector(sArgs.cross_validation_arg);
+	vector<vector<SVMArc::SVMLabel> > pTestVector(sArgs.cross_validation_arg);
+	vector<SVMArc::Result> AllResults;
+	vector<SVMArc::Result> tmpAllResults;
+
+	if (sArgs.model_given && sArgs.labels_given) { //learn once and write to file
+		pTrainSample = CSVMSTRUCTMC::CreateSample(PCL, vecLabels);
+		SVM.Learn(*pTrainSample);
+		SVM.WriteModel(sArgs.model_arg);
+	} else if (sArgs.model_given && sArgs.output_given) { //read model and classify all
+		vector<SVMLabel> vecAllLabels;
+
+		for (size_t i = 0; i < PCL.GetGenes(); i++)
+			vecAllLabels.push_back(SVMLabel(PCL.GetGene(i), 0));
+
+		SVM.ReadModel(sArgs.model_arg);
+		AllResults = SVM.Classify(PCL, vecAllLabels);
+		ofstream ofsm;
+		ofsm.open(sArgs.output_arg);
+		if (ofsm.is_open())
+			PrintResults(AllResults, ofsm);
+		else {
+			cerr << "Could not open output file" << endl;
+		}
+	} else if (sArgs.output_given && sArgs.labels_given) {
+		//do learning and classifying with cross validation
+		//set up training data
+		if( sArgs.cross_validation_arg > 1){
+			// deal labeled genes round-robin into folds: fold i holds every
+			// cross_validation_arg-th gene as test, the rest as train
+			for (i = 0; i < sArgs.cross_validation_arg; i++) {
+				pTestVector[i].reserve((size_t) vecLabels.size()
+					/ sArgs.cross_validation_arg + sArgs.cross_validation_arg);
+				pTrainVector[i].reserve((size_t) vecLabels.size()
+					/ (sArgs.cross_validation_arg)
+					* (sArgs.cross_validation_arg - 1)
+					+ sArgs.cross_validation_arg);
+				for (j = 0; j < vecLabels.size(); j++) {
+					if (j % sArgs.cross_validation_arg == i) {
+						pTestVector[i].push_back(vecLabels[j]);
+					} else {
+						pTrainVector[i].push_back((vecLabels[j]));
+					}
+				}
+			}
+		}
+		else{ // if you have less than 2 fold cross, no cross validation is done, all train genes are used and predicted
+
+			// no holdout so train is the same as test gene set
+			pTestVector[0].reserve((size_t) vecLabels.size() + sArgs.cross_validation_arg);
+			pTrainVector[0].reserve((size_t) vecLabels.size() + sArgs.cross_validation_arg);
+
+			for (j = 0; j < vecLabels.size(); j++) {
+				pTestVector[0].push_back(vecLabels[j]);
+				pTrainVector[0].push_back(vecLabels[j]);
+			}
+		}
+		//set up training data done
+
+		//set up validation data: all genes that carry no label
+		vector<SVMLabel> vec_allUnlabeledLabels;
+		vector<Result> vec_allUnlabeledResults;
+		vector<Result> vec_tmpUnlabeledResults;
+		if (sArgs.all_flag) {
+			vec_allUnlabeledLabels.reserve(PCL.GetGenes());
+			vec_allUnlabeledResults.reserve(PCL.GetGenes());
+			for (i = 0; i < PCL.GetGenes(); i++) {
+				if (setLabeledGenes.find(PCL.GetGene(i))
+					== setLabeledGenes.end()) {
+						vec_allUnlabeledLabels.push_back(
+							SVMLabel(PCL.GetGene(i), 0));
+						vec_allUnlabeledResults.push_back(Result(PCL.GetGene(i)));
+				}
+			}
+		}
+		// one train/classify pass per fold
+		for (i = 0; i < sArgs.cross_validation_arg; i++) {
+			pTrainSample = SVMArc::CSVMSTRUCTMC::CreateSample(PCL,
+				pTrainVector[i]);
+
+			cerr << "Cross Validation Trial " << i << endl;
+			SVM.Learn(*pTrainSample);
+			cerr << "Learned" << endl;
+			tmpAllResults = SVM.Classify(PCL,	pTestVector[i]);
+			cerr << "Classified " << tmpAllResults.size() << " examples"<< endl;
+			AllResults.insert(AllResults.end(), tmpAllResults.begin(), tmpAllResults.end());
+			tmpAllResults.resize(0);
+			if (sArgs.all_flag) {
+				vec_tmpUnlabeledResults = SVM.Classify(
+					PCL, vec_allUnlabeledLabels);
+
+				// Classify fills Scores 0-based with num_class entries; the
+				// original indexed 1..num_class here, skipping the first
+				// class and reading one element past the end.
+				if(i == 0){
+					for (j = 0; j < vec_tmpUnlabeledResults.size(); j++){
+						vec_allUnlabeledResults[j].num_class = vec_tmpUnlabeledResults[j].num_class;
+						for( k = 0; k < (size_t) vec_tmpUnlabeledResults[j].num_class; k++)
+							vec_allUnlabeledResults[j].Scores.push_back(vec_tmpUnlabeledResults[j].Scores[k]);
+					}
+				}
+				else{
+					for (j = 0; j < vec_tmpUnlabeledResults.size(); j++)
+						for( k = 0; k < (size_t) vec_tmpUnlabeledResults[j].num_class; k++)
+							vec_allUnlabeledResults[j].Scores[k] += vec_tmpUnlabeledResults[j].Scores[k];
+				}
+
+			}
+			if (i > 0) {
+				// NOTE(review): fold 0's sample is never freed (kept from
+				// the original; freeing it may be unsafe if the model
+				// retains references to it -- confirm before changing)
+				SVMArc::CSVMSTRUCTMC::FreeSample(*pTrainSample);
+			}
+		}
+
+		if (sArgs.all_flag) { //average unlabeled scores over folds, pick a class
+			for (j = 0; j < vec_allUnlabeledResults.size(); j++)
+				for( k = 0; k < (size_t) vec_allUnlabeledResults[j].num_class; k++){
+					vec_allUnlabeledResults[j].Scores[k] /= sArgs.cross_validation_arg;
+					// NOTE(review): as in the original, the class with the
+					// LOWEST averaged score wins -- confirm score polarity
+					if (k == 0 || vec_allUnlabeledResults[j].Scores[k] < bestscore) {
+						bestscore = vec_allUnlabeledResults[j].Scores[k];
+						// Scores[k] is the score of class k+1 (labels are 1-based)
+						vec_allUnlabeledResults[j].Value = k + 1;
+					}
+				}
+
+			AllResults.insert(AllResults.end(),
+				vec_allUnlabeledResults.begin(),
+				vec_allUnlabeledResults.end());
+		}
+
+		ofstream ofsm;
+		ofsm.clear();
+		ofsm.open(sArgs.output_arg);
+		PrintResults(AllResults, ofsm);
+		return 0;
+
+	} else {
+		cerr << "More options are needed" << endl;
+	}
+	return 0;
+}
+

tools/SVMmulticlass/SVMmulti.ggo

+package	"SVMmulti"
+version	"1.0"
+purpose	"Wrapper for SVM-multiclass"
+
+section "Main"
+option	"labels"				l	"Labels file"
+										string	typestr="filename"	no
+option	"output"				o	"Output file "
+										string	typestr="filename"  no
+option	"input"					i	"Input PCL file "
+										string	typestr="filename"  yes
+option	"model"					m	"Model file"
+										string	typestr="filename"  no
+option  "all"       			a   "Always classify all genes in PCLs"  
+										flag off
+
+option  "slack"       			S   "Use slack rescaling (instead of margin rescaling)"
+										flag off
+
+section "Options"
+option "verbosity"  			v   "Sets the svm_struct verbosity"
+										int default="0" no
+option "skip"      				s   "Number of columns to skip in input pcls"
+										int default="2" no
+option	"normalize"				n	"Normalize PCLS to 0 mean 1 variance"
+										flag	off
+option	"cross_validation"		c	"Number of cross-validation sets ( arg of 1 will turn off cross-validation )"
+										int default="5" no
+option "loss_function" 		e   "Sets the loss function for SVM learning: Choice of:
+0\tZero/one loss: 1 if vector of predictions contains error, 0 otherwise.
+\n" 
+										int default="0" no
+option "tradeoff"    			t   "SVM tradeoff constant C"
+										float default="1" no
+option "learning_algorithm" 		w   "Choice of structural learning algorithm (default 3):
+0\tn-slack algorithm 
+1\tn-slack algorithm with shrinking heuristic
+2\t1-slack algorithm (primal) 
+3\t1-slack algorithm (dual)
+4\t1-slack algorithm (dual) with constraint cache\n" 
+										int default="3" no
+option	"mmap"					M	"Memory map binary input"
+										flag	off

tools/SVMmulticlass/cmdline.c

+/*
+  File autogenerated by gengetopt version 2.22
+  generated with the following command:
+  /Genomics/grid/users/jzthree/bin/gengetopt -iSVMmulti.ggo --default-optional -u -N -e 
+
+  The developers of gengetopt consider the fixed text that goes in all
+  gengetopt output files to be in the public domain:
+  we make no copyright claims on it.
+*/
+
+/* If we use autoconf.  */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "getopt.h"
+
+#include "cmdline.h"
+
+const char *gengetopt_args_info_purpose = "Wrapper for SVM-multiclass";
+
+const char *gengetopt_args_info_usage = "Usage: SVMmulti [OPTIONS]... [FILES]...";
+
+const char *gengetopt_args_info_description = "";
+
+/* One help line per option, NULL-terminated.  The learning_algorithm
+ * prose claimed "default 4" while the generated default is `3' (see the
+ * trailing (default=`3')); the text below says 3 to match. */
+const char *gengetopt_args_info_help[] = {
+  "  -h, --help                    Print help and exit",
+  "  -V, --version                 Print version and exit",
+  "\nMain:",
+  "  -l, --labels=filename         Labels file",
+  "  -o, --output=filename         Output file ",
+  "  -i, --input=filename          Input PCL file ",
+  "  -m, --model=filename          Model file",
+  "  -a, --all                     Always classify all genes in PCLs  \n                                  (default=off)",
+  "  -S, --slack                   Use slack rescaling (instead of margin \n                                  rescaling)  (default=off)",
+  "\nOptions:",
+  "  -v, --verbosity=INT           Sets the svm_struct verbosity  (default=`0')",
+  "  -s, --skip=INT                Number of columns to skip in input pcls  \n                                  (default=`2')",
+  "  -n, --normalize               Normalize PCLS to 0 mean 1 variance  \n                                  (default=off)",
+  "  -c, --cross_validation=INT    Number of cross-validation sets ( arg of 1 will \n                                  turn off cross-validation )  (default=`5')",
+  "  -e, --loss_function=INT       Sets the loss function for SVM learning: Choice \n                                  of:\n\n                                  0\tZero/one loss: 1 if vector of predictions \n                                  contains error, 0 otherwise.\n\n                                  \n                                    (default=`0')",
+  "  -t, --tradeoff=FLOAT          SVM tradeoff constant C  (default=`1')",
+  "  -w, --learning_algorithm=INT  Choice of structural learning algorithm \n                                  (default 3):\n\n                                  0\tn-slack algorithm \n\n                                  1\tn-slack algorithm with shrinking heuristic\n\n                                  2\t1-slack algorithm (primal) \n\n                                  3\t1-slack algorithm (dual)\n\n                                  4\t1-slack algorithm (dual) with constraint \n                                  cache\n                                    (default=`3')",
+  "  -M, --mmap                    Memory map binary input  (default=off)",
+    0
+};
+
+typedef enum {ARG_NO
+  , ARG_FLAG
+  , ARG_STRING
+  , ARG_INT
+  , ARG_FLOAT
+} cmdline_parser_arg_type;
+
+static
+void clear_given (struct gengetopt_args_info *args_info);
+static
+void clear_args (struct gengetopt_args_info *args_info);
+
+static int
+cmdline_parser_internal (int argc, char * const *argv, struct gengetopt_args_info *args_info,
+                        struct cmdline_parser_params *params, const char *additional_error);
+
+static int
+cmdline_parser_required2 (struct gengetopt_args_info *args_info, const char *prog_name, const char *additional_error);
+
+static char *
+gengetopt_strdup (const char *s);
+
+/* Reset every *_given occurrence counter to 0 (no options seen yet). */
+static
+void clear_given (struct gengetopt_args_info *args_info)
+{
+  args_info->help_given = 0 ;
+  args_info->version_given = 0 ;
+  args_info->labels_given = 0 ;
+  args_info->output_given = 0 ;
+  args_info->input_given = 0 ;
+  args_info->model_given = 0 ;
+  args_info->all_given = 0 ;
+  args_info->slack_given = 0 ;
+  args_info->verbosity_given = 0 ;
+  args_info->skip_given = 0 ;
+  args_info->normalize_given = 0 ;
+  args_info->cross_validation_given = 0 ;
+  args_info->loss_function_given = 0 ;
+  args_info->tradeoff_given = 0 ;
+  args_info->learning_algorithm_given = 0 ;
+  args_info->mmap_given = 0 ;
+}
+
+/* Install the default value for every option argument: string options
+   start as NULL, flags off, and numeric options take the defaults from
+   the .ggo spec (skip=2, cross_validation=5, tradeoff=1, algorithm=3). */
+static
+void clear_args (struct gengetopt_args_info *args_info)
+{
+  args_info->labels_arg = NULL;
+  args_info->labels_orig = NULL;
+  args_info->output_arg = NULL;
+  args_info->output_orig = NULL;
+  args_info->input_arg = NULL;
+  args_info->input_orig = NULL;
+  args_info->model_arg = NULL;
+  args_info->model_orig = NULL;
+  args_info->all_flag = 0;
+  args_info->slack_flag = 0;
+  args_info->verbosity_arg = 0;
+  args_info->verbosity_orig = NULL;
+  args_info->skip_arg = 2;
+  args_info->skip_orig = NULL;
+  args_info->normalize_flag = 0;
+  args_info->cross_validation_arg = 5;
+  args_info->cross_validation_orig = NULL;
+  args_info->loss_function_arg = 0;
+  args_info->loss_function_orig = NULL;
+  args_info->tradeoff_arg = 1;
+  args_info->tradeoff_orig = NULL;
+  args_info->learning_algorithm_arg = 3;
+  args_info->learning_algorithm_orig = NULL;
+  args_info->mmap_flag = 0;
+  
+}
+
+/* Point each *_help member at its line in gengetopt_args_info_help[].
+   Indices skip the "\nMain:" / "\nOptions:" section headers (2 and 9). */
+static
+void init_args_info(struct gengetopt_args_info *args_info)
+{
+
+
+  args_info->help_help = gengetopt_args_info_help[0] ;
+  args_info->version_help = gengetopt_args_info_help[1] ;
+  args_info->labels_help = gengetopt_args_info_help[3] ;
+  args_info->output_help = gengetopt_args_info_help[4] ;
+  args_info->input_help = gengetopt_args_info_help[5] ;
+  args_info->model_help = gengetopt_args_info_help[6] ;
+  args_info->all_help = gengetopt_args_info_help[7] ;
+  args_info->slack_help = gengetopt_args_info_help[8] ;
+  args_info->verbosity_help = gengetopt_args_info_help[10] ;
+  args_info->skip_help = gengetopt_args_info_help[11] ;
+  args_info->normalize_help = gengetopt_args_info_help[12] ;
+  args_info->cross_validation_help = gengetopt_args_info_help[13] ;
+  args_info->loss_function_help = gengetopt_args_info_help[14] ;
+  args_info->tradeoff_help = gengetopt_args_info_help[15] ;
+  args_info->learning_algorithm_help = gengetopt_args_info_help[16] ;
+  args_info->mmap_help = gengetopt_args_info_help[17] ;
+  
+}
+
+/* Print "<package> <version>" to stdout (used by -V/--version and --help). */
+void
+cmdline_parser_print_version (void)
+{
+  printf ("%s %s\n", CMDLINE_PARSER_PACKAGE, CMDLINE_PARSER_VERSION);
+}
+
+/* Shared preamble of the help output: version, purpose, usage and
+   description strings (empty strings are suppressed). */
+static void print_help_common(void) {
+  cmdline_parser_print_version ();
+
+  if (strlen(gengetopt_args_info_purpose) > 0)
+    printf("\n%s\n", gengetopt_args_info_purpose);
+
+  if (strlen(gengetopt_args_info_usage) > 0)
+    printf("\n%s\n", gengetopt_args_info_usage);
+
+  printf("\n");
+
+  if (strlen(gengetopt_args_info_description) > 0)
+    printf("%s\n", gengetopt_args_info_description);
+}
+
+/* Print the full help text: common preamble followed by every line of
+   the NULL-terminated gengetopt_args_info_help[] table. */
+void
+cmdline_parser_print_help (void)
+{
+  int i = 0;
+  print_help_common();
+  while (gengetopt_args_info_help[i])
+    printf("%s\n", gengetopt_args_info_help[i++]);
+}
+
+/* Initialize an args_info structure to its pristine state: zeroed
+   counters, default argument values, help pointers, and no positional
+   arguments.  Must be called before first use (or via the parser). */
+void
+cmdline_parser_init (struct gengetopt_args_info *args_info)
+{
+  clear_given (args_info);
+  clear_args (args_info);
+  init_args_info (args_info);
+
+  args_info->inputs = NULL;
+  args_info->inputs_num = 0;
+}
+
+/* Fill a cmdline_parser_params with the documented defaults
+   (no override, initialize, check required, no ambiguity check,
+   print errors).  NULL params is tolerated and ignored. */
+void
+cmdline_parser_params_init(struct cmdline_parser_params *params)
+{
+  if (params)
+    { 
+      params->override = 0;
+      params->initialize = 1;
+      params->check_required = 1;
+      params->check_ambiguity = 0;
+      params->print_errors = 1;
+    }
+}
+
+/* Heap-allocate a cmdline_parser_params initialized to defaults.
+   Caller owns the returned pointer and frees it with free().
+   NOTE(review): malloc result is passed to params_init unchecked; on
+   allocation failure this returns NULL (params_init ignores NULL). */
+struct cmdline_parser_params *
+cmdline_parser_params_create(void)
+{
+  struct cmdline_parser_params *params = 
+    (struct cmdline_parser_params *)malloc(sizeof(struct cmdline_parser_params));
+  cmdline_parser_params_init(params);  
+  return params;
+}
+
+/* free() a string field and null the pointer so repeated release is safe. */
+static void
+free_string_field (char **s)
+{
+  if (*s)
+    {
+      free (*s);
+      *s = 0;
+    }
+}
+
+
+/* Free every heap-allocated string field (both parsed value and the
+   original command-line text), the positional-argument array, and reset
+   the given-counters.  Leaves args_info reusable after re-init. */
+static void
+cmdline_parser_release (struct gengetopt_args_info *args_info)
+{
+  unsigned int i;
+  free_string_field (&(args_info->labels_arg));
+  free_string_field (&(args_info->labels_orig));
+  free_string_field (&(args_info->output_arg));
+  free_string_field (&(args_info->output_orig));
+  free_string_field (&(args_info->input_arg));
+  free_string_field (&(args_info->input_orig));
+  free_string_field (&(args_info->model_arg));
+  free_string_field (&(args_info->model_orig));
+  free_string_field (&(args_info->verbosity_orig));
+  free_string_field (&(args_info->skip_orig));
+  free_string_field (&(args_info->cross_validation_orig));
+  free_string_field (&(args_info->loss_function_orig));
+  free_string_field (&(args_info->tradeoff_orig));
+  free_string_field (&(args_info->learning_algorithm_orig));
+  
+  
+  for (i = 0; i < args_info->inputs_num; ++i)
+    free (args_info->inputs [i]);
+
+  if (args_info->inputs_num)
+    free (args_info->inputs);
+
+  clear_given (args_info);
+}
+
+
+/* Write one option to a config-style dump: `opt="arg"` when an argument
+   was given, bare `opt` otherwise.  The `values` parameter is unused in
+   this generated variant (no options with enumerated values). */
+static void
+write_into_file(FILE *outfile, const char *opt, const char *arg, char *values[])
+{
+  if (arg) {
+    fprintf(outfile, "%s=\"%s\"\n", opt, arg);
+  } else {
+    fprintf(outfile, "%s\n", opt);
+  }
+}
+
+
+/* Dump every option that was given on the command line to `outfile` in
+   config-file syntax, using the *_orig (verbatim) argument strings.
+   Returns EXIT_SUCCESS, or EXIT_FAILURE if outfile is NULL. */
+int
+cmdline_parser_dump(FILE *outfile, struct gengetopt_args_info *args_info)
+{
+  int i = 0;
+
+  if (!outfile)
+    {
+      fprintf (stderr, "%s: cannot dump options to stream\n", CMDLINE_PARSER_PACKAGE);
+      return EXIT_FAILURE;
+    }
+
+  if (args_info->help_given)
+    write_into_file(outfile, "help", 0, 0 );
+  if (args_info->version_given)
+    write_into_file(outfile, "version", 0, 0 );
+  if (args_info->labels_given)
+    write_into_file(outfile, "labels", args_info->labels_orig, 0);
+  if (args_info->output_given)
+    write_into_file(outfile, "output", args_info->output_orig, 0);
+  if (args_info->input_given)
+    write_into_file(outfile, "input", args_info->input_orig, 0);
+  if (args_info->model_given)
+    write_into_file(outfile, "model", args_info->model_orig, 0);
+  if (args_info->all_given)
+    write_into_file(outfile, "all", 0, 0 );
+  if (args_info->slack_given)
+    write_into_file(outfile, "slack", 0, 0 );
+  if (args_info->verbosity_given)
+    write_into_file(outfile, "verbosity", args_info->verbosity_orig, 0);
+  if (args_info->skip_given)
+    write_into_file(outfile, "skip", args_info->skip_orig, 0);
+  if (args_info->normalize_given)
+    write_into_file(outfile, "normalize", 0, 0 );
+  if (args_info->cross_validation_given)
+    write_into_file(outfile, "cross_validation", args_info->cross_validation_orig, 0);
+  if (args_info->loss_function_given)
+    write_into_file(outfile, "loss_function", args_info->loss_function_orig, 0);
+  if (args_info->tradeoff_given)
+    write_into_file(outfile, "tradeoff", args_info->tradeoff_orig, 0);
+  if (args_info->learning_algorithm_given)
+    write_into_file(outfile, "learning_algorithm", args_info->learning_algorithm_orig, 0);
+  if (args_info->mmap_given)
+    write_into_file(outfile, "mmap", 0, 0 );
+  
+
+  i = EXIT_SUCCESS;
+  return i;
+}
+
+/* Save the given options to `filename` (truncating it) via
+   cmdline_parser_dump.  Returns EXIT_SUCCESS, or EXIT_FAILURE if the
+   file cannot be opened for writing. */
+int
+cmdline_parser_file_save(const char *filename, struct gengetopt_args_info *args_info)
+{
+  FILE *outfile;
+  int i = 0;
+
+  outfile = fopen(filename, "w");
+
+  if (!outfile)
+    {
+      fprintf (stderr, "%s: cannot open file for writing: %s\n", CMDLINE_PARSER_PACKAGE, filename);
+      return EXIT_FAILURE;
+    }
+
+  i = cmdline_parser_dump(outfile, args_info);
+  fclose (outfile);
+
+  return i;
+}
+
+/* Public wrapper around cmdline_parser_release: free all memory owned
+   by args_info (string arguments and positional inputs). */
+void
+cmdline_parser_free (struct gengetopt_args_info *args_info)
+{
+  cmdline_parser_release (args_info);
+}
+
+/** @brief replacement of strdup, which is not standard
+ *  Returns a malloc'd copy of @p s, NULL when @p s is NULL or when
+ *  allocation fails; caller owns the result. */
+char *
+gengetopt_strdup (const char *s)
+{
+  char *result = NULL;
+  if (!s)
+    return result;
+
+  result = (char*)malloc(strlen(s) + 1);
+  if (result == (char*)0)
+    return (char*)0;
+  strcpy(result, s);
+  return result;
+}
+
+/* Main entry point: parse argv with default parameters
+   (no override, initialize, check required).  Returns 0 on success. */
+int
+cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info)
+{
+  return cmdline_parser2 (argc, argv, args_info, 0, 1, 1);
+}
+
+/* Entry point taking an explicit cmdline_parser_params structure
+   (the non-deprecated replacement for cmdline_parser2). */
+int
+cmdline_parser_ext (int argc, char * const *argv, struct gengetopt_args_info *args_info,
+                   struct cmdline_parser_params *params)
+{
+  int result;
+  result = cmdline_parser_internal (argc, argv, args_info, params, NULL);
+
+  return result;
+}
+
+/* Deprecated entry point with individual flags; packs them into a local
+   params struct (ambiguity check off, errors printed) and delegates to
+   cmdline_parser_internal. */
+int
+cmdline_parser2 (int argc, char * const *argv, struct gengetopt_args_info *args_info, int override, int initialize, int check_required)
+{
+  int result;
+  struct cmdline_parser_params params;
+  
+  params.override = override;
+  params.initialize = initialize;
+  params.check_required = check_required;
+  params.check_ambiguity = 0;
+  params.print_errors = 1;
+
+  result = cmdline_parser_internal (argc, argv, args_info, &params, NULL);
+
+  return result;
+}
+
+/* Verify that all required options were supplied; prints diagnostics to
+   stderr.  Returns EXIT_SUCCESS when satisfied, EXIT_FAILURE otherwise. */
+int
+cmdline_parser_required (struct gengetopt_args_info *args_info, const char *prog_name)
+{
+  int result = EXIT_SUCCESS;
+
+  if (cmdline_parser_required2(args_info, prog_name, NULL) > 0)
+    result = EXIT_FAILURE;
+
+  return result;
+}
+
+/* Check required options: only --input/-i is mandatory in this tool.
+   Returns the number of missing required options (0 or 1 here). */
+int
+cmdline_parser_required2 (struct gengetopt_args_info *args_info, const char *prog_name, const char *additional_error)
+{
+  int error = 0;
+
+  /* checks for required options */
+  if (! args_info->input_given)
+    {
+      fprintf (stderr, "%s: '--input' ('-i') option required%s\n", prog_name, (additional_error ? additional_error : ""));
+      error = 1;
+    }
+  
+  
+  /* checks for dependences among options */
+
+  return error;
+}
+
+
+/* Program name (argv[0]), set by cmdline_parser_internal and used in
+   update_arg's error messages. */
+static char *package_name = 0;
+
+/**
+ * @brief updates an option
+ * @param field the generic pointer to the field to update
+ * @param orig_field the pointer to the orig field
+ * @param field_given the pointer to the number of occurrence of this option
+ * @param prev_given the pointer to the number of occurrence already seen
+ * @param value the argument for this option (if null no arg was specified)
+ * @param possible_values the possible values for this option (if specified)
+ * @param default_value the default value (in case the option only accepts fixed values)
+ * @param arg_type the type of this option
+ * @param check_ambiguity @see cmdline_parser_params.check_ambiguity
+ * @param override @see cmdline_parser_params.override
+ * @param no_free whether to free a possible previous value
+ * @param multiple_option whether this is a multiple option
+ * @param long_opt the corresponding long option
+ * @param short_opt the corresponding short option (or '-' if none)
+ * @param additional_error possible further error specification
+ * @return 0 on success, 1 on duplicate option or bad numeric value
+ */
+static
+int update_arg(void *field, char **orig_field,
+               unsigned int *field_given, unsigned int *prev_given, 
+               char *value, char *possible_values[], const char *default_value,
+               cmdline_parser_arg_type arg_type,
+               int check_ambiguity, int override,
+               int no_free, int multiple_option,
+               const char *long_opt, char short_opt,
+               const char *additional_error)
+{
+  char *stop_char = 0;
+  const char *val = value;
+  int found;
+  char **string_field;
+
+  stop_char = 0;
+  found = 0;
+
+  /* Reject a second occurrence of a non-multiple option (unless
+     overriding); ambiguity check also counts occurrences from a
+     previously-filled args_info. */
+  if (!multiple_option && prev_given && (*prev_given || (check_ambiguity && *field_given)))
+    {
+      if (short_opt != '-')
+        fprintf (stderr, "%s: `--%s' (`-%c') option given more than once%s\n", 
+               package_name, long_opt, short_opt,
+               (additional_error ? additional_error : ""));
+      else
+        fprintf (stderr, "%s: `--%s' option given more than once%s\n", 
+               package_name, long_opt,
+               (additional_error ? additional_error : ""));
+      return 1; /* failure */
+    }
+
+    
+  if (field_given && *field_given && ! override)
+    return 0;
+  if (prev_given)
+    (*prev_given)++;
+  if (field_given)
+    (*field_given)++;
+  /* `found` is always 0 here: no option in this tool restricts its
+     argument to an enumerated value list, so this branch is dead. */
+  if (possible_values)
+    val = possible_values[found];
+
+  /* Store the parsed value into the typed destination field. */
+  switch(arg_type) {
+  case ARG_FLAG:
+    *((int *)field) = !*((int *)field);
+    break;
+  case ARG_INT:
+    if (val) *((int *)field) = strtol (val, &stop_char, 0);
+    break;
+  case ARG_FLOAT:
+    if (val) *((float *)field) = (float)strtod (val, &stop_char);
+    break;
+  case ARG_STRING:
+    if (val) {
+      string_field = (char **)field;
+      if (!no_free && *string_field)
+        free (*string_field); /* free previous string */
+      *string_field = gengetopt_strdup (val);
+    }
+    break;
+  default:
+    break;
+  };
+
+  /* check numeric conversion: strtol/strtod must have consumed the
+     entire argument, otherwise it was not a valid number */
+  switch(arg_type) {
+  case ARG_INT:
+  case ARG_FLOAT:
+    if (val && !(stop_char && *stop_char == '\0')) {
+      fprintf(stderr, "%s: invalid numeric value: %s\n", package_name, val);
+      return 1; /* failure */
+    }
+    break;
+  default:
+    ;
+  };
+
+  /* store the original value (verbatim command-line text, used by
+     cmdline_parser_dump/file_save) */
+  switch(arg_type) {
+  case ARG_NO:
+  case ARG_FLAG:
+    break;
+  default:
+    if (value && orig_field) {
+      if (no_free) {
+        *orig_field = value;
+      } else {
+        if (*orig_field)
+          free (*orig_field); /* free previous string */
+        *orig_field = gengetopt_strdup (value);
+      }
+    }
+  };
+
+  return 0; /* OK */
+}
+
+
+/* Core parser: drives getopt_long over argv, routing each recognized
+   option through update_arg into args_info.  `local_args_info` tracks
+   occurrences seen in this call only, so duplicate detection works even
+   when args_info carries values from a previous parse.  On success the
+   remaining non-option arguments are copied into args_info->inputs.
+   Returns 0 on success, EXIT_FAILURE on any parse/required-option error. */
+int
+cmdline_parser_internal (int argc, char * const *argv, struct gengetopt_args_info *args_info,
+                        struct cmdline_parser_params *params, const char *additional_error)
+{
+  int c;	/* Character of the parsed option.  */
+
+  int error = 0;
+  struct gengetopt_args_info local_args_info;
+  
+  int override;
+  int initialize;
+  int check_required;
+  int check_ambiguity;
+  
+  package_name = argv[0];
+  
+  override = params->override;
+  initialize = params->initialize;
+  check_required = params->check_required;
+  check_ambiguity = params->check_ambiguity;
+
+  if (initialize)
+    cmdline_parser_init (args_info);
+
+  cmdline_parser_init (&local_args_info);
+
+  /* Reset getopt's global state so the parser can be called more than
+     once per process.  NOTE(review): optind = 0 (rather than 1) is a
+     GNU getopt reinitialization convention — confirm against the
+     bundled getopt implementation in "getopt.h". */
+  optarg = 0;
+  optind = 0;
+  opterr = params->print_errors;
+  optopt = '?';
+
+  while (1)
+    {
+      int option_index = 0;
+
+      /* Long-option table; must stay in sync with the short-option
+         string passed to getopt_long below. */
+      static struct option long_options[] = {
+        { "help",	0, NULL, 'h' },
+        { "version",	0, NULL, 'V' },
+        { "labels",	1, NULL, 'l' },
+        { "output",	1, NULL, 'o' },
+        { "input",	1, NULL, 'i' },
+        { "model",	1, NULL, 'm' },
+        { "all",	0, NULL, 'a' },
+        { "slack",	0, NULL, 'S' },
+        { "verbosity",	1, NULL, 'v' },
+        { "skip",	1, NULL, 's' },
+        { "normalize",	0, NULL, 'n' },
+        { "cross_validation",	1, NULL, 'c' },
+        { "loss_function",	1, NULL, 'e' },
+        { "tradeoff",	1, NULL, 't' },
+        { "learning_algorithm",	1, NULL, 'w' },
+        { "mmap",	0, NULL, 'M' },
+        { NULL,	0, NULL, 0 }
+      };
+
+      c = getopt_long (argc, argv, "hVl:o:i:m:aSv:s:nc:e:t:w:M", long_options, &option_index);
+
+      if (c == -1) break;	/* Exit from `while (1)' loop.  */
+
+      switch (c)
+        {
+        case 'h':	/* Print help and exit.  */
+          cmdline_parser_print_help ();
+          cmdline_parser_free (&local_args_info);
+          exit (EXIT_SUCCESS);
+
+        case 'V':	/* Print version and exit.  */
+        
+        
+          if (update_arg( 0 , 
+               0 , &(args_info->version_given),
+              &(local_args_info.version_given), optarg, 0, 0, ARG_NO,
+              check_ambiguity, override, 0, 0,
+              "version", 'V',
+              additional_error))
+            goto failure;
+          cmdline_parser_free (&local_args_info);
+          return 0;
+        
+          break;
+        case 'l':	/* Labels file.  */
+        
+        
+          if (update_arg( (void *)&(args_info->labels_arg), 
+               &(args_info->labels_orig), &(args_info->labels_given),
+              &(local_args_info.labels_given), optarg, 0, 0, ARG_STRING,
+              check_ambiguity, override, 0, 0,
+              "labels", 'l',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'o':	/* Output file .  */
+        
+        
+          if (update_arg( (void *)&(args_info->output_arg), 
+               &(args_info->output_orig), &(args_info->output_given),
+              &(local_args_info.output_given), optarg, 0, 0, ARG_STRING,
+              check_ambiguity, override, 0, 0,
+              "output", 'o',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'i':	/* Input PCL file .  */
+        
+        
+          if (update_arg( (void *)&(args_info->input_arg), 
+               &(args_info->input_orig), &(args_info->input_given),
+              &(local_args_info.input_given), optarg, 0, 0, ARG_STRING,
+              check_ambiguity, override, 0, 0,
+              "input", 'i',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'm':	/* Model file.  */
+        
+        
+          if (update_arg( (void *)&(args_info->model_arg), 
+               &(args_info->model_orig), &(args_info->model_given),
+              &(local_args_info.model_given), optarg, 0, 0, ARG_STRING,
+              check_ambiguity, override, 0, 0,
+              "model", 'm',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'a':	/* Always classify all genes in PCLs.  */
+        
+        
+          if (update_arg((void *)&(args_info->all_flag), 0, &(args_info->all_given),
+              &(local_args_info.all_given), optarg, 0, 0, ARG_FLAG,
+              check_ambiguity, override, 1, 0, "all", 'a',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'S':	/* Use slack rescaling (instead of margin rescaling).  */
+        
+        
+          if (update_arg((void *)&(args_info->slack_flag), 0, &(args_info->slack_given),
+              &(local_args_info.slack_given), optarg, 0, 0, ARG_FLAG,
+              check_ambiguity, override, 1, 0, "slack", 'S',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'v':	/* Sets the svm_struct verbosity.  */
+        
+        
+          if (update_arg( (void *)&(args_info->verbosity_arg), 
+               &(args_info->verbosity_orig), &(args_info->verbosity_given),
+              &(local_args_info.verbosity_given), optarg, 0, "0", ARG_INT,
+              check_ambiguity, override, 0, 0,
+              "verbosity", 'v',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 's':	/* Number of columns to skip in input pcls.  */
+        
+        
+          if (update_arg( (void *)&(args_info->skip_arg), 
+               &(args_info->skip_orig), &(args_info->skip_given),
+              &(local_args_info.skip_given), optarg, 0, "2", ARG_INT,
+              check_ambiguity, override, 0, 0,
+              "skip", 's',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'n':	/* Normalize PCLS to 0 mean 1 variance.  */
+        
+        
+          if (update_arg((void *)&(args_info->normalize_flag), 0, &(args_info->normalize_given),
+              &(local_args_info.normalize_given), optarg, 0, 0, ARG_FLAG,
+              check_ambiguity, override, 1, 0, "normalize", 'n',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'c':	/* Number of cross-validation sets ( arg of 1 will turn off cross-validation ).  */
+        
+        
+          if (update_arg( (void *)&(args_info->cross_validation_arg), 
+               &(args_info->cross_validation_orig), &(args_info->cross_validation_given),
+              &(local_args_info.cross_validation_given), optarg, 0, "5", ARG_INT,
+              check_ambiguity, override, 0, 0,
+              "cross_validation", 'c',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'e':	/* Sets the loss function for SVM learning: Choice of:
+        0\tZero/one loss: 1 if vector of predictions contains error, 0 otherwise.
+        \n.  */
+        
+        
+          if (update_arg( (void *)&(args_info->loss_function_arg), 
+               &(args_info->loss_function_orig), &(args_info->loss_function_given),
+              &(local_args_info.loss_function_given), optarg, 0, "0", ARG_INT,
+              check_ambiguity, override, 0, 0,
+              "loss_function", 'e',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 't':	/* SVM tradeoff constant C.  */
+        
+        
+          if (update_arg( (void *)&(args_info->tradeoff_arg), 
+               &(args_info->tradeoff_orig), &(args_info->tradeoff_given),
+              &(local_args_info.tradeoff_given), optarg, 0, "1", ARG_FLOAT,
+              check_ambiguity, override, 0, 0,
+              "tradeoff", 't',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'w':	/* Choice of structural learning algorithm (default 4):
+        0\tn-slack algorithm 
+        1\tn-slack algorithm with shrinking heuristic
+        2\t1-slack algorithm (primal) 
+        3\t1-slack algorithm (dual)
+        4\t1-slack algorithm (dual) with constraint cache\n.  */
+        
+        
+          if (update_arg( (void *)&(args_info->learning_algorithm_arg), 
+               &(args_info->learning_algorithm_orig), &(args_info->learning_algorithm_given),
+              &(local_args_info.learning_algorithm_given), optarg, 0, "3", ARG_INT,
+              check_ambiguity, override, 0, 0,
+              "learning_algorithm", 'w',
+              additional_error))
+            goto failure;
+        
+          break;
+        case 'M':	/* Memory map binary input.  */
+        
+        
+          if (update_arg((void *)&(args_info->mmap_flag), 0, &(args_info->mmap_given),
+              &(local_args_info.mmap_given), optarg, 0, 0, ARG_FLAG,
+              check_ambiguity, override, 1, 0, "mmap", 'M',
+              additional_error))
+            goto failure;
+        
+          break;
+
+        case 0:	/* Long option with no short option */
+        case '?':	/* Invalid option.  */
+          /* `getopt_long' already printed an error message.  */
+          goto failure;
+
+        default:	/* bug: option not considered.  */
+          fprintf (stderr, "%s: option unknown: %c%s\n", CMDLINE_PARSER_PACKAGE, c, (additional_error ? additional_error : ""));
+          abort ();
+        } /* switch */
+    } /* while */
+
+
+
+  if (check_required)
+    {
+      error += cmdline_parser_required2 (args_info, argv[0], additional_error);
+    }
+
+  cmdline_parser_release (&local_args_info);
+
+  if ( error )
+    return (EXIT_FAILURE);
+
+  /* Collect remaining (positional) arguments into args_info->inputs,
+     skipping a possible repeated program name.
+     NOTE(review): the malloc result is not checked before use. */
+  if (optind < argc)
+    {
+      int i = 0 ;
+      int found_prog_name = 0;
+      /* whether program name, i.e., argv[0], is in the remaining args
+         (this may happen with some implementations of getopt,
+          but surely not with the one included by gengetopt) */
+
+      i = optind;
+      while (i < argc)
+        if (argv[i++] == argv[0]) {
+          found_prog_name = 1;
+          break;
+        }
+      i = 0;
+
+      args_info->inputs_num = argc - optind - found_prog_name;
+      args_info->inputs =
+        (char **)(malloc ((args_info->inputs_num)*sizeof(char *))) ;
+      while (optind < argc)
+        if (argv[optind++] != argv[0])
+          args_info->inputs[ i++ ] = gengetopt_strdup (argv[optind-1]) ;
+    }
+
+  return 0;
+
+failure:
+  
+  cmdline_parser_release (&local_args_info);
+  return (EXIT_FAILURE);
+}

tools/SVMmulticlass/cmdline.h

+/** @file cmdline.h
+ *  @brief The header file for the command line option parser
+ *  generated by GNU Gengetopt version 2.22
+ *  http://www.gnu.org/software/gengetopt.
+ *  DO NOT modify this file, since it can be overwritten
+ *  @author GNU Gengetopt by Lorenzo Bettini */
+
+#ifndef CMDLINE_H
+#define CMDLINE_H
+
+/* If we use autoconf.  */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h> /* for FILE */
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#ifndef CMDLINE_PARSER_PACKAGE
+/** @brief the program name */
+#define CMDLINE_PARSER_PACKAGE "SVMmulti"
+#endif
+
+#ifndef CMDLINE_PARSER_VERSION
+/** @brief the program version */
+#define CMDLINE_PARSER_VERSION "1.0"
+#endif
+
+/** @brief Where the command line options are stored
+ *  For each option the generator emits up to four members: the parsed
+ *  value (*_arg), the verbatim command-line text (*_orig), the help
+ *  line (*_help), and an occurrence counter (*_given).  Flags have only
+ *  *_flag, *_help and *_given. */
+struct gengetopt_args_info
+{
+  const char *help_help; /**< @brief Print help and exit help description.  */
+  const char *version_help; /**< @brief Print version and exit help description.  */
+  char * labels_arg;	/**< @brief Labels file.  */
+  char * labels_orig;	/**< @brief Labels file original value given at command line.  */
+  const char *labels_help; /**< @brief Labels file help description.  */
+  char * output_arg;	/**< @brief Output file .  */
+  char * output_orig;	/**< @brief Output file  original value given at command line.  */
+  const char *output_help; /**< @brief Output file  help description.  */
+  char * input_arg;	/**< @brief Input PCL file .  */
+  char * input_orig;	/**< @brief Input PCL file  original value given at command line.  */
+  const char *input_help; /**< @brief Input PCL file  help description.  */
+  char * model_arg;	/**< @brief Model file.  */
+  char * model_orig;	/**< @brief Model file original value given at command line.  */
+  const char *model_help; /**< @brief Model file help description.  */
+  int all_flag;	/**< @brief Always classify all genes in PCLs (default=off).  */
+  const char *all_help; /**< @brief Always classify all genes in PCLs help description.  */
+  int slack_flag;	/**< @brief Use slack rescaling (instead of margin rescaling) (default=off).  */
+  const char *slack_help; /**< @brief Use slack rescaling (instead of margin rescaling) help description.  */
+  int verbosity_arg;	/**< @brief Sets the svm_struct verbosity (default='0').  */
+  char * verbosity_orig;	/**< @brief Sets the svm_struct verbosity original value given at command line.  */
+  const char *verbosity_help; /**< @brief Sets the svm_struct verbosity help description.  */
+  int skip_arg;	/**< @brief Number of columns to skip in input pcls (default='2').  */
+  char * skip_orig;	/**< @brief Number of columns to skip in input pcls original value given at command line.  */
+  const char *skip_help; /**< @brief Number of columns to skip in input pcls help description.  */
+  int normalize_flag;	/**< @brief Normalize PCLS to 0 mean 1 variance (default=off).  */
+  const char *normalize_help; /**< @brief Normalize PCLS to 0 mean 1 variance help description.  */
+  int cross_validation_arg;	/**< @brief Number of cross-validation sets ( arg of 1 will turn off cross-validation ) (default='5').  */
+  char * cross_validation_orig;	/**< @brief Number of cross-validation sets ( arg of 1 will turn off cross-validation ) original value given at command line.  */
+  const char *cross_validation_help; /**< @brief Number of cross-validation sets ( arg of 1 will turn off cross-validation ) help description.  */
+  int loss_function_arg;	/**< @brief Sets the loss function for SVM learning: Choice of:
+  0\tZero/one loss: 1 if vector of predictions contains error, 0 otherwise.
+  \n (default='0').  */
+  char * loss_function_orig;	/**< @brief Sets the loss function for SVM learning: Choice of:
+  0\tZero/one loss: 1 if vector of predictions contains error, 0 otherwise.
+  \n original value given at command line.  */
+  const char *loss_function_help; /**< @brief Sets the loss function for SVM learning: Choice of:
+  0\tZero/one loss: 1 if vector of predictions contains error, 0 otherwise.
+  \n help description.  */
+  float tradeoff_arg;	/**< @brief SVM tradeoff constant C (default='1').  */
+  char * tradeoff_orig;	/**< @brief SVM tradeoff constant C original value given at command line.  */
+  const char *tradeoff_help; /**< @brief SVM tradeoff constant C help description.  */
+  int learning_algorithm_arg;	/**< @brief Choice of structural learning algorithm (default 4):
+  0\tn-slack algorithm 
+  1\tn-slack algorithm with shrinking heuristic
+  2\t1-slack algorithm (primal) 
+  3\t1-slack algorithm (dual)
+  4\t1-slack algorithm (dual) with constraint cache\n (default='3').  */
+  char * learning_algorithm_orig;	/**< @brief Choice of structural learning algorithm (default 4):
+  0\tn-slack algorithm 
+  1\tn-slack algorithm with shrinking heuristic
+  2\t1-slack algorithm (primal) 
+  3\t1-slack algorithm (dual)
+  4\t1-slack algorithm (dual) with constraint cache\n original value given at command line.  */
+  const char *learning_algorithm_help; /**< @brief Choice of structural learning algorithm (default 4):
+  0\tn-slack algorithm 
+  1\tn-slack algorithm with shrinking heuristic
+  2\t1-slack algorithm (primal) 
+  3\t1-slack algorithm (dual)
+  4\t1-slack algorithm (dual) with constraint cache\n help description.  */
+  int mmap_flag;	/**< @brief Memory map binary input (default=off).  */
+  const char *mmap_help; /**< @brief Memory map binary input help description.  */
+  
+  unsigned int help_given ;	/**< @brief Whether help was given.  */
+  unsigned int version_given ;	/**< @brief Whether version was given.  */
+  unsigned int labels_given ;	/**< @brief Whether labels was given.  */
+  unsigned int output_given ;	/**< @brief Whether output was given.  */
+  unsigned int input_given ;	/**< @brief Whether input was given.  */
+  unsigned int model_given ;	/**< @brief Whether model was given.  */
+  unsigned int all_given ;	/**< @brief Whether all was given.  */
+  unsigned int slack_given ;	/**< @brief Whether slack was given.  */
+  unsigned int verbosity_given ;	/**< @brief Whether verbosity was given.  */
+  unsigned int skip_given ;	/**< @brief Whether skip was given.  */
+  unsigned int normalize_given ;	/**< @brief Whether normalize was given.  */
+  unsigned int cross_validation_given ;	/**< @brief Whether cross_validation was given.  */
+  unsigned int loss_function_given ;	/**< @brief Whether loss_function was given.  */
+  unsigned int tradeoff_given ;	/**< @brief Whether tradeoff was given.  */
+  unsigned int learning_algorithm_given ;	/**< @brief Whether learning_algorithm was given.  */
+  unsigned int mmap_given ;	/**< @brief Whether mmap was given.  */
+
+  char **inputs ; /**< @brief unnamed options (options without names) */
+  unsigned inputs_num ; /**< @brief unnamed options number */
+} ;
+
+/** @brief The additional parameters to pass to parser functions */
+struct cmdline_parser_params
+{
+  int override; /**< @brief whether to override possibly already present options (default 0; treated as a boolean: 0 = no, nonzero = yes) */
+  int initialize; /**< @brief whether to initialize the option structure gengetopt_args_info (default 1) */
+  int check_required; /**< @brief whether to check that all required options were provided (default 1) */
+  int check_ambiguity; /**< @brief whether to check for options already specified in the option structure gengetopt_args_info (default 0) */
+  int print_errors; /**< @brief whether getopt_long should print an error message for a bad option (default 1) */
+} ;
+
+/** @brief the purpose string of the program */
+extern const char *gengetopt_args_info_purpose;
+/** @brief the usage string of the program */
+extern const char *gengetopt_args_info_usage;
+/** @brief all the lines making the help output */
+extern const char *gengetopt_args_info_help[];
+
+/**
+ * The command line parser
+ * @param argc the number of command line options
+ * @param argv the command line options
+ * @param args_info the structure where option information will be stored
+ * @return 0 if everything went fine, NON 0 if an error took place
+ */
+int cmdline_parser (int argc, char * const *argv,
+  struct gengetopt_args_info *args_info);
+
+/**
+ * The command line parser (version with additional parameters - deprecated)
+ * @param argc the number of command line options
+ * @param argv the command line options
+ * @param args_info the structure where option information will be stored
+ * @param override whether to override possibly already present options
+ * @param initialize whether to initialize the option structure args_info
+ * @param check_required whether to check that all required options were provided
+ * @return 0 if everything went fine, NON 0 if an error took place
+ * @deprecated use cmdline_parser_ext() instead
+ */
+int cmdline_parser2 (int argc, char * const *argv,
+  struct gengetopt_args_info *args_info,
+  int override, int initialize, int check_required);
+
+/**
+ * The command line parser (version with additional parameters)
+ * @param argc the number of command line options
+ * @param argv the command line options
+ * @param args_info the structure where option information will be stored
+ * @param params additional parameters for the parser (see cmdline_parser_params)
+ * @return 0 if everything went fine, NON 0 if an error took place
+ */
+int cmdline_parser_ext (int argc, char * const *argv,
+  struct gengetopt_args_info *args_info,
+  struct cmdline_parser_params *params);
+
+/**
+ * Save the contents of the option struct into an already open FILE stream.
+ * @param outfile the stream where to dump options
+ * @param args_info the option struct to dump
+ * @return 0 if everything went fine, NON 0 if an error took place
+ */
+int cmdline_parser_dump(FILE *outfile,
+  struct gengetopt_args_info *args_info);
+
+/**
+ * Save the contents of the option struct into a (text) file.
+ * This file can be read by the config file parser (if generated by gengetopt)
+ * @param filename the file where to save
+ * @param args_info the option struct to save
+ * @return 0 if everything went fine, NON 0 if an error took place
+ */
+int cmdline_parser_file_save(const char *filename,
+  struct gengetopt_args_info *args_info);
+
+/**
+ * Print the help
+ */
+void cmdline_parser_print_help(void);
+/**
+ * Print the version
+ */
+void cmdline_parser_print_version(void);
+
+/**
+ * Initializes all the fields a cmdline_parser_params structure 
+ * to their default values
+ * @param params the structure to initialize
+ */
+void cmdline_parser_params_init(struct cmdline_parser_params *params);
+
+/**
+ * Allocates dynamically a cmdline_parser_params structure and initializes
+ * all its fields to their default values
+ * (NOTE(review): presumably heap-allocated, so the caller should release it
+ * with free() when done -- confirm against the generated cmdline.c)
+ * @return the created and initialized cmdline_parser_params structure
+ */
+struct cmdline_parser_params *cmdline_parser_params_create(void);
+
+/**
+ * Initializes the passed gengetopt_args_info structure's fields
+ * (also set default values for options that have a default)
+ * @param args_info the structure to initialize
+ */
+void cmdline_parser_init (struct gengetopt_args_info *args_info);
+/**
+ * Deallocates the string fields of the gengetopt_args_info structure
+ * (but does not deallocate the structure itself)
+ * @param args_info the structure to deallocate
+ */
+void cmdline_parser_free (struct gengetopt_args_info *args_info);
+
+/**
+ * Checks that all the required options were specified
+ * @param args_info the structure to check
+ * @param prog_name the name of the program that will be used to print
+ *   possible errors
+ * @return 0 if everything went fine, NON 0 if an error took place
+ */
+int cmdline_parser_required (struct gengetopt_args_info *args_info,
+  const char *prog_name);
+
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+#endif /* CMDLINE_H */

tools/SVMmulticlass/stdafx.cpp

+/*****************************************************************************
+* This file is provided under the Creative Commons Attribution 3.0 license.
+*
+* You are free to share, copy, distribute, transmit, or adapt this work
+* PROVIDED THAT you attribute the work to the authors listed below.
+* For more information, please see the following web page:
+* http://creativecommons.org/licenses/by/3.0/
+*
+* This file is a component of the Sleipnir library for functional genomics,
+* authored by:
+* Curtis Huttenhower (chuttenh@princeton.edu)
+* Mark Schroeder
+* Maria D. Chikina
+* Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
+*
+* If you use this library, the included executable tools, or any related
+* code in your work, please cite the following publication:
+* Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
+* Olga G. Troyanskaya.
+* "The Sleipnir library for computational functional genomics"
+*****************************************************************************/
+#include "stdafx.h"
+
+/*!
+ * \page SVMmulti  SVMmulti
+ * 
+ * SVMmulti performs SVM learning using the SVM struct multiclass library.  It supports cross validation and
+ * reading from binary PCL files created by PCL2Bin.
+ * 
+ * \section sec_usage Usage
+ * 
+ * \subsection ssec_usage_basic Basic Usage
+ * 
+ * \code
+ * SVMmulti -l <labels_file> -p <params_file> -i <data.bin> -o <output_directory> -a
+ * \endcode
+ * 
+ * The labels file is of the format (NOTE WELL: IN ALL THE FOLLOWING FORMATS DELIMITERS ARE TABS --
+ * doxygen converts them to spaces automatically).
+ * \code
+ * ACTA2	-1
+ * ACTN4	1
+ * ADAM10	-1
+ * AGRN	1
+ * AGTR1	-1
+ * ALDOB	-1
+ * ALOX12	1
+ * ANGPT2	1
+ * APOA4	1
+ * AQP1	1
+ * \endcode
+ * where -1 indicates negative and 1 indicates positive.  The examples must be separated with tabs.
+ * 
+ * Output is of the format
+ * \code
+ * IGHV1-69	0	1.94073
+ * DAG1	1	1.9401
+ * FNDC3B	0	1.93543
+ * HPGD	-1	1.93181
+ * TPSAB1	0	1.92928
+ * CLIC5	1	1.92759
+ * \endcode
+ * where the first column is the example name, the second column is the gold standard status (matching labels)
+ * and the third column is the prediction from the SVM.
+ * 
+ * The params_file is of the format
+ * \code
+ * 10	0.1	0.5
+ * 10	0.01	0.5
+ * 10	0.001	0.5
+ * 10	0.0001	0.5
+ * 10	0.00001	0.5
+ * 10	0.000001	0.5
+ * \endcode
+ * where the first column represents the error function, the second column represents the tradeoff constant
+ * and the third column represents k_value (for precision at k recall, but unused for the AUC error function
+ * in the example above).
+ * 
+ * SVMmulti can also be used to output a model or learn a network, although currently those features are undocumented.
+ * 
+ * \subsection ssec_usage_detailed Detailed Usage
+ * 
+ * \includ