Commits

Jian Zhou committed f300dd7

SVMhierarchy

Comments (0)

Files changed (3)

src/svmstructtree.cpp

 				continue;
 			}
 
-			for (int i=0; i<multilabels.size();i++)
+			for (int i=1; i<multilabels.size();i++)
 				multilabels[i]=0;
+			multilabels[0]=1; //root node is always on
 			for(int i=1; i < vecstrTokens.size();i++){
 				it =  onto_map.find(vecstrTokens[i]);
 				if(it == onto_map.end())

tools/SVMhierarchy/SVMhierarchy.cpp

 		return 1;
 	}
 
-		//Read labels from file
+	//Read labels from file
 	vector<SVMArc::SVMLabel> vecLabels;
 	set<string> setLabeledGenes;
 	if (sArgs.labels_given) {
 		}
 		for (i = 0; i < vecLabels.size(); i++)
 			setLabeledGenes.insert(vecLabels[i].GeneName);
+		cerr << "Read labels from file" << endl;
 	}
-	cerr << "Read labels from file" << endl;
+
 
 	SVM.InitializeLikAfterReadLabels();
 
 	vector<SVMArc::Result> AllResults;
 	vector<SVMArc::Result> tmpAllResults;
 
-	if (sArgs.model_given && sArgs.output_given && (!sArgs.labels_given) ) { //read model and classify all
-		vector<SVMLabel> vecAllLabels;
+	if (sArgs.model_given && sArgs.output_given && (!sArgs.labels_given) ) { 
+		if(!sArgs.test_labels_given){//read model and classify all
+			vector<SVMLabel> vecAllLabels;
 
-		for (size_t i = 0; i < PCL.GetGenes(); i++)
-			vecAllLabels.push_back(SVMLabel(PCL.GetGene(i), 0));
+			for (size_t i = 0; i < PCL.GetGenes(); i++)
+				vecAllLabels.push_back(SVMLabel(PCL.GetGene(i), 0));
 
-		SVM.ReadModel(sArgs.model_arg);
-		AllResults = SVM.Classify(PCL, vecAllLabels);
-		ofstream ofsm;
-		ofsm.open(sArgs.output_arg);
-		if (ofsm.is_open())
+			SVM.ReadModel(sArgs.model_arg);
+			AllResults = SVM.Classify(PCL, vecAllLabels);
+			ofstream ofsm;
+			ofsm.open(sArgs.output_arg);
+			if (ofsm.is_open())
+				SVM.PrintResults(AllResults, ofsm);
+			else {
+				cerr << "Could not open output file" << endl;
+			}
+		}
+		else//read model and classify only test examples
+		{
+			ifsm.clear();
+			ifsm.open(sArgs.test_labels_arg);
+			if (ifsm.is_open())
+				vecLabels = SVM.ReadLabels(ifsm);
+			else {
+				cerr << "Could not read label file" << endl;
+				exit(1);
+			}
+			for (i = 0; i < vecLabels.size(); i++)
+				setLabeledGenes.insert(vecLabels[i].GeneName);
+
+
+			pTestVector[0].reserve((size_t) vecLabels.size() + sArgs.cross_validation_arg);
+			for (j = 0; j < vecLabels.size(); j++) {
+				pTestVector[0].push_back(vecLabels[j]);		      
+			}
+			tmpAllResults = SVM.Classify(PCL,	pTestVector[0]);
+			cerr << "Classified " << tmpAllResults.size() << " examples"<< endl;
+			AllResults.insert(AllResults.end(), tmpAllResults.begin(), tmpAllResults.end());
+			tmpAllResults.resize(0);
+			ofstream ofsm;
+			ofsm.clear();
+			ofsm.open(sArgs.output_arg);
 			SVM.PrintResults(AllResults, ofsm);
-		else {
-			cerr << "Could not open output file" << endl;
+			return 0;
 		}
 	} else if (sArgs.output_given && sArgs.labels_given) {
 		//do learning and classifying with cross validation

tools/SVMhierarchy/SVMhierarchy.ggo

-package	"SVMhierarchy"
-version	"1.0"
-purpose	"Hierarchical Multilabel SVM powered by SVM-struct"
-
-section "Main"
-option	"labels"				l	"Labels file"
-										string	typestr="filename"	no
-option	"output"				o	"Output file "
-										string	typestr="filename"  no
-option	"input"					i	"Input PCL file "
-										string	typestr="filename"  yes
-option	"model"					m	"Model file"
-										string	typestr="filename"  no
-option	"ontoparam"				h	"Ontology file"
-										string	typestr="filename"  no
-option  "all"       			a   "Always classify all genes in PCLs"  
-										flag off
-option  "slack"       			S   "Use slack rescaling (NOT implemented for Hierarchical Multilabel SVM)"
-										flag off
-
-section "Options"
-option "verbosity"  			v   "Sets the svm_struct verbosity"
-										int default="0" no
-option "skip"      				s   "Number of columns to skip in input pcls"
-										int default="2" no
-option	"normalize"				n	"Normalize PCLS to 0 mean 1 variance"
-										flag	off
-option	"cross_validation"		c	"Number of cross-validation sets ( arg of 1 will turn off cross-validation )"
-										int default="5" no
-option "loss_function" 		e   "Sets the loss function for SVM learning: Choice of:
-0\tHamming distance loss: total number of differences in label vector.
-1\tHamming distance loss (modified): total number of differences in label vector; over-annotation not counted as difference. 
-2\tLikelihood distance loss. 
-3\tLikelihood distance loss (modified). 
-\n" 
-										int default="0" no
-option "tradeoff"    			t   "SVM tradeoff constant C"
-										float default="1" no
-option "learning_algorithm" 		w   "Choice of structural learning algorithm:
-0\tn-slack algorithm 
-1\tn-slack algorithm with shrinking heuristic
-2\t1-slack algorithm (primal) 
-3\t1-slack algorithm (dual)
-4\t1-slack algorithm (dual) with constraint cache\n" 
-										int default="3" no
-option	"threads"		p	"number of threads (only apply to algorithm 3 currently)"
-										int default="4" no
-option	"mmap"					M	"Memory map binary input"
+package	"SVMhierarchy"
+version	"1.0"
+purpose	"Hierarchical Multilabel SVM powered by SVM-struct"
+
+section "Main"
+option	"labels"				l	"Labels file"
+										string	typestr="filename"	no
+option	"output"				o	"Output file "
+										string	typestr="filename"  no
+option	"input"					i	"Input PCL file "
+										string	typestr="filename"  yes
+option	"model"					m	"Model file"
+										string	typestr="filename"  no
+option	"test_labels"			T	"Test Labels file"
+										string	typestr="filename"	no
+option	"ontoparam"				h	"Ontology file"
+										string	typestr="filename"  no
+option  "all"       			a   "Always classify all genes in PCLs"  
+										flag off
+option  "slack"       			S   "Use slack rescaling (NOT implemented for Hierarchical Multilabel SVM)"
+										flag off
+
+section "Options"
+option "verbosity"  			v   "Sets the svm_struct verbosity"
+										int default="0" no
+option "skip"      				s   "Number of columns to skip in input pcls"
+										int default="2" no
+option	"normalize"				n	"Normalize PCLS to 0 mean 1 variance"
+										flag	off
+option	"cross_validation"		c	"Number of cross-validation sets ( arg of 1 will turn off cross-validation )"
+										int default="5" no
+option "loss_function" 		e   "Sets the loss function for SVM learning: Choice of:
+0\tHamming distance loss: total number of differences in label vector.
+1\tHamming distance loss (modified): total number of differences in label vector; over-annotation not counted as difference. 
+2\tLikelihood distance loss. 
+3\tLikelihood distance loss (modified). 
+\n" 
+										int default="0" no
+option "tradeoff"    			t   "SVM tradeoff constant C"
+										float default="1" no
+option "learning_algorithm" 		w   "Choice of structural learning algorithm (default 3):
+0\tn-slack algorithm 
+1\tn-slack algorithm with shrinking heuristic
+2\t1-slack algorithm (primal) 
+3\t1-slack algorithm (dual)
+4\t1-slack algorithm (dual) with constraint cache\n" 
+										int default="3" no
+option	"threads"		p	"Number of threads (only applies to algorithm 3 currently)"
+										int default="4" no
+option	"mmap"					M	"Memory map binary input"
 										flag	off