Commits

Jian Zhou committed a0b5c60

minor bug fixes/improvement for svmmulticlass/hierarchy

  • Participants
  • Parent commits ed5d3b8

Comments (0)

Files changed (3)

File src/svmstructtree.cpp

 		ifsm.clear();
 		ifsm.open(treefile);
 		if (!ifsm.is_open())
-			cerr << "Could not read label file" << endl;
-		cerr << "Onto File opened" << endl;
+			cerr << "Could not read Onto file" << endl;
+
 		static const size_t c_iBuffer = 65532; //change this if not enough
 		char acBuffer[c_iBuffer];
 		vector<string> vecstrTokens;
 			nodes[i]->children = newchildren;
 			//fill in ontology struct parameters
 			nodes[i]->index=i; //index
+			nodes[i]->inputlabelCount = 0;
 			if(nodes[i]->n_children==0) //isLeafnode
 				nodes[i]->isLeafnode=1;
 			else
 
 
 	vector<SVMLabel> CSVMSTRUCTTREE::ReadLabels(ifstream & ifsm) {
-		static const size_t c_iBuffer = 1024;
+		static const size_t c_iBuffer = 65532;
 		char acBuffer[c_iBuffer];
 		vector<string> vecstrTokens;
 		vector<char> multilabels;
 
 		if(struct_parm.num_classes==0)
 			cerr<< "Ontology must be read before reading labels!"<<endl;
+		else
+			cerr<<struct_parm.num_classes<< " Classes Read!"<<endl;
 		multilabels.resize(struct_parm.num_classes);
 		map<string,int>::iterator it;
 		while (!ifsm.eof()) {
 				multilabels[i]=0;
 			for(int i=1; i < vecstrTokens.size();i++){
 				it =  onto_map.find(vecstrTokens[i]);
-				if(it == onto_map.end()){
+				if(it == onto_map.end())
 					cerr<< "Unknown term: "<<vecstrTokens[1]<<endl;
+				else{
+					multilabels[onto_map[vecstrTokens[i]]]=1; // no label propagation currently, labels should be already propagated
+					struct_parm.treeStruct.nodes[ onto_map[vecstrTokens[i]] ]->inputlabelCount++;
 				}
-				else{
-					multilabels[onto_map[vecstrTokens[i]]]=1;
-				}
-
 			}
 			vecLabels.push_back(SVMArc::SVMLabel(vecstrTokens[0], multilabels));
 
 		return vecLabels;
 	}
 
+	void CSVMSTRUCTTREE::InitializeLikAfterReadLabels() { // Fills struct_parm.condLikelihood from the per-node inputlabelCount values.
+		struct_parm.condLikelihood = (double*)my_malloc(sizeof(double)*struct_parm.num_classes); // room for one double per class
+		struct_parm.condLikelihood[0] = 0; // index 0 is set directly rather than computed; presumably the root node — TODO confirm
+		for(int i=1; i<struct_parm.num_classes;i++){ // NOTE(review): dereferences nodes[i]->parent for every i >= 1 — confirm it is never null
+			struct_parm.condLikelihood[i] = log(struct_parm.treeStruct.nodes[i]->parent->inputlabelCount) // log(parent's count) minus log(node's own count)
+				- log(struct_parm.treeStruct.nodes[i]->inputlabelCount); // NOTE(review): log(0) is -inf, so a zero count yields an inf/NaN entry — confirm counts are positive
+		}
+	}
 	SAMPLE* CSVMSTRUCTTREE::CreateSample(Sleipnir::CPCL &PCL, vector<SVMLabel> SVMLabels) {
 		size_t i, j, iGene, iDoc;
 		int     n;       /* number of examples */

File src/svmstructtree.h

 
 		//read labels
 		vector<SVMLabel> ReadLabels(ifstream & ifsm);
-
+		void InitializeLikAfterReadLabels();
 		//Creates a sample using a single PCL and SVMlabels Looks up genes by name.
 		SAMPLE* CreateSample(Sleipnir::CPCL &PCL, vector<SVMLabel> SVMLabels);
 

File tools/SVMhierarchy/SVMhierarchy.cpp

 	}
 	cerr << "Read labels from file" << endl;
 
+	SVM.InitializeLikAfterReadLabels();
 
 	//Training
 	SAMPLE* pTrainSample;
 
 			if(i == (sArgs.cross_validation_arg-1)){
 				if (sArgs.all_flag || sArgs.model_given ) {
-					pTrainSample = SVM.CreateSample(PCL, vecLabels);
-					cerr << "Train with All Labeled Data " <<  endl;
-					SVM.Learn(*pTrainSample);
-					cerr << "Learned" << endl;
-
+					if(sArgs.cross_validation_arg!=1){
+						pTrainSample = SVM.CreateSample(PCL, vecLabels);
+						cerr << "Train with All Labeled Data " <<  endl;
+						SVM.Learn(*pTrainSample);
+						cerr << "Learned" << endl;
+					}
 					if (sArgs.model_given ){  //learn once and write to file
 						SVM.WriteModel(sArgs.model_arg);
 						cerr <<" Model Writen to file "<<sArgs.model_arg<<endl;