1. Slavko Zitnik
  2. IOBIE

Commits

Slavko Zitnik  committed 700beb5

Chemdner 2013 SVM run

  • Participants
  • Parent commits d50e674
  • Branches master

Comments (0)

Files changed (1)

File src/main/java/si/zitnik/research/iobie/core/ner/test/Chemdner2013Evaluation.scala

View file
     }*/
 
     logger.info("Loading test ...")
-    var testData = importDevData().subExamples(0, 100)
+    var testData = importTrainData()
     logger.info("Tagging with classifier 1")
     val tokenFullResults = getResults(testData, tokenFullCRFClassifier)
 
 
     logger.info("Loading test ...")
-    testData = relabelToMentionExamples(importDevData()).subExamples(0, 100)
+    testData = relabelToMentionExamples(importTrainData())
     logger.info("Tagging with classifier 2")
     val tokenMentionResults = getMentionResults(testData, tokenMentionCRFClassifier)
 
-    val mergedResults = merge(tokenFullResults, tokenMentionResults)
+    var mergedResults = merge(tokenFullResults, tokenMentionResults)
 
     //create learning data
     val attInfo = new FastVector()
     val svm = new LibSVM()
     svm.buildClassifier(instances)
 
-    var a = svm.distributionForInstance(instances.instance(3))
-    a = svm.distributionForInstance(instances.instance(4))
-    a = svm.distributionForInstance(instances.instance(5))
-    a = svm.distributionForInstance(instances.instance(6))
-    a = svm.distributionForInstance(instances.instance(7))
+    //classify instances
+    for (i <- 0 to 15000 by 5000) {
 
+      logger.info("Loading test ...")
+      testData = importTestData(Some((i, i+5000)))
+      logger.info("Tagging with classifier 1")
+      val tokenFullResults = getResults(testData, tokenFullCRFClassifier)
 
+      logger.info("Loading test ...")
+      testData = relabelToMentionExamples(importTestData(Some((i, i+5000))))
+      logger.info("Tagging with classifier 2")
+      val tokenMentionResults = getMentionResults(testData, tokenMentionCRFClassifier)
 
-    exportCDI(tokenFullResults, "temp/CDI_merged.txt")
-    exportCEM(tokenFullResults, "temp/CEM_merged.txt")
+      mergedResults = merge(tokenFullResults, tokenMentionResults)
+
+      val finalMergedResults = new AdderMap[String, Constituent]
+      for (docId <- mergedResults.map.keySet) {
+        val buffer = mergedResults.map.get(docId).get
+        for (v <- buffer) {
+
+          val instance = new Instance(instances.numAttributes())
+          instance.setDataset(instances)
+          instance.setValue(0, v._2)
+          instance.setValue(1, v._3)
+          instance.setValue(2, v._4)
+          instance.setValue(3, v._5)
+
+          if (svm.distributionForInstance(instance)(0) > 0.5) {
+            finalMergedResults.put(docId, v._1)
+          }
+        }
+      }
+
+
+      exportCEM(finalMergedResults, "temp/CEM_3_merged_%d.txt".format(i))
+      exportCDI(finalMergedResults, "temp/CDI_3_merged_%d.txt".format(i))
+    }
   }
 }