Commits

Slavko Zitnik committed 8c940cf

CHEMDNER paper re-runs

Comments (0)

Files changed (1)

src/main/java/si/zitnik/research/iobie/core/ner/test/Chemdner2013Evaluation.scala

   def exportCEM(results: AdderMap[String, Constituent], filename: String) {
     val writer = new PrintWriter(filename, "UTF-8")
     results.map.foreach{ case (docId, constituents) => {
-      val newList = constituents.sortWith((a: Constituent, b: Constituent) => a.example.get(ExampleLabel.EXAMPLE_PROB).asInstanceOf[Double] > b.example.get(ExampleLabel.EXAMPLE_PROB).asInstanceOf[Double]).zipWithIndex
+
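+      //remove duplicates: keep one constituent per (startIdx, endIdx) span; constituents are
+      //visited in ascending EXAMPLE_PROB order, so a later put overwrites with the higher-probability one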
+      val merged = new mutable.HashMap[(Int,Int), Constituent]()
+      constituents.sortWith((a: Constituent, b: Constituent) => a.example.get(ExampleLabel.EXAMPLE_PROB).asInstanceOf[Double] < b.example.get(ExampleLabel.EXAMPLE_PROB).asInstanceOf[Double]).foreach(c => {
+        val startIdx = c.get(Label.START_IDX).asInstanceOf[Int]
+        val endIdx = c.example.get(c.endIdx-1).get(Label.START_IDX).asInstanceOf[Int] + c.example.get(c.endIdx-1).get(Label.OBS).asInstanceOf[String].size
+        merged.put((startIdx, endIdx), c)
+      })
+
+      val newList = merged.values.toList.sortWith((a: Constituent, b: Constituent) => a.example.get(ExampleLabel.EXAMPLE_PROB).asInstanceOf[Double] > b.example.get(ExampleLabel.EXAMPLE_PROB).asInstanceOf[Double]).zipWithIndex
       newList.foreach{ case (constituent, idx) => {
         val startIdx = constituent.get(Label.START_IDX)
         val endIdx = constituent.example.get(constituent.endIdx-1).get(Label.START_IDX).asInstanceOf[Int] + constituent.example.get(constituent.endIdx-1).get(Label.OBS).asInstanceOf[String].size
     var testData = importTrainData()
     logger.info("Tagging with classifier 1")
     val tokenFullResultsTrain = getResults(testData, tokenFullCRFClassifier)
-
+    logger.info("Tagging with classifier 1 done")
 
     logger.info("Loading test ...")
     testData = relabelToMentionExamples(importTrainData())
     logger.info("Tagging with classifier 2")
     val tokenMentionResultsTrain = getMentionResults(testData, tokenMentionCRFClassifier)
+    logger.info("Tagging with classifier 2 done")
 
     val mergedResultsTrain = merge(tokenFullResultsTrain, tokenMentionResultsTrain)
 
 
     //train SVM
     val svm = new LibSVM()
+    logger.info("Training SVM ...")
     svm.buildClassifier(instances)
+    logger.info("Training SVM done.")
 
     //3. label and export
     //classify instances
-      logger.info("Loading test ...")
+
       testData = importTestData()
-      logger.info("Tagging with classifier 1")
       val tokenFullResultsTest = getResults(testData, tokenFullCRFClassifier)
 
-      logger.info("Loading test ...")
       testData = relabelToMentionExamples(importTestData())
-      logger.info("Tagging with classifier 2")
       val tokenMentionResultsTest = getMentionResults(testData, tokenMentionCRFClassifier)
 
       val mergedResultsTest = merge(tokenFullResultsTest, tokenMentionResultsTest)
 
+    logger.info("SVM inference ...")
       val finalMergedResults = new AdderMap[String, Constituent]
       for (docId <- mergedResultsTest.map.keySet) {
         val buffer = mergedResultsTest.map.get(docId).get
             finalMergedResults.put(docId, v._1)
           }
         }
+        logger.info("End of SVM inference")
 
 
       exportCEM(finalMergedResults, "temp/CEM_test_merged.txt")