Commits

Slavko Zitnik committed 269fd34 Merge

Merge remote-tracking branch 'origin/master'

  • Participants
  • Parent commits a8d98fd, 1f04a5f

Comments (0)

Files changed (24)

File src/main/java/si/zitnik/research/iobie/algorithms/crf/Classifier.scala

 import si.zitnik.research.iobie.domain.{Examples, Example}
 import scala.collection.JavaConversions._
 import collection.mutable.ArrayBuffer
+import java.util
 
 /**
  * Created by IntelliJ IDEA.
 
 abstract
 class Classifier {
-  def classify(example: Example, normalized: Boolean = false): (ArrayList[String], Double)
+  def classify(example: Example, normalized: Boolean = false): (util.ArrayList[String], util.ArrayList[Double], Double)
 
   def classify(examplesTest: Examples): ArrayBuffer[ArrayBuffer[String]] = {
     val retVal = new ArrayBuffer[ArrayBuffer[String]]()

File src/main/java/si/zitnik/research/iobie/algorithms/crf/ExampleLabel.scala

   val DOC_ID = Value("DOCUMENT_ID")
   //Document ID
   val PARSE_TREE = Value("PARSE_TREE") //parse tree
+  val EXAMPLE_PROB = Value("EXAMPLE_PROB")
 }

File src/main/java/si/zitnik/research/iobie/algorithms/crf/Label.scala

   val ATTRIBUTE_TYPE = Value("ATTRIBUTE_TYPE")
   val SUBJECT_TYPE = Value("SUBJECT_TYPE")
   val OBJECT_TYPE = Value("OBJECT_TYPE")
+  val MARGINAL_PROB = Value("MARGINAL_PROBABILITY")
 }

File src/main/java/si/zitnik/research/iobie/algorithms/crf/linearchain/LCCRFClassifier.scala

 import java.util.ArrayList
 import com.typesafe.scalalogging.slf4j.Logging
 import breeze.linalg.SparseVector
+import java.util
 
 /**
  * Created by IntelliJ IDEA.
                        ) extends Classifier with Serializable {
 
   //TODO: check for normalized
-  def classify(example: Example, normalized: Boolean = false): (ArrayList[String], Double) = {
+  def classify(example: Example, normalized: Boolean = false): (util.ArrayList[String], util.ArrayList[Double], Double) = {
     dict.initExample(example)
     val scorer = new Scorer(learnLabelType, example, dict, w, wscale)
     val labelingIds = new ArrayList[Int]()
       labeling.add(dict.labelName(labelId))
     }
 
-    (labeling, score)
+    (labeling, null, score)
   }
 
   def test(data: Examples) {

File src/main/java/si/zitnik/research/iobie/algorithms/crf/test/SimpleCRFTest.scala

     val crfClassifier = crfLearner.trainAndTest(5, 15, testData)
 
     new Statistics(crfClassifier, testData).printStandardClassification(learnLabelType, "PER")
-    val (classfiedLabeling, _) = crfClassifier.classify(testData.get(0))
+    val (classfiedLabeling, _, _) = crfClassifier.classify(testData.get(0))
 
     testData.get(0).printLabeling(learnLabelType)
     println(classfiedLabeling.mkString(" "))

File src/main/java/si/zitnik/research/iobie/core/coreference/classifier/impl/CorefAllInOneClassifier.scala

 }
 
 private class AllInOneClassifier extends Classifier {
-  def classify(example: Example, normalized: Boolean): (util.ArrayList[String], Double) = {
+  def classify(example: Example, normalized: Boolean): (util.ArrayList[String], util.ArrayList[Double], Double) = {
     val retVal = new util.ArrayList[String]()
 
     for (i <- 0 until example.size()) {
       }
     }
 
-    (retVal, 0.)
+    (retVal, null, 0.)
   }
 
   def test(data: Examples) {}

File src/main/java/si/zitnik/research/iobie/core/coreference/classifier/impl/CorefSingletonClassifier.scala

 }
 
 class SingletonClassifier extends Classifier {
-  def classify(example: Example, normalized: Boolean): (util.ArrayList[String], Double) = {
+  def classify(example: Example, normalized: Boolean): (util.ArrayList[String], util.ArrayList[Double], Double) = {
     val retVal = new util.ArrayList[String]()
     for (i <- 0 until example.size()) {
       retVal.add("O")
     }
-    (retVal, 0.)
+    (retVal, null, 0.)
   }
 
   def test(data: Examples) {}

File src/main/java/si/zitnik/research/iobie/core/coreference/test/CoreferenceEvaluation.scala

     mentionExamplesTest.printStatistics(ommited = Array(Label.EXTENT, Label.OBS, Label.COREF), ommitedExample = Array(ExampleLabel.DOC_ID), ommitMentions = true)
 
 
-    visualizeDistances(mentionExamples, title = "Consecutive Mentions Distance Distribution", filename = "/Users/slavkoz/temp/trainDistr.png")
+    //visualizeDistances(mentionExamples, title = "Consecutive Mentions Distance Distribution", filename = "/Users/slavkoz/temp/trainDistr.png")
 
     //Do whole runners
     println("*******")
     /*
     //SemEval 2010
     */
-    evaluate ("SemEval2010", SemEvalData, FeatureFunctionPackages.bestSemEval2010CorefFeatureFunctions)
+    //evaluate ("SemEval2010", SemEvalData, FeatureFunctionPackages.bestSemEval2010CorefFeatureFunctions)
 
     //CoNLL 2012
 
     CoNLL2012Data.sources = Array(CoNLL2012ImporterSourceTypeEnum.BROADCAST_NEWS)
     evaluate ("CoNLL2012 BROADCAST_NEWS", CoNLL2012Data, FeatureFunctionPackages.bestCoNLL2012CorefFeatureFunctions)
 
-    CoNLL2012Data.sources = Array(CoNLL2012ImporterSourceTypeEnum.BROADCAST_CONVERSATION)
-    evaluate ("CoNLL2012 BROADCAST_CONVERSATION", CoNLL2012Data, FeatureFunctionPackages.bestCoNLL2012CorefFeatureFunctions)
+    //CoNLL2012Data.sources = Array(CoNLL2012ImporterSourceTypeEnum.BROADCAST_CONVERSATION)
+    //evaluate ("CoNLL2012 BROADCAST_CONVERSATION", CoNLL2012Data, FeatureFunctionPackages.bestCoNLL2012CorefFeatureFunctions)
 
-    CoNLL2012Data.sources = Array(CoNLL2012ImporterSourceTypeEnum.MAGAZINE)
-    evaluate ("CoNLL2012 MAGAZINE", CoNLL2012Data, FeatureFunctionPackages.bestCoNLL2012CorefFeatureFunctions)
+    //CoNLL2012Data.sources = Array(CoNLL2012ImporterSourceTypeEnum.MAGAZINE)
+    //evaluate ("CoNLL2012 MAGAZINE", CoNLL2012Data, FeatureFunctionPackages.bestCoNLL2012CorefFeatureFunctions)
 
     CoNLL2012Data.sources = Array(CoNLL2012ImporterSourceTypeEnum.NEWSWIRE)
     evaluate ("CoNLL2012 NEWSWIRE", CoNLL2012Data, FeatureFunctionPackages.bestCoNLL2012CorefFeatureFunctions)
 
-    CoNLL2012Data.sources = Array(CoNLL2012ImporterSourceTypeEnum.PIVOT_CORPUS)
-    evaluate ("CoNLL2012 PIVOT_CORPUS", CoNLL2012Data, FeatureFunctionPackages.bestCoNLL2012CorefFeatureFunctions)
+    //CoNLL2012Data.sources = Array(CoNLL2012ImporterSourceTypeEnum.PIVOT_CORPUS)
+    //evaluate ("CoNLL2012 PIVOT_CORPUS", CoNLL2012Data, FeatureFunctionPackages.bestCoNLL2012CorefFeatureFunctions)
 
-    CoNLL2012Data.sources = Array(CoNLL2012ImporterSourceTypeEnum.TELEPHONE_CONVERSATION)
-    evaluate ("CoNLL2012 TELEPHONE_CONVERSATION", CoNLL2012Data, FeatureFunctionPackages.bestCoNLL2012CorefFeatureFunctions)
+    //CoNLL2012Data.sources = Array(CoNLL2012ImporterSourceTypeEnum.TELEPHONE_CONVERSATION)
+    //evaluate ("CoNLL2012 TELEPHONE_CONVERSATION", CoNLL2012Data, FeatureFunctionPackages.bestCoNLL2012CorefFeatureFunctions)
 
-    CoNLL2012Data.sources = Array(CoNLL2012ImporterSourceTypeEnum.WEB_TEXT)
-    evaluate ("CoNLL2012 WEB_TEXT", CoNLL2012Data, FeatureFunctionPackages.bestCoNLL2012CorefFeatureFunctions)
+    //CoNLL2012Data.sources = Array(CoNLL2012ImporterSourceTypeEnum.WEB_TEXT)
+    //evaluate ("CoNLL2012 WEB_TEXT", CoNLL2012Data, FeatureFunctionPackages.bestCoNLL2012CorefFeatureFunctions)
 
     CoNLL2012Data.sources = CoNLL2012ImporterSourceTypeEnum.values.toArray
     evaluate ("CoNLL2012 ALL_TOGETHER", CoNLL2012Data, FeatureFunctionPackages.bestCoNLL2012CorefFeatureFunctions)
 
     //ACE2004
-    /*
-    ACE2004Data.sources = Array(ACE2004DocumentType.ARABIC_TREEBANK)
-    ACE2004Data.reload()
-    evaluate ("ACE2004 ARABIC_TREEBANK", ACE2004Data, FeatureFunctionPackages.bestACE2004CorefFeatureFunctions)
+
+    //ACE2004Data.sources = Array(ACE2004DocumentType.ARABIC_TREEBANK)
+    //ACE2004Data.reload()
+    //evaluate ("ACE2004 ARABIC_TREEBANK", ACE2004Data, FeatureFunctionPackages.bestACE2004CorefFeatureFunctions)
 
     ACE2004Data.sources = Array(ACE2004DocumentType.BROADCAST_NEWS)
     ACE2004Data.reload()
     evaluate ("ACE2004 BROADCAST_NEWS", ACE2004Data, FeatureFunctionPackages.bestACE2004CorefFeatureFunctions)
 
-    ACE2004Data.sources = Array(ACE2004DocumentType.CHINESE_TREEBANK)
-    ACE2004Data.reload()
-    evaluate ("ACE2004 CHINESE_TREEBANK", ACE2004Data, FeatureFunctionPackages.bestACE2004CorefFeatureFunctions)
+    //ACE2004Data.sources = Array(ACE2004DocumentType.CHINESE_TREEBANK)
+    //ACE2004Data.reload()
+    //evaluate ("ACE2004 CHINESE_TREEBANK", ACE2004Data, FeatureFunctionPackages.bestACE2004CorefFeatureFunctions)
 
-    ACE2004Data.sources = Array(ACE2004DocumentType.FISHER_TRANSCRIPTS)
-    ACE2004Data.reload()
-    evaluate ("ACE2004 FISHER_TRANSCRIPTS", ACE2004Data, FeatureFunctionPackages.bestACE2004CorefFeatureFunctions)
+    //ACE2004Data.sources = Array(ACE2004DocumentType.FISHER_TRANSCRIPTS)
+    //ACE2004Data.reload()
+    //evaluate ("ACE2004 FISHER_TRANSCRIPTS", ACE2004Data, FeatureFunctionPackages.bestACE2004CorefFeatureFunctions)
 
     ACE2004Data.sources = Array(ACE2004DocumentType.NEWSWIRE)
     ACE2004Data.reload()
     ACE2004Data.sources = ACE2004DocumentType.values.toArray
     ACE2004Data.reloadCullota()
     evaluate ("ACE2004 ALL_TOGETHER", ACE2004Data, FeatureFunctionPackages.bestACE2004CorefFeatureFunctions)
-    */
+
 
     //SIMPLE TEST:
     //best results on train domain from skipNumbers = 0 .. 50

File src/main/java/si/zitnik/research/iobie/core/coreference/util/MentionExamplesBuilder.scala

     genericBuilder((example: Example) => example.getAllMentions().map(Constituent.clone(_)), false)
   }
 
-  private def genericBuilder(constituentsGetter: (Example)=>mutable.Buffer[Constituent], clean: Boolean = true): Examples = {
+  def genericBuilder(constituentsGetter: (Example)=>mutable.Buffer[Constituent], clean: Boolean = true): Examples = {
     val mentionExamples = new Examples()
 
     //separation into groups of examples

File src/main/java/si/zitnik/research/iobie/core/coreference/util/MentionExamplesToCorefExamplesTransformer.scala

   def toPairwiseCorefExamples(example: Example): Examples = {
     val retVal = new Examples()
 
-    for (pair <- example.combinations(2)) {
-      val leftToken = pair.get(0)
-      val rightToken = pair.get(1)
-
-      val corefExample = toPairwiseCorefExample(leftToken, rightToken)
-      retVal.add(corefExample)
+    val len = example.size()
+    for (i <- 0 until len) {
+      for (j <- (i+1) until math.min(i+11, len)) {
+        val leftToken = example.get(i)
+        val rightToken = example.get(j)
+        val corefExample = toPairwiseCorefExample(leftToken, rightToken)
+        retVal.add(corefExample)
+      }
     }
 
     retVal

File src/main/java/si/zitnik/research/iobie/core/ner/mention/classifier/impl/NERMentionClassifier.scala

                             val classifier: Classifier,
                             val learnLabelType: Label.Value = Label.NE) extends Classifier {
 
-  def classify(mentionExample: Example, normalized: Boolean): (util.ArrayList[String], Double) = {
+  def classify(mentionExample: Example, normalized: Boolean): (util.ArrayList[String], util.ArrayList[Double], Double) = {
     classifier.classify(mentionExample, normalized)
   }
 

File src/main/java/si/zitnik/research/iobie/core/ner/mention/learner/NERMentionLearner.scala

 import si.zitnik.research.iobie.thirdparty.crfsuite.api.CRFSuiteLCCRFLearner
 import si.zitnik.research.iobie.coreference.classifier.impl.CorefMultipleClassifier
 import si.zitnik.research.iobie.core.ner.mention.classifier.impl.NERMentionClassifier
+import scala.collection.JavaConversions._
 
 /**
  *
 
     for (epoch <- 1 to math.max(allEpochs / epochsBetweenTest, 1)) {
       classifier = train(epochsBetweenTest)
-      logger.info("Training perf:")
-      classifier.test(testMentionExamples)
+      //logger.info("Training perf:")
+      //classifier.test(testMentionExamples)
 
       if (testMentionExamples != mentionExamples) {
         logger.info("Testing perf:")
-        classifier.test(testMentionExamples)
+        testMentionExamples.foreach(e => classifier.classify(e))
+        //classifier.test(testMentionExamples) //TODO: update methods
       }
     }
 

File src/main/java/si/zitnik/research/iobie/core/ner/test/Chemdner2013Evaluation.scala

 package si.zitnik.research.iobie.core.ner.test
 
 import si.zitnik.research.iobie.datasets.chemdner2013.{Chemdner2013DatasetType, Chemdner2013Importer}
-import si.zitnik.research.iobie.domain.Examples
+import si.zitnik.research.iobie.domain.{Example, Examples}
+import si.zitnik.research.iobie.core.ner.mention.learner.NERMentionLearner
+import si.zitnik.research.iobie.algorithms.crf.feature.packages.FeatureFunctionPackages
+import si.zitnik.research.iobie.algorithms.crf.{ExampleLabel, Label}
+import si.zitnik.research.iobie.coreference.util.MentionExamplesBuilder
+import java.util.ArrayList
+import si.zitnik.research.iobie.domain.constituent.Constituent
+import si.zitnik.research.iobie.thirdparty.opennlp.api.ParseTagger
+import collection.mutable
+import si.zitnik.research.iobie.gui.coref.ParseTreeVisualizer
+import scala.collection.JavaConversions._
+import collection.mutable.ArrayBuffer
+import si.zitnik.research.iobie.util.AdderMap
+import java.io.{PrintWriter, FileWriter}
 
 /**
  *   
  * @version: 1.0.0 
  * @since: 1.0.0
  */
-class Chemdner2013Evaluation {
+object Chemdner2013Evaluation {
+  val tokenTagsToInclude = Set("FAMILY", "NO CLASS", "FORMULA", "TRIVIAL", "IDENTIFIER", "ABBREVIATION", "MULTIPLE", "SYSTEMATIC")
+  val mentionTagsToInclude = Set("M")
 
-  def importTrainingData() = {
-    val examplesDev = new Chemdner2013Importer(Chemdner2013DatasetType.development).importForIE()
-    val examplesTrain = new Chemdner2013Importer(Chemdner2013DatasetType.training).importForIE()
+  def importTrainData() = {
+    val trainData = new Chemdner2013Importer(Chemdner2013DatasetType.training).importForIE().subExamples(0, 200)
+    new ParseTagger().tag(trainData)
 
-    val examples = new Examples()
-    examples.addAll(examplesDev)
-    examples.addAll(examplesTrain)
+    //val devData = new Chemdner2013Importer(Chemdner2013DatasetType.development).importForIE().subExamples(0, 100)
+    //new ParseTagger().tag(devData)
+    //devData
+
+    trainData
+  }
+
+  def importTestData() = {
+    val devData = new Chemdner2013Importer(Chemdner2013DatasetType.development).importForIE().subExamples(0, 200)
+    new ParseTagger().tag(devData)
+    devData
+  }
+
+  def buildConstituentsExamples(examples: Examples): Examples = {
+    val retVal = new MentionExamplesBuilder(examples, Set[String](), ExampleLabel.DOC_ID).
+      detectAndBuild()
+    retVal
+  }
+
+  def relabelToMentionExamples(examples: Examples) = {
+    examples.foreach(_.foreach(t => {
+      if (!t.get(Label.NE).asInstanceOf[String].equals("O")) {
+        t.put(Label.OBS, "M")
+      }
+    }))
     examples
   }
 
+  def exportCDI(results: AdderMap[String, Constituent], filename: String) {
+    val writer = new PrintWriter(filename, "UTF-8")
+    results.map.foreach{ case (docId, constituents) => {
+      val newList = constituents.sortWith((a: Constituent, b: Constituent) => a.get(Label.MARGINAL_PROB).asInstanceOf[Double] < b.get(Label.MARGINAL_PROB).asInstanceOf[Double]).zipWithIndex
+      newList.foreach{ case (constituent, idx) => {
+        //val startIdx = constituent.get(Label.START_IDX)
+        //val endIdx = constituent.example.get(constituent.endIdx-1).get(Label.START_IDX).asInstanceOf[Int] + constituent.example.get(constituent.endIdx-1).get(Label.OBS).asInstanceOf[String].size
+        val line = "%s\t%s\t%d\t%.2f\n".format(docId, constituent.getText(), idx+1, constituent.get(Label.MARGINAL_PROB).asInstanceOf[Double])
+        writer.write(line)
+        println()
+      }}
+    }}
+    writer.close()
+  }
+
+  def exportCEM(results: AdderMap[String, Constituent], filename: String) {
+    val writer = new PrintWriter(filename, "UTF-8")
+    results.map.foreach{ case (docId, constituents) => {
+      val newList = constituents.sortWith((a: Constituent, b: Constituent) => a.example.get(ExampleLabel.EXAMPLE_PROB).asInstanceOf[Double] < b.example.get(ExampleLabel.EXAMPLE_PROB).asInstanceOf[Double]).zipWithIndex
+      newList.foreach{ case (constituent, idx) => {
+        val startIdx = constituent.get(Label.START_IDX)
+        val endIdx = constituent.example.get(constituent.endIdx-1).get(Label.START_IDX).asInstanceOf[Int] + constituent.example.get(constituent.endIdx-1).get(Label.OBS).asInstanceOf[String].size
+        val line = "%s\t%s:%d:%d\t%d\t0.5\n".format(docId, constituent.example.get(ExampleLabel.TYPE), startIdx, endIdx, idx+1)
+        writer.write(line)
+        println()
+      }}
+    }}
+    writer.close()
+  }
+
   def main(args: Array[String]) {
-    val trainingData = importTrainingData()
-    trainingData.printStatistics()
+    //1. import data
+    val trainTokenFull = importTrainData() //token-based, full
+    //val trainTokenMention = relabelToMentionExamples(importTrainData()) //token-based, mention
+    //val trainConstituentFull = buildConstituentsExamples(importTrainData()) //constituent-based, full
+    //val trainConstituentMention = relabelToMentionExamples(buildConstituentsExamples(importTrainData())) //constituent-based, mention
+
+    val testTokenFull = importTestData()
+
+    //2. build CRF classifiers
+    val featureFunctions = FeatureFunctionPackages.standardFFunctions
+
+    val tokenFullCRFClassifier = new NERMentionLearner(trainTokenFull, featureFunctions, Label.NE, "tokenFull_chemdner_NER_model").train()
+    //val tokenMentionCRFClassifier = new NERMentionLearner(trainTokenMention, featureFunctions, Label.NE, "tokenMention_chemdner_NER_model").train()
+    //val constituentFullCRFClassifier = new NERMentionLearner(trainConstituentFull, featureFunctions, Label.NE, "constituentFull_chemdner_NER_model").train()
+    //val constituentMentionCRFClassifier = new NERMentionLearner(trainConstituentMention, featureFunctions, Label.NE, "constituentMention_chemdner_NER_model").train()
+
+    //3. label data with CRF
+    val tokenFullResults = new AdderMap[String, Constituent]
+    testTokenFull.foreach(e => {
+      val (neLabels, neMarginalProbabilities, seqProbability) = tokenFullCRFClassifier.classify(e)
+      //do labeling
+      e.setLabeling(Label.L1_NE, neLabels.toArray)
+      e.setLabeling(Label.MARGINAL_PROB, neMarginalProbabilities.toArray)
+      e.put(ExampleLabel.EXAMPLE_PROB, seqProbability)
+      tokenFullResults.put(e.get(ExampleLabel.DOC_ID).asInstanceOf[String], e.getLabelingConstituents(Label.L1_NE, tokenTagsToInclude))
+    })
+
+
+
+    //4. merge&deduplicate constituents
+    val fullResults = new AdderMap[String, Constituent]
+    fullResults.map.putAll(tokenFullResults.map)
+
+    //5. process data with SVM
+
+
+    //6. export CDI & CEM
+    //6a. export for CDI
+    //6b. export for CEM
+    exportCEM(fullResults, "temp/CEM_test.txt")
+    exportCDI(fullResults, "temp/CDI_test.txt")
+
+
   }
 }

File src/main/java/si/zitnik/research/iobie/datasets/chemdner2013/Chemdner2013Importer.scala

       var rightText = text
 
       while (!rightText.isEmpty) {
-        if (rightText.contains("-") | rightText.contains("/") | rightText.contains(".") | rightText.contains("+") | rightText.contains("@") | rightText.contains(",") | rightText.contains(":") | rightText.contains("˙")) {
-          val idx = Array(rightText.indexOf("-"), rightText.indexOf("/"), rightText.indexOf("."), rightText.indexOf("+"), rightText.indexOf("@"), rightText.indexOf(","), rightText.indexOf(":"), rightText.indexOf("˙")).filter(_ >= 0).min
+        if (rightText.contains("-") | rightText.contains("/") | rightText.contains(".") | rightText.contains("+") | rightText.contains("@") | rightText.contains(",") | rightText.contains(":") | rightText.contains("˙") | rightText.contains(" ")) {
+          val idx = Array(rightText.indexOf("-"), rightText.indexOf("/"), rightText.indexOf("."), rightText.indexOf("+"), rightText.indexOf("@"), rightText.indexOf(","), rightText.indexOf(":"), rightText.indexOf("˙"), rightText.indexOf(" ")).filter(_ >= 0).min
 
           //left
           val leftText = rightText.substring(0, idx)
 object Chemdner2013Importer {
 
   def main(args: Array[String]) {
-    //val trainExamples = new Chemdner2013Importer(Chemdner2013DatasetType.training).importForIE()
-    //trainExamples.printStatistics()
+    val trainExamples = new Chemdner2013Importer(Chemdner2013DatasetType.training).importForIE()
+    trainExamples.printStatistics()
 
     //val devExamples = new Chemdner2013Importer(Chemdner2013DatasetType.development).importForIE()
     //devExamples.printStatistics()
 
-    val testExamples = new Chemdner2013Importer(Chemdner2013DatasetType.test).importForIE()
-    testExamples.printStatistics()
+    //val testExamples = new Chemdner2013Importer(Chemdner2013DatasetType.test).importForIE()
+    //testExamples.printStatistics()
   }
 }

File src/main/java/si/zitnik/research/iobie/domain/Example.scala

     }
   }
 
+  def setLabeling(labelType: Label.Value, labels: Array[AnyRef]) {
+    if (labels.length != this.size) {
+      throw new Exception("Example and Labels size do not match!")
+    } else {
+      for ((l, i) <- labels.zipWithIndex) {
+        this.get(i).put(labelType, l)
+      }
+    }
+  }
+
   def setLabeling(labelType: Label.Value, classifier: Classifier) {
     val labels = classifier.classify(this)
     for ((token, label) <- this.zip(labels._1)) {

File src/main/java/si/zitnik/research/iobie/runners/CoNLL2000.scala

 
 
     new Statistics(crfClassifier, testData).printStandardClassification(learnLabelType, "I-NP")
-    val (classfiedLabeling, _) = crfClassifier.classify(testData.get(0))
+    val (classfiedLabeling, _, _) = crfClassifier.classify(testData.get(0))
     testData.get(0).printLabeling(Label.OBS)
     testData.get(0).printLabeling(Label.CHUNK)
     println(classfiedLabeling.mkString(" "))

File src/main/java/si/zitnik/research/iobie/runners/IJCNLP2012.scala

 
 
     for (example <- testData) {
-      val (classfiedLabeling, _) = crfClassifier.classify(example)
+      val (classfiedLabeling, _, _) = crfClassifier.classify(example)
 
       if (classfiedLabeling.contains("B-R") || classfiedLabeling.contains("I-R")) {
         example.printLabeling(Label.OBS)

File src/main/java/si/zitnik/research/iobie/statistics/FMeasure.scala

 import sun.reflect.generics.reflectiveObjects.NotImplementedException
 import collection.mutable.ArrayBuffer
 import scala.collection.JavaConversions._
+import java.util
 
 /**
  * This is a standard calculation of F-Measure as proposed for keyword selection approaches
       }
 
 
-      def classify(example: Example, normalized: Boolean): (java.util.ArrayList[String], Double) = throw new NotImplementedException()
+      def classify(example: Example, normalized: Boolean): (util.ArrayList[String], util.ArrayList[Double], Double) = throw new NotImplementedException()
       def test(data: Examples) {throw new NotImplementedException()}
     }
 

File src/main/java/si/zitnik/research/iobie/statistics/MUCStatistics.scala

 import si.zitnik.research.iobie.domain.IOBIEConversions._
 import sun.reflect.generics.reflectiveObjects.NotImplementedException
 import collection.mutable.ArrayBuffer
+import java.util
 
 /**
  * MUC6 calculated as proposed in:
         a
       }
 
-      def classify(example: Example, normalized: Boolean): (ArrayList[String], Double) = null
+      def classify(example: Example, normalized: Boolean): (util.ArrayList[String], util.ArrayList[Double], Double) = null
       def test(data: Examples) {throw new NotImplementedException()}
     }
 

File src/main/java/si/zitnik/research/iobie/thirdparty/crfsuite/api/CRFSuiteLCCRFClassifier.scala

   val command = List(
     IOBIEPropertiesUtil.getProperty(IOBIEProperties.CRFSUITE_CMD),
     "tag",
+    "-p", //TODO: sequence probability
+    "-i", //TODO: marginal probability
     "-m",
     IOBIEPropertiesUtil.getProperty(IOBIEProperties.CRFSUITE_MODEL_FOLDER) + "/" + modelSaveFilename)
 
   //TODO override examples to be faster
 
   //TODO add probability
-  def classify(example: Example, normalized: Boolean): (util.ArrayList[String], Double) = {
+  def classify(example: Example, normalized: Boolean): (util.ArrayList[String], util.ArrayList[Double], Double) = {
     featureDict.initExample(example)
 
     //START CRFSuite
     errorGobbler.join()
     outputGobbler.join()
 
-    (outputGobbler.data, 0.)
+    (outputGobbler.data, outputGobbler.marginalProbabilities, outputGobbler.sequenceProbability)
   }
 
   def processExamples(examples: Examples): ArrayBuffer[ArrayBuffer[String]] = {

File src/main/java/si/zitnik/research/iobie/thirdparty/crfsuite/api/CRFSuiteLCCRFLearner.scala

                            val featureFunctions: ArrayList[FeatureFunction],
                            val modelSaveFilename: String = "model.obj",
                            val featureThreshold: Int = 3,
-                           val printCRFSuiteOutput: Boolean = false) extends Learner(examples) with Logging {
+                           val printCRFSuiteOutput: Boolean = true) extends Learner(examples) with Logging {
 
   val featureDict = new FeatureDict(learnLabelType, featureFunctions, examples, featureThreshold)
   val command = List(
     IOBIEPropertiesUtil.getProperty(IOBIEProperties.CRFSUITE_CMD),
     "learn",
+    //"-p", //TODO: sequence probability - this parameter only needed for tagging
+    //"-i", //TODO: marginal probability - this parameter only needed for tagging
     "-m",
     IOBIEPropertiesUtil.getProperty(IOBIEProperties.CRFSUITE_MODEL_FOLDER) + "/" + modelSaveFilename,
-    //"-p",
     //"max_iterations=100",
     "-")
 
     val p = pb.start()
 
     //INPUT DATA
+    logger.info("Adding data to CRFSuite input ...")
     CRFSuiteUtil.writeExamples(examples, learnLabelType, featureDict, p.getOutputStream())
     p.getOutputStream().close()
+    logger.info("... DONE. Running CRFSuite ...")
 
 
     //LOG CRFSuite's output
     //read everything from gobblers
     errorGobbler.join()
     outputGobbler.join()
-
+    logger.info("... CRFSuite run finished.")
 
     new CRFSuiteLCCRFClassifier(learnLabelType, modelSaveFilename, featureDict, printCRFSuiteOutput)
   }

File src/main/java/si/zitnik/research/iobie/thirdparty/factorie/api/FactorieLCCRFClassifier.scala

                                private val predictor: VariableSettingsSampler[Label]) extends Classifier {
   var defaultSampling = 4
 
-  def classify(example: Example, normalized: Boolean): (util.ArrayList[String], Double) = throw new NotImplementedException()
+  def classify(example: Example, normalized: Boolean): (util.ArrayList[String], util.ArrayList[Double], Double) = throw new NotImplementedException()
 
   override def classify(examplesTest: Examples): ArrayBuffer[ArrayBuffer[String]] = {
     val retVal = new ArrayBuffer[ArrayBuffer[String]]()

File src/main/java/si/zitnik/research/iobie/util/AdderMap.scala

 package si.zitnik.research.iobie.util
 
 import scala.collection.mutable._
+import java.util
+import collection.GenTraversable
 
 
 /**
     }
   }
 
+  def put(k: K, v: TraversableOnce[V]) {
+    map.get(k) match {
+      case Some(b) => b.appendAll(v)
+      case None => {
+        val b = new ArrayBuffer[V]()
+        b.appendAll(v)
+        map.put(k, b)
+      }
+    }
+  }
+
   override def toString = map.toString()
 
 }

File src/main/java/si/zitnik/research/iobie/util/StreamGobblerExample.scala

  */
 class StreamGobblerExample(private val is: InputStream) extends Thread with Logging {
   @volatile var data = new util.ArrayList[String]()
+  @volatile var marginalProbabilities = new util.ArrayList[Double]()
+  @volatile var sequenceProbability = 0.
 
   override
   def run() {
       while (sc.hasNextLine()) {
         val label = sc.nextLine()
         if (!label.isEmpty) {
-          data.add(label)
+          if (label.startsWith("@probability")) {
+            sequenceProbability = label.split("\t")(1).toDouble
+          } else {
+            val splitIndex = label.lastIndexOf(':')
+            data.add(label.substring(0, splitIndex))
+            marginalProbabilities.add(label.substring(splitIndex+1).toDouble)
+          }
         }
       }
     } catch {