Commits

Slavko Zitnik committed 0582759

BioNLP: feature functions separated into groups

  • Participants
  • Parent commits 98edda5

Comments (0)

Files changed (10)

File src/main/java/si/zitnik/research/iobie/algorithms/crf/Classifier.scala

 
   def classify(examplesTest: Examples): (ArrayBuffer[ArrayBuffer[String]], ArrayBuffer[ArrayBuffer[Double]], ArrayBuffer[Double]) = {
     val retVal = new ArrayBuffer[ArrayBuffer[String]]()
+    val retVal1 = new ArrayBuffer[ArrayBuffer[Double]]()
+    val retVal2 = new ArrayBuffer[Double]()
 
     for (example <- examplesTest) {
-      retVal.add(new ArrayBuffer[String]() ++ this.classify(example)._1) //TODO: optimize, transforma to ArrayBuffer for all
+      val classified = this.classify(example)
+      retVal.add(new ArrayBuffer[String]() ++ classified._1) //TODO: optimize, transforma to ArrayBuffer for all
+      retVal1.add(new ArrayBuffer[Double]() ++ classified._2)
+      retVal2.add(classified._3)
     }
 
-    (retVal, ArrayBuffer[ArrayBuffer[Double]](), ArrayBuffer[Double]())
+    (retVal, retVal1, retVal2)
   }
 
   def test(data: Examples)

File src/main/java/si/zitnik/research/iobie/algorithms/crf/feature/packages/FeatureFunctionPackages.scala

 import si.zitnik.research.iobie.algorithms.crf.feature._
 import coreference._
 import java.util
-import si.zitnik.research.iobie.algorithms.crf.feature.relation.BSubtilisFeatureFunction
+import si.zitnik.research.iobie.algorithms.crf.feature.relation.{IsBSubtilisPairFeatureFunction, IsBSubtilisFeatureFunction, BSubtilisFeatureFunction}
 
 /**
  * Created with IntelliJ IDEA.
   def standardRelFFPackages() = {
     val featureFunctions = new ArrayList[FeatureFunction]()
 
-    featureFunctions.addAll(FeatureFunctionPackages.standardFFunctions)
-    //featureFunctions.add(new UnigramDistributionFeatureFunction())
-    //featureFunctions.add(new BigramDistributionFeatureFunction())
+    //part A
+    featureFunctions.add(new BigramDistributionFeatureFunction())
+    featureFunctions.add(new UnigramDistributionFeatureFunction())
 
-    /*
+    featureFunctions.add(new StartsUpperFeatureFunction(-1))
+    featureFunctions.add(new StartsUpperFeatureFunction())
+    featureFunctions.add(new StartsUpperTwiceFeatureFunction(-1))
+    featureFunctions.add(new StartsUpperTwiceFeatureFunction())
+
+    featureFunctions.add(new HearstCooccurrenceFeatureFunction(userPredicate = "BHCOC"))
+    featureFunctions.add(new HearstCooccurrenceFeatureFunction(userPredicate = "UHCOC"))
+
+    featureFunctions.add(new MentionTokenDistanceFeatureFunction(userPredicate = "BMD"))
+    featureFunctions.add(new MentionTokenDistanceFeatureFunction(userPredicate = "UMD"))
+
+
+    //part B
+
+    featureFunctions.add(new ParseTreeMentionDepthFeatureFunction(userPredicate = "BPMD"))
+    featureFunctions.add(new ParseTreeMentionDepthFeatureFunction(userPredicate = "UPMD"))
+
+    featureFunctions.add(new ParseTreeParentValueFeatureFunction(userPredicate = "UPPT1", parentLength = 1))
+    featureFunctions.add(new ParseTreeParentValueFeatureFunction(userPredicate = "BPPT1", parentLength = 1))
+    featureFunctions.add(new ParseTreeParentValueFeatureFunction(userPredicate = "UPPT2", parentLength = 2))
+    featureFunctions.add(new ParseTreeParentValueFeatureFunction(userPredicate = "BPPT2", parentLength = 2))
+    featureFunctions.add(new ParseTreeParentValueFeatureFunction(userPredicate = "UPPT3", parentLength = 3))
+    featureFunctions.add(new ParseTreeParentValueFeatureFunction(userPredicate = "BPPT3", parentLength = 3))
+
+    featureFunctions.add(new ParseTreePathFeatureFunction(userPredicate = "BPT", maxPathFromOneNode = 3))
+    featureFunctions.add(new ParseTreePathFeatureFunction(userPredicate = "UPT", maxPathFromOneNode = 3))
+
+
+    //part C
+
+    featureFunctions.add(new BSubtilisFeatureFunction(userPredicate = "USub"))
+    featureFunctions.add(new BSubtilisFeatureFunction(userPredicate = "BSub"))
+
+    featureFunctions.add(new IsBSubtilisFeatureFunction(userPredicate = "BIsSub"))
+    featureFunctions.add(new IsBSubtilisFeatureFunction(userPredicate = "UIsSub"))
+
+    featureFunctions.add(new IsBSubtilisPairFeatureFunction(userPredicate = "BIsSubP"))
+    featureFunctions.add(new IsBSubtilisPairFeatureFunction(userPredicate = "UIsSubP"))
+
+
+    //generators
+
+    //part D
+
+    featureFunctions.addAll(new UnigramXffixFeatureFunctionGenerator(Label.OBS, 2, -5 to 5).generate())
+    featureFunctions.addAll(new UnigramXffixFeatureFunctionGenerator(Label.OBS, 3, -5 to 5).generate())
+    featureFunctions.addAll(new UnigramXffixFeatureFunctionGenerator(Label.OBS, -3, -5 to 5).generate())
+    featureFunctions.addAll(new UnigramXffixFeatureFunctionGenerator(Label.OBS, -2, -5 to 5).generate())
+
+
+    //part E
+
+    featureFunctions.addAll(new LabelBigramFeatureFunctionGenerator(Label.OBS, userPredicate = "UOBS").generate())
+    featureFunctions.addAll(new LabelBigramFeatureFunctionGenerator(Label.OBS, userPredicate = "BOBS").generate())
     featureFunctions.addAll(new LabelBigramFeatureFunctionGenerator(Label.ENTITY_TYPE, range = -4 to 4, userPredicate = "UBEF").generate())
     featureFunctions.addAll(new LabelBigramFeatureFunctionGenerator(Label.ENTITY_TYPE, range = -4 to 4, userPredicate = "BBEF").generate())
     featureFunctions.addAll(new LabelBigramFeatureFunctionGenerator(Label.OBS, range = -4 to 4, userPredicate = "UBOF").generate())
     featureFunctions.addAll(new LabelBigramFeatureFunctionGenerator(Label.OBS, range = -4 to 4, userPredicate = "BBOF").generate())
     featureFunctions.addAll(new LabelBigramFeatureFunctionGenerator(Label.POS, range = -4 to 4, userPredicate = "UBPF").generate())
     featureFunctions.addAll(new LabelBigramFeatureFunctionGenerator(Label.POS, range = -4 to 4, userPredicate = "BBPF").generate())
+    featureFunctions.addAll(new LabelBigramFeatureFunctionGenerator(Label.LEMMA, range = -4 to 4, userPredicate = "UBLF").generate())
+    featureFunctions.addAll(new LabelBigramFeatureFunctionGenerator(Label.LEMMA, range = -4 to 4, userPredicate = "BBLF").generate())
+    featureFunctions.addAll(new LabelBigramFeatureFunctionGenerator(Label.COREF, userPredicate = "UBCF").generate())
+    featureFunctions.addAll(new LabelBigramFeatureFunctionGenerator(Label.COREF, userPredicate = "BBCF").generate())
 
     featureFunctions.addAll(new LabelUnigramFeatureFunctionGenerator(Label.ENTITY_TYPE, range = -4 to 4, userPredicate = "UUEF").generate())
     featureFunctions.addAll(new LabelUnigramFeatureFunctionGenerator(Label.ENTITY_TYPE, range = -4 to 4, userPredicate = "BUEF").generate())
     featureFunctions.addAll(new LabelUnigramFeatureFunctionGenerator(Label.OBS, range = -4 to 4, userPredicate = "BUOF").generate())
     featureFunctions.addAll(new LabelUnigramFeatureFunctionGenerator(Label.POS, range = -4 to 4, userPredicate = "UUPF").generate())
     featureFunctions.addAll(new LabelUnigramFeatureFunctionGenerator(Label.POS, range = -4 to 4, userPredicate = "BUPF").generate())
+    featureFunctions.addAll(new LabelUnigramFeatureFunctionGenerator(Label.LEMMA, range = -4 to 4, userPredicate = "UULF").generate())
+    featureFunctions.addAll(new LabelUnigramFeatureFunctionGenerator(Label.LEMMA, range = -4 to 4, userPredicate = "BULF").generate())
+    featureFunctions.addAll(new LabelUnigramFeatureFunctionGenerator(Label.COREF, userPredicate = "UUCF").generate())
+    featureFunctions.addAll(new LabelUnigramFeatureFunctionGenerator(Label.COREF, userPredicate = "BUCF").generate())
+
+
+    //part F
 
     featureFunctions.addAll(new ContextFeatureFunctionGenerator(userPredicate = "BContxt").generate())
     featureFunctions.addAll(new ContextFeatureFunctionGenerator(userPredicate = "UContxt").generate())
 
-    featureFunctions.add(new HearstCooccurrenceFeatureFunction(userPredicate = "BHCOC"))
-    featureFunctions.add(new HearstCooccurrenceFeatureFunction(userPredicate = "UHCOC"))
-
-    featureFunctions.add(new MentionTokenDistanceFeatureFunction(userPredicate = "BMD"))
-    featureFunctions.add(new MentionTokenDistanceFeatureFunction(userPredicate = "UMD"))
 
-    featureFunctions.add(new ParseTreeMentionDepthFeatureFunction(userPredicate = "BPMD"))
-    featureFunctions.add(new ParseTreeMentionDepthFeatureFunction(userPredicate = "UPMD"))
-    featureFunctions.add(new ParseTreeParentValueFeatureFunction(userPredicate = "UPPT1", parentLength = 1))
-    featureFunctions.add(new ParseTreeParentValueFeatureFunction(userPredicate = "BPPT1", parentLength = 1))
-    featureFunctions.add(new ParseTreeParentValueFeatureFunction(userPredicate = "UPPT2", parentLength = 2))
-    featureFunctions.add(new ParseTreeParentValueFeatureFunction(userPredicate = "BPPT2", parentLength = 2))
-    featureFunctions.add(new ParseTreeParentValueFeatureFunction(userPredicate = "UPPT3", parentLength = 3))
-    featureFunctions.add(new ParseTreeParentValueFeatureFunction(userPredicate = "BPPT3", parentLength = 3))
-    featureFunctions.add(new ParseTreePathFeatureFunction(userPredicate = "BPT", maxPathFromOneNode = 3))
-    featureFunctions.add(new ParseTreePathFeatureFunction(userPredicate = "UPT", maxPathFromOneNode = 3))
+    //part G
 
     featureFunctions.addAll(new PrevNextWordsFeatureFunctionGenerator(userPredicate = "BPN").generate())
     featureFunctions.addAll(new PrevNextWordsFeatureFunctionGenerator(userPredicate = "UPN").generate())
     featureFunctions.addAll(new PrevNextBetweenWordsFeatureFunctionGenerator(labelType = Label.POS, userPredicate = "BBPNLP").generate())
     featureFunctions.addAll(new PrevNextBetweenWordsFeatureFunctionGenerator(labelType = Label.POS, userPredicate = "UBPNLP").generate())
 
+
+    //part H
+
     featureFunctions.addAll(new SplitByDelimiterAndGenerateFirstNFeatureFunctionGenerator(labelType = Label.OBS, userPredicate = "BSBDaG").generate())
     featureFunctions.addAll(new SplitByDelimiterAndGenerateFirstNFeatureFunctionGenerator(labelType = Label.OBS, userPredicate = "USBDaG").generate())
     featureFunctions.addAll(new SplitByDelimiterAndGenerateFirstNFeatureFunctionGenerator(labelType = Label.LEMMA, userPredicate = "BSBDaGL").generate())
     featureFunctions.addAll(new SplitByDelimiterAndGenerateFirstNFeatureFunctionGenerator(labelType = Label.LEMMA, userPredicate = "USBDaGL").generate())
 
-    featureFunctions.add(new BSubtilisFeatureFunction(userPredicate = "USub"))
-    featureFunctions.add(new BSubtilisFeatureFunction(userPredicate = "BSub"))
-    */
+
     featureFunctions
   }
 

File src/main/java/si/zitnik/research/iobie/algorithms/crf/feature/relation/IsBSubtilisFeatureFunction.scala

+package si.zitnik.research.iobie.algorithms.crf.feature.relation
+
+import si.zitnik.research.iobie.algorithms.crf.{Label, FeatureFunction}
+import si.zitnik.research.iobie.domain.Example
+import si.zitnik.research.iobie.core.relationship.test.BioNLP2013Rules
+import si.zitnik.research.iobie.domain.constituent.Constituent
+
+/**
+ * Feature function that checks the element at the scored position with
+ * `BioNLP2013Rules.isBacillusSubtilisGene`.
+ *
+ * `score` yields `predicate + "T"` when the check passes and `null` otherwise
+ * (presumably `null` means "no feature emitted" to the caller - confirm
+ * against FeatureFunction). The superclass is constructed with
+ * `userPredicate + "="`; `predicate` is presumably that value as exposed by
+ * FeatureFunction - confirm.
+ *
+ * NOTE(review): `labelType` is accepted but never read in this class.
+ *
+ * @param labelType     label type; unused in the code shown here
+ * @param userPredicate prefix passed to FeatureFunction as `userPredicate + "="`
+ *
+ * User: slavkoz
+ * Date: 21/01/14
+ * Time: 20:15
+ */
+class IsBSubtilisFeatureFunction(labelType: Label.Value = Label.OBS, userPredicate: String = "BIsSub") extends FeatureFunction(userPredicate + "=") {
+
+  // (example: Example, i: Int) => String; the result is null when isBacillusSubtilisGene is false
+  def score = (example: Example, i: Int) => {
+    if (BioNLP2013Rules.isBacillusSubtilisGene(example.get(i).asInstanceOf[Constituent])) { // the element at i is cast to Constituent unconditionally
+      predicate + "T"
+    } else {
+      null
+    }
+  }
+}

File src/main/java/si/zitnik/research/iobie/algorithms/crf/feature/relation/IsBSubtilisPairFeatureFunction.scala

+package si.zitnik.research.iobie.algorithms.crf.feature.relation
+
+import si.zitnik.research.iobie.algorithms.crf.{FeatureFunction, Label}
+import si.zitnik.research.iobie.domain.Example
+import si.zitnik.research.iobie.core.relationship.test.BioNLP2013Rules
+import si.zitnik.research.iobie.domain.constituent.Constituent
+
+/**
+ * Pairwise variant of the B. subtilis gene check: inspects the element at
+ * position `i` together with its predecessor at `i - 1`, using
+ * `BioNLP2013Rules.isBacillusSubtilisGene` on each.
+ *
+ * `score` yields
+ *   - `null` for `i == 0` (there is no predecessor to pair with),
+ *   - `predicate + "Both"` when both elements pass the check,
+ *   - `predicate + "Left"` when only the element at `i - 1` passes,
+ *   - `predicate + "Right"` when only the element at `i` passes,
+ *   - `null` when neither passes.
+ *
+ * The superclass is constructed with `userPredicate + "="`; `predicate` is
+ * presumably that value as exposed by FeatureFunction - confirm.
+ *
+ * NOTE(review): `labelType` is accepted but never read in this class.
+ *
+ * @param labelType     label type; unused in the code shown here
+ * @param userPredicate prefix passed to FeatureFunction as `userPredicate + "="`
+ *
+ * User: slavkoz
+ * Date: 21/01/14
+ * Time: 20:20
+ */
+class IsBSubtilisPairFeatureFunction(labelType: Label.Value = Label.OBS, userPredicate: String = "BIsSubP") extends FeatureFunction(userPredicate + "=") {
+
+  // (example: Example, i: Int) => String; the result is null at i == 0 or when neither element passes the check
+  def score: (Example, Int) => String = (example: Example, i: Int) => {
+    if (i==0) {
+      null
+    } else {
+      val left = BioNLP2013Rules.isBacillusSubtilisGene(example.get(i-1).asInstanceOf[Constituent]) // predecessor
+      val right = BioNLP2013Rules.isBacillusSubtilisGene(example.get(i).asInstanceOf[Constituent]) // current element
+
+      if (left && right) {
+        predicate + "Both"
+      } else if (left) {
+        predicate + "Left"
+      } else if (right) {
+        predicate + "Right"
+      } else {
+        null
+      }
+    }
+  }
+}
+

File src/main/java/si/zitnik/research/iobie/core/relationship/classifier/impl/RelationshipMultipleClassifier.scala

     retVal
   }
 
-  def classifyRelationships(testData: ArrayBuffer[Constituent]) = {
+  def classifyRelationships(testData: ArrayBuffer[Constituent]): HashSet[Relationship] = {
+    classifyRelationships(testData, 0.0, 0.0)
+  }
+
+  def classifyRelationships(testData: ArrayBuffer[Constituent], tokenThreshold: Double, sequenceThreshold: Double) = {
     val retVal = new HashSet[Relationship]()
 
     //create example set & classify
       val relationshipExamples = ExamplesToRelationshipExamplesTransformer.toRelationshipExamples(testData, ArrayBuffer[Relationship](), skipNumber)
       val labelings = classifier.classify(relationshipExamples)
 
-      for ((relationshipExample, labeling) <- relationshipExamples.zip(labelings._1)) {
+      for (idx <- 0 until relationshipExamples.size) {
+        val (relationshipExample, labeling, tokenProbs, seqProb) = (relationshipExamples.get(idx), labelings._1.get(idx), labelings._2.get(idx), labelings._3.get(idx))
         for (i <- 1 until labeling.size) {
-          if (!labeling(i).equals("O")) {
+          if (!labeling(i).equals("O") && seqProb >= sequenceThreshold && tokenProbs(i) >= tokenThreshold) {
             retVal.add(new Relationship(null, labeling(i), relationshipExample(i-1).asInstanceOf[Constituent].oldConstituent, relationshipExample(i).asInstanceOf[Constituent].oldConstituent))
           }
         }
     new ArrayBuffer[HashSet[Relationship]]() ++ examples.map(v => classifyRelationships(v))
   }
 
-  def classifyRelationships(testData: mutable.Buffer[ArrayBuffer[Constituent]]): ArrayBuffer[HashSet[Relationship]] = {
+  def classifyRelationships(testData: mutable.Buffer[ArrayBuffer[Constituent]], tokenThreshold: Double = 0.0, sequenceThreshold: Double = 0.0): ArrayBuffer[HashSet[Relationship]] = {
     //debug output
     //for (i <- 0 until testData.length) {
     //  if (testData(i)(0).example.get(ExampleLabel.DOC_ID).toString.equals("PMID-11069677-S3")) {
     //}
     //end
 
-    new ArrayBuffer[HashSet[Relationship]]() ++ testData.map(v => classifyRelationships(v))
+    new ArrayBuffer[HashSet[Relationship]]() ++ testData.map(v => classifyRelationships(v, tokenThreshold, sequenceThreshold))
   }
 
 

File src/main/java/si/zitnik/research/iobie/core/relationship/test/BioNLP2013Relationship.scala

 object BioNLP2013Relationship extends Logging {
   val minSkipMentions = 0
   val maxSkipMentions = 10
+  val tokenThreshold = 0.0
+  val sequenceThreshold = 0.0
 
   def importTrain() = {
     val allTrainExamples = new Examples()
 
-    //val devExamples = new BioNLPImporter(IOBIEPropertiesUtil.getProperty(IOBIEProperties.BIONLP2013_PATH), BioNLP2013DatasetTypes.development).importForIE()
+    val devExamples = new BioNLPImporter(IOBIEPropertiesUtil.getProperty(IOBIEProperties.BIONLP2013_PATH), BioNLP2013DatasetTypes.development).importForIE()
+    allTrainExamples.addAll(devExamples)
     //devExamples.printStatistics(ommitMentions = false)
     /*
     devExamples.printStatistics(ommitMentions = false)
     println("#Interaction.* relationship: %d".format(devExamples.flatMap(_.getAllRelationships().filter(_.get(Label.ATTRIBUTE_TYPE).equals("RELATIONSHIP")).filter(_.relationshipName.startsWith("Interaction."))).size))
     */
     val trainExamples = new BioNLPImporter(IOBIEPropertiesUtil.getProperty(IOBIEProperties.BIONLP2013_PATH), BioNLP2013DatasetTypes.train).importForIE()
-    trainExamples.printStatistics(ommitMentions = false)
+    allTrainExamples.addAll(trainExamples)
+    //trainExamples.printStatistics(ommitMentions = false)
     /*
     println("#realMentions: %d".format(trainExamples.flatMap(_.getAllMentions().filter(_.get(Label.ATTRIBUTE_TYPE).equals("MENTION"))).size))
     println("#actionMentions: %d".format(trainExamples.flatMap(_.getAllMentions().filter(_.get(Label.ATTRIBUTE_TYPE).equals("R_MENTION"))).size))
     println("#Interaction.* relationship: %d".format(trainExamples.flatMap(_.getAllRelationships().filter(_.get(Label.ATTRIBUTE_TYPE).equals("RELATIONSHIP")).filter(_.relationshipName.startsWith("Interaction."))).size))
     */
 
-    allTrainExamples.addAll(trainExamples)
-    //allTrainExamples.addAll(devExamples)
+
+
 
     //allTrainExamples.printStatistics(ommitMentions = false)
     allTrainExamples
     //val trainExamples = new BioNLPImporter(IOBIEPropertiesUtil.getProperty(IOBIEProperties.BIONLP2013_PATH), BioNLP2013DatasetTypes.trainAsTest).importForIE()
     //testExamples.addAll(trainExamples)
 
-    //val testExamples = new BioNLPImporter(IOBIEPropertiesUtil.getProperty(IOBIEProperties.BIONLP2013_PATH), BioNLP2013DatasetTypes.test).importForIE()
+    //val testExamples1 = new BioNLPImporter(IOBIEPropertiesUtil.getProperty(IOBIEProperties.BIONLP2013_PATH), BioNLP2013DatasetTypes.test).importForIE()
+    //testExamples.addAll(testExamples1)
     //testExamples.printStatistics(ommitMentions = false)
 
     testExamples
   }
 
   def main(args: Array[String]) {
+    //delete all existing trained models
+    for(file <- new File(IOBIEPropertiesUtil.getProperty(IOBIEProperties.CRFSUITE_MODEL_FOLDER)).listFiles()) {
+      file.delete()
+    }
+
     evaluateStandard()
     //evaluateLeaveOneOut()
   }
   def evaluateLeaveOneOut() {
     val trainDocIDs = new BioNLPImporter(IOBIEPropertiesUtil.getProperty(IOBIEProperties.BIONLP2013_PATH), BioNLP2013DatasetTypes.train).importForIE().
         map(_.get(ExampleLabel.DOC_ID).asInstanceOf[String]).toList.sorted
+    //val trainDocIDs = importTest().map(_.get(ExampleLabel.DOC_ID).asInstanceOf[String]).toList.sorted
     println("Number of examples: %d".format(trainDocIDs.size))
 
     var processCounter = 1
       t.tag(testExamples)
     })
 
+
+
     //0. print some statistics
     //OPTIONAL: add events as mentions
     /*
       mentions.addAll(e.getAllMentions())
       mentions.addAll(eventsToMentions(e))
       e.setMentions(mentions)
+    })*/
+    /*
+    //OPTIONAL: leave only B.S. genes as relation attributes
+    trainExamples.foreach(e => {
+      val relationships = e.getAllRelationships()
+      val newRelationships = new ArrayBuffer[Relationship]()
+      relationships.foreach(r => {
+        if (r.get(Label.ATTRIBUTE_TYPE).equals("RELATIONSHIP") && BioNLP2013Rules.isBacillusSubtilisGene(r.subj) && BioNLP2013Rules.isBacillusSubtilisGene(r.obj)) {
+          newRelationships.append(r)
+        }
+      })
+      e.setRelationships(newRelationships)
     })
+    */
 
-
+    /*
     printlnForR(SortedMap[Int, Int]() ++ RelationshipAnalysis.getRelationshipAttributeMentionsDistanceDistribution(trainExamples))
     printlnForR(SortedMap[Int, Int]() ++ RelationshipAnalysis.getRelationshipAttributeMentionsDistanceDistribution(
       trainExamples,
     //5b. detect full mention relationships (only mentions are attributes)
     trainAndDetectFullGeneMentionRelationships(trainExamples, testExamples)
 
+    //5c. train full mention relationships from events (only mentions are attributes)
+    //5d. detect full mention relationships from events (only mentions are attributes)
+    trainAndDetectFullGeneMentionRelationshipsFromEvents(trainExamples, testExamples)
+
     //6. rule based detection
     //BioNLP2013Rules.enrichByStaticRules(trainExamples)
     //BioNLP2013Rules.enrichByStaticRules(testExamples)
     //9. output to A2 files
     //BioNLP2013Exporter.exportA2("/Users/slavkoz/temp/bionlpTrain", trainExamples)
     BioNLP2013Exporter.exportA2("/Users/slavkoz/temp/bionlpDev", testExamples)
+    //BioNLP2013Exporter.exportA2("/Users/slavkoz/temp/bionlpTest", testExamples)
+    //BioNLP2013Exporter.exportA2("/Users/slavkoz/temp/bionlpAll", testExamples)
     //doSomeAnalytics(trainExamples, testExamples)
 
     //10. manually delete lines from A2 so that GRN.py schema validation goes through.
       }).toBuffer)  */
 
       var identifiedCounter = 0
-      val taggedEvents = classifier.classifyRelationships(testExamples.map(e => e.getAllMentions()).toBuffer)
+      val taggedEvents = classifier.classifyRelationships(testExamples.map(e => e.getAllMentions()).toBuffer, tokenThreshold, sequenceThreshold)
       taggedEvents.zip(testExamples).foreach{ case (events, example) => {
         var nextEventIdx = 1
         identifiedCounter += events.size
       }).toBuffer)*/
 
       var identifiedCounter = 0
-      val taggedRelationships = classifier.classifyRelationships(testExamples.map(e => e.getAllMentions()).toBuffer)
+      val taggedRelationships = classifier.classifyRelationships(testExamples.map(e => e.getAllMentions()).toBuffer, tokenThreshold, sequenceThreshold)
       taggedRelationships.zip(testExamples).foreach{ case (relationships, example) => {
         var nextRelIdx = 1
         identifiedCounter += relationships.size
         mentions.addAll(e.getAllMentions().filter(BioNLP2013Rules.isBacillusSubtilisGene(_)))
         Collections.sort(mentions)
         mentions
-      }).toBuffer)
+      }).toBuffer, tokenThreshold, sequenceThreshold)
       taggedRelationships.zip(testExamples).foreach{ case (relationships, example) => {
         var nextRelIdx = example.getAllRelationships().filter(_.get(Label.ID).asInstanceOf[String].startsWith("R")).map(_.get(Label.ID).asInstanceOf[String].substring(1).toInt).+:(0).max + 1
         identifiedCounter += relationships.size
     }
   }
 
+  def trainAndDetectFullGeneMentionRelationshipsFromEvents(trainExamples: Examples, testExamples: Examples) { // trains a relationship model on event-derived gene relationships, then adds the predicted relationships to testExamples
+    for (skipNumber <- maxSkipMentions to maxSkipMentions) { // one-element range: the body runs once with skipNumber == maxSkipMentions
+      println("Skip mentions: %s".format((minSkipMentions to skipNumber).mkString(", ")))
+      val learner = new RelationshipMultipleLearner(
+        trainExamples,
+        featureFunctions = FeatureFunctionPackages.standardRelFFPackages(),
+        skipNumbers = (minSkipMentions to skipNumber).toArray,
+        modelSaveFilename = "bionlp_grn_genementions_events"
+      )
+
+      //mentions: only those accepted by BioNLP2013Rules.isBacillusSubtilisGene
+      //relationships: RELATIONSHIPs with an EVENT as subj or obj; ends lacking Label.ID are replaced via eventToTargetMention, then only "Interaction*" ones whose both ends pass isBacillusSubtilisGene are kept
+      learner.useWithSpecificMentionsAndRelationships(trainExamples.map(e => {
+        (e.getAllMentions().filter(BioNLP2013Rules.isBacillusSubtilisGene(_)),
+          e.getAllRelationships().filter(r => {
+            r.get(Label.ATTRIBUTE_TYPE).asInstanceOf[String].equals("RELATIONSHIP") &&
+            (r.obj.get(Label.ATTRIBUTE_TYPE).equals("EVENT") || r.subj.get(Label.ATTRIBUTE_TYPE).equals("EVENT"))
+          }).map(r => {
+            if (!r.obj.containsKey(Label.ID)) { // an end without Label.ID is assumed to wrap a Relationship under Label.VALUE - TODO confirm
+              r.obj = eventToTargetMention(r.obj.get(Label.VALUE).asInstanceOf[Relationship]) // NOTE(review): reassigns obj on the object returned by getAllRelationships(); presumably this alters trainExamples itself - confirm
+            }
+            if (!r.subj.containsKey(Label.ID)) {
+              r.subj = eventToTargetMention(r.subj.get(Label.VALUE).asInstanceOf[Relationship]) // same in-place reassignment for subj
+            }
+            r
+          }).filter(r => {
+            r.get(Label.ATTRIBUTE_TYPE).asInstanceOf[String].equals("RELATIONSHIP") &&
+              r.relationshipName.startsWith("Interaction")  &&
+              BioNLP2013Rules.isBacillusSubtilisGene(r.obj) &&
+              BioNLP2013Rules.isBacillusSubtilisGene(r.subj)
+          }))
+      }).toBuffer)
+
+      val classifier = learner.train()
+
+      /*classifier.test(trainExamples.map(e => {
+        (e.getAllMentions(),
+          e.getAllRelationships().filter(r => {
+            r.get(Label.ATTRIBUTE_TYPE).asInstanceOf[String].equals("RELATIONSHIP") &&
+              r.obj.containsKey(Label.ID) &&
+              r.subj.containsKey(Label.ID)
+          }))
+      }).toBuffer)*/
+
+      var identifiedCounter = 0
+      val taggedRelationships = classifier.classifyRelationships(testExamples.map(e => {
+        //only mentions accepted by isBacillusSubtilisGene are collected and sorted; events are NOT added here
+        val mentions = new ArrayBuffer[Constituent]()
+        mentions.addAll(e.getAllMentions().filter(BioNLP2013Rules.isBacillusSubtilisGene(_)))
+        Collections.sort(mentions)
+        mentions
+      }).toBuffer, tokenThreshold, sequenceThreshold)
+      taggedRelationships.zip(testExamples).foreach{ case (relationships, example) => {
+        var nextRelIdx = example.getAllRelationships().filter(_.get(Label.ID).asInstanceOf[String].startsWith("R")).map(_.get(Label.ID).asInstanceOf[String].substring(1).toInt).+:(0).max + 1 // next free "R<n>" index: largest existing numeric suffix (0 when there is none) plus one
+        identifiedCounter += relationships.size
+        if (relationships.size > 0) {
+
+          relationships.foreach(r => {
+
+
+            r.example = example
+            r.put(Label.ATTRIBUTE_TYPE, "RELATIONSHIP")
+            r.put(Label.ID, "R"+nextRelIdx)
+            BioNLP2013Rules.modifyRelAttributeTypes(r)
+            println(example.get(ExampleLabel.DOC_ID)+ " - R" + nextRelIdx)
+            println(r)
+            r.example.printLabeling(Label.OBS)
+            nextRelIdx += 1
+          })
+          example.addRelationships(relationships) // predicted relationships are attached to the test example itself
+
+        }
+      }}
+
+      println("Identified full gene mention relationships with events: %d".format(identifiedCounter))
+    }
+  }
+
   def trainAndDetectFullRelationships(trainExamples: Examples, testExamples: Examples) {
     for (skipNumber <- maxSkipMentions to maxSkipMentions) {
       println("Skip mentions: %s".format((minSkipMentions to skipNumber).mkString(", ")))
         mentions.addAll(eventsToMentions(e))
         Collections.sort(mentions)
         mentions
-      }).toBuffer)
+      }).toBuffer, tokenThreshold, sequenceThreshold)
       taggedRelationships.zip(testExamples).foreach{ case (relationships, example) => {
         var nextRelIdx = example.getAllRelationships().filter(_.get(Label.ID).asInstanceOf[String].startsWith("R")).map(_.get(Label.ID).asInstanceOf[String].substring(1).toInt).+:(0).max + 1
         identifiedCounter += relationships.size
     retVal
   }
 
+  def eventToTargetMention(event: Relationship): Constituent = { // maps an event to the mention that stands in for it
+    event.subj.asInstanceOf[Constituent] // the event's subject is used as the target; the cast is unchecked, so subj is assumed to be a Constituent
+  }
+
 }

File src/main/java/si/zitnik/research/iobie/core/relationship/test/BioNLP2013Rules.scala

  * To change this template use File | Settings | File Templates.
  */
 object BioNLP2013Rules extends Logging {
-  lazy val subtilisGenes: mutable.HashSet[String] = loadGenes()
+  private val subtilisGenes: mutable.HashSet[String] = loadGenes(true)
 
   def enrichByStaticRules(testExamples: Examples) {
     println("\n\n\n")
 
-    enrichByStaticRulesHelper(testExamples, "Interaction.Transcription", "transcrib")
-    enrichByStaticRulesHelperBetweenMention(testExamples, "Interaction.Transcription", ".*directs transcription.*")
-    enrichByStaticRulesHelperBetweenMentionWithList(testExamples, "Interaction.Transcription", ".*under.*control.*of.*")
+    var addedNum = 0
 
-    enrichByStaticRulesHelperBetweenMentionSubsentence(testExamples, "Interaction.Inhibition", ".*repressed.*")
-    enrichByStaticRulesHelperBetweenMention(testExamples, "Interaction.Inhibition", ".*inactivate.*")
-    enrichByStaticRulesHelperBetweenMention(testExamples, "Interaction.Inhibition", ".*inhibits.*")
-    enrichByStaticRulesHelperBetweenMention(testExamples, "Interaction.Inhibition", ".*repressor to.*")
-    enrichByStaticRulesHelperBetweenMention(testExamples, "Interaction.Inhibition", ".*is negatively regulated by.*", passiveType = true)
-    //enrichByStaticRulesHelperBetweenMention(testExamples, "Interaction.Regulation", "controls")
-    //enrichByStaticRulesHelperBetweenMention(testExamples, "Interaction.Regulation", "controlled by")
+    addedNum += enrichByStaticRulesHelper(testExamples, "Interaction.Transcription", "transcrib")
 
-    enrichByStaticRulesHelperBetweenMention(testExamples, "Interaction.Activation", ".*is governed by.*", passiveType = true)
-    enrichByStaticRulesHelperBetweenMentionWithList(testExamples, "Interaction.Activation", ".*is governed by.*", passiveType = true)
-    enrichByStaticRulesHelperBetweenMentionWithListLeft(testExamples, "Interaction.Activation", ".*activated by.*")
-    enrichByStaticRulesHelperBetweenMention(testExamples, "Interaction.Activation", ".*essential.*activat.*")
-    enrichByStaticRulesHelperBetweenMention(testExamples, "Interaction.Activation", ".*to.*activat.*")
-    enrichByStaticRulesHelperBetweenMention(testExamples, "Interaction.Activation", ".*turns on.*")
-    enrichByStaticRulesHelperBetweenMentionSubsentence(testExamples, "Interaction.Activation", ".*turns on.*")
 
-    enrichByStaticRulesHelperBetweenMentionWithList(testExamples, "Interaction.Inhibition", ".*represses.*")
-    enrichByStaticRulesHelperBetweenMentionWithList(testExamples, "Interaction.Inhibition", ".*to repress.*")
+    addedNum += enrichByStaticRulesHelperBetweenMention(testExamples, "Interaction.Transcription", ".*directs transcription.*")
+    addedNum += enrichByStaticRulesHelperBetweenMention(testExamples, "Interaction.Inhibition", ".*inactivate.*")
+    addedNum += enrichByStaticRulesHelperBetweenMention(testExamples, "Interaction.Inhibition", ".*inhibits.*")
+    addedNum += enrichByStaticRulesHelperBetweenMention(testExamples, "Interaction.Inhibition", ".*repressor to.*")
+    addedNum += enrichByStaticRulesHelperBetweenMention(testExamples, "Interaction.Inhibition", ".*is negatively regulated by.*", passiveType = true)
+    addedNum += enrichByStaticRulesHelperBetweenMention(testExamples, "Interaction.Activation", ".*is governed by.*", passiveType = true)
+    addedNum += enrichByStaticRulesHelperBetweenMention(testExamples, "Interaction.Activation", ".*essential.*activat.*")
+    addedNum += enrichByStaticRulesHelperBetweenMention(testExamples, "Interaction.Activation", ".*to.*activat.*")
+    addedNum += enrichByStaticRulesHelperBetweenMention(testExamples, "Interaction.Activation", ".*turns on.*")
+    addedNum += enrichByStaticRulesHelperBetweenMention(testExamples, "Interaction.Requirement", ".*requires.*", passiveType = true)
+    addedNum += enrichByStaticRulesHelperBetweenMention(testExamples, "Interaction.Requirement", ".*required.*", passiveType = false)
+    addedNum += enrichByStaticRulesHelperBetweenMention(testExamples, "Interaction.Binding", ".*binds.*to.*")
+    addedNum += enrichByStaticRulesHelperBetweenMention(testExamples, "Interaction.Binding", "-binding.*")
 
-    enrichByStaticRulesHelperBetweenMention(testExamples, "Interaction.Requirement", ".*requires.*", passiveType = true)
-    enrichByStaticRulesHelperBetweenMention(testExamples, "Interaction.Requirement", ".*required.*", passiveType = false)
-    enrichByStaticRulesHelperBetweenMentionWithListLeft(testExamples, "Interaction.Requirement", ".*are required.*", passiveType = true)
 
+    addedNum += enrichByStaticRulesHelperBetweenMentionWithList(testExamples, "Interaction.Transcription", ".*under.*control.*of.*")
+    addedNum += enrichByStaticRulesHelperBetweenMentionWithList(testExamples, "Interaction.Activation", ".*is governed by.*", passiveType = true)
+    addedNum += enrichByStaticRulesHelperBetweenMentionWithList(testExamples, "Interaction.Inhibition", ".*represses.*")
+    addedNum += enrichByStaticRulesHelperBetweenMentionWithList(testExamples, "Interaction.Inhibition", ".*to repress.*")
+    addedNum += enrichByStaticRulesHelperBetweenMentionWithListLeft(testExamples, "Interaction.Activation", ".*activated by.*")
+    addedNum += enrichByStaticRulesHelperBetweenMentionWithListLeft(testExamples, "Interaction.Requirement", ".*are required.*", passiveType = true)
 
-    enrichByStaticRulesHelperBetweenMention(testExamples, "Interaction.Binding", ".*binds.*to.*")
-    enrichByStaticRulesHelperBetweenMention(testExamples, "Interaction.Binding", "-binding.*")
+    addedNum += enrichByStaticRulesHelperBetweenMentionSubsentence(testExamples, "Interaction.Activation", ".*turns on.*")
+    addedNum += enrichByStaticRulesHelperBetweenMentionSubsentence(testExamples, "Interaction.Inhibition", ".*repressed.*")
 
+    println("Number of all relations added by rules: %d".format(addedNum))
   }
 
   /**
    * @param relName
    * @param keyword
    */
-  def enrichByStaticRulesHelperBetweenMention(testExamples: Examples, relName: String, keyword: String, passiveType: Boolean = false) {
+  def enrichByStaticRulesHelperBetweenMention(testExamples: Examples, relName: String, keyword: String, passiveType: Boolean = false) = {
 
     var numRelsAdded = 0
     for (example <- testExamples) {
       }
     }
     println("Manually added %d relationships for relationship %s.\n".format(numRelsAdded, relName))
+    numRelsAdded
   }
 
   /**
    * @param relName
    * @param keyword
    */
-  def enrichByStaticRulesHelperBetweenMentionSubsentence(testExamples: Examples, relName: String, keyword: String) {
+  def enrichByStaticRulesHelperBetweenMentionSubsentence(testExamples: Examples, relName: String, keyword: String) = {
 
     var numRelsAdded = 0
     for (example <- testExamples) {
       }
     }
     println("Manually added %d relationships for relationship %s.\n".format(numRelsAdded, relName))
+    numRelsAdded
   }
 
   /**
    * @param relName
    * @param keyword
    */
-  def enrichByStaticRulesHelperBetweenMentionWithList(testExamples: Examples, relName: String, keyword: String, passiveType: Boolean = false) {
+  def enrichByStaticRulesHelperBetweenMentionWithList(testExamples: Examples, relName: String, keyword: String, passiveType: Boolean = false) = {
     val separators = Set(",", ", and", "and")
 
     var numRelsAdded = 0
       }
     }
     println("Manually added %d relationships for relationship %s.\n".format(numRelsAdded, relName))
+    numRelsAdded
   }
 
   /**
    * @param relName
    * @param keyword
    */
-  def enrichByStaticRulesHelperBetweenMentionWithListLeft(testExamples: Examples, relName: String, keyword: String, passiveType: Boolean = false) {
+  def enrichByStaticRulesHelperBetweenMentionWithListLeft(testExamples: Examples, relName: String, keyword: String, passiveType: Boolean = false) = {
     val separators = Set(",", ", and", "and")
 
     var numRelsAdded = 0
       }
     }
     println("Manually added %d relationships for relationship %s.\n".format(numRelsAdded, relName))
+    numRelsAdded
   }
 
   /**
    * @param startsWithKeyword
    * @param keywordIdx
    */
-  def enrichByStaticRulesHelper(testExamples: Examples, relName: String, startsWithKeyword: String, keywordIdx: Int = 1) {
+  def enrichByStaticRulesHelper(testExamples: Examples, relName: String, startsWithKeyword: String, keywordIdx: Int = 1) = {
 
     var numRelsAdded = 0
     for (example <- testExamples) {
       }
     }
     println("Manually added %d relationships for relationship %s.\n".format(numRelsAdded, relName))
+    numRelsAdded
   }
 
 
   }
 
   def isBacillusSubtilisGene(mention: Constituent) = {
-    if (subtilisGenes.contains(mention.get(Label.OBS)) || subtilisGenes.contains(mention.get(Label.COREF))) {
+    if (
+      (mention.get(Label.OBS) != null && subtilisGenes.contains(mention.get(Label.OBS).toString.toLowerCase)) ||
+      (mention.get(Label.COREF) != null && subtilisGenes.contains(mention.get(Label.COREF).toString.toLowerCase))
+    ) {
       true
     } else {
       false
   }
 
 
-  def loadGenes() = {
+  def loadGenes(lowercase: Boolean = false) = {
     val retVal = mutable.HashSet[String]()
 
 
     for (line <- Source.fromInputStream(this.getClass.getClassLoader.getResourceAsStream("specific/Bacillus-subtilis.genes")).getLines()) {
       val geneName = line.split(" ")(1).replaceAll("\\[gene=", "").replaceAll("\\]", "")
-      retVal.add(geneName)
+      if (lowercase) {
+        retVal.add(geneName.toLowerCase)
+      } else {
+        retVal.add(geneName)
+      }
     }
 
     retVal
       //remove loops
       val tempRelationships = ArrayBuffer[Relationship]()
       for (relationship <- fullRels) {
-        if (relationship.subj.get(Label.COREF) != null && !relationship.subj.get(Label.COREF).asInstanceOf[String].isEmpty &&
+        if (relationship.subj.get(Label.COREF) != null && !relationship.subj.get(Label.COREF).toString.isEmpty &&
           !relationship.subj.get(Label.COREF).equals(relationship.obj.get(Label.COREF))) {
           tempRelationships.append(relationship)
         } else {

File src/main/java/si/zitnik/research/iobie/thirdparty/crfsuite/api/CRFSuiteLCCRFClassifier.scala

                         val modelSaveFilename: String,
                         val featureDict: FeatureDict,
                         val printCRFSuiteOutput: Boolean,
-                        val outputMarginals: Boolean = false,
-                        val outputSequenceProbabilities: Boolean = false) extends Classifier with Logging {
+                        val outputMarginals: Boolean = true,
+                        val outputSequenceProbabilities: Boolean = true) extends Classifier with Logging {
   val command =
     List(IOBIEPropertiesUtil.getProperty(IOBIEProperties.CRFSUITE_CMD), "tag") :::
     { if (outputSequenceProbabilities) List("-p") else List() } ::: //sequence probability

File src/main/java/si/zitnik/research/iobie/thirdparty/crfsuite/api/CRFSuiteLCCRFLearner.scala

                            val learnLabelType: Label.Value,
                            val featureFunctions: ArrayList[FeatureFunction],
                            val modelSaveFilename: String = "model.obj",
-                           val featureThreshold: Int = 3,
+                           val featureThreshold: Int = 5,
                            val printCRFSuiteOutput: Boolean = false,
                            val maxIterations: Option[Int] = None) extends Learner(examples) with Logging {
 

File src/main/java/si/zitnik/research/iobie/util/StreamGobblerExamples.scala

         if (label.isEmpty) {
           data.append(tempList)
           tempList = new ArrayBuffer[String]()
+          marginalProbabilities.append(tempMarginalList)
           tempMarginalList = new ArrayBuffer[Double]()
         } else {
           if (label.startsWith("@probability")) {