Commits

Slavko Zitnik  committed 16d41aa

Added pairwise coref classifier

  • Participants
  • Parent commits 44193fa

Comments (0)

Files changed (5)

         <dependency>
             <groupId>org.scalanlp</groupId>
             <artifactId>breeze_2.10</artifactId>
-            <version>0.3-SNAPSHOT</version>
+            <version>0.4-SNAPSHOT</version>
         </dependency>
 
         <!-- JSON -->

File src/main/java/si/zitnik/research/iobie/core/coreference/classifier/impl/CorefMultipleClassifier.scala

       val labelings = classifier.classify(examples)
 
       for ((example, labeling) <- examples.zip(labelings)) {
-        //TODO: refactor for better handling with singletons..
+        //TODO: refactor for better handling with singletons: (every mention must be within pairs)
         pairs.add((example(0).asInstanceOf[Constituent].oldConstituent, example(0).asInstanceOf[Constituent].oldConstituent)) //for singletons
         for (i <- 1 until(labeling.size)) {
           if (labeling(i).equals("C")) {

File src/main/java/si/zitnik/research/iobie/core/coreference/classifier/impl/CorefPairwiseClassifier.scala

+package si.zitnik.research.iobie.core.coreference.classifier.impl
+
+import si.zitnik.research.iobie.domain.{Example, Examples}
+import si.zitnik.research.iobie.algorithms.crf.{Label, Classifier}
+import si.zitnik.research.iobie.domain.cluster.Cluster
+import si.zitnik.research.iobie.domain.constituent.Constituent
+import scala.collection.JavaConversions._
+import si.zitnik.research.iobie.coreference.util.MentionExamplesToCorefExamplesTransformer
+import si.zitnik.research.iobie.coreference.clustering.impl.SimpleClusterer
+import sun.reflect.generics.reflectiveObjects.NotImplementedException
+import si.zitnik.research.iobie.algorithms.crf.stat.Statistics
+import collection.mutable._
+import si.zitnik.research.iobie.coreference.classifier.abst.CorefClassifier
+
+/**
+ * Pairwise coreference classifier with clustering.
+ *
+ * Mention pairs are produced by MentionExamplesToCorefExamplesTransformer.toPairwiseCorefExamples,
+ * labelled by the wrapped classifier and then grouped by SimpleClusterer.
+ *
+ * Examples must be separated by DOC_ID (like for learning) or something else. Each example consists of
+ * one-document constituents.
+ *
+ * @param classifier     underlying classifier used to label the pairwise examples
+ * @param learnLabelType label type handed to Statistics when results are printed in test
+ */
+class CorefPairwiseClassifier(
+        classifier: Classifier,
+        learnLabelType: Label.Value) extends CorefClassifier {
+
+  /**
+   * Labels every mention pair of one example and clusters the resulting pairs.
+   *
+   * Side effect: the cluster reference of every constituent in mentionExample is reset to null
+   * before clustering, so that only the current results are taken into account.
+   *
+   * Per the original note, the result holds only clusters with at least two mentions
+   * (NOTE(review): that depends on SimpleClusterer, which is not visible here).
+   *
+   * @param mentionExample constituents of a single document
+   * @return clusters produced by SimpleClusterer.doClustering for the collected pairs
+   */
+  def classifyCluster(mentionExample: Example): HashSet[Cluster] = {
+    val corefPairs = new HashSet[(Constituent, Constituent)]()
+
+    val pairExamples = MentionExamplesToCorefExamplesTransformer.toPairwiseCorefExamples(mentionExample)
+    val predictions = classifier.classify(pairExamples)
+
+    for ((pairExample, labels) <- pairExamples.zip(predictions)) {
+      //old constituents are constituents built by MentionExamplesBuilder
+      def original(idx: Int) = pairExample(idx).asInstanceOf[Constituent].oldConstituent
+
+      //TODO: refactor for better handling with singletons - check CorefMultipleClassifier first
+      //self-pair: keeps the first mention among the pairs even when nothing links to it
+      corefPairs.add((original(0), original(0)))
+
+      (1 until labels.size).foreach { idx =>
+        //"C" links the mention to its predecessor, anything else leaves it as a possible singleton
+        val first = if (labels(idx).equals("C")) original(idx - 1) else original(idx)
+        corefPairs.add((first, original(idx)))
+      }
+    }
+
+    //remove mentionExample references to clusters => to take into account only current results at clustering
+    //TODO: try what happens if clusters remain the same
+    removeClusterReferences(mentionExample)
+
+    SimpleClusterer.doClustering(corefPairs)
+  }
+
+  /**
+   * Runs classifyCluster on every example, in order.
+   *
+   * @param mentionExamples one example per document
+   * @return one cluster set per input example
+   */
+  def classifyClusters(mentionExamples: Examples): ArrayBuffer[HashSet[Cluster]] = {
+    val clustersPerExample = new ArrayBuffer[HashSet[Cluster]]()
+    for (mentionExample <- mentionExamples) {
+      clustersPerExample += classifyCluster(mentionExample)
+    }
+    clustersPerExample
+  }
+
+  /** Sets the cluster reference of every constituent in the example to null. */
+  private def removeClusterReferences(mentionExample: Example): Unit = {
+    mentionExample.foreach { mention =>
+      mention.asInstanceOf[Constituent].cluster = null
+    }
+  }
+
+  /** Not supported by this classifier; always throws NotImplementedException. */
+  def classify(example: Example, normalized: Boolean) = throw new NotImplementedException()
+
+  /** Not supported by this classifier; always throws NotImplementedException. */
+  override def classify(examples: Examples) = throw new NotImplementedException()
+
+  /**
+   * Converts the mention examples to pairwise examples and prints the standard classification
+   * statistics of the wrapped classifier for label "C".
+   *
+   * @param examples mention examples, one per document
+   */
+  def test(examples: Examples): Unit = {
+    val pairExamples = new Examples()
+    for (mentionExample <- examples) {
+      pairExamples.addAll(MentionExamplesToCorefExamplesTransformer.toPairwiseCorefExamples(mentionExample))
+    }
+    new Statistics(classifier, pairExamples).printStandardClassification(learnLabelType, "C")
+  }
+
+}

File src/main/java/si/zitnik/research/iobie/core/coreference/learner/CorefPairwiseLearner.scala

+package si.zitnik.research.iobie.core.coreference.learner
+
+import si.zitnik.research.iobie.domain.Examples
+import scala.collection.JavaConversions._
+import com.typesafe.scalalogging.slf4j.Logging
+import si.zitnik.research.iobie.algorithms.crf._
+import java.util.ArrayList
+import si.zitnik.research.iobie.thirdparty.crfsuite.api.CRFSuiteLCCRFLearner
+import si.zitnik.research.iobie.coreference.util.MentionExamplesToCorefExamplesTransformer
+import si.zitnik.research.iobie.coreference.classifier.impl.CorefMultipleClassifier
+import si.zitnik.research.iobie.core.coreference.classifier.impl.CorefPairwiseClassifier
+
+/**
+ * Learner for the pairwise coreference model.
+ *
+ * Input FeatureFunctions should not refer to neighbouring examples! One example represents one
+ * document, so coreferences in different examples are independent.
+ *
+ * The same applies to the input given to the resulting classifier.
+ *
+ * -------------------
+ *
+ * Input examples should be built by MentionExamplesBuilder and should contain mentionId as Label.COREF key.
+ *
+ * @param examples          mention examples used for training
+ * @param featureFunctions  feature functions handed to the CRFSuite learner
+ * @param learnLabelType    label type to learn (defaults to Label.COREF)
+ * @param modelSaveFilename base name of the model file; ".obj" is appended when training
+ */
+class CorefPairwiseLearner(
+          examples: Examples,
+          val featureFunctions: ArrayList[FeatureFunction],
+          val learnLabelType: Label.Value = Label.COREF,
+          val modelSaveFilename: String = "coref_p_model") extends Learner(examples) with Logging {
+
+
+  /** Trains with the default of 50 epochs. */
+  def train(): CorefPairwiseClassifier = {
+    train(50)
+  }
+
+  /**
+   * Converts the mention examples to pairwise examples, trains a CRFSuite model on them and wraps
+   * the result in a CorefPairwiseClassifier.
+   *
+   * @param epochs number of epochs passed to the CRFSuite learner
+   * @return classifier backed by the trained model
+   */
+  def train(epochs: Int): CorefPairwiseClassifier = {
+    val pairExamples = new Examples()
+    examples.foreach(v => pairExamples.addAll(MentionExamplesToCorefExamplesTransformer.toPairwiseCorefExamples(v)))
+    val classifier = new CRFSuiteLCCRFLearner(pairExamples, learnLabelType, featureFunctions, modelSaveFilename+".obj").train(epochs)
+
+    new CorefPairwiseClassifier(classifier, learnLabelType)
+  }
+
+  /**
+   * Trains in rounds of epochsBetweenTest epochs and reports performance after every round.
+   *
+   * Training performance is always measured on the training examples; test performance is
+   * reported additionally when a different test set is supplied.
+   *
+   * NOTE(review): every round calls train(epochsBetweenTest), which builds a new CRFSuiteLCCRFLearner;
+   * whether that learner resumes from the previously saved model is not visible here - confirm.
+   *
+   * @param epochsBetweenTest   epochs trained per round (a value of 0 fails with ArithmeticException)
+   * @param allEpochs           total epoch budget; at least one round is always run
+   * @param testMentionExamples examples used for the test report (defaults to the training examples)
+   * @return classifier produced by the last round
+   */
+  def trainAndTest(epochsBetweenTest: Int = 5, allEpochs: Int = 50, testMentionExamples: Examples = examples): CorefPairwiseClassifier = {
+    var classifier: CorefPairwiseClassifier = null
+
+    //the range is never empty, so classifier is always assigned before it is returned
+    for (round <- 1 to math.max(allEpochs / epochsBetweenTest, 1)) {
+      classifier = train(epochsBetweenTest)
+      logger.info("Training perf:")
+      //measured on the training set (the test set used to be evaluated here by mistake)
+      classifier.test(examples)
+
+      if (testMentionExamples != examples) {
+        logger.info("Testing perf:")
+        classifier.test(testMentionExamples)
+      }
+    }
+
+    classifier
+  }
+
+}

File src/main/java/si/zitnik/research/iobie/core/coreference/util/MentionExamplesToCorefExamplesTransformer.scala

     corefExample
   }
 
+  //PAIRWISE
+
+  /**
+   * Builds one pairwise coreference example for every 2-element combination of the tokens in
+   * the given example.
+   *
+   * @param example mention example whose tokens are paired up
+   * @return the pairwise examples, in the order the combinations are produced
+   */
+  def toPairwiseCorefExamples(example: Example): Examples = {
+    val pairwiseExamples = new Examples()
+
+    example.combinations(2).foreach { mentionPair =>
+      pairwiseExamples.add(toPairwiseCorefExample(mentionPair.get(0), mentionPair.get(1)))
+    }
+
+    pairwiseExamples
+  }
+
+  /**
+   * Builds a two-token example from clones of the given tokens.
+   *
+   * When both clones carry a Label.COREF value, the left clone is labelled "O" and the right
+   * clone is labelled "C" if both source tokens have equal Label.COREF values, "O" otherwise.
+   * Without coref labels the clones are added unchanged.
+   *
+   * @param leftToken  first token of the pair
+   * @param rightToken second token of the pair
+   * @return example containing the left clone followed by the right clone
+   */
+  private def toPairwiseCorefExample(leftToken: Token, rightToken: Token): Example = {
+    val pairExample = new Example()
+
+    val leftClone = Constituent.clone(leftToken)
+    val rightClone = Constituent.clone(rightToken)
+
+    //if there is no coref label, this domain is intended for inference :)
+    if (leftClone.contains(Label.COREF) && rightClone.contains(Label.COREF)) {
+      leftClone.put(Label.COREF, "O")
+      val sameCorefValue = rightToken(Label.COREF).equals(leftToken(Label.COREF))
+      rightClone.put(Label.COREF, if (sameCorefValue) "C" else "O")
+    }
+
+    pairExample.add(leftClone)
+    pairExample.add(rightClone)
+
+    pairExample
+  }
+
 }