Commits

Slavko Zitnik  committed e640cc3

tests

  • Participants
  • Parent commits 17eabbb

Comments (0)

Files changed (4)

 
 
     <dependencies>
+        <!-- Apache Commons Math: must be the math3 artifact — the new TTest and
+             CommonsMathTest classes in this commit import org.apache.commons.math3.*,
+             which is NOT contained in commons-math 2.2 (that ships org.apache.commons.math) -->
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-math3</artifactId>
+            <version>3.2</version>
+        </dependency>
+
+
         <!-- JENA (Ontology handling) -->
         <dependency>
             <groupId>org.apache.jena</groupId>

File src/main/java/si/zitnik/research/iobie/core/coreference/test/CoreferenceEvaluation.scala

 import si.zitnik.research.iobie.thirdparty.lemmagen.api.LemmaTagger
 import io.Source
 import si.zitnik.research.iobie.core.coreference.learner.CorefPairwiseLearner
+import si.zitnik.research.iobie.statistics.test.TTest
 
 /**
  * Created with IntelliJ IDEA.
  */
 object CoreferenceEvaluation {
 
-  def clusterResults(classifier: CorefClassifier, mentionExamples: Examples) {
+  /**
+   * Clusters `mentionExamples` with `classifier`, scores the predicted clusters
+   * against the gold clustering, and returns the `._3` component of the MUC,
+   * BCubed and CEAFe statistics as a tuple (presumably the F1 scores — TODO
+   * confirm against the *ClusterStatistics implementations).
+   *
+   * @param print when true (the default, preserving the old behavior) each
+   *              metric is also printed; pass false for silent scoring, e.g.
+   *              when resampling repeatedly for significance testing.
+   */
+  def clusterResults(classifier: CorefClassifier, mentionExamples: Examples, print: Boolean = true) = {
     val allRealClusters = TaggedClusterer.doClustering(mentionExamples, ommitSingles = false)
     val allTaggedClusters = classifier.classifyClusters(mentionExamples)
     //CorefVisualizer.run(mentionExamples, allRealClusters, allTaggedClusters)


     val F = new ClusterStatistics().stat(allRealClusters, allTaggedClusters)
+    // NOTE(review): each brace-less `if (print)` below guards only the println
+    // on the following line; the metric computations always run. Correct, but
+    // fragile — consider braces.
+    if (print)
     println("Results Pairwise: " + F)

     val MUC = MUCClusterStatistics.scoreExamples(allRealClusters, allTaggedClusters)
+    if (print)
     println("Results MUC:" + MUC)

     val BCubed = BCubedClusterStatistics.scoreExamples(allRealClusters, allTaggedClusters)
+    if (print)
     println("Results BCubed: " + BCubed)

     /*
     */

     val CEAFe = CEAFClusterStatistics(CEAFStatisticsType.ENTITY_BASED).scoreExamples(allRealClusters, allTaggedClusters)
+    if (print)
     println("Results CEAFe: " + CEAFe)

     /*
     */

     val conll2012Official = CoNLL2012Score.scoreExamples(MUC._3, BCubed._3, CEAFe._3)
+    if (print)
     println("Results CoNLL2012: " + conll2012Official)
+
+    // Result tuple: (MUC._3, BCubed._3, CEAFe._3) — the same components that
+    // feed CoNLL2012Score above.
+    (MUC._3, BCubed._3, CEAFe._3)
   }
 
   def visualizeDistances(mentionExamples: Examples, title: String = "Dataset Distance distribution", filename: String = "/Users/slavkoz/temp/distr.png") {
       featureFunctions = featureFunctionSet,
       modelSaveFilename = evaluationName
     ).train()
-    clusterResults(classifier, mentionExamplesTest)
+
+
+    val (mucMean, bCubedMean, ceafMean) = clusterResults(classifier, mentionExamplesTest)
+    val n = 10
+    val mucMeans = new Array[Double](n)
+    val bCubedMeans = new Array[Double](n)
+    val ceafMeans = new Array[Double](n)
+    for (i <- 0 until n) {
+     val (sampledExamplesTest, _) = RandomSampler.sample(mentionExamplesTest, trainPercent = 0.8)
+     val result = clusterResults(classifier, sampledExamplesTest, false)
+     mucMeans(i) = result._1
+     bCubedMeans(i) = result._2
+     ceafMeans(i) = result._3
+    }
+    println("MUC test:")
+    println(new TTest(mucMean, mucMeans, 0.05).toDetailString())
+    println("BCubed test:")
+    println(new TTest(bCubedMean, bCubedMeans, 0.05).toDetailString())
+    println("CEAFe test:")
+    println(new TTest(ceafMean, ceafMeans, 0.05).toDetailString())
+
+
   }
 
   def evaluate(evaluationName: String, dataProvider: Data, featureFunctionSet: ArrayList[FeatureFunction]) {
     //CoNLL2012Data.sources = Array(CoNLL2012ImporterSourceTypeEnum.MAGAZINE)
     //evaluate ("CoNLL2012 MAGAZINE", CoNLL2012Data, FeatureFunctionPackages.bestCoNLL2012CorefFeatureFunctions)
 
-    CoNLL2012Data.sources = Array(CoNLL2012ImporterSourceTypeEnum.NEWSWIRE)
-    evaluate ("CoNLL2012 NEWSWIRE", CoNLL2012Data, FeatureFunctionPackages.bestCoNLL2012CorefFeatureFunctions)
+    //CoNLL2012Data.sources = Array(CoNLL2012ImporterSourceTypeEnum.NEWSWIRE)
+    //evaluate ("CoNLL2012 NEWSWIRE", CoNLL2012Data, FeatureFunctionPackages.bestCoNLL2012CorefFeatureFunctions)
 
     //CoNLL2012Data.sources = Array(CoNLL2012ImporterSourceTypeEnum.PIVOT_CORPUS)
     //evaluate ("CoNLL2012 PIVOT_CORPUS", CoNLL2012Data, FeatureFunctionPackages.bestCoNLL2012CorefFeatureFunctions)
     //CoNLL2012Data.sources = Array(CoNLL2012ImporterSourceTypeEnum.WEB_TEXT)
     //evaluate ("CoNLL2012 WEB_TEXT", CoNLL2012Data, FeatureFunctionPackages.bestCoNLL2012CorefFeatureFunctions)
 
-    CoNLL2012Data.sources = CoNLL2012ImporterSourceTypeEnum.values.toArray
-    evaluate ("CoNLL2012 ALL_TOGETHER", CoNLL2012Data, FeatureFunctionPackages.bestCoNLL2012CorefFeatureFunctions)
+    //CoNLL2012Data.sources = CoNLL2012ImporterSourceTypeEnum.values.toArray
+    //evaluate ("CoNLL2012 ALL_TOGETHER", CoNLL2012Data, FeatureFunctionPackages.bestCoNLL2012CorefFeatureFunctions)
 
     //ACE2004
 
     //ACE2004Data.reload()
     //evaluate ("ACE2004 ARABIC_TREEBANK", ACE2004Data, FeatureFunctionPackages.bestACE2004CorefFeatureFunctions)
 
-    ACE2004Data.sources = Array(ACE2004DocumentType.BROADCAST_NEWS)
-    ACE2004Data.reload()
-    evaluate ("ACE2004 BROADCAST_NEWS", ACE2004Data, FeatureFunctionPackages.bestACE2004CorefFeatureFunctions)
+    //ACE2004Data.sources = Array(ACE2004DocumentType.BROADCAST_NEWS)
+    //ACE2004Data.reload()
+    //evaluate ("ACE2004 BROADCAST_NEWS", ACE2004Data, FeatureFunctionPackages.bestACE2004CorefFeatureFunctions)
 
     //ACE2004Data.sources = Array(ACE2004DocumentType.CHINESE_TREEBANK)
     //ACE2004Data.reload()
     //ACE2004Data.reload()
     //evaluate ("ACE2004 FISHER_TRANSCRIPTS", ACE2004Data, FeatureFunctionPackages.bestACE2004CorefFeatureFunctions)
 
-    ACE2004Data.sources = Array(ACE2004DocumentType.NEWSWIRE)
-    ACE2004Data.reload()
-    evaluate ("ACE2004 NEWSWIRE", ACE2004Data, FeatureFunctionPackages.bestACE2004CorefFeatureFunctions)
+    //ACE2004Data.sources = Array(ACE2004DocumentType.NEWSWIRE)
+    //ACE2004Data.reload()
+    //evaluate ("ACE2004 NEWSWIRE", ACE2004Data, FeatureFunctionPackages.bestACE2004CorefFeatureFunctions)
 
-    ACE2004Data.sources = ACE2004DocumentType.values.toArray
-    ACE2004Data.reloadCullota()
-    evaluate ("ACE2004 ALL_TOGETHER", ACE2004Data, FeatureFunctionPackages.bestACE2004CorefFeatureFunctions)
+    //ACE2004Data.sources = ACE2004DocumentType.values.toArray
+    //ACE2004Data.reloadCullota()
+    //evaluate ("ACE2004 ALL_TOGETHER", ACE2004Data, FeatureFunctionPackages.bestACE2004CorefFeatureFunctions)
 
 
     //SIMPLE TEST:

File src/main/java/si/zitnik/research/iobie/statistics/test/TTest.scala

+package si.zitnik.research.iobie.statistics.test
+
+import org.apache.commons.math3.stat.inference.TestUtils
+
+/**
+ * One-sample t-test: tests whether the mean of `sample` differs significantly
+ * from the hypothesized population mean `mean`. Thin wrapper around Apache
+ * Commons Math3 TestUtils.
+ *
+ * NOTE(review): these imports require the `commons-math3` artifact; the pom
+ * change in this commit adds `commons-math` 2.2, whose classes live in
+ * org.apache.commons.math — verify the dependency coordinates.
+ *
+ * @param mean   hypothesized population mean (mu)
+ * @param sample observed values (Commons Math requires at least 2 observations)
+ * @param alpha  significance level, e.g. 0.05 for a 95% confidence level
+ *
+ * User: slavkoz
+ * Date: 5/6/14
+ * Time: 12:03 PM
+ */
+class TTest(mean: Double, sample: Array[Double], alpha: Double) {
+
+  // t-statistic of the one-sample test: (sampleMean - mean) / (s / sqrt(n)).
+  def tStatistic() = {
+    TestUtils.t(mean, sample)
+  }
+
+  // Two-sided p-value of the one-sample t-test.
+  def pValue() = {
+    TestUtils.tTest(mean, sample)
+  }
+
+  // True iff the null hypothesis (sample mean == mean) is rejected at `alpha`.
+  def rejected() = {
+    TestUtils.tTest(mean, sample, alpha)
+  }
+
+  // Human-readable multi-line summary of the test inputs and outcomes.
+  def toDetailString() = {
+    "T-test:\n\tdata: %s\n\tmu: %.2f\n\tt-statistic: %.4f\n\tp-value: %.4f\n\trejected with confidence level %.2f: %s".format(
+      sample.mkString(","),
+      mean,
+      tStatistic(),
+      pValue(),
+      (1-alpha),
+      rejected()+""
+    )
+  }
+
+}

File src/main/java/si/zitnik/research/iobie/test/CommonsMathTest.scala

+package si.zitnik.research.iobie.test
+
+import org.apache.commons.math3.stat.descriptive.SummaryStatistics
+import org.apache.commons.math3.stat.inference.{TTest, TestUtils}
+import org.apache.commons.math3.distribution.NormalDistribution
+
+/**
+ * Manual smoke test for the Apache Commons Math3 one-sample t-test API
+ * (TestUtils.t / tTest). Prints the t-statistic, p-value and rejection
+ * decision for a toy population/sample pair.
+ *
+ * User: slavkoz
+ * Date: 5/6/14
+ * Time: 10:32 AM
+ */
+object CommonsMathTest {
+
+  def main(args: Array[String]) {
+    val population = Array(1d, 2d, 3d, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10)
+    // Arithmetic mean of the full (weighted) population.
+    val populationMean = population.sum*1.0/population.length
+    // NOTE(review): `.to[Set]` deduplicates, so the repeated values collapse to
+    // the 10 distinct values 1..10, and `subsets(10).next()` then yields that
+    // entire 10-element set — this is NOT a random sample of the population.
+    // Fine for exercising the API; do not reuse as sampling code.
+    val sample = population.to[Set].subsets(10).next().toArray[Double]
+    val alpha = 0.05
+
+    //for our cases: population mean = result on the whole dataset, sample = result on subdatasets
+
+
+    println("t-statistic: " + TestUtils.t(populationMean, sample))
+    println("p-value: " + TestUtils.tTest(populationMean, sample))
+    // tTest(mu, sample, alpha) returns true iff H0 is rejected at level alpha.
+    println("rejected with confidence level " + (1-alpha) + ": " + TestUtils.tTest(populationMean, sample, alpha))
+  }
+
+}