1. Slavko Zitnik
  2. IOBIE

Commits

Slavko Zitnik  committed 63a2bc1

T-test

  • Participants
  • Parent commits e640cc3
  • Branches master

Comments (0)

Files changed (3)

File src/main/java/si/zitnik/research/iobie/core/collective/test/CollectiveTest.scala

View file
     def importTestData() = data._2
   }
 
+  /**
+   * Prints summary counts for a set of examples: document ids, mentions,
+   * relationships and entities.
+   *
+   * The entity count is taken per document as the number of distinct
+   * Label.COREF values over that document's mentions, then summed over all
+   * documents.
+   *
+   * @param examples the examples to summarise
+   */
+  def printStat(examples: Examples): Unit = {
+    val docIds = examples.getAllDocIds()
+    println("DocIDs: " + docIds.size)
+    println("Mentions: " + examples.map(_.getAllMentions().size).sum)
+    println("Relationships: " + examples.map(_.getAllRelationships().size).sum)
+
+    // toSeq before mapping: should getAllDocIds() return a Set, mapping ids straight
+    // to counts would merge documents that share the same entity count and the sum
+    // would come out too low.
+    val entitiesPerDocument = docIds.toSeq.map { id =>
+      examples.getDocumentExamples(id)
+        .map(_.getAllMentions().map(_.get(Label.COREF).asInstanceOf[Int]).toSet)
+        .flatten
+        .toSet
+        .size
+    }
+    println("Entities: " + entitiesPerDocument.sum)
+  }
+
   def main(args: Array[String]) {
     ACE2004Data.reload()
 
       ommitedExample = Array(ExampleLabel.DOC_ID)
     )
 
+    printStat(ACE2004Data.importTrainData())
+    printStat(ACE2004Data.importTestData())
+
     /*
     ACE2004Data.importTrainData().relabel(Label.NE, "FAC", "O")
     ACE2004Data.importTrainData().relabel(Label.NE, "VEH", "O")

File src/main/java/si/zitnik/research/iobie/core/coreference/test/CoreferenceEvaluation.scala

View file
 
 
     val (mucMean, bCubedMean, ceafMean) = clusterResults(classifier, mentionExamplesTest)
-    val n = 10
+    val n = 20
     val mucMeans = new Array[Double](n)
     val bCubedMeans = new Array[Double](n)
     val ceafMeans = new Array[Double](n)
      ceafMeans(i) = result._3
     }
     println("MUC test:")
-    println(new TTest(mucMean, mucMeans, 0.05).toDetailString())
+    // 2.093 goes to TTest as tableTValue (not to println, which would print a tuple);
+    // presumably the two-sided 95% critical t value for n - 1 = 19 degrees of freedom.
+    println(new TTest(mucMean, mucMeans, 0.05, 2.093).toDetailString())
     println("BCubed test:")
-    println(new TTest(bCubedMean, bCubedMeans, 0.05).toDetailString())
+    println(new TTest(bCubedMean, bCubedMeans, 0.05, 2.093).toDetailString())
     println("CEAFe test:")
-    println(new TTest(ceafMean, ceafMeans, 0.05).toDetailString())
+    println(new TTest(ceafMean, ceafMeans, 0.05, 2.093).toDetailString())
 
 
   }
     //evaluate ("SemEval2010", SemEvalData, FeatureFunctionPackages.bestSemEval2010CorefFeatureFunctions)
 
     //CoNLL 2012
+    // Run the same evaluation ten times; foreach needs a function, and the loop
+    // index itself is not used.
+    (1 to 10).foreach { _ =>
+      CoNLL2012Data.sources = Array(CoNLL2012ImporterSourceTypeEnum.BROADCAST_NEWS)
+      evaluate ("CoNLL2012 BROADCAST_NEWS", CoNLL2012Data, FeatureFunctionPackages.bestCoNLL2012CorefFeatureFunctions)
+    }
 
-    CoNLL2012Data.sources = Array(CoNLL2012ImporterSourceTypeEnum.BROADCAST_NEWS)
-    evaluate ("CoNLL2012 BROADCAST_NEWS", CoNLL2012Data, FeatureFunctionPackages.bestCoNLL2012CorefFeatureFunctions)
 
     //CoNLL2012Data.sources = Array(CoNLL2012ImporterSourceTypeEnum.BROADCAST_CONVERSATION)
     //evaluate ("CoNLL2012 BROADCAST_CONVERSATION", CoNLL2012Data, FeatureFunctionPackages.bestCoNLL2012CorefFeatureFunctions)

File src/main/java/si/zitnik/research/iobie/statistics/test/TTest.scala

View file
 package si.zitnik.research.iobie.statistics.test
 
 import org.apache.commons.math3.stat.inference.TestUtils
+import org.apache.commons.math3.stat.descriptive.SummaryStatistics
 
 /**
  * Created with IntelliJ IDEA.
  * Time: 12:03 PM
  * To change this template use File | Settings | File Templates.
  */
-class TTest(mean: Double, sample: Array[Double], alpha: Double) {
+class TTest(mean: Double, sample: Array[Double], alpha: Double, tableTValue: Double = Double.NaN) {
+  // tableTValue defaults to NaN rather than null: Double is a value type and cannot
+  // hold null. Callers that omit it get NaN back from confidenceInterval().
+  // Descriptive statistics of the sample, filled once at construction time.
+  val sampleStats = new SummaryStatistics()
+  sample.foreach(value => sampleStats.addValue(value))
 
   def tStatistic() = {
     TestUtils.t(mean, sample)
     TestUtils.tTest(mean, sample, alpha)
   }
 
+  /**
+   * Margin of error around the sample mean: tableTValue times the standard
+   * deviation reported by sampleStats, divided by the square root of the
+   * sample size.
+   *
+   * @return the half-width that toDetailString() prints after "+-"
+   */
+  def confidenceInterval() = {
+    val scaledDeviation = tableTValue * sampleStats.getStandardDeviation()
+    scaledDeviation / math.sqrt(sample.length)
+  }
+
   /**
    * Builds a multi-line, human-readable report of this test.
    *
    * The report lists the sample values, the hypothesised mean (mu), the
    * t-statistic, the p-value, the confidence level 1 - alpha together with the
    * rejection outcome, and the sample mean followed by the value returned by
    * confidenceInterval().
    *
    * @return the formatted report string
    */
   def toDetailString() = {
-    "T-test:\n\tdata: %s\n\tmu: %.2f\n\tt-statistic: %.4f\n\tp-value: %.4f\n\trejected with confidence level %.2f: %s".format(
+    "T-test:\n\tdata: %s\n\tmu: %.2f\n\tt-statistic: %.4f\n\tp-value: %.4f\n\trejected with confidence level %.2f: %s\n\tconfidence interval: %.2f +- %.2f".format(
       sample.mkString(","),
       mean,
       tStatistic(),
       pValue(),
       (1-alpha),
-      rejected()+""
+      rejected()+"",
+      sampleStats.getMean,
+      confidenceInterval()
     )
   }