Commits

vvcephei  committed db7c349

still working on topic experiments

  • Participants
  • Parent commits d213e84

Comments (0)

Files changed (10)

File src/main/scala/updown/app/NFoldExperiment.scala

-package updown.app
-
-import updown.data.io.TweetFeatureReader
-import updown.data.{SentimentLabel, GoldLabeledTweet}
-import org.clapper.argot.ArgotParser._
-import org.clapper.argot.{ArgotUsageException, ArgotParser}
-import org.clapper.argot.ArgotConverters._
-import com.weiglewilczek.slf4s.Logging
-import updown.util.Statistics
-
-abstract class NFoldExperiment extends Logging {
-  // this exists purely to make the ArgotConverters appear used to IDEA
-  convertByte _
-
-  def doExperiment(testSet: List[GoldLabeledTweet], trainSet: List[GoldLabeledTweet]):
-    (Double, List[(updown.data.SentimentLabel.Type, Double, Double, Double)])
-
-  def generateTrials(inputFile: String, nFolds: Int): Iterator[(List[GoldLabeledTweet], List[GoldLabeledTweet])] = {
-    val foldsToTweets = (for ((fold, list) <- TweetFeatureReader(inputFile).zipWithIndex.groupBy((pair) => {
-      val (_, index) = pair;
-      index % nFolds
-    })) yield {
-      (fold, list.map((pair) => {
-        val (tweet, _) = pair;
-        tweet
-      }))
-    }).toList
-
-    (for ((heldOutFold, heldOutData) <- foldsToTweets) yield {
-      (heldOutData,
-        foldsToTweets.filter((pair) => {
-          val (listFold, _) = pair;
-          listFold != heldOutFold
-        }).map((pair) => {
-          val (_, tweets) = pair;
-          tweets
-        }).flatten)
-    }).iterator
-  }
-
-
-
-  def initializeAverageList(list: List[(updown.data.SentimentLabel.Type, Double, Double, Double)]): List[(updown.data.SentimentLabel.Type, Double, Double, Double)] = {
-    if (list.length == 0)
-      Nil
-    else {
-      val ((lLabel, _, _, _) :: ls) = list
-      (lLabel, 0.0, 0.0, 0.0) :: initializeAverageList(ls)
-    }
-  }
-
-  def addAll(list: List[(updown.data.SentimentLabel.Type, Double, Double, Double)], to: List[(updown.data.SentimentLabel.Type, Double, Double, Double)]): List[(updown.data.SentimentLabel.Type, Double, Double, Double)] = {
-    if (list.length == 0)
-      Nil
-    else {
-      val ((lLabel, lPrecision, lRecall, lFScore) :: ls) = list
-      val ((tLabel, tPrecision, tRecall, tFScore) :: ts) = to
-      assert(lLabel == tLabel)
-      (lLabel, lPrecision + tPrecision, lRecall + tRecall, lFScore + tFScore) :: addAll(ls, ts)
-    }
-  }
-
-  def divideBy(list: List[(updown.data.SentimentLabel.Type, Double, Double, Double)], by: Double): List[(updown.data.SentimentLabel.Type, Double, Double, Double)] = {
-    if (list.length == 0)
-      Nil
-    else {
-      val ((lLabel, lPrecision, lRecall, lFScore) :: ls) = list
-      (lLabel, lPrecision / by, lRecall / by, lFScore / by) :: divideBy(ls, by)
-    }
-  }
-
-
-  def averageResults(results: scala.List[(Double, scala.List[(SentimentLabel.Type, Double, Double, Double)])]): (Double, scala.List[(SentimentLabel.Type, Double, Double, Double)]) = {
-    var avgAccuracy = 0.0
-    var avgLabelResultsList = initializeAverageList(results(0)._2)
-    for ((accuracy, labelResults) <- results) {
-      avgAccuracy += accuracy
-      avgLabelResultsList = addAll(labelResults, avgLabelResultsList)
-    }
-    avgAccuracy /= results.length
-    avgLabelResultsList = divideBy(avgLabelResultsList, results.length)
-    (avgAccuracy, avgLabelResultsList)
-
-  }
-
-  def main(args: Array[String]) {
-    val parser = new ArgotParser(this.getClass.getName, preUsage = Some("Updown"))
-    val goldInputFile = parser.option[String](List("g", "gold"), "gold", "gold labeled input")
-    val n = parser.option[Int](List("n", "folds"), "FOLDS", "the number of folds for the experiment (default 10)")
-
-    try {
-      parser.parse(args)
-
-      val nFolds: Int = n.value.getOrElse(10)
-
-      if (goldInputFile.value == None) {
-        parser.usage("You must specify a gold labeled input file via -g.")
-      }
-
-      val inputFile = goldInputFile.value.get
-      val results =
-        (for ((testSet, trainSet) <- generateTrials(inputFile, nFolds)) yield {
-          doExperiment(testSet, trainSet)
-        }).toList
-
-      val averages = averageResults(results)
-      System.err.println("\n" + Statistics.reportResults(averages))
-    }
-    catch {
-      case e: ArgotUsageException => println(e.message); sys.exit(0)
-    }
-  }
-}

File src/main/scala/updown/app/NFoldMaxentExperiment.scala

-package updown.app
-
-import updown.data.{SystemLabeledTweet, GoldLabeledTweet, SentimentLabel}
-import updown.util.Statistics
-
-object NFoldMaxentExperiment extends NFoldExperiment {
-  def doExperiment(testSet: List[GoldLabeledTweet], trainSet: List[GoldLabeledTweet]) = {
-    logger.info("performing Maxent experiment")
-    logger.debug("training model")
-    val model = TrainMaxentModel.trainWithGoldLabeledTweetIterator(trainSet.iterator)
-
-    logger.debug("testing model")
-    val res = Statistics.getEvalStats(for (tweet <- testSet) yield {
-      tweet match {
-        case GoldLabeledTweet(id, userid, features, goldLabel) =>
-          SystemLabeledTweet(id, userid, features, goldLabel,
-            SentimentLabel.figureItOut(model.getBestOutcome(model.eval(features.toArray))))
-      }
-    })
-    logger.info(Statistics.reportResults(res))
-    res
-  }
-}

File src/main/scala/updown/app/NFoldTopicExperiment.scala

-package updown.app
-
-import updown.data.{SystemLabeledTweet, GoldLabeledTweet, SentimentLabel}
-import updown.util.{Statistics, LDATopicModel, TopicModel}
-
-object NFoldTopicExperiment extends NFoldExperiment {
-
-
-  def label(model: TopicModel, tweet: GoldLabeledTweet, goodTopic: Int, badTopic: Int): SystemLabeledTweet = {
-    val GoldLabeledTweet(id, userid, features, goldLabel) = tweet
-    val topicDistribution = model.inferTopics(tweet)
-    val sortedDist = topicDistribution.zipWithIndex.sortBy((i) => 1.0 - i._1).map((i) => i._2)
-
-    SystemLabeledTweet(id, userid, features, goldLabel,
-      if (goodTopic == -1 || badTopic == -1) {
-        assert(goodTopic == badTopic)
-        SentimentLabel.Abstained
-      }
-      else if (sortedDist(0) == goodTopic) SentimentLabel.Positive
-      else if (sortedDist(0) == badTopic) SentimentLabel.Negative
-      else if (sortedDist(1) == goodTopic) SentimentLabel.Positive
-      else if (sortedDist(1) == badTopic) SentimentLabel.Negative
-      else if (sortedDist(2) == goodTopic) SentimentLabel.Positive
-      else SentimentLabel.Negative
-    )
-  }
-
-  def evaluate(model: TopicModel, testSet: scala.List[GoldLabeledTweet]): (Double, scala.List[(SentimentLabel.Type, Double, Double, Double)]) = {
-    val labelToTopicDist = model.getTopicsPerTarget
-    val badDist = labelToTopicDist(SentimentLabel.Negative).zipWithIndex.sortBy((i) => 1.0 - i._1).map((i) => i._2)
-    val goodDist = labelToTopicDist(SentimentLabel.Positive).zipWithIndex.sortBy((i) => 1.0 - i._1).map((i) => i._2)
-    val goodTopic = goodDist(0)
-    val badTopic = if (goodTopic != badDist(0)) badDist(0) else badDist(1)
-
-    val res = Statistics.getEvalStats(for (tweet <- testSet) yield {
-      label(model, tweet, goodTopic, badTopic)
-    })
-    logger.debug(Statistics.getEvalStats(for (tweet <- testSet) yield {
-      label(model, tweet, goodTopic, badTopic)
-    }).toString)
-    logger.info(Statistics.reportResults(res))
-    res
-  }
-
-  def doExperiment(testSet: List[GoldLabeledTweet], trainSet: List[GoldLabeledTweet]) = {
-    val model: TopicModel = new LDATopicModel(trainSet, 3, 1000, 100, 0.1)
-
-    logger.info("topic distribution:\n     :" + model.getTopicPriors)
-    logger.info({
-      val labelToTopicDist = model.getTopicsPerTarget
-      "topic distribution over labels:\n" + (for ((k, v) <- labelToTopicDist) yield "%5s:%s".format(k, v)).mkString("\n")
-    })
-    logger.info({
-      val topics = model.getTopics
-      "topic distributions\n" +
-        (for (i <- 0 until 3) yield "%5s: Topic(%s,%s)".format(i, topics(i).prior, topics(i).distribution.toList.sortBy((pair) => (1 - pair._2)))).mkString("\n")
-    })
-    evaluate(model, testSet)
-  }
-}

File src/main/scala/updown/app/experiment/NFoldExperiment.scala

+package updown.app.experiment
+
+import updown.data.io.TweetFeatureReader
+import updown.data.{SentimentLabel, GoldLabeledTweet}
+import org.clapper.argot.ArgotParser._
+import org.clapper.argot.{ArgotUsageException, ArgotParser}
+import org.clapper.argot.ArgotConverters._
+import com.weiglewilczek.slf4s.Logging
+import updown.util.Statistics
+
+/**
+ * Base class for n-fold cross-validation experiments over gold-labeled tweets.
+ *
+ * Subclasses implement `doExperiment`; `main` parses the command line, splits the input into folds, runs one
+ * trial per fold and prints the averaged results.
+ */
+abstract class NFoldExperiment extends Logging {
+  // this exists purely to make the ArgotConverters appear used to IDEA
+  convertByte _
+
+  /**
+   * Runs a single trial.
+   *
+   * @param testSet  the held-out fold
+   * @param trainSet the tweets of all remaining folds
+   * @return the accuracy together with one (label, precision, recall, f-score) entry per label
+   */
+  def doExperiment(testSet: List[GoldLabeledTweet], trainSet: List[GoldLabeledTweet]):
+    (Double, List[(updown.data.SentimentLabel.Type, Double, Double, Double)])
+
+  /**
+   * Splits the tweets read from `inputFile` into folds and produces one (testSet, trainSet) pair per fold.
+   *
+   * The tweet at position i is assigned to fold `i % nFolds`, so folds are interleaved rather than contiguous.
+   *
+   * @param inputFile path handed to TweetFeatureReader
+   * @param nFolds    number of folds; must be positive
+   * @throws IllegalArgumentException if nFolds is not positive
+   */
+  def generateTrials(inputFile: String, nFolds: Int): Iterator[(List[GoldLabeledTweet], List[GoldLabeledTweet])] = {
+    // Fail early with a clear message instead of an ArithmeticException from the modulo below.
+    require(nFolds > 0, "nFolds must be positive, but was " + nFolds)
+
+    val foldsToTweets = TweetFeatureReader(inputFile).zipWithIndex
+      .groupBy { case (_, index) => index % nFolds }
+      .map { case (fold, indexedTweets) => (fold, indexedTweets.map { case (tweet, _) => tweet }) }
+      .toList
+
+    foldsToTweets.map { case (heldOutFold, heldOutData) =>
+      val trainingData = foldsToTweets
+        .filter { case (fold, _) => fold != heldOutFold }
+        .flatMap { case (_, tweets) => tweets }
+      (heldOutData, trainingData)
+    }.iterator
+  }
+
+  /**
+   * Command-line entry point.
+   *
+   * Options: `-g/--gold` (required) names the gold labeled input, `-n/--folds` sets the number of folds
+   * (default 10). Per-fold results are logged at info level and the averaged report is printed to stdout.
+   */
+  def main(args: Array[String]): Unit = {
+    val parser = new ArgotParser(this.getClass.getName, preUsage = Some("Updown"))
+    val goldInputFile = parser.option[String](List("g", "gold"), "gold", "gold labeled input")
+    val n = parser.option[Int](List("n", "folds"), "FOLDS", "the number of folds for the experiment (default 10)")
+
+    try {
+      parser.parse(args)
+
+      val nFolds: Int = n.value.getOrElse(10)
+      if (nFolds < 1) {
+        parser.usage("The number of folds given via -n must be at least 1.")
+      }
+
+      if (goldInputFile.value.isEmpty) {
+        parser.usage("You must specify a gold labeled input file via -g.")
+      }
+
+      // parser.usage aborts via ArgotUsageException (handled below), so the option is defined here.
+      val inputFile = goldInputFile.value.get
+      val results = generateTrials(inputFile, nFolds).map {
+        case (testSet, trainSet) => doExperiment(testSet, trainSet)
+      }.toList
+
+      logger.info("intermediate results:\n"+results.mkString("\n"))
+      println("\n" + Statistics.reportResults(Statistics.averageResults(results)))
+    }
+    catch {
+      // NOTE(review): the process exits with status 0 even though the arguments were rejected.
+      case e: ArgotUsageException => println(e.message); sys.exit(0)
+    }
+  }
+}

File src/main/scala/updown/app/experiment/maxent/NFoldMaxentExperiment.scala

+package updown.app.experiment.maxent
+
+import updown.data.{SystemLabeledTweet, GoldLabeledTweet, SentimentLabel}
+import updown.util.Statistics
+import updown.app.experiment.NFoldExperiment
+import updown.app.TrainMaxentModel
+
+/** N-fold experiment that trains a maxent model on the training folds and scores it on the held-out fold. */
+object NFoldMaxentExperiment extends NFoldExperiment {
+  /**
+   * Trains a model on `trainSet`, labels every tweet of `testSet` with the model's best outcome and
+   * returns the evaluation statistics computed by `Statistics.getEvalStats`.
+   */
+  def doExperiment(testSet: List[GoldLabeledTweet], trainSet: List[GoldLabeledTweet]) = {
+    logger.info("performing Maxent experiment")
+    logger.debug("training model")
+    val maxentModel = TrainMaxentModel.trainWithGoldLabeledTweetIterator(trainSet.iterator)
+
+    logger.debug("testing model")
+    val systemLabeled = testSet.map { goldTweet =>
+      val GoldLabeledTweet(id, userid, features, goldLabel) = goldTweet
+      val bestOutcome = maxentModel.getBestOutcome(maxentModel.eval(features.toArray))
+      SystemLabeledTweet(id, userid, features, goldLabel, SentimentLabel.figureItOut(bestOutcome))
+    }
+    val stats = Statistics.getEvalStats(systemLabeled)
+    logger.info(Statistics.reportResults(stats))
+    stats
+  }
+}

File src/main/scala/updown/app/experiment/topic/NFoldMajorityTopicExperiment.scala

+package updown.app.experiment.topic
+
+import updown.data.{SystemLabeledTweet, GoldLabeledTweet, SentimentLabel}
+import updown.util.{Statistics, LDATopicModel, TopicModel}
+
+/**
+ * Topic experiment that designates one topic as "good", one as "bad" and one as "neutral" from the per-label
+ * topic distributions, then labels each test tweet by which of the good/bad topics ranks higher for it.
+ */
+object NFoldMajorityTopicExperiment extends NFoldTopicExperiment {
+
+  /** Returns the topic indices of `distribution`, ordered from the highest to the lowest weight. */
+  private def rankTopics(distribution: List[Double]): List[Int] =
+    distribution.zipWithIndex.sortBy((i) => 1.0 - i._1).map((i) => i._2)
+
+  /**
+   * Labels a tweet from its inferred topic ranking.
+   *
+   * @param goodTopic index of the topic associated with positive sentiment
+   * @param badTopic  index of the topic associated with negative sentiment
+   * @return the tweet with a system label: Abstained when goodTopic == badTopic (the pathological
+   *         (-1, -1) case produced by `evaluate`), otherwise Positive or Negative depending on which
+   *         of the two topics appears first in the tweet's ranking
+   */
+  def label(model: TopicModel, tweet: GoldLabeledTweet, goodTopic: Int, badTopic: Int): SystemLabeledTweet = {
+    val GoldLabeledTweet(id, userid, features, goldLabel) = tweet
+    val sortedDist = rankTopics(model.inferTopics(tweet))
+
+    // for now, we'll always guess positive or negative, never neutral
+    SystemLabeledTweet(id, userid, features, goldLabel,
+      if (goodTopic == badTopic) SentimentLabel.Abstained
+      else if (sortedDist(0) == goodTopic) SentimentLabel.Positive
+      else if (sortedDist(0) == badTopic) SentimentLabel.Negative
+      else if (sortedDist(1) == goodTopic) SentimentLabel.Positive
+      else if (sortedDist(1) == badTopic) SentimentLabel.Negative
+      else if (sortedDist(2) == goodTopic) SentimentLabel.Positive
+      else SentimentLabel.Negative
+    )
+  }
+
+  /**
+   * Chooses the good, bad and neutral topics from the model's per-label topic distributions, labels every
+   * tweet of `testSet` with them and returns the resulting evaluation statistics.
+   *
+   * When no consistent assignment exists, a warning is logged and all three topics are set to -1, which
+   * makes `label` abstain on every tweet.
+   */
+  def evaluate(model: TopicModel, testSet: scala.List[GoldLabeledTweet]): (Double, scala.List[(SentimentLabel.Type, Double, Double, Double)]) = {
+    val labelToTopicDist = model.getTopicsPerTarget
+
+    val badDist = rankTopics(labelToTopicDist(SentimentLabel.Negative))
+    logger.debug("badDist: "+badDist.toString)
+    val goodDist = rankTopics(labelToTopicDist(SentimentLabel.Positive))
+    logger.debug("goodDist: "+goodDist.toString)
+
+    // Result used whenever the distributions do not allow distinct good and bad topics.
+    def pathological: (Int, Int, Int) = {
+      logger.warn("pathological topic distribution: %s".format(labelToTopicDist.toString))
+      (-1, -1, -1)
+    }
+
+    // Every branch yields (goodTopic, badTopic, neutralTopic) -- in exactly that order.
+    val (goodTopic, badTopic, neutralTopic): (Int, Int, Int) =
+      if (labelToTopicDist.contains(SentimentLabel.Neutral)) {
+        val neutralDist = rankTopics(labelToTopicDist(SentimentLabel.Neutral))
+        logger.debug("neutralDist: "+neutralDist.toString)
+        val neutral = neutralDist(0)
+        // The neutral label claims its top topic first; each sentiment then takes its best remaining
+        // topic, so neither can collide with the neutral one.
+        val good = if (goodDist(0) != neutral) goodDist(0) else goodDist(1)
+        val bad = if (badDist(0) != neutral) badDist(0) else badDist(1)
+        if (good == bad) pathological else (good, bad, neutral)
+      } else {
+        // there were no neutral training instances
+        if (goodDist(0) == badDist(0)) {
+          if (goodDist(1) == badDist(1)) {
+            // then we have a pathological case, and the topics are not sentimental
+            pathological
+          } else {
+            // then the neutral topic was dominant in both cases, and the second topic held the sentiment
+            (goodDist(1), badDist(1), goodDist(0))
+          }
+        } else {
+          // then the sentimental topic was dominant, and we just have to find the neutral topic
+          val good = goodDist(0)
+          val bad = badDist(0)
+          val neutral = if (goodDist(1) != bad) goodDist(1) else goodDist(2)
+          (good, bad, neutral)
+        }
+      }
+    assert ((goodTopic == -1 && badTopic == -1 && neutralTopic == -1) ||
+      (goodTopic != badTopic && badTopic != neutralTopic && goodTopic != neutralTopic))
+    logger.info("goodTopic:%d badTopic:%d neutralTopic:%d".format(goodTopic, badTopic, neutralTopic))
+
+    val res = Statistics.getEvalStats(testSet.map((tweet) => label(model, tweet, goodTopic, badTopic)))
+    logger.debug(res.toString)
+    logger.info(Statistics.reportResults(res))
+    res
+  }
+}

File src/main/scala/updown/app/experiment/topic/NFoldSimilarityTopicExperiment.scala

+package updown.app.experiment.topic
+
+import updown.data.{SystemLabeledTweet, GoldLabeledTweet, SentimentLabel}
+import updown.util.{Statistics, TopicModel}
+
+/**
+ * Topic experiment that labels a tweet with the sentiment label whose topic distribution is most similar
+ * (by cosine similarity) to the topic distribution inferred for the tweet.
+ */
+object NFoldSimilarityTopicExperiment extends NFoldTopicExperiment {
+
+  /**
+   * Labels `tweet` with the label from `labelToTopicDist` whose distribution has the highest cosine
+   * similarity to the tweet's inferred topic distribution.
+   *
+   * @param labelToTopicDist the topic distribution of each candidate label; must not be empty
+   */
+  def label(model: TopicModel, tweet: GoldLabeledTweet, labelToTopicDist: Map[SentimentLabel.Type,List[Double]]): SystemLabeledTweet = {
+    val topicDistribution = model.inferTopics(tweet)
+    logger.debug("inferred topicDist: "+topicDistribution.toString)
+    // Convert to a List *before* mapping: mapping the Map itself to (similarity, label) pairs would build
+    // another Map keyed by similarity and silently drop labels whose similarities tie.
+    val similarities = labelToTopicDist.toList.map {
+      case (k, v) => (Statistics.cosineSimilarity(topicDistribution, v), k)
+    }.sorted.reverse
+    logger.debug("similarities: "+similarities.toString)
+    val GoldLabeledTweet(id, userid, features, goldLabel) = tweet
+
+    SystemLabeledTweet(id, userid, features, goldLabel,similarities(0)._2)
+  }
+
+  /** Labels every tweet of `testSet` via `label` and returns the resulting evaluation statistics. */
+  def evaluate(model: TopicModel, testSet: scala.List[GoldLabeledTweet]): (Double, scala.List[(SentimentLabel.Type, Double, Double, Double)]) = {
+    // Read once instead of once per tweet.
+    // NOTE(review): assumes getTopicsPerTarget returns the same value on every call for a trained model -- confirm.
+    val labelToTopicDist = model.getTopicsPerTarget
+    val res = Statistics.getEvalStats(testSet.map((tweet) => label(model, tweet, labelToTopicDist)))
+    logger.debug(res.toString)
+    logger.info(Statistics.reportResults(res))
+    res
+  }
+}

File src/main/scala/updown/app/experiment/topic/NFoldTopicExperiment.scala

+package updown.app.experiment.topic
+
+import updown.data.{SystemLabeledTweet, GoldLabeledTweet, SentimentLabel}
+import updown.util.{Statistics, LDATopicModel, TopicModel}
+import updown.app.experiment.NFoldExperiment
+
+/**
+ * Base class for topic-model experiments: trains an LDA topic model on the training folds, logs a summary
+ * of the learned topics and delegates the scoring of the held-out fold to `evaluate`.
+ */
+abstract class NFoldTopicExperiment extends NFoldExperiment {
+
+  /**
+   * Scores `testSet` with the trained `model`.
+   *
+   * @return the accuracy together with one (label, precision, recall, f-score) entry per label
+   */
+  def evaluate(model: TopicModel, testSet: scala.List[GoldLabeledTweet]):
+  (Double, scala.List[(SentimentLabel.Type, Double, Double, Double)])
+
+  /** Trains the topic model on `trainSet`, logs its topic summaries and evaluates it on `testSet`. */
+  def doExperiment(testSet: List[GoldLabeledTweet], trainSet: List[GoldLabeledTweet]) = {
+    val model: TopicModel =
+      new LDATopicModel(tweets = trainSet, numTopics = 3, numIterations = 1000, alphaSum = 100, beta = 0.1)
+
+    // The reports are defs so that each one is still only built where it is handed to the logger.
+    def labelReport: String = {
+      val perLabelLines = model.getTopicsPerTarget.map { case (k, v) => "%5s:%s".format(k, v) }
+      "topic distribution over labels:\n" + perLabelLines.mkString("\n")
+    }
+
+    def topicReport: String = {
+      val topics = model.getTopics
+      val perTopicLines = (0 until 3).map { i =>
+        val topic = topics(i)
+        "%5s: Topic(%s,%s)".format(i, topic.prior, topic.distribution.toList.sortBy((pair) => (1 - pair._2)))
+      }
+      "topic distributions\n" + perTopicLines.mkString("\n")
+    }
+
+    logger.info("topic distribution:\n     :" + model.getTopicPriors)
+    logger.info(labelReport)
+    logger.info(topicReport)
+    evaluate(model, testSet)
+  }
+}

File src/main/scala/updown/util/LDATopicModel.scala

 import cc.mallet.types._
 import scala.collection.JavaConversions._
 import updown.data.{SentimentLabel, GoldLabeledTweet}
+import java.util.logging.Level
 
 class LDATopicModel(tweets: List[GoldLabeledTweet], numTopics: Int, numIterations: Int, alphaSum: Double, beta: Double) extends TopicModel {
   private final val MAX_THREADS = 20
   model.addInstances(instanceList)
   model.setNumThreads(numTopics max MAX_THREADS)
   model.setNumIterations(numIterations)
+  ParallelTopicModel.logger.setLevel(Level.OFF)
   model.estimate()
 
   def getTopics: List[Topic] = {
       val sum = wordCounts.map((triple)=>triple._3).reduce(_ + _)
       Topic(priors(i), wordCounts.map((triple)=>(triple._1->(triple._3.toDouble/sum))).toMap)
     }).toList
+
+
+
+
+
     res
   }
 

File src/main/scala/updown/util/Statistics.scala

 package updown.util
 
 import updown.data.{SentimentLabel, SystemLabeledTweet}
+import com.weiglewilczek.slf4s.Logging
 
-object Statistics {
+object Statistics extends Logging {
 
   val accurracy: (Double, Double) => Double =
     (correct, total) => correct / total
   val fScore: (Double, Double) => Double =
     (precision, recall) => 2.0 * precision * recall / (precision + recall)
 
+  /**
+   * Dot product of two vectors of equal length.
+   *
+   * Fails with an AssertionError when the lengths differ; two empty vectors yield 0.
+   */
+  val dot: (List[Double], List[Double]) => Double =
+    (A,B) => {
+      // Checked once up front rather than at every step of a recursion.
+      assert (A.length == B.length)
+      // Folding from the right sums in the order a1*b1 + (a2*b2 + (... + 0)).
+      (A zip B).foldRight(0.0) { case ((a, b), acc) => (a * b) + acc }
+    }
+
+  /** Euclidean length of a vector; 0 for the empty vector (`sum` is used because `reduce` throws on empty input). */
+  val mag: (List[Double])=>Double =
+    (A) => math.sqrt(A.map((i)=>i*i).sum)
+
+  /**
+   * Cosine similarity of two vectors: their dot product divided by the product of their magnitudes.
+   * The result is NaN when either vector has magnitude 0.
+   */
+  val cosineSimilarity: (List[Double], List[Double]) => Double =
+    (A, B) => {
+      val numerator = dot(A, B)
+      val denominator = mag(A) * mag(B)
+      numerator / denominator
+    }
+
   def tabulate(tweets: scala.List[SystemLabeledTweet]): (Double, Int) = {
     var correct = 0.0
     var total = 0
     var numAbstained = tweets.count(_.systemLabel == null)
 
     for (tweet <- tweets) {
-//      println(tweet.systemLabel + "|" + tweet.goldLabel)
+      //      println(tweet.systemLabel + "|" + tweet.goldLabel)
       /*
        * val normedTweet = tweet.normalize("alpha")
       *  val normedNormedTweet = normedTweet.normalize("int")
     (correct, total)
   }
 
+
+  /**
+   * Builds the starting accumulator for averaging: the labels of `list`, in the same order, each paired
+   * with 0.0 for its three scores.
+   */
+  def initializeAverageList(list: List[(updown.data.SentimentLabel.Type, Double, Double, Double)]): List[(updown.data.SentimentLabel.Type, Double, Double, Double)] =
+    list.map { case (label, _, _, _) => (label, 0.0, 0.0, 0.0) }
+
+  /**
+   * Adds two values.
+   *
+   * Despite the name, NaN operands are currently NOT skipped: the variant that returned the other operand
+   * when one of them was NaN is disabled, so a NaN in either argument yields NaN.
+   */
+  def addWithoutNaN(d1: Double, d2: Double): Double = d1 + d2
+
+  /**
+   * Adds the scores of `list` to those of `to`, position by position.
+   *
+   * Both lists must have the same length and carry the same labels in the same order; an AssertionError
+   * is raised otherwise (instead of an opaque MatchError or a silently truncated result).
+   *
+   * @return one (label, precision sum, recall sum, f-score sum) entry per position
+   */
+  def addAll(list: List[(updown.data.SentimentLabel.Type, Double, Double, Double)], to: List[(updown.data.SentimentLabel.Type, Double, Double, Double)]): List[(updown.data.SentimentLabel.Type, Double, Double, Double)] = {
+    assert(list.length == to.length,
+      "cannot add label results of different lengths: %d vs %d".format(list.length, to.length))
+    list.zip(to).map {
+      case ((lLabel, lPrecision, lRecall, lFScore), (tLabel, tPrecision, tRecall, tFScore)) =>
+        assert(lLabel == tLabel)
+        (lLabel, addWithoutNaN(lPrecision, tPrecision), addWithoutNaN(lRecall, tRecall), addWithoutNaN(lFScore, tFScore))
+    }
+  }
+
+  /** Divides the three scores of every entry of `list` by `by`, leaving labels and order unchanged. */
+  def divideBy(list: List[(updown.data.SentimentLabel.Type, Double, Double, Double)], by: Double): List[(updown.data.SentimentLabel.Type, Double, Double, Double)] =
+    list.map { case (label, precision, recall, fScore) => (label, precision / by, recall / by, fScore / by) }
+
+
+  /**
+   * Averages a list of per-trial results.
+   *
+   * @param results one (accuracy, per-label scores) pair per trial; the first element supplies the labels,
+   *                so the list must not be empty
+   * @return the mean accuracy and, per label, the mean of each of the three scores
+   */
+  def averageResults(results: scala.List[(Double, scala.List[(SentimentLabel.Type, Double, Double, Double)])]): (Double, scala.List[(SentimentLabel.Type, Double, Double, Double)]) = {
+    // Start from zero accuracy and zeroed scores for the labels of the first trial.
+    val zero = (0.0, initializeAverageList(results(0)._2))
+    val (accuracySum, labelSums) = results.foldLeft(zero) {
+      case ((accuracyTotal, labelTotals), (accuracy, labelResults)) =>
+        (accuracyTotal + accuracy, addAll(labelResults, labelTotals))
+    }
+    val trialCount = results.length
+    (accuracySum / trialCount, divideBy(labelSums, trialCount))
+  }
+
   def getEvalStats(tweets: scala.List[SystemLabeledTweet]): (Double, List[(SentimentLabel.Type, Double, Double, Double)]) = {
     val (correct, total) = tabulate(tweets)
-
+    logger.debug("goldLabels: %s".format((tweets.map((tweet) => tweet.goldLabel))))
+    logger.debug("systemLabels: %s".format((tweets.map((tweet) => tweet.systemLabel))))
     (accurracy(correct, total.toDouble),
       (for (label <- SentimentLabel.values) yield {
         val goldList = tweets.filter((tweet) => tweet.goldLabel == label)
       }).toList)
   }
 
-    def reportResults(resultTuple: (Double, scala.List[(SentimentLabel.Type, Double, Double, Double)])): String = {
+  def reportResults(resultTuple: (Double, scala.List[(SentimentLabel.Type, Double, Double, Double)])): String = {
     val (accuracy, labelResultsList) = resultTuple
     "Results:\n" +
       "%12s%6.2f\n".format("Accuracy", accuracy) +