vvcephei avatar vvcephei committed 49ec6a2

finally got the output format where I want it

Comments (0)

Files changed (5)

src/main/scala/updown/app/PerUserEvaluator.scala

 
   def computeEvaluation(tweets: scala.List[SystemLabeledTweet]): (Int, Int, Double, String) = {
     var totalError = 0.0;
-    var totalErrorAlt = 0.0
+    var totalErrorPos = 0.0
+    var totalErrorNeg = 0.0
+    var totalErrorNeu = 0.0
     var totalNumAbstained = 0
     val usersToTweets = new scala.collection.mutable.HashMap[String, List[Tweet]] {
       override def default(s: String) = List()
 
     val usersToTweetsFiltered = usersToTweets.filter(p => p._2.length >= minTPU)
 
-    for (userid <- usersToTweetsFiltered.keys) {
-      val curTweets = usersToTweetsFiltered(userid)
+    var nusers = 0
+    var nutweets = 0
+    for ((userid, curTweets) <- usersToTweetsFiltered) {
+      nusers += 1
 
       var numAbstained = 0
-      if (curTweets.length >= minTPU) {
-        var numGoldPos = 0.0;
-        var numSysPos = 0.0
-        var numGoldNeg = 0.0;
-        var numSysNeg = 0.0
-        var numGoldNeu = 0.0;
-        var numSysNeu = 0.0
+      var numGoldPos = 0.0;
+      var numSysPos = 0.0
+      var numGoldNeg = 0.0;
+      var numSysNeg = 0.0
+      var numGoldNeu = 0.0;
+      var numSysNeu = 0.0
 
-        for (tweet <- curTweets) {
+      for (tweet <- curTweets) {
+        nutweets += 1
+        tweet match {
+          case SystemLabeledTweet(_, _, _, SentimentLabel.Positive, _) => numGoldPos += 1
+          case SystemLabeledTweet(_, _, _, SentimentLabel.Negative, _) => numGoldNeg += 1
+          case SystemLabeledTweet(_, _, _, SentimentLabel.Neutral, _) => numGoldNeu += 1
+        }
+        if (doRandom == None) {
           tweet match {
-            case SystemLabeledTweet(_, _, _, SentimentLabel.Positive, _) => numGoldPos += 1
-            case SystemLabeledTweet(_, _, _, SentimentLabel.Negative, _) => numGoldNeg += 1
-            case SystemLabeledTweet(_, _, _, SentimentLabel.Neutral, _) => numGoldNeu += 1
+            case SystemLabeledTweet(_, _, _, _, SentimentLabel.Positive) => numSysPos += 1
+            case SystemLabeledTweet(_, _, _, _, SentimentLabel.Negative) => numSysNeg += 1
+            case SystemLabeledTweet(_, _, _, _, SentimentLabel.Neutral) => numSysNeu += 1
+            //              case SystemLabeledTweet(_, _, _, _, null) => numAbstained += 1
+            case _ => numAbstained += 1
           }
-          if (doRandom == None) {
-            tweet match {
-              case SystemLabeledTweet(_, _, _, _, SentimentLabel.Positive) => numSysPos += 1
-              case SystemLabeledTweet(_, _, _, _, SentimentLabel.Negative) => numSysNeg += 1
-              case SystemLabeledTweet(_, _, _, _, SentimentLabel.Neutral) => numSysNeu += 1
-              case SystemLabeledTweet(_, _, _, _, null) => numAbstained += 1
-            }
-          } else {
-            numAbstained += 1
-          }
+        } else {
+          numAbstained += 1
         }
+      }
 
-        numSysPos += numAbstained.toFloat / 3
-        /*if(doRandom.value != None) {
-          numSysPos = numGoldPos / 2
-          numAbstained = 0
-        }*/
-        totalError += math.pow(((numGoldPos + numGoldNeg + numGoldNeu) - (numSysPos + numSysNeg + numSysNeu)) / curTweets.length, 2)
-        totalErrorAlt += math.pow(((numGoldPos) - (numSysPos)) / curTweets.length, 2)
-        totalNumAbstained += numAbstained
-      }
+      //        numSysPos += numAbstained.toFloat / 3
+
+
+      /*if(doRandom.value != None) {
+        numSysPos = numGoldPos / 2
+        numAbstained = 0
+      }*/
+      totalError += math.pow(((numGoldPos + numGoldNeg + numGoldNeu) - (numSysPos + numSysNeg + numSysNeu)) / curTweets.length, 2)
+      totalErrorPos += math.pow(((numGoldPos) - (numSysPos)) / curTweets.length, 2)
+      totalErrorNeg += math.pow(((numGoldNeg) - (numSysNeg)) / curTweets.length, 2)
+      totalErrorNeu += math.pow(((numGoldNeu) - (numSysNeu)) / curTweets.length, 2)
+      totalNumAbstained += numAbstained
+      println("uid:%s abs:%d gP:%f sP:%f gN:%f sN:%f gn:%f sn:%f n:%d".format(userid, numAbstained, numGoldPos, numSysPos, numGoldNeg, numSysNeg, numGoldNeu, numSysNeu, curTweets.length))
     }
 
     totalError /= usersToTweetsFiltered.size
-    totalErrorAlt /= usersToTweetsFiltered.size
+    totalErrorPos /= usersToTweetsFiltered.size
+    totalErrorNeg /= usersToTweetsFiltered.size
+    totalErrorNeu /= usersToTweetsFiltered.size
+
+    System.out.println("pos:%f neg:%f neu:%f".format(totalErrorPos, totalErrorNeg, totalErrorNeu))
+    println("tweets:%d users:%d utweets: %d".format(tweets.length, nusers, nutweets))
 
     (usersToTweetsFiltered.size, totalNumAbstained, totalError,
       "(min of " + minTPU + " tweets per user)")

src/main/scala/updown/app/experiment/Experiment.scala

 package updown.app.experiment
 
 import updown.util.Statistics
-import updown.data.{TargetedSystemLabeledTweet, SystemLabeledTweet}
 import org.clapper.argot.{ArgotParser, SingleValueOption}
 import org.clapper.argot.ArgotConverters._
 import com.weiglewilczek.slf4s.Logging
+import updown.data.{SentimentLabel, TargetedSystemLabeledTweet, SystemLabeledTweet}
 
 abstract class Experiment extends Logging {
   val parser = new ArgotParser(this.getClass.getName, preUsage = Some("Updown"))
 
   def report(labeledTweets: List[SystemLabeledTweet]) {
     logger.info("Overall:\n" + Statistics.getEvalStats("", labeledTweets).toString)
-    val statsPerUser: List[ExperimentalResult] = Statistics.getEvalStatsPerUser("", labeledTweets)
-    logger.info("Per-user Summary:\n"+Statistics.mean(statsPerUser)+"\n"+Statistics.variance(statsPerUser))
-    if (statsPerUser.length > 0)
-      logger.debug("Per-user:\n" + statsPerUser.mkString("\n"))
-    else
-      logger.info("Per-user: No users were over the threshold.")
+
+    val (msePerUser, nUsers) = Statistics.getMSEPerUser(labeledTweets)
+    logger.info("Per-user Summary:\nN users:%d\n%s\n%s".format(nUsers, "%15s %5s".format("Label","MSE"),msePerUser.map{case LabelResult(_,label,_,_,_,mse)=>"%15s %.3f".format(SentimentLabel.toEnglishName(label),mse)}.mkString("\n")))
 
     targetsInputFile.value match {
       case Some(filename) =>
           case SystemLabeledTweet(id, uid, features, gLabel, sLabel) =>
             TargetedSystemLabeledTweet(id, uid, features, gLabel, sLabel, targets(id))
         }
-        val statsPerTarget: List[ExperimentalResult] = Statistics.getEvalStatsPerTarget("", targetedTweets)
+        val (statsPerTarget, nTargets) = Statistics.getEvalStatsPerTarget("", targetedTweets)
         if (statsPerTarget.length > 0){
-          logger.info("Per-target Summary:\n"+Statistics.mean(statsPerTarget)+"\n"+Statistics.variance(statsPerTarget))
-
-          logger.debug("Per-target:\n" + statsPerTarget.mkString("\n"))
+          logger.info("Per-target:\nN targets: %d\n%s".format(nTargets, statsPerTarget.mkString("\n")))
         }else
           logger.info("Per-target: No targets were over the threshold")
       case None =>

src/main/scala/updown/app/experiment/ExperimentalResult.scala

 import updown.data.SentimentLabel
 
 case class ExperimentalResult(name: String, n: Int, accuracy: Double, classes: List[LabelResult]) {
+  def header: String = "\n%15s%5s%11s%8s%9s%9s\n".format("Label", "N", "Precision", "Recall", "F-Score", "MSE")
 
   override def toString(): String =
     "%s Results:\n".format(name) +
       "%10s%6d\n".format("N", n) +
       "%10s%6.2f\n".format("Accuracy", accuracy) +
-      "\n%15s%5s%11s%8s%9s\n".format("Label", "N", "Precision", "Recall", "F-Score") +
+      header +
       (for (res <- classes) yield res.toString).mkString("\n") + "\n"
 
   def rename(newName: String): ExperimentalResult =
     val otherClassesMap = (other.classes.groupBy((labelResult) => labelResult.label).map((tup) => {
       val (k, (v: LabelResult) :: vs) = tup
       (k, v)
-    }).toMap).withDefaultValue(LabelResult(0, SentimentLabel.Abstained, 0.0, 0.0, 0.0))
+    }).toMap).withDefaultValue(LabelResult(0, SentimentLabel.Abstained, 0.0, 0.0, 0.0, 0.0))
     ExperimentalResult(name, n + other.n, accuracy + other.accuracy,
       (for ((label, classResult) <- classesMap.toList) yield classResult + otherClassesMap(label)).toList
     )
     val otherClassesMap = (other.classes.groupBy((labelResult) => labelResult.label).map((tup) => {
       val (k, (v: LabelResult) :: vs) = tup
       (k, v)
-    }).toMap).withDefaultValue(LabelResult(0, SentimentLabel.Abstained, 0.0, 0.0, 0.0))
+    }).toMap).withDefaultValue(LabelResult(0, SentimentLabel.Abstained, 0.0, 0.0, 0.0, 0.0))
     ExperimentalResult(name, n * other.n, accuracy * other.accuracy,
       (for ((label, classResult) <- classesMap.toList) yield classResult * otherClassesMap(label)).toList
     )
   }
 }
 
-case class LabelResult(n: Int, label: SentimentLabel.Type, precision: Double, recall: Double, f: Double) {
-  override def toString(): String = "%15s%5d%11.2f%8.2f%9.2f".format(SentimentLabel.toEnglishName(label), n, precision, recall, f)
+
+/**
+ * Per-label evaluation figures: the count `n`, precision, recall, F-score and mean squared error
+ * for one sentiment `label`.
+ *
+ * The arithmetic operators combine two results field by field (asserting that both carry the
+ * same label) or scale every numeric field by a scalar, truncating the scaled `n` to an Int.
+ */
+case class LabelResult(n: Int, label: SentimentLabel.Type, precision: Double, recall: Double, f: Double, mse: Double) {
+  override def toString(): String = "%15s%5d%11.2f%8.2f%9.2f%9.2f".format(SentimentLabel.toEnglishName(label), n, precision, recall, f, mse)
 
   def +(other: LabelResult): LabelResult = {
     assert(label == other.label)
-    LabelResult(n + other.n, label, precision + other.precision, recall + other.recall, f + other.f)
+    LabelResult(n + other.n, label, precision + other.precision, recall + other.recall, f + other.f, mse + other.mse)
   }
 
   def *(other: LabelResult): LabelResult = {
     assert(label == other.label)
-    LabelResult(n * other.n, label, precision * other.precision, recall * other.recall, f * other.f)
+    LabelResult(n * other.n, label, precision * other.precision, recall * other.recall, f * other.f, mse * other.mse)
   }
 
-  def /(scalar: Double): LabelResult = LabelResult((n.toFloat / scalar).toInt, label, precision / scalar, recall / scalar, f / scalar)
+  def /(scalar: Double): LabelResult = LabelResult((n.toFloat / scalar).toInt, label, precision / scalar, recall / scalar, f / scalar, mse / scalar)
 
-  def *(scalar: Double): LabelResult = LabelResult((n.toFloat * scalar).toInt, label, precision * scalar, recall * scalar, f * scalar)
+  // mse is multiplied like every other field; dividing it here would scale it the wrong way.
+  def *(scalar: Double): LabelResult = LabelResult((n.toFloat * scalar).toInt, label, precision * scalar, recall * scalar, f * scalar, mse * scalar)
 }

src/main/scala/updown/preproc/impl/PreprocHCRTweets.scala

             case _ => SentimentLabel.Abstained
           }
         getTargetToLabelMap(rest) + ((target, label))
-      case Nil => Nil.toMap
+      case _ => Nil.toMap
     }
   }
 

src/main/scala/updown/util/Statistics.scala

 import com.weiglewilczek.slf4s.Logging
 import updown.app.experiment.{LabelResult, ExperimentalResult}
 import java.io.{OutputStreamWriter, BufferedOutputStream}
-import updown.data.{TargetedSystemLabeledTweet, SentimentLabel, SystemLabeledTweet}
+import updown.data.{Tweet, TargetedSystemLabeledTweet, SentimentLabel, SystemLabeledTweet}
 
 object Statistics extends Logging {
 
   def averageResults(newName: String, results: scala.List[ExperimentalResult]): ExperimentalResult = {
     var avgAccuracy = 0.0
     var avgN = 0.0
-    var avgLabelResults = scala.collection.mutable.Map[SentimentLabel.Type, LabelResult]().withDefault((label) => LabelResult(0, label, 0.0, 0.0, 0.0))
+    var avgLabelResults = scala.collection.mutable.Map[SentimentLabel.Type, LabelResult]().withDefault((label) => LabelResult(0, label, 0.0, 0.0, 0.0, 0.0))
     // first, sum
     for (ExperimentalResult(name, n, accuracy, classes) <- results) {
       avgAccuracy += accuracy
       avgN += n
-      for (LabelResult(n, label, precision, recall, f) <- classes) {
-        val LabelResult(oN, oLabel, oPrecision, oRecall, oF) = avgLabelResults(label)
-        avgLabelResults(label) = LabelResult(n + oN, label, precision + oPrecision, recall + oRecall, f + oF)
+      for (LabelResult(n, label, precision, recall, f, mse) <- classes) {
+        val LabelResult(oN, oLabel, oPrecision, oRecall, oF, oMse) = avgLabelResults(label)
+        avgLabelResults(label) = LabelResult(n + oN, label, precision + oPrecision, recall + oRecall, f + oF, mse + oMse)
       }
     }
     // then, scale
     val N = results.length
     ExperimentalResult(newName, (avgN / N).toInt, avgAccuracy / N,
-      (for ((_, LabelResult(n, label, precision, recall, f)) <- avgLabelResults.toList.sortBy {
+      (for ((_, LabelResult(n, label, precision, recall, f, mse)) <- avgLabelResults.toList.sortBy {
         case (k, v) => SentimentLabel.ordinality(k)
       }) yield {
-        LabelResult(n / N, label, precision / N, recall / N, f / N)
+        LabelResult(n / N, label, precision / N, recall / N, f / N, mse / N)
       }).toList)
   }
 
   def getEvalStats(resultName: String, tweets: scala.List[SystemLabeledTweet]): ExperimentalResult = {
+    // Produces the overall accuracy plus one LabelResult for each of the positive, negative and
+    // neutral labels, in that order.
     val (correct, total) = tabulate(tweets)
     ExperimentalResult(resultName, total, accurracy(correct, total),
-      (for (label <- List(SentimentLabel.Negative, SentimentLabel.Neutral, SentimentLabel.Positive)) yield {
+      (for (label <- List(SentimentLabel.Positive, SentimentLabel.Negative, SentimentLabel.Neutral)) yield {
         val goldList = tweets.filter((tweet) => tweet.goldLabel == label)
         logger.debug("%s gold tweets: %d".format(SentimentLabel.toEnglishName(label), goldList.length))
         val systemList = tweets.filter((tweet) => tweet.systemLabel == label)
           goldList.filter((tweet) => tweet.systemLabel == label).length,
           goldList.length
         )
-        LabelResult(goldList.length, label, labelPrecision, labelRecall, fScore(labelPrecision, labelRecall))
+
+        // Convert to Double before dividing: Int / Int truncates, so the squared-error term was 0
+        // unless the gold and system counts differed by the full tweet total.
+        LabelResult(goldList.length, label, labelPrecision, labelRecall, fScore(labelPrecision, labelRecall), math.pow((goldList.length - systemList.length).toDouble / tweets.length, 2))
       }).toList)
   }
- 
 
-  def getEvalStatsPerUser(resultName: String, tweets: scala.List[SystemLabeledTweet]): List[ExperimentalResult] = {
-    val userToTweets = tweets.groupBy((tweet) => tweet.userid).toList.filter {
-      case (user, tweets) =>
-        tweets.length > MinTPU
-    }.sortBy {
-      case (user, tweets) => tweets.length
-    }.reverse
-    (for ((user, tweets) <- userToTweets) yield {
-      val res = Statistics.getEvalStats("%s %s".format(resultName, user), tweets)
-      res
-    }).toList
+  /**
+   * Computes, for each of the positive, negative and neutral labels, the mean over users of the
+   * squared difference between the share of a user's tweets with that gold label and the share
+   * with that system label.
+   *
+   * Only users with at least `MinTPU` tweets take part. Tweets whose gold or system label is none
+   * of the three sentiment labels still count towards the user's tweet total but towards no
+   * label count.
+   *
+   * @param tweets system-labeled tweets from any number of users
+   * @return the three per-label results (only `label` and `mse` are meaningful; the other fields
+   *         are -1 placeholders) and the number of users that met the threshold
+   */
+  def getMSEPerUser(tweets: scala.List[SystemLabeledTweet]): (List[LabelResult], Int) = {
+    // groupBy builds every per-user list in one pass; appending with ::: per tweet re-copied a
+    // user's list each time and was quadratic in the number of tweets per user.
+    val tweetsPerUser = tweets.groupBy(_.userid).values.filter(_.length >= MinTPU).toList
+    val nUsers = tweetsPerUser.length
+
+    // Mean over the qualifying users of the squared per-user proportion difference for `label`.
+    def mse(label: SentimentLabel.Type): Double =
+      if (nUsers == 0) 0.0 // no qualifying users: avoid the NaN that dividing 0.0 by 0 produces
+      else tweetsPerUser.map { userTweets =>
+        val numGold = userTweets.count(_.goldLabel == label)
+        val numSys = userTweets.count(_.systemLabel == label)
+        math.pow((numGold - numSys).toDouble / userTweets.length, 2)
+      }.sum / nUsers
+
+    // n, precision, recall and f are not computed per user; -1 marks them as unavailable.
+    (List(SentimentLabel.Positive, SentimentLabel.Negative, SentimentLabel.Neutral).map(
+      label => LabelResult(-1, label, -1, -1, -1, mse(label))), nUsers)
   }
 
-  def getEvalStatsPerTarget(resultName: String, tweets: scala.List[TargetedSystemLabeledTweet]): List[ExperimentalResult] = {
+  def getEvalStatsPerTarget(resultName: String, tweets: scala.List[TargetedSystemLabeledTweet]): (List[ExperimentalResult], Int) = {
     val targetToTweets = tweets.groupBy((tweet) => tweet.target).toList.filter {
       case (target, tweets) =>
         tweets.length > MinTPT
     }.sortBy {
       case (target, tweets) => tweets.length
     }.reverse
-    (for ((target, tweets) <- targetToTweets) yield {
+    ((for ((target, tweets) <- targetToTweets) yield {
       val res = Statistics.getEvalStats("%s %s".format(resultName, target), tweets.map {
         case TargetedSystemLabeledTweet(id, uid, features, gLabel, sLabel, target) => SystemLabeledTweet(id, uid, features, gLabel, sLabel)
       })
       res
-    }).toList
+    }).toList, targetToTweets.length)
   }
 }
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.