Commits

Mike Speriosu committed 76e0450

Fixed some stuff in preprocessors; fixed classpath issue in updown executable.

  • Participants
  • Parent commits 19a6497

Comments (0)

Files changed (3)

 #!/bin/bash
 
-JARS=`echo $UPDOWN_DIR/lib/*.jar $UPDOWN_DIR/lib_managed/*/*.jar $UPDOWN_DIR/output/*.jar $UPDOWN_DIR/target/*.jar | tr ' ' ':'`
+JARS=`echo $UPDOWN_DIR/lib/*.jar $UPDOWN_DIR/lib_managed/*/*.jar $UPDOWN_DIR/lib_managed/*/*/*.jar $UPDOWN_DIR/lib_managed/*/*/*/*.jar $UPDOWN_DIR/output/*.jar $UPDOWN_DIR/target/*.jar | tr ' ' ':'`
 SCALA_LIB="$HOME/.sbt/boot/scala-2.9.1/lib/scala-library.jar"
 
 CP="$UPDOWN_DIR/target/classes:$SCALA_LIB:$JARS:$CLASSPATH"

File src/main/scala/updown/preproc/PreprocShammaTweets.scala

 
   val parser = new ArgotParser("updown preproc-shamma", preUsage=Some("Updown"))
   
-  val inputFile = parser.option[String](List("i","input"),"input", "path to shamma's Obama-McCain debate data file")
+  val inputFile = parser.option[String](List("i","input"),"input", "path to Shamma's Obama-McCain debate data file")
   val stopListFile =  parser.option[String](List("s","stoplist"),"stoplist", "path to stoplist file")
   
   val lineRE = """^(\d+)\t[^\t]+\t([^\t]+)\t([^\t]+)\t[^\t]*\t(.*)$""".r

File src/main/scala/updown/preproc/PreprocStanfordTweets.scala

 
   val lineRE = """^(\d+);;(\d+);;[^;]+;;[^;]+;;([^;]+);;(.*)$""".r
 
-  // TODO: verify the meanings of these values
   val STAN_POS = "4"
   val STAN_NEU = "2"
   val STAN_NEG = "0"
 
   def processOneLine(line: String, stoplist: Set[String]): Any = {
     val lineRE(sentimentRaw, tweetid, username, tweet) = line
-    if (sentimentRaw == STAN_POS || sentimentRaw == STAN_NEG) {
-      val tokens = BasicTokenizer(tweet)
-      val features = tokens.filterNot(stoplist(_)) ::: StringUtil.generateBigrams(tokens)
-      val label = if (sentimentRaw == STAN_POS) SentimentLabel.Positive else SentimentLabel.Negative
+    val tokens = BasicTokenizer(tweet)
+    val features = tokens.filterNot(stoplist(_)) ::: StringUtil.generateBigrams(tokens)
+    val label = sentimentRaw match {
+      case STAN_POS => SentimentLabel.Positive
+      case STAN_NEU => SentimentLabel.Neutral
+      case STAN_NEG => SentimentLabel.Negative
+    }
 
-      SuccessfulStanfordParse(tweetid, username, label, features)
-    }
+    SuccessfulStanfordParse(tweetid, username, label, features)
   }
 
   def main(args: Array[String]) {
     }
 
     if (inputFile.value == None) {
-      println("You must specify a input data file via -i ")
+      println("You must specify an input data file via -i ")
       sys.exit(0)
     }
     if (stopListFile.value == None) {
 
     var numTweets = 0
     var numPos = 0
+    var numNeg = 0
+    var numNeu = 0
     for (line <- lines) {
       processOneLine(line, stoplist) match {
         case SuccessfulStanfordParse(tweetid, username, label, features) =>
           numTweets += 1
-          if (label == SentimentLabel.Positive)
-            numPos += 1
+          label match {
+            case SentimentLabel.Positive => numPos += 1
+            case SentimentLabel.Negative => numNeg += 1
+            case SentimentLabel.Neutral => numNeu += 1
+          }
           printf("%s|%s|%s|%s\n", tweetid, username, features.mkString(",").replace("|", ""), label.toString)
         case _ => ()
       }
     }
 
-    System.err.println("Preprocessed " + numTweets + " tweets. Fraction positive: " + (numPos.toFloat / numTweets))
+    System.err.println("Preprocessed " + numTweets + " tweets.")
+    System.err.println("Fraction positive: " + (numPos.toFloat / numTweets))
+    System.err.println("Fraction negative: " + (numNeg.toFloat / numTweets))
+    System.err.println("Fraction neutral: " + (numNeu.toFloat / numTweets))
   }
 }