vvcephei avatar vvcephei committed a2da359

extensive refactoring of preprocessors and experiments. still more work to go

Comments (0)

Files changed (22)

src/main/scala/updown/app/TopicalChunker.scala

+package updown.app
+
+import org.clapper.argot.ArgotConverters._
+import org.clapper.argot.ArgotParser._
+import org.clapper.argot.{ArgotUsageException, ArgotParser}
+import updown.data.io.TweetFeatureReader
+import opennlp.tools.chunker.{ChunkerModel, ChunkerME}
+import java.io._
+import updown.preproc.impl.PreprocTSVFilesCat
+import updown.data.{GoldLabeledTweet, SentimentLabel}
+import com.weiglewilczek.slf4s.Logging
+import opennlp.tools.sentdetect.{SentenceModel, SentenceDetectorME, SentenceDetector}
+import opennlp.tools.tokenize.{TokenizerModel, TokenizerME, Tokenizer}
+import opennlp.tools.postag.{POSModel, POSTaggerME}
+import updown.util._
+
+object TopicalChunker extends Logging {
+  convertByte _
+
+  var iterations = 1000
+  var alpha = 30
+  var beta = 0.1
+  var numTopics = 3
+  val fileSeparator = System.getProperty("file.separator")
+  var childProcesses = List[Process]()
+
+
+  val parser = new ArgotParser(this.getClass.getName, preUsage = Some("Updown"))
+  val inputDocumentsOption = parser.option[String](List("i", "input"), "FILE", "training data for the model")
+  val originalDocumentsOption = parser.option[String](List("o", "original"), "FILE", "the original data")
+
+  val iterationOption = parser.option[Int](List("iterations"), "INT", "the number of iterations for the training the topicModel")
+  val alphaOption = parser.option[Int](List("alpha"), "INT", "the symmetric alpha hyperparameter for LDA")
+  val betaOption = parser.option[Double](List("beta"), "DOUBLE", "the symmetric beta hyperparameter for LDA")
+  val numTopicsOption = parser.option[Int](List("numTopics"), "INT", "the number of topics for LDA")
+  val saveModelOption = parser.option[String](List("save"), "FILE", "save the topic model to FILE")
+  val loadModelOption = parser.option[String](List("load"), "FILE", "load the topic model from FILE")
+
+  val outputOption = parser.option[String](List("o", "output"), "DIR", "the directory to dump topics into")
+  val wordleOption = parser.flag[Boolean](List("w", "wordle"), "generate wordles for the topics (requires -o DIR) " +
+    "(requires that you have downloaded IBM's word cloud generator)")
+  val wordleJarOption = parser.option[String](List("wordleJar"), "PATH", ("the path to IBM's word cloud generator " +
+    "(default %s)").format(WordleUtils.defaultJarPath))
+  val wordleConfigOption = parser.option[String](List("wordleConfig"), "PATH", ("the path to the config file for IBM's " +
+    "word cloud generator (default %s)").format(WordleUtils.defaultConfigurationPath))
+
+  val sdetector = new SentenceDetectorME(new SentenceModel(new FileInputStream(new File("/data/chunker/en-sent.bin"))))
+  val tokenizer = new TokenizerME(new TokenizerModel(new FileInputStream(new File("/data/chunker/en-token.bin"))))
+  val posTagger = new POSTaggerME(new POSModel(new FileInputStream(new File("/data/chunker/en-pos-maxent.bin"))))
+  val chunker = new ChunkerME(new ChunkerModel(new FileInputStream(new File("/data/chunker/en-chunker.bin"))))
+
+
+  def doOutput(model: TopicModel) {
+    if (outputOption.value.isDefined) {
+      val file = new File(outputOption.value.get + fileSeparator + "run")
+      file.mkdirs()
+      val outputDirForThisRun = file.getAbsolutePath
+      val summary = new BufferedWriter((new FileWriter((outputDirForThisRun + fileSeparator + "summary"))))
+      summary.write("%s\n".format(model.getTopicPriors.zipWithIndex.map {
+        case (a, b) => "Topic %s:%6.3f".format(b, a)
+      }.mkString("\n")))
+      summary.write("%s\n".format(model.getTopicsPerTarget.toList.map {
+        case (a, b) => "Label %9s:%s".format(SentimentLabel.toEnglishName(a), b.map {
+          "%7.3f".format(_)
+        }.mkString(""))
+      }.mkString("\n")))
+      summary.close()
+      val outputFiles =
+        (for ((topic, i) <- model.getTopics.zipWithIndex) yield {
+          val outFile = new File(outputDirForThisRun + fileSeparator + "topic" + i)
+          val output = new BufferedWriter(new FileWriter(outFile))
+          output.write("%s\n".format(topic.distribution.toList.sortBy((pair) => (1 - pair._2)).map {
+            case (a, b) => "%s\t%s".format(a, b)
+          }.mkString("\n")))
+          output.close()
+          outFile.getAbsolutePath
+        })
+      if (wordleOption.value.isDefined) {
+        logger.debug("making wordles and report")
+        val index = new BufferedWriter((new FileWriter((outputDirForThisRun + fileSeparator + "index.html"))))
+        index.write("<head><style>\n%s\n</style></head>\n".format(List(
+          "div.bordered{border-style: solid none none none; padding: 5px; border-width: 1px; border-color: gray;}",
+          "div#wordles{display:block; clear:both; padding-top:20px;}",
+          "div.wordle{float:left;width:45%;border-style:solid; border-width:1px; border-color:gray; margin:2px;}",
+          "div.wordle img{width: 100%;}",
+          ".table{display:block; clear: both;}",
+          ".row{display:block;clear:both;}",
+          ".cell{display:block;float:left;}",
+          ".values{display:block;float:left;width:300px;}",
+          ".value{display:block;float:left;width:60px;}",
+          "div.topicFreq .title{width:100px;}",
+          "div.labelDistribution .title{width:150px;}"
+        ).mkString("\n")))
+        index.write("<body>")
+        index.write("<div id=topicDistribution class=\"bordered table\">%s</div>\n".format(model.getTopicPriors.zipWithIndex.map {
+          case (a, b) => "<div class=\"topicFreq row\"><span class=\"title cell\">Topic %s</span><span class=\"value cell\">%6.3f</span></div>".format(b, a)
+        }.mkString("\n")))
+        index.write(("<div id=labelDistributions class=\"bordered table\">" +
+          "<div class=\"labelDistribution row\"><span class=\"title cell\">topic</span><span class=\"values cell\"><span class=\"value\">  0</span><span class=\"value\">  1</span><span class=\"value\">  2</span></span></div>" +
+          "%s</div>\n").format(model.getTopicsPerTarget.toList.sortBy({
+          case (a, b) => SentimentLabel.ordinality(a)
+        }).map {
+          case (a, b) => "<div class=\"labelDistribution row\"><span class=\"title cell\">Label %9s</span><span class=\"values cell\">%s</span></div>".format(SentimentLabel.toEnglishName(a), b.map {
+            "<span class=value>%7.3f</span>".format(_)
+          }.mkString(""))
+        }.mkString("\n")))
+        val jarPath = if (wordleJarOption.value.isDefined) wordleJarOption.value.get else WordleUtils.defaultJarPath
+        val configPath = if (wordleConfigOption.value.isDefined) wordleConfigOption.value.get else WordleUtils.defaultConfigurationPath
+        index.write("<div id=wordles class=bordered>")
+        childProcesses = childProcesses ::: WordleUtils.makeWordles(jarPath, configPath, outputFiles, Some(index))
+        index.write("</div></body>")
+        index.close()
+        logger.debug("done making report and initializing wordles")
+      }
+    }
+  }
+
+
+  def main(args: Array[String]) {
+    try {
+      parser.parse(args)
+      if (iterationOption.value.isDefined) {
+        iterations = iterationOption.value.get
+      }
+      if (alphaOption.value.isDefined) {
+        alpha = alphaOption.value.get
+      }
+      if (betaOption.value.isDefined) {
+        beta = betaOption.value.get
+      }
+      if (numTopicsOption.value.isDefined) {
+        numTopics = numTopicsOption.value.get
+      }
+      // Thanks to a bug in Mallet, we have to cap alphaSum
+      val alphaSum = 300 min (alpha * numTopics)
+
+
+
+      val inputDocumentsFileName =
+        inputDocumentsOption.value match {
+          case Some(filename) => filename
+          case None => parser.usage("You must specify a gold labeled training file via -i.")
+        }
+
+      val inputDocuments = TweetFeatureReader(inputDocumentsFileName)
+
+      logger.debug("alphaSum: " + alphaSum)
+      val model: TopicModel =
+        loadModelOption.value match {
+          case Some(filename) =>
+            new LDATopicModelFromFile(filename)
+          case None =>
+            new LDATopicModel(inputDocuments, numTopics, iterations, alphaSum, beta)
+        }
+      logger.debug("topic distribution:\n     :" + model.getTopicPriors)
+
+      logger.debug({
+        val topics = model.getTopics
+        "topic distributions\n" +
+          (for (i <- 0 until numTopics) yield "%5s: Topic(%s,%s)".format(i, topics(i).prior, topics(i).distribution.toList.sortBy((pair) => (1 - pair._2)).take(10))).mkString("\n")
+      })
+      doOutput(model)
+
+      val document1 = "UGK has always came out with hits, but mainstream hip hop never gave them their respect. It wasn't until 'Big Pimpin' with Jay-Z that mainstream had ever heard of UGK, but now with this hot album, mainstream has no reason not give UGK the respect their deserveThe best yet! They came correct as usual! Pimp C Forever!Everything you would want in a mobile notebook. Battery life is the only shortfall. Would be nice if the wireless card port was located more towards the rear of the machine. As the card's antenna is extended, it becomes somewhat of an obstacle when using the keyboard. This somewhat-pricey backpack is an awesome buy. This thing holds so much stuff it's incredible! In addition to my laptop it has a huge storage section which easily holds an Xbox 360 with room to spare. Side pockets hold a water bottle on each side. Under that, pockets hold your laptop plug and and cables you may need. Front section holds any writing utencils, plus cell phone, charger, keys, etc. Also has a pouch for an iPod with a small hole to put your headphone cord through. \\n\\nPROS: So much storage space! Shoulder straps stretch and flex so your load doesn't hurt your shoulders. Padded back support and cushioned shoulder straps. \\n\\nCONS: Price. I wouldn't have bought it without a gift card I had. Also the chest straps can't be moved up or down, only tightened. \\n\\nOVERALL: Great bag for young road warriors who want something more stylish and rugged than a briefcase. She-ra was my favorite thing when I was little, and now it is my 4-year old daughter's favorite as well. She never tires of the adventures of the Great Rebellion. She runs around the house pretending to be She-ra, and plays with my old action figures while she watches the shows. She-ra is a great role model for young girls, not because she is strong but because she uses her smarts, never gives up, and always helps those in need, even if they are sometimes the enemy. Bottom line: Buy all of the Jazz Icons titles. Starting with the Brubeck, this series is the pinnacle of jazz on DVD. The biggest stars in their prime, all of them beautifully shot and with gorgeous sound. Anyone remotely interested in American music needs to have the whole series, starting with the Brubeck, though the boxed sets are the best value overall. Volume 2 of the first season has more great She-ra adventures to enjoy. It has some good appearances by supporting characters my daughter loves like Frosta and Castaspella. We never tire of watching these DVD's and can't wait for Season 2 to come out. this game system is raw any and everybody ashould get this oneThis TV is darn near perfect. The picture is as crisp as I could ever hope for, or imagine possible. The blacks are dark and rich, and the colors are vivid and natural. I watched \"300\" thru my blu-ray on this TV and I saw EVERY detail possible. I then watched \"Planet Earth\" thru the blu-ray - and I never even heard a word of narration the movie beacuse I was transfixed on the visuals. The only draw back is the lack of image-retention technology. This however can be easily remedied by breaking in the TV with repeated viewings of non-static images. I put a chapter of \"Planet Earth\" on repeat for 8 hours a day for a week. That did the job perfectly. If you want the best possible picture, the best possible resolution, and the best possible reliability, this Panasonic (in all its forms 42\", 50\", 58\") is as perfect a TV as money can buy. So go get one. When I first took the camera out of box, it's sleek style and beauty was just as amazing as the pictures it takes. This compact camera is jam-packed with so many features. This is the ultimate picture taking machine! Don't believe me? Try it for yourself!!I love mario and agian he strikes with another great game. I am not sure show I last this long without my Tivo Series3! I had a stand alone Series2 on my HD tube TV upstairs that got me started with Tivo. It changed my TV viewing habits (and my life) but I was constantly frustrated with the standard definition signal. When I finished my basement home theater I went with my Comcast HD-DVR... the Comcast Motorola box was like going back into the dark ages with a poor user interface, painful to navigate menus, and of course no \"Photos, Music, and More. \" I went through 2 Comcast boxes because they ran so hot in my cabinet that I had to replace them. I am now going on 1 year with my Series3 and LOVE it... using 2 CableCARDS and my HD off-air tuner I get all of the local HD channels plus my Comcast HD channels with Tivo's great user interface. With Tivo's 2.5 desktop software I am now able to stream my music and photos from my Vista PC to my HD TV. The only thing I am still waiting for is the software upgrade that will allow me to pull TV content off of my Series3 to PC. Have I mentioned that... I love my Tivo Series3. This unit is 100% unbelievable! Toshiba could charge $100 more and it would still be a great deal. Yes, it \"only\" sports a 1080i interface, but for most of us, there is very little difference. I have never noticed any \"combing\" from the 1080i feed. Also, this is the best upconverting/upscaling DVD player I have ever used and I have tried plenty. The upscaling performance, itself is worth the price. My normal DVD's look about %80 as good as my HD ones. Havent Purchased yet but have seen the amazing movie so far lowest price is at best buy but the story is INCREDIBLE even though it is not rated the worst thing in there is a bloody face because a boy got abused and a few swear words but nothing major it is a movie that u are sure to fall in love with and i would definally want to see it again and again,in fact i do!This is the best purchase I have ever made, and blows all my other systems away. Faaaassssst performance, reliable and a great deal. If you have the money, make the purchase. If not...borrow it!this game is great it's just like all the others from ps2 some new wepones and soume old one with different names. ratchet still kicks but and still hase cool guns. This player is excellent. Because it says \"plays for sure,\" you know it will work on Windows OK, but I don't use Windows, I use Ubuntu. This player worked perfectly. It played my music, wither in MP3, WMA or OGG format, which gives it a major advantage over most players. I was also pleased to find that it plays videos recorded in a standard, MP4 XVid format, meaning its easy to copy videos to your player. \\n\\nReviewers on the Internet rave about the battery life - they are all right. I've played and played this thing and the battery meter hasn't dropped a bit. I plug it into my computer once a week or so to copy music so that must be all it takes to keep the battery topped off. \\n\\nThe Radio feature uses the headphone cord as an antenna. I've not tried different headphones, so I don't know if switching to a new set will impair that feature. It pulls in FM signal crisp and clear though. \\n\\nFinally, my last favorite feature - two headphone jacks, so you can listen with a friend. :-)Decent computer for the money. It will run most high speed games but the processor isnt all its cracked up to be. I would definiantly try to find an intel based laptop if i could redo this. The graphics are also a let down, it isnt much better than my friends normal integrated and that doesnt mooch off of local ram. However, despite these shortcomings, its a heck of a computer for the 400$ I paid during a sell. I suggest upgrading both ram and processor since vista is made for dual core and 2 gigs of ram. Go with ram first if you dont have cash for both. This product replaces my old Linksys Vonage router that was taken out by lightening. Its very simple and easy to set up if you follow the instructions, I had phone service again within minutes. A big plus is the small size of this adapter, fits just about anywhere you want to put it. I have seen this roast at least 12 times on tv and all it did was get funnier. I purchased this system and it was def worth every penny... If you don't feel like building your own box and want everything in one package, this is the way to go. I had been looking at adding another monitor to my computer, I had a 17\" and two 15\"s. I was looking at adding another 17 to match but when I saw the price on this, I couldn't help it. After I unpacked and hooked it up, I went online to get another. Unfortunately they were out by then. Beautiful monitor, amazing color, awesome brightness! Whenever they get these back in stock I would buy ten of them for $175!!!My iPod made me by this. I had heard from some that macs were better than a pc. When my HP desktop died I decided to give Mac's a try (since they will now run XP). I figured anyone who could design and manufacture the iPod had to know something.. right? Picked this up, took it home and have not been able to stop saying \"wow\". \\n*Small footprint. Not much bigger than a 20\" LCD\\n*Elegant design\\n*Runs XP better than my HP box did\\n-Software selection is smaller but what is available is very good\\n-Some of the installed software seems simplier than Microsofts products\\n-Slight learning curve with 1 button mouse which I have now fixed with a microsoft bluetooth mouse\\nAll in all I was impressed with my new computer. I purchased \"Paralells\" that lets me run XP in a window. I also downloaded \"Boot Camp\" which allows me to run XP or even Vista from start up.. just like it was made for it... amazing!\\nAll in all I would give it an 9... the only downside is the higher cost of the computer to a PC. I got this headphones for my creative zen vision w, to start with i didnt like the headphones which came in with the player, So i bought an creative in ear ear plugs. First it sounded great then after 2 months my ear started to feel the pain. \\n\\nThen i decided to go far the Bose On the Ear headphones eventhough it was $170+. It sounds great with the player, i watch back to back movies in my player and still it doesnot hurt, The Soft leather which is in contact with the ear is very good, Sometimes my ear tend to get little warm but little warm for back to back movies should be OK i guess.\\n\\nI still think it is a bit overpriced eventhough its BOSE!!!I'm not a huge movie buff, so I can't tell you if this is 'Leo's' best or not... but I will say this. It's about the only movie released in the last year I'd be tempted to buy and take home! Suspenseful throughout, a good ending, and I thought all the actors did a good job (but I usually don't pick winners when the awards come around... just a fair warning). I thought it'd be good with the names it carries, but it by far surpassed expectation. Genre-wise, I'd put it something between Training Day and 16 Blocks. Great movie, I highly recommend it. This card is well worth the price. This card is where the gaming industry had always intended gaming to be and in the near future this will be the standard. Things don’t just look real; they act real and feel real. Massively destructible buildings and landscapes; explosions that cause collateral damage; lifelike characters with spectacular new weapons; realistic smoke, fog and oozing fluids are all now possible with the AGEIA PhysX processor! You truly feel like you're in the game. Example shooting a concrete wall you see the small pieces of debris fly off and the details are crisp. Another is when pieces of debris flys off and it come in contact with some other physical object it will react as it would in real life. The lush foliage that sways naturally when brushed against by you or others. This game is only available certain games now... but others will come... \\n\"That's What I've Been Saying!\"This is a great series. I nice new look on the classic tale of Robin Hood. The acting is great, especially that of Keith Allen. It can sometimes be a pretty corny show, but if you don't let that bother you it is a very enjoyable show. It is clean, funny, and action packed. A truly creative look on the story of Robin Hood. It is a must see for all true Robin Hood fans. The title says it all! This is truly the best and most affordable way to get into next-gen DVD's!\\nI bought this machine simply cause of the price, the urge to get into the next-gen DVD format, and for a pretty decent collection of movies. So far I have Shooter and King Kong and both look OUTSTANDING on my HDTV on 720p. I ordered 300 and GoodFellas to also add to my collection since those are two of my favorite movies! Yes I know that 1080p is the way to go, but 720p still looks phenominal, especially being compared to regular ol' DVDs. \\nAlso the menus are quite nice, in that you can access the main menu anytime when watching the movie!! Some HD-DVD movies also have an ability to view special features, such as: picture-in-picture while watching the movie!! 300 is supposed to have some of the best special features on an HD-DVD!!\\nOverall, if you own an HDTV whether it's 720p or 1080p, BUY THIS NOW!!!! It is seriously the most affordable way to enjoy your favorite movies in HD. And if (and most likely) Blu-Ray wins the format war, don't fret! At least you didn't spend $500 - $1000 dollars when HD-DVD players first came out. So what do you got to lose? Buy this machine now!!!Wildly entertaining! Laughed all the way through the movie. The four independently styled actors meld perfectly to keep the excitement high and the comedy non-stop. I purchased this from best buy about 2 weeks ago. \\nI have seen three blu-ray movies on it. I think this is the most incredible picture I've ever seen. \\nI love this TV. I find this album very very good,i have all the other ws albums and they are all good in their own way. This album has the distinct sound of the white stripes. Its kind of like a garage band mixed with a lot of funk. Whether its the driving beating drums of Meg White or the wailing voice and guitar of Jack White its an all around fantastic album!I was fortunate enough to be invited to a private listening party to hear tracks from the album on 8/1/2008. After hearing Bitter for Sweet, Stiff Kittens and Semiotic Love on their Myspace page, the 3 additional tracks were even futher proof that these two gentleman are on to something! We were given the album two weeks early and I have been listening to it every day since then. The album is driven by Jades songwritting talents that match and compliment Davey's lyrical abilities so well. The subject matter seems to be a bit more personal than previous work released by these two musicians from AFI. The songs that stand out for me are Semiotic Love which strikes me as an emotional discharge in a very nice contrast to the semi uplifting beat/music; The Fear of Being found seems to be directly influenced by the works of the wonderful Depeche Mode and has that same meloncholy approach to describing feelings that most of us have yet are unable to completely speak freely about; Again, Again and Again presents a struggle in controlling the urge to dance, a great song; Bitter for Sweet is probably the closest thing on the album to a song from AFI is that is what you are looking for in this album. However, be warned that you are barking up the wrong tree if you want an continuation of DECEMBERUNDERGROUND. This is a new expression of musical abilties, keep your preconceived notions away and be ready to dance! I believe the Best Buy version has an additional song exclusive to them, so pick this version up along with the different versions released to other retailers and iTunes. You wont be sorry!This is a great product. It fits my laptop perfectly. I have a Del Inspirion 1501, it's a little snug but that is how i wanted it. It's perfect if you plan to place your laptop in your bag and carry it. I prevents it from getting scratched from carelessness. Awesome product, and it's made by Targus, they've been making products for a while. realyy great movie, its funny, action packed and very suspenseful:)))This is the best game to come around in a long, long time. I recommend this game to anybody. This is my third pair of noise canceling equipment. In this category, you get what you pay for. $200 is a hefty price for headphones but Sony delivers. Other pairs create a nasty white noise when the cancellation is active. While a very quiet noise is still produced with these, it is far better than cheaper brands and is not noticeable when music is playing (tones sound brilliant with great highs and a surprisingly nice bass) or a fair amount of background noise is present. I wore these on the ride home from best buy and I felt like I was driving inside my car's stereo because that is all i could hear! There is even a button that mutes what you are listening to so you can hear the outside world whenever you need to without taking them off. the legends of rock guitar hero has the best list of master tracks out of all 4 games (Xbox 360 included) i have tried a demo of this game and it will blow you away!!! peace>My December is not like breakaway and certainly not like thankful. its a raw sound that just begins to touch on what Kelly Clarkson can really do. Kelly puts all her emotions into these songs and you can feel, and hear it. its brilliant and she is brilliant!.. give it a chance.. cause you wont be disappointed!!This guitar looks nice and works well. It seems a bit heavier than the hammer or other wireless models. The range is more than needed with no issues. There is even a blue LED light that illuminates by the strum bar. The only downfall with the one I purchased was it was harder to get it to go into Star Power Mode. When you point the guitar up you had to give it a little shake as well unlike other models. I happened to walk in the store the day after they received this computer and based on the size and the price I was hooked. I'm a pilot and portability was a must, as my last notebook was a 9 pound monster. The size of the 13.3\" screen is just right for someone who's looking to mainly surf the internet and read email. \\nAt first use I found the computer to be somewhat pokey, mainly during the start-up and shut down cycle. Ultimately, I found that a piece of Toshiba software, Toshiba Flash Cards, was the culprit and removed it from the system. The computer now runs smoothly and efficiently. \\nBattery life on this system is far better than other laptops I've used in the past. I get nearly 4 hours on the battery, if I can refrain from using the optical drive. \\nThe form factor and layout of the computer is good also, it's just large enough to be comfortable to use. If you've been thinking about getting a 12.1\" notebook, but find it just a little too small to be comfortable, the 13.3\" size is a perfect compromise. \\nOverall, I'm very satisfied. I'd recommend this computer to anyone who needs portability with basic computing needs. This whole series has been great and I believe that anyone who purchases this series will feel the same and those who watch a episode or two will definitly want to own the series, Because its just that good!!!Great extras make rewatching and re-rewatching the film very enjoyable!\\nThe movie is well crafted with great direction. Kudos to the Brits for showing the US how to really do a \"buddy cop\" flick! The film is well paced with a reasonable amount of action, a great twist, excellent comic timing, and plenty of intra- and extra-genre references for the movie buff. great ps3 controller for any who is a big eagles fan like myself great purchase in time for madden 08 great to play with madden 08 go get one nowworks great,very easy setup under 5 min. just pop in the disk and it gives you step by step directions. Its a good anime to watch and when I bought it, it came in good condition. A good anime to buy and watch it never gets boring at all. If you like vampires with the living died in animation you will definitely like this one, I did. If you like robots with funny characters that pilot them you like this anime. This is one of the best and funniest movies I've ever seen BUY IT!!!!!!!!!Right out of the box."
+
+      val document = "This is one of the best and funniest movies I've ever seen BUY IT!!!!!!!!!  Right out of the box, this 40\" LCD reaches out and begs you to plug her in! I hooked it up easily and had a picture to make my wife proud almost immediately. The presets were already awesome and we were awestruck! The colors and clarity are sensational and trump those of the Sony Trinitron Wega we are replacing.\\nThe cosmetics of this set are elegant, with the black piano frame and hidden from notice front speakers.\\nThere is no noticable glare on this screen that I can detect and we have it in a large vaulted family area with lighting and windows galore. I chose the LCD over a Plasma for that reason.\\nI am delighted with the price and sales transaction from BestBuy and look forward to having them visit in a few days to fine tune the big screen. This trip may not even be necessary but I am sure I will learn more about this hightly sophisticated jewel of a Big Screen LCD HDML TV.\\nI think anyone would be pleased with this new Sony Model KDL-40V3000 and the its many features, which are listed in the specifications.  I bought recently a 42' 720p Panasonic Plasma Television from Best Buy East Washington Street. I recomment this store as well as this brand of television if not the Pioneer Plasma Televisions. Consumer Reports rate them at the top and Im seeing it every night on my new blue ray player, dont miss a chance to upgrade to the new technology in this television let alone the new blue ray technology. Blockbuster recently acquired the rights to carry blu ray movies in their stores and I believe this is the way to go.  Best digital SLR camera on the market. Can hold its own even when compared to its standard 35 mil. counterpart.  extremely happy with this buy, highly recommended. just bought the cumpter as a replacement for my old one. very good buy for the money. A++++++ buy  Finally, a dual zone receiver under $1000! With Yamaha's THD at such a low number, this receiver is the most efficient amplifier currently sold by Best Buy. I choose Yamaha for there reputation and sound quality. Also, I wanted dual zone on a budget with HDMI.  Easy to use,and very confortable no power cord need it,very reliable and no false alarms or warnings.  You need to buy this . . . that's about it. But if you want a little more this is a TV show from the early 80's that was honestly one of the greatest kids shows in the history of time. The use of Muppet technology in this show was ground breaking and should be watched by everyone. They live in a cave, sweet!  It's so beautiful yet so powerful. Paul Gilbert has been a huge influence on me a a guitarist. They don't get the credit they deserve.  Adrien Brody is not only hot, yet very talented. No wonder why he got an Oscar. I love this movie. If you love Adrien Brody then you will love this film.  Pro-I am very happy with the performance of this router. This router supports WDS (wireless distributuion system), which allows you to extend the wireless network. I purchased two routers, one used as a router, and the other (located in another building accross the street) is switched to be used as a access point. I now have very good wired and wiress internet access in both buildings. Buffalo was the only one that has a switch on the bottom of the router allowing you to switch it to a access (or bridge) point. Cons- I needed Buffalo support for help with the settings. (note WDS not avaliable with their Draft N products)  I KNOW NOTHING AT ALL ABOUT THESE LITTLE MP3 PLAYERS BUT THIS ONE WAS A GREAT CHOICE! IT WAS EASY TO FIGURE OUT AND COMES LOADED WITH A LOT OF FEATURES! I LOVE HOW EASY THE SOFTWARE IS TO MANEUVOR AND THE PICTURE QUALITY IS FANTATSIC! THE BATTERY HOLDS A CHARGE FOREVER!!! AND ...IT'S SO TINY AND CUTE!  Kissology is the best dvd put out by the band. Us kiss fans have been waited a long time for this dvd!!!!!!!!!!!!  This is the most soulful album I've heard in years. I love this album. You have to get it, once you hear it I know you'll tell a friend. I couldn't keep this to myself! I don't endorse artists often, but this man is the bomb. See him and his band live, wow!\\n\\nI never knew he made my favorite house song \"Don't Change for Me.\"  I would highly recommend this unit. I read MANY reviews about this unit and a lot of them said the built in fm transmitter did not work well at all. My experience has been the exact opposite. I live in the Minneapolis/St. Paul metropolitan area where there are lots of radio stations and I have not had any problems with the fm transmitter that is built into this unit. I also use the included \"sure connect\" that comes with it. I have found it to work extremely well and the sound quality is great! I am very pleased with this unit and would recommend it to anyone.  I have had this DVR since the day it became available last year. At first it had alot of issues, but DirecTV has made alot of upgrades to the software and now I do not hesitate to recommend this unit. You can set the unit to record every episode of a season, I now have it hooked to my home network and can view pictures and listen to .mp3s, and the picture is great!\\nOnce we buy a 2nd LCD for the bedroom, we'll be getting another HR20!  I have been looking for a new HDTV for about 1 year. After reviews and going to BB like every day I picked this TV.\\nPicture Quality is Second to none. It made the Sony SXRD look bad. This TV is also 3D ready. This Fall Samsung and their partners are to release Software (firmware update) and glasses that will turn this into 3D home TV. The 10,000 to one cantrast ration makes the blacks look deep. I have an xbox360 and the 1080p through component looks great. My fios TV through HDMI looks wonderful also. I'm waiting on either Blu-ray or HD-DVD to win the format war so I can get a HD-movie player.\\nI got this for $1700 and I would not trade it for any TV. If you are a gamer and a movie watcher you gotta get this tv. You have not experienced HD right if you don't have the right TV.  I have had various PCs in my home for over 25 years and this has got to be one of my favorites. Having moved to a small apartment in order to get my PhD, this computer is perfect...one wire to plug it in, TV and computer all in one, a great screen, compact, and easy to operate. I was planning on spending almost $4,000 to build a PC online to get everything I wanted, and this saved me a ton of money and gave me almost everything I would have included. The light-scribe DVD was just a fun bonus. I may decide to add my surround speakers later, but for now, the sound is even better for apartment living. It's one of the Best Buys I've ever made. Thanks!  I've had this sweeeet player (30 gig model, black) for over a year now and haven't had a single problem with it. Where to start???\\nThe Zen's screen is beautiful for anyone who enjoys watching videos...as it is approx. 256000 colors, compared to ipod's 65000 color screen (4x greater) and the details are pretty much crystal clear. Plus, you even have the option of adjusting the brightness of the screen (anywhere from 10% to 100%) which is a feature not included w/Ipod. U can choose ur own background pic as well to give it a unique look. the screen, however, can catch fingerprints easily but i dont have a problem since i use a cloth to clean it off every now and then.\\nBattery life is pretty long, 14 hours audio or 4 hours on video, which is way more than ipod's battery.\\nThe ZEN takes the win totally when it comes to features. It has a built in FM radio w/recorder, a microphone, and can function as a removable disk.\\nAs far as size goes, it is bigger than the Ipod, but I like it cause it feels firm in my hand. I dropped it maybe once or twice unfortunately, but it works fine. Scratches are very minimal, pretty much unnoticable, even though i have black color! u have to look pretty hard to see them.\\nFinally, sound is amazing with these headphones that are generally overlooked!!! The player also comes with an awesome equalizer and bass settings that can make it sound just right.\\nAlso, the touchpad might be annoying to some initially as it took me about 10 min getting used to it. However, u can adjust its sensitivity and then it becomes fine.\\n\\nPROS: screen quality! battery life! audio! TON OF MORE FEATURES THAN IPOD!!!\\nCONS: software not the best, player prone to fingerprints, some may find it bulky...comes w/USB charger, not a power adapter\\n\\nThis is an excellent player for the price (u can find better deals if u look around) and is officially the ipod killa!!!  Two Words.....Ah-Mazing\\nI loved it to the fullest\\nshe is the funniest person alive!!!  I was up late one night and fell asleep with the television on. I woke up to a beautiful song. Brandi Carlile, who is that? I was in a trance at her voice and songs. I just cant put into words how calm her songs made me. A true talent and gift to this world! I recommend to every one.  I would like to let people know that if you are looking for a nice ipod that plays videos this is the one to go with. I have over 10,000 songs and still have room for more like videos and photos. The photos come out clear and I love it!!!!! 80gigs might be a bit much but the 30 is just right. I still haven't filled it up!!!! Must buy!!  I got this for my boyfriend. It was actually for hunting season for the slow days but he uses it everyday .We looked at several brands. He compared everything and in our opinion this is by far the BEST value / memory / rechargable battery / size screen / great price . We know people that have the Zune and other players and the drawbacks and we are more than pleased with the Insignia. In fact im getting 1 for myself soon. Thank you Insignia !  I took this unit on a trip to Niagra Falls, Ontario, from southwestern, PA. It was unbelievable!!! The initial set-up had very little instructions on how to set it up, such as where the plugs (power) go, but it did not take long to figure it out. The voice was very clear, the screen was easy to view. I used it when we were in Canada, and all of the info was just as accurate. If you miss a turn even to pull in and get gas it recalculates and gets you back on track within seconds. It did route us to a bridge that was used only for the Customs agents, but it has a Detour button and when you push it...Recalculating past it, and we are moving again. The unit also keeps an estimated time of arrival on the screen so you can answer that question...When are we gonna be there??? Make sure you get the one that has the text - voice (this one does have it), it really helps when there are multiple streets to turn on. It has it's flaws when you first start using it, but once you figure out the many options it is very easy to use. Not a \"Best Buy\" but a Great Buy for the money!!  Hot very hot, the graphics are crazy, the controls are great and the challenge well lets say i loved to hate the game, but when i did pass a hard track i felt like a king.\\ngreat game.  The feature I enjoy the most about this television besides its picture and size is the ability to choose to save additional electricity. When you set up the LCD you can choose to prevent the \"vampire\" effect. I will be purchasing the 32\" shortly, because I am so pleased with my 26\".  I live in a two story unit on the top two floors of an old building (think: \"heat rises\") with 12 foot ceilings. On Friday morning, my central air went on vacation, and after a few phone calls, I found out no one could come out to make repairs until Monday (at the earliest).\\n\\nSince I was facing 95+ degree weather for the weekend, I decided I needed to do SOMETHING. I bought two of these little window units, and put one in upstairs and one downstairs. The temperature in my condo was at that point 94 degrees.\\n\\nAfter two hours of operation, my condo was a comfortable 76 degrees. By Saturday, once the two A/C's had the chance to \"catch up,\" they had no problem cooling my home to a more than comfortable 70 degrees.\\n\\nI must also say that although each unit is rated to cool 150 SqFt, that is understated. The downstairs unit is cooling roughly 550 SqFt and the upstairs unit is cooling roughly 600 SqFt. They are working non-stop, but doing a great job of cooling my home.\\n\\nThe units are a tad on the noisy side, but not unreasonably so -- the noise has faded into the background, now that I've become used to it.\\n\\nI highly recommend this product, especially considering the price and the fact that two of them are cooling 1150 SqFt to 70 degrees (despite the fact that combined they are rated to cool only 300 SqFt).\\n\\nA GREAT buy, and a lifesaver on a 95 degree weekend without central air!!  this cd is awesome if you want to see and hear some of the great songs on it check out the mhs marching bands 2007-08 halftime show  This game is really cool. I love how you can interact with different charecters and how Nancy Drew protrays different people in different games. I have learned alot from Nancy Drew games.\\nBUY NANCY DREW!!  Having read about this movie on other sites, I decided, \"Hey, this film is worth a shot.\" I also enjoy period, fantasy, romance and adventure films. A friend of mine bought the film for me and I can't stop repeating the lines. \"As You Wish,\" Hello. My name is Inigo Montoya. You killed my father. Prepare to die,\" \"Inconcievable,\" etc. The performances are magnificent-from Cary Elwes (in his second movie) to Robin Wright (before she Married Sean Penn) and the rest of the cast. The movie manages to make fun of traditional fairy tales while not taking itself seriously. As Rob Reiner says in the all-new documentary \"As You Wish: The Making Of The Princess Bride,\" it's a celebration of true love. The sword fight on the Cliffs Of Insanity is terrific and remeniscent of Errol Flynn swashbucklers. Of course, there is romance...and beautiful love scenes they are. With the film itself being free of any objectionable material, you might almost think that this is a Disney film. Well, it's not, but Walt Disney himself would be proud of this movie. Romance, adventure and comedy all in one spectacular movie. It's a chick flick. Recommended for girls' night out. Rated PG for mild adventure violence.  Spacious fridge and one of the few of its kind we could find with water on the door.  this game is aswem it letes u create ur own person and everything its by far the best game for the wii.the wii is the bestsystem and this game reassures that so if u want a very in tertaing game buy it!!PEACE OUT!!!!!!!!!!!!!  This bag not only accomodates some of the largest notebooks around, but will also seemelessly adjust to smaller models. The design takes into consideration all of your other needs; from storage of accessories, and other devices like your P.D.A., cell-phone, and music player. You can even keep your music player safely inside, and run your headphones through the simple but ingenius slot made for just that purpose. The padding adds a layer of comfort and security you seldom find in any case on the market. There is just not enough to be said about this bag. If you need to store, carry, or show off your notebook, or just want to know it's safe, this is the product to buy.  strategic rationality. spontaneous revelation. superb reluctance. Dont miss it.  It is a very nice product!And I like it very very much, thanks apple!!!  if you like hardcore metal then you will love children of bodom! they have a very unique sound that you wont find anywhere else. in my opinion the best song on this c.d is \"triple corpse hammerblow\" but the whole c.d is really worth buying. so buy this c.d! support the hardcore!!!  this MAC is amazing. More fun and better options have been added.  I have two of these printers. Worth every dime. The only problem I ever had with either is the pick up tray in the back pulling the next sheet in. And that was only after I had them for over a year (the rollers were probably needing service or replaced). Very good quality print, very easy to operate, very easy to scan and the scan quality is superb. Overall, I'd still give this printer a 5 star rating. I use these for business so they do get used quite a bit. Thanks, Canon!  Plays great\\nGreat gaming expierience\\nXbox Live rocks  They \"snacked on danger & Dined on death\" for 2 decades, dominating professional wrestling and Imortalizing themselves as the only tag team ever to\\nhold all 3 major titles (NWA(WCW), AWA, WWE(WWF)\\n\\nHawk and Animal, Better known as The Legion Of Doom,\\nrampaged thru the AWA, NWA, All Japan Pro Wrestling,\\nWCW, & WWE.\\n\\nIn the DVD's The stunning journey unfolds, from their auspicious debut in the early 80's to the tragic death of Michael \"Hawk\" Hegstrand in 2003. You'll ride along with\\nthe Road Warriors' most memorable bouts interviews and more.\\n\\nBrutal. Commanding. Unforgetable.\\n\\nOh, What a Rushhhhhhhhhhh!\\n\\nI have to say, that if your a major fan of this tag team then this is the DVD for you, both the DVD's have everything you expect from the LOD aka The Road Warriors, Disc one has interviews, clips and background of the most feared tag team in wrestling history, Disc 2 has 14 of LOD's best matches. This set of dvd's comes Highly recommended for you LOD fans.  My husband and I purchased this monitor with a complete package deal at our local Best Buy store and we love it! It is like watching tv, but typing. I never thought messing around on a computer could be this fun until we got this huge screen. It is a 22 inch Westinghouse monitor that goes great with our CP unit and internet capabilities. We are impressed by it so much and so is everyone that has seen it. Thank you Best Buy and hope others enjoy it as much as we have!  This movie is the best and its really funny.The movie makes fun of movies like scream,I know what you did last summer and a bunch of otheres.Its way better then scary movie.you should go buy this movie caus it is worth to buy and you will be laughing threw the whole thing I promise you that.  At first, you are not sure why you are watching this movie. Then, you are drawn into a charming. funny story remenicent of the best of the office, scrubs and your favorite romantic comedy. This is a chick flick for guys and a buddy picture for chicks. By the end I was grinning from ear to ear with tears in my eyes. A must see and one you will want to own and watch again and again.  this a great movie. the fight scene between leo and raph was great. this is one of the best tmnt movies i have seen  The P520 is a nice little recorder to capture the special moments that matter.\\nStill trying to get an update for the software!\\nAny links for windows 2000nt  By far the best new cd I have heard in awhile. His music combines a super sweet Jazz feel with a freestyling lyrical genius in only away Jake Smith can. And the best part is that he is even better live...This CD is far from boring and will have you nodding your head to the beat track after track. If you are looking for a new artist that truly encapsulates soul and R&B while maintaining a new fresh feel Jake Smith's Real is the REAL deal!!! So amazing.  I purchased this Toshiba HD DVD player after carefully considering the HD vs BLU-RAY format war. With no immediate end in sight, I decided to take my chances with HD DVD and I was not disappointed. The unit is easy to install and to operate and on my Panasonic 32 inch LCD TV with 720p resolution, the images in hi def are worth the investment. Especially impressive is the PLANET EARTH series, which was filmed in hi definition. What I learned from my initial excitement in buying different HD DVDs, is that not all HD DVDs look hi def, most likely from the compression process, the movie being older, etc. But recent movies, and of course, those filmed in hi definition, bring a beautiful, clear, crisp, video image. Audio reproduction is equally impressive. I would highly recommend this unit to anyone looking to jump into the world of hi definition. "
+
+      // now, annotate a document.
+      val instanceID = 0
+      val instance =
+        GoldLabeledTweet(instanceID.toString, "",
+          TokenizationPipes.filterOnStopset(Set())(
+            TokenizationPipes.filterOnRegex("(\\p{Alpha}|')+")(
+              TokenizationPipes.twokenize(
+                TokenizationPipes.toLowercase(List(document))))),
+          SentimentLabel.Positive)
+      val topicDist = model.inferTopics(instance)
+
+
+      val sents = sdetector.sentDetect(document).toList.map((s) => s.split("but|;").toList).flatten.filter((s) => s.split("\\s+").length >= 5)
+
+      val results =
+        (for (sent <- sents) yield {
+          val tokens = tokenizer.tokenize(sent)
+          val myTokens = TokenizationPipes.filterOnStopset(Set())(
+            TokenizationPipes.filterOnRegex("(\\p{Alpha}|')+")(
+              TokenizationPipes.twokenize(
+                TokenizationPipes.toLowercase(List(sent)))))
+          val topics = model.getTopics
+          val scores =
+            (for ((Topic(prior, distribution), index) <- topics.zipWithIndex) yield {
+              val d2 = distribution.withDefault((s) => 0.0)
+              prior * myTokens.map((token) => d2(token)).reduce(_ + _)
+            }).toList
+          val maxScore = scores.indexOf(scores.max)
+          logger.debug(maxScore + "\t" + scores + "\t" + sent)
+          maxScore
+        }).groupBy((x) => x).map {
+          case (k, v) => (k, v.length.toFloat / sents.length)
+        }.toList.sorted
+
+      logger.info(results.toString)
+      logger.info(topicDist.toString)
+
+      val chunks =
+        (for (sent <- sents) yield {
+          val tokens = tokenizer.tokenize(sent)
+
+          val tags = posTagger.tag(tokens)
+          val chunks = chunker.chunk(tokens, tags)
+          chunks
+          var chunked = List[List[String]]()
+          var chunk = List[String]()
+          for ((token, tag) <- tokens.toList zip chunks.toList) {
+            val (mark) = tag.charAt(0)
+            mark match {
+              case 'B' =>
+                chunked = chunk.reverse :: chunked
+                chunk = List(token)
+              case 'I' =>
+                chunk = token :: chunk
+              case 'O' => ()
+              case x =>
+                logger.error("Got unexpected token, tag output from the chunker: %s,%s".format(token, tag))
+            }
+          }
+          (chunk.reverse :: chunked).reverse
+        }).flatten.filter((c) => c.length >= 5)
+      val results2 =
+        (for (chunk <- chunks) yield {
+          val sent = chunk.mkString(" ")
+          val tokens = tokenizer.tokenize(sent)
+          val myTokens = TokenizationPipes.filterOnStopset(Set())(
+            TokenizationPipes.filterOnRegex("(\\p{Alpha}|')+")(
+              TokenizationPipes.twokenize(
+                TokenizationPipes.toLowercase(List(sent)))))
+          val topics = model.getTopics
+          val scores =
+            (for ((Topic(prior, distribution), index) <- topics.zipWithIndex) yield {
+              val d2 = distribution.withDefault((s) => 0.0)
+              if (myTokens.length > 0) {
+                prior * myTokens.map((token) => d2(token)).reduce(_ + _)
+              } else {
+                0.0
+              }
+            }).toList
+          val maxScore = scores.indexOf(scores.max)
+          logger.debug(maxScore + "\t" + scores + "\t" + sent)
+          maxScore
+        }).groupBy((x) => x).map {
+          case (k, v) => (k, v.length.toFloat / chunks.length)
+        }.toList.sorted
+
+      logger.info(results2.toString)
+      logger.info(topicDist.toString)
+
+      if (saveModelOption.value.isDefined) {
+        model.save(saveModelOption.value.get)
+      }
+    }
+
+
+    catch {
+      case e: ArgotUsageException => println(e.message); sys.exit(1)
+    }
+  }
+
+}

src/main/scala/updown/app/experiment/Experiment.scala

+package updown.app.experiment
+
+import updown.util.Statistics
+import updown.data.{TargetedSystemLabeledTweet, SystemLabeledTweet}
+import org.clapper.argot.{ArgotParser, SingleValueOption}
+import org.clapper.argot.ArgotConverters._
+import com.weiglewilczek.slf4s.Logging
+
+abstract class Experiment extends Logging {
+  val parser = new ArgotParser(this.getClass.getName, preUsage = Some("Updown"))
+  val targetsInputFile = parser.option[String](List("t", "targets"), "targets", "targets")
+
+  def report(labeledTweets: List[SystemLabeledTweet]) {
+    logger.info("Overall:\n" + Statistics.getEvalStats("", labeledTweets).toString)
+    val statsPerUser: List[ExperimentalResult] = Statistics.getEvalStatsPerUser("", labeledTweets)
+    logger.info("Per-user Summary:\n"+Statistics.mean(statsPerUser)+"\n"+Statistics.variance(statsPerUser))
+    if (statsPerUser.length > 0)
+      logger.debug("Per-user:\n" + statsPerUser.mkString("\n"))
+    else
+      logger.info("Per-user: No users were over the threshold.")
+
+    targetsInputFile.value match {
+      case Some(filename) =>
+        val targets: Map[String, String] =
+          (for (line <- scala.io.Source.fromFile(filename, "UTF-8").getLines) yield {
+            val arr = line.trim.split("\\|")
+            (arr(0) -> arr(1))
+          }).toMap.withDefault(_ => "UNKNOWN")
+
+        val targetedTweets = labeledTweets.map {
+          case SystemLabeledTweet(id, uid, features, gLabel, sLabel) =>
+            TargetedSystemLabeledTweet(id, uid, features, gLabel, sLabel, targets(id))
+        }
+        val statsPerTarget: List[ExperimentalResult] = Statistics.getEvalStatsPerTarget("", targetedTweets)
+        if (statsPerTarget.length > 0){
+          logger.info("Per-target Summary:\n"+Statistics.mean(statsPerTarget)+"\n"+Statistics.variance(statsPerTarget))
+
+          logger.debug("Per-target:\n" + statsPerTarget.mkString("\n"))
+        }else
+          logger.info("Per-target: No targets were over the threshold")
+      case None =>
+        logger.info("Per-target: No target file provided")
+    }
+  }
+}

src/main/scala/updown/app/experiment/ExperimentalResult.scala

+package updown.app.experiment
+
+import updown.data.SentimentLabel
+
+case class ExperimentalResult(name: String, n: Int, accuracy: Double, classes: List[LabelResult]) {
+
+  override def toString(): String =
+    "%s Results:\n".format(name) +
+      "%10s%6d\n".format("N", n) +
+      "%10s%6.2f\n".format("Accuracy", accuracy) +
+      "\n%15s%5s%11s%8s%9s\n".format("Label", "N", "Precision", "Recall", "F-Score") +
+      (for (res <- classes) yield res.toString).mkString("\n") + "\n"
+
+  def rename(newName: String): ExperimentalResult =
+    ExperimentalResult(newName, n,accuracy,classes)
+
+  def +(other: ExperimentalResult): ExperimentalResult = {
+    val classesMap = (classes.groupBy((labelResult) => labelResult.label).map((tup) => {
+      val (k, (v: LabelResult) :: vs) = tup
+      (k, v)
+    })).toMap
+    val otherClassesMap = (other.classes.groupBy((labelResult) => labelResult.label).map((tup) => {
+      val (k, (v: LabelResult) :: vs) = tup
+      (k, v)
+    }).toMap).withDefaultValue(LabelResult(0, SentimentLabel.Abstained, 0.0, 0.0, 0.0))
+    ExperimentalResult(name, n + other.n, accuracy + other.accuracy,
+      (for ((label, classResult) <- classesMap.toList) yield classResult + otherClassesMap(label)).toList
+    )
+  }
+
+  def *(other: ExperimentalResult): ExperimentalResult = {
+    val classesMap = (classes.groupBy((labelResult) => labelResult.label).map((tup) => {
+      val (k, (v: LabelResult) :: vs) = tup
+      (k, v)
+    })).toMap
+    val otherClassesMap = (other.classes.groupBy((labelResult) => labelResult.label).map((tup) => {
+      val (k, (v: LabelResult) :: vs) = tup
+      (k, v)
+    }).toMap).withDefaultValue(LabelResult(0, SentimentLabel.Abstained, 0.0, 0.0, 0.0))
+    ExperimentalResult(name, n * other.n, accuracy * other.accuracy,
+      (for ((label, classResult) <- classesMap.toList) yield classResult * otherClassesMap(label)).toList
+    )
+  }
+
+  def -(other: ExperimentalResult): ExperimentalResult = {
+    val negOther = other * -1
+    this + negOther
+  }
+
+  def /(scalar: Double): ExperimentalResult = {
+    ExperimentalResult(name, (n.toFloat / scalar).toInt, accuracy / scalar,
+      (for (labelResult <- classes) yield labelResult / scalar).toList
+    )
+  }
+
+  def *(scalar: Double): ExperimentalResult = {
+    ExperimentalResult(name, (n.toFloat * scalar).toInt, accuracy * scalar,
+      (for (labelResult <- classes) yield labelResult * scalar).toList
+    )
+  }
+}
+
+case class LabelResult(n: Int, label: SentimentLabel.Type, precision: Double, recall: Double, f: Double) {
+  override def toString(): String = "%15s%5d%11.2f%8.2f%9.2f".format(SentimentLabel.toEnglishName(label), n, precision, recall, f)
+
+  def +(other: LabelResult): LabelResult = {
+    assert(label == other.label)
+    LabelResult(n + other.n, label, precision + other.precision, recall + other.recall, f + other.f)
+  }
+
+  def *(other: LabelResult): LabelResult = {
+    assert(label == other.label)
+    LabelResult(n * other.n, label, precision * other.precision, recall * other.recall, f * other.f)
+  }
+
+  def /(scalar: Double): LabelResult = LabelResult((n.toFloat / scalar).toInt, label, precision / scalar, recall / scalar, f / scalar)
+
+  def *(scalar: Double): LabelResult = LabelResult((n.toFloat * scalar).toInt, label, precision * scalar, recall * scalar, f * scalar)
+}

src/main/scala/updown/app/experiment/NFoldExperiment.scala

 abstract class NFoldExperiment extends Logging {
   // this exists purely to make the ArgotConverters appear used to IDEA
   convertByte _
-  val parser = new ArgotParser(this.getClass.getName, preUsage = Some("Updown"))
+  val parser = new ArgotParser(this.getClass.getName)
+    
   val goldInputFile = parser.option[String](List("g", "gold"), "gold", "gold labeled input")
   val n = parser.option[Int](List("n", "folds"), "FOLDS", "the number of folds for the experiment (default 10)")
   var experimentalRun = 0
 
-  def doExperiment(testSet: List[GoldLabeledTweet], trainSet: List[GoldLabeledTweet]):
-  (Double, List[(updown.data.SentimentLabel.Type, Double, Double, Double)])
-
-  def after(): Int
+  def doExperiment(train: List[GoldLabeledTweet], test: List[GoldLabeledTweet]): ExperimentalResult
 
   def generateTrials(inputFile: String, nFolds: Int): Iterator[(List[GoldLabeledTweet], List[GoldLabeledTweet])] = {
     val polToTweetLists = TweetFeatureReader(inputFile).groupBy((tweet) => tweet.goldLabel)
     logger.info("takining %d items from each polarity class. This was the minimum number in any class".format(minListLength))
     val allTweetsFolded =
       (for (index <- 0 until minListLength) yield {
-          (for ((pol, tweetList) <- polToTweetLists) yield {
-            (pol, index, (index % nFolds, tweetList(index)))
-          }).toList.map{case(pol,index,item)=>item}
-          // this is really strange. If I just emit the item, it only emits every nth one.
-          // Somehow, emitting a tuple and then unmapping it fixes the problem.
-          // I'm guessing this is because the input is a map, and it is trying to make the output a map as well.
+        (for ((pol, tweetList) <- polToTweetLists) yield {
+          (pol, index, (index % nFolds, tweetList(index)))
+        }).toList.map {
+          case (pol, index, item) => item
+        }
+        // this is really strange. If I just emit the item, it only emits every nth one.
+        // Somehow, emitting a tuple and then unmapping it fixes the problem.
+        // I'm guessing this is because the input is a map, and it is trying to make the output a map as well.
       }).toList.flatten
 
-    val foldsToTweets = allTweetsFolded.groupBy{case(fold, tweet) => fold}
-      .map{case(fold,list)=>(fold,list.map{case(fold,tweet)=>tweet})}
+    val foldsToTweets = allTweetsFolded.groupBy {
+      case (fold, tweet) => fold
+    }
+      .map {
+      case (fold, list) => (fold, list.map {
+        case (fold, tweet) => tweet
+      })
+    }
 
     (for ((heldOutFold, heldOutData) <- foldsToTweets) yield {
-      (heldOutData, foldsToTweets.filter{case(setNo,list)=>setNo != heldOutFold}.map{case(setNo,list)=>list}.flatten.toList)
+      (heldOutData, foldsToTweets.filter {
+        case (setNo, list) => setNo != heldOutFold
+      }.map {
+        case (setNo, list) => list
+      }.flatten.toList)
     }).iterator
   }
 
   def main(args: Array[String]) {
     try {
       parser.parse(args)
-
       val nFolds: Int = n.value.getOrElse(10)
 
       if (goldInputFile.value == None) {
         (for ((testSet, trainSet) <- generateTrials(inputFile, nFolds)) yield {
           experimentalRun += 1
           logger.debug("starting run " + experimentalRun)
-          val result = doExperiment(testSet, trainSet)
+          val result = doExperiment(trainSet, testSet)
           logger.debug("ending run " + experimentalRun)
           result
         }).toList
 
       logger.info("intermediate results:\n" + results.mkString("\n"))
-      println("\n" + Statistics.reportResults(Statistics.averageResults(results)))
+      println("\n" + Statistics.averageResults("%d-fold Average".format(nFolds), results).toString)
       logger.debug("running cleanup code")
-      System.exit(after())
     }
     catch {
       case e: ArgotUsageException => println(e.message); sys.exit(1)

src/main/scala/updown/app/experiment/SplitExperiment.scala

+package updown.app.experiment
+
+import updown.data.io.TweetFeatureReader
+import org.clapper.argot.ArgotParser._
+import org.clapper.argot.ArgotConverters._
+import com.weiglewilczek.slf4s.Logging
+import updown.util.Statistics
+import org.clapper.argot.{ArgotUsageException, ArgotParser}
+import updown.data.{SystemLabeledTweet, GoldLabeledTweet}
+
+abstract class SplitExperiment extends Experiment {
+  // this exists purely to make the ArgotConverters appear used to IDEA
+  convertByte _
+  val goldTrainSet = parser.option[String](List("e", "test"), "FILE", "gold labeled training data")
+  val goldTestSet = parser.option[String](List("g", "train"), "FILE", "gold labeled test data")
+
+  def doExperiment(testSet: List[GoldLabeledTweet], trainSet: List[GoldLabeledTweet]): List[SystemLabeledTweet]
+
+  def after(): Int
+
+  def main(args: Array[String]) {
+    try {
+      parser.parse(args)
+
+      val trainFileName =
+        goldTrainSet.value match {
+          case Some(filename) => filename
+          case None => parser.usage("You must specify a gold labeled training file via -i.")
+        }
+      val testFileName =
+        goldTestSet.value match {
+          case Some(filename) => filename
+          case None => parser.usage("You must specify a gold labeled test file via -i.")
+        }
+      val result =
+      {
+          logger.debug("starting run")
+          val result = doExperiment(TweetFeatureReader(testFileName), TweetFeatureReader(trainFileName))
+          logger.debug("ending run")
+          result
+      }
+      
+      report(result)
+      logger.debug("running cleanup code")
+      System.exit(after())
+    }
+    catch {
+      case e: ArgotUsageException => println(e.message); sys.exit(1)
+    }
+  }
+}

src/main/scala/updown/app/experiment/StaticExperiment.scala

+package updown.app.experiment
+
+import updown.data.io.TweetFeatureReader
+import org.clapper.argot.ArgotParser._
+import org.clapper.argot.ArgotConverters._
+import org.clapper.argot.ArgotUsageException
+import updown.data.{SystemLabeledTweet, GoldLabeledTweet}
+
+abstract class StaticExperiment extends Experiment {
+  // this exists purely to make the ArgotConverters appear used to IDEA
+  convertByte _
+  val goldData = parser.option[String](List("g", "input"), "FILE", "gold labeled input data")
+
+  def doExperiment(dataSet: List[GoldLabeledTweet]): List[SystemLabeledTweet]
+
+  def after(): Int
+
+  def main(args: Array[String]) {
+    try {
+      parser.parse(args)
+
+      val dataFileName =
+        goldData.value match {
+          case Some(filename) => filename
+          case None => parser.usage("You must specify a gold labeled input file via -g.")
+        }
+
+      logger.debug("starting run")
+      val labeledTweets = doExperiment(TweetFeatureReader(dataFileName))
+      logger.debug("ending run")
+
+      report(labeledTweets)
+
+      logger.debug("running cleanup code")
+      System.exit(after())
+    }
+    catch {
+      case e: ArgotUsageException => println(e.message); sys.exit(1)
+    }
+  }
+}

src/main/scala/updown/app/experiment/labelprop/StaticJuntoExperiment.scala

+package updown.app.experiment.labelprop
+
+import opennlp.maxent.io.BinaryGISModelReader
+import org.clapper.argot.ArgotParser
+import org.clapper.argot.ArgotParser._
+import org.clapper.argot.ArgotConverters._
+import updown.lex.MPQALexicon._
+import updown.lex.MPQALexicon
+import upenn.junto.config.GraphBuilder._
+import opennlp.model.AbstractModel
+import upenn.junto.config.{Edge, Label, GraphBuilder}
+import updown.data.{ProbabilityLexicon, SystemLabeledTweet, GoldLabeledTweet, SentimentLabel}
+import java.util.zip.GZIPInputStream
+import java.io.{ObjectInputStream, FileInputStream, DataInputStream}
+import updown.data.io.TweetFeatureReader._
+import updown.app.experiment.{SplitExperiment, StaticExperiment}
+import upenn.junto.app.JuntoRunner._
+import upenn.junto.app.JuntoRunner
+import scala.collection.JavaConversions._
+
+object StaticJuntoExperiment extends StaticExperiment {
+  val DEFAULT_MU1 = .005
+  val DEFAULT_ITERATIONS = 100
+  val DEFAULT_EDGE_SEED_SET = "nfmoe"
+  val nodeRE = """^(.+_)(.+)$""".r
+  val posEmoticons = """:) :D =D =) :] =] :-) :-D :-] ;) ;D ;] ;-) ;-D ;-]""".split(" ")
+  val negEmoticons = """:( =( :[ =[ :-( :-[ :’( :’[ D:""".split(" ")
+
+  val TWEET_ = "tweet_"
+  val USER_ = "user_"
+  val NGRAM_ = "ngram_"
+  val POS = "POS"
+  val NEG = "NEG"
+  val NEU = "NEU"
+
+  // for weighting MPQA seeds
+  val BIG = 0.9
+  val BIG_COMP = .1
+  val SMALL = 0.8
+  val SMALL_COMP = .2
+
+  val modelInputFile = parser.option[String](List("m", "model"), "model", "model input")
+  val mpqaInputFile = parser.option[String](List("p", "mpqa"), "mpqa", "MPQA sentiment lexicon input file")
+  val followerGraphFile = parser.option[String](List("f", "follower-graph"), "follower-graph", "twitter follower graph input file")
+  val refCorpusProbsFile = parser.option[String](List("r", "reference-corpus-probabilities"), "ref-corp-probs", "reference corpus probabilities input file")
+
+  val edgeSeedSetOption = parser.option[String](List("e", "edge-seed-set-selection"), "edge-seed-set-selection", "edge/seed set selection")
+  val topNOutputFile = parser.option[String](List("z", "top-n-file"), "top-n-file", "top-n-file")
+
+  val mu1 = parser.option[Double](List("u", "mu1"), "mu1", "seed injection probability")
+  val iterations = parser.option[Int](List("n", "iterations"), "iterations", "number of iterations")
+
+  val getNgramWeightFn: (Any, List[GoldLabeledTweet]) => ((String) => Double) =
+    (refCorpusFileOption, trainSet) => {
+      refCorpusFileOption match {
+        case Some(filename: String) =>
+          val refCorpusNgramProbs = loadRefCorpusNgramProbs(filename)
+          val thisCorpusNgramProbs = computeNgramProbs(trainSet)
+          (ngram) => {
+            val numerator = thisCorpusNgramProbs(ngram)
+            val denominator = refCorpusNgramProbs.getNgramProb(ngram)
+
+            if (denominator == 0.0) 0.0 //ngram not found in reference corpus; assume NOT relevant to this corpus
+            else if (numerator > denominator) math.log(numerator / denominator)
+            else 0.0
+          }
+
+        case None => (str) => 1.0
+      }
+    }
+
+  def getTweetNgramEdges(tweets: List[GoldLabeledTweet], getNgramWeight: (String) => Double): List[Edge] = {
+    (for (tweet <- tweets) yield {
+      for (ngram <- tweet.features) yield {
+        val weight = getNgramWeight(ngram)
+        if (weight > 0.0) Some(new Edge(TWEET_ + tweet.id, NGRAM_ + ngram, weight)) else None
+      }
+    }).flatten.flatten
+  }
+
+  def getFollowerEdges(followerGraphFile: String): List[Edge] = {
+    (for (line <- scala.io.Source.fromFile(followerGraphFile, "utf-8").getLines) yield {
+      val tokens = line.split("\t")
+      if (tokens.length < 2 || tokens(0).length == 0 || tokens(1).length == 0) None else Some(new Edge(USER_ + tokens(0), USER_ + tokens(1), 1.0))
+    }).flatten.toList
+  }
+
+  def getUserTweetEdges(tweets: List[GoldLabeledTweet]): List[Edge] = (for (tweet <- tweets) yield new Edge(USER_ + tweet.userid, TWEET_ + tweet.id, 1.0))
+
+  def getMaxentSeeds(tweets: List[GoldLabeledTweet], model: AbstractModel): List[Label] = {
+    val labels = model.getDataStructures()(2).asInstanceOf[Array[String]]
+    val posIndex = labels.indexOf("1")
+    val negIndex = labels.indexOf("-1")
+    val neuIndex = labels.indexOf("0")
+
+    (for (tweet <- tweets) yield {
+      val result = model.eval(tweet.features.toArray)
+
+      val posProb = if (posIndex >= 0) result(posIndex) else 0.0
+      val negProb = if (negIndex >= 0) result(negIndex) else 0.0
+      val neuProb = if (neuIndex >= 0) result(neuIndex) else 0.0
+
+      new Label(TWEET_ + tweet.id, POS, posProb) :: new Label(TWEET_ + tweet.id, NEG, negProb) :: new Label(TWEET_ + tweet.id, NEU, neuProb) :: Nil
+    }).flatten
+  }
+
+  def getMPQASeeds(lexicon: MPQALexicon): List[Label] = {
+    (for (word <- lexicon.keySet.toList) yield {
+      val entry = lexicon(word)
+      val posWeight =
+        if (entry.isStrong && entry.isPositive) BIG
+        else if (entry.isWeak && entry.isPositive) SMALL
+        else if (entry.isStrong && entry.isNegative) BIG_COMP
+        else /*if(entry.isWeak && entry.isNegative)*/ SMALL_COMP
+
+      val negWeight =
+        if (entry.isStrong && entry.isPositive) BIG_COMP
+        else if (entry.isWeak && entry.isPositive) SMALL_COMP
+        else if (entry.isStrong && entry.isNegative) BIG
+        else /*if(entry.isWeak && entry.isNegative)*/ SMALL
+
+      val neuWeight = 0.5 //Matt has little to no inkling of what is appropriate here.
+
+
+      new Label(NGRAM_ + word, POS, posWeight) :: new Label(NGRAM_ + word, NEG, negWeight) :: new Label(NGRAM_ + word, NEU, neuWeight) :: Nil
+    }).flatten
+  }
+
+  def getEmoticonSeeds(): List[Label] = {
+    (for (emo <- posEmoticons) yield {
+      new Label(NGRAM_ + emo, POS, BIG) ::
+        new Label(NGRAM_ + emo, NEG, BIG_COMP) :: Nil
+    }).toList.flatten :::
+      (for (emo <- negEmoticons) yield {
+        new Label(NGRAM_ + emo, NEG, BIG) ::
+          new Label(NGRAM_ + emo, POS, BIG_COMP) :: Nil
+      }).toList.flatten :::
+      (for (emo <- negEmoticons) yield {
+        new Label(NGRAM_ + emo, NEG, BIG) ::
+          new Label(NGRAM_ + emo, POS, BIG_COMP) :: Nil
+      }).toList.flatten
+  }
+
+  def createGraph(tweets: List[GoldLabeledTweet], followerGraphFile: String, model: AbstractModel, lexicon: MPQALexicon, edgeSeedSet: String, getNgramWeight: (String) => Double) = {
+    val edges = (if (edgeSeedSet.contains("n")) getTweetNgramEdges(tweets, getNgramWeight) else Nil) :::
+      (if (edgeSeedSet.contains("f")) (getFollowerEdges(followerGraphFile) ::: getUserTweetEdges(tweets)) else Nil)
+    val seeds = (if (edgeSeedSet.contains("m")) getMaxentSeeds(tweets, model) else Nil) :::
+      (if (edgeSeedSet.contains("o")) getMPQASeeds(lexicon) else Nil) :::
+      (if (edgeSeedSet.contains("e")) getEmoticonSeeds else Nil)
+    GraphBuilder(edges, seeds)
+  }
+
+  def loadRefCorpusNgramProbs(filename: String): ProbabilityLexicon /*scala.collection.mutable.HashMap[String, Double]*/ = {
+    val refProbs = new ObjectInputStream(new GZIPInputStream(new FileInputStream(filename))).readObject()
+
+    refProbs match {
+      case refProbLex: ProbabilityLexicon => refProbLex
+      case _ => throw new ClassCastException
+    }
+  }
+
+  def getWordCount(tweets: List[GoldLabeledTweet]): Int = {
+    (for (tweet <- tweets) yield {
+      (for (feature <- tweet.features) yield {
+        1
+      }).sum
+    }).sum
+  }
+
+  def computeNgramProbs(tweets: List[GoldLabeledTweet]): scala.collection.mutable.HashMap[String, Double] = {
+    val probs = new scala.collection.mutable.HashMap[String, Double] {
+      override def default(s: String) = 0.0
+    }
+    for (tweet <- tweets) {
+      for (feature <- tweet.features) {
+        probs.put(feature, probs(feature) + 1.0)
+      }
+    }
+
+    probs.foreach(p => probs.put(p._1, p._2 / getWordCount(tweets)))
+
+    probs
+  }
+
+  def doExperiment(tweets: List[GoldLabeledTweet]) = {
+    logger.info("performing Junto experiment")
+    logger.debug("loading model")
+    val model =
+      modelInputFile.value match {
+        case Some(filename) =>
+          new BinaryGISModelReader(new DataInputStream(new FileInputStream(modelInputFile.value.get))).getModel
+        case None =>
+          parser.usage("You must specify a model input file")
+      }
+
+    val lexicon =
+      mpqaInputFile.value match {
+        case Some(filename) =>
+          MPQALexicon(filename)
+        case None =>
+          parser.usage("You must specify a lexicon file.")
+      }
+
+
+
+    val edgeSeedSet = edgeSeedSetOption.value.getOrElse(DEFAULT_EDGE_SEED_SET)
+
+    val getNgramWeight = getNgramWeightFn(refCorpusProbsFile.value, tweets)
+
+    val graph =
+      followerGraphFile.value match {
+        case Some(filename) =>
+          createGraph(tweets, filename, model, lexicon, edgeSeedSet, getNgramWeight)
+      }
+
+    logger.debug("running label prop")
+    JuntoRunner(graph, mu1.value.getOrElse(DEFAULT_MU1), .01, .01, iterations.value.getOrElse(DEFAULT_ITERATIONS), false)
+
+
+    val tweetIdsToPredictedLabels = new scala.collection.mutable.HashMap[String, SentimentLabel.Type]
+
+
+    logger.debug("testing model")
+    val ngramsToPositivity = new scala.collection.mutable.HashMap[String, Double]
+    val ngramsToNegativity = new scala.collection.mutable.HashMap[String, Double]
+    val ngramsToNeutrality = new scala.collection.mutable.HashMap[String, Double]
+
+    val thisCorpusNgramProbs = computeNgramProbs(tweets)
+
+    for ((id, vertex) <- graph._vertices) {
+      val nodeRE(nodeType, nodeName) = id
+      if (nodeType == TWEET_) {
+        val predictions = vertex.GetEstimatedLabelScores
+        val posProb = predictions.get(POS)
+        val negProb = predictions.get(NEG)
+        val neuProb = predictions.get(NEU)
+
+        tweetIdsToPredictedLabels(nodeName) =
+          if (posProb >= negProb && posProb >= neuProb)
+            SentimentLabel.Positive
+          else if (negProb >= posProb && negProb >= neuProb)
+            SentimentLabel.Negative
+          else
+            SentimentLabel.Neutral
+      }
+      else if (topNOutputFile.value != None && nodeType == NGRAM_ && !lexicon.contains(nodeName)
+        && getNgramWeight(nodeName) >= 1.0 && thisCorpusNgramProbs(nodeName) * getWordCount(tweets) >= 5.0) {
+        val predictions = vertex.GetEstimatedLabelScores
+        val posProb = predictions.get(POS)
+        val negProb = predictions.get(NEG)
+        val neuProb = predictions.get(NEU)
+
+        ngramsToPositivity.put(nodeName, posProb)
+        ngramsToNegativity.put(nodeName, negProb)
+        ngramsToNeutrality.put(nodeName, neuProb)
+
+      }
+    }
+    val res = for (tweet <- tweets) yield {
+
+      tweet match {
+        case GoldLabeledTweet(id, userid, features, goldLabel) =>
+          SystemLabeledTweet(id, userid, features, goldLabel,
+            if (tweetIdsToPredictedLabels.contains(id)) {
+              tweetIdsToPredictedLabels(id)
+            } else {
+              SentimentLabel.Abstained
+            })
+      }
+    }
+    res
+  }
+
+  def after(): Int = 0
+}

src/main/scala/updown/app/experiment/lexical/LexicalRatioExperiment.scala

+package updown.app.experiment.lexical
+
+import org.clapper.argot._
+
+import updown.lex._
+import updown.data._
+import updown.data.io._
+import updown.app.experiment.StaticExperiment
+import updown.util.Statistics
+import org.clapper.argot.ArgotConverters._
+
+/**
+ *
+ * This object classifies tweets according to whether they have more positive, negative, or neutral
+ * words in the MPQA sentiment lexicon.
+ *
+ * @author Mike Speriosu
+ */
+
+object LexicalRatioExperiment extends StaticExperiment {
+
+  val mpqaInputFile = parser.option[String](List("p", "mpqa"), "mpqa", "MPQA sentiment lexicon input file")
+
+  def classifyTweet(numPosWords: Int, numNegWords: Int, numNeuWords: Int): SentimentLabel.Type = {
+    if (numPosWords == numNegWords && numNeuWords == 0) null //this happens a lot...and more than 1/3 are either POS or NEG so accuracy would actually be improved by a random assignment of either NEG or POS
+    else if (numPosWords == numNegWords && numNeuWords == numPosWords) SentimentLabel.Neutral //  could reasonably abstain on this
+    else if (numPosWords == numNegWords && numNeuWords != 0) SentimentLabel.Neutral //Could reasonably abstain on this
+    else if (numNeuWords > numPosWords && numNeuWords > numNegWords) SentimentLabel.Neutral
+    else if (numPosWords > numNegWords && numPosWords > numNeuWords) SentimentLabel.Positive
+    else if (numNegWords > numPosWords && numNegWords > numNeuWords) SentimentLabel.Negative
+    else null
+  }
+
+  def classifyTweets(tweets: scala.List[Tweet], lexicon: MPQALexicon): List[SystemLabeledTweet] = {
+    (for (GoldLabeledTweet(id, userid, features, goldLabel) <- tweets) yield {
+
+      var numPosWords = 0
+      var numNegWords = 0
+      var numNeuWords = 0
+
+      for (feature <- features) {
+        if (lexicon.contains(feature)) {
+          val entry = lexicon(feature)
+          if (entry.isPositive) numPosWords += 1
+          if (entry.isNegative) numNegWords += 1
+          if (entry.isNeutral) numNeuWords += 1
+        }
+      }
+      SystemLabeledTweet(id, userid, features, goldLabel, classifyTweet(numPosWords, numNegWords, numNeuWords))
+    }).toList
+  }
+
+  def doExperiment(dataSet: List[GoldLabeledTweet]) = {
+    val mpqaFileName =
+      mpqaInputFile.value match {
+        case Some(filename: String) => filename
+        case None =>
+          parser.usage("You must specify an MPQA sentiment lexicon file via -p.")
+      }
+    val lexicon = MPQALexicon(mpqaFileName)
+    val tweets = classifyTweets(dataSet, lexicon)
+    tweets
+  }
+
+  def after(): Int = 0
+
+}

src/main/scala/updown/app/experiment/maxent/NFoldMaxentExperiment.scala

 import updown.app.TrainMaxentModel
 
 object NFoldMaxentExperiment extends NFoldExperiment {
-  def doExperiment(testSet: List[GoldLabeledTweet], trainSet: List[GoldLabeledTweet]) = {
+  def doExperiment(trainSet: List[GoldLabeledTweet], testSet: List[GoldLabeledTweet]) = {
     logger.info("performing Maxent experiment")
     logger.debug("training model")
     val model = TrainMaxentModel.trainWithGoldLabeledTweetIterator(trainSet.iterator)
 
     logger.debug("testing model")
-    val res = Statistics.getEvalStats(for (tweet <- testSet) yield {
+    val res = Statistics.getEvalStats("Maxent",for (tweet <- testSet) yield {
       tweet match {
         case GoldLabeledTweet(id, userid, features, goldLabel) =>
           SystemLabeledTweet(id, userid, features, goldLabel,
             SentimentLabel.figureItOut(model.getBestOutcome(model.eval(features.toArray))))
       }
     })
-    logger.info(Statistics.reportResults(res))
+    logger.info(res.toString)
     res
   }
   def after():Int=0

src/main/scala/updown/app/experiment/maxent/SplitMaxentExperiment.scala

+package updown.app.experiment.maxent
+
+import updown.data.{SystemLabeledTweet, GoldLabeledTweet, SentimentLabel}
+import updown.util.Statistics
+import updown.app.TrainMaxentModel
+import updown.app.experiment.{SplitExperiment, NFoldExperiment}
+
+object SplitMaxentExperiment extends SplitExperiment {
+  def doExperiment(trainSet: List[GoldLabeledTweet], testSet: List[GoldLabeledTweet]) = {
+    logger.info("performing Maxent experiment")
+    logger.debug("training model")
+    val model = TrainMaxentModel.trainWithGoldLabeledTweetIterator(trainSet.iterator)
+
+    logger.debug("testing model")
+    val res = for (tweet <- testSet) yield {
+      tweet match {
+        case GoldLabeledTweet(id, userid, features, goldLabel) =>
+          SystemLabeledTweet(id, userid, features, goldLabel,
+            SentimentLabel.figureItOut(model.getBestOutcome(model.eval(features.toArray))))
+      }
+    }
+    res
+  }
+  def after():Int=0
+}

src/main/scala/updown/app/experiment/maxent/StaticMaxentExperiment.scala

+package updown.app.experiment.maxent
+
+import updown.data.{SystemLabeledTweet, GoldLabeledTweet, SentimentLabel}
+import updown.app.TrainMaxentModel
+import updown.app.experiment.{StaticExperiment, SplitExperiment}
+import java.io.{FileInputStream, DataInputStream}
+import opennlp.maxent.io.BinaryGISModelReader
+import org.clapper.argot.ArgotConverters._
+
+object StaticMaxentExperiment extends StaticExperiment {
+  val modelInputFile = parser.option[String](List("m", "model"), "model", "model input")
+
+  def doExperiment(testSet: List[GoldLabeledTweet]) = {
+    logger.info("performing Maxent experiment")
+    logger.debug("loading model")
+    val model =
+      modelInputFile.value match {
+        case Some(filename) =>
+          new BinaryGISModelReader(new DataInputStream(new FileInputStream(modelInputFile.value.get))).getModel
+        case None =>
+          parser.usage("You must specify a model input file")
+      }
+
+    logger.debug("testing model")
+    val res = for (tweet <- testSet) yield {
+      tweet match {
+        case GoldLabeledTweet(id, userid, features, goldLabel) =>
+          SystemLabeledTweet(id, userid, features, goldLabel,
+            SentimentLabel.figureItOut(model.getBestOutcome(model.eval(features.toArray))))
+      }
+    }
+    res
+  }
+
+  def after(): Int = 0
+}

src/main/scala/updown/app/experiment/topic/NFoldMajorityTopicExperiment.scala

 
 import updown.data.{SystemLabeledTweet, GoldLabeledTweet, SentimentLabel}
 import updown.util.{Statistics, LDATopicModel, TopicModel}
+import updown.app.experiment.ExperimentalResult
 
 object NFoldMajorityTopicExperiment extends NFoldTopicExperiment {
 
     )
   }
 
-  def evaluate(model: TopicModel, testSet: scala.List[GoldLabeledTweet]): (Double, scala.List[(SentimentLabel.Type, Double, Double, Double)]) = {
+  def evaluate(model: TopicModel, testSet: scala.List[GoldLabeledTweet]) = {
     val labelToTopicDist = model.getTopicsPerTarget
 
     //This approach will only work if there is a very clear sentiment-topic correlation.
       logger.warn("No clear distribution for the neutral label. ")
     }
 
-    val res = Statistics.getEvalStats(for (tweet <- testSet) yield {
+    val res = Statistics.getEvalStats("Majority Topic",for (tweet <- testSet) yield {
       label(model, tweet, goodTopic, badTopic)
     })
-    logger.debug(res.toString)
-    logger.info(Statistics.reportResults(res))
+    logger.info(res.toString)
     res
   }
 }

src/main/scala/updown/app/experiment/topic/NFoldSimilarityTopicExperiment.scala

     SystemLabeledTweet(id, userid, features, goldLabel,SentimentLabel.unitSentiment(similarities(0)._2))
   }
 
-  def evaluate(model: TopicModel, testSet: scala.List[GoldLabeledTweet]): (Double, scala.List[(SentimentLabel.Type, Double, Double, Double)]) = {
+  def evaluate(model: TopicModel, testSet: scala.List[GoldLabeledTweet]) = {
     logger.debug("entering evaluation with %d items in the test set".format(testSet.length))
     val topicsPerTarget: Map[SentimentLabel.Type, List[Double]] = model.getTopicsPerTarget
     val start = System.currentTimeMillis()
-    val res = Statistics.getEvalStats(for ((tweet,i) <- testSet.zipWithIndex) yield {
+    val res = Statistics.getEvalStats("Similarity Topic",for ((tweet,i) <- testSet.zipWithIndex) yield {
       if (i%100 == 0) {
         logger.debug("%.0f%% remaining; average label time = %fs".format((1.0-(i+1).toDouble/testSet.length.toDouble)*100, (System.currentTimeMillis()-start).toDouble/(i+1.0) /1000.0))
       }
       label(model, tweet, topicsPerTarget)
     })
-    logger.debug(res.toString)
-    logger.info(Statistics.reportResults(res))
+    logger.info(res.toString)
     res
   }
 }

src/main/scala/updown/app/experiment/topic/NFoldTopicExperiment.scala

 package updown.app.experiment.topic
 
 import updown.data.{SystemLabeledTweet, GoldLabeledTweet, SentimentLabel}
-import updown.app.experiment.NFoldExperiment
 import org.clapper.argot.{SingleValueOption, ArgotParser}
 import org.clapper.argot.ArgotParser._
 import org.clapper.argot.ArgotConverters._
 import java.io.{FileWriter, BufferedWriter, File}
 import updown.util.{WordleUtils, Statistics, LDATopicModel, TopicModel}
+import updown.app.experiment.{ExperimentalResult, NFoldExperiment}
 
 abstract class NFoldTopicExperiment extends NFoldExperiment {
   var iterations = 1000
   val wordleConfigOption = parser.option[String](List("wordleConfig"), "PATH", ("the path to the config file for IBM's " +
     "word cloud generator (default %s)").format(WordleUtils.defaultConfigurationPath))
 
-  def evaluate(model: TopicModel, testSet: scala.List[GoldLabeledTweet]):
-  (Double, scala.List[(SentimentLabel.Type, Double, Double, Double)])
+  def evaluate(model: TopicModel, testSet: scala.List[GoldLabeledTweet]): ExperimentalResult
 
   def doOutput(model: TopicModel) {
     if (outputOption.value.isDefined) {

src/main/scala/updown/data/Tweet.scala

                             features: List[String],
                             goldLabel: SentimentLabel.Type) extends Tweet
 
+case class TargetedGoldLabeledTweet(id: String,
+                                    userid: String,
+                                    features: List[String],
+                                    goldLabel: SentimentLabel.Type,
+                                    target: String) extends Tweet
+
+
 case class SystemLabeledTweet(id: String,
                               userid: String,
                               features: List[String],
                               goldLabel: SentimentLabel.Type,
                               systemLabel: SentimentLabel.Type) extends Tweet
-	   
-	
 
 
+case class TargetedSystemLabeledTweet(id: String,
+                                      userid: String,
+                                      features: List[String],
+                                      goldLabel: SentimentLabel.Type,
+                                      systemLabel: SentimentLabel.Type,
+                                      target: String) extends Tweet
 
+
+
+

src/main/scala/updown/util/LDATopicModel.scala

 import scala.collection.JavaConversions._
 import updown.data.{SentimentLabel, GoldLabeledTweet}
 import java.util.logging.Level
+import java.io.File
 
 class LDATopicModel(tweets: List[GoldLabeledTweet], numTopics: Int, numIterations: Int, alphaSum: Double, beta: Double) extends TopicModel {
   private final val MAX_THREADS = 20
 
   private val (alphabet, instanceList) = getInstanceList(tweets)
-  private val model = new ParallelTopicModel(numTopics, alphaSum, beta)
+  private var model = new ParallelTopicModel(numTopics, alphaSum, beta)
   model.addInstances(instanceList)
   model.setNumThreads(numTopics max MAX_THREADS)
   model.setNumIterations(numIterations)
       Topic(priors(i), wordCounts.map((triple)=>(triple._1->(triple._3.toDouble/sum))).toMap)
     }).toList
 
-
-
-
-
     res
   }
 
       }
     model.getInferencer.getSampledDistribution(instance, numIterations, 1, 1).toList
   }
+
+  def save(filename: String) {
+    model.write(new File(filename))
+  }
 }
 

src/main/scala/updown/util/LDATopicModelFromFile.scala

+package updown.util
+
+import cc.mallet.topics.ParallelTopicModel
+import cc.mallet.types._
+import scala.collection.JavaConversions._
+import updown.data.{SentimentLabel, GoldLabeledTweet}
+import java.io.File
+
+class LDATopicModelFromFile(fileName: String) extends TopicModel {
+
+  private var model = ParallelTopicModel.read(new File(fileName))
+  private val alphabet = model.alphabet
+  private val numTopics = model.numTopics
+  private val numIterations = model.numIterations
+
+  def getTopics: List[Topic] = {
+    val priors = getTopicPriors
+    val topicsToAlphaIds = scala.collection.mutable.Map[Int,List[(Int,Double)]]()
+
+    val wordsTopicsCounts = (for ((topicCounts, typeIndex) <- model.typeTopicCounts.zipWithIndex) yield {
+      val word = alphabet.lookupObject(typeIndex).toString
+      (for (topicCount <- topicCounts) yield {
+        val topic = topicCount & model.topicMask
+        val count = topicCount >> model.topicBits
+        (word,topic,count)
+      }).iterator
+    }).iterator.flatten.toList
+
+
+    val res = (for (i <- 0 until numTopics) yield {
+      val wordCounts = wordsTopicsCounts.filter((triple)=>(triple._2==i && triple._3!=0))
+      val sum = wordCounts.map((triple)=>triple._3).reduce(_ + _)
+      Topic(priors(i), wordCounts.map((triple)=>(triple._1->(triple._3.toDouble/sum))).toMap)
+    }).toList
+
+    res
+  }
+
+  def getTopicPriors: List[Double] = {
+    val result: Array[Double] = new Array[Double](numTopics)
+    var sum = 0.0
+    for (topicAssignment <- model.getData) {
+      val temp: Array[Double] = model.getTopicProbabilities(topicAssignment.topicSequence)
+      for (i <- 0 until result.length) {
+        result(i) += temp(i)
+        sum += temp(i)
+      }
+    }
+    result.toList.map((double: Double) => double / sum)
+  }
+
+  def getTopicsPerInstance = {
+    (for (topicAssignment <- model.getData) yield {
+      val source = topicAssignment.instance.getName.toString
+      val dist = model.getTopicProbabilities(topicAssignment.topicSequence)
+      (source, dist.toList)
+    }).toList
+  }
+
+  def getTopicsPerTarget = {
+    val result = scala.collection.mutable.Map[SentimentLabel.Type,List[Double]]()
+    for (topicAssignment <- model.getData) {
+      val target = topicAssignment.instance.getTarget.asInstanceOf[SentimentLabel.Type]
+      result(target) = result.getOrElse(target, (new Array[Double](numTopics)).toList).zip(model.getTopicProbabilities(topicAssignment.topicSequence).toList).map((pair) => pair._1+pair._2)
+    }
+    (for ((key, value) <- result) yield {
+      val sum = value.reduce( _ + _ )
+      (key->value.map(_ / sum))
+    }).toMap
+  }
+
+  def inferTopics(tweet: GoldLabeledTweet): List[Double] = {
+    val instance = tweet match {
+        case GoldLabeledTweet(id, userid, features, goldLabel) =>
+          val featureSequence = new FeatureSequence(alphabet, features.length)
+          for (feature <- features) {
+            featureSequence.add(feature)
+          }
+          new Instance(featureSequence, goldLabel, id, null)
+      }
+    model.getInferencer.getSampledDistribution(instance, numIterations, 1, 1).toList
+  }
+
+  def save(filename: String) {
+    model.write(new File(filename))
+  }
+}
+

src/main/scala/updown/util/Statistics.scala

 package updown.util
 
-import updown.data.{SentimentLabel, SystemLabeledTweet}
 import com.weiglewilczek.slf4s.Logging
+import updown.app.experiment.{LabelResult, ExperimentalResult}
+import java.io.{OutputStreamWriter, BufferedOutputStream}
+import updown.data.{TargetedSystemLabeledTweet, SentimentLabel, SystemLabeledTweet}
 
 object Statistics extends Logging {
 
+  val MinTPU: Int = 3
+  val MinTPT: Int = 3
+
+  def mean(list: List[ExperimentalResult]): ExperimentalResult = {
+    (list.reduce(_ + _) / list.length).rename("Mean")
+  }
+
+  def variance(list: List[ExperimentalResult]): ExperimentalResult = {
+    val list_mean = mean(list)
+    mean(list.map((obj) => (obj - list_mean) * (obj - list_mean))).rename("Variance")
+  }
+
   val accurracy: (Double, Double) => Double =
     (correct, total) => correct / total
   val precision: (Double, Double) => Double =
     (precision, recall) => 2.0 * precision * recall / (precision + recall)
 
   val dot: (List[Double], List[Double]) => Double =
-    (A,B) => {
-      assert (A.length == B.length)
-      (0.0 /: (A zip B).map{case(a,b) => a*b}) {_ + _}
+    (A, B) => {
+      assert(A.length == B.length)
+      (0.0 /: (A zip B).map {
+        case (a, b) => a * b
+      }) {
+        _ + _
+      }
     }
 
-  val mag: (List[Double])=>Double =
-    (A) => math.sqrt(A.map((i)=>i*i).reduce(_ + _))
+  val mag: (List[Double]) => Double =
+    (A) => math.sqrt(A.map((i) => i * i).reduce(_ + _))
 
   val cosineSimilarity: (List[Double], List[Double]) => Double =
     (A, B) => (dot(A, B) / (mag(A) * mag(B)))
     var correct = 0.0
     var total = 0
     var numAbstained = tweets.count(_.systemLabel == null)
+    logger.debug("null sys labels: %d".format(tweets.count(_.systemLabel == null)))
+    for (tweet <- tweets) {
 
-    for (tweet <- tweets) {
-      //      println(tweet.systemLabel + "|" + tweet.goldLabel)
-      /*
-       * val normedTweet = tweet.normalize("alpha")
-      *  val normedNormedTweet = normedTweet.normalize("int")
-      *  println(normedTweet.systemLabel + "|" + normedTweet.goldLabel + "\t" + normedNormedTweet.systemLabel + "|" + normedNormedTweet.goldLabel)
-      */
-      //      val normedTweet = tweet.normalize("alpha")
       if (tweet.systemLabel == tweet.goldLabel) {
         correct += 1
       }
   }
 
 
-  def initializeAverageList(list: List[(updown.data.SentimentLabel.Type, Double, Double, Double)]): List[(updown.data.SentimentLabel.Type, Double, Double, Double)] = {
-    if (list.length == 0)
-      Nil
-    else {
-      val ((lLabel, _, _, _) :: ls) = list
-      (lLabel, 0.0, 0.0, 0.0) :: initializeAverageList(ls)
+  def averageResults(newName: String, results: scala.List[ExperimentalResult]): ExperimentalResult = {
+    var avgAccuracy = 0.0
+    var avgN = 0.0
+    var avgLabelResults = scala.collection.mutable.Map[SentimentLabel.Type, LabelResult]().withDefault((label) => LabelResult(0, label, 0.0, 0.0, 0.0))
+    // first, sum
+    for (ExperimentalResult(name, n, accuracy, classes) <- results) {
+      avgAccuracy += accuracy
+      avgN += n
+      for (LabelResult(n, label, precision, recall, f) <- classes) {
+        val LabelResult(oN, oLabel, oPrecision, oRecall, oF) = avgLabelResults(label)
+        avgLabelResults(label) = LabelResult(n + oN, label, precision + oPrecision, recall + oRecall, f + oF)
+      }
     }
+    // then, scale
+    val N = results.length
+    ExperimentalResult(newName, (avgN / N).toInt, avgAccuracy / N,
+      (for ((_, LabelResult(n, label, precision, recall, f)) <- avgLabelResults.toList.sortBy {
+        case (k, v) => SentimentLabel.ordinality(k)
+      }) yield {
+        LabelResult(n / N, label, precision / N, recall / N, f / N)
+      }).toList)
   }
 
-  def addWithoutNaN(d1: Double, d2: Double): Double = {
-    /*if (d1.equals(Double.NaN)) {
-      d2
-    } else if (d2.equals(Double.NaN)) {
-      d1
-    } else {
-      d1 + d2
-    }*/
-    d1 + d2
-  }
-
-  def addAll(list: List[(updown.data.SentimentLabel.Type, Double, Double, Double)], to: List[(updown.data.SentimentLabel.Type, Double, Double, Double)]): List[(updown.data.SentimentLabel.Type, Double, Double, Double)] = {
-    if (list.length == 0)
-      Nil
-    else {
-      val ((lLabel, lPrecision, lRecall, lFScore) :: ls) = list
-      val ((tLabel, tPrecision, tRecall, tFScore) :: ts) = to
-      assert(lLabel == tLabel)
-      (lLabel, addWithoutNaN(lPrecision, tPrecision), addWithoutNaN(lRecall, tRecall), addWithoutNaN(lFScore, tFScore)) :: addAll(ls, ts)
-    }
-  }
-
-  def divideBy(list: List[(updown.data.SentimentLabel.Type, Double, Double, Double)], by: Double): List[(updown.data.SentimentLabel.Type, Double, Double, Double)] = {
-    if (list.length == 0)
-      Nil
-    else {
-      val ((lLabel, lPrecision, lRecall, lFScore) :: ls) = list
-      (lLabel, lPrecision / by, lRecall / by, lFScore / by) :: divideBy(ls, by)
-    }
-  }
-
-
-  def averageResults(results: scala.List[(Double, scala.List[(SentimentLabel.Type, Double, Double, Double)])]): (Double, scala.List[(SentimentLabel.Type, Double, Double, Double)]) = {
-    var avgAccuracy = 0.0
-    var avgLabelResultsList = initializeAverageList(results(0)._2).sortBy({case(x,_,_,_)=>SentimentLabel.ordinality(x)})
-    for ((accuracy, labelResults) <- results) {
-      avgAccuracy += accuracy
-      avgLabelResultsList = addAll(labelResults.sortBy({case(x,_,_,_)=>SentimentLabel.ordinality(x)}), avgLabelResultsList)
-    }
-    avgAccuracy /= results.length
-    avgLabelResultsList = divideBy(avgLabelResultsList, results.length)
-    (avgAccuracy, avgLabelResultsList)
-  }
-
-  def getEvalStats(tweets: scala.List[SystemLabeledTweet]): (Double, List[(SentimentLabel.Type, Double, Double, Double)]) = {
+  def getEvalStats(resultName: String, tweets: scala.List[SystemLabeledTweet]): ExperimentalResult = {
     val (correct, total) = tabulate(tweets)
-    (accurracy(correct, total.toDouble),
+    ExperimentalResult(resultName, total, accurracy(correct, total),
       (for (label <- List(SentimentLabel.Negative, SentimentLabel.Neutral, SentimentLabel.Positive)) yield {
         val goldList = tweets.filter((tweet) => tweet.goldLabel == label)
+        logger.debug("%s gold tweets: %d".format(SentimentLabel.toEnglishName(label), goldList.length))
         val systemList = tweets.filter((tweet) => tweet.systemLabel == label)
+        logger.debug("%s system tweets: %d".format(SentimentLabel.toEnglishName(label), systemList.length))
         val labelPrecision = precision(
           systemList.filter((tweet) => tweet.goldLabel == label).length,
           systemList.length)
           goldList.filter((tweet) => tweet.systemLabel == label).length,
           goldList.length
         )
-        (label, labelPrecision, labelRecall, fScore(labelPrecision, labelRecall))
+        LabelResult(goldList.length, label, labelPrecision, labelRecall, fScore(labelPrecision, labelRecall))
       }).toList)
   }
+ 
 
-  def reportResults(resultTuple: (Double, scala.List[(SentimentLabel.Type, Double, Double, Double)])): String = {
-    val (accuracy, labelResultsList) = resultTuple
-    "Results:\n" +
-      "%12s%6.2f\n".format("Accuracy", accuracy) +
-      "%12s%11s%8s%9s\n".format("Label", "Precision", "Recall", "F-Score") +
-      (for ((label, precision, recall, fScore) <- labelResultsList) yield {
-        "%12s%11.2f%8.2f%9.2f".format(SentimentLabel.toEnglishName(label), precision, recall, fScore)
-      }).mkString("\n") + "\n"
+  def getEvalStatsPerUser(resultName: String, tweets: scala.List[SystemLabeledTweet]): List[ExperimentalResult] = {
+    val userToTweets = tweets.groupBy((tweet) => tweet.userid).toList.filter {
+      case (user, tweets) =>
+        tweets.length > MinTPU
+    }.sortBy {
+      case (user, tweets) => tweets.length
+    }.reverse
+    (for ((user, tweets) <- userToTweets) yield {
+      val res = Statistics.getEvalStats("%s %s".format(resultName, user), tweets)
+      res
+    }).toList
+  }
+
+  def getEvalStatsPerTarget(resultName: String, tweets: scala.List[TargetedSystemLabeledTweet]): List[ExperimentalResult] = {
+    val targetToTweets = tweets.groupBy((tweet) => tweet.target).toList.filter {
+      case (target, tweets) =>
+        tweets.length > MinTPT
+    }.sortBy {
+      case (target, tweets) => tweets.length
+    }.reverse
+    (for ((target, tweets) <- targetToTweets) yield {
+      val res = Statistics.getEvalStats("%s %s".format(resultName, target), tweets.map {
+        case TargetedSystemLabeledTweet(id, uid, features, gLabel, sLabel, target) => SystemLabeledTweet(id, uid, features, gLabel, sLabel)
+      })
+      res
+    }).toList
   }
 }

src/main/scala/updown/util/TopicModel.scala

   def getTopicsPerInstance: List[(String,List[Double])]
   def getTopicsPerTarget: Map[SentimentLabel.Type,List[Double]]
   def inferTopics(tweet: GoldLabeledTweet): List[Double]
+
+  def save(filename: String)
 }

src/test/scala/updown/test/PreprocHCRTest.scala

 
 import org.scalatest.FlatSpec
 import updown.data.SentimentLabel
-import updown.preproc.{SuccessfulHCRParse, PreprocHCRTweets}
+import updown.preproc.impl.PreprocHCRTweets
 
 class PreprocHCRTest extends FlatSpec {
   val HCR_INPUT_FIELDS = Array("9932982701", "29136568", "Hexham67", "Bully for you Mr. President. Bully for you. #hcr",
 
   val pst = PreprocHCRTweets
 
-  "processOneLine" should "produce expected output" in {
+  // the new preprocessor expects a file. I'll have to think about what to do here.
+  /*"processOneLine" should "produce expected output" in {
     assert(
       pst.processOneLine(9, HCR_INPUT_FIELDS, Set("for", "you", "mr"))
         ===
           List((HCR_SENTIMENT_GOLD,
           HCR_TARGET)),
           HCR_FEATURES))
-  }
+  }*/
 
   //TODO test failure modes
 }

src/test/scala/updown/test/PreprocShammaTest.scala

 
 import org.scalatest.FlatSpec
 import updown.data.SentimentLabel
-import updown.preproc.{SuccessfulShammaParse, PreprocShammaTweets}
+import updown.preproc.impl.PreprocShammaTweets
 
 class PreprocShammaTest extends FlatSpec {
   val SHAMMA_INPUT_LINE = "936472030\t9/27/08 1:03\tPreparing to have a heart attack #tweetdebate\tkyeung808\tKen Yeung\t1\t1\t1\t1\t\t\t\t"
     assert(tweet === SHAMMA_TWEET)
   }
 
-  "processOneLine" should "produce expected output" in {
+  // the new preprocessors don't work like this.
+  /*"processOneLine" should "produce expected output" in {
     assert(
       PreprocShammaTweets.processOneLine(SHAMMA_INPUT_LINE, Set("to", "have", "a"))
         ===
           SHAMMA_SENTIMENT_GOLD,
           SHAMMA_IAA,
           SHAMMA_FEATURES))
-  }
+  }*/
 }

src/test/scala/updown/test/PreprocStanfordTest.scala

 
 import org.scalatest.FlatSpec
 import updown.data.SentimentLabel
-import updown.preproc.{SuccessfulStanfordParse, PreprocStanfordTweets}
+import updown.preproc.impl.PreprocStanfordTweets
 
 class PreprocStanfordTest extends FlatSpec {
   val STANFORD_INPUT_LINE = "4;;3;;Mon May 11 03:17:40 UTC 2009;;kindle2;;tpryan;;@stellargirl I loooooooovvvvvveee my Kindle2. Not that the DX is cool, but the 2 is fantastic in its own right."
     assert(tweet === STANFORD_TWEET)
   }
 
-  "processOneLine" should "produce expected output" in {
+  // the new preprocessors don't work like this
+/*  "processOneLine" should "produce expected output" in {
     assert(
       pst.processOneLine(STANFORD_INPUT_LINE, Set())
         ===
           STANFORD_USERNAME,
           STANFORD_SENTIMENT_GOLD,
           STANFORD_FEATURES))
-  }
+  }*/
 }
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.