Mike Speriosu committed d2b4197

Added TrainMaxentModel and PerTweetEvaluator.

Comments (0)

Files changed (3)

 
 FIRSTARG=$1
 
-MEMORY=8g
+MEMORY=2g
 
 case $FIRSTARG in
   [0-9] | [0-9][0-9]) 
 Updown 0.1 commands: 
 
   run               	run the main method of a given class
+  train-maxent          train a maxent model
 
 Include --help with any option for more information
 EOF
 
     java -jar $UPDOWN_DIR/project/build/sbt-launch-0.7.7.jar "$@"
 
-#elif [ $CMD = 'geotext-lp' ]; then
+elif [ $CMD = 'train-maxent' ]; then
 
-#     $SCALA_COMMAND opennlp.textgrounder.app.GeoTextLabelProp $*
+     $SCALA_COMMAND updown.app.TrainMaxentModel $*
 
 else  
 

project/build/UpdownProject.scala

 
 // sbt build definition for the Updown project: declares the extra Maven
 // repositories and the managed library dependencies used by the build.
 class UpdownProject (info: ProjectInfo) extends DefaultProject(info) /*with assembly.AssemblyBuilder*/ {
   // NOTE(review): presumably keeps the Scala version out of the build output
   // paths (no cross-building) -- confirm against the sbt documentation.
   override def disableCrossPaths = true 
-  //val argot = "org.clapper" %% "argot" % "0.3.1"
+
+  // Add repositories
 // Each repository is declared as "display name" at "base URL".
 // NOTE(review): no gson artifact is declared in this class; presumably the
 // gson repository is needed by another dependency -- confirm it is still required.
+  val gsonRepo = "gson repo" at "http://google-gson.googlecode.com/svn/mavenrepo"
+  val opennlpRepo = "opennlp sourceforge repo" at "http://opennlp.sourceforge.net/maven2"
+
+  // Dependencies
 // Format is group % artifact % version. The double %% used for argot asks sbt
 // to append the Scala version to the artifact name; the OpenNLP artifacts are
 // plain Java libraries and therefore use a single %.
+  val opennlpTools = "org.apache.opennlp" % "opennlp-tools" % "1.5.1-incubating"
+  val opennlpMaxent = "org.apache.opennlp" % "opennlp-maxent" % "3.0.1-incubating"
+  val argot = "org.clapper" %% "argot" % "0.3.1"
 }
 
-

src/main/scala/updown/app/TrainMaxentModel.scala

 package updown.app
 
+import java.io._
+
+import opennlp.tools.postag._
+import opennlp.tools.sentdetect._
+import opennlp.tools.tokenize._
+import opennlp.tools.util._
+import opennlp.maxent._
+import opennlp.maxent.io._
+import opennlp.model._
+
+import org.clapper.argot._
+
 object TrainMaxentModel {
+  // Command-line entry point that trains an OpenNLP maxent (GIS) model from a
+  // labeled input file and writes the trained model to disk in binary form.
 
+  import ArgotConverters._
+  val parser = new ArgotParser("updown run updown.app.TrainMaxentModel", preUsage=Some("Updown"))
+
+  // Both options are effectively required: main() reports an error and exits
+  // if either one is missing after parsing.
+  val inputFile = parser.option[String](List("i", "input"), "input", "labeled tweet input")
+  val outputFile = parser.option[String](List("o", "output"), "output", "model output")
+
+  /**
+   * Parses the command line, trains a model on the input file and persists it.
+   *
+   * Each line of the input file is turned into one training event by
+   * `BasicEventStream`, using "," as the separator.
+   *
+   * @param args command-line arguments; `-i`/`--input` names the labeled
+   *             training file and `-o`/`--output` names the model file to write
+   */
   def main(args: Array[String]) {
-    println("Hello world!")
+    try { parser.parse(args) }
+    catch { case e: ArgotUsageException => println(e.message); sys.exit(0) }
+
+    // Fail with a readable message instead of a NoSuchElementException from
+    // Option.get when an option was not supplied.
+    val (inputPath, outputPath) = (inputFile.value, outputFile.value) match {
+      case (Some(in), Some(out)) => (in, out)
+      case _ =>
+        System.err.println("Both -i/--input and -o/--output must be specified.")
+        sys.exit(1)
+    }
+
+    val reader = new FileReader(inputPath)
+    val model =
+      try {
+        val dataStream = new PlainTextByLineDataStream(reader)
+        val eventStream = new BasicEventStream(dataStream, ",")
+        // Second argument is the indexer's cutoff (5).
+        val dataIndexer = new TwoPassDataIndexer(eventStream, 5)
+        // First argument is the number of GIS training iterations (10).
+        GIS.trainModel(10, dataIndexer)
+      } finally {
+        // Release the input file even if indexing or training throws.
+        reader.close()
+      }
+
+    val modelWriter = new BinaryGISModelWriter(model, new File(outputPath))
+    try {
+      modelWriter.persist()
+    } finally {
+      // Close the output even if persisting the model fails part-way.
+      modelWriter.close()
+    }
   }
 }
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.