Commits

vvcephei committed 4612894

Removed readlink from treemerge.sh, since it did not work on Macs, and it does not appear to be necessary. Changed error reporting for treemerge to be a little nicer.

Comments (0)

Files changed (4)

     exit 1
 }
 
-
 case $1 in
     kin|mlg)
         echo "merging $1"
         for collection in $treesrc/* ; do
             echo $collection
             for tree in $collection/* ; do
-                if [[ $exit_code -ne 0 ]]; then
+                if [[ ( $exit_code -ne 0 ) && ( $2 != "-f" )  ]]; then
                     exit $exit_code
                 else
                     treemerge.sh $tree
         ;;
     all)
         echo "merging all"
-        treemerge.sh kin
+        treemerge.sh kin $2
         (( exit_code += $? ))
-        if [[ $exit_code -ne 0 ]]; then
+        if [[ ( $exit_code -ne 0 ) && ( $2 != "-f" ) ]]; then
             exit $exit_code
         else
-            treemerge.sh mlg
+            treemerge.sh mlg $2
             (( exit_code += $? ))
         fi
         ;;
     *)
-        fullpath=`readlink -f $1`
+        #fullpath=`readlink -f $1`
+        fullpath=$1
         case $fullpath in
             */tree/src/*\.eng|*/tree/src/*\.fra|*/tree/src/*\.kin|*/tree/src/*\.mlg)
                 if [[ -e $fullpath ]]; then
                     lang=$( basename $( dirname $( dirname $( dirname $( dirname $fullpath ) ) ) ) )
                     echo "merging $fullpath to $root/$lang/tree/$collection/$base.tree"
                     #echo "running: scalabha run opennlp.scalabha.tree.Merge -i $fullpath -o $root/$lang/tree/$collection/$base.tree"
-                    scalabha run opennlp.scalabha.tree.Merge -i $fullpath -o $root/$lang/tree/$collection/$base.tree
+                    scalabha run opennlp.scalabha.tree.Merge --pprintErrs -i $fullpath -o $root/$lang/tree/$collection/$base.tree
                     (( exit_code += $? ))
                 else
                     echo 

src/main/scala/opennlp/scalabha/tree/Merge.scala

   val help = parser.flag[Boolean](List("h", "help"), "print help")
   val input = parser.option[String](List("i", "input"), "FILEorDIR", "tree source file or directory to compile")
   val output = parser.option[String](List("o", "output"), "FILE", "output file to write compiled trees to.")
+  val skipErrs = parser.flag[Boolean](List("f","skipErrs"),"Do not exit on errors. " +
+    "The default is to exit as soon as errors are caught in any input file.")
+  val pprintErrs = parser.flag[Boolean](List("pprintErrs"), "Format treenodes nicely in error reporting.")
+  
   var log = new SimpleLogger(this.getClass().getName, SimpleLogger.WARN, new BufferedWriter(new OutputStreamWriter(System.err)))
 
+  
   def applyFile(file: File): List[TreeNode] = {
     if (file.getName.endsWith("tree"))
       MultiLineTreeParser(file.getName, scala.io.Source.fromFile(file, "UTF-8"))
     }
   }
 
+  def okToProceed() = (MultiLineTreeParser.log.getStats()._2 == 0 || skipErrs.value.isDefined)
+
   def applyDir(dir: File): List[TreeNode] = {
     (for (child <- dir.listFiles().sorted) yield {
       if (child.isDirectory) {
         applyDir(child)
-      } else if (child.isFile) {
+      } else if (child.isFile && okToProceed) {
         applyFile(child)
       } else {
         // there are other types of files, and we'll just ignore them
       }
 
       MultiLineTreeParser.log.logLevel = SimpleLogger.WARN
+      MultiLineTreeParser.pprintErrs = pprintErrs.value.isDefined
 
-      val outputBuffer = output.value match {
-        case Some(filename) =>
-          if (filename.endsWith(".tree")) {
-            (new File(filename)).getParentFile.mkdirs()
-            new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(filename))))
-          } else {
-            parser.usage("Output file must end with a '.tree' suffix")
-          }
-        case None => new BufferedWriter(new OutputStreamWriter(System.out))
-      }
       val parsedTrees = input.value match {
         case Some(filename) => apply(filename)
         case None => parser.usage("you must specify an input tree file or directory of input tree files")
       val (warnings, errors) = (compileWarnings + parseWarnings, compileErrors + parseErrors)
 
       log.summary("Warnings,Errors: %s\n".format((warnings, errors)))
-      if (errors == 0)
+      if (errors == 0){
+        val outputBuffer = output.value match {
+          case Some(filename) =>
+            if (filename.endsWith(".tree")) {
+              (new File(filename)).getParentFile.mkdirs()
+              new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(filename))))
+            } else {
+              parser.usage("Output file must end with a '.tree' suffix")
+            }
+          case None => new BufferedWriter(new OutputStreamWriter(System.out))
+        }
         outputBuffer.write(parsedTrees.map(tree => tree.getCanonicalString()).mkString("\n") + "\n")
+
+        outputBuffer.close()
+      }
       else
-        log.summary("Suspending output since there were errors.\n")
+      log.summary("Suspending output since there were errors.\n")
 
-      outputBuffer.close()
       System.exit(errors)
     }
     catch {

src/main/scala/opennlp/scalabha/tree/MultiLineTreeParser.scala

   val help = parser.flag[Boolean](List("h", "help"), "print help")
   val input = parser.option[String](List("i", "input"), "FILE", "input inputFile to tokenize")
   var log = new SimpleLogger(this.getClass().getName, SimpleLogger.WARN, new BufferedWriter(new OutputStreamWriter(System.err)))
+  var pprintErrs = false
+  val pprintErrsOpt = parser.flag[Boolean](List("pprintErrs"), "Format treenodes nicely in error reporting.")
 
+  
   val openSymRestRegex = """\s*(\(?)\s*([^\s)(]+)\s*(.*)""".r
 
+  private def sprintNode(node: TreeNode): String = {
+    if (pprintErrs) {
+      node.prettyPrintString()
+    } else {
+      node.getCanonicalString()
+    }
+  }
+  
   /**
    * Parse a string representation of a syntax tree into a TreeNode tree.
    * @param groupName This is used for logging errors. It's the name of the group that
           // a Node that contains a Value child must contain only one child
           if (children.length == 0
             || (children.filter(_.isInstanceOf[Value]).length > 0 && children.length != 1)) {
-            log.err("(file:%s,tree#:%d): A leaf node may only contain a tag and a token. I.e., (TAG token). Tree node %s fails this test.\n".format(groupName, index, Node(name, children).getCanonicalString))
+            log.err("(file:%s,tree#:%d): A leaf node may only contain a tag and a token. I.e., (TAG token). Following tree node fails this test: %s\n".format(groupName, index, sprintNode(Node(name, children))))
           }
           if (children.filter(_.isHead()).length != 1) {
-            log.err("(file:%s,tree#:%d): A node must have exactly one head. Tree node %s fails this test.\n".format(groupName, index, Node(name, children).getCanonicalString))
+            log.err("(file:%s,tree#:%d): A node must have exactly one head. Following tree node fails this test: %s\n".format(groupName, index, sprintNode(Node(name, children))))
           }
           log.trace("%sresult: %s,\"%s\"\n".format(prefix, Node(name, children), childRest.substring(cutoff + 1)))
           return Some((Node(name, children), childRest.substring(cutoff + 1)))
         parser.usage()
       }
 
+      pprintErrs = pprintErrsOpt.value.isDefined
+
       val inputTrees = input.value match {
         case Some(filename) => apply(filename)
         case None => parser.usage("you must specify an input tree file")

src/main/scala/opennlp/scalabha/tree/model/TreeNode.scala

 
   def getCanonicalString(): String
 
+  val _indent = "  "
+  def _prettyPrintStringPrefixed(prefix:String): String
+  def prettyPrintString(): String
+
   def getHeight(): Int
 
   def getTagCounts(): HashMap[String, Int] = {
 
   def getCanonicalString(): String = name
 
+  def prettyPrintString(): String = name
+  def _prettyPrintStringPrefixed(prefix:String): String = name
+
   def getHeight(): Int = 0
 }
 
       + (if (children != Nil && children.last.isInstanceOf[Node]) " " else "")
   )
 
+  def _prettyPrintStringPrefixed(prefix:String): String = "%s(%s %s)".format(
+    prefix,
+    name,
+    (for (child <- children) yield child._prettyPrintStringPrefixed(prefix + _indent)).mkString(" ")
+      + (if (children != Nil && children.last.isInstanceOf[Node]) " " else "")
+  )
+  
+  def prettyPrintString(): String = _prettyPrintStringPrefixed("\n")
+
   def maxChildHeight(children: List[TreeNode]): Int = {
     if (children.length == 0) {
       0
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.