vvcephei avatar vvcephei committed 4612894

Removed readlink from treemerge.sh, since it did not work on Macs and does not appear to be necessary. Changed error reporting for treemerge to be a little nicer.

Comments (0)

Files changed (4)

     exit 1
 }
 
-
 case $1 in
     kin|mlg)
         echo "merging $1"
         for collection in $treesrc/* ; do
             echo $collection
             for tree in $collection/* ; do
-                if [[ $exit_code -ne 0 ]]; then
+                if [[ ( $exit_code -ne 0 ) && ( $2 != "-f" )  ]]; then
                     exit $exit_code
                 else
                     treemerge.sh $tree
         ;;
     all)
         echo "merging all"
-        treemerge.sh kin
+        treemerge.sh kin $2
         (( exit_code += $? ))
-        if [[ $exit_code -ne 0 ]]; then
+        if [[ ( $exit_code -ne 0 ) && ( $2 != "-f" ) ]]; then
             exit $exit_code
         else
-            treemerge.sh mlg
+            treemerge.sh mlg $2
             (( exit_code += $? ))
         fi
         ;;
     *)
-        fullpath=`readlink -f $1`
+        #fullpath=`readlink -f $1`
+        fullpath=$1
         case $fullpath in
             */tree/src/*\.eng|*/tree/src/*\.fra|*/tree/src/*\.kin|*/tree/src/*\.mlg)
                 if [[ -e $fullpath ]]; then
                     lang=$( basename $( dirname $( dirname $( dirname $( dirname $fullpath ) ) ) ) )
                     echo "merging $fullpath to $root/$lang/tree/$collection/$base.tree"
                     #echo "running: scalabha run opennlp.scalabha.tree.Merge -i $fullpath -o $root/$lang/tree/$collection/$base.tree"
-                    scalabha run opennlp.scalabha.tree.Merge -i $fullpath -o $root/$lang/tree/$collection/$base.tree
+                    scalabha run opennlp.scalabha.tree.Merge --pprintErrs -i $fullpath -o $root/$lang/tree/$collection/$base.tree
                     (( exit_code += $? ))
                 else
                     echo 

src/main/scala/opennlp/scalabha/tree/Merge.scala

   val help = parser.flag[Boolean](List("h", "help"), "print help")
   val input = parser.option[String](List("i", "input"), "FILEorDIR", "tree source file or directory to compile")
   val output = parser.option[String](List("o", "output"), "FILE", "output file to write compiled trees to.")
+  val skipErrs = parser.flag[Boolean](List("f","skipErrs"),"Do not exit on errors. " +
+    "The default is to exit as soon as errors are caught in any input file.")
+  val pprintErrs = parser.flag[Boolean](List("pprintErrs"), "Format treenodes nicely in error reporting.")
+  
   var log = new SimpleLogger(this.getClass().getName, SimpleLogger.WARN, new BufferedWriter(new OutputStreamWriter(System.err)))
 
+  
   def applyFile(file: File): List[TreeNode] = {
     if (file.getName.endsWith("tree"))
       MultiLineTreeParser(file.getName, scala.io.Source.fromFile(file, "UTF-8"))
     }
   }
 
+  def okToProceed() = (MultiLineTreeParser.log.getStats()._2 == 0 || skipErrs.value.isDefined)
+
   def applyDir(dir: File): List[TreeNode] = {
     (for (child <- dir.listFiles().sorted) yield {
       if (child.isDirectory) {
         applyDir(child)
-      } else if (child.isFile) {
+      } else if (child.isFile && okToProceed) {
         applyFile(child)
       } else {
         // there are other types of files, and we'll just ignore them
       }
 
       MultiLineTreeParser.log.logLevel = SimpleLogger.WARN
+      MultiLineTreeParser.pprintErrs = pprintErrs.value.isDefined
 
-      val outputBuffer = output.value match {
-        case Some(filename) =>
-          if (filename.endsWith(".tree")) {
-            (new File(filename)).getParentFile.mkdirs()
-            new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(filename))))
-          } else {
-            parser.usage("Output file must end with a '.tree' suffix")
-          }
-        case None => new BufferedWriter(new OutputStreamWriter(System.out))
-      }
       val parsedTrees = input.value match {
         case Some(filename) => apply(filename)
         case None => parser.usage("you must specify an input tree file or directory of input tree files")
       val (warnings, errors) = (compileWarnings + parseWarnings, compileErrors + parseErrors)
 
       log.summary("Warnings,Errors: %s\n".format((warnings, errors)))
-      if (errors == 0)
+      if (errors == 0){
+        val outputBuffer = output.value match {
+          case Some(filename) =>
+            if (filename.endsWith(".tree")) {
+              (new File(filename)).getParentFile.mkdirs()
+              new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(filename))))
+            } else {
+              parser.usage("Output file must end with a '.tree' suffix")
+            }
+          case None => new BufferedWriter(new OutputStreamWriter(System.out))
+        }
         outputBuffer.write(parsedTrees.map(tree => tree.getCanonicalString()).mkString("\n") + "\n")
+
+        outputBuffer.close()
+      }
       else
-        log.summary("Suspending output since there were errors.\n")
+      log.summary("Suspending output since there were errors.\n")
 
-      outputBuffer.close()
       System.exit(errors)
     }
     catch {

src/main/scala/opennlp/scalabha/tree/MultiLineTreeParser.scala

   val help = parser.flag[Boolean](List("h", "help"), "print help")
   val input = parser.option[String](List("i", "input"), "FILE", "input inputFile to tokenize")
   var log = new SimpleLogger(this.getClass().getName, SimpleLogger.WARN, new BufferedWriter(new OutputStreamWriter(System.err)))
+  var pprintErrs = false
+  val pprintErrsOpt = parser.flag[Boolean](List("pprintErrs"), "Format treenodes nicely in error reporting.")
 
+  
   val openSymRestRegex = """\s*(\(?)\s*([^\s)(]+)\s*(.*)""".r
 
+  private def sprintNode(node: TreeNode): String = {
+    if (pprintErrs) {
+      node.prettyPrintString()
+    } else {
+      node.getCanonicalString()
+    }
+  }
+  
   /**
    * Parse a string representation of a syntax tree into a TreeNode tree.
    * @param groupName This is used for logging errors. It's the name of the group that
           // a Node that contains a Value child must contain only one child
           if (children.length == 0
             || (children.filter(_.isInstanceOf[Value]).length > 0 && children.length != 1)) {
-            log.err("(file:%s,tree#:%d): A leaf node may only contain a tag and a token. I.e., (TAG token). Tree node %s fails this test.\n".format(groupName, index, Node(name, children).getCanonicalString))
+            log.err("(file:%s,tree#:%d): A leaf node may only contain a tag and a token. I.e., (TAG token). Following tree node fails this test: %s\n".format(groupName, index, sprintNode(Node(name, children))))
           }
           if (children.filter(_.isHead()).length != 1) {
-            log.err("(file:%s,tree#:%d): A node must have exactly one head. Tree node %s fails this test.\n".format(groupName, index, Node(name, children).getCanonicalString))
+            log.err("(file:%s,tree#:%d): A node must have exactly one head. Following tree node fails this test: %s\n".format(groupName, index, sprintNode(Node(name, children))))
           }
           log.trace("%sresult: %s,\"%s\"\n".format(prefix, Node(name, children), childRest.substring(cutoff + 1)))
           return Some((Node(name, children), childRest.substring(cutoff + 1)))
         parser.usage()
       }
 
+      pprintErrs = pprintErrsOpt.value.isDefined
+
       val inputTrees = input.value match {
         case Some(filename) => apply(filename)
         case None => parser.usage("you must specify an input tree file")

src/main/scala/opennlp/scalabha/tree/model/TreeNode.scala

 
   def getCanonicalString(): String
 
+  val _indent = "  "
+  def _prettyPrintStringPrefixed(prefix:String): String
+  def prettyPrintString(): String
+
   def getHeight(): Int
 
   def getTagCounts(): HashMap[String, Int] = {
 
   def getCanonicalString(): String = name
 
+  def prettyPrintString(): String = name
+  def _prettyPrintStringPrefixed(prefix:String): String = name
+
   def getHeight(): Int = 0
 }
 
       + (if (children != Nil && children.last.isInstanceOf[Node]) " " else "")
   )
 
+  def _prettyPrintStringPrefixed(prefix:String): String = "%s(%s %s)".format(
+    prefix,
+    name,
+    (for (child <- children) yield child._prettyPrintStringPrefixed(prefix + _indent)).mkString(" ")
+      + (if (children != Nil && children.last.isInstanceOf[Node]) " " else "")
+  )
+  
+  def prettyPrintString(): String = _prettyPrintStringPrefixed("\n")
+
   def maxChildHeight(children: List[TreeNode]): Int = {
     if (children.length == 0) {
       0
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.