Commits

vvcephei  committed 195f419

refactored TreeNode, writing Grammarian, about to refactor TreeNodeTest

  • Participants
  • Parent commits cb864cd

Comments (0)

Files changed (10)

File src/main/scala/opennlp/scalabha/tree/Merge.scala

         val treeIndexBase = treeNum.toInt - 1
 
         for ((tree, index) <- trees.zipWithIndex) {
-          val treeTokens = tree.getTokens()
+          val treeTokens = tree.getTokenStrings
           val tokens = tokensFromFile(treeIndexBase + index)
           val pass = TokenChecker.checkTokensInLine(treeTokens, tokens)
         }
             }
           case None => new BufferedWriter(new OutputStreamWriter(System.out))
         }
-        outputBuffer.write(parsedTrees.map(tree => tree.getCanonicalString()).mkString("\n") + "\n")
+        outputBuffer.write(parsedTrees.map(tree => tree.getCanonicalString).mkString("\n") + "\n")
 
         outputBuffer.close()
       } else {

File src/main/scala/opennlp/scalabha/tree/MultiLineTreeParser.scala

 
   private def sprintNode(node: TreeNode): String = {
     if (pprintErrs) {
-      node.prettyPrintString()
+      node.getPrettyString
     } else {
-      node.getCanonicalString()
+      node.getCanonicalString
     }
   }
   
             || (children.filter(_.isInstanceOf[Value]).length > 0 && children.length != 1)) {
             log.err("(file:%s,tree#:%d): A leaf node may only contain a tag and a token. I.e., (TAG token). Following tree node fails this test: %s\n".format(groupName, index, sprintNode(Node(name, children))))
           }
-          if (children.filter(_.isHead()).length != 1) {
+          if (children.filter(_.isHead).length != 1) {
             log.err("(file:%s,tree#:%d): A node must have exactly one head. Following tree node fails this test: %s\n".format(groupName, index, sprintNode(Node(name, children))))
           }
           log.trace("%sresult: %s,\"%s\"\n".format(prefix, Node(name, children), childRest.substring(cutoff + 1)))
 
       val (warnings, errors) = log.getStats()
       if (errors == 0)
-        println(inputTrees.map(tree => tree.getCanonicalString()).mkString("\n"))
+        println(inputTrees.map(tree => tree.getCanonicalString).mkString("\n"))
       else
         log.summary("Suspending output since there were errors.\n")
 

File src/main/scala/opennlp/scalabha/tree/TagChecker.scala

-package opennlp.scalabha.tree
-
-import opennlp.scalabha.log.SimpleLogger
-import java.io.{OutputStreamWriter, BufferedWriter}
-import org.clapper.argot.{ArgotUsageException, ArgotParser, ArgotConverters}
-import opennlp.scalabha.model.TreeNode
-import collection.Set
-import collection.mutable.HashMap
-import grizzled.string
-import io.BufferedSource
-
-object TagChecker {
-
-  import ArgotConverters._
-
-  val parser = new ArgotParser(this.getClass.getName, preUsage = Some("Version 0.0"))
-  val help = parser.flag[Boolean](List("h", "help"), "print help")
-  val input = parser.option[String](List("i", "input"), "FILE", "input inputFile in which to check tags")
-  val other = parser.option[String](List("other"), "FILE", "(optional) other inputFile to compare against the input inputFile")
-  val log = new SimpleLogger(this.getClass.getName, SimpleLogger.WARN, new BufferedWriter(new OutputStreamWriter(System.err)))
-  val noLog = new SimpleLogger(this.getClass.getName, SimpleLogger.NONE, new BufferedWriter(new OutputStreamWriter(System.err)))
-
-
-  abstract class TagCheckResult
-
-  case class Success() extends TagCheckResult {
-    override def toString(): String = {
-      "OK"
-    }
-
-    val bool = true
-  }
-
-  case class Fail[T](left: T, right: T) extends TagCheckResult {
-    override def toString(): String = {
-      "FAIL: <<%s>>!=<<%s>>".format(left, right)
-    }
-
-    val bool = false
-  }
-
-  def spprintRepr(map: Map[String, Int], join: String): String = {
-    val regex = "[^(]+\\((.*)\\)".r
-    val regex(string) = map.toList.sorted.toString
-    string.replace(", ", join)
-  }
-
-  /**
-   * Return true iff both trees have the same tag counts
-   */
-  def apply(left: TreeNode, right: TreeNode): TagCheckResult = {
-    val leftCounts = left.getTagCounts()
-    val rightCounts = right.getTagCounts()
-    if (leftCounts == rightCounts)
-      Success()
-    else
-      Fail[HashMap[String, Int]](leftCounts, rightCounts)
-  }
-
-  def combineMaps[K, V](map1: Map[K, V], map2: Map[K, V], op: (V, V) => V): Map[K, V] =
-    ((for ((k, v) <- map2) yield (if (map1.contains(k)) (k, op(v, map1(k))) else (k, v)))
-      ++
-      (for ((k, v) <- map1 if !map2.contains(k)) yield (k, v))
-      ).toMap
-
-  def apply(left: Iterator[String], right: Iterator[String]): Map[String, Int] = {
-    var resultCounts = Map[String, Int]()
-    for (((leftLine, rightLine), index) <- (left zip right).zipWithIndex) {
-      MultiLineTreeParser("left",index,leftLine) match {
-        case Some(leftTree: TreeNode) =>
-          MultiLineTreeParser("right",index,rightLine) match {
-            case Some(rightTree: TreeNode) =>
-              if (leftTree.compareStructure(rightTree))
-                resultCounts = combineMaps[String, Int](resultCounts.toMap, leftTree.getTagCounts().toMap, (a: Int, b: Int) => (a + b))
-              else
-                log.err("Line %d: the structure of <<%s>> does not the match strucure of <<%s>>\n".format(index, leftLine, rightLine))
-            case None => ()
-          }
-        case None => ()
-      }
-    }
-    for (line <- left) {
-      log.err("Leftover line <<%s>> in input inputFile\n".format(line))
-    }
-    for (line <- right) {
-      log.err("Leftover line <<%s>> in other inputFile\n".format(line))
-    }
-    resultCounts
-  }
-
-  def apply(list: Iterator[String]): HashMap[String, Int] = {
-    val tagCounts = HashMap[String, Int]()
-
-    for ((line, index) <- list.zipWithIndex) {
-      val tree = MultiLineTreeParser("trees",index,line)
-      
-      if (tree.isDefined) {
-        for ((key, value) <- tree.get.getTagCounts()) {
-          if (tagCounts.contains(key))
-            tagCounts(key) += value
-          else
-            tagCounts(key) = value
-        }
-      }
-    }
-    tagCounts
-  }
-
-  def checkTokensInLine(aList: List[String], bList: List[String]): String = {
-    if (aList.length != bList.length) {
-      //log.err("Lists should be the same length: %s %s\n".format(treeTokens, tokFileTokens))
-      "Fail: \n\ttree: %s is not the same length as \n\ttok:  %s".format(aList, bList)
-    } else if (aList.length == 0) {
-      ""
-    } else {
-      val a :: as = aList
-      val b :: bs = bList
-      if (a != b) {
-        if ((a == "-LRB-" && b == "(")||(b == "-LRB-" && a == "(")) {
-          "" + checkTokensInLine(as, bs)
-        } else if ((a == "-RRB-" && b == ")") || (b == "-RRB-" && a == ")")) {
-          "" + checkTokensInLine(as, bs)
-        } else {
-          //log.err("%s does not match %s\n".format(a, b))
-          "Fail: \"%s\" does not match \"%s\"".format(a, b)
-        }
-      } else {
-        "" + checkTokensInLine(as, bs)
-      }
-    }
-  }
-
-  def checkTokens(infile: List[String], tokfile: List[String]): List[String] = {
-    for (((inTreeLine, tokLine), index) <- (infile zip tokfile).toList.zipWithIndex) yield {
-      val inTree = MultiLineTreeParser("trees",index,inTreeLine)
-      inTree match {
-        case Some(root) =>
-          val inTreeTokens: List[String] = root.getTokens
-          val tokTokens = tokLine.replace("<EOS>", "").split("\\s+").toList
-          checkTokensInLine(inTreeTokens, tokTokens) match {
-            case "" => "%d: pass".format(index)
-            case x => "%d: %s".format(index, x)
-          }
-        case _ => "%d: Fail - couldn't parse tree. See parser log messages.".format(index)
-      }
-    }
-
-
-  }
-
-  def main(args: Array[String]) {
-    val parser = new ArgotParser(this.getClass.getName, preUsage = Some("Version 0.0"))
-    val help = parser.flag[Boolean](List("h", "help"), "print help")
-    val input = parser.option[String](List("i", "input"), "FILE", "input inputFile in which to check tags")
-    val tokens = parser.option[String](List("tok"), "FILE", "tags to check")
-    val other = parser.option[String](List("other"), "FILE", "(optional) other inputFile to compare against the input inputFile")
-
-
-    try {
-      parser.parse(args)
-
-      if (help.value.isDefined) {
-        parser.usage()
-      }
-
-      val input_file =
-        (if (input.value.isDefined) {
-          scala.io.Source.fromFile(input.value.get, "UTF-8")
-        } else {
-          scala.io.Source.stdin
-        }).getLines()
-
-      if (tokens.value.isDefined) {
-        log.trace("comparing tokens from %s to those in the trees in %s\n")
-        val inputList = input_file.toList
-        println(
-          checkTokens(inputList, scala.io.Source.fromFile(tokens.value.get, "UTF-8").getLines().toList).mkString("\n")
-        )
-        log.summary("Tag stats:\n\t%s\n".format(spprintRepr(apply(inputList.iterator).toMap, "\n\t"))
-        )
-      } else {
-
-        other.value match {
-          case None =>
-            log.summary("Tag stats:\n\t%s\n".format(
-              spprintRepr(
-                apply(input_file).toMap, "\n\t")
-            ))
-          case Some(other_file_name) =>
-            val other_file = scala.io.Source.fromFile(other.value.get, "UTF-8").getLines()
-            log.summary("Tag stats:\n\t%s\n".format(
-              spprintRepr(
-                apply(input_file, other_file), "\n\t")
-            ))
-        }
-      }
-      log.summary("Warnings,Errors: %s\n".format(log.getStats()))
-      MultiLineTreeParser.log.summary("Warnings,Errors: %s\n".format(MultiLineTreeParser.log.getStats()))
-
-
-    } catch {
-      case e: ArgotUsageException =>
-        println(e.message)
-    }
-  }
-
-}

File src/main/scala/opennlp/scalabha/tree/Tok2Trees.scala

 import java.io._
 import org.xml.sax.SAXParseException
 import ArgotConverters._
-import opennlp.scalabha.model.{Value, Node}
 import com.sun.org.apache.xpath.internal.operations.Mult
+import opennlp.scalabha.model.{TreeNode, Value, Node}
 
 object Tok2Trees {
   val parser = new ArgotParser(this.getClass.getName, preUsage = Some("Version 0.0"))
     ("!" -> "!")
   ).withDefaultValue("x")
 
-  def getTree(tokLine: String): Node = 
+  def getTree(tokLine: String): Node =
     Node("TOP",
       tokLine
         .replaceAll("\\(", "-LRB-")
         .replaceAll("\\)", "-RRB-")
         .split("<EOS>")
-        .map(s=>s.trim)
+        .map(s => s.trim)
         .filter(s => s.length > 0)
         .map(sentence => Node("S", sentence.split("\\s+").map(word => Node(tagDictionary(word), List[Value](Value(word)))).toList))
         .toList
     )
-  
-  def getFormattedString(tokLine: String): String = getTree(tokLine).getCanonicalString().replaceAll("\\s*\\(S","\n    (S")
-  
+
+  def getFormattedString(tokLine: String): String = getTree(tokLine).getCanonicalString.replaceAll("\\s*\\(S", "\n    (S")
+
   /**
    * Build a rudimentary syntax tree from a tokenized line.
    * @param tokLine A space-separated list of tokens
    */
   def apply(tokLine: String): String = getFormattedString(tokLine)
 
+  // Tags that Tok2Trees emits on its own; a tree built solely from these is
+  // considered autogenerated.
+  val autoGenTags = List("TOP", "S", "x") //FIXME dry
+  // True when every tag string in the tree is one of the autogenerated tags.
+  val autoGenOk: (TreeNode) => Boolean =
+    (node) => node.getTagStrings.forall((tag) => autoGenTags.contains(tag))
+
   /**
    * A file is ok to overwrite if it does not exist, or it is an autogenerated file, which we
    * can tell from the structure.
     val okNotExist = !file.exists()
     val okBoilerplate =
       (file.canWrite && MultiLineTreeParser(file.getPath).filter {
-        (treeNode) => {
-          // list of only treenodes that are not ok to overwrite.
-          val autoGenNotOk = (treeNode.getTagCounts().filter {
-            case (tag, count) => tag != "TOP" && tag != "S" && tag != "x"
-          }.size != 0)
-          val depthNotOk = (treeNode.getHeight() != 3)
-          autoGenNotOk || depthNotOk
-        }
+        (treeNode) => !autoGenOk(treeNode)
       }.length == 0)
     okNotExist || okBoilerplate
   }

File src/main/scala/opennlp/scalabha/tree/TokenChecker.scala

       val inTree = MultiLineTreeParser("trees",index,inTreeLine)
       inTree match {
         case Some(root) =>
-          val inTreeTokens: List[String] = root.getTokens
+          val inTreeTokens: List[String] = root.getTokenStrings
           val tokTokens = tokLine.replace("<EOS>", "").split("\\s+").toList
           checkTokensInLine(inTreeTokens, tokTokens) match {
             case true => "%d: pass".format(index)

File src/main/scala/opennlp/scalabha/tree/model/TreeNode.scala

 import collection.mutable.HashMap
 
 abstract class TreeNode {
+  protected[model] val _indent = "  "
+
+  protected[model] def _prettyPrintStringPrefixed(prefix: String): String
+
   val name: String
-  
-  def isHead(): Boolean
 
-  def compareStructure(other: TreeNode): Boolean
+  def isHead: Boolean
 
-  def getTagMap(): HashMap[String, HashMap[List[String], Int]]
+  def isTerminal: Boolean
 
-  def getTokens(): List[String]
+  def isToken: Boolean
 
-  def getCanonicalString(): String
+  def getChildren: List[TreeNode]
 
-  val _indent = "  "
-  def _prettyPrintStringPrefixed(prefix:String): String
-  def prettyPrintString(): String
+  def getTokenStrings: List[String]
 
-  def getHeight(): Int
+  def getTagStrings: List[String]
 
-  def getTagCounts(): HashMap[String, Int] = {
-    val result = HashMap[String, Int]()
-    for ((nodeName, innerMap) <- getTagMap()) {
-      for ((list, count) <- innerMap) {
-        if (result.contains(nodeName))
-          result(nodeName) += count
-        else
-          result(nodeName) = count
-      }
-    }
-    result
-  }
+  def getCanonicalString: String
+
+  def getPrettyString: String
+
 }
 
 case class Value(name: String) extends TreeNode {
-  def compareStructure(other: TreeNode): Boolean = {
-    other.isInstanceOf[Value]
-  }
-  
   // Values are heads by definition.
   // This isn't really meaningful, but since leaf Nodes are 
   // defined to have exactly one child, it makes validity checks easy
-  def isHead(): Boolean = true
+  def isHead: Boolean = true
 
-  def getTagMap(): HashMap[String, HashMap[List[String], Int]] = HashMap[String, HashMap[List[String], Int]]()
+  // terminals are interior nodes that contain Values as children. Values themselves are not terminals
+  def isTerminal = false
 
-  def getTokens(): List[String] = List(name)
+  def isToken = true
 
-  def getCanonicalString(): String = name
+  def getChildren = Nil
 
-  def prettyPrintString(): String = name
-  def _prettyPrintStringPrefixed(prefix:String): String = name
+  def getTokenStrings: List[String] = List(name)
 
-  def getHeight(): Int = 0
+  def getTagStrings = Nil // tokens are not tags, obviously
+
+  def getCanonicalString: String = name
+
+  def _prettyPrintStringPrefixed(prefix: String): String = name
+
+  def getPrettyString: String = name
 }
 
 case class Node(name: String, children: List[TreeNode]) extends TreeNode {
-  def compareStructure(other: TreeNode): Boolean = {
-    var result = other.isInstanceOf[Node]
-    if (result) {
-      val otherNode: Node = other.asInstanceOf[Node]
-      if (result) result &= (name == otherNode.name) && (children.length == otherNode.children.length)
-      if (result)
-        for ((t, o) <- (children zip otherNode.children)) {
-          if (result) result &= t.compareStructure(o)
-        }
-    }
-    result
+  // if we decide to strip the head marking from the name, we can change this later to be set on
+  // object creation.
+  def isHead: Boolean = name.endsWith("-H")
+
+  // I'm defining a terminal as a node with only Token children.
+  // Another valid definition might be a node with _any_ Token child.
+  // Since the conventions we have adopted specify that terminals should
+  // have exactly one child and that child should be a Token, there isn't
+  // currently a difference.
+  def isTerminal: Boolean = {
+    children.filter(child => child.isToken) == children
   }
 
-  // if we decide to strip the head marking from the name, we can change this later to be set on
-  // object creation.
-  def isHead(): Boolean = name.endsWith("-H")
+  def isToken = false
 
-  def getTokens(): List[String] = {
+  def getChildren = children
+
+  def getTokenStrings: List[String] = {
     (for (child <- children) yield {
-      child.getTokens()
+      child.getTokenStrings
     }).toList.flatten
   }
 
-  def getTagMap(): HashMap[String, HashMap[List[String], Int]] = {
-    val result: HashMap[String, HashMap[List[String], Int]] = HashMap[String, HashMap[List[String], Int]](
-      (name, HashMap((for (child <- children) yield child.name) -> 1))
-    )
-
-    for (child <- children) {
-      val childMap = child.getTagMap()
-      for ((nodeName, innerMap) <- childMap) {
-        for ((list, count) <- innerMap) {
-          if (result.contains(nodeName)) {
-            if (result(nodeName).contains(list)) {
-              result(nodeName)(list) += count
-            } else {
-              result(nodeName)(list) = count
-            }
-          } else {
-            result(nodeName) = HashMap(list -> count)
-          }
-        }
-      }
-    }
-    result
+  def getTagStrings = {
+    name :: children.flatMap(child=>child.getTagStrings)
   }
 
-  def getCanonicalString(): String = "(%s %s)".format(
+  def getCanonicalString: String = "(%s %s)".format(
     name,
-    (for (child <- children) yield child.getCanonicalString()).mkString(" ")
+    (for (child <- children) yield child.getCanonicalString).mkString(" ")
       + (if (children != Nil && children.last.isInstanceOf[Node]) " " else "")
   )
 
-  def _prettyPrintStringPrefixed(prefix:String): String = "%s(%s %s)".format(
+  def _prettyPrintStringPrefixed(prefix: String): String = "%s(%s %s)".format(
     prefix,
     name,
     (for (child <- children) yield child._prettyPrintStringPrefixed(prefix + _indent)).mkString(" ")
       + (if (children != Nil && children.last.isInstanceOf[Node]) " " else "")
   )
-  
-  def prettyPrintString(): String = _prettyPrintStringPrefixed("\n")
 
-  def maxChildHeight(children: List[TreeNode]): Int = {
-    if (children.length == 0) {
-      0
-    } else {
-      val c :: cs = children
-      math.max(c.getHeight(), maxChildHeight(cs))
-    }
-  }
-
-  def getHeight(): Int = {
-    1 + maxChildHeight(children)
-  }
+  def getPrettyString: String = _prettyPrintStringPrefixed("\n")
 }

File src/main/scala/opennlp/scalabha/util/ListUtils.scala

+package opennlp.scalabha.util
+
+object ListUtils {
+  /**
+   * Counts the occurrences of each distinct string in a list.
+   *
+   * The returned map is immutable and holds one entry per distinct string;
+   * an empty list yields an empty map.
+   */
+  val collapseToOrdinalMap: (List[String]) => Map[String, Int] =
+    (list) =>
+      list.foldLeft(Map[String, Int]()) {
+        (counts, string) => counts + (string -> (counts.getOrElse(string, 0) + 1))
+      }
+}

File src/main/scala/org/fiasana/Grammarian.scala

+package org.fiasana
+
+import org.clapper.argot._
+import opennlp.scalabha.log.SimpleLogger
+import java.io.{File, OutputStreamWriter, BufferedWriter}
+import opennlp.scalabha.tree.MultiLineTreeParser
+import opennlp.scalabha.model.TreeNode
+
+/**
+ * Command-line tool that locates `.tree` files inside an ldmt-muri checkout and
+ * feeds them to the grammar builder.
+ *
+ * Grammar construction is still a stub: `buildGrammar` only prints each parsed
+ * tree and returns an empty map.
+ */
+object Grammarian {
+  final val ALLOWED_LANGUAGES = List("kin", "mlg")
+  final val IGNORE_DIRS = List("src")
+
+  import ArgotConverters._
+
+  val parser = new ArgotParser(this.getClass().getName, preUsage = Some("Version 0.0"))
+  val help = parser.flag[Boolean](List("h", "help"), "print help")
+  val srcO = parser.flag(List("sources", "read_sources"), "Set this flag to read the tree/src files. " +
+    "Leave it off to read the tree/ files.")
+  // Rejects the option via parser.usage when the given directory does not exist.
+  val muri_dirO = parser.option[File](List("muri_dir"),
+    "LDMT_MURI_DIR", "The location of the ldmt-muri repo") {
+    (s, opt) =>
+
+      val file = new File(s)
+      if (!file.exists)
+        parser.usage("Muri directory \"" + s + "\" does not exist.")
+
+      file
+  }
+  // Rejects the option via parser.usage when the language is not in ALLOWED_LANGUAGES.
+  val languageO = parser.option[String]("lang", "LANG", "The language to generate a grammar for.") {
+    (s, opt) =>
+      if (!ALLOWED_LANGUAGES.contains(s)) {
+        parser.usage("Allowed languages are " + ALLOWED_LANGUAGES)
+      }
+      s
+  }
+  val collectionsO = parser.multiOption[String]("coll", "COLLECTION ...", "One or more collections to examine." +
+    " If this option is unspecified, all collections in the language will be used.")
+  var log = new SimpleLogger(this.getClass().getName, SimpleLogger.WARN, new BufferedWriter(new OutputStreamWriter(System.err)))
+
+  /**
+   * Recursively collects the files under `root` whose names end with `extension`.
+   *
+   * Directly under `root`, a subdirectory is descended into only when it is not
+   * listed in IGNORE_DIRS and either `searchList` is empty or it names that
+   * subdirectory; deeper levels are searched without a name filter. Children
+   * are visited in name order and each hit is prepended, so the result comes
+   * back in the reverse of visiting order.
+   */
+  val getAllFiles: (File, String, List[String]) => List[File] =
+    (root, extension, searchList) => {
+      assert(root.isDirectory)
+      // listFiles() returns null on an I/O error; treat that like an empty directory.
+      val children = Option(root.listFiles()).getOrElse(Array.empty[File]).sortBy(f => f.getName)
+      children.foldLeft(List[File]()) {
+        (found, child) =>
+          val selected = searchList.isEmpty || searchList.contains(child.getName)
+          if (child.isDirectory && !IGNORE_DIRS.contains(child.getName) && selected) {
+            getAllFiles(child, extension, Nil) ::: found
+          } else if (child.isFile && child.getName.endsWith(extension)) {
+            child :: found
+          } else {
+            found
+          }
+      }
+    }
+
+  /**
+   * Builds the production table for the root of `tree` only: the root's name
+   * mapped to its list of child names with a count of 1. Descendants are not
+   * visited. Both map levels keep their defaults (an empty inner map, and 0).
+   */
+  val getTreeGrammar: (TreeNode) => Map[String, Map[List[String], Int]] =
+    (tree) => {
+      val noProductions = Map[List[String], Int]().withDefaultValue(0)
+      val childList = tree.getChildren.map(child => child.name)
+      Map[String, Map[List[String], Int]]().withDefaultValue(noProductions) +
+        ((tree.name, noProductions + ((childList, 1))))
+    }
+
+  /**
+   * Stub: parses every file, prints each tree's canonical string prefixed by
+   * the file name, and returns an empty map.
+   */
+  val buildGrammar: (List[File] => Map[TreeNode, List[(File, Int)]]) =
+    (files) => {
+      for (file <- files) {
+        val trees = MultiLineTreeParser(file.getAbsolutePath)
+        for (tree <- trees) {
+          println(file.getName + " " + tree.getCanonicalString)
+        }
+      }
+      Map[TreeNode, List[(File, Int)]]()
+    }
+
+  /**
+   * Parses the command line, resolves <muri_dir>/data/phase2/<lang>/tree
+   * (plus "src" when the sources flag is set), gathers the `.tree` files and
+   * hands them to `buildGrammar`. Usage errors are printed, not rethrown.
+   */
+  def main(args: Array[String]): Unit = {
+    try {
+      MultiLineTreeParser.log.logLevel = SimpleLogger.NONE
+      parser.parse(args)
+      if (help.value.isDefined) {
+        parser.usage()
+      }
+      val language: String = languageO.value match {
+        case Some(lang) => lang
+        case None => parser.usage("You must specify a language from: " + ALLOWED_LANGUAGES)
+      }
+      val muri_dir: String = muri_dirO.value match {
+        case Some(file: File) => file.getAbsolutePath
+        case None => parser.usage("You must specify the location of the muri repo.")
+      }
+      val collections = collectionsO.value.toList
+
+      val root = new File(
+        (List(muri_dir, "data", "phase2", language, "tree") :::
+          (if (srcO.value.isDefined) List("src") else List())).mkString(File.separator)
+      )
+
+      // getAllFiles accumulates in reverse, so flip the list back into visiting order.
+      val treeFileList = getAllFiles(root, ".tree", collections).reverse
+
+      // buildGrammar parses each file itself, so nothing is parsed ahead of time here.
+      buildGrammar(treeFileList)
+    } catch {
+      case e: ArgotUsageException =>
+        println(e.message)
+    }
+  }
+}

File src/test/scala/opennlp/scalabha/test/TagCheckerTest.scala

-package opennlp.scalabha.test
-
-import org.scalatest.FlatSpec
-import org.scalatest.matchers.ShouldMatchers
-import opennlp.scalabha.tree.TagChecker
-import opennlp.scalabha.model.{Value, Node, TreeNode}
-import opennlp.scalabha.tree.TagChecker.{Fail, Success, TagCheckResult}
-import collection.immutable.HashMap
-
-class TagCheckerTest extends FlatSpec with ShouldMatchers {
-  val tests = List[(TreeNode, TreeNode, TagCheckResult)](
-    (Node("a", List(Value("b"))),
-      Node("a", List(Value("b"))),
-      Success()),
-    (Node("a", List(Value("b"), Value("c"))),
-      Node("a", List(Value("b"), Value("c"))),
-      Success()),
-    (Node("a", List(Value("b"), Value("c"))),
-      Node("a", List(Value("e"), Value("f"))),
-      Success()),
-    (Node("a", List(Node("b", List(Value("c"))))),
-      Node("a", List(Node("e", List(Value("f"))))),
-      Fail(Map("a" -> 1, "b" -> 1), Map("a" -> 1, "e" -> 1)))
-  )
-
-  for ((left, right, result) <- tests) {
-    "TagChecker(%s,%s)".format(left,right) should "yield %s".format(result) in {
-      assert(TagChecker(left,right) === result)
-    }
-  }
-
-  "combineMaps" should "work" in {
-    val l = HashMap(("a",1),("b",2))
-    val r = HashMap(("a",3),("c",4))
-    assert(TagChecker.combineMaps(l,r,(a:Int,b:Int)=>(a+b)) === HashMap(("a",4), ("b",2), ("c",4)))
-  }
-}

File src/test/scala/opennlp/scalabha/test/TreeNodeTest.scala

 import org.scalatest.matchers.ShouldMatchers
 
 class TreeNodeTest extends FlatSpec with ShouldMatchers {
+  // Structural comparison: two trees match when their names agree, they have the
+  // same number of children, and the children match pairwise in order.
+  val equal: (TreeNode, TreeNode) => Boolean =
+    (a, b) => {
+      // Operators stay at the end of each line: a line that starts with && would
+      // be parsed as a new statement instead of continuing the expression.
+      (a.name == b.name) &&
+        (a.getChildren.length == b.getChildren.length) &&
+        (a.getChildren zip b.getChildren).forall {
+          // zip yields pairs, so destructure them with a pattern-matching literal.
+          case (ca, cb) => equal(ca, cb)
+        }
+    }
+  
   "A Value" should "always compare true to a Value" in {
     assert(Value("1").compareStructure(Value("1")))
     assert(Value("1").compareStructure(Value("2")))
     assert(!n3.compareStructure(n1))
   }
 
-  val tests = List[(TreeNode, HashMap[String, HashMap[List[String], Int]])](
-    (Node("a", List(Value("b"))), HashMap(("a", HashMap(List("b") -> 1)))),
-    (Node("a", List(Value("b"), Value("c"))), HashMap(("a", HashMap(List("b", "c") -> 1)))),
-    (Node("a", List(Node("b", List(Value("c"))))), HashMap(("a", HashMap(List("b") -> 1)), ("b", HashMap(List("c") -> 1)))),
-    (Node("r", List(Node("a", List(Value("b"))), Node("a", List(Value("c"))), Node("a", List(Value("b"))))),
-      HashMap(("a", HashMap(List("b") -> 2, List("c") -> 1)), ("r", HashMap(List("a", "a", "a") -> 1)))),
-    (Node("a", List(Value("b"), Value("c"), Value("d"))), HashMap(("a", HashMap(List("b", "c", "d") -> 1)))),
-    (Node("a", List(Node("b", List(Value("c"), Value("d"))), Node("e", List(Value("f"), Value("g"))))),
-      HashMap(("a", HashMap(List("b", "e") -> 1)), ("b", HashMap(List("c", "d") -> 1)), ("e", HashMap(List("f", "g") -> 1)))),
-    (Value("a"), HashMap()),
-    (Node("a", List()), HashMap(("a", HashMap(List() -> 1))))
-  )
-
   for ((tree, map) <- tests) {
     "%s.getTagMap()".format(tree) should "yield %s".format(map) in {
       assert(tree.getTagMap() === map)