Commits

vvcephei committed e544874

cleaned up json format

Comments (0)

Files changed (1)

src/main/scala/org/fiasana/Grammarian.scala

       ("[ %s \n%"+(14+lengthOfObsKey)+"s]").format(getCountList(count).map("\""+_.toString()+"\"").mkString(("\n%"+(14+lengthOfObsKey)+"s, ").format("")),"")
     }
     "{ %s\n}".format((for ((tag, observations) <- grammar.toList.sortBy(_._1)) yield {
-      "%5s : { ".format(tag) + observations.map {
+      "%5s : { ".format("\""+tag+"\"") + observations.map {
         case (key, count) => ("%-" + lengthOfObsKey + "s: %s").format("\"" + getObservationKeyString(key) + "\"", getCountString(count))
       }.mkString("\n          , ") + "\n          }\n"
     }).mkString("\n, "))
       "%s\n%s\n".format(tag,observations.map{
         case (key,count) =>
           val (total,_) = count
-          ",\"%s\",,,%s\n%s".format(getObservationKeyString(key).replaceAll("\"","<quote>"),total,getCountString(count))
+          ",\"%s\",,,%s\n%s".format(getObservationKeyString(key),total,getCountString(count))
       }.mkString("\n"))
     }).mkString("\n"))
 
 }
 
 case class NonTerminalGrammar() extends Grammar {
-  def getObservationKeyString(key: Any): String = key.asInstanceOf[Iterable[String]].mkString(" ")
+  def getObservationKeyString(key: Any): String = key.asInstanceOf[Iterable[String]].mkString(" ").replaceAll("\"","<quote>")
 }
 
 case class TerminalGrammar() extends Grammar {
-  def getObservationKeyString(key: Any): String = key.toString
+  def getObservationKeyString(key: Any): String = key.toString.replaceAll("\"","<quote>")
 }
 
 object Grammarian {
   }
 
   val collectionsO = parser.multiOption[(String, String)]("coll", "COLLECTION ...", "One or more collections to examine." +
-    " If this option is unspecified, all collections in the language will be used.") {
+    " Collections are of the form lang:collectionName, as in kin:kgmc. Note lang:* will scan all the collections" +
+    " under language 'lang'.") {
     (s, opt) =>
       try {
         val (lang :: coll :: Nil) = s.split(":").toList
       }.map {
         case (k, vs) => (k, vs.map {
           case (_, v) => v
-        })
+        }.filter(s=>s!="*"))
       }
 
+      val nonTerminalGrammar = NonTerminalGrammar()
+      val terminalGrammar = TerminalGrammar()
       for ((collectionLang, collections) <- langsAndCollections) {
         val root = new File(
           (List(muri_dir, "data", "phase2", collectionLang, "tree") :::
 
         val treeFileList = getAllFiles(root, language, ".tree", collections).reverse
 
-        val (nonTerminalGrammar, terminalGrammar) = buildGrammar(treeFileList)
-        doOutput(nonTerminalGrammar, terminalGrammar)
+        val (tmpNonTerminalGrammar, tmpTerminalGrammar) = buildGrammar(treeFileList)
+        nonTerminalGrammar.foldIn(tmpNonTerminalGrammar)
+        terminalGrammar.foldIn(tmpTerminalGrammar)
       }
+      doOutput(nonTerminalGrammar, terminalGrammar)
 
     } catch {
       case e: ArgotUsageException =>