1. Lars Yencken
  2. gpalign-scala

Commits

Lars Yencken  committed 4ba7e0b

Separates out several modules and adds tests.

  • Participants
  • Parent commits ed5f866
  • Branches default

Comments (0)

Files changed (10)

File src/gpaligner/AlignmentGenerator.scala

View file
  • Ignore whitespace
         for {
             graphemeSeg <- graphemeSegmentations(entry.graphemes)
             phonemeSeg <- phonemeSegmentations(graphemeSeg, entry.phonemes)
-        } yield new Alignment(for {(g, p) <- graphemeSeg zip phonemeSeg} yield Segment(g, p))
+        } yield Alignment(for {(g, p) <- graphemeSeg zip phonemeSeg} yield Segment(g, p))
     }
 
     def graphemeSegmentations(graphemes : String) : List[List[String]]
     }
 }
 
-object Combinatorics { 
-    def segmentCombinations[A](xs : List[A]) : List[List[List[A]]] = {
-        require(xs.length > 0)
-        val nCombinations = 1 << (xs.length - 1)
-        for {
-            i <- List.range(0, nCombinations)
-        } yield segmentCombinationN(xs, i)
-    }
-
-    private def segmentCombinationN[A](xss : List[A], n : Int) : List[List[A]] = xss match {
-        case List() => throw new RuntimeException("list should not be empty")
-        case List(x) => List(List(x))
-        case x :: xs => {
-            if (n % 2 == 0) {
-                val result = segmentCombinationN(xs, n / 2)
-                (x :: result.head) :: result.tail
-            } else {
-                List(x) :: segmentCombinationN(xs, n / 2)
-            }
-        }
-    }
-}

File src/gpaligner/AlignmentModel.scala

View file
  • Ignore whitespace
+/*
+ * AlignmentModel.scala
+ *
+ * Frequency model built from grapheme-phoneme alignments, plus helpers
+ * for viewing each segment together with its neighbouring segments.
+ */
+
+package gpaligner
+
+/**
+ * Helpers that decorate each segment of a sequence with its neighbours.
+ */
+object AlignmentModel {
+    /**
+     * Wraps every segment of `seq` in a GPContext holding the segment before
+     * it and the segment after it. The first segment has no predecessor and
+     * the last has no successor; an empty input gives an empty result.
+     */
+    def withContext(seq : List[Segment]) : List[GPContext] =
+        contextualise(None, seq)
+
+    /**
+     * Same decoration, except that `head` is used as the predecessor of the
+     * first segment of `seq`.
+     */
+    def withContext(head : Segment, seq : List[Segment]) : List[GPContext] =
+        contextualise(Some(head), seq)
+
+    // Lines up each pivot with its left neighbour (shifted right by one,
+    // starting from `leading`) and its right neighbour (shifted left by one,
+    // ending in None); zip truncates to the number of pivots.
+    private def contextualise(leading : Option[Segment], seq : List[Segment]) : List[GPContext] = {
+        val lefts : List[Option[Segment]] = leading :: seq.map(s => Some(s) : Option[Segment])
+        val rights : List[Option[Segment]] = seq.drop(1).map(s => Some(s) : Option[Segment]) ::: List(None)
+        ((lefts zip seq) zip rights).map {
+            case ((left, pivot), right) => GPContext(left, pivot, right)
+        }
+    }
+}
+
+/**
+ * Three frequency distributions updated together from alignments: one keyed
+ * by grapheme string, one by segment, and one by segment-in-context.
+ * NOTE(review): FreqDist is defined elsewhere; inc/dec are presumably count
+ * increments/decrements -- confirm against FreqDist.
+ */
+class AlignmentModel {
+    val gDist = new FreqDist[String]()
+    val gpDist = new FreqDist[Segment]()
+    val gpcDist = new FreqDist[GPContext]()
+
+    /**
+     * Registers every segment of `alignment` in all three distributions:
+     * the segment with its neighbours, the segment alone, and its grapheme.
+     */
+    def inc(alignment : Alignment) : Unit = {
+        for (gpc <- AlignmentModel.withContext(alignment.segments)) {
+            gpcDist.inc(gpc)
+            gpDist.inc(gpc.pivot)
+            gDist.inc(gpc.pivot.grapheme)
+        }
+    }
+
+    /** Applies `inc` to each alignment in turn. */
+    // foreach rather than map: the call is made only for its side effects,
+    // so there is no reason to build a List[Unit].
+    def inc(alignments : List[Alignment]) : Unit = alignments.foreach(a => inc(a))
+
+    /**
+     * Mirror image of `inc`: calls `dec` on the same three distributions for
+     * every segment of `alignment`.
+     */
+    def dec(alignment : Alignment) : Unit = {
+        for (gpc <- AlignmentModel.withContext(alignment.segments)) {
+            gpcDist.dec(gpc)
+            gpDist.dec(gpc.pivot)
+            gDist.dec(gpc.pivot.grapheme)
+        }
+    }
+
+    /** Applies `dec` to each alignment in turn. */
+    def dec(alignments : List[Alignment]) : Unit = alignments.foreach(a => dec(a))
+}

File src/gpaligner/Combinatorics.scala

View file
  • Ignore whitespace
+/*
+ * Combinatorics.scala
+ *
+ * Enumeration of all the ways a list can be cut into contiguous,
+ * non-empty segments.
+ */
+
+package gpaligner
+
+object Combinatorics {
+    /**
+     * Enumerates every way of cutting `xs` into contiguous, non-empty
+     * segments while keeping the element order. A list of n elements yields
+     * 2^(n-1) segmentations, one per integer in [0, 2^(n-1)).
+     *
+     * @param xs the list to segment; must hold between 1 and 31 elements
+     * @return all segmentations, ordered by their index
+     * @throws IllegalArgumentException if `xs` is empty or has more than 31 elements
+     */
+    def segmentCombinations[A](xs : List[A]) : List[List[List[A]]] = {
+        val size = xs.length
+        require(size > 0)
+        // The shift count of an Int is masked to five bits, so for 32 or more
+        // elements `1 << (size - 1)` wraps around and the enumeration would
+        // silently come back empty or truncated. Fail loudly instead.
+        require(size < 32, "too many elements to enumerate segmentations: " + size)
+        val nCombinations = 1 << (size - 1)
+        List.range(0, nCombinations).map(i => segmentCombinationN(xs, i))
+    }
+
+    /**
+     * Builds the segmentation selected by the bits of `n`, lowest bit first:
+     * a set bit closes the current segment right after the element it belongs
+     * to, a clear bit joins that element to the segment that follows.
+     */
+    private def segmentCombinationN[A](xss : List[A], n : Int) : List[List[A]] = xss match {
+        case List() => throw new RuntimeException("list should not be empty")
+        case List(x) => List(List(x))
+        case x :: xs => {
+            val rest = segmentCombinationN(xs, n / 2)
+            if (n % 2 == 0) {
+                // No boundary after x: merge it into the first remaining segment.
+                (x :: rest.head) :: rest.tail
+            } else {
+                // Boundary after x: it forms a segment of its own.
+                List(x) :: rest
+            }
+        }
+    }
+}

File src/gpaligner/GPAligner.scala

View file
  • Ignore whitespace
     val solvedModel = new AlignmentModel
     val unsolvedModel = new AlignmentModel
 
-    val alpha = 0.2
-    val solvedWeight = 0.8
-    val unsolvedWeight = 0.2
+    val alpha = 2.5
+    val solvedWeight = 0.5
+    val unsolvedWeight = 0.5
+//    require(0 < alpha)
+//    require(alpha < unsolvedWeight)
+//    require(unsolvedWeight <= solvedWeight)
 
     def align() : Unit = {
         var clouds = buildClouds
         tf * idf
     }
 
-    private def wtf(grapheme : String) = unsolvedWeight * unsolvedModel.gDist.prob(grapheme) + solvedWeight * solvedModel.gDist.prob(grapheme)
-    private def wtf(gp : Segment) = unsolvedWeight * unsolvedModel.gpDist.prob(gp) + solvedWeight * solvedModel.gpDist.prob(gp)
-    private def wtf(gpc : GPContext) = unsolvedWeight * unsolvedModel.gpcDist.prob(gpc) + solvedWeight * solvedModel.gpcDist.prob(gpc)
+    private def wtf(grapheme : String) = unsolvedWeight * unsolvedModel.gDist.getCount(grapheme) + solvedWeight * solvedModel.gDist.getCount(grapheme)
+    private def wtf(gp : Segment) = unsolvedWeight * unsolvedModel.gpDist.getCount(gp) + solvedWeight * solvedModel.gpDist.getCount(gp)
+    private def wtf(gpc : GPContext) = unsolvedWeight * unsolvedModel.gpcDist.getCount(gpc) + solvedWeight * solvedModel.gpcDist.getCount(gpc)
 }
-
-object Stats {
-    def mean(xs : List[Double]) : Double = (0.0 /: xs)(_ + _) / xs.length    
-}
-
-object AlignmentModel {
-    
-    def withContext(seq : List[Segment]) : List[GPContext] = seq match {
-        case List() => List()
-        case List(x) => List(GPContext(None, x, None))
-        case x :: y :: xs => GPContext(None, x, Some(y)) :: withContext(x, y :: xs)
-    }
-
-    def withContext(head : Segment, seq : List[Segment]) : List[GPContext] = seq match {
-        case List() => List()
-        case List(x) => List(GPContext(Some(head), x, None))
-        case x :: y :: xs => GPContext(Some(head), x, Some(y)) :: withContext(x, y :: xs)
-    }
-}
-
-class AlignmentModel {
-    val gDist = new FreqDist[String]()
-    val gpDist = new FreqDist[Segment]()
-    val gpcDist = new FreqDist[GPContext]()
-
-    def inc(alignment : Alignment) : Unit = {
-        for (gpc <- AlignmentModel.withContext(alignment.segments)) {
-            gpcDist.inc(gpc)
-            gpDist.inc(gpc.pivot)
-            gDist.inc(gpc.pivot.grapheme)
-        }
-    }
-
-    def inc(alignments : List[Alignment]) : Unit = { alignments.map(a => inc(a)) }
-
-    def dec(alignment : Alignment) : Unit = {
-        for (gpc <- AlignmentModel.withContext(alignment.segments)) {
-            gpcDist.dec(gpc)
-            gpDist.dec(gpc.pivot)
-            gDist.dec(gpc.pivot.grapheme)
-        }
-    }
-
-    def dec(alignments : List[Alignment]) : Unit = { alignments.map(a => dec(a)) }
-}

File src/gpaligner/GPPair.scala

View file
  • Ignore whitespace
 case class ScoredAlignment(score : Double, alignment : Alignment)
 
 case class AlignmentCloud(gpPair : GPPair, scoredAlignments : List[ScoredAlignment]) {
-    val bestScore : Double = max(for { sa <- scoredAlignments } yield sa.score)
+    val bestScore : Double = Stats.max(for { sa <- scoredAlignments } yield sa.score)
     val size : Int = scoredAlignments.length
 
     def isOverconstrained = scoredAlignments.length == 0
         Pair(AlignedPair(gpPair, alignment), remainingAlignments)
     }
 
-    def max(a : List[Double]) = (Double.MinValue /: a)(Math.max(_, _))
-
     private def best(bestSoFar : ScoredAlignment, remainder : List[Alignment], rest : List[ScoredAlignment]) : Pair[Alignment, List[Alignment]] = rest match {
         case List() => Pair(bestSoFar.alignment, remainder)
         case x :: xs => {

File src/gpaligner/Stats.scala

View file
  • Ignore whitespace
+/*
+ * Stats.scala
+ *
+ * Small numeric helpers (mean, max, min) over non-empty lists of doubles.
+ */
+
+package gpaligner
+
+import scala.Math
+
+/**
+ * Summary statistics over non-empty lists of doubles. Every method throws
+ * IllegalArgumentException when given an empty list.
+ */
+object Stats {
+    /** Arithmetic mean of `xs`. */
+    def mean(xs : List[Double]) : Double = {
+        require(!xs.isEmpty)
+        xs.foldLeft(0.0)(_ + _) / xs.length
+    }
+
+    /** Largest element of `xs`. */
+    def max(xs : List[Double]) : Double = {
+        require(!xs.isEmpty)
+        // Reduce over the elements themselves rather than folding from
+        // Double.MinValue, which would be returned in place of -Infinity.
+        xs.reduceLeft((a : Double, b : Double) => Math.max(a, b))
+    }
+
+    /** Smallest element of `xs`. */
+    def min(xs : List[Double]) : Double = {
+        require(!xs.isEmpty)
+        // Same reasoning as max: a Double.MaxValue seed would mask +Infinity.
+        xs.reduceLeft((a : Double, b : Double) => Math.min(a, b))
+    }
+}

File test/gpaligner/AlignmentGeneratorTest.scala

View file
  • Ignore whitespace
+/*
+ * AlignmentGeneratorTest.scala
+ *
+ * To change this template, choose Tools | Template Manager
+ * and open the template in the editor.
+ */
+
+package gpaligner
+
+import org.junit.After
+import org.junit.Before
+import org.junit.Test
+import org.junit.Assert._
+
+/**
+ * Checks the candidate alignments that the generators produce for
+ * Japanese grapheme/phoneme pairs.
+ */
+class AlignmentGeneratorTest {
+
+    @Before
+    def setUp: Unit = {
+    }
+
+    @After
+    def tearDown: Unit = {
+    }
+
+    /**
+     * LinguisticGenerator is expected to offer every split of the reading,
+     * while JapaneseGenerator is expected to leave out the split whose
+     * second segment would begin with "ん".
+     */
+    @Test
+    def testNKana : Unit = {
+        val input = GPPair("神符", "しんぷ")
+
+        // No trailing comma after the last element: compilers older than
+        // Scala 2.12.2 reject it.
+        val expectedLinguistic = List(
+            Alignment(List(Segment("神符", "しんぷ"))),
+            Alignment(List(Segment("神", "し"), Segment("符", "んぷ"))),
+            Alignment(List(Segment("神", "しん"), Segment("符", "ぷ")))
+        )
+        assertEquals(expectedLinguistic, LinguisticGenerator.generate(input))
+
+        val expectedJapanese = List(
+            Alignment(List(Segment("神符", "しんぷ"))),
+            Alignment(List(Segment("神", "しん"), Segment("符", "ぷ")))
+        )
+        assertEquals(expectedJapanese, JapaneseGenerator.generate(input))
+    }
+
+    /**
+     * JapaneseGenerator is expected to offer no split whose second segment
+     * would begin with the small kana "ゅ".
+     */
+    @Test
+    def testSmallKana : Unit = {
+        val input = GPPair("州際", "しゅうさい")
+        val expected = List(
+            Alignment(List(Segment("州際", "しゅうさい"))),
+            Alignment(List(Segment("州", "しゅ"), Segment("際", "うさい"))),
+            Alignment(List(Segment("州", "しゅう"), Segment("際", "さい"))),
+            Alignment(List(Segment("州", "しゅうさ"), Segment("際", "い")))
+        )
+        assertEquals(expected, JapaneseGenerator.generate(input))
+    }
+
+}

File test/gpaligner/AlignmentModelTest.scala

View file
  • Ignore whitespace
+/*
+ * AlignmentModelTest.scala
+ *
+ * To change this template, choose Tools | Template Manager
+ * and open the template in the editor.
+ */
+
+package gpaligner
+
+import org.junit.After
+import org.junit.Before
+import org.junit.Test
+import org.junit.Assert._
+
+/**
+ * Tests for the neighbour decoration done by AlignmentModel.withContext.
+ */
+class AlignmentModelTest {
+
+    @Before
+    def setUp: Unit = {
+    }
+
+    @After
+    def tearDown: Unit = {
+    }
+
+    // For a two-segment input, each segment should see the other one as its
+    // only neighbour, and nothing on its outer side.
+    @Test
+    def contextTest = {
+        val first = Segment("t", "T")
+        val second = Segment("oo", "U")
+        val expected = List(
+            GPContext(None, first, Some(second)),
+            GPContext(Some(first), second, None)
+        )
+        val actual = AlignmentModel.withContext(List(first, second))
+        assert(actual == expected)
+    }
+
+}

File test/gpaligner/GPAlignerTest.scala

  • Ignore whitespace
-/*
- * GPAlignerTest.scala
- *
- * To change this template, choose Tools | Template Manager
- * and open the template in the editor.
- */
-
-package gpaligner
-
-import org.junit.After
-import org.junit.Before
-import org.junit.Test
-import org.junit.Assert._
-
-class GPAlignerTest {
-
-    @Before
-    def setUp: Unit = {
-    }
-
-    @After
-    def tearDown: Unit = {
-    }
-
-    @Test
-    def mean = {
-        val x = Stats.mean(List(-1.0, 0.0, 1.0))
-        assert(x == 0.0)
-    }
-
-}

File test/gpaligner/StatsTest.scala

View file
  • Ignore whitespace
+/*
+ * StatsTest.scala
+ *
+ * To change this template, choose Tools | Template Manager
+ * and open the template in the editor.
+ */
+
+package gpaligner
+
+import org.junit.After
+import org.junit.Before
+import org.junit.Test
+import org.junit.Assert._
+
+/**
+ * Tests for the mean, max and min helpers in Stats.
+ */
+class StatsTest {
+
+    @Before
+    def setUp: Unit = {
+    }
+
+    @After
+    def tearDown: Unit = {
+    }
+
+    // A symmetric sample averages to zero; the integers 0..99 average to 49.5.
+    @Test
+    def mean = {
+        val symmetric = List(-1.0, 0.0, 1.0)
+        assert(Stats.mean(symmetric) == 0.0)
+        val zeroToNinetyNine = List.range(0, 100).map(_.toDouble)
+        assert(Stats.mean(zeroToNinetyNine) == 49.5)
+    }
+
+    // The largest of -1, 0, 1 is 1.
+    @Test
+    def max = {
+        val sample = List(-1.0, 0.0, 1.0)
+        val largest = Stats.max(sample)
+        assert(largest == 1.0)
+    }
+
+    // The smallest of -1, 0, 1 is -1.
+    @Test
+    def min = {
+        val sample = List(-1.0, 0.0, 1.0)
+        val smallest = Stats.min(sample)
+        assert(smallest == -1.0)
+    }
+}