Commits

Lars Yencken committed a2c41b7 Merge

Merged in bug fixes.

  • Participants
  • Parent commits 4529872, 6681c90

Comments (0)

Files changed (4)

 
 #include <boost/functional/hash.hpp>
 
+const wstring gp_sep = L" ";
+
 //--------------------------------------------------------------------------//
 
 Segment::Segment()
 
 wostream& operator<<(wostream& oStream, const Segment& s)
 {
-    oStream << s.g << L"-" << s.p;
+    oStream << s.g << gp_sep << s.p;
     return oStream;
 }
 
     for (int i = 0; i < nSlots; i++) {
         slots.push_back(a[i].g);
     }
-    oStream << join(slots, L"|") << "-";
+    oStream << join(slots, L"|") << gp_sep;
     slots.clear();
     for (int i = 0; i < nSlots; i++) {
         slots.push_back(a[i].p);

src/generator.cpp

 
 typedef vector<Grapheme>  GraphemeList;
 
+/**
+ * A wide output stream operator for debugging output.
+ *
+ * Writes each grapheme in `slots` to `output` in order, separated by L"|".
+ * An empty list is written as the placeholder text L"<no slots>" instead.
+ *
+ * @param output  The wide stream to write to.
+ * @param slots   The grapheme list to print.
+ * @return        The same `output` stream, so that calls can be chained.
+ */
+wostream& operator<<(wostream& output, const GraphemeList& slots)
+{
+    if (slots.size() == 0) {
+        output << L"<no slots>";
+        return output;
+    }
+
+    for (unsigned int i = 0; i < slots.size(); i++) {
+        if (i > 0) {
+            output << L"|";
+        }
+        output << slots[i];
+    }
+    return output;
+}
+
 //--------------------------------------------------------------------------//
 
 /**
     for (i = 1; i < sSize; i++) {
         script = scriptType(s[i]);
         if (script != lastScript) {
-            results.push_back(make_pair(lastScript, s.substr(startSeg, i)));
+            results.push_back(make_pair(lastScript, s.substr(startSeg,
+                    i - startSeg)));
             startSeg = i;
             lastScript = script;
         }

src/tests/testGenerator.hpp

 class GeneratorTest : public CppUnit::TestFixture
 {
 private:
-    Segment* m_unique;
-    Segment* m_basic;
-    Segment* m_smallKana;
-    Segment* m_longVowel;
-    Segment* m_mixed;
 
 public:
     void setUp()
     {
-        m_unique = new Segment(L"私", L"わたし");
-        m_basic = new Segment(L"漢字", L"たんだん"); 
-        m_smallKana = new Segment(L"発起人", L"ほっきにん");
-        m_longVowel = new Segment(L"ビール", L"ビール");
-        m_mixed = new Segment(L"此の節", L"このせつ");
     }
 
     void testUnique()
     {
-        CPPUNIT_ASSERT(m_unique->g.size() == 1);
-        CPPUNIT_ASSERT(m_unique->p.size() == 3);
+        Segment data(L"私", L"わたし");
+        CPPUNIT_ASSERT(data.g.size() == 1);
+        CPPUNIT_ASSERT(data.p.size() == 3);
         vector<Alignment> alignments;
-        potentialAlignments(*m_unique, alignments);
+        potentialAlignments(data, alignments);
         CPPUNIT_ASSERT(alignments.size() == 1);
         Alignment& alignment = alignments[0];
         CPPUNIT_ASSERT(alignment.size() == 1);
-        CPPUNIT_ASSERT(alignment[0].g == m_unique->g);
-        CPPUNIT_ASSERT(alignment[0].p == m_unique->p);
+        CPPUNIT_ASSERT(alignment[0].g == data.g);
+        CPPUNIT_ASSERT(alignment[0].p == data.p);
     }
 
     void testBasic()
     {
+        Segment data(L"漢字", L"たんだん"); 
         vector<Alignment> alignments;
-        potentialAlignments(*m_basic, alignments);
+        potentialAlignments(data, alignments);
         CPPUNIT_ASSERT(alignments.size() == 2);
         {
             Alignment& alignment = alignments[0];
 
     void testSmallKana()
     {
+        Segment data(L"発起", L"ほっき");
         vector<Alignment> alignments;
-        potentialAlignments(*m_smallKana, alignments);
+        potentialAlignments(data, alignments);
         CPPUNIT_ASSERT(alignments.size() == 2);
         for (int i = 0; i < 2; i++) {
             Alignment& a = alignments[i];
+            // No phoneme segment can start with a small kana character.
             for (Alignment::iterator iter = a.begin(); iter != a.end();
                     iter++) {
                 CPPUNIT_ASSERT(!isSmallKana(iter->p[0]));
         }
     }
 
+    void testNKana()
+    {
+        Segment data(L"起人", L"きにん");
+        vector<Alignment> alignments;
+        potentialAlignments(data, alignments);
+        CPPUNIT_ASSERT(alignments.size() == 2);
+        for (int i = 0; i < 2; i++) {
+            Alignment& a = alignments[i];
+            // No phoneme segment can start with ん (code point 0x3093).
+            for (Alignment::iterator iter = a.begin(); iter != a.end();
+                    iter++) {
+                CPPUNIT_ASSERT(iter->p[0] != 0x3093);
+            }
+        }
+    }
+
+    /**
+     * Checks a three-grapheme segment whose reading contains both っ and ん.
+     * Five candidate alignments are expected, and in every one of them no
+     * phoneme segment may begin with ん or with a small kana character.
+     */
+    void testMixedSize()
+    {
+        Segment data(L"発起人", L"ほっきにん");
+        vector<Alignment> alignments;
+        potentialAlignments(data, alignments);
+        CPPUNIT_ASSERT(alignments.size() == 5);
+        // Validate every alignment returned, not just the first two.
+        for (unsigned int i = 0; i < alignments.size(); i++) {
+            Alignment& a = alignments[i];
+            // No phoneme segment can start with ん (0x3093) or a small kana.
+            for (Alignment::iterator iter = a.begin(); iter != a.end();
+                    iter++) {
+                CPPUNIT_ASSERT(iter->p[0] != 0x3093);
+                CPPUNIT_ASSERT(!isSmallKana(iter->p[0]));
+            }
+        }
+    }
+
     void testLongVowel()
     {
-        CPPUNIT_ASSERT(m_longVowel->g.size() == 3);
-        CPPUNIT_ASSERT(m_longVowel->p.size() == 3);
+        Segment data(L"ビール", L"ビール");
+        CPPUNIT_ASSERT(data.g.size() == 3);
+        CPPUNIT_ASSERT(data.p.size() == 3);
         vector<Alignment> alignments;
-        potentialAlignments(*m_longVowel, alignments);
+        potentialAlignments(data, alignments);
         CPPUNIT_ASSERT(alignments.size() == 1);
         Alignment& alignment = alignments[0];
         CPPUNIT_ASSERT(alignment.size() == 1);
-        CPPUNIT_ASSERT(alignment[0].g == m_longVowel->g);
-        CPPUNIT_ASSERT(alignment[0].p == m_longVowel->p);
+        CPPUNIT_ASSERT(alignment[0].g == data.g);
+        CPPUNIT_ASSERT(alignment[0].p == data.p);
     }
 
     void testMixedScript()
     {
-        m_mixed = new Segment(L"此の節", L"このせつ");
-        CPPUNIT_ASSERT(m_mixed->g.size() == 3);
-        CPPUNIT_ASSERT(m_mixed->p.size() == 4);
+        Segment data(L"此の節", L"このせつ");
+        CPPUNIT_ASSERT(data.g.size() == 3);
+        CPPUNIT_ASSERT(data.p.size() == 4);
         vector<Alignment> alignments;
-        potentialAlignments(*m_mixed, alignments);
+        potentialAlignments(data, alignments);
         CPPUNIT_ASSERT(alignments.size() == 1);
         Alignment& alignment = alignments[0];
         CPPUNIT_ASSERT(alignment.size() == 3);
         CPPUNIT_ASSERT(alignment[2].p == wstring(L"せつ"));
     }
 
+    void testHiragana()
+    {
+        Segment data(L"うなぎ", L"うなぎ");
+        vector<Alignment> alignments;
+        potentialAlignments(data, alignments);
+        CPPUNIT_ASSERT(alignments.size() == 1);
+        CPPUNIT_ASSERT(alignments[0].size() == 1);
+        CPPUNIT_ASSERT(alignments[0][0] == data);
+    }
+
     void tearDown()
     {
-        delete m_unique;
-        delete m_basic;
-        delete m_smallKana;
-        delete m_longVowel;
-        delete m_mixed;
     }
 
     static CppUnit::Test* suite()
                     &GeneratorTest::testLongVowel));
         s->addTest(new CppUnit::TestCaller<GeneratorTest>("testMixedScript",
                     &GeneratorTest::testMixedScript));
+        s->addTest(new CppUnit::TestCaller<GeneratorTest>("testNKana",
+                    &GeneratorTest::testNKana));
+        s->addTest(new CppUnit::TestCaller<GeneratorTest>("testMixedSize",
+                    &GeneratorTest::testMixedSize));
+        s->addTest(new CppUnit::TestCaller<GeneratorTest>("testHiragana",
+                    &GeneratorTest::testHiragana));
 
         return s;
     }