Commits

Lars Yencken committed 890039c

[263] Fixed a bug in long vowel handling: the long vowel mark (ー, U+30FC) wasn't classified as katakana.

  • Participants
  • Parent commits d8eeb29

Comments (0)

Files changed (3)

         return Script__Ascii;
     } else if (c >= 0x3041 && c <= 0x3096) {
         return Script__Hiragana;
-    } else if (c >= 0x30a1 && c <= 0x30f6) {
+    } else if ((c >= 0x30a1 && c <= 0x30f6) || c == 0x30fc) {
         return Script__Katakana;
     } else if (c >= 0x4e00 && c <= 0x9fa5) {
         return Script__Kanji;
 
 wchar_t toHiragana(wchar_t c)
 {
-    if (scriptType(c) == Script__Katakana) {
+    if (scriptType(c) == Script__Katakana && c != 0x30fc) {
         return c - _interKanaDistance;
     } else {
         return c;

src/tests/testJapanese.hpp

         CPPUNIT_ASSERT(!validSegment(Segment(L"起", L"っき")));
     }
 
+    void testLongVowel()
+    {
+        CPPUNIT_ASSERT(validSegment(Segment(L"コーヒー", L"コーヒー")));
+    }
+
     void tearDown()
     {
     }
         CppUnit::TestSuite* s = new CppUnit::TestSuite("TestJapanese");
         s->addTest(new CppUnit::TestCaller<TestJapanese>("testExample",
                     &TestJapanese::testExample));
+        s->addTest(new CppUnit::TestCaller<TestJapanese>("testLongVowel",
+                    &TestJapanese::testLongVowel));
         return s;
     }
 

src/tests/testScripts.hpp

         CPPUNIT_ASSERT(isSmallKana(L'ょ'));
         CPPUNIT_ASSERT(isSmallKana(L'ゃ'));
         CPPUNIT_ASSERT(isSmallKana(L'っ'));
+        CPPUNIT_ASSERT(!isSmallKana(L'や'));
+        CPPUNIT_ASSERT(!isSmallKana(L'ゆ'));
+        CPPUNIT_ASSERT(!isSmallKana(L'よ'));
     }
 
     void testToHiragana()
     {
         CPPUNIT_ASSERT(toHiragana(L'っ') == L'っ');
         CPPUNIT_ASSERT(toHiragana(L'ッ') == L'っ');
+        CPPUNIT_ASSERT(toHiragana(wstring(L"コーヒー")) ==
+                wstring(L"こーひー"));
+        CPPUNIT_ASSERT(toHiragana(wstring(L"東京")) == wstring(L"東京"));
+    }
+
+    void testScriptType()
+    {
+        CPPUNIT_ASSERT(scriptType(L'亜') == Script__Kanji);
+        CPPUNIT_ASSERT(scriptType(L'あ') == Script__Hiragana);
+        CPPUNIT_ASSERT(scriptType(L'ア') == Script__Katakana);
+        CPPUNIT_ASSERT(scriptType(L'A') == Script__Ascii);
+
+        // Long vowel marker
+        CPPUNIT_ASSERT(scriptType(L'ー') == Script__Katakana);
+        
+        // Kanji for "one"
+        CPPUNIT_ASSERT(scriptType(L'一') == Script__Kanji);
     }
 
     void tearDown()
         CppUnit::TestSuite* s = new CppUnit::TestSuite("ScriptsTest");
         s->addTest(new CppUnit::TestCaller<ScriptsTest>("testSmallKana",
                     &ScriptsTest::testSmallKana));
+        s->addTest(new CppUnit::TestCaller<ScriptsTest>("testToHiragana",
+                    &ScriptsTest::testToHiragana));
+        s->addTest(new CppUnit::TestCaller<ScriptsTest>("testScriptType",
+                    &ScriptsTest::testScriptType));
 
         return s;
     }