Commits

Mikhail Korobov committed e5c2208

Изменил способ разбиения на приставку-стем-окончание: -5M к занимаемой памяти. Спасибо @radixvinni. Fix #5.

Comments (0)

Files changed (2)

pymorphy2/opencorpora_dict/compile.py

     forms, tags = list(zip(*lemma))
     prefixes = [''] * len(tags)
 
-    stem = os.path.commonprefix(forms)
-
-    if stem == "":
+    if len(set(forms)) == 1:
+        stem = forms[0]
+    else:
         stem = longest_common_substring(forms)
         prefixes = [form[:form.index(stem)] for form in forms]
         if any(pref not in LEMMA_PREFIXES for pref in prefixes):

tests/test_opencorpora_dict.py

             ("е", 3, "по"),
         )
 
+    def test_platina(self):
+        lemma = [
+            ["платиновее", 1],
+            ["платиновей", 2],
+            ["поплатиновее", 3],
+            ["поплатиновей", 4],
+        ]
+        stem, forms = _to_paradigm(lemma)
+        assert forms == (
+            ("е", 1, ""),
+            ("й", 2, ""),
+            ("е", 3, "по"),
+            ("й", 4, "по"),
+        )
+        assert stem == 'платинове'
+
+    def test_no_prefix(self):
+        lemma = [["английский", 1], ["английского", 2]]
+        stem, forms = _to_paradigm(lemma)
+        assert stem == 'английск'
+        assert forms == (
+            ("ий", 1, ""),
+            ("ого", 2, ""),
+        )
+
+    def test_single(self):
+        lemma = [["английски", 1]]
+        stem, forms = _to_paradigm(lemma)
+        assert stem == 'английски'
+        assert forms == (("", 1, ""),)
+