Commits

Mikhail Korobov committed 9fd1fec Merge

Merge pull request #30 from ivirabyan/master

"pluralize" refactoring

Comments (0)

Files changed (5)

docs/user/guide.rst

 
 Слово нужно ставить в разные формы в зависимости от числительного, к которому оно относится. Например: "1 бутявка", "2 бутявки", "5 бутявок".
 
-Для этих целей используйте метод :meth:`Parse.pluralize`::
+Для этих целей используйте метод :meth:`Parse.make_agree_with_number`::
 
     >>> butyavka = morph.parse('бутявка')[0]
-    >>> butyavka.pluralize(1).word
+    >>> butyavka.make_agree_with_number(1).word
     'бутявка'
-    >>> butyavka.pluralize(2).word
+    >>> butyavka.make_agree_with_number(2).word
     'бутявки'
-    >>> butyavka.pluralize(5).word
+    >>> butyavka.make_agree_with_number(5).word
     'бутявок'

pymorphy2/analyzer.py

     _dict = None
     """ :type _dict: pymorphy2.opencorpora_dict.Dictionary """
 
-    _plural_forms = (
-        set(['sing', 'nomn']),
-        set(['sing', 'gent']),
-        set(['plur', 'nomn']),
-        set(['plur', 'gent']),
-    )
-
     def inflect(self, required_grammemes):
         res = self._morph._inflect(self, required_grammemes)
         return None if not res else res[0]
 
-    def pluralize(self, num):
-        if (num % 10 == 1) and (num % 100 != 11):
-            index = 0
-        elif (num % 10 >= 2) and (num % 10 <= 4) and (num % 100 < 10 or num % 100 >= 20):
-            index = 1
-        else:
-            index = 2
-
-        if self.tag.POS == 'NOUN' and self.tag.case != 'nomn':
-            if index == 0:
-                grammemes = set(['sing', self.tag.case])
-            elif self.tag.case == 'accs' and index == 2:
-                grammemes = self._plural_forms[3]
-            else:
-                grammemes = set(['plur', self.tag.case])
-        elif index == 0:
-            grammemes = self._plural_forms[0]
-        elif self.tag.POS == 'NOUN' and index == 1:
-            grammemes = self._plural_forms[1]
-        elif self.tag.POS in ('ADJF', 'PRTF') and self.tag.gender == 'femn' and index == 1:
-            grammemes = self._plural_forms[2]
-        else:
-            grammemes = self._plural_forms[3]
-        return self.inflect(grammemes)
+    def make_agree_with_number(self, num):
+        """
+        Inflects the word so that it agrees with ``num``
+        """
+        return self.inflect(self.tag.numeral_agreement_grammemes(num))
 
     @property
     def lexeme(self):

pymorphy2/tagset.py

     _GRAMMEME_INCOMPATIBLE = collections.defaultdict(set)
     KNOWN_GRAMMEMES = set()
 
+    _NUMERAL_AGREEMENT_GRAMMEMES = (
+        set(['sing', 'nomn']),
+        set(['sing', 'gent']),
+        set(['plur', 'nomn']),
+        set(['plur', 'gent']),
+    )
+
     def __init__(self, tag):
         self._str = tag
 
     def _from_internal_grammeme(cls, grammeme):
         return grammeme
 
+    def numeral_agreement_grammemes(self, num):
+        if (num % 10 == 1) and (num % 100 != 11):
+            index = 0
+        elif (num % 10 >= 2) and (num % 10 <= 4) and (num % 100 < 10 or num % 100 >= 20):
+            index = 1
+        else:
+            index = 2
+
+        if self.POS not in ('NOUN', 'ADJF', 'PRTF'):
+            return set([])
+
+        if self.POS == 'NOUN' and self.case != 'nomn':
+            if index == 0:
+                grammemes = set(['sing', self.case])
+            elif self.case == 'accs' and index == 2:
+                grammemes = self._NUMERAL_AGREEMENT_GRAMMEMES[3]
+            else:
+                grammemes = set(['plur', self.case])
+        elif index == 0:
+            grammemes = self._NUMERAL_AGREEMENT_GRAMMEMES[0]
+        elif self.POS == 'NOUN' and index == 1:
+            grammemes = self._NUMERAL_AGREEMENT_GRAMMEMES[1]
+        elif self.POS in ('ADJF', 'PRTF') and self.gender == 'femn' and index == 1:
+            grammemes = self._NUMERAL_AGREEMENT_GRAMMEMES[2]
+        else:
+            grammemes = self._NUMERAL_AGREEMENT_GRAMMEMES[3]
+        return grammemes
 
 
 class CyrillicOpencorporaTag(OpencorporaTag):

tests/test_numeral_agreement.py

+# -*- coding: utf-8 -*-
+from __future__ import absolute_import, unicode_literals
+import pytest
+
+from .utils import morph
+
+
+@pytest.mark.parametrize(('word', 'result'), [
+    # прилагательные
+    ("бесплатная", ["бесплатная", "бесплатные", "бесплатных"]),
+    ("бесплатный", ["бесплатный", "бесплатных", "бесплатных"]),
+    
+    # числительные
+    ("первый", ["первый", "первых", "первых"]),
+    ("первая", ["первая", "первые", "первых"]),
+
+    # существительные
+    ("книга", ["книга", "книги", "книг"]),
+    ("болт", ["болт", "болта", "болтов"]),
+
+    # причастия
+    ("летящий", ["летящий", "летящих", "летящих"]),
+    ("летящая", ["летящая", "летящие", "летящих"]),
+
+    # остальные части речи мы никак не согласовываем с числами
+    ("играет", ["играет", "играет", "играет"])
+])
+def test_plural_forms(word, result):
+    parsed = morph.parse(word)
+    assert len(parsed)
+    for plural, num in zip(result, [1, 2, 5]):
+        assert parsed[0].make_agree_with_number(num).word == plural
+
+
+@pytest.mark.parametrize(('word', 'case', 'result'), [
+    ("книги", 'gent', ["книги", "книг", "книг"]),
+    ("книге", 'datv', ["книге", "книгам", "книгам"]),
+    ("книгу", 'accs', ["книгу", "книги", "книг"]),
+    ("книгой", 'ablt', ["книгой", "книгами", "книгами"]),
+    ("книге", 'loct', ["книге", "книгах", "книгах"]),
+])
+def test_plural_inflected(word, case, result):
+    parsed = [p for p in morph.parse(word) if p.tag.case == case]
+    assert len(parsed)
+    for plural, num in zip(result, [1, 2, 5]):
+        assert parsed[0].make_agree_with_number(num).word == plural
+
+
+@pytest.mark.parametrize(('word', 'num', 'result'), [
+    ("лопата", 0, "лопат"),
+    ("лопата", 1, "лопата"),
+    ("лопата", 2, "лопаты"),
+    ("лопата", 4, "лопаты"),
+    ("лопата", 5, "лопат"),
+    ("лопата", 6, "лопат"),
+    ("лопата", 11, "лопат"),
+    ("лопата", 12, "лопат"),
+    ("лопата", 15, "лопат"),
+    ("лопата", 21, "лопата"),
+    ("лопата", 24, "лопаты"),
+    ("лопата", 25, "лопат"),
+    ("лопата", 101, "лопата"),
+    ("лопата", 103, "лопаты"),
+    ("лопата", 105, "лопат"),
+    ("лопата", 111, "лопат"),
+    ("лопата", 112, "лопат"),
+    ("лопата", 151, "лопата"),
+    ("лопата", 122, "лопаты"),
+    ("лопата", 5624, "лопаты"),
+    ("лопата", 5431, "лопата"),
+    ("лопата", 7613, "лопат"),
+    ("лопата", 2111, "лопат"),
+])
+def test_plural_num(word, num, result):
+    parsed = morph.parse(word)
+    assert len(parsed)
+    assert parsed[0].make_agree_with_number(num).word == result

tests/test_pluralize.py

-# -*- coding: utf-8 -*-
-from __future__ import absolute_import, unicode_literals
-import pytest
-
-from .utils import morph
-
-
-@pytest.mark.parametrize(('word', 'result'), [
-    # прилагательные
-    ("бесплатная", ["бесплатная", "бесплатные", "бесплатных"]),
-    ("бесплатный", ["бесплатный", "бесплатных", "бесплатных"]),
-    
-    # числительные
-    ("первый", ["первый", "первых", "первых"]),
-    ("первая", ["первая", "первые", "первых"]),
-
-    # существительные
-    ("книга", ["книга", "книги", "книг"]),
-    ("болт", ["болт", "болта", "болтов"]),
-
-    # причастия
-    ("летящий", ["летящий", "летящих", "летящих"]),
-    ("летящая", ["летящая", "летящие", "летящих"]),
-])
-def test_plural_forms(word, result):
-    parsed = morph.parse(word)
-    assert len(parsed)
-    for plural, num in zip(result, [1, 2, 5]):
-        assert parsed[0].pluralize(num).word == plural
-
-
-@pytest.mark.parametrize(('word', 'case', 'result'), [
-    ("книги", 'gent', ["книги", "книг", "книг"]),
-    ("книге", 'datv', ["книге", "книгам", "книгам"]),
-    ("книгу", 'accs', ["книгу", "книги", "книг"]),
-    ("книгой", 'ablt', ["книгой", "книгами", "книгами"]),
-    ("книге", 'loct', ["книге", "книгах", "книгах"]),
-])
-def test_plural_inflected(word, case, result):
-    parsed = [p for p in morph.parse(word) if p.tag.case == case]
-    assert len(parsed)
-    for plural, num in zip(result, [1, 2, 5]):
-        assert parsed[0].pluralize(num).word == plural
-
-
-@pytest.mark.parametrize(('word', 'num', 'result'), [
-    ("лопата", 0, "лопат"),
-    ("лопата", 1, "лопата"),
-    ("лопата", 2, "лопаты"),
-    ("лопата", 4, "лопаты"),
-    ("лопата", 5, "лопат"),
-    ("лопата", 6, "лопат"),
-    ("лопата", 11, "лопат"),
-    ("лопата", 12, "лопат"),
-    ("лопата", 15, "лопат"),
-    ("лопата", 21, "лопата"),
-    ("лопата", 24, "лопаты"),
-    ("лопата", 25, "лопат"),
-    ("лопата", 101, "лопата"),
-    ("лопата", 103, "лопаты"),
-    ("лопата", 105, "лопат"),
-    ("лопата", 111, "лопат"),
-    ("лопата", 112, "лопат"),
-    ("лопата", 151, "лопата"),
-    ("лопата", 122, "лопаты"),
-    ("лопата", 5624, "лопаты"),
-    ("лопата", 5431, "лопата"),
-    ("лопата", 7613, "лопат"),
-    ("лопата", 2111, "лопат"),
-])
-def test_plural_num(word, num, result):
-    parsed = morph.parse(word)
-    assert len(parsed)
-    assert parsed[0].pluralize(num).word == result
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use the ↑ and ↓ arrow keys to navigate, and press Return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.