Commits

Mikhail Korobov committed b95cedf

more tests; __repr__ for positional.Tag

  • Participants
  • Parent commits d996be9

Comments (0)

Files changed (3)

russian_tagsets/aot.py

         # Personal pronoun (ja, my, ty, vy, on, ona, ono, oni, sebja)
         tag.POS = 'PP'
 
+        # fixme
+        # 3rd person pronoun in prepositional forms (nego, nej, ...)
+        # tag.POS = 'P5'
+
+        # Relative/interrogative pronoun with nominal declension (kto, čto)
+        # tag.POS = 'PQ'
+
+        # Negative pronoun with nominal declension (ničto, nikto)
+        # tag.POS = 'PW'
+
+        # Indefinite pronoun with nominal declension (kto-to, kto-nibud', čto-to, ...)
+        # tag.POS = 'PZ'
+
+        # Pronoun demonstrative (ètot, tot, sej, takoj, èkij, ... )
+        # tag.POS = 'PD'
+
     elif pos == 'МС-ПРЕДК':
         # e.g. нечего
         tag.POS = 'Db' # lossy
         if 'од' in info:
             tag.POS = 'PS'
 
+        # fixme: Negative pronoun with adjectival declension (nikakoj, ničej)
+        # tag.POS = 'Pw'
+
+        # fixme: Indefinite pronoun with adjectival declension (samyj, ves', ...)
+        # tag.POS = 'Pz'
+
     elif pos == 'Н':
         # Adverb without a possibility to form negation
         # and degrees of comparison (vverxu, vnizu, potom)
         # fixme: Generic/collective numeral (dvoje, četvero)
         # tag.POS = 'Cj'
 
+        # Indefinite numeral (mnogo, neskol'ko)
+        # tag.POS = 'Ca'
+
+        # Interrogative numeral (skol'ko)
+        # tag.POS = 'Cu'
+
+        # Multiplicative numeral (dvaždy, triždy)
+        # tag.POS = 'Cv'
+
     elif pos == 'ЧИСЛ-П':
         tag.POS = 'Cr'
 

russian_tagsets/positional/__init__.py

     def __str__(self):  # string form of the tag, read from self._data
         return self._data.tounicode() # this is not correct under python 2.x
 
+    def __repr__(self):  # debug representation: the string form wrapped as Tag("...")
+        return 'Tag("%s")' % self  # %s renders self through __str__
+
 
 if __name__ == '__main__':
     print(Tag("NNFIS7-------A--").verbose_info())

russian_tagsets/tests/test_aot.py

     if tag.POS in ['J,', 'J^']:
         tag.POS = 'J-'
 
+    # pronouns
+    if tag.POS in ['PP', 'P5', 'PQ', 'PW', 'PZ', 'PD']:
+        tag.POS = 'P-'
+    if tag.POS in ['Pq', 'PS', 'Pw', 'Pz']:
+        tag.POS = 'Pq'
+
+    # numerals
+    if tag.POS in ['Cu', 'Cn']:
+        tag.number = '-'
+    if tag.POS in ['Cj', 'Ca', 'Cu', 'Cv']:
+        tag.POS = 'Cn'
+
 
     return str(tag)
 
         ['мужнин',      'AUMXS4M------A--', 'П,мр,ед,вн,но'],
         ['прочитан',    'AcM-S----I-P-AP-', 'КР_ПРИЧАСТИЕ,од,но,прш,стр,ед,мр'],
 
+        # pronouns
+        ['нам',         'PP--P3--1I------', 'МС,1л,мн,дт'],
+        ['он',          'PPM-S1--3I------', 'МС,3л,мр,ед,им'],
+        ['себя',        'PP---4---R------', 'МС,вн'],
+        ['него',        'P5M-S2--3-------', 'МС,3л,мр,ед,рд'],
+        #['эту',         'PDFXS4----------', 'МС-П,жр,ед,вн,од,но'],
+        #['ничто',       'PW---1----------', 'МС,ср,ед,им'],
+        ['никакой',     'PwMXS1----------', 'МС-П,мр,ед,им,од,но'],
+        #['моя',         'PSFXS1-S1I------', 'МС-П,жр,ед,им,од,но'], # aot returns it without '1л'
+        #['его',         'PSXXXXMS3I------', 'МС-П,3л,мр,ед,рд'],
+        ['свой',        'PSMXS1---R------', 'МС-П,мр,ед,им,од,но'],
+        #['что',         'PQ---1----------', 'МС,ср,ед,им'],
+        ['какой',       'PqMXS1----------', 'МС-П,мр,ед,им,од,но'],
+        #['кого-то',     'PZ---4----------', 'МС,мр,ед,вн'],
+        ['какой-то',    'PzMXS1----------', 'МС-П,мр,ед,им,од,но'],
+
+        # numerals
+        ['одному',      'CnMXS3----------', 'ЧИСЛ,мр,дт'],
+        ['двух',        'CnMX-2----------', 'ЧИСЛ,мр,рд'],
+        ['трех',        'Cn-A-4----------', 'ЧИСЛ,вн'],
+        ['пяти',        'Cn---2----------', 'ЧИСЛ,рд'],
+        ['первый',      'CrMXS1----------', 'ЧИСЛ-П,мр,ед,им,од,но'],
+        ['двоих',       'Cj-A-2----------', 'ЧИСЛ,рд'], # Cj-A-3---------- Dative?
+        ['сколько',     'Cu---1----------', 'ЧИСЛ,им'], # original: Cu---x----------
+        ['несколько',   'Ca---1----------', 'ЧИСЛ,им'],
+        #['многому',     'CaMXS3----------', 'МС-П,мр,ед,дт,од,но'], # online example was incorrect
+        #['трижды',      'Cv--------------', 'Н'], # online example was incorrect
+
         # verbs
         ['отрываешь',   'VB--S---2IPI----', 'Г,дст,нст,2л,ед'],
         ['читал',       'VBM-S----IRI----', 'Г,дст,прш,мр,ед'],