Commits

Mikhail Korobov committed b2faae4

LATN -> NONLEX

  • Participants
  • Parent commits f18741a

Comments (0)

Files changed (2)

File russian_tagsets/ruscorpora.py

 indc => indic
 impr => imper
 
+# extra grammemes
+LATN => NONLEX
+
 # hack to preserve whitespace info:
 | => =
 """)
 
     tag = open_tag.replace(' ', ',|,').split(',')
     result = rule_engine.apply_rules(TO_OPENCORPORA, tag)
-    return ','.join(result).replace(',=,', '=').replace(',=', '')
+    result = ','.join(result).replace(',=,', '=').replace(',=', '')
+    if result == '':
+        return 'NONLEX'
+    return result
+
+
 
 converters.add('opencorpora-int', 'ruscorpora', from_opencorpora_int)

File russian_tagsets/tests/test_ruscorpora.py

     # voc
     # ("Зинк", "S,persn,f,anim=sg,voc", "NOUN,anim,masc sing,nomn | NOUN,inan,masc sing,nomn"),
     # zoon
+
+    # nonlex
+    ("foo", "NONLEX", "LATN"),
+    ("foo", "NONLEX", "UNKN"),
 ]
 
 def test_from_opencorpora():