Mikhail Korobov  committed 2f3f1a0

временный набросок для парсинга из командной строки через команду pymorphy (потом будет меняться)

  • Participants
  • Parent commits c9712a3
  • Branches default

Comments (0)

Files changed (2)

File pymorphy2/

 import time
 import sys
 import pprint
+import codecs
 import pymorphy2
 from pymorphy2 import opencorpora_dict, test_suite_generator'\nDone.')
def _parse(dict_path, in_filename, out_filename):
    """Parse every word of a UTF-8 text file and write the results to a file.

    Each line of ``in_filename`` is stripped and treated as a single word.
    For every word one line of the form ``<word>: <item>|<item>|...`` is
    written to ``out_filename``, where each ``<item>`` is element 1 of an
    entry returned by ``morph.parse(word)``.

    :param dict_path: path handed to ``Morph.load``.
    :param in_filename: input file, one word per line, decoded as UTF-8.
    :param out_filename: output file, encoded as UTF-8.
    """
    # Importing the submodule makes ``pymorphy2.tagger`` resolvable below.
    from pymorphy2 import tagger
    morph = pymorphy2.tagger.Morph.load(dict_path)
    # codecs.open gives explicit UTF-8 decoding/encoding for both files.
    with codecs.open(in_filename, 'r', 'utf8') as in_file:
        with codecs.open(out_filename, 'w', 'utf8') as out_file:
            for line in in_file:
                word = line.strip()
                parses = morph.parse(word)
                # presumably p[1] is the tag string of a parse -- verify
                # against the structure returned by Morph.parse.
                parse_str = "|".join([p[1] for p in parses])
                out_file.write(word + ": " + parse_str + "\n")
 # =============================================================================
     pymorphy dict xml2json <IN_XML_FILE> <OUT_JSON_FILE> [--verbose]
     pymorphy dict download [--verbose]
     pymorphy dict download_xml <OUT_FILE> [--verbose]
-    pymorphy dict mem_usage [<PATH>] [--verbose]
+    pymorphy dict mem_usage [--dict <PATH>] [--verbose]
     pymorphy dict make_test_suite <IN_FILE> <OUT_FILE> [--limit <NUM>] [--verbose]
-    pymorphy dict meta [<PATH>]
+    pymorphy dict meta [--dict <PATH>]
+    pymorphy _parse <IN_FILE> <OUT_FILE> [--dict <PATH>] [--verbose]
     pymorphy -h | --help
     pymorphy --version
     --min_ending_freq <NUM>             Prediction: min. number of suffix occurances [default: 2]
     --min_paradigm_popularity <NUM>     Prediction: min. number of lemmas for the paradigm [default: 3]
     --max_forms_per_class <NUM>         Prediction: max. number of word forms per part of speech [default: 1]
-    <PATH>                              Dictionary folder path [default: dict]
+    --dict <PATH>                       Dictionary folder path [default: dict]
-    if args['dict']:
+    if args['_parse']:
+        return _parse(args['--dict'], args['<IN_FILE>'], args['<OUT_FILE>'])
+    elif args['dict']:
         if args['compile']:
             prediction_options = dict(
                 (key, int(args['--'+key]))
         elif args['xml2json']:
             return xml_to_json(args['<IN_XML_FILE>'], args['<OUT_JSON_FILE>'])
         elif args['mem_usage']:
-            return show_dict_mem_usage(args['<PATH>'] or 'dict', args['--verbose'])
+            return show_dict_mem_usage(args['--dict'] or 'dict', args['--verbose'])
         elif args['meta']:
-            return show_dict_meta(args['<PATH>'] or 'dict')
+            return show_dict_meta(args['--dict'] or 'dict')
         elif args['make_test_suite']:
             return make_test_suite(args['<IN_FILE>'], args['<OUT_FILE>'], int(args['--limit']))
         elif args['download_xml']:

File pymorphy2/

 from __future__ import absolute_import
def tags_list(tag):
    """Split *tag* into a list of its individual parts.

    The first space in *tag* is treated like a comma, and the string is
    then split on every comma. A string with no space and no comma comes
    back as a one-element list.
    """
    return tag.replace(' ', ',', 1).split(',')
 def get_POS(tag):
     """Return the leading part of *tag*, up to its first space or comma."""
     # Turning the first space into a comma leaves one separator to cut on.
     unified = tag.replace(' ', ',', 1)
     head, _sep, _rest = unified.partition(',')
     return head