Jakub Wilk committed 3810100

New parameter for analyse(): expand_dag (spelled `dag` in the function signature below).

Comments (0)

Files changed (2)

     unicode = str
 
 __author__ = 'Jakub Wilk <jwilk@jwilk.net>'
-__version__ = '0.3100'
+__version__ = '0.3200'
 __all__ = ['analyse', 'about', 'expand_tags', 'ATTRIBUTES', 'VALUES']
 
 ATTRIBUTES = '''
 def _dont_expand_tags(s, **kwargs):
     return [s]
 
-def analyse(text, expand_tags=True, expand_dot=True, expand_underscore=True):
+def analyse(text, expand_tags=True, expand_dot=True, expand_underscore=True, dag=False):
     '''
     Analyse the text.
     '''
     expand_tags = _expand_tags if expand_tags else _dont_expand_tags
     text = unicode(text)
     text = text.encode('UTF-8')
+    analyse = _analyse_as_dag if dag else _analyse_as_list
+    return analyse(text=text, expand_tags=expand_tags, expand_dot=expand_dot, expand_underscore=expand_underscore)
+
+def _analyse_as_dag(text, expand_tags, expand_dot, expand_underscore):
+    result = []
+    with libmorfeusz_lock:
+        for edge in libmorfeusz_analyse(text):
+            if edge.i == -1:
+                break
+            for tag in expand_tags(edge.tags, expand_dot=expand_dot, expand_underscore=expand_underscore):
+                result += [(edge.i, edge.j, (edge.orth, edge.base, tag))]
+    return result
+
+def _analyse_as_list(text, expand_tags, expand_dot, expand_underscore):
     dag = collections.defaultdict(list)
     with libmorfeusz_lock:
         for edge in libmorfeusz_analyse(text):
                 break
             for tag in expand_tags(edge.tags, expand_dot=expand_dot, expand_underscore=expand_underscore):
                 dag[edge.i] += [((edge.orth, edge.base, tag), edge.j)]
-
     def expand_dag(i):
         nexts = dag[i]
         if not nexts:
             for head, j in nexts:
                 for tail in expand_dag(j):
                     yield [head] + tail
-
     return list(expand_dag(0))
 
 def about():
             [(u('Mama'), u('mama'), 'subst:sg:nom:f'), (u('ma'), u('mój'), 'adj:sg:nom:f:pos'), (u('.'), u('.'), 'interp')]
         ])
 
+    def test2(self):
+        text = u('Miałem miał.')
+        interps = morfeusz.analyse(text, dag=True)
+        self.assertEqual(interps, [
+            (0, 1, (u('Miał'), u('mieć'), u('praet:sg:m1:imperf'))),
+            (0, 1, (u('Miał'), u('mieć'), u('praet:sg:m2:imperf'))),
+            (0, 1, (u('Miał'), u('mieć'), u('praet:sg:m3:imperf'))),
+            (1, 2, (u('em'), u('być'), u('aglt:sg:pri:imperf:wok'))),
+            (0, 2, (u('Miałem'), u('miał'), u('subst:sg:inst:m3'))),
+            (2, 3, (u('miał'), u('miał'), u('subst:sg:nom:m3'))),
+            (2, 3, (u('miał'), u('miał'), u('subst:sg:acc:m3'))),
+            (2, 3, (u('miał'), u('mieć'), u('praet:sg:m1:imperf'))),
+            (2, 3, (u('miał'), u('mieć'), u('praet:sg:m2:imperf'))),
+            (2, 3, (u('miał'), u('mieć'), u('praet:sg:m3:imperf'))),
+            (3, 4, (u('.'), u('.'), u('interp'))),
+        ])
+
 class test_about(unittest.TestCase):
 
     def test_type(self):
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use the ↑ and ↓ arrow keys to navigate, and press Return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.