Commits

Lars Yencken committed ee03835

Adds a basic version which doesn't yet handle all parse relations.

Comments (0)

Files changed (4)

+syntax: glob
+.DS_Store
+*.pyc
+*.pyo
+*.swp
+*.pdf
+*.svg
+*.dot
+parse_vis.py
+============
+
+A simple script for visualising GR parse trees. Requires graphviz, consoleLog
+and evince.
+
+Dependencies can be installed by:
+
+    $ sudo apt-get install graphviz evince python-pip
+    $ sudo pip install consoleLog
+
+The example is then run by:
+
+    $ ./parse_vis.py example.parse
+
+(ncmod _ chemotherapy_1 Systemic_0)                                             
+(ncmod _ gemcitabine_6 agent_5)
+(ncmod _ gemcitabine_6 single_3)
+(ncmod _ regimen_12 based_11)
+(ncmod _ regimen_12 gemcitabine_9)
+(det regimen_12 a_8)
+(conj or_7 regimen_12)
+(conj or_7 gemcitabine_6)
+(dobj with_2 regimen_12)
+(dobj with_2 gemcitabine_6)
+(ncmod _ chemotherapy_1 with_2)
+(det standard_16 a_15)
+(dobj of_17 care_18)
+(ncmod _ standard_16 of_17)
+(dobj remains_14 standard_16)
+(ncmod _ remains_14 still_13)
+(det treatment_21 the_20)
+(ncmod _ advanced_26 locally_25)
+(conj and_27 metastatic_28)
+(conj and_27 advanced_26)
+(ncmod _ cancer_30 pancreatic_29)
+(ncmod _ cancer_30 metastatic_28)
+(ncmod _ cancer_30 advanced_26)
+(dobj with_24 cancer_30)
+(ncmod _ patients_23 with_24)
+(dobj of_22 patients_23)
+(ncmod _ treatment_21 of_22)
+(dobj for_19 treatment_21)
+(ncmod _ remains_14 for_19)
+(ncsubj remains_14 chemotherapy_1 _)
+<c> Systemic|JJ|N/N chemotherapy|NN|N with|IN|(NP\NP)/NP single|JJ|N/N -|JJ|: agent|NN|N/N gemcitabine|NN|N or|CC|conj a|DT|NP[nb]/N gemcitabine|JJ|N/N -|JJ|: based|JJ|N/N regimen|NNS|N still|RB|(S\NP)/(S\NP) remains|VBZ|(S[dcl]\NP)/NP a|DT|NP[nb]/N standard|NN|N of|IN|(NP\NP)/NP care|NN|N for|IN|((S\NP)\(S\NP))/NP the|DT|NP[nb]/N treatment|NN|N of|IN|(NP\NP)/NP patients|NNS|N with|IN|(NP\NP)/NP locally|RB|(N/N)/(N/N) advanced|JJ|N/N and|CC|conj metastatic|JJ|N/N pancreatic|JJ|N/N cancer|NN|N .|.|.
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+#  parse_vis.py
+#  lars
+#
+#  Created by Lars Yencken on 2010-09-29.
+#  Copyright 2010 NICTA. All rights reserved.
+#
+
+"""
+Visualize a GR parse tree.
+"""
+
+import os
+import sys
+import optparse
+import subprocess
+import itertools
+
+from consoleLog import shellColor
+
+def parse_vis(filename, fmt='pdf'):
+    n = 0
+    with open(filename) as istream:
+        tree = _parse_tree(istream)
+        while tree:
+            _dump_dot(tree, filename + '.dot')
+            _render_dot(filename + '.dot', fmt)
+            _view_file(filename, fmt)
+            n += 1
+
+            tree = _parse_tree(istream)
+
+    print 'Displayed %d trees' % n
+
+def _view_file(filename, fmt):
+    retval = os.system('evince %s.%s' % (filename, fmt))
+    if retval != 0:
+        sys.exit(1)
+
+def _render_dot(filename, fmt):
+    output_name = os.path.splitext(filename)[0] + '.' + fmt
+    dot = subprocess.Popen(['dot', '-T%s' % fmt, '-o', output_name],
+        stdin=open(filename))
+    retcode = dot.wait()
+    if retcode != 0:
+        import pdb; pdb.set_trace()
+
+def _dump_dot(tree, filename):
+    with open(filename, 'w') as ostream:
+        print >> ostream, "digraph G {" 
+        for node, edges in tree.iteritems():
+            for child, label in edges.iteritems():
+                print >> ostream, "  %s -> %s [label=\"%s\"]" % (
+                        _normalize_token(node), _normalize_token(child),
+                        label)
+        print >> ostream, "}"
+
+def _normalize_token(name):
+    if name.startswith('-'):
+        return 'DASH' + name.lstrip('-')
+
+    return name
+
+def _parse_tree(istream):
+    # scan to the next graph
+    line = None
+    for line in istream:
+        line = line.rstrip()
+        if line.startswith('(') and line.endswith(')'):
+            break
+
+    if not line:
+        return {}
+
+    istream = itertools.chain([line], istream)
+
+    tree = {} # node -> (label, node)
+    for line in istream:
+        line = line.rstrip()
+        if not (line.startswith('(') and line.endswith(')')):
+            break
+
+        line = line[1:-1]
+        parts = line.split()
+        label = parts.pop(0)
+        if len(parts) == 3:
+            if parts[0] == '_':
+                del parts[0]
+            elif parts[-1] == '_':
+                del parts[-1]
+            elif label == 'ncsubj' and parts[-1] == 'obj':
+                label = 'ncsubj+obj'
+                del parts[-1]
+            elif label in ('cmod', 'ccomp'):
+                label = "%s['%s']" % (label, parts.pop(0))
+            else:
+                import pdb; pdb.set_trace()
+
+        if len(parts) == 2:
+            node_a, node_b = parts
+            edges = tree.setdefault(node_a, {})
+            assert node_b not in edges
+            edges[node_b] = label
+        else:
+            raise ValueError(line)
+
+    if line.startswith('<c>'):
+        _print_line(line)
+
+    return tree
+
+def _print_line(line):
+    line = line[4:].split()
+    for i, token_data in enumerate(line):
+        token = token_data.split('|')[0]
+        print '%s_%s' % (token, shellColor.color(str(i), 'blue')),
+    print
+    print '-----'
+
+#----------------------------------------------------------------------------#
+
+def _create_option_parser():
+    usage = \
+"""%prog [options] parse_file
+
+Visualises GR dependency parses found in the file, one by one."""
+
+    parser = optparse.OptionParser(usage)
+
+    return parser
+
+def main(argv):
+    parser = _create_option_parser()
+    (options, args) = parser.parse_args(argv)
+
+    if len(args) != 1:
+        parser.print_help()
+        sys.exit(1)
+
+    parse_vis(*args)
+
+#----------------------------------------------------------------------------#
+
+if __name__ == '__main__':  # only run when executed as a script
+    main(sys.argv[1:])  # drop the program name before option parsing