Commits

david_walker committed feef935

First, still-incomplete code for interfacing with the PET HPSG parser binary 'cheap'

  • Participants
  • Parent commits 89b3cbb
  • Branches parse

Comments (0)

Files changed (1)

+#!/usr/bin/env python
+"""
+Interface to external parser.
+"""
+
+import subprocess
+import xmlwitch
+
+class Parser(object):
+    def __init__(self):
+        self._pic = None
+
+    def _create_pet_input_chart(self, tokens):
+        xml = xmlwitch.Builder(version='1.0', encoding='utf-8')
+        with xml.pet_input_chart:
+            i = 1
+            cpos = 1
+            for token in tokens:
+                if token.non_printing or token.is_para:
+                    continue
+                with xml.w(id='W'+str(i), cbegin=str(cpos), cend=str(cpos + len(token.str))):
+                    xml.surface(token.str)
+                    with xml.pos(tag=token.pos, prio='1.0'):
+                        pass
+                cpos += len(token.str) + 1
+                i += 1
+        self._pic = unicode(xml).replace('pet_input_chart', 'pet-input-chart')
+
+    def parse(self, tokens):
+        # create an XML DOM object that represents the tagged tokens to parse
+        self._create_pet_input_chart(tokens)
+        # write it to a file to serve as input to the 'cheap' PET parser
+        with open('pic.xml', 'w') as outfile:
+            outfile.write(str(self._pic))
+            # cheap requires two blank lines at end or it faults
+            outfile.write('\n\n')
+        # invoke the parser and capture its output, which should include
+        # both a parse tree and an RMRS (Robust Minimal Recursion
+        # Semantics) structure.