corenlp-python / corenlp / corenlp.py

Diff from to

corenlp/corenlp.py

 
     return results
 
-def parse_parser_xml_results(xml, file_name=""):
+def parse_parser_xml_results(xml, file_name="", raw_output=False):
     import xmltodict
     from collections import OrderedDict
 
 
     # Turning the raw xml into a raw python dictionary:
     raw_dict = xmltodict.parse(xml)
+    if raw_output:
+        return raw_dict
+
     document = raw_dict[u'root'][u'document']
 
     # Making a raw sentence list of dictionaries:
 
     return results
 
-def parse_xml_output(input_dir, corenlp_path=DIRECTORY, memory="3g"):
+def parse_xml_output(input_dir, corenlp_path=DIRECTORY, memory="3g", raw_output=False):
     """Because interaction with the command-line interface of the CoreNLP
     tools is limited to very short text bits, it is necessary to parse xml
     output"""
             with open(xml_dir+'/'+output_file, 'r') as xml:
                 # parsed = xml.read()
                 file_name = re.sub('.xml$', '', os.path.basename(output_file))
-                result.append(parse_parser_xml_results(xml.read(), file_name))
+                result.append(parse_parser_xml_results(xml.read(), file_name,
+                                                       raw_output=raw_output))
     finally:
         file_list.close()
         shutil.rmtree(xml_dir)
         return json.dumps(self.raw_parse(text))
 
 
-def batch_parse(input_folder, corenlp_path=DIRECTORY, memory="3g"):
+def batch_parse(input_folder, corenlp_path=DIRECTORY, memory="3g", raw_output=False):
     """
     This function takes input files,
     sends list of input files to the Stanford parser,
     reads in the results from temporary folder in your OS and
     returns a generator object of list that consists of dictionary entries.
+    If raw_output is true, the dictionary returned will correspond exactly to the XML output.
     (The function needs xmltodict
     and doesn't need the 'StanfordCoreNLP' class to be initialized.)
     """
     if not os.path.exists(input_folder):
         raise Exception("Not exist input_folder")
 
-    return parse_xml_output(input_folder, corenlp_path, memory)
+    return parse_xml_output(input_folder, corenlp_path, memory, raw_output=raw_output)
 
 
 if __name__ == '__main__':
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.