Hiroyoshi Komatsu  committed 44f97db

Minor bugfix

  • Participants
  • Parent commits 9b88744

Comments (0)

Files changed (3)

     sudo pip install jsonrpclib pexpect unidecode   # unidecode is optional
     git clone
 	  cd corenlp-python
-    wget
-    unzip
+    wget
+    unzip
 Then, to launch a server:
 That will run a public JSON-RPC server on port 3456.
 And you can specify Stanford CoreNLP directory:
-    python corenlp/ -S stanford-corenlp-full-2013-04-04/
+    python corenlp/ -S stanford-corenlp-full-2013-06-20/
-Assuming you are running on port 8080 and the CoreNLP directory is `stanford-corenlp-full-2013-04-04/` in the current directory, the code in `client.py` shows an example parse:
+Assuming you are running on port 8080 and the CoreNLP directory is `stanford-corenlp-full-2013-06-20/` in the current directory, the code in `client.py` shows an example parse:
     import jsonrpclib
     from simplejson import loads
 If you would rather not use JSON-RPC, load the module directly instead:
     from corenlp import StanfordCoreNLP
-    corenlp_dir = "stanford-corenlp-full-2013-04-04/"
+    corenlp_dir = "stanford-corenlp-full-2013-06-20/"
     corenlp = StanfordCoreNLP(corenlp_dir)  # wait a few minutes...
     corenlp.parse("Parse it")
 If you need to parse long texts (more than 30-50 sentences), use the batch_parse() function. It reads the text files in the input directory and returns a generator of dictionaries, each holding the parse results for one file:
     from corenlp import batch_parse
-    corenlp_dir = "stanford-corenlp-full-2013-04-04/"
+    corenlp_dir = "stanford-corenlp-full-2013-06-20/"
     raw_text_directory = "sample_raw_text/"
     parsed = batch_parse(raw_text_directory, corenlp_dir)  # It returns a generator object
     print parsed  #=> [{'coref': ..., 'sentences': ..., 'file_name': 'new_sample.txt'}]

File corenlp/

 WORD_PATTERN = re.compile('\[([^\]]+)\]')
 CR_PATTERN = re.compile(r"\((\d*),(\d)*,\[(\d*),(\d*)\)\) -> \((\d*),(\d)*,\[(\d*),(\d*)\)\), that is: \"(.*)\" -> \"(.*)\"")
+DIRECTORY = "stanford-corenlp-full-2013-06-20"
 class bc:
     HEADER = '\033[95m'
     OKBLUE = '\033[94m'
     return results
-def parse_xml_output(input_dir, corenlp_path="stanford-corenlp-full-2013-04-04/", memory="3g"):
+def parse_xml_output(input_dir, corenlp_path=DIRECTORY, memory="3g"):
     """Because interaction with the command-line interface of the CoreNLP
     tools is limited to very short text bits, it is necessary to parse xml
     Command-line interaction with Stanford's CoreNLP java utilities.
     Can be run as a JSON-RPC server or imported as a module.
-    def __init__(self, corenlp_path="stanford-corenlp-full-2013-04-04/", memory="3g"):
+    def __init__(self, corenlp_path=DIRECTORY, memory="3g"):
         Checks the location of the jar files.
         Spawns the server as a process.
         return json.dumps(self.raw_parse(text))
-def batch_parse(input_folder, corenlp_path="stanford-corenlp-full-2013-04-04/", memory="3g"):
+def batch_parse(input_folder, corenlp_path=DIRECTORY, memory="3g"):
     This function takes input files,
     sends list of input files to the Stanford parser,
                       help='Port to serve on (default 8080)')
     parser.add_option('-H', '--host', default='',
                       help='Host to serve on (default localhost; to make public)')
-    parser.add_option('-S', '--corenlp', default="stanford-corenlp-full-2013-04-04",
-                      help='Stanford CoreNLP tool directory (default stanford-corenlp-full-2013-04-04/)')
+    parser.add_option('-S', '--corenlp', default=DIRECTORY,
+                      help='Stanford CoreNLP tool directory (default %s)' % DIRECTORY)
     options, args = parser.parse_args()
     # server = jsonrpc.Server(jsonrpc.JsonRpc20(),
     #                         jsonrpc.TransportTcpIp(addr=(, int(options.port))))
 AUTHOR = "Hiroyoshi Komatsu"
 URL = ""
-VERSION = "2.3.0-0"
+VERSION = "3.2.0-0"
 # Utility function to read the README file.
 # Used for the long_description.  It's nice, because now 1) we have a top level