Commits

Hiroyoshi Komatsu  committed 44f97db

Minor bugfix

  • Participants
  • Parent commits 9b88744

Comments (0)

Files changed (3)

     sudo pip install jsonrpclib pexpect unidecode   # unidecode is optional
     git clone https://bitbucket.org/torotoki/corenlp-python.git
     cd corenlp-python
-    wget http://nlp.stanford.edu/software/stanford-corenlp-full-2013-04-04.zip
-    unzip stanford-corenlp-full-2013-04-04.zip
+    wget http://nlp.stanford.edu/software/stanford-corenlp-full-2013-06-20.zip
+    unzip stanford-corenlp-full-2013-06-20.zip
 
 Then, to launch a server:
 
 That will run a public JSON-RPC server on port 3456.
 You can also specify the Stanford CoreNLP directory:
 
-    python corenlp/corenlp.py -S stanford-corenlp-full-2013-04-04/
+    python corenlp/corenlp.py -S stanford-corenlp-full-2013-06-20/
 
 
-Assuming you are running on port 8080 and CoreNLP directory is `stanford-corenlp-full-2013-04-04/` in current directory, the code in `client.py` shows an example parse:
+Assuming you are running on port 8080 and CoreNLP directory is `stanford-corenlp-full-2013-06-20/` in current directory, the code in `client.py` shows an example parse:
 
     import jsonrpclib
     from simplejson import loads
 To use it without JSON-RPC, load the module instead:
 
     from corenlp import StanfordCoreNLP
-    corenlp_dir = "stanford-corenlp-full-2013-04-04/"
+    corenlp_dir = "stanford-corenlp-full-2013-06-20/"
     corenlp = StanfordCoreNLP(corenlp_dir)  # wait a few minutes...
     corenlp.parse("Parse it")
 
 If you need to parse long texts (more than 30-50 sentences), you have to use the batch_parse() function. It reads the text files in the input directory and returns a generator of dictionaries, one per file, each holding that file's parse results:
 
     from corenlp import batch_parse
-    corenlp_dir = "stanford-corenlp-full-2013-04-04/"
+    corenlp_dir = "stanford-corenlp-full-2013-06-20/"
     raw_text_directory = "sample_raw_text/"
     parsed = batch_parse(raw_text_directory, corenlp_dir)  # It returns a generator object
     print parsed  #=> [{'coref': ..., 'sentences': ..., 'file_name': 'new_sample.txt'}]

File corenlp/corenlp.py

 WORD_PATTERN = re.compile('\[([^\]]+)\]')
 CR_PATTERN = re.compile(r"\((\d*),(\d)*,\[(\d*),(\d*)\)\) -> \((\d*),(\d)*,\[(\d*),(\d*)\)\), that is: \"(.*)\" -> \"(.*)\"")
 
+DIRECTORY = "stanford-corenlp-full-2013-06-20"
+
 class bc:
     HEADER = '\033[95m'
     OKBLUE = '\033[94m'
 
     return results
 
-def parse_xml_output(input_dir, corenlp_path="stanford-corenlp-full-2013-04-04/", memory="3g"):
+def parse_xml_output(input_dir, corenlp_path=DIRECTORY, memory="3g"):
     """Because interaction with the command-line interface of the CoreNLP
     tools is limited to very short text bits, it is necessary to parse xml
     output"""
     Command-line interaction with Stanford's CoreNLP java utilities.
     Can be run as a JSON-RPC server or imported as a module.
     """
-    def __init__(self, corenlp_path="stanford-corenlp-full-2013-04-04/", memory="3g"):
+    def __init__(self, corenlp_path=DIRECTORY, memory="3g"):
         """
         Checks the location of the jar files.
         Spawns the server as a process.
         return json.dumps(self.raw_parse(text))
 
 
-def batch_parse(input_folder, corenlp_path="stanford-corenlp-full-2013-04-04/", memory="3g"):
+def batch_parse(input_folder, corenlp_path=DIRECTORY, memory="3g"):
     """
     This function takes input files,
     sends list of input files to the Stanford parser,
                       help='Port to serve on (default 8080)')
     parser.add_option('-H', '--host', default='127.0.0.1',
                       help='Host to serve on (default localhost; 0.0.0.0 to make public)')
-    parser.add_option('-S', '--corenlp', default="stanford-corenlp-full-2013-04-04",
-                      help='Stanford CoreNLP tool directory (default stanford-corenlp-full-2013-04-04/)')
+    parser.add_option('-S', '--corenlp', default=DIRECTORY,
+                      help='Stanford CoreNLP tool directory (default %s)' % DIRECTORY)
     options, args = parser.parse_args()
     # server = jsonrpc.Server(jsonrpc.JsonRpc20(),
     #                         jsonrpc.TransportTcpIp(addr=(options.host, int(options.port))))
 AUTHOR = "Hiroyoshi Komatsu"
 AUTHOR_EMAIL = "hiroyoshi.komat@gmail.com"
 URL = "https://bitbucket.org/torotoki/corenlp-python"
-VERSION = "2.3.0-0"
+VERSION = "3.2.0-0"
 
 # Utility function to read the README file.
 # Used for the long_description.  It's nice, because now 1) we have a top level