Commits

Hiroyoshi Komatsu committed 3b97123

Update package

Files changed (7)

MANIFEST

 # file GENERATED by distutils, do NOT edit
 setup.py
+corenlp/__init__.py
+corenlp/client.py
+corenlp/corenlp.py
+corenlp/default.properties
+corenlp/progressbar.py
README.md

- This is a fork of [stanford-corenlp-python](https://github.com/dasmith/stanford-corenlp-python).
+ A Stanford CoreNLP Python wrapper
+
+This is a fork of [stanford-corenlp-python](https://github.com/dasmith/stanford-corenlp-python).
 
 ## Edited
   * Updated to Stanford CoreNLP v1.3.5
   * Constants such as the Stanford CoreNLP directory can now be passed as arguments
   * Adjusted parameters so requests do not time out under high load
   * Other bug fixes
-   * Packaging
+   * Packaging (beta)
 
 ## Requirements
    * [jsonrpclib](https://github.com/joshmarshall/jsonrpclib)
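
jsonrpclib can typically be installed from PyPI:

    pip install jsonrpclib

To launch the wrapper as a public JSON-RPC server: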
 
     python corenlp/corenlp.py -H 0.0.0.0 -p 3456
 
+That will run a public JSON-RPC server on port 3456.
You can also specify the Stanford CoreNLP directory:
 
     python corenlp/corenlp.py -S stanford-corenlp-full-2013-04-04/
 
-That will run a public JSON-RPC server on port 3456.
 
-Assuming you are running on port 8080, the code in `client.py` shows an example parse:
+Assuming you are running on port 8080 and the CoreNLP directory is `stanford-corenlp-full-2013-04-04/` in the current directory, the code in `client.py` shows an example parse:
 
     import jsonrpclib
     from simplejson import loads
     server = jsonrpclib.Server("http://localhost:8080")
     print loads(server.parse("Hello world.  It is so beautiful."))
 
 To use it in a regular script or to edit/debug it (because errors via RPC are opaque), load the module instead:
 
-    from corenlp import *
+    from corenlp import StanfordCoreNLP
     corenlp_dir = "stanford-corenlp-full-2013-04-04/"
     corenlp = StanfordCoreNLP(corenlp_dir)  # wait a few minutes...
     corenlp.parse("Parse it")
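
When used as a module, `parse()` returns the same JSON string the RPC server would send (an assumption based on the client example above), so decode it before use. The `sentences` and `parsetree` keys below reflect the wrapper's usual output and are shown for illustration:

    import json
    from corenlp import StanfordCoreNLP

    corenlp = StanfordCoreNLP("stanford-corenlp-full-2013-04-04/")
    result = json.loads(corenlp.parse("Hello world.  It is so beautiful."))
    for sentence in result["sentences"]:
        print sentence["parsetree"]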
corenlp/__init__.py

+# corenlp-python
+# Copyright 2013- Hiroyoshi Komatsu
+# See LICENSE for details.
+
+"""
+A Stanford CoreNLP Python wrapper
+"""
+__version__ = '1.0.3'
+__author__ = 'Hiroyoshi Komatsu'
+__license__ = 'GPLv2+'
+
+from corenlp import StanfordCoreNLP
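
The re-export above lets callers import the class from the package root; both forms below are equivalent after this change:

    # via the package __init__
    from corenlp import StanfordCoreNLP
    # direct module path
    from corenlp.corenlp import StanfordCoreNLP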
corenlp/corenlp.py

         classname = "edu.stanford.nlp.pipeline.StanfordCoreNLP"
         # include the properties file, so you can change defaults
         # but any changes in output format will break parse_parser_results()
-        props = "-props default.properties"
+        property_name = "default.properties"
+        current_dir_pr = os.path.join(os.path.dirname(os.path.abspath(__file__)), property_name)
+        # prefer a properties file in the working directory, then the packaged copy
+        if os.path.exists(property_name):
+            props = "-props %s" % (property_name)
+        elif os.path.exists(current_dir_pr):
+            props = "-props %s" % (current_dir_pr)
+        else:
+            print "Error! Cannot locate: default.properties"
+            sys.exit(1)
 
         # add and check classpaths
         jars = [corenlp_path +"/"+ jar for jar in jars]
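
The lookup order above means a `default.properties` in the current working directory overrides the copy bundled with the package. A minimal sketch of the same resolution logic as a standalone helper (the function name is hypothetical):

    import os
    import sys

    def resolve_properties(name="default.properties"):
        # check the working directory first, then the directory
        # containing this module (where the packaged copy lives)
        packaged = os.path.join(os.path.dirname(os.path.abspath(__file__)), name)
        for candidate in (name, packaged):
            if os.path.exists(candidate):
                return candidate
        sys.exit("Error! Cannot locate: %s" % name)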

corenlp/default.properties

+annotators = tokenize, ssplit, pos, lemma, ner, parse, dcoref
+
+# A true-casing annotator is also available (see below)
+#annotators = tokenize, ssplit, pos, lemma, truecase
+
+# A simple regex NER annotator is also available
+# annotators = tokenize, ssplit, regexner
+
+#Use these as EOS punctuation and discard them from the actual sentence content
+#These are HTML tags that get expanded internally to correct syntax, e.g., from "p" to "<p>", "</p>" etc.
+#Will have no effect if the "cleanxml" annotator is used
+#ssplit.htmlBoundariesToDiscard = p,text
+
+#
+# None of these paths are necessary anymore: we load all models from the JAR file
+#
+
+#pos.model = /u/nlp/data/pos-tagger/wsj3t0-18-left3words/left3words-distsim-wsj-0-18.tagger
+## slightly better model but much slower:
+##pos.model = /u/nlp/data/pos-tagger/wsj3t0-18-bidirectional/bidirectional-distsim-wsj-0-18.tagger
+
+#ner.model.3class = /u/nlp/data/ner/goodClassifiers/all.3class.distsim.crf.ser.gz
+#ner.model.7class = /u/nlp/data/ner/goodClassifiers/muc.distsim.crf.ser.gz
+#ner.model.MISCclass = /u/nlp/data/ner/goodClassifiers/conll.distsim.crf.ser.gz
+
+#regexner.mapping = /u/nlp/data/TAC-KBP2010/sentence_extraction/type_map_clean
+#regexner.ignorecase = false
+
+#nfl.gazetteer = /scr/nlp/data/machine-reading/Machine_Reading_P1_Reading_Task_V2.0/data/SportsDomain/NFLScoring_UseCase/NFLgazetteer.txt
+#nfl.relation.model =  /scr/nlp/data/ldc/LDC2009E112/Machine_Reading_P1_NFL_Scoring_Training_Data_V1.2/models/nfl_relation_model.ser
+#nfl.entity.model =  /scr/nlp/data/ldc/LDC2009E112/Machine_Reading_P1_NFL_Scoring_Training_Data_V1.2/models/nfl_entity_model.ser
+#printable.relation.beam = 20
+
+#parser.model = /u/nlp/data/lexparser/englishPCFG.ser.gz
+
+#srl.verb.args=/u/kristina/srl/verbs.core_args
+#srl.model.cls=/u/nlp/data/srl/trainedModels/englishPCFG/cls/train.ann
+#srl.model.id=/u/nlp/data/srl/trainedModels/englishPCFG/id/train.ann
+
+#coref.model=/u/nlp/rte/resources/anno/coref/corefClassifierAll.March2009.ser.gz
+#coref.name.dir=/u/nlp/data/coref/
+#wordnet.dir=/u/nlp/data/wordnet/wordnet-3.0-prolog
+
+#dcoref.demonym = /scr/heeyoung/demonyms.txt
+#dcoref.animate = /scr/nlp/data/DekangLin-Animacy-Gender/Animacy/animate.unigrams.txt
+#dcoref.inanimate = /scr/nlp/data/DekangLin-Animacy-Gender/Animacy/inanimate.unigrams.txt
+#dcoref.male = /scr/nlp/data/Bergsma-Gender/male.unigrams.txt
+#dcoref.neutral = /scr/nlp/data/Bergsma-Gender/neutral.unigrams.txt
+#dcoref.female = /scr/nlp/data/Bergsma-Gender/female.unigrams.txt
+#dcoref.plural = /scr/nlp/data/Bergsma-Gender/plural.unigrams.txt
+#dcoref.singular = /scr/nlp/data/Bergsma-Gender/singular.unigrams.txt
+
+
+# This is the regular expression that describes which xml tags to keep
+# the text from.  To turn XML removal on or off, add or remove cleanxml
+# in the list of annotators above after "tokenize".
+#clean.xmltags = .*
+# A set of tags which will force the end of a sentence.  HTML example:
+# you would not want to end on <i>, but you would want to end on <p>.
+# Once again, a regular expression.  
+# (Blank means there are no sentence enders.)
+#clean.sentenceendingtags =
+# Whether or not to allow malformed xml
+# StanfordCoreNLP.properties
+#wordnet.dir=models/wordnet-3.0-prolog
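
Only the uncommented `annotators` line above is active; everything else documents alternatives and legacy model paths. For a faster pipeline you can trim the annotator list, e.g. (an illustrative variant, not part of the shipped file):

    annotators = tokenize, ssplit, pos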

default.properties

(The entire file was removed; its contents are identical to the new corenlp/default.properties shown above, as the file was moved into the package.)
setup.py

 AUTHOR = "Hiroyoshi Komatsu"
 AUTHOR_EMAIL = "hiroyoshi.komat@gmail.com"
 URL = "https://bitbucket.org/torotoki/corenlp-python"
-VERSION = "1.0.1"
+VERSION = "1.0.3"
 
 # Utility function to read the README file.
 # Used for the long_description.  It's nice, because now 1) we have a top level
     author_email=AUTHOR_EMAIL,
     url=URL,
     packages=['corenlp'],
+    package_dir={'corenlp': 'corenlp'},
+    package_data={
+        "corenlp": ["default.properties"]
+    },
+    # data_files = [
+    #     ('corenlp', ["default.properties"]),
+    # ],
     # package_data=find_package_data(
     #     PACKAGE,
     #     only_in_packages=False
     # )
     classifiers=[
-        "Intended Audience :: Developers",
         "License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+)",
         "Programming Language :: Python",
     ],
-)
+)
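
With `package_data` (plus the MANIFEST entries added in this commit) in place, `default.properties` ships inside the installed package; the standard distutils flow applies:

    python setup.py sdist     # source tarball includes corenlp/default.properties
    python setup.py install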