Commits

Felix_Remmel committed 1419226

Remove verbose progress messages, because they cause errors while starting the server
Changed default properties to use the German language

Comments (0)

Files changed (2)

corenlp/corenlp.py

     """
     results = {"sentences": []}
     state = STATE_START
-    for line in unidecode(text.decode('utf-8')).split("\n"):
+    for line in unidecode(text).split("\n"):
         line = line.strip()
 
         if line.startswith("Sentence #"):
         self.corenlp = pexpect.spawn(self.start_corenlp, timeout=120, maxread=8192, searchwindowsize=80)
 
         # show progress bar while loading the models
-        if VERBOSE:
-            widgets = ['Loading Models: ', Fraction()]
-            pbar = ProgressBar(widgets=widgets, maxval=5, force_update=True).start()
-            # Model timeouts:
-            # pos tagger model (~5sec)
-            # NER-all classifier (~33sec)
-            # NER-muc classifier (~60sec)
-            # CoNLL classifier (~50sec)
-            # PCFG (~3sec)
-            timeouts = [20, 200, 600, 600, 20]
-            for i in xrange(5):
-                self.corenlp.expect("done.", timeout=timeouts[i])  # Load model
-                pbar.update(i + 1)
-            self.corenlp.expect("Entering interactive shell.")
-            pbar.finish()
+#        if VERBOSE:
+#            widgets = ['Loading Models: ', Fraction()]
+#            pbar = ProgressBar(widgets=widgets, maxval=5, force_update=True).start()
+#            # Model timeouts:
+#            # pos tagger model (~5sec)
+#            # NER-all classifier (~33sec)
+#            # NER-muc classifier (~60sec)
+#            # CoNLL classifier (~50sec)
+#            # PCFG (~3sec)
+#            timeouts = [20, 80, 60, 50, 3]
+#            for i in xrange(5):
+#                print "FICKIEN"
+#                self.corenlp.expect("done.", timeout=timeouts[i])  # Load model
+#                pbar.update(i + 1)
+#            self.corenlp.expect("Entering interactive shell.")
+#            pbar.finish()
 
         # interactive shell
         self.corenlp.expect("\nNLP> ")

corenlp/default.properties

-annotators = tokenize, ssplit, pos, lemma, ner, parse, dcoref
+annotators = tokenize, ssplit, pos, lemma, ner
 
 # A true-casing annotator is also available (see below)
 #annotators = tokenize, ssplit, pos, lemma, truecase
 #
 # None of these paths are necessary anymore: we load all models from the JAR file
 #
-
+pos.model = edu/stanford/nlp/models/pos-tagger/german-hgc.tagger
 #pos.model = /u/nlp/data/pos-tagger/wsj3t0-18-left3words/left3words-distsim-wsj-0-18.tagger
 ## slightly better model but much slower:
 ##pos.model = /u/nlp/data/pos-tagger/wsj3t0-18-bidirectional/bidirectional-distsim-wsj-0-18.tagger
 
+# If you set ner.model, you can name any arbitrary model you want.
+# The models named by ner.model.3class, ner.model.7class, and
+# ner.model.MISCclass are also added in the order named.
+# Any of the ner.model properties can be a comma separated list of names,
+# in which case each of the models in the comma separated list is added.
+ner.model = edu/stanford/nlp/models/ner/german.hgc_175m_600.crf.ser.gz
 #ner.model.3class = /u/nlp/data/ner/goodClassifiers/all.3class.distsim.crf.ser.gz
 #ner.model.7class = /u/nlp/data/ner/goodClassifiers/muc.distsim.crf.ser.gz
 #ner.model.MISCclass = /u/nlp/data/ner/goodClassifiers/conll.distsim.crf.ser.gz
 #printable.relation.beam = 20
 
 #parser.model = /u/nlp/data/lexparser/englishPCFG.ser.gz
+#parser.flags = -retainTmpSubcategories
 
 #srl.verb.args=/u/kristina/srl/verbs.core_args
 #srl.model.cls=/u/nlp/data/srl/trainedModels/englishPCFG/cls/train.ann
 #dcoref.plural = /scr/nlp/data/Bergsma-Gender/plural.unigrams.txt
 #dcoref.singular = /scr/nlp/data/Bergsma-Gender/singular.unigrams.txt
 
-
 # This is the regular expression that describes which xml tags to keep
 # the text from.  In order to on off the xml removal, add cleanxml
 # to the list of annotators above after "tokenize".
 #clean.xmltags = .*
 # A set of tags which will force the end of a sentence.  HTML example:
 # you would not want to end on <i>, but you would want to end on <p>.
-# Once again, a regular expression.
+# Once again, a regular expression.  
 # (Blank means there are no sentence enders.)
 #clean.sentenceendingtags =
 # Whether or not to allow malformed xml
-# StanfordCoreNLP.properties
-#wordnet.dir=models/wordnet-3.0-prolog
+#clean.allowflawedxml