Commits

Hiroyoshi Komatsu committed 2fa3bee

Packaging with Pypi

Comments (0)

Files changed (7)

+# file GENERATED by distutils, do NOT edit
+setup.py

corenlp/__init__.py

Empty file added.

corenlp/client.py

+import json
+# from jsonrpc import ServerProxy, JsonRpc20, TransportTcpIp
+import jsonrpclib
+from pprint import pprint
+
+
+class StanfordNLP:
+    def __init__(self, port_number=8080):
+        self.server = jsonrpclib.Server("http://localhost:%d" % port_number)
+
+    def parse(self, text):
+        return json.loads(self.server.parse(text))
+
+# Demo: these statements sit at module level, so they execute whenever this
+# file is run or imported, and they issue a request to a server on the
+# default port (8080).
+# NOTE(review): consider moving this under an `if __name__ == "__main__":`
+# guard so that importing corenlp.client has no side effects.
+nlp = StanfordNLP()
+result = nlp.parse("Hello world!  It is so beautiful.")
+pprint(result)
+
+# Rebuild the first sentence's bracketed parse tree with NLTK and show it.
+from nltk.tree import Tree
+tree = Tree.parse(result['sentences'][0]['parsetree'])
+pprint(tree)

corenlp/corenlp.py

+#!/usr/bin/env python
+#
+# corenlp  - Python interface to Stanford Core NLP tools
+# Copyright (c) 2012 Dustin Smith
+#   https://github.com/dasmith/stanford-corenlp-python
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+
+import json, optparse, os, re, sys, time, traceback
+import pexpect
+from progressbar import ProgressBar, Fraction
+from unidecode import unidecode
+from jsonrpclib.SimpleJSONRPCServer import SimpleJSONRPCServer
+
+VERBOSE = True
+STATE_START, STATE_TEXT, STATE_WORDS, STATE_TREE, STATE_DEPENDENCY, STATE_COREFERENCE = 0, 1, 2, 3, 4, 5
+WORD_PATTERN = re.compile('\[([^\]]+)\]')
+CR_PATTERN = re.compile(r"\((\d*),(\d)*,\[(\d*),(\d*)\)\) -> \((\d*),(\d)*,\[(\d*),(\d*)\)\), that is: \"(.*)\" -> \"(.*)\"")
+
+
+def remove_id(word):
+    """Removes the numeric suffix from the parsed recognized words: e.g. 'word-2' > 'word' """
+    return word.count("-") == 0 and word or word[0:word.rindex("-")]
+
+
+def parse_bracketed(s):
+    '''Parse word features [abc=... def = ...]
+    Also manages to parse out features that have XML within them
+    '''
+    word = None
+    attrs = {}
+    temp = {}
+    # Substitute XML tags, to replace them later
+    for i, tag in enumerate(re.findall(r"(<[^<>]+>.*<\/[^<>]+>)", s)):
+        temp["^^^%d^^^" % i] = tag
+        s = s.replace(tag, "^^^%d^^^" % i)
+    # Load key-value pairs, substituting as necessary
+    for attr, val in re.findall(r"([^=\s]*)=([^=\s]*)", s):
+        if val in temp:
+            val = temp[val]
+        if attr == 'Text':
+            word = val
+        else:
+            attrs[attr] = val
+    return (word, attrs)
+
+
+def parse_parser_results(text):
+    """ This is the nasty bit of code to interact with the command-line
+    interface of the CoreNLP tools.  Takes a string of the parser results
+    and returns a Python dictionary.  Its "sentences" key holds a list with
+    one dictionary per parsed sentence (keys 'text', 'words', 'parsetree'
+    and 'dependencies'); a "coref" key holding a list of coreference sets
+    is added only when the output contains "Coreference set" lines.
+
+    Raises Exception when the line after a sentence's text does not start
+    with "[Text=".
+    """
+    results = {"sentences": []}
+    state = STATE_START
+    for line in unidecode(text).split("\n"):
+        line = line.strip()
+
+        # A "Sentence #" header opens a new sentence record regardless of
+        # the state the previous sentence ended in.
+        if line.startswith("Sentence #"):
+            sentence = {'words':[], 'parsetree':[], 'dependencies':[]}
+            results["sentences"].append(sentence)
+            state = STATE_TEXT
+
+        # The line right after the header is the raw sentence text.
+        elif state == STATE_TEXT:
+            sentence['text'] = line
+            state = STATE_WORDS
+
+        # Exactly one line of bracketed "[Text=... ...]" token descriptions.
+        elif state == STATE_WORDS:
+            if not line.startswith("[Text="):
+                raise Exception('Parse error. Could not find "[Text=" in: %s' % line)
+            for s in WORD_PATTERN.findall(line):
+                sentence['words'].append(parse_bracketed(s))
+            state = STATE_TREE
+
+        # Parse-tree lines are collected until a blank line, then joined
+        # into a single space-separated string.
+        elif state == STATE_TREE:
+            if len(line) == 0:
+                state = STATE_DEPENDENCY
+                sentence['parsetree'] = " ".join(sentence['parsetree'])
+            else:
+                sentence['parsetree'].append(line)
+
+        # Dependency lines look like "rel(left-1, right-2)"; the trailing
+        # character is dropped and the "-N" token ids are stripped.
+        elif state == STATE_DEPENDENCY:
+            if len(line) == 0:
+                state = STATE_COREFERENCE
+            else:
+                split_entry = re.split("\(|, ", line[:-1])
+                if len(split_entry) == 3:
+                    rel, left, right = map(lambda x: remove_id(x), split_entry)
+                    sentence['dependencies'].append(tuple([rel,left,right]))
+
+        elif state == STATE_COREFERENCE:
+            if "Coreference set" in line:
+                if 'coref' not in results:
+                    results['coref'] = []
+                coref_set = []
+                results['coref'].append(coref_set)
+            else:
+                # NOTE(review): coref_set is only bound once a
+                # "Coreference set" line has been seen; a matching mention
+                # line before that would raise a NameError.
+                for src_i, src_pos, src_l, src_r, sink_i, sink_pos, sink_l, sink_r, src_word, sink_word in CR_PATTERN.findall(line):
+                    # Convert the 1-based indices in the output to 0-based.
+                    src_i, src_pos, src_l, src_r = int(src_i)-1, int(src_pos)-1, int(src_l)-1, int(src_r)-1
+                    sink_i, sink_pos, sink_l, sink_r = int(sink_i)-1, int(sink_pos)-1, int(sink_l)-1, int(sink_r)-1
+                    coref_set.append(((src_word, src_i, src_pos, src_l, src_r), (sink_word, sink_i, sink_pos, sink_l, sink_r)))
+
+    return results
+
+
+class StanfordCoreNLP(object):
+    """
+    Command-line interaction with Stanford's CoreNLP java utilities.
+    Can be run as a JSON-RPC server or imported as a module.
+    """
+    def __init__(self, corenlp_path="stanford-corenlp-full-2013-04-04/", memory="3g"):
+        """
+        Checks the location of the jar files.
+        Spawns the server as a process.
+        """
+
+        # TODO: Can edit jar constants
+        jars = ["stanford-corenlp-1.3.5.jar",
+                "stanford-corenlp-1.3.5-models.jar",
+                "joda-time.jar",
+                "xom.jar"]
+        jars = ["stanford-corenlp-1.3.5.jar",
+                "stanford-corenlp-1.3.5-models.jar",
+                "xom.jar",
+                "joda-time.jar",
+                "jollyday.jar"]
+
+        java_path = "java"
+        classname = "edu.stanford.nlp.pipeline.StanfordCoreNLP"
+        # include the properties file, so you can change defaults
+        # but any changes in output format will break parse_parser_results()
+        props = "-props default.properties"
+
+        # add and check classpaths
+        jars = [corenlp_path +"/"+ jar for jar in jars]
+        for jar in jars:
+            if not os.path.exists(jar):
+                print "Error! Cannot locate %s" % jar
+                sys.exit(1)
+
+        # spawn the server
+        start_corenlp = "%s -Xmx%s -cp %s %s %s" % (java_path, memory, ':'.join(jars), classname, props)
+        if VERBOSE: print start_corenlp
+        self.corenlp = pexpect.spawn(start_corenlp)
+
+        # show progress bar while loading the models
+        widgets = ['Loading Models: ', Fraction()]
+        pbar = ProgressBar(widgets=widgets, maxval=5, force_update=True).start()
+        self.corenlp.expect("done.", timeout=20) # Load pos tagger model (~5sec)
+        pbar.update(1)
+        self.corenlp.expect("done.", timeout=200) # Load NER-all classifier (~33sec)
+        pbar.update(2)
+        self.corenlp.expect("done.", timeout=600) # Load NER-muc classifier (~60sec)
+        pbar.update(3)
+        self.corenlp.expect("done.", timeout=600) # Load CoNLL classifier (~50sec)
+        pbar.update(4)
+        self.corenlp.expect("done.", timeout=200) # Loading PCFG (~3sec)
+        pbar.update(5)
+        self.corenlp.expect("Entering interactive shell.")
+        pbar.finish()
+
+    def _parse(self, text):
+        """
+        This is the core interaction with the parser.
+
+        It returns a Python data-structure, while the parse()
+        function returns a JSON object
+        """
+        # clean up anything leftover
+        while True:
+            try:
+                self.corenlp.read_nonblocking (4096, 0.3)
+            except pexpect.TIMEOUT:
+                break
+            except pexpect.EOF:
+                break
+
+        self.corenlp.sendline(text)
+
+        # How much time should we give the parser to parse it?
+        # the idea here is that you increase the timeout as a
+        # function of the text's length.
+        # anything longer than 30 seconds requires that you also
+        # increase timeout=30 in jsonrpc.py
+        max_expected_time = max(30, 3 + len(text) / 20.0)
+        end_time = time.time() + max_expected_time
+
+        incoming = ""
+        while True:
+            # Time left, read more data
+            try:
+                incoming += self.corenlp.read_nonblocking(2048, 1)
+                if "\nNLP>" in incoming: break
+                time.sleep(0.0001)
+            except pexpect.TIMEOUT:
+                if end_time - time.time() < 0:
+                    print "[ERROR] Timeout"
+                    return {'error': "timed out after %f seconds" % max_expected_time,
+                            'input': text,
+                            'output': incoming}
+                else:
+                    continue
+            except pexpect.EOF:
+                break
+
+        if VERBOSE: print "%s\n%s" % ('='*40, incoming)
+        try:
+            results = parse_parser_results(incoming)
+        except Exception, e:
+            if VERBOSE: print traceback.format_exc()
+            raise e
+
+        return results
+
+    def parse(self, text):
+        """
+        This function takes a text string, sends it to the Stanford parser,
+        reads in the result, parses the results and returns a list
+        with one dictionary entry for each parsed sentence, in JSON format.
+        """
+        return json.dumps(self._parse(text))
+
+
+if __name__ == '__main__':
+    """
+    The code below starts an JSONRPC server
+    """
+    parser = optparse.OptionParser(usage="%prog [OPTIONS]")
+    parser.add_option('-p', '--port', default='8080',
+                      help='Port to serve on (default 8080)')
+    parser.add_option('-H', '--host', default='127.0.0.1',
+                      help='Host to serve on (default localhost; 0.0.0.0 to make public)')
+    parser.add_option('-S', '--corenlp', default="stanford-corenlp-full-2013-04-04",
+                      help='Stanford CoreNLP tool directory (default stanford-corenlp-full-2013-04-04/)')
+    options, args = parser.parse_args()
+    # server = jsonrpc.Server(jsonrpc.JsonRpc20(),
+    #                         jsonrpc.TransportTcpIp(addr=(options.host, int(options.port))))
+    server = SimpleJSONRPCServer((options.host, int(options.port)))
+
+    nlp = StanfordCoreNLP(options.corenlp)
+    server.register_function(nlp.parse)
+
+    print 'Serving on http://%s:%s' % (options.host, options.port)
+    # server.serve()
+    try:
+        server.serve_forever()
+    except KeyboardInterrupt:
+        print >>stderr, "Bye."
+        exit()

corenlp/default.properties

+annotators = tokenize, ssplit, pos, lemma, ner, parse, dcoref
+
+# A true-casing annotator is also available (see below)
+#annotators = tokenize, ssplit, pos, lemma, truecase
+
+# A simple regex NER annotator is also available
+# annotators = tokenize, ssplit, regexner
+
+#Use these as EOS punctuation and discard them from the actual sentence content
+#These are HTML tags that get expanded internally to correct syntax, e.g., from "p" to "<p>", "</p>" etc.
+#Will have no effect if the "cleanxml" annotator is used
+#ssplit.htmlBoundariesToDiscard = p,text
+
+#
+# None of these paths are necessary anymore: we load all models from the JAR file
+#
+
+#pos.model = /u/nlp/data/pos-tagger/wsj3t0-18-left3words/left3words-distsim-wsj-0-18.tagger
+## slightly better model but much slower:
+##pos.model = /u/nlp/data/pos-tagger/wsj3t0-18-bidirectional/bidirectional-distsim-wsj-0-18.tagger
+
+#ner.model.3class = /u/nlp/data/ner/goodClassifiers/all.3class.distsim.crf.ser.gz
+#ner.model.7class = /u/nlp/data/ner/goodClassifiers/muc.distsim.crf.ser.gz
+#ner.model.MISCclass = /u/nlp/data/ner/goodClassifiers/conll.distsim.crf.ser.gz
+
+#regexner.mapping = /u/nlp/data/TAC-KBP2010/sentence_extraction/type_map_clean
+#regexner.ignorecase = false
+
+#nfl.gazetteer = /scr/nlp/data/machine-reading/Machine_Reading_P1_Reading_Task_V2.0/data/SportsDomain/NFLScoring_UseCase/NFLgazetteer.txt
+#nfl.relation.model =  /scr/nlp/data/ldc/LDC2009E112/Machine_Reading_P1_NFL_Scoring_Training_Data_V1.2/models/nfl_relation_model.ser
+#nfl.entity.model =  /scr/nlp/data/ldc/LDC2009E112/Machine_Reading_P1_NFL_Scoring_Training_Data_V1.2/models/nfl_entity_model.ser
+#printable.relation.beam = 20
+
+#parser.model = /u/nlp/data/lexparser/englishPCFG.ser.gz
+
+#srl.verb.args=/u/kristina/srl/verbs.core_args
+#srl.model.cls=/u/nlp/data/srl/trainedModels/englishPCFG/cls/train.ann
+#srl.model.id=/u/nlp/data/srl/trainedModels/englishPCFG/id/train.ann
+
+#coref.model=/u/nlp/rte/resources/anno/coref/corefClassifierAll.March2009.ser.gz
+#coref.name.dir=/u/nlp/data/coref/
+#wordnet.dir=/u/nlp/data/wordnet/wordnet-3.0-prolog
+
+#dcoref.demonym = /scr/heeyoung/demonyms.txt
+#dcoref.animate = /scr/nlp/data/DekangLin-Animacy-Gender/Animacy/animate.unigrams.txt
+#dcoref.inanimate = /scr/nlp/data/DekangLin-Animacy-Gender/Animacy/inanimate.unigrams.txt
+#dcoref.male = /scr/nlp/data/Bergsma-Gender/male.unigrams.txt
+#dcoref.neutral = /scr/nlp/data/Bergsma-Gender/neutral.unigrams.txt
+#dcoref.female = /scr/nlp/data/Bergsma-Gender/female.unigrams.txt
+#dcoref.plural = /scr/nlp/data/Bergsma-Gender/plural.unigrams.txt
+#dcoref.singular = /scr/nlp/data/Bergsma-Gender/singular.unigrams.txt
+
+
+# This is the regular expression that describes which xml tags to keep
+# the text from.  In order to turn on the xml removal, add cleanxml
+# to the list of annotators above after "tokenize".
+#clean.xmltags = .*
+# A set of tags which will force the end of a sentence.  HTML example:
+# you would not want to end on <i>, but you would want to end on <p>.
+# Once again, a regular expression.  
+# (Blank means there are no sentence enders.)
+#clean.sentenceendingtags =
+# Whether or not to allow malformed xml
+# StanfordCoreNLP.properties
+#wordnet.dir=models/wordnet-3.0-prolog

corenlp/progressbar.py

+#!/usr/bin/python
+# -*- coding: iso-8859-1 -*-
+#
+# progressbar  - Text progressbar library for python.
+# Copyright (c) 2005 Nilton Volpato
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+
+"""Text progressbar library for python.
+
+This library provides a text mode progressbar. This is typically used
+to display the progress of a long running operation, providing a
+visual clue that processing is underway.
+
+The ProgressBar class manages the progress, and the format of the line
+is given by a number of widgets. A widget is an object that may
+display differently depending on the state of the progress. There are
+three types of widget:
+- a string, which always shows itself;
+- a ProgressBarWidget, which may return a different value every time
+its update method is called; and
+- a ProgressBarWidgetHFill, which is like ProgressBarWidget, except it
+expands to fill the remaining width of the line.
+
+The progressbar module is very easy to use, yet very powerful. And
+automatically supports features like auto-resizing when available.
+"""
+
+__author__ = "Nilton Volpato"
+__author_email__ = "first-name dot last-name @ gmail.com"
+__date__ = "2006-05-07"
+__version__ = "2.2"
+
+# Changelog
+#
+# 2006-05-07: v2.2 fixed bug in windows
+# 2005-12-04: v2.1 autodetect terminal width, added start method
+# 2005-12-04: v2.0 everything is now a widget (wow!)
+# 2005-12-03: v1.0 rewrite using widgets
+# 2005-06-02: v0.5 rewrite
+# 2004-??-??: v0.1 first version
+
+import sys
+import time
+from array import array
+try:
+    from fcntl import ioctl
+    import termios
+except ImportError:
+    pass
+import signal
+
+
+class ProgressBarWidget(object):
+    """This is an element of ProgressBar formatting.
+
+    The ProgressBar object will call its update method when an update
+    is needed. Its size may change between calls, but the results will
+    not be good if the size changes drastically and repeatedly.
+    """
+    def update(self, pbar):
+        """Returns the string representing the widget.
+
+        The parameter pbar is a reference to the calling ProgressBar,
+        where one can access attributes of the class for knowing how
+        the update must be made.
+
+        At least this function must be overridden; this base
+        implementation returns None."""
+        pass
+
+
+class ProgressBarWidgetHFill(object):
+    """This is a variable width element of ProgressBar formatting.
+
+    The ProgressBar object will call its update method, informing the
+    width this object must be made. This is like TeX \\hfill, it will
+    expand to fill the line. You can use more than one in the same
+    line, and they will all have the same width, and together will
+    fill the line.
+    """
+    def update(self, pbar, width):
+        """Returns the string representing the widget.
+
+        The parameter pbar is a reference to the calling ProgressBar,
+        where one can access attributes of the class for knowing how
+        the update must be made. The parameter width is the total
+        horizontal width the widget must have.
+
+        At least this function must be overridden; this base
+        implementation returns None."""
+        pass
+
+
+class ETA(ProgressBarWidget):
+    "Widget for the Estimated Time of Arrival"
+    def format_time(self, seconds):
+        return time.strftime('%H:%M:%S', time.gmtime(seconds))
+
+    def update(self, pbar):
+        if pbar.currval == 0:
+            return 'ETA:  --:--:--'
+        elif pbar.finished:
+            return 'Time: %s' % self.format_time(pbar.seconds_elapsed)
+        else:
+            elapsed = pbar.seconds_elapsed
+            eta = elapsed * pbar.maxval / pbar.currval - elapsed
+            return 'ETA:  %s' % self.format_time(eta)
+
+
+class FileTransferSpeed(ProgressBarWidget):
+    "Widget for showing the transfer speed (useful for file transfers)."
+    def __init__(self):
+        self.fmt = '%6.2f %s'
+        self.units = ['B', 'K', 'M', 'G', 'T', 'P']
+
+    def update(self, pbar):
+        if pbar.seconds_elapsed < 2e-6:  # == 0:
+            bps = 0.0
+        else:
+            bps = float(pbar.currval) / pbar.seconds_elapsed
+        spd = bps
+        for u in self.units:
+            if spd < 1000:
+                break
+            spd /= 1000
+        return self.fmt % (spd, u + '/s')
+
+
+class RotatingMarker(ProgressBarWidget):
+    "A rotating marker for filling the bar of progress."
+    def __init__(self, markers='|/-\\'):
+        self.markers = markers
+        self.curmark = -1
+
+    def update(self, pbar):
+        if pbar.finished:
+            return self.markers[0]
+        self.curmark = (self.curmark + 1) % len(self.markers)
+        return self.markers[self.curmark]
+
+
+class Percentage(ProgressBarWidget):
+    "Just the percentage done."
+    def update(self, pbar):
+        return '%3d%%' % pbar.percentage()
+
+
+class Fraction(ProgressBarWidget):
+    "Just the fraction done."
+    def update(self, pbar):
+        return "%d/%d" % (pbar.currval, pbar.maxval)
+
+
+class Bar(ProgressBarWidgetHFill):
+    "The bar of progress. It will strech to fill the line."
+    def __init__(self, marker='#', left='|', right='|'):
+        self.marker = marker
+        self.left = left
+        self.right = right
+
+    def _format_marker(self, pbar):
+        if isinstance(self.marker, (str, unicode)):
+            return self.marker
+        else:
+            return self.marker.update(pbar)
+
+    def update(self, pbar, width):
+        percent = pbar.percentage()
+        cwidth = width - len(self.left) - len(self.right)
+        marked_width = int(percent * cwidth / 100)
+        m = self._format_marker(pbar)
+        bar = (self.left + (m * marked_width).ljust(cwidth) + self.right)
+        return bar
+
+
+class ReverseBar(Bar):
+    "The reverse bar of progress, or bar of regress. :)"
+    def update(self, pbar, width):
+        percent = pbar.percentage()
+        cwidth = width - len(self.left) - len(self.right)
+        marked_width = int(percent * cwidth / 100)
+        m = self._format_marker(pbar)
+        bar = (self.left + (m * marked_width).rjust(cwidth) + self.right)
+        return bar
+
+# Widgets used when ProgressBar is created without an explicit list.
+# This single list object is the default argument, so it is shared by every
+# such ProgressBar instance.
+default_widgets = [Percentage(), ' ', Bar()]
+
+
+class ProgressBar(object):
+    """This is the ProgressBar class, it updates and prints the bar.
+
+    The term_width parameter may be an integer. Or None, in which case
+    it will try to guess it, if it fails it will default to 80 columns.
+
+    The simple use is like this:
+    >>> pbar = ProgressBar().start()
+    >>> for i in xrange(100):
+    ...    # do something
+    ...    pbar.update(i+1)
+    ...
+    >>> pbar.finish()
+
+    But anything you want to do is possible (well, almost anything).
+    You can supply different widgets of any type in any order. And you
+    can even write your own widgets! There are many widgets already
+    shipped and you should experiment with them.
+
+    When implementing a widget update method you may access any
+    attribute or function of the ProgressBar object calling the
+    widget's update method. The most important attributes you would
+    like to access are:
+    - currval: current value of the progress, 0 <= currval <= maxval
+    - maxval: maximum (and final) value of the progress
+    - finished: True if the bar is have finished (reached 100%), False o/w
+    - start_time: first time update() method of ProgressBar was called
+    - seconds_elapsed: seconds elapsed since start_time
+    - percentage(): percentage of the progress (this is a method)
+    """
+    def __init__(self, maxval=100, widgets=default_widgets, term_width=None,
+                 fd=sys.stderr, force_update=False):
+        assert maxval > 0
+        self.maxval = maxval
+        self.widgets = widgets
+        self.fd = fd
+        self.signal_set = False
+        if term_width is None:
+            try:
+                self.handle_resize(None, None)
+                signal.signal(signal.SIGWINCH, self.handle_resize)
+                self.signal_set = True
+            except:
+                self.term_width = 79
+        else:
+            self.term_width = term_width
+
+        self.currval = 0
+        self.finished = False
+        self.prev_percentage = -1
+        self.start_time = None
+        self.seconds_elapsed = 0
+        self.force_update = force_update
+
+    def handle_resize(self, signum, frame):
+        h, w = array('h', ioctl(self.fd, termios.TIOCGWINSZ, '\0' * 8))[:2]
+        self.term_width = w
+
+    def percentage(self):
+        "Returns the percentage of the progress."
+        return self.currval * 100.0 / self.maxval
+
+    def _format_widgets(self):
+        r = []
+        hfill_inds = []
+        num_hfill = 0
+        currwidth = 0
+        for i, w in enumerate(self.widgets):
+            if isinstance(w, ProgressBarWidgetHFill):
+                r.append(w)
+                hfill_inds.append(i)
+                num_hfill += 1
+            elif isinstance(w, (str, unicode)):
+                r.append(w)
+                currwidth += len(w)
+            else:
+                weval = w.update(self)
+                currwidth += len(weval)
+                r.append(weval)
+        for iw in hfill_inds:
+            r[iw] = r[iw].update(self,
+                                 (self.term_width - currwidth) / num_hfill)
+        return r
+
+    def _format_line(self):
+        return ''.join(self._format_widgets()).ljust(self.term_width)
+
+    def _need_update(self):
+        if self.force_update:
+            return True
+        return int(self.percentage()) != int(self.prev_percentage)
+
+    def reset(self):
+        if not self.finished and self.start_time:
+            self.finish()
+        self.finished = False
+        self.currval = 0
+        self.start_time = None
+        self.seconds_elapsed = None
+        self.prev_percentage = None
+        return self
+
+    def update(self, value):
+        "Updates the progress bar to a new value."
+        assert 0 <= value <= self.maxval
+        self.currval = value
+        if not self._need_update() or self.finished:
+            return
+        if not self.start_time:
+            self.start_time = time.time()
+        self.seconds_elapsed = time.time() - self.start_time
+        self.prev_percentage = self.percentage()
+        if value != self.maxval:
+            self.fd.write(self._format_line() + '\r')
+        else:
+            self.finished = True
+            self.fd.write(self._format_line() + '\n')
+
+    def start(self):
+        """Start measuring time, and prints the bar at 0%.
+
+        It returns self so you can use it like this:
+        >>> pbar = ProgressBar().start()
+        >>> for i in xrange(100):
+        ...    # do something
+        ...    pbar.update(i+1)
+        ...
+        >>> pbar.finish()
+        """
+        self.update(0)
+        return self
+
+    def finish(self):
+        """Used to tell the progress is finished."""
+        self.update(self.maxval)
+        if self.signal_set:
+            signal.signal(signal.SIGWINCH, signal.SIG_DFL)
+
+
+def example1():
+    widgets = ['Test: ', Percentage(), ' ', Bar(marker=RotatingMarker()),
+               ' ', ETA(), ' ', FileTransferSpeed()]
+    pbar = ProgressBar(widgets=widgets, maxval=10000000).start()
+    for i in range(1000000):
+        # do something
+        pbar.update(10 * i + 1)
+    pbar.finish()
+    return pbar
+
+
+def example2():
+    class CrazyFileTransferSpeed(FileTransferSpeed):
+        "It's bigger between 45 and 80 percent"
+        def update(self, pbar):
+            if 45 < pbar.percentage() < 80:
+                return 'Bigger Now ' + FileTransferSpeed.update(self, pbar)
+            else:
+                return FileTransferSpeed.update(self, pbar)
+
+    widgets = [CrazyFileTransferSpeed(), ' <<<',
+               Bar(), '>>> ', Percentage(), ' ', ETA()]
+    pbar = ProgressBar(widgets=widgets, maxval=10000000)
+    # maybe do something
+    pbar.start()
+    for i in range(2000000):
+        # do something
+        pbar.update(5 * i + 1)
+    pbar.finish()
+    return pbar
+
+
+def example3():
+    widgets = [Bar('>'), ' ', ETA(), ' ', ReverseBar('<')]
+    pbar = ProgressBar(widgets=widgets, maxval=10000000).start()
+    for i in range(1000000):
+        # do something
+        pbar.update(10 * i + 1)
+    pbar.finish()
+    return pbar
+
+
+def example4():
+    widgets = ['Test: ', Percentage(), ' ',
+               Bar(marker='0', left='[', right=']'),
+               ' ', ETA(), ' ', FileTransferSpeed()]
+    pbar = ProgressBar(widgets=widgets, maxval=500)
+    pbar.start()
+    for i in range(100, 500 + 1, 50):
+        time.sleep(0.2)
+        pbar.update(i)
+    pbar.finish()
+    return pbar
+
+
+def example5():
+    widgets = ['Test: ', Fraction(), ' ', Bar(marker=RotatingMarker()),
+               ' ', ETA(), ' ', FileTransferSpeed()]
+    pbar = ProgressBar(widgets=widgets, maxval=10, force_update=True).start()
+    for i in range(1, 11):
+        # do something
+        time.sleep(0.5)
+        pbar.update(i)
+    pbar.finish()
+    return pbar
+
+
+def main():
+    """Run the five demos in order.
+
+    Each bare `print` below is a Python 2 print statement that emits an
+    empty line between demos.
+    """
+    example1()
+    print
+    example2()
+    print
+    example3()
+    print
+    example4()
+    print
+    example5()
+    print
+
+# Run the demos only when this module is executed as a script.
+if __name__ == '__main__':
+    main()
+import os
+from distutils.core import setup
+
+# Distribution metadata passed to setup() below.
+# PACKAGE is only referenced by the commented-out package_data call.
+PACKAGE = "corenlp"
+NAME = "corenlp-python"
+DESCRIPTION = "A Stanford Core NLP wrapper"
+AUTHOR = "Hiroyoshi Komatsu"
+AUTHOR_EMAIL = "hiroyoshi.komat@gmail.com"
+URL = "https://bitbucket.org/torotoki/corenlp-python"
+VERSION = "1.0.1"
+
+# Utility function to read the README file.
+# Used for the long_description.  It's nice, because now 1) we have a top level
+# README file and 2) it's easier to type in the README file than to put a raw
+# string in below ...
+def read(fname):
+    return open(os.path.join(os.path.dirname(__file__), fname)).read()
+
+# Register the distribution; the long description is read from README.md
+# next to this script.
+# NOTE(review): package_data is commented out, so non-Python files inside
+# the corenlp package (e.g. default.properties) are presumably not
+# installed - confirm against the generated MANIFEST.
+setup(
+    name=NAME,
+    version=VERSION,
+    description=DESCRIPTION,
+    long_description=read("README.md"),
+    author=AUTHOR,
+    author_email=AUTHOR_EMAIL,
+    url=URL,
+    packages=['corenlp'],
+    # package_data=find_package_data(
+    #     PACKAGE,
+    #     only_in_packages=False
+    # )
+    classifiers=[
+        "Intended Audience :: Developers",
+        "License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+)",
+        "Programming Language :: Python",
+    ],
+)