MontyLingua3 / JMontyLingua.py

"""
 Module JMontyLingua.py

 Java API for MontyLingua, please see MontyLingua module
 for additional documentation and license information.

 EXPLANATION OF FUNCTIONS:

     @sig public String jist_predicates(String text)
       - returns lisp-style predicate argument structures
       - each structure should look something like this:
          - ("verb" "subject" "obj1" "obj2" ... )
       - words are all lemmatised, and determiners and
         modals are stripped out
       - obj's can be direct or indirect, but not
         subordinate clauses for now.
       - returns one pred-arg per line
       - multiple pred-args are possible for a sentence
       - blank line separates pred-args of each sentence

     @sig public String tag_text(String text)
       - takes in raw text.
       - tokenizes and POS tags text using Brill94
         tbl-based tagging and common sense
       - uses Penn Treebank tagset
         (http://www.cis.upenn.edu/~treebank/)
       - returns the tagged sentences separated by blank lines

     @sig public String chunk_text(String text)
       - takes in raw text.
       - tokenizes, POS tags, and chunks the tagged text
         into adjective chunks, noun chunks, and verb
         chunks (AX, NX, and VX respectively)
       - returns the chunked sentences separated by blank lines
     
     @sig public String lemmatise_text(String text)
       - lemmatises raw text and outputs the form:
         'These/DT/These sentences/NNS/sentence were/VBZ/be false/JJ/false'
         (lemma follows the pos tag)
       - returns the lemmatised sentences separated by blank lines
"""

__version__ = "2.0"

import MontyLingua
import java
from jarray import array

class JMontyLingua(java.lang.Object):
    # Java-facing wrapper around a single MontyLingua.MontyLingua instance.
    #
    # Every public method takes raw text and returns one string, so the
    # results can cross into Java as a plain String.
    #
    # NOTE: the one-line docstrings are the "@sig" Java signature
    # declarations listed in the module docstring.  They must stay verbatim,
    # which is why explanatory notes here are written as comments.

    def __init__(self):
        "@sig public JMontyLingua()"
        # One underlying engine is created and reused by every call.
        self.theMontyLingua = MontyLingua.MontyLingua()

    def _tag_sentences(self, text):
        # Shared front end of tag_text, chunk_text and lemmatise_text:
        # split the raw text into sentences, tokenize each sentence, then
        # tag each tokenized sentence.  Returns a list with one tagged
        # result per sentence.
        #
        # The stages are run one after another over the whole list (all
        # sentences are tokenized before any is tagged), matching the order
        # in which the engine was called before this helper existed.
        engine = self.theMontyLingua
        sentences = engine.split_sentences(text)
        tokenized = [engine.tokenize(sentence) for sentence in sentences]
        return [engine.tag_tokenized(tokens) for tokens in tokenized]

    def jist_predicates(self,text):
        "@sig public String jist_predicates(String text)"
        # The engine returns one list of predicate-argument strings per
        # sentence.  Within a sentence they are joined one per line; the
        # sentences themselves are separated by a blank line.
        svoos_list = self.theMontyLingua.jist_predicates(text)
        per_sentence = ['\n'.join(svoos) for svoos in svoos_list]
        return '\n\n'.join(per_sentence)

    def tag_text(self,text):
        "@sig public String tag_text(String text)"
        # Tagged sentences, separated by a blank line.
        return '\n\n'.join(self._tag_sentences(text))

    def chunk_text(self,text):
        "@sig public String chunk_text(String text)"
        # Chunk every tagged sentence; results are separated by a blank line.
        chunk = self.theMontyLingua.chunk_tagged
        chunked = [chunk(tagged) for tagged in self._tag_sentences(text)]
        return '\n\n'.join(chunked)

    def lemmatise_text(self,text):
        "@sig public String lemmatise_text(String text)"
        # Lemmatise every tagged sentence; results are separated by a
        # blank line.
        lemmatise = self.theMontyLingua.lemmatise_tagged
        lemmatised = [lemmatise(tagged) for tagged in self._tag_sentences(text)]
        return '\n\n'.join(lemmatised)

    
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.