package com.tc;

import edu.stanford.nlp.util.IntPair;

import java.util.ArrayList;
import java.util.List;

/**
 * Created by Tomer on 2016-01-11.
 * Represents a span in a tree together with all the dependency nodes covering that span.
 */
public class DependencySpan {

    /** The span this instance describes. */
    private final IntPair span;

    /** Dependency tree nodes registered for {@link #span}; starts empty and is filled by callers. */
    private final List<DependencyTreeNode> dependencies;

    /**
     * Initializes a new instance of {@code DependencySpan} class with no dependencies.
     *
     * @param span the span of this instance.
     */
    DependencySpan(IntPair span) {
        this.span = span;
        this.dependencies = new ArrayList<>();
    }

    /**
     * Gets the span of this instance.
     *
     * @return the span passed to the constructor.
     */
    public IntPair getSpan() {
        return this.span;
    }

    /**
     * Gets the dependency nodes of this span.
     *
     * @return the live, mutable backing list (callers add to and remove from it directly).
     */
    public List<DependencyTreeNode> getDependencies() {
        return this.dependencies;
    }

    /**
     * @return the span followed by a single space and the list of dependencies.
     */
    @Override
    public String toString() {
        return this.span.toString() + " " + this.dependencies.toString();
    }
}
package com.tc;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.util.IntPair;
import org.json.JSONArray;
import org.json.JSONObject;

import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;

/**
 * Created by Tomer on 2015-10-28.
 * A node of a dependency tree: a name, an optional {@code CoreLabel}, a parent and an ordered
 * list of children.
 */
public class DependencyTreeNode {

    private DependencyTreeNode m_parent;
    private CoreLabel m_label;
    private String m_name;
    private List<DependencyTreeNode> m_children;

    /**
     * Creates a node without children.
     *
     * @param parent the parent node, or {@code null} for the root.
     * @param name   the name reported by {@link #value()} and {@link #toString()}.
     * @param label  the core label of this node; may be {@code null}, in which case index
     *               lookups fall back to the parent's label.
     */
    public DependencyTreeNode(DependencyTreeNode parent, String name, CoreLabel label) {
        this.m_parent = parent;
        this.m_name = name;
        this.m_label = label;
        this.m_children = new ArrayList<>();
    }

    /** @return the parent node, or {@code null} for the root. */
    public DependencyTreeNode parent() {
        return this.m_parent;
    }

    /** @return the name of this node. */
    public String value() {
        return this.m_name;
    }

    /** @return the core label of this node (may be {@code null}). */
    public CoreLabel label() {
        return this.m_label;
    }

    /** @return the live list of children. */
    public List<DependencyTreeNode> children() {
        return this.m_children;
    }

    /**
     * Appends a child to this node. The child's parent reference is not touched.
     *
     * @param node the child to append.
     */
    public void addChild(DependencyTreeNode node) {
        this.m_children.add(node);
    }

    /** @return {@code true} when this node has no parent. */
    public boolean isRoot() {
        return this.m_parent == null;
    }

    /** @return {@code true} when this node has no children. */
    public boolean isLeave() {
        return this.m_children.isEmpty();
    }

    @Override
    public String toString() {
        return this.m_name;
    }

    /**
     * Convert this node (and its children, recursively) to JSON object.
     *
     * @return JSONObject with "type" (the node name), an empty "headDep" and a "children" array.
     */
    public JSONObject toJSON() {
        JSONObject json = new JSONObject();
        json.put("type", this.m_name);
        json.put("headDep", "");

        JSONArray childArray = new JSONArray();
        for (DependencyTreeNode child : this.m_children) {
            childArray.put(child.toJSON());
        }
        json.put("children", childArray);
        return json;
    }

    /**
     * Get the minimum (CoreLabel/word) index in this subtree.
     *
     * @return the minimum index in the subtree. In case no core label the min index of the
     *         "parent" CoreLabel (design decision of where to place the CoreLabels).
     */
    public Integer getMinIndex() {
        int lowest = this.ownIndex();
        for (DependencyTreeNode child : this.m_children) {
            lowest = Math.min(lowest, child.getMinIndex());
        }
        return lowest;
    }

    /**
     * Get the maximum (CoreLabel/word) index in this subtree.
     *
     * @return the maximum index in the subtree. In case no core label the max index of the
     *         "parent" CoreLabel (design decision of where to place the CoreLabels).
     */
    public Integer getMaxIndex() {
        int highest = this.ownIndex();
        for (DependencyTreeNode child : this.m_children) {
            highest = Math.max(highest, child.getMaxIndex());
        }
        return highest;
    }

    /**
     * Gets the span covered by this subtree.
     *
     * @return a pair of {@link #getMinIndex()} and {@link #getMaxIndex()}.
     */
    public IntPair getSpan() {
        return new IntPair(this.getMinIndex(), this.getMaxIndex());
    }

    /**
     * Index of this node itself: the {@code IndexAnnotation} of its own label (or of the
     * parent's label when this node has none) minus one.
     * NOTE(review): the "- 1" presumably converts a 1-based token index to 0-based - confirm.
     */
    private int ownIndex() {
        CoreLabel source = (this.m_label != null) ? this.m_label : this.m_parent.m_label;
        return source.get(CoreAnnotations.IndexAnnotation.class) - 1;
    }
}
package com.tc;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.sentiment.SentimentCoreAnnotations;
import edu.stanford.nlp.trees.*;
import edu.stanford.nlp.util.CollectionFactory;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Filters;
import edu.stanford.nlp.util.IntPair;

import java.util.*;

/**
 * Created by Tomer on 2016-01-11.
 * Merges the constituency tree and the dependency tree of each sentence into a single tree of
 * {@code MergedNode}s by matching the word spans covered by their nodes.
 */
public class MergeConstituencyAndDependency {

    StanfordCoreNLP pipeline;
    GrammaticalStructureFactory gsf;

    public static void main(String[] args) {
        // define an instance of the stanford code annotator to be used for processing.
        Properties props = new Properties();

        // set the properties
        // (not sure why but gsf.newGrammaticalStructure(tree) fails without sentiment annotator)
        props.setProperty("annotators", "tokenize, ssplit, pos, parse, depparse, sentiment");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

        MergeConstituencyAndDependency processor = new MergeConstituencyAndDependency(pipeline);
        List<MergedNode> mergedTrees = processor.merge("The quick brown fox jumped over the lazy dog.");
        for (MergedNode mergedTree : mergedTrees) {
            System.out.println(mergedTree.toJSON());
        }
    }

    /**
     * Initializes a new instance that parses with the given pipeline.
     *
     * @param pipeline the CoreNLP pipeline used by {@link #merge(String)}.
     */
    public MergeConstituencyAndDependency(StanfordCoreNLP pipeline) {
        this.pipeline = pipeline;
        TreebankLanguagePack tlp = new PennTreebankLanguagePack();
        this.gsf = tlp.grammaticalStructureFactory(Filters.acceptFilter());
    }

    /**
     * Processes the text and builds one merged tree per sentence.
     *
     * @param data the raw text to process.
     * @return the merged trees, in sentence order.
     */
    public List<MergedNode> merge(String data) {
        Annotation annotation = this.pipeline.process(data);
        List<MergedNode> trees = new ArrayList<>();
        for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
            // get the annotated tree
            Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
            List<CoreLabel> coreLabels = sentence.get(CoreAnnotations.TokensAnnotation.class);

            GrammaticalStructure gs = gsf.newGrammaticalStructure(tree);
            Collection<TypedDependency> typedDependencies = gs.typedDependencies();

            DependencyTreeNode dependencyTree = getDependencyTree(typedDependencies);
            List<DependencySpan> dependencySpans = getDependencySpanMap(dependencyTree);

            trees.add(this.createUnifiedTree(tree, coreLabels.iterator(), dependencySpans));
        }
        return trees;
    }

    /**
     * Build expanded dependency tree by transforming the dependency graph (Universal Dependencies).
     * Loosely based on:
     * Tsarfaty et. al. "Evaluating Dependency Parsing: Robust and Heuristics-Free Cross-Annotation Evaluation"
     * (EMNLP, 2011, http://www.tsarfaty.com/pdfs/emnlp11.pdf)
     *
     * @param typedDependencies list of Universal Dependencies.
     * @return the dependency tree derived from the constituency tree, or {@code null} when no
     *         governor whose value is "ROOT" is present.
     */
    private DependencyTreeNode getDependencyTree(Collection<TypedDependency> typedDependencies) {
        // create a map between the governing node to its dependencies
        Map<CoreLabel, List<TypedDependency>> dependentsByGovernor = new HashMap<>();
        CoreLabel rootLabel = null;
        for (TypedDependency typedDependency : typedDependencies) {
            CoreLabel gov = typedDependency.gov().backingLabel();
            dependentsByGovernor.computeIfAbsent(gov, k -> new ArrayList<>()).add(typedDependency);

            // if encountered the root - save it (used for starting point for tree).
            // Compared with equals(): "==" only matches when both strings are the same object.
            if ("ROOT".equals(gov.value())) {
                rootLabel = gov;
            }
        }

        // sort/re-arrange the dependencies of each node according to their index (left-to-right)
        for (List<TypedDependency> dependents : dependentsByGovernor.values()) {
            dependents.sort((a, b) ->
                    a.dep().backingLabel().get(CoreAnnotations.IndexAnnotation.class)
                            .compareTo(b.dep().backingLabel().get(CoreAnnotations.IndexAnnotation.class)));
        }

        if (rootLabel == null) {
            return null;
        }

        // Breadth-first expansion starting at the root: every dependency becomes an "arc" node
        // (named after the relation, carrying the dependent's label) with a single label-less
        // child named after the dependent itself.
        DependencyTreeNode dependencyTreeRoot = new DependencyTreeNode(null, rootLabel.value(), rootLabel);
        Queue<DependencyTreeNode> pending = new LinkedList<>();
        pending.add(dependencyTreeRoot);
        while (!pending.isEmpty()) {
            DependencyTreeNode governor = pending.remove();
            List<TypedDependency> dependents = dependentsByGovernor.get(governor.label());
            if (dependents == null) {
                continue; // this word governs nothing
            }
            for (TypedDependency dependency : dependents) {
                DependencyTreeNode arcNode = new DependencyTreeNode(
                        governor, dependency.reln().toString(), dependency.dep().backingLabel());
                governor.addChild(arcNode);
                arcNode.addChild(new DependencyTreeNode(arcNode, dependency.dep().toString(), null));
                pending.add(arcNode);
            }
        }

        // arrange the children of every node according to the minimum index of their subtree
        Deque<DependencyTreeNode> toSort = new ArrayDeque<>();
        toSort.push(dependencyTreeRoot);
        while (!toSort.isEmpty()) {
            DependencyTreeNode node = toSort.pop();
            node.children().sort(Comparator.comparing(DependencyTreeNode::getMinIndex));
            // arrange the order in the children as well
            node.children().forEach(toSort::push);
        }
        return dependencyTreeRoot;
    }

    /**
     * Convert a dependency tree to span map.
     *
     * @param dependencyTree the dependency tree (see {@code getDependencyTree} for details);
     *                       {@code null} yields an empty list.
     * @return a mutable list of DependencySpans in breadth-first discovery order - each contains
     *         a span (from-to words) and the non-leaf dependency nodes covering exactly it.
     */
    private List<DependencySpan> getDependencySpanMap(DependencyTreeNode dependencyTree) {
        List<DependencySpan> spans = new ArrayList<>();
        if (dependencyTree == null) {
            // getDependencyTree found no root; there is nothing to map.
            return spans;
        }

        Map<IntPair, DependencySpan> bySpan = new HashMap<>();
        Queue<DependencyTreeNode> pending = new LinkedList<>();
        pending.add(dependencyTree);
        while (!pending.isEmpty()) {
            DependencyTreeNode node = pending.remove();
            if (!node.isLeave()) {
                IntPair span = node.getSpan();
                DependencySpan entry = bySpan.get(span);
                if (entry == null) {
                    entry = new DependencySpan(span);
                    bySpan.put(span, entry);
                    spans.add(entry);
                }
                entry.getDependencies().add(node);
            }
            pending.addAll(node.children());
        }
        return spans;
    }

    /**
     * Merge Constituency and Dependency trees.
     *
     * @param tree            the tree (current root)
     * @param labels          tree labels; one is consumed for every leaf, in traversal order
     * @param dependencySpans list of dependency spans based on dependency tree (see
     *                        {@code getDependencySpanMap}). Matched entries are consumed from it.
     * @return Unified tree containing both constituency and dependency information in each node
     */
    private MergedNode createUnifiedTree(Tree tree, Iterator<CoreLabel> labels, List<DependencySpan> dependencySpans) {
        IntPair currentNodeSpan = tree.getSpan();

        // this is where we merge the dependency and the constituency based on
        // matching span covered by the node in constituency tree to the span covered
        // by a node in the dependency tree. "_" marks a node without a matching dependency.
        String dependency = "_";
        Iterator<DependencySpan> candidates = dependencySpans.iterator();
        while (candidates.hasNext()) {
            DependencySpan dependencySpan = candidates.next();
            // Compared from the dependency side so that a constituency node without a span
            // simply does not match instead of throwing.
            if (dependencySpan.getSpan().equals(currentNodeSpan)) {
                List<DependencyTreeNode> nodes = dependencySpan.getDependencies();
                // take the first dependency for this node and remove it so it is not used again
                dependency = nodes.remove(0).value();
                // if the span is empty, remove it.
                if (nodes.isEmpty()) {
                    candidates.remove();
                }
                break;
            }
        }

        // Children are merged after the current node so that an outer constituent gets the
        // first pick among dependency nodes sharing the same span.
        List<MergedNode> children = new ArrayList<>();
        for (Tree childTree : tree.getChildrenAsList()) {
            children.add(this.createUnifiedTree(childTree, labels, dependencySpans));
        }

        MergedNode node;
        if (tree.isLeaf()) {
            CoreLabel next = labels.next();
            String word = next.get(CoreAnnotations.TextAnnotation.class);
            String pos = next.get(CoreAnnotations.PartOfSpeechAnnotation.class);
            node = new MergedNode(pos, word);
        } else {
            node = new MergedNode(tree.label().toString());
        }

        node.addDependency(dependency);
        if (!children.isEmpty()) {
            node.addChildren(children);
        }
        return node;
    }

    /**
     * Rewrites the tree in place: below every non-leaf node an intermediate node typed
     * "{dependencies of the children}@type" is inserted, and each original child is wrapped in
     * a realization node typed "child dependencies@type".
     *
     * NOTE(review): this method is not called from this class, and it relies on
     * {@code getSentiment()}, {@code getHeadWord()}, {@code setHeadWord(..)} and a two-argument
     * constructor taking a sentiment, none of which the {@code MergedNode} in this source
     * declares - confirm which {@code MergedNode} version it targets before using it.
     *
     * @param unifiedTreeNode root of the merged tree to rewrite.
     */
    private void convertToRelationalRealization(MergedNode unifiedTreeNode) {
        Queue<MergedNode> queue = new LinkedList<>();
        queue.add(unifiedTreeNode);
        while (!queue.isEmpty()) {
            MergedNode current = queue.remove();
            if (current.isLeaf()) {
                continue;
            }

            String dependencies = removeSquareParenthesis(
                    Arrays.toString(current.getChildren().stream().map(c -> c.getDependenciesStr()).toArray()));
            String intermediateType = String.format("{%s}@%s", dependencies, current.getType());
            MergedNode intermediate = new MergedNode(intermediateType, current.getSentiment());
            intermediate.setHeadWord(current.getHeadWord());

            for (MergedNode childNode : current.getChildren()) {
                MergedNode realizationNode = new MergedNode(
                        String.format("%s@%s", childNode.getDependenciesStr(), current.getType()),
                        current.getSentiment());
                realizationNode.setHeadWord(current.getHeadWord());
                childNode.getDependencies().forEach(d -> {
                    intermediate.addDependency(d);
                    realizationNode.addDependency(d);
                });
                intermediate.addChild(realizationNode);
                realizationNode.addChild(childNode);
                if (!childNode.isPreTerminal()) {
                    queue.add(childNode);
                }
            }

            current.getChildren().clear();
            current.addChild(intermediate);
        }
    }

    /**
     * Removes every '[' and ']' character from the string.
     *
     * @param s the input string.
     * @return the string without square brackets.
     */
    private String removeSquareParenthesis(String s) {
        return s.replaceAll("[\\[\\]]", "");
    }
}
/**
 * Created by Tomer on 2015-04-24.
 */
package com.tc;

import org.json.JSONArray;
import org.json.JSONObject;

import java.util.*;

/**
 * A merged tree node which include relevant annotation
 */
public class MergedNode {

    /** Backing field for type property. */
    private String type;

    /** Backing field for part of speech property. */
    private String pos;

    /** Backing field for word property. */
    private String word;

    /** Backing field for children property. */
    private List<MergedNode> children;

    /** Backing field for the dependencies attached to this node. */
    private List<String> dependencies;

    /**
     * Initializes a new instance of the MergedNode Class which is a non-terminal.
     * Part of speech and word are set to the empty string.
     *
     * @param type the type of the node (non-terminal).
     */
    public MergedNode(String type) {
        this(type, "", "");
    }

    /**
     * Initializes a new instance of the MergedNode class which is a leaf (terminal).
     * The type is always "TK".
     *
     * @param pos  the part of speech of the node.
     * @param word the word of the node.
     */
    public MergedNode(String pos, String word) {
        this("TK", pos, word);
    }

    /**
     * Initializes a new instance of the MergedNode with all properties.
     *
     * @param type the type of node (could be either terminal or not).
     * @param pos  the part of speech of the node.
     * @param word the word of the node.
     */
    private MergedNode(String type, String pos, String word) {
        this.setType(type);
        this.setPos(pos);
        this.setWord(word);
        this.children = new ArrayList<>();
        this.dependencies = new ArrayList<>();
    }

    /**
     * Gets a value indicating if this node is a leaf.
     *
     * @return {@code true} if the node has no children and {@code false} otherwise.
     */
    public boolean isLeaf() {
        return this.children.isEmpty();
    }

    /**
     * Gets the part of speech tag value. Relevant to leaf nodes.
     *
     * @return a {@code String} of the POS tag.
     */
    public String getPos() {
        return this.pos;
    }

    /**
     * Gets type of the node.
     *
     * @return a {@code String} representing the type of the node. Nodes built with the
     *         (pos, word) constructor return "TK".
     */
    public String getType() {
        return this.type;
    }

    /**
     * Gets the word of this node. Relevant to leaf nodes.
     *
     * @return a {@code String}.
     */
    public String getWord() {
        return this.word;
    }

    /**
     * Gets the children of this node.
     *
     * @return the live {@code List<MergedNode>} containing the children of this node.
     */
    public List<MergedNode> getChildren() {
        return this.children;
    }

    /**
     * Sets the part of speech of this node.
     *
     * @param pos the part of speech tag of the node.
     */
    protected void setPos(String pos) {
        this.pos = pos;
    }

    /**
     * Sets the type of this node.
     *
     * @param type the type of the node.
     */
    protected void setType(String type) {
        this.type = type;
    }

    /**
     * Sets the word of this node.
     *
     * @param word the word of the node.
     */
    protected void setWord(String word) {
        this.word = word;
    }

    /**
     * Get the dependencies of this node as string.
     *
     * @return the dependencies rendered by {@code Arrays.toString}, e.g. "[a, b]".
     */
    public String getDependenciesStr() {
        Object[] snapshot = this.dependencies.toArray();
        return Arrays.toString(snapshot);
    }

    /**
     * Gets the dependencies of this node.
     *
     * @return the live list of dependencies.
     */
    public List<String> getDependencies() {
        return this.dependencies;
    }

    /**
     * Adds all the given dependencies to this node. Existing entries are kept: despite its
     * name this method appends, it does not replace.
     *
     * @param dependencies a list of dependencies
     */
    public void setDependencies(List<String> dependencies) {
        this.dependencies.addAll(dependencies);
    }

    /**
     * Adds a single dependency to this node.
     *
     * @param dependency the dependency to add.
     */
    public void addDependency(String dependency) {
        this.dependencies.add(dependency);
    }

    /**
     * Returns this node as a JSON object. Populate the relevant properties according to the type of node.
     *
     * @return a JSON object that always has "type"; leaves also get "word" and "pos";
     *         "dependencies" is present when at least one dependency exists; child nodes, if
     *         any, are converted recursively and returned as a JSONArray under "children".
     */
    public JSONObject toJSON() {
        JSONArray childArray = new JSONArray();
        for (MergedNode child : this.children) {
            childArray.put(child.toJSON());
        }

        JSONObject json = new JSONObject();
        json.put("type", this.getType());

        if (this.isLeaf()) {
            json.put("word", this.getWord());
            json.put("pos", this.getPos());
        }

        boolean hasDependencies = this.dependencies != null && this.dependencies.size() > 0;
        if (hasDependencies) {
            json.put("dependencies", new JSONArray(this.dependencies.toArray()));
        }

        if (childArray.length() > 0) {
            json.put("children", childArray);
        }
        return json;
    }

    /**
     * Add children to this node.
     *
     * @param children a list of children.
     */
    public void addChildren(List<MergedNode> children) {
        for (MergedNode each : children) {
            this.addChild(each);
        }
    }

    /**
     * Add a single child to this node.
     *
     * @param node the child to add.
     */
    public void addChild(MergedNode node) {
        this.children.add(node);
    }

    /**
     * Get the annotation for this node.
     *
     * @return the word for a leaf; otherwise the type followed by the dependencies string in
     *         square brackets.
     */
    public String getAnnotation() {
        if (this.isLeaf()) {
            return this.getWord();
        }
        return String.format("%s[%s]", this.getType(), this.getDependenciesStr());
    }

    /**
     * Return a value indicating whether this node is before the leaves (or, all node's children are leaves).
     *
     * @return {@code true} if all child nodes are leaves (including when there are no
     *         children) and false otherwise.
     */
    public boolean isPreTerminal() {
        return this.getChildren().stream().allMatch(MergedNode::isLeaf);
    }

    @Override
    public String toString() {
        return this.getAnnotation();
    }

    /**
     * Get the yield (leaves) of the tree.
     *
     * @return {@code List<String>} where each entry in list is the word of a leaf, left to right.
     */
    public List<String> yieldArray() {
        List<String> words = new ArrayList<>();
        Deque<MergedNode> pending = new ArrayDeque<>();
        pending.push(this);
        while (!pending.isEmpty()) {
            MergedNode current = pending.pop();
            if (current.isLeaf()) {
                words.add(current.getWord());
                continue;
            }
            // push right-to-left so the leftmost child is popped first
            List<MergedNode> kids = current.getChildren();
            for (int i = kids.size() - 1; i >= 0; i--) {
                pending.push(kids.get(i));
            }
        }
        return words;
    }
}
Comments (0)
HTTPSSSH
You can clone a snippet to your computer for local editing.
Learn more.