% Created by Peter Schüller 2016-02-24
%
% SemEval 2016 System: Inspire - Interpretable Textual
% Similarity Alignment based on Answer Set Programming
%
% Copyright (C) 2015-2016 Mishal Kazmi
% Copyright (C) 2015-2016 Peter Schueller
%

% Convenience projections of the 5-ary input fact mword(I,W,L,P,N) onto
% binary predicates keyed by the word ID I:
%   word/2  - second argument (the word itself)
%   lword/2 - third argument (the form used for case-insensitive
%             comparison in match/2; presumably lowercased - confirm
%             against the fact generator)
%   pos/2   - fourth argument (POS tag)
%   ner/2   - fifth argument (NER tag)
word(Id,Form)   :- mword(Id,Form,_,_,_).
lword(Id,Lower) :- mword(Id,_,Lower,_,_).
pos(Id,Tag)     :- mword(Id,_,_,Tag,_).
ner(Id,Entity)  :- mword(Id,_,_,_,Entity).

% Word classes, derived from the POS tag (NER tag for locations).
% A pooled argument ("A";"B") lets the rule apply to each listed value.
propernoun(W) :- pos(W,("NNP";"NNPS")).
% proper nouns, pronouns and wh-pronouns all count as nouns
noun(W) :- propernoun(W).
noun(W) :- pos(W,("NN";"NNS";"PRP";"PRP$";"WP";"WP$")).
verb(W) :- pos(W,("VB";"VBD";"VBG";"VBN";"VBP";"VBZ")).
adj(W) :- pos(W,("JJ";"JJR";"JJS")).
adv(W) :- pos(W,("RB";"RBR";"RBS";"WRB")).
location(W) :- ner(W,"LOCATION").

% content words are exactly the nouns, verbs, adjectives and adverbs
contentword(W) :- adv(W).
contentword(W) :- adj(W).
contentword(W) :- verb(W).
contentword(W) :- noun(W).

% conjunctions are recognised by their lword/2 string, not by POS tag
conjunction(W) :- lword(W,("and";"or";"but";"although";"therefore";"hence";"moreover")).

% Chunk similarity is symmetric.
% (Python generates each pair in one direction only, while the ASP rules
% look it up in both directions.)
chunksimilarity(B,A,Sim) :- chunksimilarity(A,B,Sim).

% has_chunksimilarity(A,B): a similarity value is known for this pair
has_chunksimilarity(A,B) :- chunksimilarity(A,B,_).

% chpair(SentenceID1,SentenceID2,ChunkID1,ChunkID2):
%   pair of chunks that belong to different sentences;
%   a chunk ID is a term sc(SentenceID,ChunkIndex)
chpair(SA,SB,sc(SA,IA),sc(SB,IB)) :-
  chunk(sc(SA,IA)), chunk(sc(SB,IB)), SA != SB.

% has_contentword(C): chunk C contains at least one content word
has_contentword(C) :- contentword(cw(C,_)).

% has_contentword_or_dontcare(C):
%   if the flag require_contentword holds, true only for chunks that
%   contain a content word; without the flag, true for every chunk
has_contentword_or_dontcare(C) :- require_contentword, has_contentword(C).
has_contentword_or_dontcare(C) :- not require_contentword, chunk(C).

% Conditions that are evaluated before the alignment rules

% condc1(ChunkID1,ChunkID2):
%   chunk 2 contains at least one conjunction, chunk 1 contains none
%   (the mirrored case is obtained by swapping the arguments)
condc1(A,B) :-
  chpair(_,_,A,B),
  0 = #count { WA : conjunction(cw(A,WA)) },
  1 <= #count { WB : conjunction(cw(B,WB)) }.

% condc2(ChunkID1,ChunkID2):
%   some content word of chunk 1 has an antonym among the words of chunk 2
condc2(A,B) :-
  chpair(_,_,A,B),
  lword(cw(A,IA),LA), contentword(cw(A,IA)),
  #count { IB : lword(cw(B,IB),LB), antonym(LA,LB) } >= 1.

% condc3(ChunkID): the chunk contains at least one word marked cardinalnumber/1
condc3(C) :- chunk(C), cardinalnumber(cw(C,_)).

% match(WordID1,WordID2):
%   two words from chunks of different sentences have the same lword/2
%   string (i.e. they match ignoring case); a word ID is cw(ChunkID,Index)
match(cw(A,IA),cw(B,IB)) :-
  chpair(_,_,A,B),
  lword(cw(A,IA),Lower), lword(cw(B,IB),Lower).

% condc4(ChunkID):
%   the chunk contains a word tagged as LOCATION entity
condc4(Chunk) :- location(cw(Chunk,_)).

% condc5(ChunkID):
%   the chunk contains a word marked datetime/1 (DATE/TIME entity)
condc5(Chunk) :- datetime(cw(Chunk,_)).

% condc6(ChunkID1,ChunkID2):
%   the chunks share a matching content word that is not a noun
%   on either side
condc6(A,B) :-
  match(cw(A,IA),cw(B,IB)),
  not noun(cw(A,IA)), contentword(cw(A,IA)),
  not noun(cw(B,IB)), contentword(cw(B,IB)).

% condc7(ChunkID):
%   the chunk contains a conjunction
condc7(Chunk) :- conjunction(cw(Chunk,_)), chunk(Chunk).

% Combined conditions

% cond1235(A,B): one of the conditions 1, 2, 3 or 5 holds for the pair.
% conditions 1 and 2 are directional, so both argument orders are tested
cond1235(A,B) :- condc1(A,B).
cond1235(A,B) :- condc1(B,A).
cond1235(A,B) :- condc2(A,B).
cond1235(A,B) :- condc2(B,A).
% conditions 3 and 5 concern a single chunk: either chunk of the pair suffices
cond1235(A,B) :- condc3(A), chpair(_,_,A,B).
cond1235(A,B) :- condc3(B), chpair(_,_,A,B).
cond1235(A,B) :- condc5(A), chpair(_,_,A,B).
cond1235(A,B) :- condc5(B), chpair(_,_,A,B).

% cond1to5(A,B): cond1235 holds, or both chunks contain a location entity
cond1to5(A,B) :- cond1235(A,B).
cond1to5(A,B) :- condc4(A), condc4(B), chpair(_,_,A,B).

% cond3or7(C): condition 3 or condition 7 holds for the chunk
cond3or7(Chunk) :- condc7(Chunk).
cond3or7(Chunk) :- condc3(Chunk).

% len(ChunkID,w,Length): Length is the number of words in the chunk
% (the constant w marks the unit of measurement: words)
len(Chunk,w,N) :-
  chunk(Chunk),
  #count { Idx : word(cw(Chunk,Idx),_) } = N.

% Rules

% punct/1: tags that are treated as punctuation
punct(".";",";"!";"?";"'";"\"").

% no1(ChunkID):
%   the chunk consists of exactly one word and the POS tag of a word in
%   it is listed in punct/1, i.e. the chunk is a single punctuation token
no1(Chunk) :- len(Chunk,w,1), pos(cw(Chunk,_),Tag), punct(Tag), chunk(Chunk).

% word_extra_w(A,B,WordID):
%   WordID is a word of chunk A that matches no word of chunk B
word_extra_w(A,B,cw(A,IA)) :-
  chpair(_,_,A,B),
  word(cw(A,IA),_),
  #count { IB : match(cw(A,IA),cw(B,IB)) } = 0.

% word_extra(A,B):
%   chunk A has at least one word without a match in chunk B
word_extra(A,B) :- word_extra_w(A,B,_).

% eq1(ChunkID1,ChunkID2):
%   chunks of different sentences consist of the same words (compared
%   via match/2): neither chunk has a word that is unmatched in the other
eq1(A,B) :-
  chpair(_,_,A,B),
  not word_extra(B,A), not word_extra(A,B).

% contentword_extra_w(A,B,WordID):
%   WordID is a content word of chunk A that has no match in chunk B
contentword_extra_w(A,B,W) :- contentword(W), word_extra_w(A,B,W).

% contentword_extra(A,B):
%   chunk A has some content word that is not in chunk B
%   (as sets of content words: A is not a subset of B)
contentword_extra(A,B) :- contentword_extra_w(A,B,_).

% contentword_subset(A,B):
%   every content word of chunk A also occurs in chunk B
%   (chunk A is a "sub-chunk" of chunk B)
contentword_subset(A,B) :- not contentword_extra(A,B), chpair(_,_,A,B).

% contentword_match(ChunkID1,ChunkID2):
%   at least one content word of chunk 1 matches a word of chunk 2
contentword_match(A,B) :-
  chpair(_,_,A,B),
  contentword(cw(A,IA)),
  match(cw(A,IA),cw(B,IB)).

% eq2(ChunkID1,ChunkID2):
%   both chunks have the same content words: at least one content word
%   matches and neither chunk has an unmatched content word
%   (only defined from sentence 1 to sentence 2)
eq2(A,B) :-
  chpair(1,2,A,B),
  not contentword_extra(B,A), not contentword_extra(A,B),
  contentword_match(A,B).

% contentword_extra_notsynonym_w(ChunkID1,ChunkID2,WordID):
%   WordID is an unmatched content word of chunk 1 that additionally has
%   no synonym among the words of chunk 2
contentword_extra_notsynonym_w(A,B,WA) :-
  contentword_extra_w(A,B,WA),
  lword(WA,LA),
  #count { IB : lword(cw(B,IB),LB), synonym(LA,LB) } = 0.

% contentword_extra_notsynonym(ChunkID1,ChunkID2):
%   chunk 1 has at least one such word
contentword_extra_notsynonym(A,B) :- contentword_extra_notsynonym_w(A,B,_).

% build the symmetric and transitive closure over synonyms
synonym(B,A) :- synonym(A,B).
synonym(A,C) :- synonym(A,B), synonym(B,C).

% antonyms are symmetric (no transitive closure here)
antonym(B,A) :- antonym(A,B).

% eq3(ChunkID1,ChunkID2):
%   all content words match when synonyms are taken into account:
%   neither chunk has an unmatched content word that lacks a synonym
%   in the other chunk
eq3(A,B) :-
  chpair(_,_,A,B),
  not contentword_extra_notsynonym(B,A),
  not contentword_extra_notsynonym(A,B).

% eq4(ChunkID1,ChunkID2):
%   all content words of chunk 1 are matched in chunk 2, and every
%   unmatched content word of chunk 2 is a proper noun

% contentword_extra_notpropernoun_w(A,B,WordID):
%   WordID is an unmatched content word of chunk A that is not a proper noun
contentword_extra_notpropernoun_w(A,B,WA) :-
  not propernoun(WA), contentword_extra_w(A,B,WA).
contentword_extra_notpropernoun(A,B) :- contentword_extra_notpropernoun_w(A,B,_).

% Defined for pairs in either sentence order (chpair(_,_,...)); the roles
% of the two arguments are not symmetric.
% NOTE(review): the equi4 alignment rule queries eq4 only with the
% sentence-1 chunk as first argument - confirm that the reverse
% direction is intentionally unused.
eq4(A,B) :-
  chpair(_,_,A,B),
  not contentword_extra(A,B),
  not contentword_extra_notpropernoun(B,A),
  % only if none of the conditions 1 to 5 is fulfilled
  not cond1to5(A,B).

% eqcontentw(ChunkID1,ChunkID2):
%   both chunks contain the same number of content words
%   (each content word of chunk 1 adds 1 and each content word of
%   chunk 2 adds -1, so the sum is 0 exactly for equal counts)
eqcontentw(A,B) :-
  chpair(1,2,A,B),
  #sum { 1,IA : contentword(cw(A,IA)) ; -1,IB : contentword(cw(B,IB)) } = 0.

% eq5(ChunkID1,ChunkID2):
%   equal number of content words and Mikolov similarity > 0.6
%   (similarity values are compared against 60 here)
eq5(A,B) :-
  chunksimilarity(A,B,Sim), Sim > 60,
  eqcontentw(A,B),
  % only if none of the conditions 1, 2, 3, 5 is fulfilled
  not cond1235(A,B).

% op1(ChunkID1,ChunkID2):
%   a content word of chunk 1 has an antonym in chunk 2 (condition 2),
%   unless condition 3 or condition 7 holds for one of the chunks
%   (defined for pairs in either sentence order)
op1(A,B) :-
  chpair(_,_,A,B),
  not cond3or7(A), not cond3or7(B),
  condc2(A,B).

% sp1(ChunkID A,ChunkID B): chunk A is more specific than chunk B because
%   - chunk A has a conjunction while chunk B has none, and
%   - chunk A contains all content words of chunk B
sp1(A,B) :-
  chpair(_,_,A,B),
  % both chunks contain a content word (if that is required at all)
  has_contentword_or_dontcare(A), has_contentword_or_dontcare(B),
  % B is a sub-chunk of A
  contentword_subset(B,A),
  % B has no conjunction, A has at least one
  condc1(B,A).

% sp2(ChunkID A,ChunkID B): chunk A is more specific than chunk B
%   chunk A contains all content words of chunk B, and none of the extra
%   content words of A is a verb.
%   Among the candidates for a chunk A, a partner with maximum token
%   overlap is selected via sp2choose.
sp2candidate(A,B) :-
  chpair(_,_,A,B),
  % both chunks contain a content word (if that is required at all)
  has_contentword_or_dontcare(A), has_contentword_or_dontcare(B),
  % B is a sub-chunk of A
  contentword_subset(B,A),
  % no extra content word of A is a verb
  #count { W : contentword_extra_w(A,B,W), verb(W) } = 0.

% sp2overlap(A,B,N): N words of chunk A have a match in chunk B
sp2overlap(A,B,N) :-
  sp2candidate(A,B),
  N = #count { IA : match(cw(A,IA),cw(B,IB)) }.

% sp2bestoverlap(A,Best): Best is the largest overlap over all candidates of A
sp2bestoverlap(A,Best) :-
  sp2candidate(A,_),
  Best = #max { N : sp2overlap(A,Partner,N) }.

% for each chunk A, the partners B with the highest overlap may be chosen
{ sp2choose(A,B) } :- sp2bestoverlap(A,Highest), sp2overlap(A,B,Highest).

% choose exactly one partner for each A
% The aggregate must count the chosen partners B. Counting A (a global
% variable bound by sp2candidate/2) yields a single tuple however many
% partners are chosen, so it would only enforce "at least one".
:- sp2candidate(A,_), not 1 = #count { B : sp2choose(A,B) }.

% sp2(A,B): the selected "A is more specific than B" pair
sp2(A,B):- sp2choose(A,B).

% sp3(ChunkID A,ChunkID B): chunk A is more specific than chunk B
%   both chunks contain exactly one noun each, and the hypernym relation
%   between the two nouns decides which chunk is more specific

% sp3onenouneach(A,B): each chunk of the pair contains exactly one noun
sp3onenouneach(A,B) :-
  chpair(_,_,A,B),
  #count { IA : noun(cw(A,IA)) } = 1,
  #count { IB : noun(cw(B,IB)) } = 1.

sp3(Specific,General) :-
  sp3onenouneach(General,Specific),
  noun(cw(General,IG)), lword(cw(General,IG),LG),
  noun(cw(Specific,IS)), lword(cw(Specific,IS),LS),
  % hypernym(X,Y): Y is more specific than X
  hypernym(LG,LS).


% si1(ChunkID1,ChunkID2):
%   each chunk has exactly one unmatched content word, and both of these
%   words are cardinal numbers

% si1candidate(A,B): exactly one extra content word on each side
si1candidate(A,B) :-
  chpair(1,2,A,B),
  #count { WA : contentword_extra_w(A,B,WA) } = 1,
  #count { WB : contentword_extra_w(B,A,WB) } = 1.

% the single extra content word on each side is a cardinal number
si1(A,B) :-
  si1candidate(A,B),
  cardinalnumber(WA), contentword_extra_w(A,B,WA),
  cardinalnumber(WB), contentword_extra_w(B,A,WB).

% si2(ChunkID1,ChunkID2):
%   both chunks contain a DATE/TIME entity (condition 5)
si2(A,B) :- condc5(A), condc5(B), chpair(1,2,A,B).

% si3(ChunkID1,ChunkID2):
%   both chunks contain a LOCATION entity (condition 4)
si3(A,B) :- condc4(A), condc4(B), chpair(1,2,A,B).

% si4sim(ChunkID1,ChunkID2,Similarity):
%   both chunks share at least one noun; Similarity is their chunk
%   similarity. The simi4 alignment rules turn it into a score:
%   3 if sim Mikolov >= 0.4 (value 40), otherwise 2
si4sim(A,B,Sim) :-
  chpair(1,2,A,B),
  chunksimilarity(A,B,Sim),
  match(cw(A,IA),cw(B,IB)),
  noun(cw(A,IA)), noun(cw(B,IB)).

% si5sim(ChunkID1,ChunkID2,Similarity):
%   condition 6 is not satisfied; Similarity is the chunk similarity.
%   The simi5 alignment rules turn it into a score
%   (similarity values are scaled by 100 there):
%   score = 4 if sim Mikolov in [0.70, 1.00]
%   score = 3 if sim Mikolov in [0.65, 0.70)
%   score = 2 if sim Mikolov in [0.60, 0.65)
%   score = 1 if sim Mikolov in [0.55, 0.60)
si5sim(A,B,Sim) :-
  chpair(1,2,A,B),
  chunksimilarity(A,B,Sim),
  not condc6(A,B),
  has_contentword_or_dontcare(A), has_contentword_or_dontcare(B).

% re1sim(ChunkID1,ChunkID2,Similarity):
%   both chunks share at least one content word other than a noun
%   (condition 6); Similarity is the chunk similarity
re1sim(A,B,Sim) :-
  chpair(1,2,A,B),
  chunksimilarity(A,B,Sim),
  condc6(A,B),
  has_contentword_or_dontcare(A), has_contentword_or_dontcare(B).


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% NeRoSim: "For aligning a chunk pair, these rules are applied in order
% of precedence as NOALIC, EQUI, OPPO, SPE, SIMI, and REL."
% Below we define such an order in terms of steps, using nextStep/2.
%
% Chunk alignments are defined step by step:
% chalign(C1,Rel,Score,C2,Step)

%nextStep(noalic,equi1).
%nextStep(equi1,equi2).  nextStep(equi2,equi3).  nextStep(equi3,equi4).
%nextStep(equi4,equi5).  nextStep(equi5,oppo).   nextStep(oppo,sp1).
%nextStep(sp1,sp2).      nextStep(sp2,sp3).
%nextStep(sp3,simi1).    nextStep(simi1,simi2).  nextStep(simi2,simi3).
%nextStep(simi3,simi4).  nextStep(simi4,simi5).  nextStep(simi5,rel1).
%nextStep(rel1,result).

% The final alignment is collected from the alignment steps:
% final(ChunkID1,Relation,Score,ChunkID2,Step causing the alignment,Mikolov score)

% similarity_or_none(A,B,Sim): for every candidate pair (sentence 1 to
% sentence 2), Sim is the chunk similarity, or null if none is known
similarity_or_none(A,B,null) :- not has_chunksimilarity(A,B), chpair(1,2,A,B).
similarity_or_none(A,B,Sim) :- chunksimilarity(A,B,Sim), chpair(1,2,A,B).

% NOTE(review): similarity_or_none/3 exists only for chpair(1,2,...) pairs,
% so alignments with a null partner (the NOALI rules) never produce a
% final/6 atom - confirm that this is intended.
final(A,Rel,Score,B,Step,Sim) :-
  % the step is part of the configured step order
  usedStep(Step),
  % the alignment is introduced at this step: neither chunk was already
  % aligned in an earlier step
  chalign(A,Rel,Score,B,Step),
  not aligned(A,Step), not aligned(B,Step),
  % attach the similarity value if there is one
  similarity_or_none(A,B,Sim).

% a chunk that takes part in an alignment at some step (on either side)
% counts as already aligned in the following step
aligned(C,Next) :- nextStep(Step,Next), chalign(C,_,_,_,Step).
aligned(C,Next) :- nextStep(Step,Next), chalign(_,_,_,C,Step).

% what is aligned stays aligned: both facts are carried over to the next step
aligned(C,Next) :- nextStep(Step,Next), aligned(C,Step).
chalign(A,Rel,Score,B,Next) :- nextStep(Step,Next), chalign(A,Rel,Score,B,Step).

% usedStep(S): S occurs on either side of a nextStep/2 fact
usedStep(S) :- nextStep(S,_).
usedStep(S) :- nextStep(_,S).

% NOALI alignments: a chunk satisfying no1 is aligned with null.
% The chunk keeps its side: sentence-1 chunks are the first argument,
% sentence-2 chunks the fourth.
chalign(sc(1,I),"NOALI",0,null,noalic) :-
  chunk(sc(1,I)), no1(sc(1,I)), not aligned(sc(1,I),noalic).
chalign(null,"NOALI",0,sc(2,I),noalic) :-
  chunk(sc(2,I)), no1(sc(2,I)), not aligned(sc(2,I),noalic).

% EQUI alignments (score 5), one step per rule eq1 .. eq5;
% both chunks must still be unaligned at the respective step
chalign(A,"EQUI",5,B,equi1) :- eq1(A,B), chpair(1,2,A,B),
  not aligned(A,equi1), not aligned(B,equi1).
chalign(A,"EQUI",5,B,equi2) :- eq2(A,B), chpair(1,2,A,B),
  not aligned(A,equi2), not aligned(B,equi2).
chalign(A,"EQUI",5,B,equi3) :- eq3(A,B), chpair(1,2,A,B),
  not aligned(A,equi3), not aligned(B,equi3).
chalign(A,"EQUI",5,B,equi4) :- eq4(A,B), chpair(1,2,A,B),
  not aligned(A,equi4), not aligned(B,equi4).
chalign(A,"EQUI",5,B,equi5) :- eq5(A,B), chpair(1,2,A,B),
  not aligned(A,equi5), not aligned(B,equi5).

% OPPO alignments (score 4): rule op1 on two still unaligned chunks
chalign(A,"OPPO",4,B,oppo) :- op1(A,B), chpair(1,2,A,B),
  not aligned(A,oppo), not aligned(B,oppo).

% SPE1/SPE2 alignments (score 4)
% sp1/sp2/sp3(ChunkID A,ChunkID B): chunk A is more specific than chunk B.
% SPE1: the sentence-1 chunk is the more specific one;
% SPE2: the sentence-2 chunk is the more specific one.
chalign(A,"SPE1",4,B,sp1) :- sp1(A,B), chpair(1,2,A,B),
  not aligned(A,sp1), not aligned(B,sp1).
chalign(A,"SPE2",4,B,sp1) :- sp1(B,A), chpair(1,2,A,B),
  not aligned(A,sp1), not aligned(B,sp1).

chalign(A,"SPE1",4,B,sp2) :- sp2(A,B), chpair(1,2,A,B),
  not aligned(A,sp2), not aligned(B,sp2).
chalign(A,"SPE2",4,B,sp2) :- sp2(B,A), chpair(1,2,A,B),
  not aligned(A,sp2), not aligned(B,sp2).

chalign(A,"SPE1",4,B,sp3) :- sp3(A,B), chpair(1,2,A,B),
  not aligned(A,sp3), not aligned(B,sp3).
chalign(A,"SPE2",4,B,sp3) :- sp3(B,A), chpair(1,2,A,B),
  not aligned(A,sp3), not aligned(B,sp3).

% SIMI alignments; both chunks must still be unaligned at the step

% simi1 .. simi3: fixed score 3
chalign(A,"SIMI",3,B,simi1) :- si1(A,B), chpair(_,_,A,B),
  not aligned(A,simi1), not aligned(B,simi1).
chalign(A,"SIMI",3,B,simi2) :- si2(A,B), chpair(_,_,A,B),
  not aligned(A,simi2), not aligned(B,simi2).
chalign(A,"SIMI",3,B,simi3) :- si3(A,B), chpair(_,_,A,B),
  not aligned(A,simi3), not aligned(B,simi3).

% simi4: score 3 from similarity 40 upwards, score 2 below
chalign(A,"SIMI",3,B,simi4) :- si4sim(A,B,Sim), Sim >= 40, chpair(_,_,A,B),
  not aligned(A,simi4), not aligned(B,simi4).
chalign(A,"SIMI",2,B,simi4) :- si4sim(A,B,Sim), Sim < 40, chpair(_,_,A,B),
  not aligned(A,simi4), not aligned(B,simi4).

% simi5: score by similarity band; no alignment below 55
chalign(A,"SIMI",4,B,simi5) :- si5sim(A,B,Sim), 70 <= Sim, chpair(_,_,A,B),
  not aligned(A,simi5), not aligned(B,simi5).
chalign(A,"SIMI",3,B,simi5) :- si5sim(A,B,Sim), 65 <= Sim, Sim < 70, chpair(_,_,A,B),
  not aligned(A,simi5), not aligned(B,simi5).
chalign(A,"SIMI",2,B,simi5) :- si5sim(A,B,Sim), 60 <= Sim, Sim < 65, chpair(_,_,A,B),
  not aligned(A,simi5), not aligned(B,simi5).
chalign(A,"SIMI",1,B,simi5) :- si5sim(A,B,Sim), 55 <= Sim, Sim < 60, chpair(_,_,A,B),
  not aligned(A,simi5), not aligned(B,simi5).

% REL alignments: score by similarity band of re1sim
% (4 from 50 upwards, 3 in [40,50), 2 below 40);
% both chunks must still be unaligned at step rel1
chalign(A,"REL",4,B,rel1) :- re1sim(A,B,Sim), 50 <= Sim, chpair(_,_,A,B),
  not aligned(A,rel1), not aligned(B,rel1).
chalign(A,"REL",3,B,rel1) :- re1sim(A,B,Sim), 40 <= Sim, Sim < 50, chpair(_,_,A,B),
  not aligned(A,rel1), not aligned(B,rel1).
chalign(A,"REL",2,B,rel1) :- re1sim(A,B,Sim), Sim < 40, chpair(_,_,A,B),
  not aligned(A,rel1), not aligned(B,rel1).

% hide everything: the empty #show directive suppresses all atoms that
% are not explicitly shown below
#show.

% show what we need for extracting alignments:
% the final alignments and the words of the input
#show final/6.
#show word/2.


    
% Comments (0)

% HTTPS
% SSH
% You can clone a snippet to your computer for local editing. Learn more.
% Snippets

% KnowLP SemEval 2016 Task 2: Inspire System: ASP Source Code

% Comments (0)