Commits

cnu  committed a43f000

Added common contractions to tokenizer

  • Participants
  • Parent commits 96f2057

Comments (0)

Files changed (1)

File MontyTokenizer.py

 "they'll":"they 'll",
 }
     contractions_unwound={
-"ain't":"ai not",    }
+"ain't":"ai not",    
+"aren't":"are not",
+"isn't":"is not",
+"wasn't":"was not",
+"weren't":"were not",
+"didn't":"did not",
+"doesn't":"does not",
+"don't":"do not",
+"hadn't":"had not",
+"hasn't":"has not",
+"haven't":"have not",
+"can't":"can not",
+"couldn't":"could not",
+"needn't":"need not",
+"shouldn't":"should not",
+"shan't":"shall not",
+"won't":"will not",
+"wouldn't":"would not",
+"i'm":"i am",
+"you're":"you are",
+"he's":"he is",
+"she's":"she is",
+"it's":"it is",
+"we're":"we are",
+"they're":"they are",
+"i've":"i have",
+"you've":"you have",
+"we've":"we have",
+"they've":"they have",
+"who've":"who have",
+"what've":"what have",
+"when've":"when have",
+"where've":"where have",
+"why've":"why have",
+"how've":"how have",
+"i'd":"i would",
+"you'd":"you would",
+"he'd":"he would",
+"she'd":"she would",
+"we'd":"we would",
+"they'd":"they would",
+"i'll":"i will",
+"you'll":"you will",
+"he'll":"he will",
+"she'll":"she will",
+"we'll":"we will",
+"they'll":"they will",
+}
     common_abbrev_and_acro=[
 'mr.',
 'mrs.',
 'r.s.v.p.',
 'n.y.c.',
 'c.o.d.',
-'s.u.v.']
+'s.u.v.']