Commits

david_walker committed 100412e

make ':' a non-spacing punctuation character

Comments (0)

Files changed (2)

     def is_nonspacing_punc(self):
         """Return True if this token is a non-spacing punctuation character.

         The token text (``self._str``) must be exactly one character long
         and must be one of the characters in the string literal tested in
         the return expression below.
         """
-        return len(self._str) == 1 and self._str in u',.!?;%'
+        return len(self._str) == 1 and self._str in u',.!?;%:'
 
     @property
     def is_open(self):
 
         # misspellings
         (u'dependants', u'dependents'),
+        (ur'therefor\b', u'therefore'),
 
         # proper nouns
         (u'congo town', u'Congo Town'),
         (ur'(?i)ksh\.', u' KES '),
 
         # incorrect punctuation
-        (ur'e\.t\.c\.', u'etc.'),
+        (ur'e\.t\.c\.?', u'etc.'),
         (ur'\betc([^.])', ur'etc.\1'),
         (ur'([0-9]+) year old (man|woman)', ur'\1-year-old \2'),
         (ur'(?<!\.)\.\.(?!\.)', u'.'),  # blah.. -> blah.
          ur'clicking the link to the '
          '<a href="http://www.kiva.org/team/nwtf_philippines">'
          'NWTF Kiva lending team</a>'),
+        (u'Kiva\'s Muslim World Lending helptext: http://tinyurl.com/3aekx8m',
+         u'Kiva\'s article on <a href="http://na3.salesforce.com/_ui/'
+         'selfservice/pkb/'
+         'PublicKnowledgeSolution/d?orgId=00D500000006svl&lang=1&id='
+         '50150000000SN1N&retURL=/sol/public/solutionbrowser.jsp%3Fsearch%3D'
+         'muslim%2Bworld%26cid%3D02n50000000DUOS%26orgId%3D00D500000006svl%26'
+         'lang%3D1%26t%3D4&ps=1&pPv=1">Lending in the Muslim World</a>'),
 
         # Jargon
         (u'cycle loan', u'loan'),