Commits

david_walker  committed 485f42a

preparation for branching

add .emacs.desktop to .hgignore
minor changes to kea2.py and rules.py
additions to samples.txt

  • Participants
  • Parent commits 0fdb7ac

Comments (0)

Files changed (4)

 # Emacs tag file
 ^TAGS$
 
-# emacs auto-save files
+# Emacs auto-save files
 \#.+\#
 
+# Emacs buffer-list file
+\.emacs\.desktop
+
 # compiled python files
 \.pyc$
 
     errors in Kiva loan descriptions.
     """
 
+    # Initialize logging to go to a file
     handler = logging.FileHandler("/home/david/Dropbox/Projects/Kiva/logfile.txt", "w",
                                   encoding="UTF-8")
     formatter = logging.Formatter("%(message)s")
         (u'infant-aged', u'infant'),
         (u'requesting for a', u'requesting a'),
         (u'requested a loan for ([0-9]+)', ur'requested a loan of \1'),
-        (u'he is widowed', u'he is a widower'),
+        (ur'\bhe is widowed', u'he is a widower'),
         (u'borrowed a loan', u'took out a loan'),
         (u'in a business of', u'in the business of'),
         (u'with (.+) children and (.+) of them go to school',
         (u'neighbour', u'neighbor'),
         (u'licencing', u'licensing'),
 
-        # currency abbreviations
+        # non-ISO currency abbreviations
         (u'/=', u' UGX '),
         (ur'(?i)ksh\.', u' KES '),
-        (ur'(?i)kshs', u' KES '),
+        (ur'(?i)kshs(\.|)', u' KES '),
         (ur'[Pp]hp', 'PHP'),
+        (ur'(?i)\bLE([0-9]*)\b', ur'SLL \1'),
+        (ur'\bRp\.', 'IDR'),
 
         # incorrect punctuation
         (ur'e\.t\.c\.?', u'etc.'),
         (ur'\betc([^.])', ur'etc.\1'),
-        (ur'([0-9]+) year(?:s?) old (man|woman)', ur'\1-year-old \2'),
+        (ur'([0-9]+) year(?:s?) old (man|woman|single|married|widow|widowed)',
+         ur'\1-year-old \2'),
         (ur'(?<!\.)\.\.(?!\.)', u'.'),  # blah.. -> blah.
 
         # grammatical errors
                     # Create a transform to split the token at this
                     # point.
                     logging.debug(u"PunctSplitRule '{}' at {}".format(
-                            token, i))
+                            token.str, i))
                     transforms.append(
                         tr.IndexSplitTransform(self,
                                                [token],
 Isabella is 60 years old, married to Michael. She has been keeping poultry for ten years with a monthly income of KES 12,000.
 
 five peso's-worth
+
+wood Le500, 000. He
+