Commits

Tim Heap committed 41ee60b Draft

Made `parse` more robust, support more languages

  • Participants
  • Parent commits ee0c46d

Comments (0)

Files changed (1)

File timedelta/helpers.py

 import re
 import datetime
 from decimal import Decimal
+from itertools import product
+
+
+# Unit-name matchers, keyed first by language code and then by the
+# ``datetime.timedelta`` keyword the word stands for.  The extra 'ignore'
+# entry matches filler words that carry no duration.  Every pattern is
+# anchored and case-insensitive, so it must match the whole word.
+language_sets = {}
+
+language_sets['en'] = {
+    'ignore': re.compile(r'^(and)$', re.IGNORECASE),
+    'weeks': re.compile(r'^(weeks?|wks?|w)$', re.IGNORECASE),
+    'days': re.compile(r'^(days?|d)$', re.IGNORECASE),
+    'hours': re.compile(r'^(hours?|hrs|hr?)$', re.IGNORECASE),
+    'minutes': re.compile(r'^(minutes?|mins?|m)$', re.IGNORECASE),
+    'seconds': re.compile(r'^(seconds?|secs?|s)$', re.IGNORECASE),
+}
+
+language_sets['de'] = {
+    'ignore': re.compile(r'^(und)$', re.IGNORECASE),
+    # "Woche"/"Wochen"; the truncated "woch" stays accepted because
+    # earlier versions of this pattern matched it.
+    'weeks': re.compile(r'^(woch(?:en?)?|ws?)$', re.IGNORECASE),
+    # "Tag"/"Tage"/"Tagen"; the truncated "ta" stays accepted for the
+    # same reason.
+    'days': re.compile(r'^(ta(?:g(?:en?)?)?|ts?)$', re.IGNORECASE),
+    # A bare "s" must NOT match here: hours are tried before seconds, so
+    # it would shadow the seconds abbreviation.  "ss" is kept because it
+    # was accepted before.
+    'hours': re.compile(r'^(stunden?|std|ss|hr?)$', re.IGNORECASE),
+    'minutes': re.compile(r'^(minuten?|mins?|m)$', re.IGNORECASE),
+    'seconds': re.compile(r'^(Sekunden?|seks?|s)$', re.IGNORECASE),
+}
+
+# Order in which the matchers of each language are tried by ``parse``.
+bits = ['ignore', 'weeks', 'days', 'hours', 'minutes', 'seconds']
+
 
 def nice_repr(timedelta, display="long", sep=", "):
     """
 
     return "".join(result)
 
-def parse(string):
+
+def parse(time_string, languages=('en',)):
     """
     Parse a string into a timedelta object.
+
+    ``time_string`` is read as a sequence of ``[count] unit`` chunks
+    separated by whitespace or commas, e.g. ``"1 week, 2 days and 3h"``.
+    A unit without a count is counted once.  ``languages`` lists the
+    ``language_sets`` keys whose unit names are accepted; they are tried
+    in the order given.
+
+    Raises ``TypeError`` if the string is empty or a chunk cannot be
+    recognised, ``ValueError`` if the same unit is given more than once,
+    and ``KeyError`` if a language code is not in ``language_sets``.
     
     >>> parse("2 ws")
     Traceback (most recent call last):
     ...
-    TypeError: '2 ws' is not a valid time interval
+    TypeError: Could not parse remaining time string: '2 ws' from '2 ws'
     >>> parse("2 ds")
     Traceback (most recent call last):
     ...
-    TypeError: '2 ds' is not a valid time interval
+    TypeError: Could not parse remaining time string: '2 ds' from '2 ds'
     >>> parse("2 hs")
     Traceback (most recent call last):
     ...
-    TypeError: '2 hs' is not a valid time interval
+    TypeError: Could not parse remaining time string: '2 hs' from '2 hs'
     >>> parse("2 ms")
     Traceback (most recent call last):
     ...
-    TypeError: '2 ms' is not a valid time interval
+    TypeError: Could not parse remaining time string: '2 ms' from '2 ms'
     >>> parse("2 ss")
     Traceback (most recent call last):
     ...
-    TypeError: '2 ss' is not a valid time interval
+    TypeError: Could not parse remaining time string: '2 ss' from '2 ss'
     >>> parse("")
     Traceback (most recent call last):
     ...
     TypeError: '' is not a valid time interval
     
     """
-    if string == "":
-        raise TypeError("'%s' is not a valid time interval" % string)
-    # This is the format we get from sometimes Postgres, and from serialization
-    d = re.match(r'((?P<days>\d+) days?,? )?(?P<hours>\d+):'
-                 r'(?P<minutes>\d+)(:(?P<seconds>\d+))?',
-                 unicode(string))
-    if d: 
-        d = d.groupdict(0)
-    else:
-        # This is the more flexible format
-        d = re.match(
-                     r'^((?P<weeks>((\d*\.\d+)|\d+))\W*w((ee)?(k(s)?)?)(,)?\W*)?'
-                     r'((?P<days>((\d*\.\d+)|\d+))\W*d(ay(s)?)?(,)?\W*)?'
-                     r'((?P<hours>((\d*\.\d+)|\d+))\W*h(ou)?(r(s)?)?(,)?\W*)?'
-                     r'((?P<minutes>((\d*\.\d+)|\d+))\W*m(in(ute)?(s)?)?(,)?\W*)?'
-                     r'((?P<seconds>((\d*\.\d+)|\d+))\W*s(ec(ond)?(s)?)?)?\W*$',
-                     unicode(string))
-        if not d:
-            raise TypeError("'%s' is not a valid time interval" % string)
-        d = d.groupdict(0)
     
-    return datetime.timedelta(**dict(( (k, float(v)) for k,v in d.items())))
+    if not time_string:
+        raise TypeError('{0!r} is not a valid time interval'.format(time_string))
+
+    sets = [language_sets[language] for language in languages]
+
+    # One chunk: optional (possibly negative, possibly fractional) count,
+    # a unit word, then a comma, whitespace or the end of the string.
+    splitter = re.compile(r'^\s*(?P<count>-?(?:\d*\.)?\d+)?\s*(?P<type>\w+)(?:,\s*|\s+|$)')
+
+    kwargs = {}
+    remaining = time_string
+
+    while remaining:
+        match = splitter.match(remaining)
+        bit = None
+        if match:
+            count, time_type = match.groups()
+            # First unit whose pattern accepts the word, trying the
+            # languages in the order given and the units in ``bits`` order.
+            bit = next(
+                (name for language_set, name in product(sets, bits)
+                 if language_set[name].match(time_type)),
+                None)
+
+        if bit is None:
+            # Report the unparsed remainder first, then the full input.
+            raise TypeError(
+                "Could not parse remaining time string: {0!r} from {1!r}".format(
+                    remaining[:20], time_string))
+
+        # Filler words may repeat freely and contribute nothing.
+        if bit != 'ignore':
+            if bit in kwargs:
+                raise ValueError(
+                    "Time type '{0}' specified more than once".format(bit))
+            kwargs[bit] = float(count or '1')
+
+        remaining = remaining[match.end():]
+
+    return datetime.timedelta(**kwargs)
 
 
 def divide(obj1, obj2, as_float=False):