Pedro Romano avatar Pedro Romano committed 4c775c7 Draft

Updated for compatibility with the latest CLDR release (21.0.2). IMPORTANT NOTE: the implementation of 'babel.dates.get_timezone_name' is not considered finalised, because metazones no longer have the 'common' attribute and it is not clear how the code should be updated. Three doctests are currently failing: should those doctests be updated, or should the logic that determines the fallback be corrected?

Comments (0)

Files changed (6)

         The display name will include the language, territory, script, and
         variant, if those are specified.
         
-        >>> Locale('zh', 'CN', script='Hans').get_display_name('en') == 'Chinese (Simplified Han, China)'
+        >>> Locale('zh', 'CN', script='Hans').get_display_name('en') == 'Chinese (Simplified, China)'
         True
         
         :param locale: the locale to use
         
         >>> Locale('en', 'US').time_zones['Europe/London']['long']['daylight'] == 'British Summer Time'
         True
-        >>> Locale('en', 'US').time_zones['America/St_Johns']['city'] == "St. John's"
+        >>> Locale('en', 'US').time_zones['America/St_Johns']['city'] == 'St. John\u2019s'
         True
         
         :type: `dict`
     
     >>> from pytz import timezone
     >>> tz = timezone('America/St_Johns')
-    >>> get_timezone_location(tz, locale='de_DE') == "Kanada (St. John's)"
+    >>> get_timezone_location(tz, locale='de_DE') == 'Kanada Zeit (St. John\u2019s)'
     True
     >>> tz = timezone('America/Mexico_City')
-    >>> get_timezone_location(tz, locale='de_DE') == 'Mexiko (Mexiko-Stadt)'
+    >>> get_timezone_location(tz, locale='de_DE') == 'Mexiko Zeit (Mexiko-Stadt)'
     True
     
     If the timezone is associated with a country that uses only a single
     timezone, just the localized country name is returned:
     
     >>> tz = timezone('Europe/Berlin')
-    >>> get_timezone_location(tz, locale='de_DE') == 'Deutschland'
+    >>> get_timezone_location(tz, locale='de_DE') == 'Deutschland Zeit'
     True
     
     :param dt_or_tzinfo: the ``datetime`` or ``tzinfo`` object that determines
         return region_format % (territory_name)
 
     # Otherwise, include the city in the output
-    fallback_format = locale.zone_formats['fallback']
     if 'city' in info:
         city_name = info['city']
     else:
         else:
             city_name = zone.replace('_', ' ')
 
-    return region_format % (fallback_format % {
+    fallback_format = locale.zone_formats['fallback']
+    fallback_region_format = locale.zone_formats.get(
+        'fallback_region', region_format % fallback_format)
+
+    return fallback_region_format % {
         '0': city_name,
         '1': territory_name
-    })
+    }
 
-def get_timezone_name(dt_or_tzinfo=None, width='long', uncommon=False,
-                      locale=LC_TIME):
+def get_timezone_name(dt_or_tzinfo=None, width='long', locale=LC_TIME):
     r"""Return the localized display name for the given timezone. The timezone
     may be specified using a ``datetime`` or `tzinfo` object.
     
     that country is returned, formatted according to the locale:
     
     >>> tz = timezone('Europe/Berlin')
-    >>> get_timezone_name(tz, locale='de_DE') == 'Deutschland'
+    >>> get_timezone_name(tz, locale='de_DE') == 'Deutschland Zeit'
     True
     >>> get_timezone_name(tz, locale='pt_BR') == 'Hor\xe1rio Alemanha'
     True
     included in the representation:
     
     >>> tz = timezone('America/St_Johns')
-    >>> get_timezone_name(tz, locale='de_DE') == "Kanada (St. John's)"
-    True
-    
-    The `uncommon` parameter can be set to `True` to enable the use of timezone
-    representations that are not commonly used by the requested locale. For
-    example, while in French the central European timezone is usually
-    abbreviated as "HEC", in Canadian French, this abbreviation is not in
-    common use, so a generic name would be chosen by default:
-    
-    >>> tz = timezone('Europe/Paris')
-    >>> get_timezone_name(tz, 'short', locale='fr_CA') == 'France'
-    True
-    >>> get_timezone_name(tz, 'short', uncommon=True, locale='fr_CA') == 'HEC'
+    >>> get_timezone_name(tz, locale='de_DE') == 'Kanada Zeit (St. John\u2019s)'
     True
     
     :param dt_or_tzinfo: the ``datetime`` or ``tzinfo`` object that determines
                          independent of daylight savings time; if `None`, the
                          current date in UTC is assumed
     :param width: either "long" or "short"
-    :param uncommon: whether even uncommon timezone abbreviations should be used
     :param locale: the `Locale` object, or a locale string
     :return: the timezone display name
     :rtype: `unicode`
     metazone = get_global('meta_zones').get(zone)
     if metazone:
         metazone_info = locale.meta_zones.get(metazone, {})
-        if width in metazone_info and (uncommon or metazone_info.get('common')):
+        if width in metazone_info:
             if dt is None:
                 field = 'generic'
             else:
     
     >>> from pytz import timezone
     >>> format_datetime(dt, 'full', tzinfo=timezone('Europe/Paris'),
-    ...                 locale='fr_FR') == 'dimanche 1 avril 2007 17:30:00 Heure avanc\xe9e de l\u2019Europe centrale'
+    ...                 locale='fr_FR') == 'dimanche 1 avril 2007 17:30:00 heure avanc\xe9e d\u2019Europe centrale'
     True
     >>> format_datetime(dt, "yyyy.MM.dd G 'at' HH:mm:ss zzz",
     ...                 tzinfo=timezone('US/Eastern'), locale='en') == '2007.04.01 AD at 11:30:00 EDT'
     >>> t = datetime(2007, 4, 1, 15, 30)
     >>> tzinfo = timezone('Europe/Paris')
     >>> t = tzinfo.localize(t)
-    >>> format_time(t, format='full', tzinfo=tzinfo, locale='fr_FR') == '15:30:00 Heure avanc\xe9e de l\u2019Europe centrale'
+    >>> format_time(t, format='full', tzinfo=tzinfo, locale='fr_FR') == '15:30:00 heure avanc\xe9e d\u2019Europe centrale'
     True
     >>> format_time(t, "hh 'o''clock' a, zzzz", tzinfo=timezone('US/Eastern'),
     ...             locale='en') == "09 o'clock AM, Eastern Daylight Time"
     
     >>> t = time(15, 30)
     >>> format_time(t, format='full', tzinfo=timezone('Europe/Paris'),
-    ...             locale='fr_FR') == '15:30:00 Heure normale de l\u2019Europe centrale'
+    ...             locale='fr_FR') == '15:30:00 heure normale de l\u2019Europe centrale'
     True
     >>> format_time(t, format='full', tzinfo=timezone('US/Eastern'),
     ...             locale='en_US') == '3:30:00 PM Eastern Standard Time'
     
     >>> format_currency(1099.98, 'USD', locale='en_US') == '$1,099.98'
     True
-    >>> format_currency(1099.98, 'USD', locale='es_CO') == 'US$\\xa01.099,98'
+    >>> format_currency(1099.98, 'USD', locale='es_CO') == '1.099,98\\xa0US$'
     True
     >>> format_currency(1099.98, 'EUR', locale='de_DE') == '1.099,98\\xa0\\u20ac'
     True
     'one'
     >>> func(3)
     'few'
+    >>> func = to_python({'one': 'n in 1,11', 'few': 'n in 3..10,13..19'})
+    >>> func(11)
+    'one'
+    >>> func(15)
+    'few'
 
     :param rule: the rules as list or dict, or a `PluralRule` object
     :return: a corresponding Python function
     :raise RuleError: if the expression is malformed
     """
     namespace = {
-        'IN':       in_range,
-        'WITHIN':   within_range,
+        'IN':       in_range_list,
+        'WITHIN':   within_range_list,
         'MOD':      cldr_modulo
     }
     to_python = _PythonCompiler().compile
     return ''.join(result)
 
 
-def in_range(num, min, max):
-    """Integer range test.  This is the callback for the "in" operator
+def in_range_list(num, range_list):
+    """Integer range list test.  This is the callback for the "in" operator
     of the UTS #35 pluralization rule language:
 
-    >>> in_range(1, 1, 3)
+    >>> in_range_list(1, [(1, 3)])
     True
-    >>> in_range(3, 1, 3)
+    >>> in_range_list(3, [(1, 3)])
     True
-    >>> in_range(1.2, 1, 4)
+    >>> in_range_list(3, [(1, 3), (5, 8)])
+    True
+    >>> in_range_list(1.2, [(1, 4)])
     False
-    >>> in_range(10, 1, 4)
+    >>> in_range_list(10, [(1, 4)])
+    False
+    >>> in_range_list(10, [(1, 4), (6, 8)])
     False
     """
-    return num == int(num) and within_range(num, min, max)
+    return num == int(num) and within_range_list(num, range_list)
 
 
-def within_range(num, min, max):
+def within_range_list(num, range_list):
     """Float range test.  This is the callback for the "within" operator
     of the UTS #35 pluralization rule language:
 
-    >>> within_range(1, 1, 3)
+    >>> within_range_list(1, [(1, 3)])
     True
-    >>> within_range(1.0, 1, 3)
+    >>> within_range_list(1.0, [(1, 3)])
     True
-    >>> within_range(1.2, 1, 4)
+    >>> within_range_list(1.2, [(1, 4)])
     True
-    >>> within_range(10, 1, 4)
+    >>> within_range_list(8.8, [(1, 4), (7, 15)])
+    True
+    >>> within_range_list(10, [(1, 4)])
+    False
+    >>> within_range_list(10.5, [(1, 4), (20, 30)])
     False
     """
-    return num >= min and num <= max
+    return any(num >= min_ and num <= max_ for min_, max_ in range_list)
 
 
 def cldr_modulo(a, b):
     """Internal parser.  This class can translate a single rule into an abstract
     tree of tuples. It implements the following grammar::
 
-        condition   = and_condition ('or' and_condition)*
+        condition     = and_condition ('or' and_condition)*
         and_condition = relation ('and' relation)*
-        relation    = is_relation | in_relation | within_relation | 'n' <EOL>
-        is_relation = expr 'is' ('not')? value
-        in_relation = expr ('not')? 'in' range
-        within_relation = expr ('not')? 'within' range
-        expr        = 'n' ('mod' value)?
-        value       = digit+
-        digit       = 0|1|2|3|4|5|6|7|8|9
-        range       = value'..'value
+        relation      = is_relation | in_relation | within_relation | 'n' <EOL>
+        is_relation   = expr 'is' ('not')? value
+        in_relation   = expr ('not')? 'in' range_list
+        within_relation = expr ('not')? 'within' range_list
+        expr          = 'n' ('mod' value)?
+        range_list    = (range | value) (',' range_list)*
+        value         = digit+
+        digit         = 0|1|2|3|4|5|6|7|8|9
+        range         = value'..'value
 
     - Whitespace can occur between or around any of the above tokens.
     - Rules should be mutually exclusive; for a given numeric value, only one
       rule should apply (i.e. the condition should only be true for one of
-      the plural rule elements.
+      the plural rule elements).
+    - The in and within relations can take comma-separated lists, such as:
+      'n in 3,5,7..15'.
 
     The translator parses the expression on instantiation into an attribute
     called `ast`.
         (None, re.compile(r'\s+(?u)')),
         ('word', re.compile(r'\b(and|or|is|(?:with)?in|not|mod|n)\b')),
         ('value', re.compile(r'\d+')),
+        ('comma', re.compile(r',')),
         ('ellipsis', re.compile(r'\.\.'))
     ]
 
             method = 'within'
         else:
             self.expect('word', 'in', term="'within' or 'in'")
-        rv = 'relation', (method, left, self.range())
+        rv = 'relation', (method, left, self.range_list())
         if negated:
             rv = 'not', (rv,)
         return rv
 
-    def range(self):
+    def range_or_value(self):
         left = self.value()
-        self.expect('ellipsis')
-        return 'range', (left, self.value())
+        if self.skip('ellipsis'):
+            return((left, self.value()))
+        else:
+            return((left, left))
+
+    def range_list(self):
+        range_list = [self.range_or_value()]
+        while self.skip('comma'):
+            range_list.append(self.range_or_value())
+        return 'range_list', range_list
 
     def expr(self):
         self.expect('word', 'n')
     compile_is = _binary_compiler('(%s == %s)')
     compile_isnot = _binary_compiler('(%s != %s)')
 
-    def compile_relation(self, method, expr, range):
-        range = '%s, %s' % tuple(map(self.compile, range[1]))
-        return '%s(%s, %s)' % (method.upper(), self.compile(expr), range)
+    def compile_relation(self, method, expr, range_list):
+        compile_range_list = '[%s]' % ','.join(
+            ['(%s, %s)' % tuple(map(self.compile, range_))
+             for range_ in range_list[1]])
+        return '%s(%s, %s)' % (method.upper(), self.compile(expr),
+                               compile_range_list)
 
 
 class _PythonCompiler(_Compiler):

babel/tests/dates.py

     def test_month_context(self):
         d = date(2006, 1, 8)
         fmt = dates.DateTimeFormat(d, locale='cs_CZ')
-        self.assertEqual('1', fmt['MMM'])
+        self.assertEqual('1', fmt['M'])
+        fmt = dates.DateTimeFormat(d, locale='cs_CZ')
+        self.assertEqual('01', fmt['MM'])
+        fmt = dates.DateTimeFormat(d, locale='cs_CZ')
+        self.assertEqual('Led', fmt['MMM'])
         fmt = dates.DateTimeFormat(d, locale='cs_CZ')
         self.assertEqual('1.', fmt['LLL'])
 
         fmt = dates.DateTimeFormat(d, locale='de_DE')
         self.assertEqual('52', fmt['w'])
         fmt = dates.DateTimeFormat(d, locale='en_US')
-        self.assertEqual('52', fmt['w'])
+        self.assertEqual('53', fmt['w'])
 
     def test_week_of_month_first(self):
         d = date(2006, 1, 8)
         self.assertEqual('7', fmt['e']) # monday is first day of week
         fmt = dates.DateTimeFormat(d, locale='en_US')
         self.assertEqual('01', fmt['ee']) # sunday is first day of week
-        fmt = dates.DateTimeFormat(d, locale='dv_MV')
+        fmt = dates.DateTimeFormat(d, locale='bn_BD')
         self.assertEqual('03', fmt['ee']) # friday is first day of week
 
         d = date(2007, 4, 2) # a monday
         self.assertEqual('1', fmt['e']) # monday is first day of week
         fmt = dates.DateTimeFormat(d, locale='en_US')
         self.assertEqual('02', fmt['ee']) # sunday is first day of week
-        fmt = dates.DateTimeFormat(d, locale='dv_MV')
+        fmt = dates.DateTimeFormat(d, locale='bn_BD')
         self.assertEqual('04', fmt['ee']) # friday is first day of week
 
     def test_local_day_of_week_standalone(self):
         self.assertEqual('7', fmt['c']) # monday is first day of week
         fmt = dates.DateTimeFormat(d, locale='en_US')
         self.assertEqual('1', fmt['c']) # sunday is first day of week
-        fmt = dates.DateTimeFormat(d, locale='dv_MV')
+        fmt = dates.DateTimeFormat(d, locale='bn_BD')
         self.assertEqual('3', fmt['c']) # friday is first day of week
 
         d = date(2007, 4, 2) # a monday
         self.assertEqual('1', fmt['c']) # monday is first day of week
         fmt = dates.DateTimeFormat(d, locale='en_US')
         self.assertEqual('2', fmt['c']) # sunday is first day of week
-        fmt = dates.DateTimeFormat(d, locale='dv_MV')
+        fmt = dates.DateTimeFormat(d, locale='bn_BD')
         self.assertEqual('4', fmt['c']) # friday is first day of week
 
     def test_fractional_seconds(self):
         tz = timezone('Europe/Paris')
         dt = datetime(2007, 4, 1, 15, 30, tzinfo=tz)
         fmt = dates.DateTimeFormat(dt, locale='fr_CA')
-        self.assertEqual('France', fmt['v'])
-
-    def test_timezone_with_uncommon(self):
-        tz = timezone('Europe/Paris')
-        dt = datetime(2007, 4, 1, 15, 30, tzinfo=tz)
-        fmt = dates.DateTimeFormat(dt, locale='fr_CA')
-        self.assertEqual('HEC', fmt['V'])
+        self.assertEqual('Heure\u00a0: France', fmt['v'])
 
     def test_timezone_location_format(self):
         tz = timezone('Europe/Paris')
         dt = datetime(2007, 4, 1, 15, 30, tzinfo=tz)
         fmt = dates.DateTimeFormat(dt, locale='fr_FR')
-        self.assertEqual('France', fmt['VVVV'])
+        self.assertEqual('Heure\u00a0: France', fmt['VVVV'])
 
     def test_timezone_walltime_short(self):
         tz = timezone('Europe/Paris')
         t = time(15, 30, tzinfo=tz)
-        fmt = dates.DateTimeFormat(t, locale='fr_FR')
-        self.assertEqual('HEC', fmt['v'])
+        fmt = dates.DateTimeFormat(t, locale='de_DE')
+        self.assertEqual('MEZ', fmt['v'])
 
     def test_timezone_walltime_long(self):
         tz = timezone('Europe/Paris')
         t = time(15, 30, tzinfo=tz)
         fmt = dates.DateTimeFormat(t, locale='fr_FR')
-        self.assertEqual('Heure de l\u2019Europe centrale', fmt['vvvv'])
+        self.assertEqual('heure de l\u2019Europe centrale', fmt['vvvv'])
 
     def test_hour_formatting(self):
         l = 'en_US'

scripts/import_cldr.py

                            '..', 'babel')
 
     sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml'))
+    bcp47_timezone = parse(os.path.join(srcdir, 'bcp47', 'timezone.xml'))
+    sup_windows_zones = parse(os.path.join(srcdir, 'supplemental',
+                                           'windowsZones.xml'))
+
 
     # Import global data from the supplemental files
     global_data = {}
     territory_zones = global_data.setdefault('territory_zones', {})
     zone_aliases = global_data.setdefault('zone_aliases', {})
     zone_territories = global_data.setdefault('zone_territories', {})
-    for elem in sup.findall('.//timezoneData/zoneFormatting/zoneItem'):
-        tzid = elem.attrib['type']
-        territory_zones.setdefault(elem.attrib['territory'], []).append(tzid)
-        zone_territories[tzid] = elem.attrib['territory']
-        if 'aliases' in elem.attrib:
-            for alias in elem.attrib['aliases'].split():
-                zone_aliases[alias] = tzid
+
+    # Create an auxiliary zone->territory map from the Windows zones. We don't
+    # set the 'zone_territories' map directly here because some zone aliases
+    # are listed as well, and we defer the decision of which ones to choose to
+    # the 'bcp47' data.
+    _zone_territory_map = {}
+    for map_zone in sup_windows_zones.findall('.//windowsZones/mapTimezones/mapZone'):
+        for tzid in map_zone.attrib['type'].split():
+            _zone_territory_map[tzid] = map_zone.attrib['territory']
+
+    for key_elem in bcp47_timezone.findall('.//keyword/key'):
+        if key_elem.attrib['name'] == 'tz':
+            for elem in key_elem.findall('type'):
+                aliases = elem.attrib['alias'].split()
+                tzid = aliases.pop(0)
+                territory = _zone_territory_map.get(tzid, '001')
+                territory_zones.setdefault(territory, []).append(tzid)
+                zone_territories[tzid] = territory
+                for alias in aliases:
+                    zone_aliases[alias] = tzid
+            break
 
     # Import Metazone mapping
     meta_zones = global_data.setdefault('meta_zones', {})
-    tzsup = parse(os.path.join(srcdir, 'supplemental', 'metazoneInfo.xml'))
+    tzsup = parse(os.path.join(srcdir, 'supplemental', 'metaZones.xml'))
     for elem in tzsup.findall('.//timezone'):
         for child in elem.findall('usesMetazone'):
             if 'to' not in child.attrib: # FIXME: support old mappings
                 zone_formats['fallback'] = text_type(elem.text) \
                     .replace('{0}', '%(0)s').replace('{1}', '%(1)s')
                 break
+        for elem in tree.findall('.//timeZoneNames/fallbackRegionFormat'):
+            if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
+                zone_formats['fallback_region'] = text_type(elem.text) \
+                    .replace('{0}', '%(0)s').replace('{1}', '%(1)s')
+                break
 
         time_zones = data.setdefault('time_zones', {})
         for elem in tree.findall('.//timeZoneNames/zone'):
                 info.setdefault('long', {})[child.tag] = text_type(child.text)
             for child in elem.findall('short/*'):
                 info.setdefault('short', {})[child.tag] = text_type(child.text)
-            info['common'] = elem.findtext('commonlyUsed') == 'true'
             meta_zones[elem.attrib['type']] = info
 
         for calendar in tree.findall('.//calendars/calendar'):
 
             # AM/PM
             periods = data.setdefault('periods', {})
-            for elem in calendar.findall('am'):
-                if ('draft' in elem.attrib or 'alt' in elem.attrib) \
-                        and elem.tag in periods:
-                    continue
-                periods[elem.tag] = text_type(elem.text)
-            for elem in calendar.findall('pm'):
-                if ('draft' in elem.attrib or 'alt' in elem.attrib) \
-                        and elem.tag in periods:
-                    continue
-                periods[elem.tag] = text_type(elem.text)
+            for day_period_width in calendar.findall(
+                'dayPeriods/dayPeriodContext/dayPeriodWidth'):
+                if day_period_width.attrib['type'] == 'wide':
+                    for day_period in day_period_width.findall('dayPeriod'):
+                        if 'alt' not in day_period.attrib:
+                            periods[day_period.attrib['type']] = text_type(
+                                day_period.text)
 
             date_formats = data.setdefault('date_formats', {})
             for format in calendar.findall('dateFormats'):
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.