Commits

Lars Yencken committed 802d382

Favour popular languages and use aliases.

When a prefix match is ambiguous, favour languages that have a two-letter ISO
code. Add aliases for high-frequency language names that would otherwise fail
to match (such as Mandarin Chinese).

  • Participants
  • Parent commits c1a2dd3

Comments (0)

Files changed (2)

File globetrotter.py

 def find_language(name):
     "Find a language's information given an approximate name."
     norm_query = _norm_string(name)
+
+    # query against a normalized list of exact matches
     norm_name = _norm_languages.get(norm_query)
+
     if norm_name:
+        # hit!
         l = pycountry.languages.get(name=norm_name)
         assert l
         return l
 
+    # query against a normalized list of known aliases
+    if norm_query in _language_aliases:
+        alias_for = _language_aliases[norm_query]
+        l = pycountry.languages.get(name=_norm_languages[alias_for])
+        assert l
+        return l
+
     # try prefix matching
     matches = [l for (n, l) in _norm_languages.iteritems() if
             n.startswith(norm_query)]
-    if matches and len(matches) == 1:
-        return pycountry.languages.get(name=matches[0])
+    if matches:
+        if len(matches) > 1:
+            # default to languages with a two-letter ISO code
+            matches = [m for m in matches
+                    if hasattr(pycountry.languages.get(name=m), 'alpha2')]
+
+        if len(matches) == 1:
+            return pycountry.languages.get(name=matches[0])
 
     raise KeyError(name)
 
 _norm_languages = {_norm_string(l.name): l.name
         for l in pycountry.languages.objects}
 
+_language_aliases = {
+        'mandarin': 'chinese',
+        'mandarinchinese': 'chinese',
+    }
+

File test_globetrotter.py

         for approx, name in pairs:
             self.assertEqual(fl(approx).name, name)
 
+    def test_modern_ancient(self):
+        fl = globetrotter.find_language
+        pairs = [
+                ('Greek', 'Greek, Modern (1453-)'),
+                ('Dutch', 'Dutch; Flemish'),
+            ]
+        for approx, name in pairs:
+            self.assertEqual(fl(approx).name, name)
+
+    def test_aliases(self):
+        fl = globetrotter.find_language
+        pairs = [
+                ('Mandarin', 'Chinese'),
+                ('Mandarin Chinese', 'Chinese'),
+            ]
+        for approx, name in pairs:
+            self.assertEqual(fl(approx).name, name)
+
+
 def _mangle_case(s):
     if len(s) == 0:
         return s