Source

globetrotter / globetrotter.py

Full commit
# -*- coding: utf-8 -*-
#
#  globetrotter.py
#  globetrotter
#

"""
Approximate country and language finding for pycountry.
"""

import pycountry

def find_country(name):
    """Find a country's information given an approximate name.

    The query is normalized with ``_norm_string`` (spaces removed,
    lower-cased) and looked up in the ``_norm_countries`` table to recover
    the official name, which is then resolved through
    ``pycountry.countries``.

    Raises ``KeyError(name)`` -- always carrying the caller's original
    input, consistent with ``find_language`` -- when no country matches.
    """
    official_name = _norm_countries.get(_norm_string(name))
    if official_name is None:
        # Report the caller's input rather than the internal normalized key.
        raise KeyError(name)

    try:
        country = pycountry.countries.get(name=official_name)
    except KeyError:
        # Treat a failed lookup inside pycountry like any other miss.
        country = None

    if not country:
        raise KeyError(name)
    return country

def find_language(name):
    """Find a language's information given an approximate name.

    The query is normalized with ``_norm_string`` and then resolved in
    three stages, returning on the first stage that succeeds:

    1. exact match against the normalized language names;
    2. match against the known aliases in ``_language_aliases``;
    3. prefix match against the normalized language names. When several
       languages share the prefix, only those whose pycountry record has an
       ``alpha2`` attribute are kept, and the result is accepted only if
       exactly one candidate remains.

    Raises ``KeyError(name)`` when no stage yields a single language.
    """
    norm_query = _norm_string(name)

    # Stage 1: query against a normalized list of exact matches.
    official_name = _norm_languages.get(norm_query)
    if official_name:
        language = pycountry.languages.get(name=official_name)
        # The table was built from pycountry itself, so this must resolve.
        assert language
        return language

    # Stage 2: query against a normalized list of known aliases.
    alias_for = _language_aliases.get(norm_query)
    if alias_for is not None:
        language = pycountry.languages.get(name=_norm_languages[alias_for])
        assert language
        return language

    # Stage 3: prefix matching. ``items()`` is used instead of
    # ``iteritems()`` so this path works on Python 3 as well as Python 2.
    candidates = [official for (normalized, official)
                  in _norm_languages.items()
                  if normalized.startswith(norm_query)]

    if len(candidates) > 1:
        # Ambiguous prefix: default to languages with a two-letter ISO code.
        candidates = [official for official in candidates
                      if hasattr(pycountry.languages.get(name=official),
                                 'alpha2')]

    if len(candidates) == 1:
        return pycountry.languages.get(name=candidates[0])

    raise KeyError(name)

def _norm_string(s):
    return s.replace(' ', '').lower()

# Lookup tables built once at import time. Each maps a normalized name
# (see _norm_string) to the official name recorded by pycountry.
_norm_countries = dict(
    (_norm_string(country.name), country.name)
    for country in pycountry.countries.objects
)

_norm_languages = dict(
    (_norm_string(language.name), language.name)
    for language in pycountry.languages.objects
)

# Normalized alias -> normalized language name it stands for; the value is
# used as a key into _norm_languages.
_language_aliases = dict(
    mandarin='chinese',
    mandarinchinese='chinese',
)