Dragoman / dragoman / lib /

Full commit
import functools
import gettext
import urllib
try:
    import json as json_parser
except ImportError:
    import simplejson as json_parser
from collections import defaultdict
from itertools import groupby, imap
from pprint import pprint

from eggmonster import env
from pymongo import Connection, json_util, DESCENDING, ASCENDING
from pymongo.objectid import ObjectId

from dragoman.lib.gettext_tools import GettextCatalog

class MongoJSON(object):
    '''Thin front end over the JSON module that plugs pymongo's
    json_util hooks into encoding and decoding.'''

    def dumps(self, *args, **kw):
        '''Serialize to JSON. The ``default`` hook is always set to
        json_util.default, replacing any value the caller passed.'''
        kw['default'] = json_util.default
        return json_parser.dumps(*args, **kw)

    def loads(self, *args, **kw):
        '''Parse JSON. The ``object_hook`` is always set to
        json_util.object_hook, replacing any value the caller passed.'''
        kw['object_hook'] = json_util.object_hook
        return json_parser.loads(*args, **kw)
# Shared module-level instance, exposed under the name ``json``.
json = MongoJSON()

def get_db():
    '''Open a slave-okay Mongo connection and return the database
    named by env.storage_db.

    env.storage_host and env.storage_port are passed positionally to
    Connection, but only the ones that are truthy; when neither is set
    Connection is called with its own defaults.
    '''
    location = [
        part for part in (env.storage_host, env.storage_port) if part
    ]
    if not location:
        return Connection(slave_okay=True)[env.storage_db]
    return Connection(slave_okay=True, *location)[env.storage_db]

# 'piglatin' is a placeholder language: Catalog.is_valid_lang() always
# accepts it, LanguageList.items() leaves it out, and CatalogList.add()
# creates new catalogs under it.
PLACEHOLDER = 'piglatin'
# Collection names that CatalogList.catalogs() skips when listing catalogs.
# NOTE(review): the members of this literal are not visible here -- confirm.
SYSNAMES = set([

def percent_encoding(f):
    '''Decorator for methods whose first argument after the instance is
    a percent-encoded phrase.

    The wrapped callable receives the phrase after it has been passed
    through ``unquote``. A falsy phrase (``None``, ``''``) is replaced
    by the empty string first. All remaining positional and keyword
    arguments are forwarded unchanged, and the wrapper keeps the name
    and docstring of ``f``.
    '''
    # unquote lives on urllib itself on Python 2 and in urllib.parse on
    # Python 3; resolve it once, at decoration time.
    try:
        unquote = urllib.unquote
    except AttributeError:
        from urllib.parse import unquote

    @functools.wraps(f)
    def func(obj, phrase, *args, **kw):
        if not phrase:
            phrase = ''
        return f(obj, unquote(phrase), *args, **kw)
    return func

def first(tupl):
    '''Return the item at index 0 of ``tupl``.'''
    head = tupl[0]
    return head

def second(tupl):
    '''Return the item at index 1 of ``tupl``.'''
    item = tupl[1]
    return item

class LanguageList(object):
    '''A mongo backed dicty list of languages. Results for keys and
    items will be sorted based on the abbrev.'''

    collection = 'config.languages'
    def __init__(self):
        '''Keep the database handle returned by get_db() on the instance.'''
        handle = get_db()
        self.db = handle

    def get(self, abbrev):
        '''Return the name stored for ``abbrev``, or None when no
        language document has that abbrev.

        Also bound as ``__getitem__``, so ``languages[abbrev]`` returns
        None for an unknown abbrev instead of raising KeyError.
        '''
        # Use the handle opened in __init__, as the other methods do,
        # instead of opening a new connection on every lookup.
        languages = self.db[self.collection]
        result = languages.find_one({'abbrev': abbrev})
        if result:
            return result['name']
        return None
    __getitem__ = get

    def __contains__(self, abbrev):
        '''True when get() returns a truthy name for ``abbrev``.'''
        return bool(self.get(abbrev))

    def add(self, abbrev, name):
        '''Build an abbrev/name document and check whether an identical
        one is already stored.

        NOTE(review): the dict literal is not closed and the statement
        guarded by the ``if`` is not visible in this view -- presumably
        the document is inserted there; confirm against the full file.
        '''
        db = self.db[self.collection]
        data = {
            'abbrev': abbrev,
            'name': name,
        # Proceed only when no stored document matches both fields.
        if not db.find(data).count():

    def delete(self, abbrev):
        '''Find every language document whose abbrev equals ``abbrev``
        and loop over the matches.

        NOTE(review): the loop body is not visible in this view --
        presumably each match is removed there; confirm.
        '''
        db = self.db[self.collection]
        query = {'abbrev': abbrev}
        lang = db.find(query)
        for l in lang:

    def keys(self):
        '''Generate the abbrev of every stored language, ordered by
        abbrev. Also bound as ``__iter__``.'''
        cursor = self.db[self.collection].find({}, ['abbrev'])
        for doc in cursor.sort('abbrev'):
            yield doc['abbrev']
    __iter__ = keys
    def values(self):
        '''Yield the name of every stored language.'''
        db = self.db[self.collection]
        # Only the 'name' field is requested; yield that field rather
        # than the whole document, which would also carry Mongo's _id.
        for doc in db.find({}, ['name']):
            yield doc['name']

    def items(self):
        '''Yield ``(abbrev, name)`` pairs ordered by abbrev, leaving out
        the PLACEHOLDER language.'''
        db = self.db[self.collection]
        # Sort by abbrev: the class docstring promises sorted items, and
        # this keeps the order in step with keys().
        for doc in db.find().sort('abbrev'):
            if doc['abbrev'] != PLACEHOLDER:
                yield doc['abbrev'], doc['name']

class CatalogList(object):
    def __init__(self):
        '''Open the database handle and start with an empty mapping of
        catalog name to list of languages.'''
        handle = get_db()
        self.db = handle
        self._catalogs = defaultdict(list)

    def catalogs(self):
        '''Takes the collection names and breaks them into catalog,
        language pairs, then collects the languages under their catalog.

        Names found in SYSNAMES are skipped. Each call extends the lists
        already held in self._catalogs and returns that same mapping.

        NOTE(review): nothing visible here sorts the names, and groupby
        only merges adjacent equal keys, so the result depends on the
        order collection_names() returns -- confirm. The closing bracket
        of the list comprehension is also not visible in this view.'''
        catlangs = [
            tuple(n.split('.', 1))
            for n in self.db.collection_names()
            if n not in SYSNAMES

        for k, g in groupby(catlangs, first):
            self._catalogs[k].extend(map(second, g))
        return self._catalogs

    def delete(self, catalog):
        '''Walk all collection names and select those that start with
        ``catalog``.

        NOTE(review): the statement guarded by the ``if`` is not visible
        in this view -- presumably the collection is dropped; confirm.
        '''
        for collection in self.db.collection_names():
            # Prefix match: any name beginning with ``catalog`` qualifies.
            if collection.startswith(catalog):

    def add(self, catalog):
        '''Build and return a Catalog for ``catalog`` in the PLACEHOLDER
        language.'''
        return Catalog(catalog, PLACEHOLDER)

class Catalog(object):

    def is_valid_lang(self, lang):
        '''Tell whether ``lang`` may be used for a catalog.

        Returns True for PLACEHOLDER, the falsy ``lang`` itself when it
        is empty or None, and otherwise whatever LanguageList.get()
        returns: the language name when registered, None when not.
        '''
        if lang == PLACEHOLDER:
            return True

        # sometimes we get None which is not valid
        if not lang:
            return lang

        # get() is an instance method; calling it on the class itself
        # raises TypeError, so look the language up through an instance.
        return LanguageList().get(lang)

    def __init__(self, catalog, lang):
        '''Bind this object to the ``<catalog>.<lang>`` collection.

        NOTE(review): the ``extra_keys`` list is not closed and the body
        of the ``if`` on the collection count is not visible in this
        view, so it is unclear when the language assertion is meant to
        run -- confirm against the full file.
        '''
        # Metadata attribute names that import_po() looks for.
        self.extra_keys = [
            'extracted_comment', 'reference', 'flag',
        self.db = get_db()
        self.lang = lang
        self.catalog = catalog = catalog
        # Collection name has the form "<catalog>.<lang>".
        self._collection = '%s.%s' % (self.catalog, self.lang)
        c = self.db[self._collection]
        if c.find().count() > 0:
        assert self.is_valid_lang(lang)

    def add(self, phrase, translation, *args,  **extra_info):
        '''Adds a phrase to the catalog

        ``phrase`` is decoded from UTF-8 and placed in a document with
        ``translation``; ``extra_info`` is passed on to the lookup of an
        existing document. ``*args`` is not used in the visible code.

        NOTE(review): several statements are not visible in this view
        (the end of the ``doc`` literal, the body of ``if extra_info:``
        and whatever follows ``if not orig_doc:``), so the split between
        inserting and updating cannot be confirmed from here.
        '''
        # get into utf
        phrase = phrase.decode('utf-8')
        doc = {
            'phrase': phrase,
            'translation': translation,
            'encoding': 'utf-8',

        if extra_info:

        query = {'phrase': phrase}
        orig_doc = self._get(phrase, **extra_info)
        if not orig_doc:
            self._update(orig_doc['_id'], doc)

    def update(self, id, doc):
        '''Public entry point for updating a document; hands ``id`` and
        ``doc`` to _update() and returns its result.

        NOTE(review): no percent-decoding is applied to the input
        here -- confirm whether a decorator is meant to be attached.
        '''
        outcome = self._update(id, doc)
        return outcome

    def _update(self, id, doc):
        '''Apply the fields of ``doc`` as a ``$set`` on the document
        whose _id is ``ObjectId(id)``.'''
        selector = {'_id': ObjectId(id)}
        changes = {'$set': doc}
        self.db[self._collection].update(selector, changes)

    def get(self, phrase, *args, **ctx):
        '''Return the stored translation for ``phrase``, or False when
        _get() finds no document for it.'''
        found = self._get(phrase, *args, **ctx)
        if not found:
            return False
        return found.get(u'translation')

    def _get(self, phrase, *args, **ctx):
        '''Return the document whose 'phrase' equals ``phrase``, or
        False when there is none. ``*args`` and ``**ctx`` are accepted
        but not used in the query.'''
        collection = self.db[self._collection]
        match = collection.find_one({'phrase': phrase})
        return match or False

    def _safe_doc(self, d):
        '''Make the _id from the mongo docs json serializable'''
        d['_id'] = str(d['_id']) 
        return d 

    def phrases(self):
        '''Iterate over every document whose phrase is not the empty
        string, ordered by phrase, with each _id stringified.'''
        non_empty = {'phrase' : {'$ne': ''}}
        cursor = self.db[self._collection].find(non_empty)
        ordered = cursor.sort('phrase')
        return imap(self._safe_doc, ordered)

    def find(self, query):
        '''Run ``query`` against this catalog's collection (for example
        to find empty phrases/translations or to filter on extra
        metadata) and iterate over the matches with each _id
        stringified.'''
        matches = self.db[self._collection].find(query)
        return imap(self._safe_doc, matches)

    def batch(self, json_doc):
        '''Do a batch add from already-parsed json input.

        ``json_doc`` is not json text at this point but a mapping whose
        values are lists of phrase dicts. Each phrase dict supplies
        'phrase' and 'translation'; every other key is forwarded to
        add() as a keyword argument. The mapping's keys are bound to
        ``lang`` but not used in the visible code.

        NOTE(review): the closing brackets of the ``dict([`` call are
        not visible in this view -- confirm against the full file.
        '''
        for lang, phrases in json_doc.items():
            for phrase in phrases:
                msgid, msgstr = phrase['phrase'], phrase['translation']
                extra = dict([
                    (k,v) for k, v in phrase.items()
                    if k not in ['phrase', 'translation']
                self.add(msgid, msgstr, **extra)

    def delete(self):
        '''Removes the language catalog

        Drops the ``<catalog>.<lang>`` collection this object is bound
        to, so every phrase stored for the language goes with it.
        '''
        # The method previously had no body, so nothing was removed.
        self.db.drop_collection(self._collection)

    def initialize(self):
        '''Creates a placeholder catalog. It is necessary b/c mongo
        doesn't allow empty collections

        NOTE(review): the membership test below has lost its left-hand
        operand and its body is not visible in this view -- confirm
        against the full file.
        '''
        # Optional mapping read from the environment; empty when unset.
        defaults = env.get('initial_catalogs', {})
        if in defaults:
    def import_po(self, po):
        '''Imports a po file on the filesystem into the current catalog

        Every message's msgid/msgstr is handed to add(), together with
        whichever of self.extra_keys exist as attributes on the parsed
        catalog.
        '''
        po = GettextCatalog(po, None)
        for m in po.messages:
            # Pair each key with its own value. Zipping the full key
            # list against the filtered values shifted values onto the
            # wrong keys whenever an attribute was missing.
            # NOTE(review): the metadata is read from the catalog object,
            # not from the message ``m`` -- confirm that is intended.
            extra = dict(
                (k, getattr(po, k)) for k in self.extra_keys if hasattr(po, k)
            )
            # Forward as keyword arguments, as batch() does; a positional
            # dict lands in add()'s unused *args and is silently dropped.
            self.add(m['msgid'], m['msgstr'], **extra)