# Dragoman / dragoman / lib / storage.py

import gettext
import urllib
try:
    import json as json_parser
except ImportError:
    import simplejson as json_parser
from collections import defaultdict
from itertools import groupby, imap
from pymongo import Connection, json_util, DESCENDING, ASCENDING
from pymongo.objectid import ObjectId
from eggmonster import env
from pprint import pprint

from dragoman.lib.gettext_tools import GettextCatalog


class MongoJSON(object):
    '''Thin facade over the JSON parser that always plugs in the
    pymongo ``json_util`` hooks, so mongo-specific values survive a
    dumps/loads round trip.'''

    def dumps(self, *args, **kw):
        '''Serialize like ``json_parser.dumps``; any caller supplied
        ``default`` is replaced by ``json_util.default``.'''
        kw['default'] = json_util.default
        return json_parser.dumps(*args, **kw)

    def loads(self, *args, **kw):
        '''Deserialize like ``json_parser.loads``; any caller supplied
        ``object_hook`` is replaced by ``json_util.object_hook``.'''
        kw['object_hook'] = json_util.object_hook
        return json_parser.loads(*args, **kw)
json = MongoJSON()


def get_db():
    '''Open a mongo connection and return the database named by
    ``env.storage_db``.

    ``env.storage_host`` and ``env.storage_port`` are passed to
    ``Connection`` positionally, skipping whichever of them is falsy.
    '''
    location = [part for part in (env.storage_host, env.storage_port) if part]
    # An empty list unpacks to no arguments, i.e. a plain Connection().
    connection = Connection(*location)
    return connection[env.storage_db]


# 'piglatin' is not a real language: it is the placeholder language
# used when a catalog is created without one.
PLACEHOLDER = 'piglatin'

# Collection names that are skipped when collections are listed as
# catalog/language pairs.
SYSNAMES = set(['system.indexes', 'config.languages'])


def percent_encoding(f):
    '''Decorator that percent-decodes the first argument after ``self``
    before calling the wrapped method.

    A falsy value (``None``, ``''``, ...) is replaced by the empty
    string before decoding. All remaining positional and keyword
    arguments are passed through untouched.

    The wrapper keeps the wrapped function's ``__name__`` and
    ``__doc__`` so decorated methods stay recognisable in tracebacks
    and ``help()``.
    '''
    # Imported locally: functools is not among this module's
    # top-level imports.
    import functools

    @functools.wraps(f)
    def func(obj, phrase, *args, **kw):
        return f(obj, urllib.unquote(phrase or ''), *args, **kw)
    return func

def first(tupl):
    '''Return the item at index 0 of ``tupl``.'''
    head = tupl[0]
    return head

def second(tupl):
    '''Return the item at index 1 of ``tupl``.'''
    follower = tupl[1]
    return follower


class LanguageList(object):
    '''A mongo backed dicty list of languages. Results for keys and
    items will be sorted based on the abbrev.

    Every language is one document of the form
    ``{'abbrev': ..., 'name': ...}`` in the ``config.languages``
    collection.
    '''

    collection = 'config.languages'

    def __init__(self):
        self.db = get_db()

    @classmethod
    def get(cls, abbrev):
        '''Return the name stored for ``abbrev`` or ``None`` when the
        abbreviation is unknown.

        This is a classmethod and opens its own connection through
        ``get_db()``, so it can be used without an instance.
        '''
        languages = get_db()[cls.collection]
        result = languages.find_one({'abbrev': abbrev})
        if result:
            return result['name']
        return None
    __getitem__ = get

    def __contains__(self, abbrev):
        '''True when ``get`` returns a truthy name for ``abbrev``.'''
        return bool(self.get(abbrev))

    def add(self, abbrev, name):
        '''Store the abbrev/name pair unless that exact pair already
        exists.'''
        db = self.db[self.collection]
        data = {
            'abbrev': abbrev,
            'name': name,
        }
        if db.find_one(data) is None:
            db.insert(data)

    def delete(self, abbrev):
        '''Remove every language document with the given abbrev.'''
        self.db[self.collection].remove({'abbrev': abbrev})

    def keys(self):
        '''Yield all abbreviations, sorted.'''
        db = self.db[self.collection]
        for doc in db.find({}, ['abbrev']).sort('abbrev'):
            yield doc['abbrev']
    __iter__ = keys

    def values(self):
        '''Yield one document per language, restricted to the ``name``
        field.

        NOTE(review): this yields the mongo documents rather than the
        bare names; kept as is because callers may rely on the shape.
        '''
        db = self.db[self.collection]
        for doc in db.find({}, ['name']):
            yield doc

    def items(self):
        '''Yield ``(abbrev, name)`` pairs sorted by abbrev, leaving out
        the placeholder language.'''
        db = self.db[self.collection]
        # Sort like keys() does so the ordering promised by the class
        # docstring actually holds.
        for doc in db.find().sort('abbrev'):
            if doc['abbrev'] != PLACEHOLDER:
                yield doc['abbrev'], doc['name']


class CatalogList(object):
    '''All catalogs in the database. A catalog is the set of
    collections named ``<catalog>.<language>``.'''

    def __init__(self):
        self.db = get_db()
        self._catalogs = defaultdict(list)

    @property
    def catalogs(self):
        '''Takes the collection names and breaks them into catalog,
        language pairs. The languages are sorted alphabetically.

        Returns a ``defaultdict(list)`` mapping each catalog name to
        its languages. The mapping is rebuilt from the database on
        every access, so repeated reads do not pile up duplicate
        languages and dropped collections disappear from the result.
        '''
        grouped = defaultdict(list)
        for name in self.db.collection_names():
            if name in SYSNAMES:
                continue
            catalog, dot, lang = name.partition('.')
            if not dot:
                # No language part, so this is not a catalog collection.
                continue
            grouped[catalog].append(lang)

        for langs in grouped.values():
            langs.sort()

        self._catalogs = grouped
        return self._catalogs

    def delete(self, catalog):
        '''Drop every language collection that belongs to ``catalog``.'''
        # Match on "<catalog>." so that deleting "foo" leaves the
        # collections of a catalog called "foobar" alone.
        prefix = '%s.' % catalog
        for collection in self.db.collection_names():
            if collection in SYSNAMES:
                continue
            if collection.startswith(prefix):
                self.db.drop_collection(collection)

    def add(self, catalog):
        '''Create ``catalog`` in the placeholder language, fill it via
        ``Catalog.initialize`` and return the new ``Catalog``.'''
        c = Catalog(catalog, PLACEHOLDER)
        c.initialize()
        return c


class Catalog(object):
    '''The phrases of one catalog in one language, stored in the mongo
    collection named ``<catalog>.<lang>``.'''

    @classmethod
    def is_valid_lang(cls, lang):
        '''Return a truthy value when ``lang`` may be used for a
        catalog.

        The placeholder language is always valid. Any other language
        is valid when ``LanguageList.get`` knows a name for it; in that
        case the name itself is returned.
        '''
        if lang == PLACEHOLDER:
            return True

        # sometimes we get None which is not valid
        if not lang:
            return lang

        return LanguageList.get(lang)

    def __init__(self, catalog, lang):
        '''Bind to the ``<catalog>.<lang>`` collection.

        Raises ``AssertionError`` when ``lang`` is not a valid
        language.
        '''
        self.db = get_db()
        self.lang = lang
        self.catalog = catalog
        self.name = catalog
        self._collection = '%s.%s' % (self.catalog, self.lang)
        c = self.db[self._collection]
        # Only index collections that already hold documents.
        if c.find().count() > 0:
            c.ensure_index('phrase')
        assert self.is_valid_lang(lang)

    @percent_encoding
    def add(self, phrase, translation, *args, **extra_info):
        '''Adds a phrase to the catalog.

        When a document with the same phrase and the same
        ``extra_info`` already exists it is updated in place,
        otherwise a new document is inserted. Extra positional
        arguments are accepted and ignored.
        '''
        collection = self.db[self._collection]
        doc = {
            'phrase': phrase.decode('utf-8'),
            'translation': translation,
            'encoding': 'utf-8',
        }
        doc.update(extra_info)

        query = {'phrase': phrase}
        query.update(extra_info)
        docs = list(collection.find(query))
        if docs:
            # Update the last match: that is the document get() hands
            # back, so the new translation is what readers will see.
            collection.update({'_id': docs[-1]['_id']}, {'$set': doc})
        else:
            collection.insert(doc)

    @percent_encoding
    def update(self, id, doc):
        '''Update the actual document.

        ``id`` is the string form of the document's ObjectId and
        ``doc`` holds the fields to ``$set`` on it.
        '''
        q = {'_id': ObjectId(id)}
        self.db[self._collection].update(q, {'$set': doc})

    @percent_encoding
    def get(self, phrase, *args, **ctx):
        '''Gets a phrase's translation.

        ``ctx`` adds further fields the document has to match.
        Returns ``False`` when nothing matches.
        '''
        query = {'phrase': phrase}
        query.update(ctx)
        docs = list(self.db[self._collection].find(query))
        if docs:
            # With several matches the last one wins.
            return docs[-1].get(u'translation')
        return False

    def _safe_doc(self, d):
        '''Make the _id from the mongo docs json serializable'''
        d['_id'] = str(d['_id'])
        return d

    @property
    def phrases(self):
        '''Get all the phrases in the catalog, sorted by phrase.
        Documents with an empty phrase are left out.'''
        docs = self.db[self._collection].find({'phrase': {'$ne': ''}})
        return imap(self._safe_doc, docs.sort('phrase'))

    def find(self, query):
        '''Query the DB for things like empty phrases/translations or
        based on extra metadata'''
        return imap(self._safe_doc, self.db[self._collection].find(query))

    def batch(self, json_doc):
        '''Do a batch add from some json input. It really wouldn't be
        json at this point, but a dict of lists of dicts. But, the
        dict structure will define what the json would look like in
        the API.

        Every phrase dict needs a ``phrase`` and a ``translation`` key;
        all other keys are stored as extra info.
        '''
        # NOTE(review): the keys of json_doc look like languages, but
        # everything is added to this catalog whatever the key is.
        for phrases in json_doc.values():
            for phrase in phrases:
                msgid, msgstr = phrase['phrase'], phrase['translation']
                extra = dict(
                    (k, v) for k, v in phrase.items()
                    if k not in ('phrase', 'translation')
                )
                self.add(msgid, msgstr, **extra)

    def delete(self):
        '''Removes the language catalog'''
        self.db.drop_collection(self._collection)

    def initialize(self):
        '''Fill the catalog from its initial po file.

        The file is looked up under the catalog's name in the
        ``initial_catalogs`` setting, falling back to the ``default``
        entry. Raises ``KeyError`` when neither entry exists.
        '''
        defaults = env.get('initial_catalogs', {})
        if self.name in defaults:
            self.import_po(env.path(defaults[self.name]))
        else:
            self.import_po(env.path(defaults['default']))

    def import_po(self, po):
        '''Imports a po file on the filesystem into the current catalog'''
        extra_keys = [
            'extracted_comment', 'reference', 'flag',
            'previous_untranslated_string',
        ]
        po = GettextCatalog(po, None)
        for m in po.messages:
            # Pair each key with its own value; zipping a filtered
            # value list against the full key list mislabels values as
            # soon as one attribute is missing.
            # NOTE(review): the metadata is read from the catalog
            # object, as before - confirm it should not come from the
            # message instead.
            extra = dict(
                (k, getattr(po, k)) for k in extra_keys if hasattr(po, k)
            )
            # Pass the metadata as keyword arguments; given
            # positionally it lands in *args and is never stored.
            self.add(m['msgid'], m['msgstr'], **extra)
# Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
# Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
# Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
# Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
# Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
# Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
# Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.