Source

wdmmgext / wdmmgext / load / util.py

'''Utilities that are useful to more than one loader module.
'''
import wdmmg.model as model
import urllib2
from urllib2 import urlopen
from urllib import urlencode
import sys
import cgi
try: import json
except ImportError: import simplejson as json
from BeautifulSoup import BeautifulSoup

def byname_string(name, parent):
    '''Build the lookup key for a node: the parent label, "--", then the name.

    ``parent`` is converted with ``str()``; ``name`` is used as given.
    '''
    return "--".join((str(parent), name))

# tree structure for allocating codes
class Node(object):
    '''A node in the tree used for allocating codes.

    Every node registers itself in the class-level ``byname`` dictionary
    under the key that ``byname_string`` builds from its own name and its
    parent's name (an empty string when it has no parent).
    '''
    # Registry of nodes keyed by byname_string(); this is a class attribute,
    # so every instance shares the same dictionary.
    byname = {}

    def __init__(self, name, parent=None):
        '''Create a node, register it by name and attach it to ``parent``.

        A node without a parent is the root pseudo-node and gets code 0.
        Any other node is appended to its parent's children and takes its
        1-based position in that list as its code.
        '''
        self.name = name
        self.parent = parent
        self.children = []
        is_root = parent is None
        registry_key = byname_string(name, '' if is_root else parent.name)
        #print "creating node with name " + registry_key
        self.byname[registry_key] = self
        if is_root:
            # root pseudo-node
            self.code = 0
            return
        # all normal nodes: the code is the 'end' position among the siblings
        siblings = parent.children
        siblings.append(self)
        self.code = len(siblings)

    def get_codes(self, codelist):
        '''Append this node's code, then each ancestor's, to ``codelist``.

        Codes are appended as strings, starting with this node and moving up
        through the parents. The walk stops at the first node whose code is 0
        (the root pseudo-node), which contributes nothing.
        '''
        node = self
        while node.code:
            codelist.append(str(node.code))
            node = node.parent

def get_code(nodename, parentname=''):
    '''Return the dotted code of the node registered as (nodename, parentname).

    The node is looked up in ``Node.byname`` under the key built by
    ``byname_string``. Its code is made of the codes of the node and its
    ancestors, outermost first, joined with '.'. A single-character leading
    component is padded with a leading zero.

    Returns '' when no such node is registered, and an empty unicode string
    for the root pseudo-node (code 0), which contributes no components.
    '''
    # look in the dictionary for the node
    node = Node.byname.get(byname_string(nodename, parentname))
    if node is None:
        return ''
    codes = []
    node.get_codes(codes)  # collected innermost first
    codes.reverse()
    # hack to deal with the Flash requirement for initial zeros.
    # The list is empty for the root pseudo-node, so check before indexing;
    # codes[0] on an empty list would raise IndexError.
    if codes and len(codes[0]) == 1:
        codes[0] = '0' + codes[0]
    code = '.'.join(codes)
    return unicode(code)

def addnodes(names, parent):
    '''Ensure a chain of nested nodes exists below ``parent``.

    ``names`` is walked in order. Each name is looked up in ``Node.byname``
    under the current parent's name ('' when there is no parent); a new Node
    is created when nothing is registered, and the node found or created
    becomes the parent for the next name.
    '''
    current = parent
    for name in names:
        # check by name - hack
        parent_label = '' if current is None else current.name
        found = Node.byname.get(byname_string(name, parent_label))
        current = Node(name, current) if found is None else found
    
def to_float(s):
    '''Clean up numbers: remove commas & spaces and convert to float.

    Returns 0.0 for a false value (None, empty string) and for a string
    that holds nothing but commas and whitespace. Raises ValueError when
    the cleaned text is not a valid float.
    '''
    if not s:
        return 0.0
    cleaned = s.replace(',', '').replace(' ', '')
    # A blank-looking value such as ' ' is truthy but cleans down to nothing;
    # treat it like the empty string instead of letting float('') raise.
    if not cleaned.strip():
        return 0.0
    return float(cleaned)

def get_or_create_dataset(name, long_name=None, currency=None, notes=None):
    '''Return the dataset called 'name', creating it if necessary.
    Use with caution!

    A newly created dataset is added to the session and committed
    immediately. ``long_name``, ``currency`` and ``notes`` are only used
    when the dataset has to be created.
    '''
    existing = model.Session.query(model.Dataset).filter_by(name=name).first()
    if existing:
        return existing
    created = model.Dataset(
        name=name, long_name=long_name, currency=currency, notes=notes)
    model.Session.add(created)
    model.Session.commit()
    return created
    
def get_or_create_key(name, notes):
    '''Return the Key called 'name', creating it if it does not exist yet.

    A newly created Key is added to the session; this function does not
    commit. ``notes`` is only used when the Key has to be created.
    '''
    existing = model.Session.query(model.Key).filter_by(name=name).first()
    if existing:
        return existing
    created = model.Key(name=name, notes=notes)
    model.Session.add(created)
    return created

def get_or_create_value(key, code, name=None, notes=u'', index=None, parent=None):
    '''Get or create an enumeration value using an optional cache (index).

    The value is looked up by ``key`` and ``code``. When it has to be
    created, ``name`` falls back to ``code`` if it is empty, and a true
    ``parent`` is stored in the new value's ``keyvalues`` under the Key
    named u'parent' (itself created on demand). New objects are added to
    the session; nothing is committed here.

    When ``index`` is given it is used as a dictionary keyed by
    ``(key.name, code)``: a hit is returned without touching the session
    and a miss is stored before being returned.
    '''
    def fetch_or_build():
        # Look the value up in the database, building it on a miss.
        query = model.Session.query(model.EnumerationValue)
        found = query.filter_by(key=key, code=code).first()
        if found is not None:
            return found
        fresh = model.EnumerationValue(
            key=key, code=code, name=name or code, notes=notes)
        if parent:
            parent_key = model.Session.query(model.Key).filter_by(
                name=u'parent').first()
            if not parent_key:
                parent_key = model.Key(
                    name=u'parent', notes=u'Means "is part of".')
                model.Session.add(parent_key)
            fresh.keyvalues[parent_key] = parent
        model.Session.add(fresh)
        return fresh

    if index is None:
        return fetch_or_build()
    cache_key = (key.name, code)
    if cache_key not in index:
        index[cache_key] = fetch_or_build()
    return index[cache_key]

# Translate a phrase from one language to another, using the 
# Google Translate API (used for Israeli data)
def translate(from_lang, to_lang, phrase):
    '''Translate ``phrase`` from ``from_lang`` to ``to_lang`` via Google Translate.

    Documentation for Google Translate API:
    http://code.google.com/apis/ajaxlanguage/documentation/#Examples

    Returns the translated text as unicode with HTML entities decoded.
    Returns u'Translation not available' when the request raises
    ``urllib2.URLError`` or the response carries no ``responseData``, and
    u'' when the translated text itself is empty.
    '''
    langpair = '%s|%s' % (from_lang, to_lang)
    base_url = 'http://ajax.googleapis.com/ajax/services/language/translate?'
    # urlencode() calls str() on each value, which fails for non-ASCII
    # unicode text, so hand it UTF-8 bytes instead.
    if isinstance(phrase, unicode):
        phrase = phrase.encode('utf-8')
    params = urlencode((('v', 1.0),
                        ('q', phrase),
                        ('langpair', langpair),))
    url = base_url + params
    try:
        response = urlopen(url)
        try:
            content = response.read()
        finally:
            # Release the connection even if the read fails.
            response.close()
    except urllib2.URLError:
        return u'Translation not available'
    try:
        trans_dict = json.loads(content)
    except AttributeError:
        # Fallback for a json module that offers read() instead of loads().
        trans_dict = json.read(content)
    # NOTE(review): the API is documented to send a null responseData for
    # failed requests; report that like a network failure instead of letting
    # the subscript below raise TypeError.
    response_data = trans_dict['responseData']
    if not response_data:
        return u'Translation not available'
    translation = response_data['translatedText']
    if not translation:
        # Parsing empty text yields no contents, so contents[0] would raise
        # IndexError; an empty translation is simply an empty string.
        return u''
    s = BeautifulSoup(translation, convertEntities=BeautifulSoup.HTML_ENTITIES).contents[0]
    return unicode(s)