Source

analytics / googleanalytics.py

import urllib, httplib, tornado.web, tornado.auth, tornado.httpclient
from lxml import etree

try: import secret_config as config
except: import config

try: import json
except: from django.utils import simplejson as json

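# fields[type][category] = [row, ...], one row per line of gametrics.csv, e.g.
#   fields['dimension']['Visitor'] = [
#       {'type': 'dimension', 'category': 'Visitor', 'name': 'ga:browser', ...},
#   ]
#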
import os.path, csv
# Group the rows of gametrics.csv (which lives next to this module) as
#   fields[type][category] = [row, row, ...]
fields = {}
_gametrics = open(os.path.join(os.path.dirname(__file__), 'gametrics.csv'))
# Close the handle explicitly instead of leaking it until garbage collection.
# try/finally (rather than `with`) also parses on pre-2.6 interpreters.
try:
    for field in csv.DictReader(_gametrics):
        fields.setdefault(field['type'], {}).setdefault(field['category'],
            []).append(field)
finally:
    _gametrics.close()


class Page(tornado.web.RequestHandler, tornado.auth.GoogleMixin):
    '''Base request handler for pages that use Google Analytics
       or require authentication via @tornado.web.authenticated.

       Errors are rendered with template/error.html

       self.current_user        gets the current logged in user's properties
       self.getaccounts(cb)     calls cb with the accounts as a list of dicts
       self.getdata(query, cb)  calls cb with a list of
                                [[dimensions], [metrics]] rows. Values are the
                                raw 'value' attribute strings from the feed'''

    SCOPE = 'https://www.google.com/analytics/feeds/'
    NS = {
        'GA'    : 'http://schemas.google.com/analytics/2009',
        'ATOM'  : 'http://www.w3.org/2005/Atom',
    }

    # Query parameters forwarded to the data feed (and used in the cache key).
    # Sorted once here so that equal queries always produce the same key.
    _QUERY_KEYS = tuple(sorted(('ids', 'dimensions', 'metrics', 'sort',
        'filters', 'start-date', 'end-date', 'start-index', 'max-results')))

    def get_current_user(self):
        '''Returns the user dict stored in config.DB for the 'uid' secure
           cookie, or None if there is no such record.'''
        user = config.DB.get('uid:' + (self.get_secure_cookie('uid') or ''))
        if not user:
            return None
        user = json.loads(user)
        # Convert to bytes for HMAC: http://bugs.python.org/issue5285
        if 'access_token' in user:
            user['access_token']['secret'] = str(user['access_token']['secret'])
        return user

    def get_error_html(self, code, **kwargs):
        '''Renders template/error.html for the HTTP status `code`.'''
        # Non-standard status codes have no entry in httplib.responses. Fall
        # back to a generic message rather than raising KeyError while we are
        # already in the middle of reporting an error.
        self.render('template/error.html', code=code, kwargs=kwargs,
            msg=httplib.responses.get(code, 'Unknown Error'))

    # Calling chain:
    #   getdata(query, callback)    Called by user function. Transforms query
    #     _ga                       Performs an async HTTP request for query
    #       _xml                    Converts to etree, handles errors
    #         _on_data              Post processes data
    #           callback            Returns to user
    def getdata(self, query, callback):
        '''Fetches rows from the data feed and passes them to `callback`.

           query     dict of feed parameters. Only the keys in _QUERY_KEYS
                     are used; anything else is ignored
           callback  called with a list of [[dimensions], [metrics]] rows,
                     either from the cache or from a fresh request'''
        # Pick only the parameters we want to cache for the query
        query = [(key, query[key]) for key in self._QUERY_KEYS if key in query]

        # Return from cache if possible, else perform the query
        cache = config.DB.get('cache:ga:' + urllib.urlencode(query))
        if cache:
            callback(json.loads(cache))
        else:
            self._ga(self.SCOPE + 'data', query, self._on_data, callback)

    def _on_data(self, query, callback, xml):
        '''Extracts dimension and metric values from each feed entry, caches
           the result under the query, then passes it to `callback`.'''
        dimension = '{%(GA)s}dimension' % self.NS
        metric = '{%(GA)s}metric' % self.NS
        data = [[
            [x.get('value') for x in e.iter(dimension)],
            [x.get('value') for x in e.iter(metric)]
          ] for e in xml.iter('{%(ATOM)s}entry' % self.NS)
        ]
        # NOTE(review): no expiry is passed here; confirm what config.DB
        # does by default if cached results are expected to go stale.
        config.DB.set('cache:ga:' + urllib.urlencode(query), json.dumps(data))
        callback(data)

    # Calling chain:
    #   getaccounts(callback)   Called by user function
    #     _ga                   Performs an async HTTP request for accounts
    #       _xml                Converts to etree, handles errors
    #         _on_acc           Post processes accounts into list of dicts
    #           callback        Returns to user
    def getaccounts(self, callback):
        '''Fetches the account feed and passes a list of dicts (one per
           account entry) to `callback`.'''
        self._ga(self.SCOPE + 'accounts/default', {}, self._on_acc, callback)

    def _on_acc(self, query, callback, xml):
        '''Turns each feed entry into a dict holding its 'title' plus every
           GA property as name -> value, then passes the list to `callback`.'''
        data = []
        for e in xml.iter('{%(ATOM)s}entry' % self.NS):
            row = { 'title': e.findtext('{%(ATOM)s}title' % self.NS) }
            for p in e.iter('{%(GA)s}property' % self.NS):
                row[p.get('name')] = p.get('value')
            data.append(row)
        callback(data)


    def _ga(self, url, query, processor, callback):
        '''Standard async HTTP request to Google Analytics feeds.

           url        feed URL to request
           query      dict or list of (key, value) pairs to send
           processor  called as processor(query, callback, xml) on success
           callback   user callback, handed through to processor

           Raises tornado.web.HTTPError(500) if the OAuth parameters cannot
           be built, e.g. no logged in user or no stored access token.'''
        try:
            params = self._oauth_request_parameters(url,
                self.current_user['access_token'], query)
        # Exception, not a bare except: SystemExit and KeyboardInterrupt must
        # not be reported as a login problem.
        except Exception:
            raise tornado.web.HTTPError(500, 'Not logged into Google Analytics')
        params.update(query)
        http = tornado.httpclient.AsyncHTTPClient()
        http.fetch(url + '?' + urllib.urlencode(params),
            self.async_callback(self._xml, processor, query, callback))

    def _xml(self, processor, query, callback, response):
        '''Parses the HTTP response body as XML and passes it to `processor`.

           Raises tornado.web.HTTPError(500) if the request failed.'''
        if response.error:
            # HTTPError treats its message as a %-format string, so the
            # response text is passed as an argument. A literal '%' in the
            # body would otherwise break formatting of the error itself.
            raise tornado.web.HTTPError(500, '%s',
                response.body or response.error)
        processor(query, callback, etree.fromstring(response.body))


class Login(Page):
    '''Gets access to GA, stores token, redirects to accounts page.'''
    URL = r'/login'

    @tornado.web.asynchronous
    def get(self):
        '''First visit: sends the browser to Google to authorize SCOPE.
           Return visit (an openid.mode argument is present): verifies the
           response and continues in _on_auth.'''
        if self.get_argument('openid.mode', None):
            self.get_authenticated_user(self.async_callback(self._on_auth))
            return
        self.authorize_redirect(self.SCOPE, self.request.full_url())

    # Plain callback, not a request handler method, so it is not decorated
    # with @tornado.web.asynchronous. get() has already switched the request
    # to asynchronous mode, and async_callback routes any exception raised
    # here to the handler's error page.
    def _on_auth(self, user):
        '''Stores the authenticated user under their e-mail address, sets
           the 'uid' cookie and redirects to the accounts page.

           Raises tornado.web.HTTPError(500) if authentication failed.'''
        if not user:
            raise tornado.web.HTTPError(500, "Couldn't check with Google.")
        self.set_secure_cookie('uid', user['email'])
        config.DB.set('uid:' + user['email'], json.dumps(user))
        self.redirect(Accounts.URL)


class Accounts(Page):
    '''Stores list of accounts, then redirects to home page.'''
    URL = r'/accounts'

    @tornado.web.authenticated
    @tornado.web.asynchronous
    def get(self):
        '''Requests the user's accounts. If the request cannot even be
           started, goes straight to the home page.'''
        try:
            self.getaccounts(self.save_accounts)
        # Best effort: any ordinary failure sends the user home. Exception,
        # not a bare except, so SystemExit and KeyboardInterrupt still
        # propagate.
        except Exception:
            self.redirect('/')

    def save_accounts(self, data):
        '''Saves `data` (the list of account dicts) on the stored user
           record, then redirects to the home page.'''
        user = self.current_user
        user['accounts'] = data
        config.DB.set('uid:' + user['email'], json.dumps(user))
        self.redirect('/')


class Logout(Page):
    '''Clears the login cookie, then redirects to home page.'''
    URL = r'/logout'

    def get(self):
        # Only the 'uid' cookie is removed; nothing in config.DB is touched
        # here, so the stored user record stays in place.
        self.clear_cookie('uid')
        self.redirect('/')