fernandotakai / herd (https://ubiquity.mozilla.com/)

The new Ubiquity Herd

Clone this repository (size: 655.8 KB): HTTPS / SSH
$ hg clone http://bitbucket.org/fernandotakai/herd/
commit 127: 67c98a729b7a
parent 126: 9a43feabbadd
branch: default
tags: tip
Made sessions really work with mod_wsgi
Fernando Takai / fernandotakai
5 months ago
herd / feed_parser.py
r127:67c98a729b7a 135 loc 3.6 KB embed / history / annotate / raw /
from cache import cache
from storage import storage

import simplejson

import os
import subprocess
import tempfile

import logging as log

# Configure the root logger so messages at INFO level and above are emitted.
log.basicConfig(level=log.INFO)

# Parser name -> numeric version that parse() hands to exec_js() for each run.
VERSIONS = {'parser1': 1, 'parser2': 2}

def validate_feed(feed):
    """Tell which parser API a command dict was written for.

    The dict is probed for marker keys in a fixed priority order; a key only
    counts when its value is truthy.

    Returns:
        'parser1' if "takes" or "modifiers" is set, otherwise 'parser2' if
        "names" or "arguments" is set, otherwise "both" if "name" is set.

    Raises:
        Exception: when none of the marker keys holds a truthy value.
    """
    # Ordered from highest to lowest priority; the first match wins.
    markers = (
        (("takes", "modifiers"), 'parser1'),
        (("names", "arguments"), 'parser2'),
        (("name",), "both"),
    )

    for keys, verdict in markers:
        if any(feed.get(key) for key in keys):
            return verdict

    raise Exception("Which feed is that?")

def exec_js(code, version, extra_output=None):
    """Run feed JavaScript through the external ``js`` shell and decode its output.

    The code is written to a temporary ``.js`` file and executed between the
    helper scripts (``clear.js``, ``json2.js``, the version-specific
    ``pre_parser`` script and ``post.js``) located in the ``js`` directory
    next to this module. Whatever the interpreter prints on stdout is parsed
    as JSON.

    Args:
        code: JavaScript source of the feed; it is encoded as UTF-8 before
            being written to disk.
        version: Parser version; ``2`` selects ``js/pre_parser2.js``, any
            other value selects ``js/pre_parser1.js``.
        extra_output: Optional dict used as an out-parameter. Its ``'errors'``
            entry is reset to a fresh list on every call, then receives the
            interpreter's stderr text (if any) and the message of a JSON
            decoding failure (if any).

    Returns:
        The object decoded from the interpreter's stdout, or ``None`` when
        decoding fails.
    """
    if extra_output is None:
        extra_output = {}
    extra_output['errors'] = []

    path = os.path.dirname(__file__)

    # Keep this relative: it is joined with ``path`` exactly once below.
    # Joining an already-joined path doubled the directory whenever
    # __file__ was relative.
    if version == 2:
        pre_file = 'js/pre_parser2.js'
    else:
        pre_file = 'js/pre_parser1.js'

    fd, temppath = tempfile.mkstemp('.js')
    try:
        # Reuse the descriptor from mkstemp; binary mode because the payload
        # is already encoded bytes.
        with os.fdopen(fd, 'wb') as temp:
            temp.write(code.encode('utf-8'))

        args = ['js',
                '-f', os.path.join(path, 'js/clear.js'),
                '-f', os.path.join(path, 'js/json2.js'),
                '-f', os.path.join(path, pre_file),
                '-f', temppath,
                '-f', os.path.join(path, 'js/post.js')]

        # Capture both streams through pipes rather than fixed-name files in
        # the working directory, which concurrent runs would overwrite.
        popen = subprocess.Popen(args,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
        output, error = popen.communicate()
    finally:
        # Remove the script even when the interpreter could not be started.
        os.remove(temppath)

    output = output.strip()

    if error:
        extra_output['errors'].append(error)

    try:
        obj = simplejson.loads(output)
    except Exception as e:
        # Best effort: report the decoding problem to the caller via
        # extra_output instead of raising.
        extra_output['errors'].append(str(e))
        obj = None

    return obj
    
def parse():
    """Extract and store command metadata for feeds that do not have it yet.

    Iterates over the feeds returned by ``storage.get_feeds_for_parsing()``.
    Feeds that already carry a truthy ``"commands"`` value, or that have no
    ``"code"``, are skipped. For the rest, the code is run through
    ``exec_js`` once per entry in ``VERSIONS``; each returned command is kept
    when ``validate_feed`` classifies it as belonging to that parser (or to
    "both") and is tagged with the parser name under ``'type'``.

    When no command is found for a feed, the errors collected from all parser
    runs are logged and the feed is left untouched. Otherwise a truthy
    ``'names'`` entry is renamed to ``'name'`` on each command, the list is
    stored on the feed under ``'commands'``, the feed's cache entry is
    deleted and the feed is saved.

    The ``"all_feeds"`` cache entry is deleted once, before any feed is
    processed.
    """
    feeds = storage.get_feeds_for_parsing()

    log.info("Parsing %s feeds", feeds.count())

    cache.delete("all_feeds")

    for feed in feeds:
        if feed.get("commands"):
            continue

        name = feed.get('url')
        code = feed.get('code')
        if not code:
            continue

        log.info('Processing code for %s.', name)
        commands = []
        errors = []

        for parser_name, version in VERSIONS.items():
            # exec_js resets extra_output['errors'] on every call, so each
            # run gets its own dict and the errors are gathered here.
            # Sharing one dict kept only the last run's errors.
            extra = {}
            cmds = exec_js(code, version, extra_output=extra)
            errors.extend(extra.get('errors', []))

            if not cmds:
                continue

            for cmd in cmds:
                validated = validate_feed(cmd)
                if validated in ("both", parser_name):
                    cmd['type'] = parser_name
                    commands.append(cmd)

        if not commands:
            log.error("Occurred errors while parsing %s ", name)
            for error in errors:
                log.error(error)
            continue

        # Let's standardize the name property
        for command in commands:
            if command.get("names", None):
                command['name'] = command.pop('names')

        feed['commands'] = commands

        cache.delete(feed['_id'])
        storage.save(feed)
    
# Run one parsing pass when this module is executed directly as a script.
if __name__ == '__main__':
    parse()