fernandotakai / herd (https://ubiquity.mozilla.com/)
The new Ubiquity Herd
Clone this repository (size: 655.8 KB): HTTPS / SSH
$ hg clone http://bitbucket.org/fernandotakai/herd/
| commit 127: | 67c98a729b7a |
| parent 126: | 9a43feabbadd |
| branch: | default |
| tags: | tip |
Made sessions really work with mod_wsgi
herd /
feed_parser.py
| r127:67c98a729b7a | 135 loc | 3.6 KB | embed / history / annotate / raw / |
|---|
from cache import cache
from storage import storage
import simplejson
import os
import subprocess
import tempfile
import logging as log
# Configure the root logger so the INFO-level progress messages below are emitted.
log.basicConfig(level=log.INFO)
# Parser name -> numeric parser version. parse() runs every feed once per
# entry, handing the number to exec_js() and comparing the name with the
# value returned by validate_feed().
VERSIONS = {'parser1': 1, 'parser2': 2}
def validate_feed(feed):
    """Tell which parser generation a command description belongs to.

    The decision is made from which keys of ``feed`` hold a truthy value,
    checked in this order:

    * ``takes`` or ``modifiers``  -> ``'parser1'``
    * ``names`` or ``arguments``  -> ``'parser2'``
    * ``name``                    -> ``'both'``

    Args:
        feed: mapping describing a single command (anything with ``.get``).

    Returns:
        ``'parser1'``, ``'parser2'`` or ``'both'``.

    Raises:
        ValueError: if none of the recognised keys is set.
    """
    if feed.get("takes") or feed.get("modifiers"):
        return 'parser1'
    if feed.get("names") or feed.get("arguments"):
        return 'parser2'
    if feed.get("name"):
        return "both"
    # ValueError is a subclass of Exception, so callers that catch the broad
    # type keep working while new callers can catch something specific.
    raise ValueError("Which feed is that?")
def _to_text(data):
    """Return *data* as a native ``str``, decoding byte strings as UTF-8 if needed."""
    if isinstance(data, str):
        return data
    return data.decode('utf-8', 'replace')


def exec_js(code, version, extra_output=None):
    """Run feed JavaScript through the external ``js`` shell and decode its output.

    The interpreter is given, in order, ``js/clear.js``, ``js/json2.js``, the
    version-specific prelude, the feed code itself and ``js/post.js`` (all
    resolved relative to this module's directory). Whatever the run prints on
    stdout is parsed as JSON.

    Args:
        code: JavaScript source of the feed. Text is written as UTF-8; a byte
            string is written unchanged.
        version: parser version; ``2`` selects ``js/pre_parser2.js``, any
            other value selects ``js/pre_parser1.js``.
        extra_output: optional dict. Its ``'errors'`` entry is replaced by a
            fresh list that receives the interpreter's stderr text (when not
            empty) and the message of any JSON decoding failure.

    Returns:
        The decoded JSON value, or ``None`` when stdout was not valid JSON.

    Raises:
        OSError: propagated from ``subprocess.Popen`` when the ``js``
            executable cannot be started.
    """
    if extra_output is None:
        extra_output = {}
    errors = extra_output['errors'] = []

    js_dir = os.path.join(os.path.dirname(__file__), 'js')
    if version == 2:
        pre_name = 'pre_parser2.js'
    else:
        pre_name = 'pre_parser1.js'
    # Each path is joined with the module directory exactly once; joining an
    # already-joined relative path a second time would duplicate the prefix.
    fd, temppath = tempfile.mkstemp('.js')
    args = ['js',
            '-f', os.path.join(js_dir, 'clear.js'),
            '-f', os.path.join(js_dir, 'json2.js'),
            '-f', os.path.join(js_dir, pre_name),
            '-f', temppath,
            '-f', os.path.join(js_dir, 'post.js')]

    try:
        payload = code if isinstance(code, bytes) else code.encode('utf-8')
        with os.fdopen(fd, 'wb') as temp:
            temp.write(payload)
        # Pipes instead of fixed-name scratch files in the working directory:
        # nothing to collide on between concurrent runs, nothing to clean up.
        popen = subprocess.Popen(args,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
        # communicate() drains both pipes while waiting, avoiding a deadlock
        # when the child fills one of them.
        output, error = popen.communicate()
    finally:
        # Remove the temporary script even when the interpreter failed to start.
        os.remove(temppath)

    output = _to_text(output).strip()
    error = _to_text(error)
    if error:
        errors.append(error)

    try:
        obj = simplejson.loads(output)
    except Exception as e:
        # Best effort: report the decoding problem instead of raising.
        errors.append(str(e))
        obj = None
    return obj
def parse():
    """Extract the command list of every stored feed that does not have one yet.

    Feeds come from ``storage.get_feeds_for_parsing()``. A feed that already
    has ``commands`` or has no ``code`` is skipped. The code of every other
    feed is executed once per entry of ``VERSIONS``; each returned command is
    kept when ``validate_feed`` classifies it as that parser or as ``"both"``,
    and is tagged with the parser name under ``'type'``. A truthy ``names``
    entry is renamed to ``name``. The feed is then saved and its cache entry
    dropped. Feeds that yield no command are logged together with the errors
    collected from all parser runs, and are not saved.
    """
    feeds = storage.get_feeds_for_parsing()
    log.info("Parsing %s feeds", feeds.count())
    cache.delete("all_feeds")

    for feed in feeds:
        if feed.get("commands"):
            continue
        name = feed.get('url')
        code = feed.get('code')
        if not code:
            continue

        log.info('Processing code for %s.', name)
        errors = []
        commands = []
        for parser_name, version in VERSIONS.items():
            # exec_js() resets the 'errors' entry on every call, so each run
            # gets its own dict and the messages are accumulated here.
            extra = {}
            cmds = exec_js(code, version, extra_output=extra)
            errors.extend(extra.get('errors', []))
            if not cmds:
                continue
            for cmd in cmds:
                try:
                    validated = validate_feed(cmd)
                except Exception as e:
                    # One unrecognisable command must not abort the run for
                    # every remaining feed; report it and move on.
                    log.error("Skipping command in %s: %s", name, e)
                    continue
                if validated == "both" or validated == parser_name:
                    cmd['type'] = parser_name
                    commands.append(cmd)

        if not commands:
            log.error("Occurred errors while parsing %s ", name)
            for error in errors:
                log.error("%s", error)
            continue

        # Let's standardize the name property
        for command in commands:
            if command.get("names"):
                command['name'] = command.pop('names')

        feed['commands'] = commands
        cache.delete(feed['_id'])
        storage.save(feed)
# Run one parsing pass when the module is executed as a script.
if __name__ == "__main__":
    parse()
|
