# Etc / parsing /

import simpleparse
from simpleparse.common import strings, numbers
from simpleparse.dispatchprocessor import *

# JSON grammar, in SimpleParse's EBNF dialect, handed to simpleparse.parser.Parser.
# adapted from: (source link not present in this file)
#
# Declaration markers used below (SimpleParse syntax):
#   >value<  "expanded" production: the alternative that matched (true, int,
#            array, ...) is reported in place of a `value` node.
#   <ws>     "unreported" production: whitespace matches leave no node in
#            the result tree.
# `int`, `float`, `string_single_quote` and `string_double_quote` are not
# defined here; presumably they are the shared productions made available by
# importing simpleparse.common.numbers / simpleparse.common.strings.
jsonbnf = r"""
file                := document
document            := ws, value, ws
>value<             := false / null / true / json_object / array / float / int / string_json
true                := "true"
false               := "false"
null                := "null"
string_json         := string_single_quote / string_double_quote
array               := "[", ws, ( value, (ws, ",", ws, value)* )?, ws, "]"
json_object         := "{", ws, ( member, ( ws, ",", ws, member )* )?, ws, "}"
member              := string_json, ws, ":", ws, value 
<ws>                := [ \t\n\r]*
"""

class JSONParser(object):
    """Load Python objects from JSON text via the SimpleParse grammar.

    The SimpleParse parser is constructed once, in ``__init__``, and the same
    instance is reused by every ``loads`` call.
    """

    def __init__(self):
        # 'file' is the root production of the ``jsonbnf`` grammar.
        self._parser = simpleparse.parser.Parser(jsonbnf, 'file')

    def loads(self, src):
        """Parse the JSON text ``src`` and return the decoded Python value.

        A new ``JSONLoader`` is used for each call; the result is read from
        its ``value`` attribute once parsing has finished.

        NOTE(review): nothing here checks whether the parse succeeded. The
        ``value`` attribute is only assigned by ``JSONLoader.document``, so
        input that never produces a ``document`` node would presumably fail
        on the attribute access below -- confirm the intended error handling.
        """
        processor = JSONLoader()
        self._parser.parse(src, processor=processor)
        return processor.value

class JSONLoader(DispatchProcessor):
    """Convert a SimpleParse result tree into plain Python values.

    Every handler is named after a grammar production. It receives the
    matching ``(tag, start, stop, children)`` node as ``parseinfo`` and the
    text being parsed as ``src``.
    """

    def document(self, parseinfo, src):
        """Evaluate the document's first child node and keep it on ``self.value``."""
        _, _, _, children = parseinfo
        self.value = dispatch(self, children[0], src)

    def null(self, parseinfo, src):
        """Return ``None`` for a ``null`` node."""
        return None

    def true(self, parseinfo, src):
        """Return ``True`` for a ``true`` node."""
        return True

    def false(self, parseinfo, src):
        """Return ``False`` for a ``false`` node."""
        return False

    def json_object(self, parseinfo, src):
        """Return a dict built from the ``(key, value)`` pairs of the child nodes."""
        _, _, _, children = parseinfo
        pairs = dispatchList(self, children, src)
        return dict(pairs)

    def member(self, parseinfo, src):
        """Return one ``(key, value)`` pair; the node must have exactly two children."""
        _, _, _, (key_node, value_node) = parseinfo
        # The key is evaluated before the value, left to right.
        return (dispatch(self, key_node, src), dispatch(self, value_node, src))

    def string_json(self, parseinfo, src):
        """Return the node's text with its first and last character removed.

        Only the surrounding quote characters are dropped; escape sequences
        inside the string are not decoded.
        """
        quoted = getString(parseinfo, src)
        return quoted[1:-1]

    def array(self, parseinfo, src):
        """Return the evaluated child nodes as produced by ``dispatchList``."""
        _, _, _, children = parseinfo
        return dispatchList(self, children, src)

    # The two handlers below are named after the ``int`` and ``float``
    # productions. Inside their bodies the names still resolve to the
    # builtins, because a function body does not search its class namespace.
    def int(self, parseinfo, src):
        """Return the node's text converted with the builtin ``int``."""
        digits = getString(parseinfo, src)
        return int(digits)

    def float(self, parseinfo, src):
        """Return the node's text converted with the builtin ``float``."""
        literal = getString(parseinfo, src)
        return float(literal)

if __name__ == '__main__':
    # Benchmark: report the average seconds per parse of large_doc.json for
    # CodeTalker's JSON loader and for the SimpleParse-based loader above.
    import timeit
    from codetalker.contrib.json import loads as ctloads

    parser = JSONParser()
    loads = parser.loads
    # Read the sample document once; ``with`` closes the file afterwards.
    with open('large_doc.json') as infile:
        bigjson = infile.read()

    TRIALS = 25

    # The timed statement is the parse call itself. The import runs only
    # once, as setup, so the timed code can see the names defined in this
    # script. ``number=TRIALS`` makes the division below a true per-call
    # average instead of relying on timeit's default iteration count.
    ct_total = timeit.timeit(
        'ctloads(bigjson)',
        setup='from __main__ import ctloads, bigjson',
        number=TRIALS,
    )
    # A single parenthesised argument prints identically whether ``print`` is
    # a statement or a function.
    print("CodeTalker %s" % (ct_total / TRIALS))

    sp_total = timeit.timeit(
        'loads(bigjson)',
        setup='from __main__ import loads, bigjson',
        number=TRIALS,
    )
    print("SimpleParse %s" % (sp_total / TRIALS))