Source

Socrates / src / socrates / parser / prdparser / parse.py

Full commit
#!/usr/bin/env python
# -*- coding:utf-8 -*-

"""
Predicate expression parser
>>> parse = ExprParser()
>>> parse('name')
('name', None)
>>> parse('name:string')
('name', 'string')
>>> parse("visit page")
('visit page', None)
>>> parse("visit page:string")
('visit page', 'string')
>>> parse('"test":"subject"')
('test', 'subject')
>>> parse('"test subject":"subject"')
('test subject', 'subject')
>>> parse('"test subject:page":"subject"')
('test subject:page', 'subject')
>>> parse('"test subject":"string type"')
('test subject', 'string type')
>>> parse('"test subject:page":"subject array"')
('test subject:page', 'subject array')
>>> parse('"test subject:page":"subject array:integer"')
('test subject:page', 'subject array:integer')
>>> parse('"test subject:page":"subject array:integer"')
('test subject:page', 'subject array:integer')
"""

from spark import GenericParser
from scanner import Token, PrdExprScanner

class AST(object):
    """Parse-tree node produced by the grammar actions.

    Attributes:
        type: the node kind passed by the caller (e.g. ``'atom'``).
        attr: the ``attr`` strings of all children joined in order, i.e.
            the source text covered by this node.

    Any extra keyword arguments become attributes of the node.
    """

    def __init__(self, type, args, **argv):
        """Build a node of kind *type* from the child nodes/tokens *args*.

        Every element of *args* must expose a string ``attr`` attribute.
        """
        self.type = type
        self.attr = ''.join(arg.attr for arg in args)
        # items() exists on both Python 2 and Python 3 dicts, whereas
        # iteritems() is Python 2 only.
        for key, value in argv.items():
            setattr(self, key, value)

    def __repr__(self):
        """Return the text covered by this node."""
        return self.attr

class PrdExprParser(GenericParser):
    """Grammar and tree-building actions for ``term[:term]`` expressions.

    NOTE(review): the docstrings of the ``p_*`` methods are grammar
    productions rather than documentation -- presumably GenericParser
    reads them when the parser is constructed (confirm against the spark
    module) -- so they must stay verbatim.  Explanations for those methods
    therefore live in ``#`` comments.
    """

    def __init__(self):
        """Initialise the base parser, passing it the symbol ``expr``."""
        # "expr" is presumably the grammar's start symbol -- confirm
        # against GenericParser's constructor.
        GenericParser.__init__(self, "expr")

    def typestring(self, token):
        """Return the ``type`` attribute of *token*."""
        return token.type

    def error(self, token):
        """Report a syntax error at *token* and terminate.

        Raises:
            SystemExit: always, after printing the message.
        """
        # A single parenthesised argument prints identically on Python 2
        # and keeps the module parseable on Python 3.
        print("Syntax error at `%s'" % token)
        raise SystemExit

    # Wraps whatever one of the productions below matched in an 'atom' node.
    def p_atom(self, args):
        """
        atom ::= chip
        atom ::= quoted
        atom ::= \\ "
        atom ::= \\ '
        atom ::= ' expr '
        atom ::= " expr "
        """
        return AST('atom', args)

    # A term is one or more consecutive atoms, wrapped in a 'term' node.
    def p_term(self, args):
        """
        term ::= term atom 
        term ::= atom
        """
        return AST('term', args)

    # An expression is a predicate term, optionally followed by ':' and an
    # object term.  The node records them as ``predicate`` and ``object``;
    # ``object`` is None when only one item was matched.
    def p_expr(self, args):
        """
        expr ::= term : term
        expr ::= term
        """
        if len(args) == 3:
            obj = args[2]
        else:
            obj = None
        return AST('expr', args, predicate=args[0], object=obj)
    
def parse(tokens):
    """Parse *tokens* and return the ``(predicate, object)`` nodes.

    Args:
        tokens: the token sequence to hand to ``PrdExprParser.parse``.

    Returns:
        A 2-tuple of the ``predicate`` and ``object`` attributes of the
        resulting expression node; ``object`` is None when the expression
        has no ``:`` part.
    """
    parser = PrdExprParser()
    # The previous body never ran the parser and read from an undefined
    # name, so every call raised NameError.
    ast = parser.parse(tokens)
    return ast.predicate, ast.object

class ExprParser(object):
    """Callable that turns an expression string into ``(predicate, object)``.

    Both parts are returned as plain strings with one pair of surrounding
    quotes removed; ``object`` is None when the expression has no object
    part.
    """

    def __init__(self):
        """Create the scanner and parser that every call reuses."""
        self.scanner = PrdExprScanner()
        self.parser = PrdExprParser()

    def unshell(self, string):
        """Strip one pair of matching surrounding quotes from *string*.

        The string is returned unchanged unless it is at least two
        characters long and starts and ends with the same quote character
        (``"`` or ``'``).  The length check keeps an empty string from
        raising IndexError and a lone quote from collapsing to ``''``.
        """
        quoted = (
            len(string) >= 2
            and string[0] in ('"', "'")
            and string[0] == string[-1]
        )
        if quoted:
            return string[1:-1]
        return string

    def __call__(self, expr):
        """Scan and parse *expr*; return the unquoted predicate and object."""
        ast = self.parser.parse(self.scanner.tokenize(expr))
        predicate = self.unshell(ast.predicate.attr)
        if ast.object is None:
            return predicate, None
        return predicate, self.unshell(ast.object.attr)

if __name__ == "__main__":
    # Executed as a script: run the doctests embedded in this module.
    from doctest import testmod
    testmod()