# trac-gviz / trac-dev / gviz / tracgviz / testing / test_parsing.py

r"""Test cases and test data for parsers.
"""
__author__ = 'Olemis Lang'

from pygments.token import *

from tracgviz.util.parsing import Any, EndMarker, NonTerminal, \
    OperatorPrecedenceParser as Parser

# Modified version of sample operator precedence grammar in
# The Theory of Parsing, Translation, and Compiling, A.V. Aho, J.B. Ullman
#     Volume 1: Parsing
#
# Grammar
# =======
#
#  1. E -> E + T
#  2. E -> T
#  3. T -> T * F
#  4. T -> F
#  5. F -> ( E )
#  6. F -> a

# Symbols of the sample grammar.  Each one is a (token type, token value)
# pair, the shape the parser expects for both terminals and non-terminals.
E = (NonTerminal, 'E')        # expression non-terminal
T = (NonTerminal, 'T')        # term non-terminal
F = (NonTerminal, 'F')        # factor non-terminal
Multiply = (Operator, '*')    # multiplication operator
Add = (Operator, '+')         # addition operator
OpenP = (Punctuation, '(')    # opening parenthesis
CloseP = (Punctuation, ')')   # closing parenthesis
EndE = (EndMarker, 'E')       # end marker for start symbol E
Var = (Name, Any)             # any identifier (terminal `a` in the grammar)

# Productions of the sample grammar as (id, head, body) triples.  Production
# ids are consecutive strings '1'..'6', derived from position in the list.
SAMPLE_GRAMMAR = [
    (str(idx + 1), head, body)
    for idx, (head, body) in enumerate([
        ('E', [E, Add, T]),         # 1. E -> E + T
        ('E', [T]),                 # 2. E -> T
        ('T', [T, Multiply, F]),    # 3. T -> T * F
        ('T', [F]),                 # 4. T -> F
        ('F', [OpenP, E, CloseP]),  # 5. F -> ( E )
        ('F', [Var]),               # 6. F -> a
      ])
  ]

# Compact precedence table for the sample grammar.  For each token on the
# left, the mapping lists the tokens related to it by each precedence
# relation (MorePrecedence / LessPrecedence / SamePrecedence).
SAMPLE_GRAMMAR_PRECEDENCE = {
    CloseP: {Parser.MorePrecedence: [Multiply, Add, CloseP, EndE]},
    Var: {Parser.MorePrecedence: [Multiply, Add, CloseP, EndE]},
    Multiply: {
        Parser.MorePrecedence: [Multiply, Add, CloseP, EndE],
        Parser.LessPrecedence: [OpenP, Var],
      },
    Add: {
        Parser.MorePrecedence: [Add, CloseP, EndE],
        Parser.LessPrecedence: [Multiply, OpenP, Var],
      },
    OpenP: {
        Parser.LessPrecedence: [OpenP, Var, Multiply, Add],
        Parser.SamePrecedence: [CloseP],
      },
    EndE: {Parser.LessPrecedence: [OpenP, Var, Multiply, Add]},
  }

# Expand the compact table above into the parser's internal representation.
SAMPLE_GRAMMAR_PRECEDENCE = Parser.gen_precedence(SAMPLE_GRAMMAR_PRECEDENCE)

# Productions lookup tree.  Nested dicts keyed by token; each innermost
# EndMarker entry holds the id of the production to reduce by.
# NOTE(review): branches appear to follow the production's right-hand side
# right-to-left, with reduced symbols matched as (NonTerminal, Any) — confirm
# against OperatorPrecedenceParser before relying on this description.
SAMPLE_GRAMMAR_PRODUCTIONS = {
    # 6. F -> a
    (Name, Any): {EndMarker: '6'},
    # 5. F -> ( E )
    (Punctuation, ')'): {
        (NonTerminal, Any): {(Punctuation, '('): {EndMarker: '5'}},
      },
    # 3. T -> T * F  and  1. E -> E + T
    (NonTerminal, Any): {
        (Operator, '*'): {(NonTerminal, Any): {EndMarker: '3'}},
        (Operator, '+'): {(NonTerminal, Any): {EndMarker: '1'}},
      },
  }

# Token stream for the sample input expression:
#
#     (a + a) * a
SAMPLE_INPUT_STREAM = [
    (Punctuation, '('),
    (Name, 'a'),
    (Operator, '+'),
    (Name, 'a'),
    (Punctuation, ')'),
    (Operator, '*'),
    (Name, 'a'),
  ]

# Doctests picked up by the test runner through the `__test__` protocol
# (see the stdlib `doctest` module).  The docstring below is executable:
# its expected output is significant byte-for-byte, so edit with care.
# NOTE: written in Python 2 syntax (`print x,` statement), matching the
# rest of the code base.
__test__ = {
  'Operator precedence grammar: Simple example' : r"""
      >>> from tracgviz.util.parsing import OperatorPrecedenceParser
      >>> p = OperatorPrecedenceParser()

      It's possible to set productions lookup tree and precedence directly

      >>> p.productions_tree = SAMPLE_GRAMMAR_PRODUCTIONS
      >>> p.precedence = SAMPLE_GRAMMAR_PRECEDENCE
      >>> p.start_state = 'E'

      >>> stream = iter(SAMPLE_INPUT_STREAM)

      >>> parse_order = []
      >>> def store_parse_order(production_id, *args):
      ...   parse_order.append(production_id)

      >>> p.parse(stream, store_parse_order)
      >>> ' '.join(parse_order)
      '6 6 1 5 6 3'

      However most of the time it will be easier to set productions
      and let the parser build productions lookup tree and precedence

      >>> from pprint import pprint

      >>> p.set_productions('E', *SAMPLE_GRAMMAR)
      >>> for tkn1 in [CloseP, Var, Multiply, Add, OpenP, EndE]:
      ...   for tkn2 in [OpenP, Var, Multiply, Add, CloseP, EndE]:
      ...     try:
      ...       prec = p.precedence[tkn1, tkn2]
      ...     except KeyError:
      ...       pass
      ...     else:
      ...       print tkn1, \
      ...           '<' if prec in Parser.LessPrecedence \
      ...               else ('>' if prec in Parser.MorePrecedence \
      ...               else ('=' if prec in Parser.SamePrecedence
      ...               else prec)), tkn2
      ... 
      (Token.Punctuation, ')') > (Token.Operator, '*')
      (Token.Punctuation, ')') > (Token.Operator, '+')
      (Token.Punctuation, ')') > (Token.Punctuation, ')')
      (Token.Punctuation, ')') > (Token.Grammar.EndMarker, 'E')
      (Token.Name, Token.Grammar.Any) > (Token.Operator, '*')
      (Token.Name, Token.Grammar.Any) > (Token.Operator, '+')
      (Token.Name, Token.Grammar.Any) > (Token.Punctuation, ')')
      (Token.Name, Token.Grammar.Any) > (Token.Grammar.EndMarker, 'E')
      (Token.Operator, '*') < (Token.Punctuation, '(')
      (Token.Operator, '*') < (Token.Name, Token.Grammar.Any)
      (Token.Operator, '*') > (Token.Operator, '*')
      (Token.Operator, '*') > (Token.Operator, '+')
      (Token.Operator, '*') > (Token.Punctuation, ')')
      (Token.Operator, '*') > (Token.Grammar.EndMarker, 'E')
      (Token.Operator, '+') < (Token.Punctuation, '(')
      (Token.Operator, '+') < (Token.Name, Token.Grammar.Any)
      (Token.Operator, '+') < (Token.Operator, '*')
      (Token.Operator, '+') > (Token.Operator, '+')
      (Token.Operator, '+') > (Token.Punctuation, ')')
      (Token.Operator, '+') > (Token.Grammar.EndMarker, 'E')
      (Token.Punctuation, '(') < (Token.Punctuation, '(')
      (Token.Punctuation, '(') < (Token.Name, Token.Grammar.Any)
      (Token.Punctuation, '(') < (Token.Operator, '*')
      (Token.Punctuation, '(') < (Token.Operator, '+')
      (Token.Punctuation, '(') = (Token.Punctuation, ')')
      (Token.Grammar.EndMarker, 'E') < (Token.Punctuation, '(')
      (Token.Grammar.EndMarker, 'E') < (Token.Name, Token.Grammar.Any)
      (Token.Grammar.EndMarker, 'E') < (Token.Operator, '*')
      (Token.Grammar.EndMarker, 'E') < (Token.Operator, '+')

      >>> pprint(p.productions_tree)
      {(Token.Grammar.NonTerminal, Token.Grammar.Any): {(Token.Operator, '*'): {(Token.Grammar.NonTerminal, Token.Grammar.Any): {Token.Grammar.EndMarker: '3'}},
                                                        (Token.Operator, '+'): {(Token.Grammar.NonTerminal, Token.Grammar.Any): {Token.Grammar.EndMarker: '1'}}},
       (Token.Name, Token.Grammar.Any): {Token.Grammar.EndMarker: '6'},
       (Token.Punctuation, ')'): {(Token.Grammar.NonTerminal, Token.Grammar.Any): {(Token.Punctuation, '('): {Token.Grammar.EndMarker: '5'}}}}

      >>> stream = iter(SAMPLE_INPUT_STREAM)

      >>> parse_order = []

      >>> p.parse(stream, store_parse_order)
      >>> ' '.join(parse_order)
      '6 6 1 5 6 3'

      """,
  }