trac-gviz / trac-dev / gviz / tracgviz/util/parsing.py

#!/usr/bin/env python

# Copyright 2009-2011 Olemis Lang <olemis at gmail.com>
#
#   Licensed under the Apache License, Version 2.0 (the "License");
#   you may not use this file except in compliance with the License.
#   You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.

r"""Generic parsing algorithms.

Copyright 2009-2011 Olemis Lang <olemis at gmail.com>
Licensed under the Apache License, Version 2.0 
"""
__author__ = 'Olemis Lang'

__all__ = 'OperatorPrecedenceParser', 'Any', 'EndMarker', 'NonTerminal'

__metaclass__ = type

from itertools import dropwhile, ifilter, izip
import logging
from pygments.token import *

#------------------------------------------------------
#   Operator precedence grammars
#------------------------------------------------------

class OperatorPrecedenceParser:
  r"""
  Shift-reduce parser for operator precedence grammars based on precedence
  matrix.
  """

  MorePrecedence = Token.Grammar.Relationship.MorePrecedence
  SamePrecedence = Token.Grammar.Relationship.SamePrecedence
  LessPrecedence = Token.Grammar.Relationship.LessPrecedence
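
  # These are the classic operator precedence relations between adjacent
  # terminals:  a <. b means b begins a nested handle (shift), a =. b means
  # a and b belong to the same handle (shift), and a .> b means a ends a
  # handle (reduce).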

  def __init__(self):
    r"""Initialize with empty precendence table and skeletal grammar.
    """
    self.set_productions()
    self.verbose = False

  def set_productions(self, start_state=None, *productions):
    r"""Rebuild precedence and production look up tree according to 
    a new set of grammar productions. All previous state is discarded.
    """
    self.start_state = None
    self.precedence = {}
    self.productions_tree = {}

    self.precedence, self.productions_tree = self.process_productions(
        *productions)

    # Everything went fine, so set the new start state now
    self.start_state = start_state

  @classmethod
  def gen_precedence(cls, pseudo_precedence):
    r"""Generates a precedence table from a more readable representation 
    of the form 

    {
        SrcToken : {
            Token.Grammar.Relationship.MorePrecedence : { DstToken1, ... },
            Token.Grammar.Relationship.SamePrecedence : { DstToken2, ... },
            Token.Grammar.Relationship.LessPrecedence : { DstToken3, ... }
          },
        ...
    }
    """
    return dict(
      [ (tkn1, tkn2), prec] \
          for tkn1, v1 in pseudo_precedence.iteritems() \
          for prec, v2 in v1.iteritems() \
          for tkn2 in v2
    )
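
  # Comment-only illustration (hypothetical tokens, not part of the module):
  # given
  #   pseudo = { (Operator, u'+') :
  #                { cls.LessPrecedence : set([ (Operator, u'*') ]) } }
  # gen_precedence(pseudo) flattens it into the pairwise matrix
  #   { ((Operator, u'+'), (Operator, u'*')) : cls.LessPrecedence }
  # i.e. one ((src, dst) -> relationship) entry per destination token.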

  @classmethod
  def process_productions(cls, *productions):
    r"""Generate precedence and production look up tree according to 
    a set of grammar productions.
    """
    nfirst = {}
    nlast = {}
    pre = {}
    post = {}
    precedence = {}
    productions_tree = {}

    if productions:

      def _update(state, nt_rel, t_rel, terminal=None, nt=None):
        r"""Incremental update of relationship adding termminals
        """
        logging.debug('_update: state=%s terminal=%s nt=%s', state, terminal, nt)
        new_terminals = set([terminal]) if terminal is not None else set()
        # Add terminal to PRE / POST
        if nt is not None and nt != state:
          if nt in nt_rel.setdefault(state, set()):
            # Dependency loop
            raise InvalidProduction('Failed to establish precedence (loop)')
          # Add non-terminal to NFIRST / NLAST
          nt_rel.setdefault(nt, set()).add(state)
          # Propagate terminals related to non-terminal as well
          new_terminals.update(t_rel.setdefault(nt, set()))
        if new_terminals:
          # Propagate changes
          pend = list( nt_rel.setdefault(state, set()) ) + [state] 
          already = set()
          while pend:
            s = pend.pop()
            if s not in already:
              t_rel.setdefault(s, set()).update(new_terminals)
              pend += list( nt_rel.setdefault(s, set()) )
            already.add(s)

      for prod_id, state, ps in productions:
        if not ps:
          raise InvalidProduction(cls, 'Empty productions not allowed')

        # Update skeletal production path
        choices = productions_tree
        for subtkn, subval in reversed(ps):
          if len(ps) > 1 or subtkn not in NonTerminal:
            choices = choices.setdefault(
                (subtkn, Any if subtkn in NonTerminal else subval), {})
        # The loop never breaks, so once every symbol has been walked mark
        # the end of the production path with the production ID.
        choices[EndMarker] = prod_id

        # Populate NFIRST, NLAST, PRE, POST
        subtkn, subval = ps[0]
        if len(ps) == 1:
          if subtkn in NonTerminal:
            if state == subval:
              raise InvalidProduction(cls, 'Infinite recursion state=' + state)
            else:
              # PRE and POST for subval should be propagated back to state
              _update(state, nfirst, pre, nt=subval)
              _update(state, nlast, post, nt=subval)
          else:
            _update(state, nfirst, pre, terminal=(subtkn, subval))
            _update(state, nlast, post, terminal=(subtkn, subval))
        else:
          # Update PRE (NFIRST)
          if subtkn in NonTerminal:
            _update(state, nfirst, pre, terminal=ps[1], nt=subval)
          else:
            _update(state, nfirst, pre, terminal=(subtkn, subval))
          # Update POST (NLAST)
          subtkn, subval = ps[-1]
          if subtkn in NonTerminal:
            _update(state, nlast, post, terminal=ps[-2], nt=subval)
          else:
            _update(state, nlast, post, terminal=(subtkn, subval))

      # Build precedence table and productions lookup tree
      for prod_id, state, ps in productions:
        prev_t = prev_nt = None
        is_last_nt = False
        for subtkn, subval in ps:
          if subtkn in NonTerminal:
            if is_last_nt:
              raise InvalidProduction("Consecutive non-terminals in production")
            else:
              if prev_t:
                # Terminals in PRE have less precedence
                for terminal in pre.setdefault(subval, set()):
                  key = (prev_t, terminal)
                  if precedence.get(key) not in (None, cls.LessPrecedence):
                    raise InvalidParserConfiguration(
                        "Failed to establish precedence")
                  precedence[key] = cls.LessPrecedence
            prev_nt = subval
          else:
            if prev_t:
              # Adjacent terminals have the same precedence
              key = (prev_t, (subtkn, subval))
              if precedence.get(key) not in (None, cls.SamePrecedence):
                raise InvalidParserConfiguration(
                    "Failed to establish precedence")
              precedence[key] = cls.SamePrecedence
            if prev_nt:
              # Terminals in previous non-terminal's POST have more precedence
              for terminal in post.setdefault(prev_nt, set()):
                key = (terminal, (subtkn, subval))
                if precedence.get(key) not in (None, cls.MorePrecedence):
                  raise InvalidParserConfiguration(
                      "Failed to establish precedence")
                precedence[key] = cls.MorePrecedence
            prev_t = (subtkn, subval)
            # Non-terminal has to be adjacent to terminal
            prev_nt = None
          is_last_nt = subtkn in NonTerminal

      # Remove end marker possibly left at the productions tree root by
      # productions consisting of a single non-terminal
      try:
        del productions_tree[EndMarker]
      except KeyError:
        pass

    # End-markers precedence
    for nt, nt_pre in pre.iteritems():
      for terminal in nt_pre:
        precedence[((EndMarker, nt), terminal)] = cls.LessPrecedence
    for nt, nt_post in post.iteritems():
      for terminal in nt_post:
        precedence[(terminal, (EndMarker, nt))] = cls.MorePrecedence

    return precedence, productions_tree
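
  # For illustration, assuming a single hypothetical production
  # ('var', 'V', [(Name, u'x')]), this method yields the end-marker
  # precedence entries
  #   { ((EndMarker, 'V'), (Name, u'x')) : LessPrecedence,
  #     ((Name, u'x'), (EndMarker, 'V')) : MorePrecedence }
  # and the (reversed) production lookup tree
  #   { (Name, u'x') : { EndMarker : 'var' } }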

  def parse(self, stream, on_reduce, start_state=None, on_accept=None):
    r"""Parse a `stream` of tokens.

    :param on_reduce:   Optional callable invoked every time a reduction is 
                        performed according to some grammar production.
                        It receives the production ID as first parameter.
                        Subsequent positional arguments are the values
                        matched for each symbol in the target production.
                        Any value it returns is propagated in the form of
                        a `NonTerminal` token and becomes available in
                        subsequent reductions involving that token.
    :param start_state: The name of a non-terminal symbol. This parameter 
                        allows for parsing a subset of the target grammar.
                        If missing, parsing will start at the grammar's
                        top-level start state.
    :return:            the last object returned by the `on_reduce`
                        callback, optionally transformed by `on_accept`
    :raise SyntaxError: if invalid syntax is detected
    :raise InvalidParserConfiguration: if no suitable start state is found
    """
    if start_state is None:
      start_state = self.start_state
    if start_state is None:
      raise InvalidParserConfiguration(self, "Missing start state")
    if on_reduce is None:
      on_reduce = lambda *args: None
    # Auxiliary variables
    SHIFT_PREC = (self.SamePrecedence, self.LessPrecedence)
    REDUCE_PREC = (self.MorePrecedence,)
    # Initial state
    pushdown_list = [(EndMarker, start_state)]
    last_tkn, last_val = EndMarker, start_state
    tkn = None        # Force reading next token from stream
    while True:
      is_last_nt = last_tkn is NonTerminal
      if is_last_nt:
        last_tkn, last_val = pushdown_list[-2]
      if tkn is None:
        try:
          tkn, val = stream.next()
        except StopIteration:
          tkn, val = (EndMarker, start_state)
      if is_last_nt and last_tkn is EndMarker and tkn is EndMarker:
        # Accept token stream !!!
        result = pushdown_list[-1][1]
        if on_accept is not None:
          result = on_accept(result)
        return result
      candidates = ( ((_lt, _lv) , (_t, _v)) \
          for _lt in reversed(last_tkn.split()) for _t in reversed(tkn.split()) \
          for _lv in (last_val, Any) for _v in (val, Any) )
      try:
        precedence = ifilter(None, 
            (self.precedence.get(c) for c in candidates)).next()
      except StopIteration:
        if tkn is EndMarker:
          raise SyntaxError("Unexpected EOL" + 
              ("\n\n" + str(pushdown_list) if self.verbose else "") )
        else:
          raise SyntaxError("Unexpected token %s.%s" % (val, 
              ("\n\n" + unicode(pushdown_list) if self.verbose else "") ) )
      else:
        logging.debug('Precedence %s,%s |--> %s,%s: %s', 
            last_tkn, last_val, tkn, val, precedence)
      if precedence in SHIFT_PREC:
        # Shift
        pushdown_list.append((tkn, val))
        last_tkn, last_val = tkn, val
        tkn = None        # Force reading next token from stream
      elif precedence in REDUCE_PREC:
        # Reduce
        try:
          prod_id, args = self._match_production(pushdown_list)
        except LookupError:
          raise SyntaxError('Unexpected token after %s.%s' % 
              (last_val, ("\n\n" + str(pushdown_list) if self.verbose else "") ) )
        pushdown_list.append((NonTerminal, on_reduce(prod_id, *args)))
        last_tkn, last_val = pushdown_list[-1]
      else:
        raise InvalidParserConfiguration(self, "Invalid precedence " + 
            str(precedence))

  def _match_token(self, reftkn, tkn):
    r"""Match a token `tkn` against expected token `reftkn`.
    """
    if reftkn is Any:
      return True
    elif reftkn is EndMarker:
      return False
    else:
      _reftype, _refval = reftkn
      _type, _val = tkn
      return (_reftype is Any or _type in _reftype) and \
          (_refval is Any or _refval == _val)
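
  # For instance (hedged illustration): a reference token (Name, Any)
  # matches the token (Name.Variable, u'x'), since Name.Variable is a
  # pygments subtype of Name and the Any wildcard matches every value.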

  def _match_any(self, tkn, choices):
    r"""Find the first match for token `tkn` against a sequence of `choices`.
    """
    try:
      _, newchoices = dropwhile(lambda (t, _): not self._match_token(t, tkn), 
          choices).next()
    except StopIteration:
      return None
    else:
      return newchoices

  def _match_production(self, pushdown_list):
    r"""Match production on reduce
    """
    idx = 0
    last_match = None
    choices = self.productions_tree
    while choices:
      last_match = choices
      idx -= 1
      tkn = pushdown_list[idx]
      logging.debug('Last token %s', tkn)
      choices = self._match_any(tkn, choices.iteritems())
      logging.debug('Match %s, pushdown list %s, idx %s',
          bool(choices), pushdown_list, idx)
    if last_match and EndMarker in last_match:
      idx += 1
      args = pushdown_list[idx:]
      for _ in xrange(idx, 0):
        pushdown_list.pop()
      logging.debug('Prod id %s with args %s', last_match[EndMarker], args)
      return last_match[EndMarker], args
    else:
      raise LookupError("Could not match grammar against pushdown list")

#------------------------------------------------------
#   Helper functions and objects
#------------------------------------------------------

# Intermediate tokens representing non-terminal grammar symbols
NonTerminal = Token.Grammar.NonTerminal
# Wildcard token used in token matching.
Any         = Token.Grammar.Any
# Token used to delimit segments in a token stream
EndMarker   = Token.Grammar.EndMarker

#------------------------------------------------------
#   Exception classes
#------------------------------------------------------

class ParserError(RuntimeError):
  r"""Error condition detected at parsing time.
  """
  def __init__(self, parser, *args):
    r"""Initialize ecception object with parser and arguments
    """
    self.parser = parser
    RuntimeError.__init__(self, *args)

  def __unicode__(self):
    cls = self.parser \
        if isinstance(self.parser, type) else self.parser.__class__
    return "Parser %s failed: %s" % (cls.__name__,
        RuntimeError.__unicode__(self))

class InvalidParserConfiguration(ParserError):
  r"""Wrong parser configuration detected.
  """

class InvalidProduction(InvalidParserConfiguration):
  r"""Grammar production is either malformed or breaks a pre-condition 
  for using target parser.
  """

#------------------------------------------------------
#   Global Testing
#------------------------------------------------------

from tracgviz.testing.test_parsing import __test__, SAMPLE_GRAMMAR_PRECEDENCE, \
    SAMPLE_GRAMMAR_PRODUCTIONS, SAMPLE_INPUT_STREAM, SAMPLE_GRAMMAR, Parser, \
    CloseP, Var, Multiply, Add, OpenP, EndE

def test_suite():
  from doctest import ELLIPSIS, NORMALIZE_WHITESPACE, REPORT_UDIFF
  from unittest import defaultTestLoader
  from string import whitespace
  import sys

  from tracgviz.api import GVizInvalidQuery
  from tracgviz.testing.dutest import MultiTestLoader, DocTestLoader

  # logging.basicConfig(level=logging.DEBUG)

  def parse(expr, *attrs, **kwds):
    # Test lexical analysis
    print "*****\n* Tokens\n*****"
    newline = False
    p = GVizQLParser()
    p.noisy = False
    for tkn, val in p.get_tokens(expr):
      if tkn is not Whitespace:
        if newline:
          print
        else:
          newline = True
        print tkn, val,
    if not (tkn is Whitespace and val == '\n'):  # Check for EOL token.
      raise AssertionError('Expected : %s , %r \n\nGot : %s , %r' % \
                                  (Whitespace, u'\n', tkn, val))
    if attrs :
      # Test parsing and compilation
      print
      print "*****\n* Parsing\n*****",
      from api import GVizUnsupportedQueryOp
      try:
        expr = p.parse(expr)
      except GVizUnsupportedQueryOp :
        print '\nNotSupported  :(',
      except GVizInvalidQuery, exc :
        print 
        print exc.message
      else:
        for attrnm in attrs :
          print
          print getattr(expr, attrnm),
        # Test evaluation and transformations on result sets
        print
        print "*****\n* Result\n*****",
        try:
          schema, data = expr.transform(TEST_DATA_GVIZSCHEMA, \
                                        TEST_DATA_GVIZDATA)
        except Exception, exc:
          print
          print exc.__class__.__name__, ' : ', exc.message,
        else:
          print
          print "= Columns =",
          if isinstance(schema, dict):
            def iter_schema():
              return ((k,) + v for k, v in schema.iteritems())
          else:
            iter_schema = lambda : schema
          for col in iter_schema() :
            print
            print col[0], col[1],
            try :
              print col[2],
            except IndexError :
              pass
          print 
          for row in data :
            print "= Row ="
            for val, col in izip(row, iter_schema()):
              print '  ', col[0], '=', val

  l = MultiTestLoader([defaultTestLoader, \
                        DocTestLoader(
                            extraglobs=dict(parse=parse),
                            optionflags=ELLIPSIS \
                                        | NORMALIZE_WHITESPACE \
                                        | REPORT_UDIFF \
                          )])
  return l.loadTestsFromModule(sys.modules[__name__])