Commits

Martin Vejnár committed 242fc9f

Token position tracking, #line directives in the generated file.
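
The positions recorded on tokens let the generator prefix each user action
with a #line directive, so C++ compiler errors in grammar snippets point back
at the .lime source file rather than at the generated header. A minimal
sketch of the mapping (path and line number are made up for illustration):

    # Turn a recorded source position into a #line directive for the
    # generated C++; backslashes are doubled so Windows paths survive
    # inside the C++ string literal.
    filename, line = 'C:\\grammars\\calc.lime', 12
    directive = '#line %d "%s"' % (line, filename.replace('\\', '\\\\'))
    print directive   # -> #line 12 "C:\\grammars\\calc.lime"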

  • Parent commits 5052a29


Files changed (5)

     action_stubs = []
     action_functions = []
     for i, lex_rule in enumerate(g.lex_rules):
-        (lhs, lhs_name), (rhs, rhs_name), action = lex_rule
+        (lhs, lhs_name), (rhs, rhs_name), action, rule_pos = lex_rule
         if (rhs_name is None) != (action is None):
             raise RuntimeError('XXX 1')
         
                 action,
                 '}'
                 ]
+            if rule_pos is not None:
+                action_function.insert(2, '#line %d "%s"' % (rule_pos.line, rule_pos.filename.replace('\\', '\\\\')))
             action_functions.append('\n        '.join(action_function))
 
     sd = {}
                 ret_type = sym_annot[rule.left]
             else:
                 ret_type = 'void'
-            lime_actions.append("%s a%d(%s)\n{%s}\n" % (ret_type, i, ', '.join(param_list), rule.lime_action))
-#        else:
-#            lime_actions.append("void a%d(%s)\n{}\n" % (i, ', '.join(param_list)))
+            if rule.lime_action_pos:
+                line = '#line %d "%s"\n' % (rule.lime_action_pos.line, rule.lime_action_pos.filename.replace('\\', '\\\\'))
+            else:
+                line = ''
+            lime_actions.append("%s a%d(%s)\n%s{%s}\n" % (ret_type, i, ', '.join(param_list), line, rule.lime_action))
 
     def _get_action_row(lookahead):
         action_row = []

File lime_grammar.py

 from grammar import Grammar
 from lrparser import Parser
 from docparser import parser_LR, action, matcher
-from simple_lexer import simple_lexer
+from simple_lexer import simple_lexer, Token
 import types
 from lime_lexer import LimeLexer
 from fa import make_dfa_from_literal, union_fa, minimize_enfa
 
 def _make_rule(lhs, lhs_name, rhs_list, rule_action):
     r = Rule(lhs, tuple((rhs for rhs, rhs_name in rhs_list)))
-    r.lime_action = rule_action
+    if rule_action is not None:
+        r.lime_action = rule_action.text()
+        r.lime_action_pos = rule_action.pos()
+    else:
+        r.lime_action = None
+        r.lime_action_pos = None
     r.lhs_name = lhs_name
     r.rhs_names = [rhs_name for rhs, rhs_name in rhs_list]
     return r
     def _update_implicit_tokens(self, g):
         for lex_rhs, token_name in self.implicit_tokens.iteritems():
             if token_name not in self.processed_implicit_tokens:
-                g.lex_rules.append(((token_name, None), (lex_rhs, None), None))
+                g.lex_rules.append(((token_name, None), (lex_rhs, None), None, None))
                 g.token_comments[token_name] = lex_rhs
                 self.processed_implicit_tokens.add(token_name)
 
         """
         grammar = grammar, _kw_include, snippet;
         """
-        g.user_include = incl
+        g.user_include = incl.text()
+        g.user_include_pos = incl.pos()
         return g
 
     @action
         """
         grammar = grammar, _kw_token_type, snippet;
         """
-        g.token_type = token_type
+        g.token_type = token_type.text()
+        g.token_type_pos = token_type.pos()
         return g
 
     @action
         lex_stmt = lex_lhs, '~=', lex_rhs;
         lex_stmt = lex_lhs, '~=', lex_rhs, snippet;
         """
-        return (lhs, rhs, action)
+        if action is None:
+            return (lhs, rhs, None, None)
+        else:
+            return (lhs, rhs, action.text(), action.pos())
 
     @action
     def lex_lhs(self, lhs, lhs_name=None):
         lex_rhs = snippet;
         lex_rhs = snippet, '(', id, ')';
         """
-        return (LexRegex(rhs), rhs_name)
+        return (LexRegex(rhs.text()), rhs_name)
 
     @action
     def lex_rhs_lit(self, rhs, rhs_name=None):
         """
         type_stmt = id, '::', snippet;
         """
-        return (lhs, type)
+        return (lhs, type.text())
 
     @action
     def stmt_type_void(self, lhs, type):
         """
         named_item = snippet;
         """
-        snippet = snippet.strip()
+        snippet = snippet.text().strip()
         snippet = LexRegex(snippet)
         return self._lex_rhs(snippet)
 
             self.implicit_tokens[rhs] = tok_name
         return [(tok_name, None)]
 
-def lime_lexer(input):
-    for tok in simple_lexer(input, _LimeLexerClassify()):
-        if isinstance(tok, tuple):
+def lime_lexer(input, filename=None):
+    for tok in simple_lexer(input, _LimeLexerClassify(), filename=filename):
+        if isinstance(tok, (tuple, Token)):
             if tok[0] == 'id' and tok[1][:1] == '%':
-                yield ('kw_' + tok[1][1:], tok[1])
+                yield Token('kw_' + tok[1][1:], tok[1], tok.pos())
                 continue
             if tok[0] == 'snippet':
-                yield ('snippet', tok[1][:-1])
+                yield Token('snippet', tok[1][:-1], tok.pos())
                 continue
 
         yield tok
 
-def parse_lime_grammar(input):
+def _extract(tok):
+    return tok[1] if tok[0] != 'snippet' else tok
+
+def parse_lime_grammar(input, filename=None):
     p = _LimeGrammar()
     from lrparser import extract_second
-    return p.parse(lime_lexer(input), extract_value=extract_second)
+    return p.parse(lime_lexer(input, filename=filename), extract_value=_extract)
 
 def _lex(p, lex, text):
     g = p.grammar
 def _build_multidfa(lex_rules, allowed_syms=None):
     fas = []
     for i, lex_rule in enumerate(lex_rules):
-        (lhs, lhs_name), (rhs, rhs_name), action = lex_rule
+        (lhs, lhs_name), (rhs, rhs_name), action, pos = lex_rule
         if allowed_syms is not None and lhs not in allowed_syms:
             continue
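
Several hunks in this commit unpack a fourth element from each lexer rule;
the assumed shape of a g.lex_rules entry after the change is sketched below
(values are illustrative):

    # ((lhs, lhs_name), (rhs, rhs_name), action_or_None, rule_pos_or_None);
    # implicit tokens, as in _update_implicit_tokens, carry None for both
    # the action and its position.
    lex_rule = (('NUM', 'n'), ('[0-9]+', 'x'), 'return atoi(x.c_str());', None)
    (lhs, lhs_name), (rhs, rhs_name), action, rule_pos = lex_rule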
 
     if not options.output:
         options.output = os.path.splitext(fname)[0] + '.hpp'
 
+    fname = os.path.abspath(fname)
     input = open(fname, 'r').read()
 
     import sys
         print >>sys.stderr, '%s(%d): error : %s' % (fname, line, msg)
 
     try:
-        g = parse_lime_grammar(input)
+        g = parse_lime_grammar(input, filename=fname)
         p = make_lime_parser(g, keep_states=options.print_states)
 
         if options.print_states:

File lrparser.py

 from grammar import Grammar, Rule
 from first import First
 from matchers import default_matchers
+from simple_lexer import Token
 
 class InvalidGrammarError(Exception):
     """Raised during a construction of a parser, if the grammar is not LR(k)."""
     >>> extract_first(('item', 42))
     'item'
     """
-    return token[0] if isinstance(token, tuple) else token
+    return token[0] if isinstance(token, (tuple, Token)) else token
 
 def extract_second(token):
     """Returns the argument or, if it is a tuple, its second member.
     >>> extract_second(('item', 42))
     42
     """
-    return token[1] if isinstance(token, tuple) else token
+    return token[1] if isinstance(token, (tuple, Token)) else token
 
 class Parser(object):
     """Represents a LR(k) parser.
                 tok = get_shift_token()
                 if shift_visitor:
                     shift_visitor(tok)
-                if tok == None:
+                if tok is None:
                     if state_id == self.accepting_state:
                         assert len(asts) == 1
                         return asts[0]
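
Because Token implements __eq__, __getitem__ and __iter__ over its underlying
(kind, text) pair, tuple-based helpers such as extract_second and existing
comparisons keep working unchanged. A short sketch of the intended
equivalence, assuming the simple_lexer module from this commit (position
values are illustrative):

    from simple_lexer import Token, TokenPos

    tok = Token('id', 'expr', TokenPos('calc.lime', 3, 5))
    assert tok == ('id', 'expr')   # compares equal to the plain pair
    assert tok[1] == 'expr'        # indexing matches extract_second
    kind, text = tok               # tuple-style unpacking via __iter__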

File simple_lexer.py

         
         return ''
 
-def simple_lexer(input, classify=None):
+class TokenPos:
+    def __init__(self, filename, line, col):
+        self.filename = filename
+        self.line = line
+        self.col = col
+
+    def __str__(self):
+        return "%s(%d)" % (self.filename, self.line)
+
+class Token:
+    def __init__(self, kind, text, pos=None):
+        self.tok = (kind, text)
+        self._pos = pos
+
+    def __iter__(self):
+        return iter(self.tok)
+
+    def __len__(self):
+        return len(self.tok)
+
+    def __eq__(self, rhs):
+        if isinstance(rhs, Token):
+            return self.tok == rhs.tok
+        return self.tok == rhs
+
+    def __hash__(self):
+        return hash(self.tok)
+
+    def __str__(self):
+        return str(self.tok)
+
+    def __repr__(self):
+        if self._pos is not None:
+            return 'Token(%r, %r, %r)' % (self.tok[0], self.tok[1], self._pos)
+        else:
+            return 'Token(%r, %r)' % self.tok
+
+    def __getitem__(self, i):
+        return self.tok[i]
+
+    def kind(self):
+        return self.tok[0]
+
+    def text(self):
+        return self.tok[1]
+
+    def pos(self):
+        return self._pos
+
+def simple_lexer(input, classify=None, filename=None):
     """Provides a simple lexer.
     
     >>> list(simple_lexer('spam, eggs # comment'))
     
     lit = ''
     last_cl = None
+
+    line_no = 1
+    column_no = 1
+    tok_pos = TokenPos(filename, 1, 1)
     for ch in input:
         cl = classify(ch)
-        if cl == False:
-            continue
+        if cl is not False:
+            if lit and cl != last_cl:
+                yield Token(last_cl, lit, tok_pos)
+                tok_pos = TokenPos(filename, line_no, column_no)
+                lit = ''
         
-        if lit and cl != last_cl:
-            yield (last_cl, lit)
-            lit = ''
-        
-        last_cl = cl
-        if cl == '':
-            yield ch
-        elif cl:
-            lit += ch
+            last_cl = cl
+            if cl == '':
+                yield Token(ch, ch, tok_pos)
+                tok_pos = TokenPos(filename, line_no, column_no)
+            elif cl:
+                lit += ch
+
+        if ch == '\n':
+            line_no += 1
+            column_no = 1
+        else:
+            column_no += 1
+
     if lit:
-        yield (last_cl, lit)
+        yield Token(last_cl, lit, tok_pos)
 
 if __name__ == '__main__':
     import doctest
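
A brief usage sketch of the reworked lexer; the exact token kinds depend on
the default classifier, so the sample output is indicative only:

    from simple_lexer import simple_lexer

    # With a filename supplied, every yielded Token remembers where it started.
    for tok in simple_lexer('spam, eggs', filename='example.txt'):
        print '%s: %s' % (tok.pos(), tok)
    # e.g. example.txt(1): ('id', 'spam')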