Commits

Martin Vejnár committed 7b19570

A bunch of fixes.

Comments (0)

Files changed (6)

         self.rules.extend(item.rules)
 
 def _pretty_forward_args(self, *args):
+    if len(args) == 0:
+        return None
     if len(args) == 1:
         return args[0]
     return args
         
         for symbol in rule.right:
             self._symbols.add(symbol)
-    
+
+    def add_symbol(self, symbol):
+        """Adds a symbol without adding any rule that references it."""
+        self._symbols.add(symbol)
+
     def extend(self, rules):
         """Extends the grammar by the given set of rules."""
         for rule in rules:
-from lime_grammar import LexDiscard, LexRegex
 from lime_grammar import LexDiscard, LexRegex
 from dfa import make_dfa_from_literal
 
         action_stub = ['static void stub_%s(basic_lexer & l, TokenSink & token_sink)' % i, '{']
         if not isinstance(g.sym_annot.get(lhs), LexDiscard):
             if action is None:
-                if g.sym_annot[lhs] is None:
+                if g.sym_annot.get(lhs) is None:
                     action_stub.append('    token_sink.push_token(tok::%s);' % lhs.lower())
                 else:
                     action_stub.append('    token_sink.push_token(tok::%s, (%s)l.m_token);'
 def lime_cpp(p):
     g = p.grammar
     dfa = p.lexer
-    lower_terminals = True
 
     sym_annot = dict(g.sym_annot.iteritems())
     for sym in g.symbols():
             else:
                 sym_annot[sym] = None
         else:
-            if sym_annot[sym] is not None:
+            if sym_annot[sym] is not None and not isinstance(sym_annot[sym], LexDiscard):
                 sym_annot[sym] = sym_annot[sym].strip()
 
     syms_by_type = {}
     nonterm_indexes = dict([(nonterm, i) for i, nonterm in enumerate(g.nonterms())])
     term_indexes = dict([(term, i) for i, term in enumerate(g.terminals())])
 
-    ast_stacks = ['std::vector<%s> m_ast_stack_%d;' % (annot, i)
+    ast_stacks = ['std::vector<%s > m_ast_stack_%d;' % (annot, i)
         for annot, i in annot_indexes.iteritems() if annot is not None and not isinstance(annot, LexDiscard)]
 
-    if lower_terminals:
-        tokens = [term.lower() for term in g.terminals()]
-    else:
-        tokens = sorted(g.terminals())
+    tokens = [term.lower() for term in g.terminals()]
+    token_lines = []
+    for term in g.terminals():
+        if g.token_comments.get(term) is not None:
+            token_lines.append('%s, // %s' % (term.lower(), g.token_comments[term]))
+        else:
+            token_lines.append('%s,' % term.lower())
 
     root_type = sym_annot.get(g.root())
 
         if rule.lime_action is None:
             # This must be either a typed non-terminal with exactly one
             # typed rhs symbol, or a void non-terminal with no typed rhs symbols
-            if sym_annot[rule.left] is None:
-                if len(idx_counts) != 0:
-                    raise RuntimeError('XXX 4') # This should probably be done before the generation begins
-            else:
+            if sym_annot[rule.left] is not None:
                 if (len(idx_counts) != 1 or idx_counts.values()[0] != 1
                         or idx_counts.keys()[0] != annot_indexes[sym_annot[rule.left]]):
                     raise RuntimeError('XXX 2') # This should probably be done before the generation begins
 
-        modify_inplace = any((rule.lhs_name == rhs for rhs in rule.rhs_names))
+        modify_inplace = rule.lhs_name is not None and any((rule.lhs_name == rhs for rhs in rule.rhs_names))
         if rule.lime_action is not None:
             if not modify_inplace and sym_annot[rule.left] is not None:
                 f.append('    %s res[1] = {};' % sym_annot[rule.left])
-            f.append('    self.m_actions.a%d(' % i)
+            if rule.lhs_name or sym_annot[rule.left] is None:
+                f.append('    self.m_actions.a%d(' % i)
+            else:
+                f.append('    res[0] = self.m_actions.a%d(' % i)
             params = []
             if not modify_inplace and rule.lhs_name and sym_annot[rule.left] is not None:
                 params.append('        res[0]')
                 used_indexes.setdefault(idx, 0)
                 # XXX: do this beforehand
                 if rhs_name and sym_annot[right] is None:
-                    raise RuntimeError('XXX 3')
+                    raise RuntimeError('A symbol has a name, yet it has no type: %s' % right)
                 if rhs_name:
                     params.append(
                         '            self.m_ast_stack_%d.end()[-%d]' % (
                         idx, idx_counts[idx] - used_indexes[idx]))
-                if rhs_name == rule.lhs_name:
-                    assert modify_inplace
+                if modify_inplace and rhs_name == rule.lhs_name:
                     inplace_swap_stack = idx
                     inplace_swap = used_indexes[idx]
                 used_indexes[idx] += 1
         for sym, sym_name in zip(rule.right, rule.rhs_names):
             _add_param(sym, sym_name)
 
-        if rule.lime_action:
-            lime_actions.append("void a%d(%s)\n{%s}\n" % (i, ', '.join(param_list), rule.lime_action))
-        else:
-            lime_actions.append("void a%d(%s)\n{}\n" % (i, ', '.join(param_list)))
+        if rule.lime_action is not None:
+            if not rule.lhs_name and sym_annot[rule.left] is not None:
+                ret_type = sym_annot[rule.left]
+            else:
+                ret_type = 'void'
+            lime_actions.append("%s a%d(%s)\n{%s}\n" % (ret_type, i, ', '.join(param_list), rule.lime_action))
+#        else:
+#            lime_actions.append("void a%d(%s)\n{}\n" % (i, ', '.join(param_list)))
 
     def _get_action_row(lookahead):
         action_row = []
 
     push_token_lines = []
     for type, terms in terms_by_type.iteritems():
+        if isinstance(type, LexDiscard):
+            continue
         if type is None:
             push_token_lines.extend([
                 "void push_token(tok::token_kind kind)",
     sd['user_include'] = '' if g.user_include is None else g.user_include
     sd['finish_function'] = finish_function
     sd['ast_stacks'] = '\n    '.join(ast_stacks)
-    sd['tokens'] = ',\n    '.join(tokens)
+    sd['tokens'] = '\n    '.join(token_lines)
     sd['reduce_functions'] = '\n    '.join(reduce_functions)
     sd['lime_actions'] = '\n'.join(lime_actions)
     sd['term_count'] = str(len(g.terminals())+1)
         static state_t const goto_table[$term_count_m1][$state_count] = {
             { $term_goto_table },
         };
-        m_state_stack.push_back(goto_table[kind-1][m_state_stack.back()]);
+        std::size_t new_state = goto_table[kind-1][m_state_stack.back()];
+        if (new_state == 0)
+            throw std::runtime_error("Unexpected token");
+        m_state_stack.push_back(new_state);
     }
 
     void do_reduce(tok::token_kind lookahead)
     def __hash__(self):
         return hash(self.regex)
 
+    def __repr__(self):
+        return 'LexRegex(%r)' % self.regex
+
 class LexLiteral:
     def __init__(self, literal):
         self.literal = literal
     def __hash__(self):
         return hash(self.literal)
 
+    def __repr__(self):
+        return 'LexLiteral(%r)' % self.literal
+
 class _LimeLexerClassify:
     def __init__(self):
         self.quote = None
         if self.snippet != 0:
             if ch == '}':
                 self.snippet -= 1
-                if not self.snippet:
-                    return
             elif ch == '{':
                 self.snippet += 1
             return 'snippet'
 
     def __init__(self):
         self.implicit_tokens = {}
+        self.processed_implicit_tokens = set()
 
     @action
     def root(self, g):
         """
         root = grammar;
         """
+        return g
+
+    def _update_implicit_tokens(self, g):
         for lex_rhs, token_name in self.implicit_tokens.iteritems():
-            g.lex_rules.append(((token_name, None), (lex_rhs, None), None))
-        return g
+            if token_name not in self.processed_implicit_tokens:
+                g.lex_rules.append(((token_name, None), (lex_rhs, None), None))
+                g.token_comments[token_name] = lex_rhs
+                self.processed_implicit_tokens.add(token_name)
 
     @action
     def grammar_empty(self):
         g = Grammar()
         g.lex_rules = []
         g.sym_annot = {}
+        g.token_comments = {}
         g.user_include = None
         g.token_type = None # XXX: perhaps default_type?
         return g
         """
         rule.id = len(g)
         g.add(rule)
+        self._update_implicit_tokens(g)
         return g
 
     @action
         grammar = grammar, lex_stmt;
         """
         g.lex_rules.append(rule)
+        g.add_symbol(rule[0][0])
         return g
 
     @action
 
 def lime_lexer(input):
     for tok in simple_lexer(input, _LimeLexerClassify()):
-        if isinstance(tok, tuple) and tok[0] == 'id' and tok[1][:1] == '%':
-            yield ('kw_' + tok[1][1:], tok[1])
-        else:
-            yield tok
+        if isinstance(tok, tuple):
+            if tok[0] == 'id' and tok[1][:1] == '%':
+                yield ('kw_' + tok[1][1:], tok[1])
+                continue
+            if tok[0] == 'snippet':
+                yield ('snippet', tok[1][:-1])
+                continue
+
+        yield tok
 
 def parse_lime_grammar(input):
     p = _LimeGrammar()
         open(options.output, 'w').write(lime_cpp(p))
     except Exception, e:
         _error(e)
+        import traceback
+        traceback.print_exc(sys.stderr)
+        return 1
 
 if __name__ == '__main__':
     from optparse import OptionParser
     if len(args) != 1:
         opts.error('exactly one filename must be specified')
 
-    _main(options, args[0])
+    import sys
+    sys.exit(_main(options, args[0]))
     def __repr__(self):
         return repr(sorted(self.charset))
 
+class _Empty:
+    pass
+
 class _Rep:
     def __init__(self, term):
         self.term = term
     root = alt;
     alt = concat;
 
-    concat = rep;
-
     rep = atom;
 
     atom = _lparen, alt, _rparen;
         return _Concat(atom, _Rep(atom))
 
     @action
+    def rep_q(self, atom):
+        """
+        rep = atom, _q;
+        """
+        return _Alt(_Empty(), atom)
+
+    @action
+    def concat_empty(self):
+        """
+        concat = ;
+        """
+        return _Empty()
+
+    @action
     def concat(self, lhs, rhs):
         """
         concat = concat, rep;
             yield ('minus', ch)
         elif ch == '|':
             yield ('pipe', ch)
+        elif ch == '?':
+            yield ('q', ch)
         else:
             yield ('ch', ch)
     if esc:
     # The NFA now looks like this
     # 0 --epsilon--> 2 --regex--> 3 --epsilon--> 1
 
-    changed = True
-    while changed:
-        changed = False
+    while True:
         for edge in fa.get_edges():
             source, target, r = edge.source, edge.target, edge.label
-            fa.remove_edge(edge)
             if isinstance(r, _Alt):
+                fa.remove_edge(edge)
                 a = fa.new_state(target)
                 fa.new_edge(source, target, r.lhs)
                 fa.new_edge(source, a, r.rhs)
-                changed = True
+                break
             elif isinstance(r, _Concat):
+                fa.remove_edge(edge)
                 a = fa.new_state()
                 fa.new_edge(source, a, r.lhs)
                 fa.new_edge(a, target, r.rhs)
-                changed = True
+                break
             elif isinstance(r, _Rep):
+                fa.remove_edge(edge)
                 a = fa.new_state()
                 fa.new_edge(source, a)
                 fa.new_edge(a, target)
                 fa.new_edge(a, a, r.term)
-                changed = True
-            else:
-                fa.new_edge(source, target, r)
+                break
+            elif isinstance(r, _Empty):
+                fa.remove_edge(edge)
+                fa.new_edge(source, target)
+                break
+        else:
+            break
 
     for edge in fa.get_edges():
         if edge.label is not None:
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.