Commits

Martin Vejnár committed 785cf70

Some cleanup. The core functionality is basically finished.

  • Parent commit 43278b2

Files changed (6)

+from rule import Rule
+from grammar import Grammar
+from parser import Parser

File fifo.py

-from grammar import Grammar
-
-def join(left_wordset, right_wordset, k):
-	res = set()
-	min_len = k
-	for lword in left_wordset:
-		for rword in right_wordset:
-			w = list(lword)
-			w.extend(rword)
-			w = w[:k]
-			if len(w) < min_len:
-				min_len = len(w)
-			res.add(tuple(w))
-	return res, min_len
-	
-class FirstTable:
-	def __init__(self, grammar, k):
-		self.table = {}
-		self.k = k
-
-		for nonterm in grammar.nonterms():
-			self.table[nonterm] = set()
-
-		done = False
-		while not done:
-			done = True
-
-			for rule in grammar:
-				fi = self(rule.right)
-			
-				for word in fi:
-					if word not in self.table[rule.left]:
-						self.table[rule.left].add(word)
-						done = False
-		
-	def _first1(self, token):
-		if token not in self.table:
-			return set([(token,)])
-		return self.table[token]
-
-	def __call__(self, word):
-		res = set([()])
-		for token in word:
-			res, c = join(res, self._first1(token), self.k)
-			if c == self.k:
-				break
-
-		return res
-
-if __name__ == '__main__':
-	g = Grammar(
-		('E', 'E', '*', 'B'),
-		('E', 'E', '+', 'B'),
-		('E', 'B'),
-		('B', '0'),
-		('B', '1')
-	)
-
-	print g
-
-	ft = FirstTable(g, 2)
-	print ft.table
-
-	print ft(('E', '*'))
+"""
+This module defines some basic operations on words and sets of words.
+
+A word is any iterable of symbols (strings), e.g. ('list', 'item') is a word.
+The iterable must support slicing, concatenation with the '+' operator,
+and the 'len' function.
+"""
+
+from rule import Rule
+from grammar import Grammar
+
+def first(word, k=1):
+    """Returns FIRST_k(word).
+    
+    The implied grammar is taken to be empty and all symbols are treated as terminals.
+    See <http://www.jambe.co.nz/UNI/FirstAndFollowSets.html> for more information.
+    
+    >>> first('hello, world', k=7)
+    'hello, '
+    >>> first(('list', 'item', 'item', 'item', 'item'), k=2)
+    ('list', 'item')
+    """
+    return word[:k]
+
+def oplus(left, right, k=1):
+    """Returns the set { FIRST_k(vw) | v in left, w in right } and the length of its shortest member.
+    
+    'left' and 'right' are iterables of words.
+    The function returns a pair (s, l), where 's' is the first-set
+    and 'l' is the length of its shortest member; if 's' is empty, 'l' equals 'k'.
+    The type of 's' is unspecified, but is guaranteed to be an iterable of words
+    and to support operators 'in' and 'not in'.
+    
+    >>> s, l = oplus(['ab', 'ac', ''], ['zz', 'y', ''], k=3)
+    >>> sorted(list(s))
+    ['', 'ab', 'aby', 'abz', 'ac', 'acy', 'acz', 'y', 'zz']
+    >>> l
+    0
+    
+    >>> s, l = oplus(['ab', 'ac'], ['zz', 'y'], k=3)
+    >>> sorted(list(s))
+    ['aby', 'abz', 'acy', 'acz']
+    >>> l
+    3
+    """
+    res = set()
+    min_len = k
+    for lword in left:
+        for rword in right:
+            w = first(lword + rword, k)
+            if len(w) < min_len:
+                min_len = len(w)
+            res.add(w)
+    return res, min_len
+    
+class First:
+    """Represents the first-set for a given grammar.
+    
+    The grammar and 'k' parameter are passed during construction.
+    
+    >>> g = Grammar(Rule('list'), Rule('list', 'list', 'item'))
+    >>> f = First(g, k=2)
+    >>> f.grammar
+    Grammar(Rule('list'), Rule('list', 'list', 'item'))
+    
+    The objects are callable and, for a given word 'w', return the set
+    { FIRST_k(u) | w =>* u, u terminal }.
+    
+    >>> sorted(list(f(('item', 'item', 'item'))))
+    [('item', 'item')]
+    >>> sorted(list(f(())))
+    [()]
+    >>> sorted(list(f(('list',))))
+    [(), ('item',), ('item', 'item')]
+    """
+    def __init__(self, grammar, k=1):
+        """
+        Given a grammar and a 'k', constructs the first-set table for all non-terminals.
+        The table is then used by the '__call__' method.
+        
+        For the construction algorithm, see the Dragon book.
+        """
+        self.grammar = Grammar(*grammar)
+        self.k = k
+
+        self.table = dict((nonterm, set()) for nonterm in grammar.nonterms())
+
+        # The sets in the table start empty and are filled iteratively.
+        # Termination is guaranteed: the sets only grow and are bounded
+        # above by the finite set of words of length at most k.
+        done = False
+        while not done:
+            done = True
+            for rule in grammar:
+                for word in self(rule.right):
+                    if word not in self.table[rule.left]:
+                        self.table[rule.left].add(word)
+                        done = False
+    
+    def __call__(self, word):
+        """Returns FIRST_k(word) with respect to the associated grammar."""
+        res = set([()])
+        for symbol in word:
+            if symbol not in self.table:
+                rset = set([(symbol,)])
+            else:
+                rset = self.table[symbol]
+            res, c = oplus(res, rset, self.k)
+            if c == self.k:
+                break
+        
+        return res
+
+if __name__ == "__main__":
+    import doctest
+    doctest.testmod()
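
For reference, the demo that was dropped from fifo.py would look roughly like
this under the new API (a sketch, not part of the commit, using the arithmetic
grammar from the old '__main__' block):

    from rule import Rule
    from grammar import Grammar
    from first import First

    g = Grammar(
        Rule('E', 'E', '*', 'B'),
        Rule('E', 'E', '+', 'B'),
        Rule('E', 'B'),
        Rule('B', '0'),
        Rule('B', '1'))

    ft = First(g, k=2)
    # Every terminal string derivable from the word 'E *' starts with
    # a digit followed by an operator, truncated here to two symbols:
    print sorted(ft(('E', '*')))
    # [('0', '*'), ('0', '+'), ('1', '*'), ('1', '+')]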

File grammar.py

-def _default_action(*args):
-	if len(args) == 1:
-		return args[0]
-	else:
-		return tuple(args)
+from rule import Rule
 
-class Rule:
-	def __init__(self, left, right, action=_default_action):
-		if type(right) != tuple:
-			right = (right,)
-		self.left = left
-		self.right = right
-		self.action = action
-		
-	def __str__(self):
-		out = [self.left, '::=']
-		out.extend(self.right)
-		return ' '.join(out)
-	
-	def __repr__(self):
-		return ''.join(("'", self.__str__(), "'"))
+class Grammar:
+    """Represents a set of production rules.
+    
+    Each rule is encapsulated by an instance of the 'Rule' class.
+    The rules can be added during construction or later
+    using the 'add' and 'extend' methods.
+    
+    The grammar also tracks the root non-terminal, which is defined
+    to be the left symbol of the first rule. For an empty grammar,
+    there is no root and Grammar.root() will return None.
+    
+    >>> g = Grammar()
+    >>> print g.root()
+    None
 
-# TODO: caching
-class Grammar:
-	def __init__(self, *rules):
-		self._rules = []
-		self._nonterms = set()
-		self._symbols = set()
-		
-		for rule in rules:
-			self.add_rule(*rule)
-		
-	def __getitem__(self, key):
-		return self._rules[key]
-		
-	def __len__(self):
-		return len(self._rules)
-		
-	def __iter__(self):
-		return iter(self._rules)
-		
-	def __str__(self):
-		return '\n'.join(str(rule) for rule in self._rules)
-		
-	def add(self, rule):
-		self._rules.append(rule)
-		self._nonterms.add(rule.left)
-		self._symbols.add(rule.left)
-		for symbol in rule.right:
-			self._symbols.add(symbol)
-	
-	def add_rule(self, left, *right):
-		self.add(Rule(left, right))
-		
-	def extend(self, rules):
-		for rule in rules:
-			self.add(rule)
+    >>> g = Grammar(Rule('list'))
+    >>> g.add(Rule('list', 'list', 'item'))
+    >>> print g
+    list ::= <empty>
+    list ::= list item
+    >>> g.root()
+    'list'
+    
+    Grammars expose their rules using the standard list interface.
+    There is, however, no support for slices, and the list of rules
+    can only be mutated through calls to 'add' and 'extend'.
+    
+    >>> g[0]
+    Rule('list')
+    >>> len(g)
+    2
+    >>> for rule in g: print rule
+    list ::= <empty>
+    list ::= list item
+    
+    Symbols are considered non-terminal (with respect to a grammar) if
+    they stand on the left side of some rule. All other symbols are considered terminal.
+    A grammar can test a symbol for its terminality. It also exposes a list of
+    all non-terminals and a list of all referenced symbols.
+    
+    >>> g.add(Rule('root', 'list'))
+    >>> [g.is_terminal(symbol) for symbol in ('list', 'root', 'item', 'unreferenced')]
+    [False, False, True, True]
+    >>> sorted(list(g.symbols()))
+    ['item', 'list', 'root']
+    >>> sorted(list(g.nonterms()))
+    ['list', 'root']
+    
+    The grammar also allows fast access to the set of rules with a given symbol on the left.
+    
+    >>> for rule in g.rules('list'): print rule
+    list ::= <empty>
+    list ::= list item
+    >>> for rule in g.rules('unreferenced'): print rule
+    >>> for rule in g.rules('root'): print rule
+    root ::= list
+    """
+    def __init__(self, *rules):
+        self._rules = []
+        self._rule_cache = {}
+        self._nonterms = set()
+        self._symbols = set()
+        
+        for rule in rules:
+            self.add(rule)
+        
+    def __getitem__(self, key):
+        return self._rules[key]
+        
+    def __len__(self):
+        return len(self._rules)
+        
+    def __iter__(self):
+        return iter(self._rules)
+        
+    def __str__(self):
+        """
+        >>> print Grammar(Rule('a', 'b', 'c'), Rule('a', 'c', 'b'))
+        a ::= b c
+        a ::= c b
+        """
+        return '\n'.join(str(rule) for rule in self._rules)
+    
+    def __repr__(self):
+        """
+        >>> print repr(Grammar(Rule('a', 'b', 'c'), Rule('a', 'c', 'b')))
+        Grammar(Rule('a', 'b', 'c'), Rule('a', 'c', 'b'))
+        """
+        return 'Grammar(%s)' % ', '.join(repr(rule) for rule in self._rules)
+        
+    def add(self, rule):
+        """Adds a rule to the grammar."""
+        self._rules.append(rule)
+        self._nonterms.add(rule.left)
+        self._symbols.add(rule.left)
+        self._rule_cache.setdefault(rule.left, []).append(rule)
+        
+        for symbol in rule.right:
+            self._symbols.add(symbol)
+    
+    def extend(self, rules):
+        """Extends the grammar by the given set of rules."""
+        for rule in rules:
+            self.add(rule)
 
-	def rules(self, left):
-		for rule in self._rules:
-			if rule.left == left:
-				yield rule
-				
-	def is_terminal(self, token):
-		return token not in self._nonterms
+    def rules(self, left):
+        """Retrieves the set of rules with a given non-terminal on the left.
+        
+        >>> g = Grammar(Rule('a', 'b'), Rule('b', 'c'), Rule('b', 'd'))
+        >>> for rule in g.rules('a'): print rule
+        a ::= b
+        >>> for rule in g.rules('c'): print rule
+        >>> for rule in g.rules('b'): print rule
+        b ::= c
+        b ::= d
+        """
+        return self._rule_cache.get(left, ())
+    
+    def is_terminal(self, token):
+        """Tests the symbol for terminality.
+        
+        All non-terminal symbols are considered terminal,
+        regardless of whether they are referenced by the grammar.
+        
+        >>> g = Grammar(Rule('a', 'b'), Rule('b', 'c'))
+        >>> tuple(g.is_terminal(sym) for sym in 'abcd')
+        (False, False, True, True)
+        """
+        return token not in self._nonterms
 
-	def nonterms(self):
-		return self._nonterms
-		
-	def symbols(self):
-		return self._symbols
-		
-if __name__ == '__main__':
-	g = Grammar(
-		('E', 'E', '*', 'B'),
-		('E', 'E', '+', 'B'),
-		('E', 'B'),
-		('B', '0'),
-		('B', '1')
-	)
-	
-	print g
-	print
-	print 'Non-terminals:', g.nonterms()
-	print 'Terminals:', g.symbols()
+    def nonterms(self):
+        """Returns an iterable representing the set of all non-terminal symbols."""
+        return self._nonterms
+        
+    def symbols(self):
+        """Returns an iterable representing the current set of all referenced symbols."""
+        return self._symbols
+        
+    def root(self):
+        """Returns the root non-terminal.
+        
+        >>> g = Grammar()
+        >>> print g.root()
+        None
+        >>> g.add(Rule('a', 'b'))
+        >>> print g.root()
+        a
+        """
+        return self._rules[0].left if self._rules else None
+        
+if __name__ == "__main__":
+    import doctest
+    doctest.testmod()
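
The removed '__main__' demo of grammar.py translates directly to the new
Rule-based constructor (a sketch, not part of the commit):

    from rule import Rule
    from grammar import Grammar

    g = Grammar(
        Rule('E', 'E', '*', 'B'),
        Rule('E', 'E', '+', 'B'),
        Rule('E', 'B'),
        Rule('B', '0'),
        Rule('B', '1'))

    print g
    print
    print 'Non-terminals:', sorted(g.nonterms())   # ['B', 'E']
    print 'Symbols:', sorted(g.symbols())          # ['*', '+', '0', '1', 'B', 'E']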

File parser.py

 from grammar import Grammar, Rule
-from fifo import FirstTable
+from first import First
+
+# TODO: Fix 'shift' and 'reduce' tokens (a bool perhaps?).
+
+class InvalidGrammarError(BaseException):
+    """Raised during a construction of a parser, if the grammar is not LR(k)."""
+    pass
+
+class ParsingError(BaseException):
+    """Raised by a parser if the input word is not a sentence of the grammar."""
+    pass
+    
+class Parser:
+    """Represents a LR(k) parser.
+    
+    The parser is created with a grammar and a 'k'. The LR parsing tables
+    are created during construction. If the grammar is not LR(k),
+    an InvalidGrammarException is raised.
+    
+    >>> not_a_lr0_grammar = Grammar(Rule('list'), Rule('list', 'item', 'list'))
+    >>> Parser(not_a_lr0_grammar, k=0) # doctest: +ELLIPSIS
+    Traceback (most recent call last):
+        ...
+    InvalidGrammarError: LR(0) table conflict ...
+
+    >>> lr0_grammar = Grammar(
+    ...     Rule('list', action=lambda c: [] if c == None else [c]),
+    ...     Rule('list', 'list', 'item', action=lambda c, l, i: l + [i]))
+    >>> p = Parser(lr0_grammar, k=0)
+    >>> print p.grammar
+    list ::= <empty>
+    list ::= list item
+    
+    The 'parse' method accepts an iterable of tokens (which are arbitrary objects)
+    and an extraction function. The extraction function should return the terminal
+    symbol associated with a token; by default, it is the identity. Whenever
+    the parser reduces a word to a non-terminal, the action associated with the
+    reduction rule is executed. This way Abstract Syntax Trees or other objects
+    can be constructed. The 'parse' method returns the result of the action
+    associated with the topmost reduction rule.
+    
+    Optionally, 'parse' accepts a 'context' keyword argument, which is passed
+    to every action when a reduction occurs. By default, the context is None.
+    
+    >>> p.parse(())
+    []
+    >>> p.parse(('item', 'item', 'item', 'item'))
+    ['item', 'item', 'item', 'item']
+    >>> p.parse('spam', extract=lambda x: 'item')
+    ['s', 'p', 'a', 'm']
+    >>> p.parse('gg', extract=lambda x: 'item', context='e')
+    ['e', 'g', 'g']
+    
+    If an error occurs during parsing, a ParsingError is raised.
+    
+    >>> p.parse('spam')
+    Traceback (most recent call last):
+        ...
+    ParsingError: Unexpected input token: 's'
+    """
+    
+    def __init__(self, grammar, k, keep_states=False):
+        
+        if grammar.root() == None:
+            raise InvalidGrammarError('There must be at least one rule in the grammar.')
+            
+        self.grammar = Grammar(*grammar)
+        self.k = k
+        
+        # Augment the grammar with a special rule: 'S -> R',
+        # where S is a new non-terminal (in this case '').
+        aug_grammar = Grammar(Rule('', grammar.root()), *grammar)
+            
+        first = First(aug_grammar, k)
+        
+        def _close_itemset(itemset):
+            i = 0
+            while i < len(itemset):
+                curitem = itemset[i]
+                
+                for next_lookahead in curitem.next_lookaheads(first):
+                    for next_rule in aug_grammar.rules(curitem.next_token()):
+                        newitem = _Item(next_rule, 0, next_lookahead)
+                        if newitem not in itemset:
+                            itemset.append(newitem)
+                
+                i += 1
+                
+        def _goto(itemset, symbol):
+            res = []
+            for item in itemset:
+                if item.next_token() != symbol:
+                    continue
+                    
+                res.append(_Item(item.rule, item.index + 1, item.lookahead))
+                
+            _close_itemset(res)
+            return set(res)
+        
+        itemset = [_Item(aug_grammar[0], 0, ())]
+        _close_itemset(itemset)
+        
+        states = [set(itemset)]
+        
+        goto_table = {}
+        
+        done = False
+        while not done:
+            done = True
+            
+            i = 0
+            while i < len(states):
+                itemset = states[i]
+                
+                for symbol in aug_grammar.symbols():
+                    newstate = _goto(itemset, symbol)
+                    if len(newstate) == 0:
+                        continue
+                        
+                    for j, state in enumerate(states):
+                        if newstate == state:
+                            goto_table[i, symbol] = j
+                            break
+                    else:
+                        goto_table[i, symbol] = len(states)
+                        states.append(newstate)
+                        done = False
+                
+                i += 1
+        
+        action_table = {}
+        accepting_state = None
+        
+        def add_action(state_id, lookahead, action, item):
+            key = (state_id, lookahead)
+            if key in action_table and action_table[key] != action:
+                raise InvalidGrammarError('LR(%i) table conflict at %s: actions %s, %s trying to add %s' % (k, key, action_table[key], action, item))
+            action_table[key] = action
+        
+        for state_id, state in enumerate(states):
+            for item in state:
+                nt = item.next_token()
+                if nt == None:
+                    if item.rule.left == '':
+                        accepting_state = state_id
+                        add_action(state_id, item.lookahead, ('shift',), item)
+                    else:
+                        add_action(state_id, item.lookahead, ('reduce', item.rule), item)
+                elif aug_grammar.is_terminal(nt):
+                    for la in item.lookaheads(first):
+                        add_action(state_id, la, ('shift',), item)
+        
+        assert accepting_state != None
+        self.goto = goto_table
+        self.action = action_table
+        self.accepting_state = accepting_state
+        self.k = k
+        
+        if keep_states:
+            self.states = states
+
+    def parse(self, sentence, context=None, extract=lambda x: x):
+        it = iter(sentence)
+        buf = []
+        while len(buf) < self.k:
+            try:
+                buf.append(it.next())
+            except StopIteration:
+                break
+                    
+        def get_shift_token():
+            if len(buf) == 0:
+                try:
+                    return it.next()
+                except StopIteration:
+                    return None
+            else:
+                res = buf.pop(0)
+                try:
+                    buf.append(it.next())
+                except StopIteration:
+                    pass
+                return res
+        
+        states = [0]
+        asts = []
+        while True:
+            state = states[-1]
+            
+            key = (state, tuple(extract(token) for token in buf))
+            if key not in self.action:
+                raise ParsingError('Unexpected input token: %s' % repr(key[1]))
+            
+            action = self.action[key]
+            if action[0] == 'reduce':
+                rule = action[1]
+                
+                if len(rule.right) > 0:
+                    new_ast = rule.action(context, *asts[-len(rule.right):])
+                    del states[-len(rule.right):]
+                    del asts[-len(rule.right):]
+                else:
+                    new_ast = rule.action(context)
+                
+                states.append(self.goto[states[-1], rule.left])
+                asts.append(new_ast)
+            else: # shift
+                tok = get_shift_token()
+                if tok == None:
+                    if state == self.accepting_state:
+                        assert len(asts) == 1
+                        return asts[0]
+                    else:
+                        raise ParsingError('Reached the end of file prematurely.')
+                
+                key = (state, extract(tok))
+                if key not in self.goto:
+                    raise ParsingError('Unexpected input token: %s' % repr(key[1]))
+                
+                states.append(self.goto[key])
+                asts.append(tok)
 
 class _Item:
-	def __init__(self, rule, index, lookahead):
-		self.rule = rule
-		self.index = index
-		self.lookahead = lookahead
-		
-		self.final = len(self.rule.right) <= self.index
-		
-	def __cmp__(self, other):
-		return cmp(
-			(self.rule, self.index, self.lookahead),
-			(other.rule, other.index, other.lookahead))
-			
-	def __hash__(self):
-		return hash((self.rule, self.index, self.lookahead))
-		
-	def __str__(self):
-		out = [self.rule.left, '::=']
-		out.extend(self.rule.right)
-		out.insert(self.index + 2, '.')
-		return ' '.join(out) + ' ' + str(self.lookahead)
-		
-	def __repr__(self):
-		return ''.join(("'", self.__str__(), "'"))
-		
-	def next_token(self):
-		return self.rule.right[self.index] if not self.final else None
-		
-	def next_lookaheads(self, first):
-		rule_suffix = self.rule.right[self.index + 1:]
-		word = rule_suffix + self.lookahead
-		return first(word)
-		
-	def lookaheads(self, first):
-		rule_suffix = self.rule.right[self.index:]
-		word = rule_suffix + self.lookahead
-		return first(word)
+    def __init__(self, rule, index, lookahead):
+        self.rule = rule
+        self.index = index
+        self.lookahead = lookahead
+        
+        self.final = len(self.rule.right) <= self.index
+    
+    def __cmp__(self, other):
+        return cmp(
+            (self.rule, self.index, self.lookahead),
+            (other.rule, other.index, other.lookahead))
+    
+    def __hash__(self):
+        return hash((self.rule, self.index, self.lookahead))
+        
+    def __str__(self):
+        out = [self.rule.left, '::=']
+        out.extend(self.rule.right)
+        out.insert(self.index + 2, '.')
+        return ' '.join(out) + ' ' + str(self.lookahead)
+    
+    def __repr__(self):
+        return ''.join(("'", self.__str__(), "'"))
+    
+    def next_token(self):
+        return self.rule.right[self.index] if not self.final else None
+    
+    def next_lookaheads(self, first):
+        rule_suffix = self.rule.right[self.index + 1:]
+        word = rule_suffix + self.lookahead
+        return first(word)
+    
+    def lookaheads(self, first):
+        rule_suffix = self.rule.right[self.index:]
+        word = rule_suffix + self.lookahead
+        return first(word)
 
-class InvalidGrammarError(BaseException):
-	pass
-
-class ParsingError(BaseException):
-	pass
-	
-class Parser:
-	def __init__(self, rules, k):
-		if len(rules) == 0:
-			raise InvalidGrammarError('There must be at least one rule in a grammar.')
-
-		grammar = Grammar()
-
-		# Augment the grammar with a special rule: 'S -> R',
-		# where S is a new non-terminal (in this case '').
-		grammar.add_rule('', rules[0].left)
-		grammar.extend(rules)
-			
-		first = FirstTable(grammar, k)
-		
-		def _close_itemset(itemset):
-			i = 0
-			while i < len(itemset):
-				curitem = itemset[i]
-				
-				for next_lookahead in curitem.next_lookaheads(first):
-					for next_rule in grammar.rules(curitem.next_token()):
-						newitem = _Item(next_rule, 0, next_lookahead)
-						if newitem not in itemset:
-							itemset.append(newitem)
-				
-				i += 1
-				
-		def _goto(itemset, symbol):
-			res = []
-			for item in itemset:
-				if item.next_token() != symbol:
-					continue
-					
-				res.append(_Item(item.rule, item.index + 1, item.lookahead))
-				
-			_close_itemset(res)
-			return set(res)
-		
-		itemset = [_Item(grammar[0], 0, ())]
-		_close_itemset(itemset)
-
-		states = [set(itemset)]
-		
-		goto_table = {}
-		
-		done = False
-		while not done:
-			done = True
-			
-			i = 0
-			while i < len(states):
-				itemset = states[i]
-				
-				for symbol in grammar.symbols():
-					newstate = _goto(itemset, symbol)
-					if len(newstate) == 0:
-						continue
-						
-					for j, state in enumerate(states):
-						if newstate == state:
-							goto_table[i, symbol] = j
-							break
-					else:
-						goto_table[i, symbol] = len(states)
-						states.append(newstate)
-						done = False
-
-				i += 1
-
-		action_table = {}
-		accepting_state = None
-		
-		def add_action(state_id, lookahead, action, item):
-			key = (state_id, lookahead)
-			if key in action_table and action_table[key] != action:
-				raise InvalidGrammarError('LR(%i) table conflict at %s: actions %s, %s trying to add %s' % (k, key, action_table[key], action, item))
-			action_table[key] = action
-		
-		for state_id, state in enumerate(states):
-			for item in state:
-				nt = item.next_token()
-				if nt == None:
-					if item.rule.left == '':
-						accepting_state = state_id
-						add_action(state_id, item.lookahead, ('shift',), item)
-					else:
-						add_action(state_id, item.lookahead, ('reduce', item.rule), item)
-				elif grammar.is_terminal(nt):
-					for la in item.lookaheads(first):
-						add_action(state_id, la, ('shift',), item)
-		
-		assert accepting_state != None
-		self.goto = goto_table
-		self.action = action_table
-		self.accepting_state = accepting_state
-		self.k = k
-
-	def parse(self, sentence, extract=lambda x: x):
-		it = iter(sentence)
-		buf = []
-		while len(buf) < self.k:
-			try:
-				buf.append(it.next())
-			except StopIteration:
-				return
-					
-		def get_shift_token():
-			if len(buf) == 0:
-				try:
-					return it.next()
-				except StopIteration:
-					return None
-			else:
-				res = buf.pop(0)
-				try:
-					buf.append(it.next())
-				except StopIteration:
-					pass
-				return res
-
-		states = [0]
-		asts = []
-		while True:
-			state = states[-1]
-
-			key = (state, tuple(extract(token) for token in buf))
-			if key not in self.action:
-				raise ParsingError()
-
-			action = self.action[key]
-			if action[0] == 'reduce':
-				rule = action[1]
-				del states[-len(rule.right):]
-				states.append(self.goto[states[-1], rule.left])
-				
-				new_ast = rule.action(*asts[-len(rule.right):])
-				del asts[-len(rule.right):]
-				asts.append(new_ast)
-				
-			else: # shift
-				tok = get_shift_token()
-				if tok == None:
-					if state == self.accepting_state:
-						assert len(asts) == 1
-						return asts[0]
-					else:
-						raise ParsingError('Reached the end of file prematurely.')
-
-				states.append(self.goto[state, extract(tok)])
-				asts.append(tok)
-	
-if __name__ == '__main__':
-	parser = Parser([
-		Rule('E', ('E', '*', 'B'), lambda x, _, y: x * y),
-		Rule('E', ('E', '+', 'B'), lambda x, _, y: x + y),
-		Rule('E', 'B'),
-		Rule('B', '0', int),
-		Rule('B', '1', int)
-	], k=0)
-
-	import sys
-	line = sys.stdin.readline()
-	print parser.parse(line.strip())
+if __name__ == "__main__":
+    import doctest
+    doctest.testmod()
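
The calculator demo that used to live in parser.py's '__main__' block ports to
the new interface as follows; note that actions now take the context as their
first argument (a sketch, not part of the commit):

    from grammar import Grammar, Rule
    from parser import Parser

    parser = Parser(Grammar(
        Rule('E', 'E', '*', 'B', action=lambda ctx, x, op, y: x * y),
        Rule('E', 'E', '+', 'B', action=lambda ctx, x, op, y: x + y),
        Rule('E', 'B', action=lambda ctx, b: b),
        Rule('B', '0', action=lambda ctx, digit: int(digit)),
        Rule('B', '1', action=lambda ctx, digit: int(digit))), k=0)

    print parser.parse('1+1*0')   # prints 0: no precedence, so this is (1+1)*0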

File rule.py

+class Rule:
+    """Represents a single production rule of a grammar.
+    
+    Rules are usually written using some sort of BNF-like notation,
+    for example, 'list ::= list item' would be a valid rule. A rule
+    always has a single non-terminal symbol on the left and a list
+    (possibly empty) of arbitrary symbols on the right. The above rule
+    would be constructed as follows.
+    
+    >>> r = Rule('list', 'list', 'item')
+    >>> print r
+    list ::= list item
+    
+    Note that terminal and non-terminal symbols are written
+    using the same syntax -- the differentiation only occurs
+    at the grammar level. The symbols standing on the left side of some rule
+    are considered non-terminal.
+    
+    A rule may have no symbols on the right; such rules produce the empty string.
+    
+    >>> print Rule('e')
+    e ::= <empty>
+    
+    The left and right symbols can be accessed via 'left' and 'right' members.
+    
+    >>> r.left, r.right
+    ('list', ('list', 'item'))
+    
+    Every rule has an associated semantic action, which combines data
+    from the symbols on the right-hand side. The number of arguments
+    of this function must be the number of symbols on the right plus one:
+    the first argument is called a context and represents arbitrary data passed to the parser.
+    The default action is to make a tuple.
+    
+    >>> r.action(None, [], 1)
+    ([], 1)
+    
+    A custom semantic action is attached in the constructor.
+    
+    >>> r = Rule('list', 'list', 'item', action=lambda c, l, i: l + [i])
+    >>> r.action(None, [], 1)
+    [1]
+    """
+
+    def __init__(self, left, *right, **kwarg):
+        self.left = left
+        self.right = right
+        self.action = kwarg.get('action', lambda ctx, *args: args)
+    
+    def __str__(self):
+        """
+        >>> print Rule('a', 'b', 'c')
+        a ::= b c
+        >>> print Rule('a')
+        a ::= <empty>
+        """
+        
+        if self.right:
+            out = [self.left, '::=']
+            out.extend(self.right)
+        else:
+            out = [self.left, '::= <empty>']
+        return ' '.join(out)
+    
+    def __repr__(self):
+        """
+        >>> print repr(Rule('a', 'b', 'c'))
+        Rule('a', 'b', 'c')
+        """
+        
+        out = [repr(self.left)]
+        out.extend(repr(s) for s in self.right)
+        return 'Rule(%s)' % ', '.join(out)
+
+if __name__ == "__main__":
+    import doctest
+    doctest.testmod()