Source

pypy-postgresql / lib-python / 2.7 / lib2to3 / btm_matcher.py

"""A bottom-up tree matching algorithm implementation meant to speed
up 2to3's matching process. After the tree patterns are reduced to
their rarest linear path, a linear Aho-Corasick automaton is
created. The linear automaton traverses the linear paths from the
leaves to the root of the AST and returns a set of nodes for further
matching. This reduces significantly the number of candidate nodes."""

__author__ = "George Boutsioukis <gboutsioukis@gmail.com>"

import logging
import itertools
from collections import defaultdict

from . import pytree
from .btm_utils import reduce_tree

class BMNode(object):
    """Class for a node of the Aho-Corasick automaton used in matching"""
    count = itertools.count()
    def __init__(self):
        self.transition_table = {}
        self.fixers = []
        self.id = next(BMNode.count)
        self.content = ''

class BottomMatcher(object):
    """The main matcher class. After instantiating the patterns should
    be added using the add_fixer method"""

    def __init__(self):
        self.match = set()
        self.root = BMNode()
        self.nodes = [self.root]
        self.fixers = []
        self.logger = logging.getLogger("RefactoringTool")

    def add_fixer(self, fixer):
        """Reduces a fixer's pattern tree to a linear path and adds it
        to the matcher(a common Aho-Corasick automaton). The fixer is
        appended on the matching states and called when they are
        reached"""
        self.fixers.append(fixer)
        tree = reduce_tree(fixer.pattern_tree)
        linear = tree.get_linear_subpattern()
        match_nodes = self.add(linear, start=self.root)
        for match_node in match_nodes:
            match_node.fixers.append(fixer)

    def add(self, pattern, start):
        "Recursively adds a linear pattern to the AC automaton"
        #print("adding pattern", pattern, "to", start)
        if not pattern:
            #print("empty pattern")
            return [start]
        if isinstance(pattern[0], tuple):
            #alternatives
            #print("alternatives")
            match_nodes = []
            for alternative in pattern[0]:
                #add all alternatives, and add the rest of the pattern
                #to each end node
                end_nodes = self.add(alternative, start=start)
                for end in end_nodes:
                    match_nodes.extend(self.add(pattern[1:], end))
            return match_nodes
        else:
            #single token
            #not last
            if pattern[0] not in start.transition_table:
                #transition did not exist, create new
                next_node = BMNode()
                start.transition_table[pattern[0]] = next_node
            else:
                #transition exists already, follow
                next_node = start.transition_table[pattern[0]]

            if pattern[1:]:
                end_nodes = self.add(pattern[1:], start=next_node)
            else:
                end_nodes = [next_node]
            return end_nodes

    def run(self, leaves):
        """The main interface with the bottom matcher. The tree is
        traversed from the bottom using the constructed
        automaton. Nodes are only checked once as the tree is
        retraversed. When the automaton fails, we give it one more
        shot(in case the above tree matches as a whole with the
        rejected leaf), then we break for the next leaf. There is the
        special case of multiple arguments(see code comments) where we
        recheck the nodes

        Args:
           The leaves of the AST tree to be matched

        Returns:
           A dictionary of node matches with fixers as the keys
        """
        current_ac_node = self.root
        results = defaultdict(list)
        for leaf in leaves:
            current_ast_node = leaf
            while current_ast_node:
                current_ast_node.was_checked = True
                for child in current_ast_node.children:
                    # multiple statements, recheck
                    if isinstance(child, pytree.Leaf) and child.value == u";":
                        current_ast_node.was_checked = False
                        break
                if current_ast_node.type == 1:
                    #name
                    node_token = current_ast_node.value
                else:
                    node_token = current_ast_node.type

                if node_token in current_ac_node.transition_table:
                    #token matches
                    current_ac_node = current_ac_node.transition_table[node_token]
                    for fixer in current_ac_node.fixers:
                        if not fixer in results:
                            results[fixer] = []
                        results[fixer].append(current_ast_node)

                else:
                    #matching failed, reset automaton
                    current_ac_node = self.root
                    if (current_ast_node.parent is not None
                        and current_ast_node.parent.was_checked):
                        #the rest of the tree upwards has been checked, next leaf
                        break

                    #recheck the rejected node once from the root
                    if node_token in current_ac_node.transition_table:
                        #token matches
                        current_ac_node = current_ac_node.transition_table[node_token]
                        for fixer in current_ac_node.fixers:
                            if not fixer in results.keys():
                                results[fixer] = []
                            results[fixer].append(current_ast_node)

                current_ast_node = current_ast_node.parent
        return results

    def print_ac(self):
        "Prints a graphviz diagram of the BM automaton(for debugging)"
        print("digraph g{")
        def print_node(node):
            for subnode_key in node.transition_table.keys():
                subnode = node.transition_table[subnode_key]
                print("%d -> %d [label=%s] //%s" %
                      (node.id, subnode.id, type_repr(subnode_key), str(subnode.fixers)))
                if subnode_key == 1:
                    print(subnode.content)
                print_node(subnode)
        print_node(self.root)
        print("}")

# taken from pytree.py for debugging; only used by print_ac
_type_reprs = {}
def type_repr(type_num):
    global _type_reprs
    if not _type_reprs:
        from .pygram import python_symbols
        # printing tokens is possible but not as useful
        # from .pgen2 import token // token.__dict__.items():
        for name, val in python_symbols.__dict__.items():
            if type(val) == int: _type_reprs[val] = name
    return _type_reprs.setdefault(type_num, type_num)