Commits

Ned Batchelder committed e60bf2d

CodeAnalyzer was a terminology conflict with coverage.analysis, and it's really more of a parser anyway.

  • Participants
  • Parent commits d7eb041

Comments (0)

Files changed (3)

coverage/analyzer.py

-"""Code analysis for coverage.py"""
-
-import re, token, tokenize, types
-import cStringIO as StringIO
-
-from coverage.misc import nice_pair, CoverageException
-
-
-# Python version compatibility
-try:
-    set()       # new in 2.4
-except NameError:
-    import sets
-    set = sets.Set      # pylint: disable-msg=W0622
-    
-
-class CodeAnalyzer:
-    """Analyze code to find executable lines, excluded lines, etc."""
-    
-    def __init__(self, show_tokens=False):
-        self.show_tokens = show_tokens
-
-        # The text lines of the analyzed code.
-        self.lines = None
-
-        # The line numbers of excluded lines of code.
-        self.excluded = set()
-        
-        # The line numbers of docstring lines.
-        self.docstrings = set()
-        
-        # A dict mapping line numbers to (lo,hi) for multi-line statements.
-        self.multiline = {}
-        
-        # The line numbers that start statements.
-        self.statement_starts = set()
-
-    def find_statement_starts(self, code):
-        """Find the starts of statements in compiled code.
-    
-        Uses co_lnotab described in Python/compile.c to find line numbers that
-        start statements, adding them to `self.statement_starts`.
-    
-        """
-        # Adapted from dis.py in the standard library.
-        byte_increments = [ord(c) for c in code.co_lnotab[0::2]]
-        line_increments = [ord(c) for c in code.co_lnotab[1::2]]
-    
-        last_line_num = None
-        line_num = code.co_firstlineno
-        for byte_incr, line_incr in zip(byte_increments, line_increments):
-            if byte_incr:
-                if line_num != last_line_num:
-                    self.statement_starts.add(line_num)
-                    last_line_num = line_num
-            line_num += line_incr
-        if line_num != last_line_num:
-            self.statement_starts.add(line_num)
-
-    def find_statements(self, code):
-        """Find the statements in `code`.
-        
-        Update `self.statement_starts`, a set of line numbers that start
-        statements.  Recurses into all code objects reachable from `code`.
-        
-        """
-        # Adapted from trace.py in the standard library.
-
-        # Get all of the lineno information from this code.
-        self.find_statement_starts(code)
-    
-        # Check the constants for references to other code objects.
-        for c in code.co_consts:
-            if isinstance(c, types.CodeType):
-                # Found another code object, so recurse into it.
-                self.find_statements(c)
-
-    def raw_analyze(self, text=None, filename=None, exclude=None):
-        """Analyze `text` to find the interesting facts about its lines.
-        
-        A handful of member fields are updated.
-        
-        """
-        if not text:
-            sourcef = open(filename, 'rU')
-            text = sourcef.read()
-            sourcef.close()
-        text = text.replace('\r\n', '\n')
-        self.lines = text.split('\n')
-
-        # Find lines which match an exclusion pattern.
-        if exclude:
-            re_exclude = re.compile(exclude)
-            for i, ltext in enumerate(self.lines):
-                if re_exclude.search(ltext):
-                    self.excluded.add(i+1)
-    
-        # Tokenize, to find excluded suites, to find docstrings, and to find
-        # multi-line statements.
-        indent = 0
-        exclude_indent = 0
-        excluding = False
-        prev_toktype = token.INDENT
-        first_line = None
-
-        tokgen = tokenize.generate_tokens(StringIO.StringIO(text).readline)
-        for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
-            if self.show_tokens:
-                print "%10s %5s %-20r %r" % (
-                    tokenize.tok_name.get(toktype, toktype),
-                    nice_pair((slineno, elineno)), ttext, ltext
-                    )
-            if toktype == token.INDENT:
-                indent += 1
-            elif toktype == token.DEDENT:
-                indent -= 1
-            elif toktype == token.OP and ttext == ':':
-                if not excluding and elineno in self.excluded:
-                    # Start excluding a suite.  We trigger off of the colon
-                    # token so that the #pragma comment will be recognized on
-                    # the same line as the colon.
-                    exclude_indent = indent
-                    excluding = True
-            elif toktype == token.STRING and prev_toktype == token.INDENT:
-                # Strings that are first on an indented line are docstrings.
-                # (a trick from trace.py in the stdlib.)
-                for i in xrange(slineno, elineno+1):
-                    self.docstrings.add(i)
-            elif toktype == token.NEWLINE:
-                if first_line is not None and elineno != first_line:
-                    # We're at the end of a line, and we've ended on a
-                    # different line than the first line of the statement,
-                    # so record a multi-line range.
-                    rng = (first_line, elineno)
-                    for l in xrange(first_line, elineno+1):
-                        self.multiline[l] = rng
-                first_line = None
-                
-            if ttext.strip() and toktype != tokenize.COMMENT:
-                # A non-whitespace token.
-                if first_line is None:
-                    # The token is not whitespace, and is the first in a
-                    # statement.
-                    first_line = slineno
-                    # Check whether to end an excluded suite.
-                    if excluding and indent <= exclude_indent:
-                        excluding = False
-                    if excluding:
-                        self.excluded.add(elineno)
-                        
-            prev_toktype = toktype
-
-        # Find the starts of the executable statements.
-        filename = filename or "<code>"
-        try:
-            # Python 2.3 and 2.4 don't like partial last lines, so be sure the
-            # text ends nicely for them.
-            text += '\n'
-            code = compile(text, filename, "exec")
-        except SyntaxError, synerr:
-            raise CoverageException(
-                "Couldn't parse '%s' as Python source: '%s' at line %d" %
-                    (filename, synerr.msg, synerr.lineno)
-                )
-
-        self.find_statements(code)
-
-    def map_to_first_line(self, lines, ignore=None):
-        """Map the line numbers in `lines` to the correct first line of the
-        statement.
-        
-        Skip any line mentioned in `ignore`.
-        
-        Returns a sorted list of the first lines.
-        
-        """
-        ignore = ignore or []
-        lset = set()
-        for l in lines:
-            if l in ignore:
-                continue
-            rng = self.multiline.get(l)
-            if rng:
-                new_l = rng[0]
-            else:
-                new_l = l
-            if new_l not in ignore:
-                lset.add(new_l)
-        lines = list(lset)
-        lines.sort()
-        return lines
-    
-    def analyze_source(self, text=None, filename=None, exclude=None):
-        """Analyze source text to find executable lines, excluded lines, etc.
-        
-        Source can be provided as `text`, the text itself, or `filename`, from
-        which text will be read.  Excluded lines are those that match `exclude`,
-        a regex.
-        
-        Return values are 1) a sorted list of executable line numbers,
-        2) a sorted list of excluded line numbers, and 3) a dict mapping line
-        numbers to pairs (lo,hi) for multi-line statements.
-        
-        """
-        self.raw_analyze(text, filename, exclude)
-        
-        excluded_lines = self.map_to_first_line(self.excluded)
-        ignore = excluded_lines + list(self.docstrings)
-        lines = self.map_to_first_line(self.statement_starts, ignore)
-    
-        return lines, excluded_lines, self.multiline
-
-    def print_analysis(self):
-        """Print the results of the analysis."""
-        for i, ltext in enumerate(self.lines):
-            lineno = i+1
-            m0 = m1 = m2 = ' '
-            if lineno in self.statement_starts:
-                m0 = '-'
-            if lineno in self.docstrings:
-                m1 = '"'
-            if lineno in self.excluded:
-                m2 = 'x'
-            print "%4d %s%s%s %s" % (lineno, m0, m1, m2, ltext)
-
-
-if __name__ == '__main__':
-    import sys
-    
-    analyzer = CodeAnalyzer(show_tokens=True)
-    analyzer.raw_analyze(filename=sys.argv[1], exclude=r"no\s*cover")
-    analyzer.print_analysis()

coverage/control.py

         missing from execution, and (5), a readable string of missing lines.
 
         """
-        from coverage.analyzer import CodeAnalyzer
+        from coverage.parser import CodeParser
 
         filename = code_unit.filename
         ext = os.path.splitext(filename)[1]
                         "No source for code '%s'." % code_unit.filename
                         )
 
-        analyzer = CodeAnalyzer()
-        statements, excluded, line_map = analyzer.analyze_source(
+        parser = CodeParser()
+        statements, excluded, line_map = parser.parse_source(
             text=source, filename=filename, exclude=self.exclude_re
             )
 

coverage/parser.py

+"""Code parsing for coverage.py"""
+
+import re, token, tokenize, types
+import cStringIO as StringIO
+
+from coverage.misc import nice_pair, CoverageException
+
+
+# Python version compatibility
+try:
+    set()       # new in 2.4
+except NameError:
+    import sets
+    set = sets.Set      # pylint: disable-msg=W0622
+    
+
+class CodeParser:
+    """Parse code to find executable lines, excluded lines, etc."""
+    
+    def __init__(self, show_tokens=False):
+        self.show_tokens = show_tokens
+
+        # The text lines of the parsed code.
+        self.lines = None
+
+        # The line numbers of excluded lines of code.
+        self.excluded = set()
+        
+        # The line numbers of docstring lines.
+        self.docstrings = set()
+        
+        # A dict mapping line numbers to (lo,hi) for multi-line statements.
+        self.multiline = {}
+        
+        # The line numbers that start statements.
+        self.statement_starts = set()
+
+    def find_statement_starts(self, code):
+        """Find the starts of statements in compiled code.
+    
+        Uses co_lnotab described in Python/compile.c to find line numbers that
+        start statements, adding them to `self.statement_starts`.
+    
+        """
+        # Adapted from dis.py in the standard library.
+        byte_increments = [ord(c) for c in code.co_lnotab[0::2]]
+        line_increments = [ord(c) for c in code.co_lnotab[1::2]]
+    
+        last_line_num = None
+        line_num = code.co_firstlineno
+        for byte_incr, line_incr in zip(byte_increments, line_increments):
+            if byte_incr:
+                if line_num != last_line_num:
+                    self.statement_starts.add(line_num)
+                    last_line_num = line_num
+            line_num += line_incr
+        if line_num != last_line_num:
+            self.statement_starts.add(line_num)
+
+    def find_statements(self, code):
+        """Find the statements in `code`.
+        
+        Update `self.statement_starts`, a set of line numbers that start
+        statements.  Recurses into all code objects reachable from `code`.
+        
+        """
+        # Adapted from trace.py in the standard library.
+
+        # Get all of the lineno information from this code.
+        self.find_statement_starts(code)
+    
+        # Check the constants for references to other code objects.
+        for c in code.co_consts:
+            if isinstance(c, types.CodeType):
+                # Found another code object, so recurse into it.
+                self.find_statements(c)
+
+    def raw_parse(self, text=None, filename=None, exclude=None):
+        """Parse `text` to find the interesting facts about its lines.
+        
+        A handful of member fields are updated.
+        
+        """
+        if not text:
+            sourcef = open(filename, 'rU')
+            text = sourcef.read()
+            sourcef.close()
+        text = text.replace('\r\n', '\n')
+        self.lines = text.split('\n')
+
+        # Find lines which match an exclusion pattern.
+        if exclude:
+            re_exclude = re.compile(exclude)
+            for i, ltext in enumerate(self.lines):
+                if re_exclude.search(ltext):
+                    self.excluded.add(i+1)
+    
+        # Tokenize, to find excluded suites, to find docstrings, and to find
+        # multi-line statements.
+        indent = 0
+        exclude_indent = 0
+        excluding = False
+        prev_toktype = token.INDENT
+        first_line = None
+
+        tokgen = tokenize.generate_tokens(StringIO.StringIO(text).readline)
+        for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
+            if self.show_tokens:
+                print "%10s %5s %-20r %r" % (
+                    tokenize.tok_name.get(toktype, toktype),
+                    nice_pair((slineno, elineno)), ttext, ltext
+                    )
+            if toktype == token.INDENT:
+                indent += 1
+            elif toktype == token.DEDENT:
+                indent -= 1
+            elif toktype == token.OP and ttext == ':':
+                if not excluding and elineno in self.excluded:
+                    # Start excluding a suite.  We trigger off of the colon
+                    # token so that the #pragma comment will be recognized on
+                    # the same line as the colon.
+                    exclude_indent = indent
+                    excluding = True
+            elif toktype == token.STRING and prev_toktype == token.INDENT:
+                # Strings that are first on an indented line are docstrings.
+                # (a trick from trace.py in the stdlib.)
+                for i in xrange(slineno, elineno+1):
+                    self.docstrings.add(i)
+            elif toktype == token.NEWLINE:
+                if first_line is not None and elineno != first_line:
+                    # We're at the end of a line, and we've ended on a
+                    # different line than the first line of the statement,
+                    # so record a multi-line range.
+                    rng = (first_line, elineno)
+                    for l in xrange(first_line, elineno+1):
+                        self.multiline[l] = rng
+                first_line = None
+                
+            if ttext.strip() and toktype != tokenize.COMMENT:
+                # A non-whitespace token.
+                if first_line is None:
+                    # The token is not whitespace, and is the first in a
+                    # statement.
+                    first_line = slineno
+                    # Check whether to end an excluded suite.
+                    if excluding and indent <= exclude_indent:
+                        excluding = False
+                    if excluding:
+                        self.excluded.add(elineno)
+                        
+            prev_toktype = toktype
+
+        # Find the starts of the executable statements.
+        filename = filename or "<code>"
+        try:
+            # Python 2.3 and 2.4 don't like partial last lines, so be sure the
+            # text ends nicely for them.
+            text += '\n'
+            code = compile(text, filename, "exec")
+        except SyntaxError, synerr:
+            raise CoverageException(
+                "Couldn't parse '%s' as Python source: '%s' at line %d" %
+                    (filename, synerr.msg, synerr.lineno)
+                )
+
+        self.find_statements(code)
+
+    def map_to_first_line(self, lines, ignore=None):
+        """Map the line numbers in `lines` to the correct first line of the
+        statement.
+        
+        Skip any line mentioned in `ignore`.
+        
+        Returns a sorted list of the first lines.
+        
+        """
+        ignore = ignore or []
+        lset = set()
+        for l in lines:
+            if l in ignore:
+                continue
+            rng = self.multiline.get(l)
+            if rng:
+                new_l = rng[0]
+            else:
+                new_l = l
+            if new_l not in ignore:
+                lset.add(new_l)
+        lines = list(lset)
+        lines.sort()
+        return lines
+    
+    def parse_source(self, text=None, filename=None, exclude=None):
+        """Parse source text to find executable lines, excluded lines, etc.
+        
+        Source can be provided as `text`, the text itself, or `filename`, from
+        which text will be read.  Excluded lines are those that match `exclude`,
+        a regex.
+        
+        Return values are 1) a sorted list of executable line numbers,
+        2) a sorted list of excluded line numbers, and 3) a dict mapping line
+        numbers to pairs (lo,hi) for multi-line statements.
+        
+        """
+        self.raw_parse(text, filename, exclude)
+        
+        excluded_lines = self.map_to_first_line(self.excluded)
+        ignore = excluded_lines + list(self.docstrings)
+        lines = self.map_to_first_line(self.statement_starts, ignore)
+    
+        return lines, excluded_lines, self.multiline
+
+    def print_parse_results(self):
+        """Print the results of the parsing."""
+        for i, ltext in enumerate(self.lines):
+            lineno = i+1
+            m0 = m1 = m2 = ' '
+            if lineno in self.statement_starts:
+                m0 = '-'
+            if lineno in self.docstrings:
+                m1 = '"'
+            if lineno in self.excluded:
+                m2 = 'x'
+            print "%4d %s%s%s %s" % (lineno, m0, m1, m2, ltext)
+
+
+if __name__ == '__main__':
+    import sys
+    
+    parser = CodeParser(show_tokens=True)
+    parser.raw_parse(filename=sys.argv[1], exclude=r"no\s*cover")
+    parser.print_parse_results()