Commits

Sylvain Thénault  committed 0076665

closes #69993: Additional string format checks for logging module

  • Participants
  • Parent commits 2d13a43

Comments (0)

Files changed (5)

 ====================
 
 	--
+	
+    * #69993: Additional string format checks for logging module:
+      check for missing arguments, too many arguments, or invalid string
+      formats in the logging checker module. Contributed by Daniel Arena
+	
     * #69220: add column offset to the reports. If you've a custom reporter,
       this change may break it has now location gain a new item giving the
       column offset.

File checkers/logging.py

 from logilab import astng
 from pylint import checkers
 from pylint import interfaces
+from pylint.checkers import utils
 
-EAGER_STRING_INTERPOLATION = 'W6501'
 
-CHECKED_CONVENIENCE_FUNCTIONS = set([
-    'critical', 'debug', 'error', 'exception', 'fatal', 'info', 'warn',
-    'warning'])
-
-
-class LoggingChecker(checkers.BaseChecker):
-    """Checks use of the logging module."""
-
-    __implements__ = interfaces.IASTNGChecker
-
-    name = 'logging'
-
-    msgs = {EAGER_STRING_INTERPOLATION:
-            ('Specify string format arguments as logging function parameters',
+MSGS = {
+    'W6501': ('Specify string format arguments as logging function parameters',
              'Used when a logging statement has a call form of '
              '"logging.<logging method>(format_string % (format_args...))". '
              'Such calls should leave string interpolation to the logging '
              'so that the program may avoid incurring the cost of the '
              'interpolation in those cases in which no message will be '
              'logged. For more, see '
-             'http://www.python.org/dev/peps/pep-0282/.')
-            }
+             'http://www.python.org/dev/peps/pep-0282/.'),
+    'E6500': ('Unsupported logging format character %r (%#02x) at index %d',
+              'Used when an unsupported format character is used in a logging\
+              statement format string.'),
+    'E6501': ('Logging format string ends in middle of conversion specifier',
+              'Used when a logging statement format string terminates before\
+              the end of a conversion specifier.'),
+    'E6505': ('Too many arguments for logging format string',
+              'Used when a logging format string is given too few arguments.'),
+    'E6506': ('Not enough arguments for logging format string',
+              'Used when a logging format string is given too many arguments'),
+    }
+
+
+CHECKED_CONVENIENCE_FUNCTIONS = set([
+    'critical', 'debug', 'error', 'exception', 'fatal', 'info', 'warn',
+    'warning'])
+
+
+class LoggingChecker(checkers.BaseChecker):
+    """Checks use of the logging module."""
+
+    __implements__ = interfaces.IASTNGChecker
+    name = 'logging'
+    msgs = MSGS
 
     def visit_module(self, unused_node):
         """Clears any state left in this checker from last module checked."""
             or not isinstance(node.func.expr, astng.Name)
             or node.func.expr.name != self._logging_name):
             return
-        self._CheckConvenienceMethods(node)
-        self._CheckLogMethod(node)
+        self._check_convenience_methods(node)
+        self._check_log_methods(node)
 
-    def _CheckConvenienceMethods(self, node):
+    def _check_convenience_methods(self, node):
         """Checks calls to logging convenience methods (like logging.warn)."""
         if node.func.attrname not in CHECKED_CONVENIENCE_FUNCTIONS:
             return
-        if not node.args:
-            # Either no args, or star args, or double-star args. Beyond the
-            # scope of this checker in any case.
+        if node.starargs or node.kwargs or not node.args:
+            # Either no args, star args, or double-star args. Beyond the
+            # scope of this checker.
             return
         if isinstance(node.args[0], astng.BinOp) and node.args[0].op == '%':
-            self.add_message(EAGER_STRING_INTERPOLATION, node=node)
+            self.add_message('W6501', node=node)
+        elif isinstance(node.args[0], astng.Const):
+            self._check_format_string(node, 0)
 
-    def _CheckLogMethod(self, node):
+    def _check_log_methods(self, node):
         """Checks calls to logging.log(level, format, *format_args)."""
         if node.func.attrname != 'log':
             return
-        if len(node.args) < 2:
-            # Either a malformed call or something with crazy star args or
-            # double-star args magic. Beyond the scope of this checker.
+        if node.starargs or node.kwargs or len(node.args) < 2:
+            # Either a malformed call, star args, or double-star args. Beyond
+            # the scope of this checker.
             return
         if isinstance(node.args[1], astng.BinOp) and node.args[1].op == '%':
-            self.add_message(EAGER_STRING_INTERPOLATION, node=node)
+            self.add_message('W6501', node=node)
+        elif isinstance(node.args[1], astng.Const):
+            self._check_format_string(node, 1)
+
+    def _check_format_string(self, node, format_arg):
+        """Checks that format string tokens match the supplied arguments.
+
+        Args:
+          node: AST node to be checked.
+          format_arg: Index of the format string in the node arguments.
+        """
+        num_args = self._count_supplied_tokens(node.args[format_arg + 1:])
+        if not num_args:
+            # If no args were supplied, then all format strings are valid -
+            # don't check any further.
+            return
+        format_string = node.args[format_arg].value
+        if not isinstance(format_string, basestring):
+            # If the log format is constant non-string (e.g. logging.debug(5)),
+            # ensure there are no arguments.
+            required_num_args = 0
+        else:
+            try:
+                keyword_args, required_num_args = \
+                    utils.parse_format_string(format_string)
+                if keyword_args:
+                    # Keyword checking on logging strings is complicated by
+                    # special keywords - out of scope.
+                    return
+            except utils.UnsupportedFormatCharacter, e:
+                c = format_string[e.index]
+                self.add_message('E6500', node=node, args=(c, ord(c), e.index))
+                return
+            except utils.IncompleteFormatString:
+                self.add_message('E6501', node=node)
+                return
+        if num_args > required_num_args:
+            self.add_message('E6505', node=node)
+        elif num_args < required_num_args:
+            self.add_message('E6506', node=node)
+
+    def _count_supplied_tokens(self, args):
+        """Counts the number of tokens in an args list.
+
+        The Python log functions allow for special keyword arguments: func,
+        exc_info and extra. To handle these cases correctly, we only count
+        arguments that aren't keywords.
+
+        Args:
+          args: List of AST nodes that are arguments for a log format string.
+
+        Returns:
+          Number of AST nodes that aren't keywords.
+        """
+        return sum(1 for arg in args if not isinstance(arg, astng.Keyword))
+
 
 def register(linter):
     """Required method to auto-register this checker."""

File checkers/string_format.py

 from logilab import astng
 from pylint.interfaces import IASTNGChecker
 from pylint.checkers import BaseChecker
+from pylint.checkers import utils
+
 
 MSGS = {
     'E9900': ("Unsupported format character %r (%#02x) at index %d",
               specifiers is given too many arguments"),
     }
 
-class IncompleteFormatString(Exception):
-    """A format string ended in the middle of a format specifier."""
-    pass
-
-class UnsupportedFormatCharacter(Exception):
-    """A format character in a format string is not one of the supported
-    format characters."""
-    def __init__(self, index):
-        Exception.__init__(self, index)
-        self.index = index
-
-def parse_format_string(format_string):
-    """Parses a format string, returning a tuple of (keys, num_args), where keys
-    is the set of mapping keys in the format string, and num_args is the number
-    of arguments required by the format string.  Raises
-    IncompleteFormatString or UnsupportedFormatCharacter if a
-    parse error occurs."""
-    keys = set()
-    num_args = 0
-    def next_char(i):
-        i += 1
-        if i == len(format_string):
-            raise IncompleteFormatString
-        return (i, format_string[i])
-    i = 0
-    while i < len(format_string):
-        c = format_string[i]
-        if c == '%':
-            i, c = next_char(i)
-            # Parse the mapping key (optional).
-            key = None
-            if c == '(':
-                depth = 1
-                i, c = next_char(i)
-                key_start = i
-                while depth != 0:
-                    if c == '(':
-                        depth += 1
-                    elif c == ')':
-                        depth -= 1
-                    i, c = next_char(i)
-                key_end = i - 1
-                key = format_string[key_start:key_end]
-
-            # Parse the conversion flags (optional).
-            while c in '#0- +':
-                i, c = next_char(i)
-            # Parse the minimum field width (optional).
-            if c == '*':
-                num_args += 1
-                i, c = next_char(i)
-            else:
-                while c in string.digits:
-                    i, c = next_char(i)
-            # Parse the precision (optional).
-            if c == '.':
-                i, c = next_char(i)
-                if c == '*':
-                    num_args += 1
-                    i, c = next_char(i)
-                else:
-                    while c in string.digits:
-                        i, c = next_char(i)
-            # Parse the length modifier (optional).
-            if c in 'hlL':
-                i, c = next_char(i)
-            # Parse the conversion type (mandatory).
-            if c not in 'diouxXeEfFgGcrs%':
-                raise UnsupportedFormatCharacter(i)
-            if key:
-                keys.add(key)
-            elif c != '%':
-                num_args += 1
-        i += 1
-    return keys, num_args
-
 OTHER_NODES = (astng.Const, astng.List, astng.Backquote,
                astng.Lambda, astng.Function,
                astng.ListComp, astng.SetComp, astng.GenExpr)
         format_string = left.value
         try:
             required_keys, required_num_args = \
-                parse_format_string(format_string)
-        except UnsupportedFormatCharacter, e:
+                utils.parse_format_string(format_string)
+        except utils.UnsupportedFormatCharacter, e:
             c = format_string[e.index]
             self.add_message('E9900', node=node, args=(c, ord(c), e.index))
             return
-        except IncompleteFormatString:
+        except utils.IncompleteFormatString:
             self.add_message('E9901', node=node)
             return
         if required_keys and required_num_args:

File checkers/utils.py

 """some functions that may be useful for various checkers
 """
 
+import string
 from logilab import astng
 from logilab.common.compat import builtins
 BUILTINS_NAME = builtins.__name__
         return func
     return store_messages
 
+class IncompleteFormatString(Exception):
+    """A format string ended in the middle of a format specifier."""
+    pass
+
+class UnsupportedFormatCharacter(Exception):
+    """A format character in a format string is not one of the supported
+    format characters."""
+    def __init__(self, index):
+        Exception.__init__(self, index)
+        self.index = index
+
+def parse_format_string(format_string):
+    """Parses a format string, returning a tuple of (keys, num_args), where keys
+    is the set of mapping keys in the format string, and num_args is the number
+    of arguments required by the format string.  Raises
+    IncompleteFormatString or UnsupportedFormatCharacter if a
+    parse error occurs."""
+    keys = set()
+    num_args = 0
+    def next_char(i):
+        i += 1
+        if i == len(format_string):
+            raise IncompleteFormatString
+        return (i, format_string[i])
+    i = 0
+    while i < len(format_string):
+        c = format_string[i]
+        if c == '%':
+            i, c = next_char(i)
+            # Parse the mapping key (optional).
+            key = None
+            if c == '(':
+                depth = 1
+                i, c = next_char(i)
+                key_start = i
+                while depth != 0:
+                    if c == '(':
+                        depth += 1
+                    elif c == ')':
+                        depth -= 1
+                    i, c = next_char(i)
+                key_end = i - 1
+                key = format_string[key_start:key_end]
+
+            # Parse the conversion flags (optional).
+            while c in '#0- +':
+                i, c = next_char(i)
+            # Parse the minimum field width (optional).
+            if c == '*':
+                num_args += 1
+                i, c = next_char(i)
+            else:
+                while c in string.digits:
+                    i, c = next_char(i)
+            # Parse the precision (optional).
+            if c == '.':
+                i, c = next_char(i)
+                if c == '*':
+                    num_args += 1
+                    i, c = next_char(i)
+                else:
+                    while c in string.digits:
+                        i, c = next_char(i)
+            # Parse the length modifier (optional).
+            if c in 'hlL':
+                i, c = next_char(i)
+            # Parse the conversion type (mandatory).
+            if c not in 'diouxXeEfFgGcrs%':
+                raise UnsupportedFormatCharacter(i)
+            if key:
+                keys.add(key)
+            elif c != '%':
+                num_args += 1
+        i += 1
+    return keys, num_args

File test/unittest_lint.py

         self.linter.error_mode()
         checkers = self.linter.prepare_checkers()
         checker_names = tuple(c.name for c in checkers)
-        should_not = ('design', 'format', 'imports', 'logging', 'metrics',
+        should_not = ('design', 'format', 'imports', 'metrics',
                       'miscellaneous', 'similarities')
         self.failIf(any(name in checker_names for name in should_not))
 
         # FIXME should it be necessary to explicitly desactivate failures ?
         self.linter.set_option('disable', 'R,C,W')
         checker_names = [c.name for c in self.linter.prepare_checkers()]
-        should_not = ('design', 'logging', 'metrics', 'similarities')
+        should_not = ('design', 'metrics', 'similarities')
         rest = [name for name in checker_names if name in should_not]
         self.assertListEqual(rest, [])
         self.linter.set_option('disable', 'R,C,W,F')