Commits

Ry4an Brase committed 38899c4

Add --ignore-imports for symilar / R0801. Also add tests for the existing
--ignore-docstrings option.

Comments (0)

Files changed (2)

ignore-similar-imports

+# HG changeset patch
+# Parent 7a1e32ae0c60d872568382a76f3621ce72a81b31
+Add --ignore-imports option to similarity checking.
+
+Additionally:
+ - add access to existing --ignore-docstrings option to symilar command line
+ - add access to new --ignore-imports option to symilar command line
+ - add test for existing --ignore-docstrings feature
+ - add test for new --ignore-imports feature
+
+diff -r 7a1e32ae0c60 checkers/similar.py
+--- a/checkers/similar.py	Thu Sep 20 06:54:35 2012 +0200
++++ b/checkers/similar.py	Sat Sep 29 23:19:13 2012 -0400
+@@ -29,10 +29,11 @@
+     """finds copy-pasted lines of code in a project"""
+ 
+     def __init__(self, min_lines=4, ignore_comments=False,
+-                 ignore_docstrings=False):
++                 ignore_docstrings=False, ignore_imports=False):
+         self.min_lines = min_lines
+         self.ignore_comments = ignore_comments
+         self.ignore_docstrings = ignore_docstrings
++        self.ignore_imports = ignore_imports
+         self.linesets = []
+ 
+     def append_stream(self, streamid, stream):
+@@ -41,7 +42,8 @@
+         self.linesets.append(LineSet(streamid,
+                                      stream.readlines(),
+                                      self.ignore_comments,
+-                                     self.ignore_docstrings))
++                                     self.ignore_docstrings,
++                                     self.ignore_imports))
+ 
+     def run(self):
+         """start looking for similarities and display results on stdout"""
+@@ -123,7 +125,11 @@
+                 for sim in self._find_common(lineset, lineset2):
+                     yield sim
+ 
+-def stripped_lines(lines, ignore_comments, ignore_docstrings):
++def stripped_lines(lines, ignore_comments, ignore_docstrings, ignore_imports):
++    """return lines with leading/trailing whitespace and any ignored code
++    features removed
++    """
++
+     strippedlines = []
+     docstring = None
+     for line in lines:
+@@ -137,6 +143,9 @@
+                 if line.endswith(docstring):
+                     docstring = None
+                 line = ''
++        if ignore_imports:
++            if line.startswith("import ") or line.startswith("from "):
++                line = ''
+         if ignore_comments:
+             # XXX should use regex in checkers/format to avoid cutting
+             # at a "#" in a string
+@@ -147,11 +156,12 @@
+ class LineSet:
+     """Holds and indexes all the lines of a single source file"""
+     def __init__(self, name, lines, ignore_comments=False,
+-                 ignore_docstrings=False):
++                 ignore_docstrings=False, ignore_imports=False):
+         self.name = name
+         self._real_lines = lines
+         self._stripped_lines = stripped_lines(lines, ignore_comments,
+-                                              ignore_docstrings)
++                                              ignore_docstrings,
++                                              ignore_imports)
+         self._index = self._mk_index()
+ 
+     def __str__(self):
+@@ -236,6 +246,10 @@
+                 {'default' : True, 'type' : 'yn', 'metavar' : '<y or n>',
+                  'help': 'Ignore docstrings when computing similarities.'}
+                 ),
++               ('ignore-imports',
++                {'default' : False, 'type' : 'yn', 'metavar' : '<y or n>',
++                 'help': 'Ignore imports when computing similarities.'}
++                ),
+                )
+     # reports
+     reports = ( ('RP0801', 'Duplication', report_similarities), )
+@@ -258,6 +272,8 @@
+             self.ignore_comments = self.config.ignore_comments
+         elif optname == 'ignore-docstrings':
+             self.ignore_docstrings = self.config.ignore_docstrings
++        elif optname == 'ignore-imports':
++            self.ignore_imports = self.config.ignore_imports
+ 
+     def open(self):
+         """init the checkers: reset linesets and statistics information"""
+@@ -302,7 +318,7 @@
+     print "finds copy pasted blocks in a set of files"
+     print
+     print 'Usage: symilar [-d|--duplicates min_duplicated_lines] \
+-[-i|--ignore-comments] file1...'
++[-i|--ignore-comments] [--ignore-docstrings] [--ignore-imports] file1...'
+     sys.exit(status)
+ 
+ def Run(argv=None):
+@@ -311,9 +327,12 @@
+         argv = sys.argv[1:]
+     from getopt import getopt
+     s_opts = 'hdi'
+-    l_opts = ('help', 'duplicates=', 'ignore-comments')
++    l_opts = ('help', 'duplicates=', 'ignore-comments', 'ignore-imports',
++              'ignore-docstrings')
+     min_lines = 4
+     ignore_comments = False
++    ignore_docstrings = False
++    ignore_imports = False
+     opts, args = getopt(argv, s_opts, l_opts)
+     for opt, val in opts:
+         if opt in ('-d', '--duplicates'):
+@@ -322,9 +341,13 @@
+             usage()
+         elif opt in ('-i', '--ignore-comments'):
+             ignore_comments = True
++        elif opt in ('--ignore-docstrings'):
++            ignore_docstrings = True
++        elif opt in ('--ignore-imports'):
++            ignore_imports = True
+     if not args:
+         usage(1)
+-    sim = Similar(min_lines, ignore_comments)
++    sim = Similar(min_lines, ignore_comments, ignore_docstrings, ignore_imports)
+     for filename in args:
+         sim.append_stream(filename, open(filename))
+     sim.run()
+diff -r 7a1e32ae0c60 test/input/similar1
+--- a/test/input/similar1	Thu Sep 20 06:54:35 2012 +0200
++++ b/test/input/similar1	Sat Sep 29 23:19:13 2012 -0400
+@@ -1,19 +1,22 @@
+-this file is used
+-to check the similar 
+-command line tool
++import one
++from two import two
++three
++four
++five
++six # comments optionally ignored
++seven
++eight
++nine
++''' ten
++eleven
++twelve '''
++thirteen
++fourteen
++fifteen
+ 
+-see the similar2 file which is almost the
+-same file as this one. 
+-more than 4
+-identical lines should
+-be # ignore comments !
+-detected
+ 
+ 
+-héhéhéh
+ 
+-
+-
+-
+-
+-Yo !
++sixteen
++seventeen
++eighteen
+diff -r 7a1e32ae0c60 test/input/similar2
+--- a/test/input/similar2	Thu Sep 20 06:54:35 2012 +0200
++++ b/test/input/similar2	Sat Sep 29 23:19:13 2012 -0400
+@@ -1,19 +1,22 @@
+-this file is used
+-to check the similar 
+-command line tool
++import one
++from two import two
++three
++four
++five
++six
++seven
++eight
++nine
++''' ten
++ELEVEN
++twelve '''
++thirteen
++fourteen
++FIFTEEN
+ 
+-see the similar1 file which is almost the
+-same file as this one. 
+-more than 4
+-identical lines should
+-be
+-detected
+ 
+ 
+-hohohoh
+ 
+-
+-
+-
+-
+-Yo !
++sixteen
++seventeen
++eighteen
+diff -r 7a1e32ae0c60 test/test_similar.py
+--- a/test/test_similar.py	Thu Sep 20 06:54:35 2012 +0200
++++ b/test/test_similar.py	Sat Sep 29 23:19:13 2012 -0400
+@@ -24,21 +24,76 @@
+         finally:
+             sys.stdout = sys.__stdout__
+         self.assertMultiLineEqual(output.strip(), ("""
+-7 similar lines in 2 files
+-==%s:5
+-==%s:5
+-   same file as this one. 
+-   more than 4
+-   identical lines should
+-   be
+-   detected
+-   
+-   
+-TOTAL lines=38 duplicates=7 percent=18.42
++10 similar lines in 2 files
++==%s:0
++==%s:0
++   import one
++   from two import two
++   three
++   four
++   five
++   six
++   seven
++   eight
++   nine
++   ''' ten
++TOTAL lines=44 duplicates=10 percent=22.73
+ """ % (SIMILAR1, SIMILAR2)).strip())
+ 
+ 
+-    def test_dont_ignore_comments(self):
++    def test_ignore_docsrings(self):
++        sys.stdout = StringIO()
++        try:
++            similar.Run(['--ignore-docstrings', SIMILAR1, SIMILAR2])
++        except SystemExit, ex:
++            self.assertEqual(ex.code, 0)
++            output = sys.stdout.getvalue()
++        else:
++            self.fail('not system exit')
++        finally:
++            sys.stdout = sys.__stdout__
++        self.assertMultiLineEqual(output.strip(), ("""
++8 similar lines in 2 files
++==%s:6
++==%s:6
++   seven
++   eight
++   nine
++   ''' ten
++   ELEVEN
++   twelve '''
++   thirteen
++   fourteen
++
++5 similar lines in 2 files
++==%s:0
++==%s:0
++   import one
++   from two import two
++   three
++   four
++   five
++TOTAL lines=44 duplicates=13 percent=29.55
++""" % ((SIMILAR1, SIMILAR2) * 2)).strip())
++
++
++    def test_ignore_imports(self):
++        sys.stdout = StringIO()
++        try:
++            similar.Run(['--ignore-imports', SIMILAR1, SIMILAR2])
++        except SystemExit, ex:
++            self.assertEqual(ex.code, 0)
++            output = sys.stdout.getvalue()
++        else:
++            self.fail('not system exit')
++        finally:
++            sys.stdout = sys.__stdout__
++        self.assertMultiLineEqual(output.strip(), """
++TOTAL lines=44 duplicates=0 percent=0.00
++""".strip())
++
++
++    def test_ignore_nothing(self):
+         sys.stdout = StringIO()
+         try:
+             similar.Run([SIMILAR1, SIMILAR2])
+@@ -49,9 +104,17 @@
+             self.fail('not system exit')
+         finally:
+             sys.stdout = sys.__stdout__
+-        self.assertMultiLineEqual(output.strip(), """
+-TOTAL lines=38 duplicates=0 percent=0.00
+-        """.strip())
++        self.assertMultiLineEqual(output.strip(), ("""
++5 similar lines in 2 files
++==%s:0
++==%s:0
++   import one
++   from two import two
++   three
++   four
++   five
++TOTAL lines=44 duplicates=5 percent=11.36
++""" % (SIMILAR1, SIMILAR2)).strip())
+ 
+     def test_help(self):
+         sys.stdout = StringIO()
+ignore-similar-imports
 # Placed by Bitbucket