Commits

roskakori committed fdb423c

Cleaned up the Rexx lexer.

* Cleaned up a few regular expressions to make them easier to read and to improve performance.
* Cleaned up analyse_text(), which is now much smaller and easier for other lexers to reuse (see the sketch after this list).
* Removed an unnecessary instance variable from the test case.
* Added loops to the example source code.
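
As an illustration of the reuse point, another lexer could call the new weighted-pattern helper roughly like this. This is a minimal sketch: FooLexer, its patterns and its weights are invented for the example; only RexxLexer._analyse_text_for_weighted_patterns comes from this commit.

    from pygments.lexer import RegexLexer
    from pygments.lexers.other import RexxLexer
    from pygments.token import Text

    class FooLexer(RegexLexer):
        # Hypothetical lexer; name, tokens, patterns and weights are
        # invented here purely to show how the helper can be shared.
        name = 'Foo'
        tokens = {
            'root': [
                (r'\s+', Text),
                (r'.+', Text),
            ],
        }

        # Same (pattern, weight) layout as RexxLexer._PATTERNS_AND_WEIGHTS.
        _PATTERNS_AND_WEIGHTS = (
            (r'^\s*procedure\b', 0.5),
            (r'^\s*end\s*$', 0.1),
        )

        def analyse_text(text):
            # Reuse the scoring introduced for RexxLexer in this commit.
            result = RexxLexer._analyse_text_for_weighted_patterns(
                text, FooLexer._PATTERNS_AND_WEIGHTS)
            return min(result, 1.0)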

  • Parent commits 59614c3


Files changed (3)

File pygments/lexers/other.py

              r'max|min|overlay|pos|queued|random|reverse|right|sign|'
              r'sourceline|space|stream|strip|substr|subword|symbol|time|'
              r'trace|translate|trunc|value|verify|word|wordindex|'
-             r'wordlength|wordpos|words|x2b|x2c|x2d|xrange)(\s*)([(])',
+             r'wordlength|wordpos|words|x2b|x2c|x2d|xrange)(\s*)(\()',
              bygroups(Name.Builtin, Whitespace, Operator)),
         ],
         'keyword': [
             (r'\n', Text, '#pop'),  # Stray linefeed also terminates strings.
         ],
         'comment': [
+            (r'[^*]+', Comment.Multiline),
             (r'\*/', Comment.Multiline, '#pop'),
-            (r'(.|\n)', Comment.Multiline),
+            (r'\*', Comment.Multiline),
         ]
     }
 
-    _ADDRESS_COMMAND_REGEX = re.compile(r'\s*address\s+command\b', re.IGNORECASE)
-    _ADDRESS_REGEX = re.compile(r'\s*address\s+', re.IGNORECASE)
-    _DO_WHILE_REGEX = re.compile(r'\s*do\s+while\b', re.IGNORECASE)
-    _IF_THEN_DO_REGEX = re.compile(r'\s*if\b.+\bthen\s+do\s*$', re.IGNORECASE)
-    _PROCEDURE_REGEX = re.compile(r'([a-z_][a-z0-9_]*)(\s*)(:)(\s*)(procedure)\b', re.IGNORECASE)
-    _ELSE_DO_REGEX = re.compile(r'\s*else\s+do\s*$', re.IGNORECASE)
-    _PARSE_ARG_REGEX = re.compile(r'\s*parse\s+(upper\s+)?(arg|value)\b', re.IGNORECASE)
-    _REGEXS = [
-        _ADDRESS_COMMAND_REGEX,
-        _ADDRESS_REGEX,
-        _DO_WHILE_REGEX,
-        _ELSE_DO_REGEX,
-        _IF_THEN_DO_REGEX,
-        _PROCEDURE_REGEX,
-        _PARSE_ARG_REGEX,
-    ]
+    _ADDRESS_COMMAND_PATTERN = r'^\s*address\s+command\b'
+    _ADDRESS_PATTERN = r'^\s*address\s+'
+    _DO_WHILE_PATTERN = r'^\s*do\s+while\b'
+    _IF_THEN_DO_PATTERN = r'^\s*if\b.+\bthen\s+do\s*$'
+    _PROCEDURE_PATTERN = r'^\s*([a-z_][a-z0-9_]*)(\s*)(:)(\s*)(procedure)\b'
+    _ELSE_DO_PATTERN = r'\belse\s+do\s*$'
+    _PARSE_ARG_PATTERN = r'^\s*parse\s+(upper\s+)?(arg|value)\b'
+    _PATTERNS_AND_WEIGHTS = (
+        (_ADDRESS_COMMAND_PATTERN, 0.2),
+        (_ADDRESS_PATTERN, 0.05),
+        (_DO_WHILE_PATTERN, 0.1),
+        (_ELSE_DO_PATTERN, 0.1),
+        (_IF_THEN_DO_PATTERN, 0.1),
+        (_PROCEDURE_PATTERN, 0.5),
+        (_PARSE_ARG_PATTERN, 0.2),
+    )
+
+    @staticmethod
+    def _analyse_text_for_weighted_patterns(text, patternsAndWeights):
+        result = 0.0
+        lowerText = text.lower()
+        for pattern, weight in patternsAndWeights:
+            regex = re.compile(pattern, re.MULTILINE)
+            if regex.search(lowerText):
+                result += weight
+        return result
 
     def analyse_text(text):
         """
             # Header matches general Rexx requirements; the source code might
             # still be any language using C comments such as C++, C# or Java.
             result = 0.01
-
-            # Check if lines match certain regular expressions and
-            # collect the respective counts in a dictionary.
-            regexCount = len(RexxLexer._REGEXS)
-            regexToCountMap = {}
-            for regex in RexxLexer._REGEXS:
-                regexToCountMap[regex] = 0
-            for line in (text.split('\n'))[1:]:
-                regexIndex = 0
-                lineHasAnyRegex = False
-                while not lineHasAnyRegex and (regexIndex < regexCount):
-                    regexToCheck = RexxLexer._REGEXS[regexIndex]
-                    if regexToCheck.match(line) is not None:
-                        regexToCountMap[regexToCheck] = \
-                            regexToCountMap[regexToCheck] + 1
-                        lineHasAnyRegex = True
-                    else:
-                        regexIndex += 1
-            # Evaluate the findings.
-            if regexToCountMap[RexxLexer._PROCEDURE_REGEX] > 0:
-                result += 0.5
-            elif regexToCountMap[RexxLexer._ADDRESS_COMMAND_REGEX] > 0:
-                result += 0.2
-            elif regexToCountMap[RexxLexer._ADDRESS_REGEX] > 0:
-                result += 0.05
-            if regexToCountMap[RexxLexer._DO_WHILE_REGEX] > 0:
-                result += 0.1
-            if regexToCountMap[RexxLexer._ELSE_DO_REGEX] > 0:
-                result += 0.1
-            if regexToCountMap[RexxLexer._PARSE_ARG_REGEX] > 0:
-                result += 0.2
-            if regexToCountMap[RexxLexer._IF_THEN_DO_REGEX] > 0:
-                result += 0.1
+            result += RexxLexer._analyse_text_for_weighted_patterns(
+                text, RexxLexer._PATTERNS_AND_WEIGHTS)
             result = min(result, 1.0)
         assert 0.0 <= result <= 1.0
         return result

File tests/examplefiles/example.rexx

 
 call divide(5, 2)
 
+/* Loops */
+do i = 1 to 5
+    do j = -3 to -9 by -3
+        say i '+' j '=' i + j
+    end j
+end i
+
+do forever
+  leave
+end
+
 /* Print a text file on MVS. */
 ADDRESS TSO
 "ALLOC F(TEXTFILE) DSN('some.text.dsn') SHR REU"
 "FREE F(TEXTFILE)"
 I = 1
 DO WHILE I <= LINES.0
-   SAY ' LINE ' I ' : ' LINES.I
-   I = I + 1
+    SAY ' LINE ' I ' : ' LINES.I
+    I = I + 1
 END

File tests/test_lexers_other.py

     return os.path.join(os.path.dirname(__file__), 'examplefiles', filename)
 
 
-class _AnalyseTextTest(unittest.TestCase):
-    def setUp(self):
-        raise NotImplementedError('self.lexer must be set')
+class AnalyseTextTest(unittest.TestCase):
+    def _testCanRecognizeAndGuessExampleFiles(self, lexer):
+        assert lexer is not None
 
-    def testCanRecognizeAndGuessExampleFiles(self):
-        for pattern in self.lexer.filenames:
+        for pattern in lexer.filenames:
             exampleFilesPattern = _exampleFilePath(pattern)
             for exampleFilePath in glob.glob(exampleFilesPattern):
                 exampleFile = open(exampleFilePath, 'rb')
                 try:
                     text = exampleFile.read()
-                    probability = self.lexer.analyse_text(text)
+                    probability = lexer.analyse_text(text)
                     self.assertTrue(probability > 0,
-                            '%s must recognize %r' % (
-                            self.lexer.name, exampleFilePath))
+                        '%s must recognize %r' % (
+                        lexer.name, exampleFilePath))
                     guessedLexer = guess_lexer(text)
-                    self.assertEqual(guessedLexer.name, self.lexer.name)
+                    self.assertEqual(guessedLexer.name, lexer.name)
                 finally:
                     exampleFile.close()
 
+    def testCanRecognizeAndGuessExampleFiles(self):
+        self._testCanRecognizeAndGuessExampleFiles(RexxLexer)
 
-class RexxLexerTest(_AnalyseTextTest):
-    def setUp(self):
-        self.lexer = RexxLexer()
 
+class RexxLexerTest(unittest.TestCase):
     def testCanGuessFromText(self):
         self.assertAlmostEqual(0.01,
-            self.lexer.analyse_text('/* */'))
+            RexxLexer.analyse_text('/* */'))
         self.assertAlmostEqual(1.0,
-            self.lexer.analyse_text('''/* Rexx */
+            RexxLexer.analyse_text('''/* Rexx */
                 say "hello world"'''))
         self.assertLess(0.5,
-            self.lexer.analyse_text('/* */\n' \
-                + 'hello:pRoceduRe\n' \
-                + '  say "hello world"'))
+            RexxLexer.analyse_text('/* */\n'
+                'hello:pRoceduRe\n'
+                '  say "hello world"'))
         self.assertLess(0.2,
-            self.lexer.analyse_text('''/* */
+            RexxLexer.analyse_text('''/* */
                 if 1 > 0 then do
                     say "ok"
                 end
                     say "huh?"
                 end'''))
         self.assertLess(0.2,
-            self.lexer.analyse_text('''/* */
+            RexxLexer.analyse_text('''/* */
                 greeting = "hello world!"
                 parse value greeting "hello" name "!"
                 say name'''))