Commits

Georg Brandl committed 933d0c1 Merge

Merged in roskakori/pygments-main (pull request #199)

Added lexer for Rexx

Comments (0)

Files changed (5)

 Other contributors, listed alphabetically, are:
 
 * Sam Aaron -- Ioke lexer
+* Ali Afshar -- image formatter
+* Thomas Aglassinger -- Rexx lexer
 * Kumar Appaiah -- Debian control lexer
-* Ali Afshar -- image formatter
 * Andreas Amann -- AppleScript lexer
 * Timothy Armstrong -- Dart lexer fixes
 * Jeffrey Arnold -- R/S, Rd, BUGS, Jags, and Stan lexers

pygments/lexers/_mapping.py

     'RebolLexer': ('pygments.lexers.other', 'REBOL', ('rebol',), ('*.r', '*.r3'), ('text/x-rebol',)),
     'RedcodeLexer': ('pygments.lexers.other', 'Redcode', ('redcode',), ('*.cw',), ()),
     'RegeditLexer': ('pygments.lexers.text', 'reg', ('registry',), ('*.reg',), ('text/x-windows-registry',)),
+    'RexxLexer': ('pygments.lexers.other', 'Rexx', ('rexx', 'ARexx', 'arexx'), ('*.rexx', '*.rex', '*.rx', '*.arexx'), ('text/x-rexx',)),
     'RhtmlLexer': ('pygments.lexers.templates', 'RHTML', ('rhtml', 'html+erb', 'html+ruby'), ('*.rhtml',), ('text/html+ruby',)),
     'RobotFrameworkLexer': ('pygments.lexers.other', 'RobotFramework', ('RobotFramework', 'robotframework'), ('*.txt', '*.robot'), ('text/x-robotframework',)),
     'RstLexer': ('pygments.lexers.text', 'reStructuredText', ('rst', 'rest', 'restructuredtext'), ('*.rst', '*.rest'), ('text/x-rst', 'text/prs.fallenstein.rst')),

pygments/lexers/other.py

 from pygments.lexer import RegexLexer, include, bygroups, using, \
      this, combined, ExtendedRegexLexer
 from pygments.token import Error, Punctuation, Literal, Token, \
-     Text, Comment, Operator, Keyword, Name, String, Number, Generic
+     Text, Comment, Operator, Keyword, Name, String, Number, Generic, Other, \
+     Whitespace
 from pygments.util import get_bool_opt
 from pygments.lexers.web import HtmlLexer
 
            'ECLLexer', 'UrbiscriptLexer', 'OpenEdgeLexer', 'BroLexer',
            'MscgenLexer', 'KconfigLexer', 'VGLLexer', 'SourcePawnLexer',
            'RobotFrameworkLexer', 'PuppetLexer', 'NSISLexer', 'RPMSpecLexer',
-           'CbmBasicV2Lexer', 'AutoItLexer']
+           'CbmBasicV2Lexer', 'AutoItLexer', 'RexxLexer']
 
 
 class ECLLexer(RegexLexer):
             include('literals'),
         ],
         'afterobject' : [
-            (r'! !$', Keyword , '#pop'), # squeak chunk delimeter
+            (r'! !$', Keyword , '#pop'), # squeak chunk delimiter
             include('whitespaces'),
             (r'\b(ifTrue:|ifFalse:|whileTrue:|whileFalse:|timesRepeat:)',
              Name.Builtin, '#pop'),
         from pygments.lexers._asybuiltins import ASYFUNCNAME, ASYVARNAME
         for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text):
-           if token is Name and value in ASYFUNCNAME:
-               token = Name.Function
-           elif token is Name and value in ASYVARNAME:
-               token = Name.Variable
-           yield index, token, value
+            if token is Name and value in ASYFUNCNAME:
+                token = Name.Function
+            elif token is Name and value in ASYVARNAME:
+                token = Name.Variable
+            yield index, token, value
 
 
 class PostScriptLexer(RegexLexer):
             (r'[a-zA-Z_#@$][a-zA-Z0-9_#@$]*', Name),
             (r'\\|\'', Text),
             (r'\`([\,\%\`abfnrtv\-\+;])', String.Escape),
-            (r'_\n', Text), # Line continuation
+            (r'_\n', Text),  # Line continuation
             include('garbage'),
         ],
         'commands': [
             (r'[^\S\n]', Text),
         ],
     }
+
+
+class RexxLexer(RegexLexer):
+    """
+    `Rexx <http://www.rexxinfo.org/>`_ is a scripting language available for
+    a wide range of different platforms with its roots found on mainframe
+    systems. It is popular for I/O- and data-based tasks and can act as a glue
+    language to bind different applications together.
+
+    *New in Pygments 1.7.*
+    """
+    name = 'Rexx'
+    aliases = ['rexx', 'ARexx', 'arexx']
+    filenames = ['*.rexx', '*.rex', '*.rx', '*.arexx']
+    mimetypes = ['text/x-rexx']
+    flags = re.IGNORECASE
+
+    tokens = {
+        'root': [
+            (r'\s', Whitespace),
+            (r'/\*', Comment.Multiline, 'comment'),
+            (r'"', String, 'string_double'),
+            (r"'", String, 'string_single'),
+            (r'[0-9]+(\.[0-9]+)?(e[+-]?[0-9])?', Number),
+            (r'([a-z_][a-z0-9_]*)(\s*)(:)(\s*)(procedure)\b',
+             bygroups(Name.Function, Whitespace, Operator, Whitespace, Keyword.Declaration)),
+            (r'([a-z_][a-z0-9_]*)(\s*)(:)',
+             bygroups(Name.Label, Whitespace, Operator)),
+            include('function'),
+            include('keyword'),
+            include('operator'),
+            (r'[a-z_][a-z0-9_]*', Text),
+        ],
+        'function': [
+            (r'(abbrev|abs|address|arg|b2x|bitand|bitor|bitxor|c2d|c2x|'
+             r'center|charin|charout|chars|compare|condition|copies|d2c|'
+             r'd2x|datatype|date|delstr|delword|digits|errortext|form|'
+             r'format|fuzz|insert|lastpos|left|length|linein|lineout|lines|'
+             r'max|min|overlay|pos|queued|random|reverse|right|sign|'
+             r'sourceline|space|stream|strip|substr|subword|symbol|time|'
+             r'trace|translate|trunc|value|verify|word|wordindex|'
+             r'wordlength|wordpos|words|x2b|x2c|x2d|xrange)(\s*)(\()',
+             bygroups(Name.Builtin, Whitespace, Operator)),
+        ],
+        'keyword': [
+            (r'(address|arg|by|call|do|drop|else|end|exit|for|forever|if|'
+             r'interpret|iterate|leave|nop|numeric|off|on|options|parse|'
+             r'pull|push|queue|return|say|select|signal|to|then|trace|until|'
+             r'while)\b', Keyword.Reserved),
+        ],
+        'operator': [
+            (ur'(-|//|/|\(|\)|\*\*|\*|\\<<|\\<|\\==|\\=|\\>>|\\>|\\|\|\||\||'
+             ur'&&|&|%|\+|<<=|<<|<=|<>|<|==|=|><|>=|>>=|>>|>|¬<<|¬<|¬==|¬=|'
+             ur'¬>>|¬>|¬|\.|,)', Operator),
+        ],
+        'string_double': [
+            (r'[^"\n]+', String),
+            (r'""', String),
+            (r'"', String, '#pop'),
+            (r'\n', Text, '#pop'),  # Stray linefeed also terminates strings.
+        ],
+        'string_single': [
+            (r'[^\'\n]', String),
+            (r'\'\'', String),
+            (r'\'', String, '#pop'),
+            (r'\n', Text, '#pop'),  # Stray linefeed also terminates strings.
+        ],
+        'comment': [
+            (r'[^*]+', Comment.Multiline),
+            (r'\*/', Comment.Multiline, '#pop'),
+            (r'\*', Comment.Multiline),
+        ]
+    }
+
+    _ADDRESS_COMMAND_PATTERN = r'^\s*address\s+command\b'
+    _ADDRESS_PATTERN = r'^\s*address\s+'
+    _DO_WHILE_PATTERN = r'^\s*do\s+while\b'
+    _IF_THEN_DO_PATTERN = r'^\s*if\b.+\bthen\s+do\s*$'
+    _PROCEDURE_PATTERN = r'^\s*([a-z_][a-z0-9_]*)(\s*)(:)(\s*)(procedure)\b'
+    _ELSE_DO_PATTERN = r'\belse\s+do\s*$'
+    _PARSE_ARG_PATTERN = r'^\s*parse\s+(upper\s+)?(arg|value)\b'
+    _PATTERNS_AND_WEIGHTS = (
+        (_ADDRESS_COMMAND_PATTERN, 0.2),
+        (_ADDRESS_PATTERN, 0.05),
+        (_DO_WHILE_PATTERN, 0.1),
+        (_ELSE_DO_PATTERN, 0.1),
+        (_IF_THEN_DO_PATTERN, 0.1),
+        (_PROCEDURE_PATTERN, 0.5),
+        (_PARSE_ARG_PATTERN, 0.2),
+    )
+
+    @staticmethod
+    def _analyse_text_for_weighted_patterns(text, patternsAndWeights):
+        result = 0.0
+        lowerText = text.lower()
+        for pattern, weight in patternsAndWeights:
+            regex = re.compile(pattern, re.MULTILINE)
+            if regex.search(lowerText):
+                result += weight
+        return result
+
+    def analyse_text(text):
+        """
+        Check for initial comment.
+        """
+        result = 0.0
+        if re.search(r'/\*\**\s*rexx', text, re.IGNORECASE):
+            # Header matches MVS Rexx requirements; this is certainly a Rexx
+            # script.
+            result = 1.0
+        elif text.startswith('/*'):
+            # Header matches general Rexx requirements; the source code might
+            # still be any language using C comments such as C++, C# or Java.
+            result = 0.01
+            result += RexxLexer._analyse_text_for_weighted_patterns(
+                text, RexxLexer._PATTERNS_AND_WEIGHTS)
+            result = min(result, 1.0)
+        assert 0.0 <= result <= 1.0
+        return result

tests/examplefiles/example.rexx

+/* REXX example. */
+
+/* Some basic constructs. */
+almost_pi = 0.1415 + 3
+if almost_pi < 3 then
+   say 'huh?'
+else do
+   say 'almost_pi=' almost_pi || " - ok"
+end
+x = '"' || "'" || '''' || """" /* quotes */
+
+/* A comment
+ * spawning multiple
+   lines. /* / */
+
+/* Built-in functions. */
+line = 'line containing some short text'
+say WordPos(line, 'some')
+say Word(line, 4)
+
+/* Labels and procedures. */
+some_label :
+
+divide: procedure
+    parse arg some other
+    return some / other
+
+call divide(5, 2)
+
+/* Loops */
+do i = 1 to 5
+    do j = -3 to -9 by -3
+        say i '+' j '=' i + j
+    end j
+end i
+
+do forever
+  leave
+end
+
+/* Print a text file on MVS. */
+ADDRESS TSO
+"ALLOC F(TEXTFILE) DSN('some.text.dsn') SHR REU"
+"EXECIO * DISKR TEXTFILE ( FINIS STEM LINES."
+"FREE F(TEXTFILE)"
+I = 1
+DO WHILE I <= LINES.0
+    SAY ' LINE ' I ' : ' LINES.I
+    I = I + 1
+END

tests/test_lexers_other.py

+# -*- coding: utf-8 -*-
+"""
+    Tests for other lexers
+    ~~~~~~~~~~~~~~~~~~~~~~
+
+    :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+import glob
+import os
+import unittest
+
+from pygments.lexers import guess_lexer
+from pygments.lexers.other import RexxLexer
+
+
+def _exampleFilePath(filename):
+    return os.path.join(os.path.dirname(__file__), 'examplefiles', filename)
+
+
+class AnalyseTextTest(unittest.TestCase):
+    def _testCanRecognizeAndGuessExampleFiles(self, lexer):
+        assert lexer is not None
+
+        for pattern in lexer.filenames:
+            exampleFilesPattern = _exampleFilePath(pattern)
+            for exampleFilePath in glob.glob(exampleFilesPattern):
+                exampleFile = open(exampleFilePath, 'rb')
+                try:
+                    text = exampleFile.read()
+                    probability = lexer.analyse_text(text)
+                    self.assertTrue(probability > 0,
+                        '%s must recognize %r' % (
+                        lexer.name, exampleFilePath))
+                    guessedLexer = guess_lexer(text)
+                    self.assertEqual(guessedLexer.name, lexer.name)
+                finally:
+                    exampleFile.close()
+
+    def testCanRecognizeAndGuessExampleFiles(self):
+        self._testCanRecognizeAndGuessExampleFiles(RexxLexer)
+
+
+class RexxLexerTest(unittest.TestCase):
+    def testCanGuessFromText(self):
+        self.assertAlmostEqual(0.01,
+            RexxLexer.analyse_text('/* */'))
+        self.assertAlmostEqual(1.0,
+            RexxLexer.analyse_text('''/* Rexx */
+                say "hello world"'''))
+        self.assertLess(0.5,
+            RexxLexer.analyse_text('/* */\n'
+                'hello:pRoceduRe\n'
+                '  say "hello world"'))
+        self.assertLess(0.2,
+            RexxLexer.analyse_text('''/* */
+                if 1 > 0 then do
+                    say "ok"
+                end
+                else do
+                    say "huh?"
+                end'''))
+        self.assertLess(0.2,
+            RexxLexer.analyse_text('''/* */
+                greeting = "hello world!"
+                parse value greeting "hello" name "!"
+                say name'''))
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.