1. Thomas Aglassinger
  2. pygments-main

Commits

Thomas Aglassinger  committed 2cc5760

#710 Added Mathematica lexer.

  • Participants
  • Parent commits 7304e47
  • Branches mathematica

Comments (0)

Files changed (5)

File AUTHORS

View file
  • Ignore whitespace
 
 * Sam Aaron -- Ioke lexer
 * Ali Afshar -- image formatter
-* Thomas Aglassinger -- Rexx lexer
+* Thomas Aglassinger -- Mathematica and Rexx lexers
 * Kumar Appaiah -- Debian control lexer
 * Andreas Amann -- AppleScript lexer
 * Timothy Armstrong -- Dart lexer fixes

File pygments/lexers/_mapping.py

View file
  • Ignore whitespace
     'MakoXmlLexer': ('pygments.lexers.templates', 'XML+Mako', ('xml+mako',), (), ('application/xml+mako',)),
     'MaqlLexer': ('pygments.lexers.other', 'MAQL', ('maql',), ('*.maql',), ('text/x-gooddata-maql', 'application/x-gooddata-maql')),
     'MasonLexer': ('pygments.lexers.templates', 'Mason', ('mason',), ('*.m', '*.mhtml', '*.mc', '*.mi', 'autohandler', 'dhandler'), ('application/x-mason',)),
+    'MathematicaLexer': ('pygments.lexers.math', 'Mathematica', ('mathematica',), ('*.m',), ('text/x-mathematica',)),
     'MatlabLexer': ('pygments.lexers.math', 'Matlab', ('matlab',), ('*.m',), ('text/matlab',)),
     'MatlabSessionLexer': ('pygments.lexers.math', 'Matlab session', ('matlabsession',), (), ()),
     'MiniDLexer': ('pygments.lexers.agile', 'MiniD', ('minid',), ('*.md',), ('text/x-minidsrc',)),

File pygments/lexers/math.py

View file
  • Ignore whitespace
 
 import re
 
-from pygments.util import shebang_matches
+from pygments.util import shebang_matches, sorted_keywords_pattern
 from pygments.lexer import Lexer, RegexLexer, bygroups, include, \
     combined, do_insertions
 from pygments.token import Comment, String, Punctuation, Keyword, Name, \
-    Operator, Number, Text, Generic
+    Operator, Number, Text, Generic, Whitespace
 
 from pygments.lexers.agile import PythonLexer
 from pygments.lexers import _scilab_builtins
 __all__ = ['JuliaLexer', 'JuliaConsoleLexer', 'MuPADLexer', 'MatlabLexer',
            'MatlabSessionLexer', 'OctaveLexer', 'ScilabLexer', 'NumPyLexer',
            'RConsoleLexer', 'SLexer', 'JagsLexer', 'BugsLexer', 'StanLexer',
-           'IDLLexer', 'RdLexer', 'IgorLexer']
+           'IDLLexer', 'RdLexer', 'IgorLexer', 'MathematicaLexer']
 
 
 class JuliaLexer(RegexLexer):
                 yield item
 
 
+class MathematicaLexer(RegexLexer):
+    """
+    Lexer for Mathematica source code.
+
+    Mathematica is a computational software program used in scientific,
+    engineering, and mathematical fields and other areas of technical
+    computing.
+
+    *New in Pygments 1.7.*
+    """
+    name = 'Mathematica'
+    aliases = ['mathematica']
+    # NOTE(review): the entry for this lexer in pygments/lexers/_mapping.py
+    # lists only '*.m' and the MIME type 'text/x-mathematica'; regenerate the
+    # mapping so that it agrees with the two attributes below.
+    filenames = ['*.m', '*.nb', '*.nbp']
+    mimetypes = ['application/mathematica']
+
+    # Plain identifier: a letter followed by letters or digits.
+    _NAME_PATTERN = r'[a-zA-Z][a-zA-Z0-9]*'
+    # Constants written as a single special character.
+    _CONSTANTS_PATTERN = sorted_keywords_pattern([
+        u'π', u'∞',
+    ])
+    # Built-in functions; only highlighted when followed by ``[`` (see 'root').
+    _FUNCTIONS_PATTERN = sorted_keywords_pattern([
+        'Cos', 'Plot', 'Sin', 'Tan',
+    ])
+    # Operators and punctuation; sorted_keywords_pattern() makes sure that
+    # e.g. ``->`` is tried before ``-`` and ``:=`` before ``=``.
+    _OPERATORS_PATTERN = sorted_keywords_pattern([
+        '$', '(', ')', '*', '+', ',', '-', '->', '.', '/', ':=', '=', '[',
+        ']', '_', '`', '{', '}',
+    ])
+    tokens = {
+        'root': [
+            (r'\s+', Whitespace),
+            (r'\(\*', Comment.Multiline, 'comment'),
+            (r'"', String, 'string'),
+            # Numbers: base^^mantissa*^exponent`precision (and variants).
+            # The alternative with a decimal point has to come first,
+            # otherwise ``1.23`` is split into the two tokens ``1`` and
+            # ``.23``.
+            (r'(\d+\^\^)?-?(\d*\.\d+|\d+)(\*\^-?(\d*\.\d+|\d+))?(`(`?\d+)?)?',
+             Number),
+            # ``%`` refers to the previous result.
+            (r'%', Name.Builtin),
+            # Named characters such as ``\[Alpha]``.
+            (r'\\\[' + _NAME_PATTERN + r'\]', Name.Constant),
+            (_CONSTANTS_PATTERN, Name.Constant),
+            (_FUNCTIONS_PATTERN + r'(\s*)(\[)',
+             bygroups(Name.Builtin, Whitespace, Operator)),
+            (_OPERATORS_PATTERN, Operator),
+            (_NAME_PATTERN, Name),
+        ],
+        'comment': [
+            (r'[^*]+', Comment.Multiline),
+            (r'\*\)', Comment.Multiline, '#pop'),
+            # A lone ``*`` that does not close the comment.
+            (r'\*', Comment.Multiline),
+        ],
+        'string': [
+            (r'"', String, '#pop'),
+            # Octal escape: backslash followed by three octal digits.
+            (r'\\[0-7]{3}', String.Escape),
+            # Hexadecimal escapes: ``\.xx`` and ``\:xxxx``.
+            (r'\\\.[0-9a-fA-F]{2}', String.Escape),
+            (r'\\\:[0-9a-fA-F]{4}', String.Escape),
+            # Named character, e.g. ``\[RawAt]``.
+            (r'\\\[\w+\]', String.Escape),
+            # Embedded box expression: ``\!\( ... \)``.
+            (r'\\!\\\(.*\\\)', String.Escape),
+            # Backslash at the end of a line.
+            (r'\\\n', String.Escape),
+            # Any other escaped character.
+            (r'\\.', String.Escape),
+            # Ordinary text up to the next quote or backslash.
+            (r'[^"\\]+', String),
+        ]
+    }
+
+    # No ``self`` parameter, as written in the original commit; presumably
+    # the lexer machinery calls this with the text only -- confirm.
+    def analyse_text(text):
+        """
+        Return a small score if ``text`` contains a ``(* ... *)`` comment
+        anywhere (possibly spanning several lines), else ``0.0``.
+        """
+        # re.search() instead of re.match(): the comment does not have to be
+        # the very first thing in the text.
+        if re.search(r'\(\*.*?\*\)', text, re.DOTALL):
+            return 0.11
+        return 0.0
+
+
 class OctaveLexer(RegexLexer):
     """
     For GNU Octave source code.

File pygments/util.py

View file
  • Ignore whitespace
 
             return u'(?:' + u'|'.join(buf) + u')'
 
+def sorted_keywords_pattern(keywords):
+    r'''
+    Pattern for a regular expression that matches any word in ``keywords`` even
+    if some of them are prefixes of others. This is particularly useful if
+    '\b' cannot be used as delimiter after a keyword.
+
+    The keywords are escaped, so they are matched literally, and the whole
+    alternation is wrapped in one capturing group. Longer keywords come first
+    so that a keyword is never shadowed by one of its own prefixes. For an
+    empty ``keywords`` the resulting pattern never matches.
+
+    Raises ``TypeError`` if ``keywords`` is ``None``.
+
+    >>> print(sorted_keywords_pattern(['a', 'aa', 'aaa', 'b', 'cc']))
+    (aaa|aa|cc|a|b)
+    '''
+    # Explicit check instead of ``assert``, which is removed under ``-O``.
+    if keywords is None:
+        raise TypeError('keywords must be a sequence of strings, not None')
+    # Sort the keywords themselves (not their escaped form), longest first;
+    # ties are broken alphabetically to get a reproducible pattern.
+    sort_key = lambda keyword: (-len(keyword), keyword)
+    escaped_keywords = [re.escape(keyword)
+                        for keyword in sorted(keywords, key=sort_key)]
+    if not escaped_keywords:
+        # ``()`` would match the empty string at every position; use a group
+        # that can never match instead.
+        return u'((?!))'
+    return u'(' + u'|'.join(escaped_keywords) + u')'
+
 # Python 2/3 compatibility
 
 if sys.version_info < (3,0):

File tests/examplefiles/mathematica.m

View file
  • Ignore whitespace
+(* Mathematica example *)
+
+(* Strings *)
+Text = "hello world\n xxx \[RawAt] xxx \.1234 xxx \:1a2b3c"
+BoxedText = "xxx\!\(box\/xed\)"
+
+(* Numbers *)
+1
+1.23
+.123
+16^^123
+8^^12.34
+-12.34*^7
+123`
+123`8
+234``5
+-12.34*^-23`12
+
+(* Functions and other code *)
+Sin[%]
+A = {{1, 2, 3}, {4, 5, 6}}
+Plot[{Sin[x], Cos[x], Tan[x]}, {x, -3Pi, 3Pi}]
+translate[graphics_, {dx_,dy_,dz_}] := graphics /. Sphere[{x_,y_,z_}, r_] -> Sphere[{x+dx, y+dy, z+dz}, r]
+