Anonymous committed 084467a

[svn] php lexer finetuning

Comments (0)

Files changed (2)


     if _other is this:
         def callback(lexer, match, ctx=None):
+            # if keyword arguments are given the callback
+            # function has to create a new lexer instance
+            if kwargs:
+                # XXX: cache that somehow
+                kwargs.update(lexer.options)
+                lx = lexer.__class__(**kwargs)
+            else:
+                lx = lexer
             s = match.start()
-            for i, t, v in lexer.get_tokens_unprocessed(
+            for i, t, v in lx.get_tokens_unprocessed(
                 yield i + s, t, v
             if ctx:
                 ctx.pos = match.end()


 except NameError:
     from sets import Set as set
-from pygments.lexer import Lexer, RegexLexer, bygroups, using, include
+from pygments.lexer import Lexer, RegexLexer, bygroups, using, include, this
 from pygments.token import \
-     Text, Comment, Operator, Keyword, Name, String, Number, Other
+     Text, Comment, Operator, Keyword, Name, String, Number, Other, Punctuation
 from pygments.util import get_bool_opt, get_list_opt, looks_like_xml, \
             (r'//.*?\n', Comment),
             (r'/\*.*?\*/', Comment),
             (r'/(\\\\|\\/|[^/\n])*/[gim]*', String.Regex),
-            (r'[~\^\*!%&\[\]\{\}\(\)<>\|+=:;,./?-]', Operator),
+            (r'[~\^\*!%&<>\|+=:;,/?-\\]+', Operator),
+            (r'[{}\[\]();.]+', Punctuation),
              r'instanceof|this)\b', Keyword),
     tokens = {
         'root': [
             (r'(@media)(\s+)(\w+)(\s*)({)', bygroups(Keyword, Text, String,
-             Text, Operator), 'media'),
+             Text, Punctuation), 'media'),
         'basics': [
             (r'\s+', Text),
             (r'/\*(?:.|\n)*?\*/', Comment),
-            (r'{', Operator, 'content'),
+            (r'{', Punctuation, 'content'),
             (r'\:[a-zA-Z0-9_-]+', Name.Decorator),
             (r'\.[a-zA-Z0-9_-]+', Name.Class),
             (r'\#[a-zA-Z0-9_-]+', Name.Function),
         'media': [
-            (r'}', Operator, '#pop')
+            (r'}', Punctuation, '#pop')
         'content': [
             (r'\s+', Text),
-            (r'}', Operator, '#pop'),
+            (r'}', Punctuation, '#pop'),
             (r'url\(.*?\)', String.Other),
             (r'^@.*?$', Comment.Preproc),
             (r'\#[a-zA-Z0-9]{1,6}', Number),
             (r'[\.-]?[0-9]*[\.]?[0-9]+(em|px|\%|pt|pc|in|mm|cm|ex)', Number),
             (r'-?[0-9]+', Number),
-            (r'[~\^\*!%&\[\]\(\)<>\|+=@:;,./?-]', Operator),
+            (r'[~\^\*!%&<>\|+=@:,./?-]+', Operator),
+            (r'[\[\]();]+', Punctuation),
             (r'"(\\\\|\\"|[^"])*"', String.Double),
             (r"'(\\\\|\\'|[^'])*'", String.Single),
             (r'[a-zA-Z][a-zA-Z0-9]+', Name)
             (r'/\*.*?\*/', Comment),
              bygroups(Operator, Text, Name.Attribute)),
-            (r'[~!%^&*()+=|\[\]:;,.<>/?{}@-]', Text),
+            (r'[~!%^&*+=|:.<>/?@-]+', Operator),
+            (r'[\[\]{}();,]+', Punctuation),
             (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),
               bygroups(Keyword, Text, Operator, Text), 'functionname'),
             ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Other),
              r"0[xX][0-9a-fA-F]+[Ll]?", Number),
-            (r'"(\\\\|\\"|[^"])*"', String.Double),
-            (r"'(\\\\|\\'|[^'])*'", String.Single),
-            (r'`(\\\\|\\`|[^`])*`', String.Backtick)
+            (r"'([^'\\]*(?:\\.[^'\\]*)*)'", String.Single),
+            (r'`([^`\\]*(?:\\.[^`\\]*)*)`', String.Backtick),
+            (r'"', String.Double, 'string'),
         'classname': [
             (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop')
         'functionname': [
             (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Function, '#pop')
+        ],
+        'string': [
+            (r'"', String.Double, '#pop'),
+            (r'[^{$"\\]+', String.Double),
+            (r'\\([nrt\"$]|[0-7]{1,3}|x[0-9A-Fa-f]{1,2})', String.Escape),
+            (r'\$[a-zA-Z_][a-zA-Z0-9_]*(\[\S+\]|->[a-zA-Z_][a-zA-Z0-9_]*)?',
+             String.Interpol),
+            (r'(\{\$\{)(.*?)(\}\})',
+             bygroups(String.Interpol, using(this, _startinline=True),
+                      String.Interpol)),
+            (r'(\{)(\$.*?)(\})',
+             bygroups(String.Interpol, using(this, _startinline=True),
+                      String.Interpol)),
+            (r'(\$\{)(\S+)(\})',
+             bygroups(String.Interpol, Name.Variable, String.Interpol)),
+            (r'[${\\]+', String.Double)
         self.disabledmodules = get_list_opt(
             options, 'disabledmodules', ['unknown'])
         self.startinline = get_bool_opt(options, 'startinline', False)
+        # private option argument for the lexer itself
+        if '_startinline' in options:
+            self.startinline = options.pop('_startinline')
         # collect activated functions in a set
         self._functions = set()
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.