Commits

Anonymous committed 8b70344

#403: Make JS regex parsing better, by Pumbaa80

  • Participants
  • Parent commits e9767a1
  • Branches trunk

Comments (0)

Files changed (2)

pygments/lexers/web.py

 
     flags = re.DOTALL
     tokens = {
-        'root': [
+        'commentsandwhitespace': [
             (r'\s+', Text),
             (r'<!--', Comment),
             (r'//.*?\n', Comment),
-            (r'/\*.*?\*/', Comment),
-            (r'/(\\\\|\\/|[^/\n])*/[gim]+\b', String.Regex),
-            (r'/(\\\\|\\/|[^/\n])*/(?=\s*[,);\n])', String.Regex),
-            (r'/(\\\\|\\/|[^/\n])*/(?=\s*\.[a-z])', String.Regex),
-            (r'[~\^\*!%&<>\|+=:;,/?\\-]+', Operator),
-            (r'[{}\[\]();.]+', Punctuation),
-            (r'(for|in|while|do|break|return|continue|if|else|throw|try|'
-             r'catch|new|typeof|instanceof|this)\b', Keyword),
-            (r'(var|with|const|label|function)\b', Keyword.Declaration),
+            (r'/\*.*?\*/', Comment)
+        ],
+        'slashstartsregex': [
+            include('commentsandwhitespace'),
+            (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
+             r'([gim]+\b|\B)', String.Regex, '#pop'),
+            (r'(?=/)', Text, ('#pop', 'badregex')),
+            (r'', Text, '#pop')
+        ],
+        'badregex': [
+            ('\n', Text, '#pop')
+        ],
+        'root': [
+            (r'^(?=\s|/|<!--)', Text, 'slashstartsregex'),
+            include('commentsandwhitespace'),
+            (r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|'
+             r'(<<|>>>?|==?|!=?|[-<>+*%&\|\^/])=?', Operator, 'slashstartsregex'),
+            (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
+            (r'[})\].]', Punctuation),
+            (r'(for|in|while|do|break|return|continue|switch|case|default|if|else|'
+             r'throw|try|catch|finally|new|delete|typeof|instanceof|void|'
+             r'this)\b', Keyword, 'slashstartsregex'),
+            (r'(var|with|function)\b', Keyword.Declaration, 'slashstartsregex'),
+            (r'(abstract|boolean|byte|char|class|const|debugger|double|enum|export|'
+             r'extends|final|float|goto|implements|import|int|interface|long|native|'
+             r'package|private|protected|public|short|static|super|synchronized|throws|'
+             r'transient|volatile)\b', Keyword.Reserved),
             (r'(true|false|null|NaN|Infinity|undefined)\b', Keyword.Constant),
             (r'(Array|Boolean|Date|Error|Function|Math|netscape|'
              r'Number|Object|Packages|RegExp|String|sun|decodeURI|'

tests/examplefiles/evil_regex.js

+/regexp/.test(foo) || x = [/regexp/,/regexp/, /regexp/, // comment
+// comment
+/regexp/];
+if (/regexp/.test(string))
+{/regexp/.test(string);};
+x =/regexp/;
+x = /regexp/;
+if (0</regexp/.exec(string) || 1>/regexp/.exec(string))
+x = { u:/regexp/, v: /regexp/ };
+foo();/regexp/.test(string); /regexp/.test(string);
+if (!/regexp/) foobar();
+x = u %/regexp/.exec(string) */regexp/.exec(string) / /regexp/.exec(string);
+x = u?/regexp/.exec(string) : v +/regexp/.exec(string) -/regexp/.exec(string);
+a = u^/regexp/.exec(string) &/regexp/.exec(string) |/regexp/.exec(string) +~/regexp/.exec(string);
+x = /regexp/ /* a comment */ ;
+x = /[reg/exp]/;
+x = 4/2/i;
+x = (a == b) ?/* this is a comment */ c : d;
+/// a comment //
+a = /regex//2/1; //syntactically correct, returns NaN
+
+
+
+
+/* original examples: shorter regex-versus-division cases, grouped below by what each slash should mean (NOTE(review): presumably carried over from an earlier fixture - confirm) */
+
+// regex: every slash pair below directly follows ( or = and therefore delimits a regex literal
+
+blah(/abc/);
+x = /abc/;
+x = /abc/.match;
+
+// math: every slash in the expressions below follows a number and is therefore the division operator
+
+blah(1/2); //comment
+x = 1 / 2 / 3;
+x = 1/1/.1;
+
+// broken (NOTE(review): presumably inputs the previous lexer rules got wrong - confirm): first a regex right after =, then two divisions whose /a/g tail merely looks like a flagged regex
+
+x=/1/;
+x=1/a/g;
+x=a/a/g;
+
+// real-world: ordinary arithmetic where every slash outside the trailing comment is division
+
+var x = 1/(1+Math.sqrt(sum)); // convert to number between 1-0
+return Math.round((num / den) * 100)/100;