Commits

Georg Brandl committed 86dd071

Closes #690: allow Unicode names for XML tags/attributes.

  • Participants
  • Parent commits 84701e2

Comments (0)

Files changed (1)

File pygments/lexers/web.py

     Generic lexer for XML (eXtensible Markup Language).
     """
 
-    flags = re.MULTILINE | re.DOTALL
+    flags = re.MULTILINE | re.DOTALL | re.UNICODE
 
     name = 'XML'
     aliases = ['xml']
             ('<!--', Comment, 'comment'),
             (r'<\?.*?\?>', Comment.Preproc),
             ('<![^>]*>', Comment.Preproc),
-            (r'<\s*[a-zA-Z0-9:._-]+', Name.Tag, 'tag'),
-            (r'<\s*/\s*[a-zA-Z0-9:._-]+\s*>', Name.Tag),
+            (r'<\s*[\w:.-]+', Name.Tag, 'tag'),
+            (r'<\s*/\s*[\w:.-]+\s*>', Name.Tag),
         ],
         'comment': [
             ('[^-]+', Comment),
         ],
         'tag': [
             (r'\s+', Text),
-            (r'[a-zA-Z0-9_.:-]+\s*=', Name.Attribute, 'attr'),
+            (r'[\w.:-]+\s*=', Name.Attribute, 'attr'),
             (r'/?\s*>', Name.Tag, '#pop'),
         ],
         'attr': [