Mher Movsisyan avatar Mher Movsisyan committed ae0522d

adding DTD lexer

Comments (0)

Files changed (3)

pygments/lexers/_mapping.py

     'AppleScriptLexer': ('pygments.lexers.other', 'AppleScript', ('applescript',), ('*.applescript',), ()),
     'AsymptoteLexer': ('pygments.lexers.other', 'Asymptote', ('asy', 'asymptote'), ('*.asy',), ('text/x-asymptote',)),
     'AutohotkeyLexer': ('pygments.lexers.other', 'autohotkey', ('ahk',), ('*.ahk', '*.ahkl'), ('text/x-autohotkey',)),
-    'AwkLexer': ('pygments.lexers.other', 'Awk', ('awk', 'gawk', 'mawk', 'nawk'), ('*.awk',), ('application/x-awk')),
+    'AwkLexer': ('pygments.lexers.other', 'Awk', ('awk', 'gawk', 'mawk', 'nawk'), ('*.awk',), ()),
     'BBCodeLexer': ('pygments.lexers.text', 'BBCode', ('bbcode',), (), ('text/x-bbcode',)),
     'BaseMakefileLexer': ('pygments.lexers.text', 'Makefile', ('basemake',), (), ()),
     'BashLexer': ('pygments.lexers.other', 'Bash', ('bash', 'sh', 'ksh'), ('*.sh', '*.ksh', '*.bash', '*.ebuild', '*.eclass'), ('application/x-sh', 'application/x-shellscript')),
     'DelphiLexer': ('pygments.lexers.compiled', 'Delphi', ('delphi', 'pas', 'pascal', 'objectpascal'), ('*.pas',), ('text/x-pascal',)),
     'DiffLexer': ('pygments.lexers.text', 'Diff', ('diff', 'udiff'), ('*.diff', '*.patch'), ('text/x-diff', 'text/x-patch')),
     'DjangoLexer': ('pygments.lexers.templates', 'Django/Jinja', ('django', 'jinja'), (), ('application/x-django-templating', 'application/x-jinja')),
+    'DtdLexer': ('pygments.lexers.web', 'DTD', ('dtd',), ('*.dtd',), ('application/xml-dtd',)),
     'DuelLexer': ('pygments.lexers.web', 'Duel', ('duel', 'Duel Engine', 'Duel View', 'JBST', 'jbst', 'JsonML+BST'), ('*.duel', '*.jbst'), ('text/x-duel', 'text/x-jbst')),
     'DylanLexer': ('pygments.lexers.compiled', 'Dylan', ('dylan',), ('*.dylan', '*.dyl'), ('text/x-dylan',)),
     'ErbLexer': ('pygments.lexers.templates', 'ERB', ('erb',), (), ('application/x-ruby-templating',)),

pygments/lexers/web.py

            'PhpLexer', 'ActionScriptLexer', 'XsltLexer', 'ActionScript3Lexer',
            'MxmlLexer', 'HaxeLexer', 'HamlLexer', 'SassLexer', 'ScssLexer',
            'ObjectiveJLexer', 'CoffeeScriptLexer', 'DuelLexer', 'ScamlLexer',
-           'JadeLexer', 'XQueryLexer']
+           'JadeLexer', 'XQueryLexer', 'DtdLexer']
 
 
 class JavascriptLexer(RegexLexer):
         return rv
 
 
+class DtdLexer(RegexLexer):
+    """
+    a lexer for DTD (Document Type Definition).
+    """
+
+    flags = re.MULTILINE | re.DOTALL
+
+    name = 'DTD'
+    aliases = ['dtd']
+    filenames = ['*.dtd']
+    mimetypes = ['application/xml-dtd']
+
+    tokens = {
+        'root': [
+            include('common'),
+
+            (r'(<!ELEMENT)(\s+)([^\s]+)', 
+                bygroups(Keyword, Text, Name.Tag), 'element'),
+            (r'(<!ATTLIST)(\s+)([^\s]+)', 
+                bygroups(Keyword, Text, Name.Tag), 'attlist'),
+            (r'(<!ENTITY)(\s+)([^\s]+)', 
+                bygroups(Keyword, Text, Name.Entity), 'entity'),
+            (r'(<!NOTATION)(\s+)([^\s]+)', 
+                bygroups(Keyword, Text, Name.Tag), 'notation'),
+            (r'(<!\[)([^\[\s]+)(\s*)(\[)', # conditional sections
+                bygroups(Keyword, Name.Entity, Text, Keyword)),
+
+            (r'(<!DOCTYPE)(\s+)([^>\s]+)', 
+                bygroups(Keyword, Text, Name.Tag)),
+            (r'PUBLIC|SYSTEM', Keyword.Constant),
+            (r'[\[\]>]', Keyword),
+        ],
+
+        'common': [
+            (r'\s+', Text),
+            (r'(%|&)[^;]*;', Name.Entity),
+            ('<!--', Comment, 'comment'),
+            (r'[(|)*,?+]', Operator),
+            (r'"[^"]*"', String.Double),
+            (r'\'[^\']*\'', String.Single),
+        ],
+        
+        'comment': [
+            ('[^-]+', Comment),
+            ('-->', Comment, '#pop'),
+            ('-', Comment),
+        ],
+
+        'element': [
+            include('common'),
+            (r'EMPTY|ANY|#PCDATA', Keyword.Constant),
+            (r'[^>\s\|()?+*,]+', Name.Tag),
+            (r'>', Keyword, '#pop'),
+        ],
+
+        'attlist': [
+            include('common'),
+            (r'CDATA|IDREFS|IDREF|ID|NMTOKENS|NMTOKEN|ENTITIES|ENTITY|NOTATION', Keyword.Constant),
+            (r'#REQUIRED|#IMPLIED|#FIXED', Keyword.Constant),
+            (r'xml:space|xml:lang', Keyword.Reserved),
+            (r'[^>\s\|()?+*,]+', Name.Attribute),
+            (r'>', Keyword, '#pop'),
+        ],
+
+        'entity': [
+            include('common'),
+            (r'SYSTEM|PUBLIC|NDATA', Keyword.Constant),
+            (r'[^>\s\|()?+*,]+', Name.Entity),
+            (r'>', Keyword, '#pop'),
+        ],
+        
+        'notation': [
+            include('common'),
+            (r'SYSTEM|PUBLIC', Keyword.Constant),
+            (r'[^>\s\|()?+*,]+', Name.Attribute),
+            (r'>', Keyword, '#pop'),
+        ],
+    }
+
+    def analyse_text(text):
+        if not looks_like_xml(text) and \
+            ('<!ELEMENT' in text or '<!ATTLIST' in text or '<!ENTITY' in text):
+            return 0.8
+
 class XmlLexer(RegexLexer):
     """
     Generic lexer for XML (eXtensible Markup Language).

tests/examplefiles/test.dtd

+<!DOCTYPE html>
+
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd" [
+  <!-- an internal subset can be embedded here -->
+]>
+
+<!DOCTYPE greeting SYSTEM "hello.dtd">
+
+<!DOCTYPE greeting [
+  <!ELEMENT greeting (#PCDATA)>
+
+<!-- examples from XML spec -->
+
+<!ELEMENT br EMPTY>
+<!ELEMENT p (#PCDATA|emph)* >
+<!ELEMENT %name.para; %content.para; >
+<!ELEMENT container ANY>
+
+<!ELEMENT spec (front, body, back?)>
+<!ELEMENT div1 (head, (p | list | note)*, div2*)>
+<!ELEMENT dictionary-body (%div.mix; | %dict.mix;)*>
+
+<!ELEMENT p (#PCDATA|a|ul|b|i|em)*>
+<!ELEMENT p (#PCDATA | %font; | %phrase; | %special; | %form;)* >
+<!ELEMENT b (#PCDATA)>
+
+<!ATTLIST termdef
+          id      ID      #REQUIRED
+          name    CDATA   #IMPLIED>
+<!ATTLIST list
+          type    (bullets|ordered|glossary)  "ordered">
+<!ATTLIST form
+          method  CDATA   #FIXED "POST">
+
+<!ENTITY d "&#xD;">
+<!ENTITY a "&#xA;">
+<!ENTITY da "&#xD;&#xA;">
+
+<!ENTITY % ISOLat2
+         SYSTEM "http://www.xml.com/iso/isolat2-xml.entities" >
+
+<!ENTITY Pub-Status "This is a pre-release of the
+ specification.">
+ 
+ <!ENTITY open-hatch
+         SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
+<!ENTITY open-hatch
+         PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN"
+         "http://www.textuality.com/boilerplate/OpenHatch.xml">
+<!ENTITY hatch-pic
+         SYSTEM "../grafix/OpenHatch.gif"
+         NDATA gif >
+         
+<!NOTATION gif PUBLIC "gif viewer">
+
+<!ENTITY % YN '"Yes"' >
+<!ENTITY WhatHeSaid "He said %YN;" >
+
+<!ENTITY EndAttr "27'" >
+
+<!ENTITY % pub    "&#xc9;ditions Gallimard" >
+<!ENTITY   rights "All rights reserved" >
+<!ENTITY   book   "La Peste: Albert Camus,
+&#xA9; 1947 %pub;. &rights;" >
+
+<!ENTITY lt     "&#38;#60;">
+<!ENTITY gt     "&#62;">
+<!ENTITY amp    "&#38;#38;">
+<!ENTITY apos   "&#39;">
+<!ENTITY quot   "&#34;">
+
+<!ENTITY % draft 'INCLUDE' >
+<!ENTITY % final 'IGNORE' >
+
+<![%draft;[
+<!ELEMENT book (comments*, title, body, supplements?)>
+]]>
+<![%final;[
+<!ELEMENT book (title, body, supplements?)>
+]]>
+
+]>
+
+
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.