Anonymous committed 76c150b

[svn] Add mime type selection for lexers.

Comments (0)

Files changed (12)

 
 - more unit tests
 
-- docstrings?
+- documentation for new features (guessing)
 
 - goto label HL support for languages that use it
 
 
 - review perl lexer (numerous bugs)
 
+- docstrings?
+
 for 0.7
 -------
 

pygments/lexer.py

     #: fn alias filenames
     alias_filenames = []
 
+    #: mime types
+    mimetypes = []
+
     __metaclass__ = LexerMeta
 
     def __init__(self, **options):

pygments/lexers/__init__.py

 
 def _iter_lexers():
     """
-    Returns a generator for all lexer classes
+    Returns an iterator over all lexer classes.
     """
     for module_name, name, _, _ in LEXERS.itervalues():
         if name not in _lexer_cache:
 
 def get_lexer_by_name(_alias, **options):
     """
-    Get a lexer by an alias
+    Get a lexer by an alias.
     """
     # lookup builtin lexers
-    for module_name, name, aliases, _ in LEXERS.itervalues():
+    for module_name, name, aliases, _, _ in LEXERS.itervalues():
         if _alias in aliases:
             if name not in _lexer_cache:
                 _load_lexers(module_name)
     for cls in find_plugin_lexers():
         if _alias in cls.aliases:
             return cls(**options)
-    raise ValueError('no lexer for alias %r found' % alias)
+    raise ValueError('no lexer for alias %r found' % _alias)
 
 
 def get_lexer_for_filename(_fn, **options):
     """
-    Guess a lexer by a filename
+    Get a lexer for a filename.
+
+    Builtin lexers from ``LEXERS`` are tried first, then plugin lexers;
+    the first one with a filename pattern matching the basename of `_fn`
+    is instantiated with `options`.  Raises ``ValueError`` if no pattern
+    matches.
     """
     fn = basename(_fn)
-    for lexer in _iter_lexers():
-        for filename in lexer.filenames:
+    for modname, name, _, filenames, _ in LEXERS.itervalues():
+        for filename in filenames:
+            # match on the basename so that bare-name patterns such as
+            # 'Makefile' also apply when `_fn` has a directory part
+            if fnmatch.fnmatch(fn, filename):
+                if name not in _lexer_cache:
+                    _load_lexers(modname)
+                return _lexer_cache[name](**options)
+    for cls in find_plugin_lexers():
+        for filename in cls.filenames:
-            if fnmatch.fnmatch(_fn, filename):
-                return lexer(**options)
+            if fnmatch.fnmatch(fn, filename):
+                # the loop variable is `cls`; `lexer` no longer exists here
+                return cls(**options)
-    raise ValueError('no lexer for filename %r found' % fn)
+    raise ValueError('no lexer for filename %r found' % _fn)
+
+
+def get_lexer_for_mimetype(_mime, **options):
+    """
+    Get a lexer for a mimetype.
+
+    Builtin lexers from ``LEXERS`` are tried first (calling
+    ``_load_lexers`` for entries not yet in ``_lexer_cache``), then plugin
+    lexers.  The first lexer whose mimetypes contain `_mime` is
+    instantiated with `options`.  Raises ``ValueError`` if none does.
+    """
+    for modname, name, _, _, mimetypes in LEXERS.itervalues():
+        if _mime in mimetypes:
+            if name not in _lexer_cache:
+                _load_lexers(modname)
+            return _lexer_cache[name](**options)
+    for cls in find_plugin_lexers():
+        if _mime in cls.mimetypes:
+            # instantiate the matching plugin class; no name `lexer`
+            # is defined in this function
+            return cls(**options)
+    raise ValueError('no lexer for mimetype %r found' % _mime)
 
 
 def guess_lexer_for_filename(_fn, _text, **options):

pygments/lexers/_mapping.py

 """
 
 LEXERS = {
-    'BooLexer': ('pygments.lexers.dotnet', 'Boo', ('boo',), ('*.boo',)),
-    'BrainfuckLexer': ('pygments.lexers.other', 'Brainfuck', ('brainfuck', 'bf'), ('*.bf', '*.b')),
-    'CLexer': ('pygments.lexers.compiled', 'C', ('c',), ('*.c', '*.h')),
-    'CSharpLexer': ('pygments.lexers.dotnet', 'C#', ('csharp', 'c#'), ('*.cs',)),
-    'CppLexer': ('pygments.lexers.compiled', 'C++', ('cpp', 'c++'), ('*.cpp', '*.hpp', '*.c++', '*.h++')),
-    'CssDjangoLexer': ('pygments.lexers.templates', 'CSS+Django/Jinja', ('css+django', 'css+jinja'), ()),
-    'CssErbLexer': ('pygments.lexers.templates', 'CSS+Ruby', ('css+erb', 'css+ruby'), ()),
-    'CssGenshiLexer': ('pygments.lexers.templates', 'CSS+Genshi Text', ('css+genshitext', 'css+genshi'), ()),
-    'CssLexer': ('pygments.lexers.web', 'CSS', ('css',), ('*.css',)),
-    'CssPhpLexer': ('pygments.lexers.templates', 'CSS+PHP', ('css+php',), ()),
-    'CssSmartyLexer': ('pygments.lexers.templates', 'CSS+Smarty', ('css+smarty',), ()),
-    'DelphiLexer': ('pygments.lexers.compiled', 'Delphi', ('delphi', 'pas', 'pascal', 'objectpascal'), ('*.pas',)),
-    'DiffLexer': ('pygments.lexers.text', 'Diff', ('diff',), ('*.diff', '*.patch')),
-    'DjangoLexer': ('pygments.lexers.templates', 'Django/Jinja', ('django', 'jinja'), ()),
-    'ErbLexer': ('pygments.lexers.templates', 'ERB', ('erb',), ()),
-    'GenshiLexer': ('pygments.lexers.templates', 'Genshi', ('genshi', 'kid', 'xml+genshi', 'xml+kid'), ('*.kid',)),
-    'GenshiTextLexer': ('pygments.lexers.templates', 'Genshi Text', ('genshitext',), ()),
-    'HtmlDjangoLexer': ('pygments.lexers.templates', 'HTML+Django/Jinja', ('html+django', 'html+jinja'), ()),
-    'HtmlGenshiLexer': ('pygments.lexers.templates', 'HTML+Genshi', ('html+genshi', 'html+kid'), ()),
-    'HtmlLexer': ('pygments.lexers.web', 'HTML', ('html',), ('*.html', '*.htm', '*.xhtml')),
-    'HtmlPhpLexer': ('pygments.lexers.templates', 'HTML+PHP', ('html+php',), ('*.phtml',)),
-    'HtmlSmartyLexer': ('pygments.lexers.templates', 'HTML+Smarty', ('html+smarty',), ()),
-    'IniLexer': ('pygments.lexers.text', 'INI', ('ini', 'cfg'), ('*.ini', '*.cfg')),
-    'IrcLogsLexer': ('pygments.lexers.text', 'IRC logs', ('irc',), ()),
-    'JavaLexer': ('pygments.lexers.compiled', 'Java', ('java',), ('*.java',)),
-    'JavascriptDjangoLexer': ('pygments.lexers.templates', 'JavaScript+Django/Jinja', ('js+django', 'javascript+django', 'js+jinja', 'javascript+jinja'), ()),
-    'JavascriptErbLexer': ('pygments.lexers.templates', 'JavaScript+Ruby', ('js+erb', 'javascript+erb', 'js+ruby', 'javascript+ruby'), ()),
-    'JavascriptGenshiLexer': ('pygments.lexers.templates', 'JavaScript+Genshi Text', ('js+genshitext', 'js+genshi', 'javascript+genshitext', 'javascript+genshi'), ()),
-    'JavascriptLexer': ('pygments.lexers.web', 'JavaScript', ('js', 'javascript'), ('*.js',)),
-    'JavascriptPhpLexer': ('pygments.lexers.templates', 'JavaScript+PHP', ('js+php', 'javascript+php'), ()),
-    'JavascriptSmartyLexer': ('pygments.lexers.templates', 'JavaScript+Smarty', ('js+smarty', 'javascript+smarty'), ()),
-    'LuaLexer': ('pygments.lexers.agile', 'Lua', ('lua',), ('*.lua',)),
-    'MakefileLexer': ('pygments.lexers.text', 'Makefile', ('make', 'makefile', 'mf'), ('*.mak', 'Makefile', 'makefile')),
-    'PerlLexer': ('pygments.lexers.agile', 'Perl', ('perl', 'pl'), ('*.pl', '*.pm')),
-    'PhpLexer': ('pygments.lexers.web', 'PHP', ('php', 'php3', 'php4', 'php5'), ('*.php', '*.php[345]')),
-    'PythonConsoleLexer': ('pygments.lexers.agile', 'Python console session', ('pycon',), ()),
-    'PythonLexer': ('pygments.lexers.agile', 'Python', ('python', 'py'), ('*.py', '*.pyw')),
-    'RawTokenLexer': ('pygments.lexers.special', 'Raw token data', ('raw',), ('*.raw',)),
-    'RhtmlLexer': ('pygments.lexers.templates', 'RHTML', ('rhtml', 'html+erb', 'html+ruby'), ('*.rhtml',)),
-    'RubyConsoleLexer': ('pygments.lexers.agile', 'Ruby irb session', ('rbcon', 'irb'), ()),
-    'RubyLexer': ('pygments.lexers.agile', 'Ruby', ('rb', 'ruby'), ('*.rb', '*.rbw', 'Rakefile', '*.rake', '*.gemspec', '*.rbx')),
-    'SmartyLexer': ('pygments.lexers.templates', 'Smarty', ('smarty',), ()),
-    'SqlLexer': ('pygments.lexers.other', 'SQL', ('sql',), ('*.sql',)),
-    'TexLexer': ('pygments.lexers.text', 'TeX', ('tex', 'latex'), ('*.tex', '*.aux', '*.toc')),
-    'TextLexer': ('pygments.lexers.special', 'Text only', ('text',), ('*.txt',)),
-    'VbNetLexer': ('pygments.lexers.dotnet', 'VB.net', ('vb.net', 'vbnet'), ('*.vb', '*.bas')),
-    'XmlDjangoLexer': ('pygments.lexers.templates', 'XML+Django/Jinja', ('xml+django', 'xml+jinja'), ()),
-    'XmlErbLexer': ('pygments.lexers.templates', 'XML+Ruby', ('xml+erb', 'xml+ruby'), ()),
-    'XmlLexer': ('pygments.lexers.web', 'XML', ('xml',), ('*.xml',)),
-    'XmlPhpLexer': ('pygments.lexers.templates', 'XML+PHP', ('xml+php',), ()),
-    'XmlSmartyLexer': ('pygments.lexers.templates', 'XML+Smarty', ('xml+smarty',), ())
+    'BooLexer': ('pygments.lexers.dotnet', 'Boo', ('boo',), ('*.boo',), ('text/x-boo',)),
+    'BrainfuckLexer': ('pygments.lexers.other', 'Brainfuck', ('bf', 'brainfuck'), ('*.b', '*.bf'), ()),
+    'CLexer': ('pygments.lexers.compiled', 'C', ('c',), ('*.c', '*.h'), ('text/x-chdr', 'text/x-csrc')),
+    'CSharpLexer': ('pygments.lexers.dotnet', 'C#', ('c#', 'csharp'), ('*.cs',), ('text/x-csharp',)),
+    'CppLexer': ('pygments.lexers.compiled', 'C++', ('cpp', 'c++'), ('*.c++', '*.hpp', '*.cpp', '*.h++'), ('text/x-c++hdr', 'text/x-c++src')),
+    'CssDjangoLexer': ('pygments.lexers.templates', 'CSS+Django/Jinja', ('css+jinja', 'css+django'), (), ()),
+    'CssErbLexer': ('pygments.lexers.templates', 'CSS+Ruby', ('css+ruby', 'css+erb'), (), ()),
+    'CssGenshiLexer': ('pygments.lexers.templates', 'CSS+Genshi Text', ('css+genshi', 'css+genshitext'), (), ()),
+    'CssLexer': ('pygments.lexers.web', 'CSS', ('css',), ('*.css',), ('text/css',)),
+    'CssPhpLexer': ('pygments.lexers.templates', 'CSS+PHP', ('css+php',), (), ()),
+    'CssSmartyLexer': ('pygments.lexers.templates', 'CSS+Smarty', ('css+smarty',), (), ()),
+    'DelphiLexer': ('pygments.lexers.compiled', 'Delphi', ('objectpascal', 'delphi', 'pas', 'pascal'), ('*.pas',), ('text/x-pascal',)),
+    'DiffLexer': ('pygments.lexers.text', 'Diff', ('diff',), ('*.diff', '*.patch'), ('text/x-diff',)),
+    'DjangoLexer': ('pygments.lexers.templates', 'Django/Jinja', ('jinja', 'django'), (), ()),
+    'ErbLexer': ('pygments.lexers.templates', 'ERB', ('erb',), (), ()),
+    'GenshiLexer': ('pygments.lexers.templates', 'Genshi', ('xml+kid', 'xml+genshi', 'genshi', 'kid'), ('*.kid',), ()),
+    'GenshiTextLexer': ('pygments.lexers.templates', 'Genshi Text', ('genshitext',), (), ()),
+    'HtmlDjangoLexer': ('pygments.lexers.templates', 'HTML+Django/Jinja', ('html+jinja', 'html+django'), (), ()),
+    'HtmlGenshiLexer': ('pygments.lexers.templates', 'HTML+Genshi', ('html+genshi', 'html+kid'), (), ()),
+    'HtmlLexer': ('pygments.lexers.web', 'HTML', ('html',), ('*.xhtml', '*.html', '*.htm'), ('text/html', 'application/xhtml+xml')),
+    'HtmlPhpLexer': ('pygments.lexers.templates', 'HTML+PHP', ('html+php',), ('*.phtml',), ('text/x-php', 'application/x-php', 'application/x-httpd-php', 'application/x-httpd-php3', 'application/x-httpd-php4', 'application/x-httpd-php5')),
+    'HtmlSmartyLexer': ('pygments.lexers.templates', 'HTML+Smarty', ('html+smarty',), (), ()),
+    'IniLexer': ('pygments.lexers.text', 'INI', ('cfg', 'ini'), ('*.ini', '*.cfg'), ()),
+    'IrcLogsLexer': ('pygments.lexers.text', 'IRC logs', ('irc',), (), ()),
+    'JavaLexer': ('pygments.lexers.compiled', 'Java', ('java',), ('*.java',), ('text/x-java',)),
+    'JavascriptDjangoLexer': ('pygments.lexers.templates', 'JavaScript+Django/Jinja', ('javascript+django', 'js+jinja', 'javascript+jinja', 'js+django'), (), ()),
+    'JavascriptErbLexer': ('pygments.lexers.templates', 'JavaScript+Ruby', ('javascript+ruby', 'javascript+erb', 'js+ruby', 'js+erb'), (), ()),
+    'JavascriptGenshiLexer': ('pygments.lexers.templates', 'JavaScript+Genshi Text', ('javascript+genshitext', 'javascript+genshi', 'js+genshitext', 'js+genshi'), (), ()),
+    'JavascriptLexer': ('pygments.lexers.web', 'JavaScript', ('javascript', 'js'), ('*.js',), ('application/x-javascript', 'text/x-javascript')),
+    'JavascriptPhpLexer': ('pygments.lexers.templates', 'JavaScript+PHP', ('js+php', 'javascript+php'), (), ()),
+    'JavascriptSmartyLexer': ('pygments.lexers.templates', 'JavaScript+Smarty', ('javascript+smarty', 'js+smarty'), (), ()),
+    'LuaLexer': ('pygments.lexers.agile', 'Lua', ('lua',), ('*.lua',), ('text/x-lua', 'application/x-lua')),
+    'MakefileLexer': ('pygments.lexers.text', 'Makefile', ('make', 'mf', 'makefile'), ('Makefile', '*.mak', 'makefile'), ('text/x-makefile',)),
+    'PerlLexer': ('pygments.lexers.agile', 'Perl', ('pl', 'perl'), ('*.pl', '*.pm'), ('text/x-perl', 'application/x-perl')),
+    'PhpLexer': ('pygments.lexers.web', 'PHP', ('php4', 'php5', 'php', 'php3'), ('*.php', '*.php[345]'), ()),
+    'PythonConsoleLexer': ('pygments.lexers.agile', 'Python console session', ('pycon',), (), ()),
+    'PythonLexer': ('pygments.lexers.agile', 'Python', ('python', 'py'), ('*.pyw', '*.py'), ('text/x-python', 'application/x-python')),
+    'RawTokenLexer': ('pygments.lexers.special', 'Raw token data', ('raw',), ('*.raw',), ('application/x-pygments-tokens',)),
+    'RhtmlLexer': ('pygments.lexers.templates', 'RHTML', ('html+ruby', 'rhtml', 'html+erb'), ('*.rhtml',), ()),
+    'RubyConsoleLexer': ('pygments.lexers.agile', 'Ruby irb session', ('irb', 'rbcon'), (), ()),
+    'RubyLexer': ('pygments.lexers.agile', 'Ruby', ('ruby', 'rb'), ('*.rb', '*.rbx', '*.gemspec', 'Rakefile', '*.rake', '*.rbw'), ('text/x-ruby', 'application/x-ruby')),
+    'SmartyLexer': ('pygments.lexers.templates', 'Smarty', ('smarty',), ('*.tpl',), ()),
+    'SqlLexer': ('pygments.lexers.other', 'SQL', ('sql',), ('*.sql',), ('text/x-sql',)),
+    'TexLexer': ('pygments.lexers.text', 'TeX', ('tex', 'latex'), ('*.toc', '*.tex', '*.aux'), ('text/x-tex', 'text/x-latex')),
+    'TextLexer': ('pygments.lexers.special', 'Text only', ('text',), ('*.txt',), ('text/plain',)),
+    'VbNetLexer': ('pygments.lexers.dotnet', 'VB.net', ('vb.net', 'vbnet'), ('*.bas', '*.vb'), ('text/x-vbnet', 'text/x-vba')),
+    'XmlDjangoLexer': ('pygments.lexers.templates', 'XML+Django/Jinja', ('xml+django', 'xml+jinja'), (), ()),
+    'XmlErbLexer': ('pygments.lexers.templates', 'XML+Ruby', ('xml+ruby', 'xml+erb'), (), ()),
+    'XmlLexer': ('pygments.lexers.web', 'XML', ('xml',), ('*.xml',), ('text/xml', 'application/xml', 'image/svg+xml')),
+    'XmlPhpLexer': ('pygments.lexers.templates', 'XML+PHP', ('xml+php',), (), ()),
+    'XmlSmartyLexer': ('pygments.lexers.templates', 'XML+Smarty', ('xml+smarty',), (), ())
 }
 
 if __name__ == '__main__':
                                 (module_name,
                                  lexer.name,
                                  tuple(lexer.aliases),
-                                 tuple(lexer.filenames))))
+                                 tuple(lexer.filenames),
+                                 tuple(lexer.mimetypes))))
     # sort them, that should make the diff files for svn smaller
     found_lexers.sort()
 

pygments/lexers/agile.py

     name = 'Python'
     aliases = ['python', 'py']
     filenames = ['*.py', '*.pyw']
+    mimetypes = ['text/x-python', 'application/x-python']
 
     tokens = {
         'root': [
     name = 'Ruby'
     aliases = ['rb', 'ruby']
     filenames = ['*.rb', '*.rbw', 'Rakefile', '*.rake', '*.gemspec', '*.rbx']
+    mimetypes = ['text/x-ruby', 'application/x-ruby']
 
     flags = re.DOTALL | re.MULTILINE
 
     name = 'Perl'
     aliases = ['perl', 'pl']
     filenames = ['*.pl', '*.pm']
+    mimetypes = ['text/x-perl', 'application/x-perl']
 
     flags = re.DOTALL | re.MULTILINE
     # TODO: give this a perl guy who knows how to parse perl...
     name = 'Lua'
     aliases = ['lua']
     filenames = ['*.lua']
+    mimetypes = ['text/x-lua', 'application/x-lua']
 
     tokens = {
         'root': [

pygments/lexers/compiled.py

     name = 'C'
     aliases = ['c']
     filenames = ['*.c', '*.h']
+    mimetypes = ['text/x-chdr', 'text/x-csrc']
 
     #: optional Comment or Whitespace
     _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+'
     name = 'C++'
     aliases = ['cpp', 'c++']
     filenames = ['*.cpp', '*.hpp', '*.c++', '*.h++']
+    mimetypes = ['text/x-c++hdr', 'text/x-c++src']
 
     tokens = {
         'root': [
     name = 'Delphi'
     aliases = ['delphi', 'pas', 'pascal', 'objectpascal']
     filenames = ['*.pas']
+    mimetypes = ['text/x-pascal']
 
     flags = re.IGNORECASE | re.MULTILINE | re.DOTALL
     tokens = {
     name = 'Java'
     aliases = ['java']
     filenames = ['*.java']
+    mimetypes = ['text/x-java']
 
     flags = re.MULTILINE | re.DOTALL
 

pygments/lexers/dotnet.py

     name = 'C#'
     aliases = ['csharp', 'c#']
     filenames = ['*.cs']
+    mimetypes = ['text/x-csharp'] # inferred
 
     flags = re.MULTILINE | re.DOTALL
 
     name = 'Boo'
     aliases = ['boo']
     filenames = ['*.boo']
+    mimetypes = ['text/x-boo']
 
     tokens = {
         'root': [
     name = 'VB.net'
     aliases = ['vb.net', 'vbnet']
     filenames = ['*.vb', '*.bas']
+    mimetypes = ['text/x-vbnet', 'text/x-vba'] # (?)
 
     flags = re.MULTILINE | re.IGNORECASE
     tokens = {

pygments/lexers/other.py

     name = 'SQL'
     aliases = ['sql']
     filenames = ['*.sql']
+    mimetypes = ['text/x-sql']
 
     flags = re.IGNORECASE
     tokens = {

pygments/lexers/special.py

     name = 'Text only'
     aliases = ['text']
     filenames = ['*.txt']
+    mimetypes = ['text/plain']
 
     def get_tokens_unprocessed(self, text):
         yield 0, Text, text
     name = 'Raw token data'
     aliases = ['raw']
     filenames = ['*.raw']
+    mimetypes = ['application/x-pygments-tokens']
 
     def __init__(self, **options):
         self.compress = options.get('compress', '')

pygments/lexers/templates.py

     filenames = ['*.phtml']
     alias_filenames = ['*.php', '*.html', '*.htm', '*.xhtml',
                        '*.php[345]']
+    mimetypes = ['text/x-php', 'application/x-php',
+                 'application/x-httpd-php', 'application/x-httpd-php3',
+                 'application/x-httpd-php4', 'application/x-httpd-php5']
 
     def __init__(self, **options):
         super(HtmlPhpLexer, self).__init__(HtmlLexer, PhpLexer, **options)

pygments/lexers/text.py

     name = 'Makefile'
     aliases = ['make', 'makefile', 'mf']
     filenames = ['*.mak', 'Makefile', 'makefile']
+    mimetypes = ['text/x-makefile']
 
     tokens = {
         'root': [
     name = 'Diff'
     aliases = ['diff']
     filenames = ['*.diff', '*.patch']
+    mimetypes = ['text/x-diff']
 
     tokens = {
         'root': [
     name = 'TeX'
     aliases = ['tex', 'latex']
     filenames = ['*.tex', '*.aux', '*.toc']
+    mimetypes = ['text/x-tex', 'text/x-latex']
 
     tokens = {
         'general': [

pygments/lexers/web.py

     name = 'JavaScript'
     aliases = ['js', 'javascript']
     filenames = ['*.js']
+    mimetypes = ['application/x-javascript', 'text/x-javascript']
 
     flags = re.DOTALL
     tokens = {
     name = 'CSS'
     aliases = ['css']
     filenames = ['*.css']
+    mimetypes = ['text/css']
 
     tokens = {
         'root': [
     name = 'HTML'
     aliases = ['html']
     filenames = ['*.html', '*.htm', '*.xhtml']
+    mimetypes = ['text/html', 'application/xhtml+xml']
 
     flags = re.IGNORECASE | re.DOTALL
     tokens = {
     name = 'XML'
     aliases = ['xml']
     filenames = ['*.xml']
+    mimetypes = ['text/xml', 'application/xml', 'image/svg+xml']
 
     tokens = {
         'root': [
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.