Commits

Anonymous committed 26a3ebe

[svn] added `guess_lexer_for_filename` method that automatically detects dialects of languages

Comments (0)

Files changed (3)

pygments/lexer.py

 # -*- coding: utf-8 -*-
 """
-pygments.lexer
-~~~~~~~~~~~~~~
+    pygments.lexer
+    ~~~~~~~~~~~~~~
 
-Base lexer classes.
+    Base lexer classes.
 
-:copyright: 2006 by Georg Brandl.
-:license: GNU LGPL, see LICENSE for more details.
+    :copyright: 2006 by Georg Brandl.
+    :license: GNU LGPL, see LICENSE for more details.
 """
 import re
 
+try:
+    set
+except NameError:
+    from sets import Set as set
+
 from types import FunctionType
 from pygments.token import Error, Text, Other, _TokenType
 from pygments.util import get_bool_opt, get_int_opt, make_analysator
     def __new__(cls, name, bases, d):
         if 'analyse_text' in d:
             d['analyse_text'] = make_analysator(d['analyse_text'])
+        for key in 'aliases', 'filenames', 'alias_filenames':
+            if key in d:
+                d[key] = set(d[key])
         return type.__new__(cls, name, bases, d)
 
 
     #: fn match rules
     filenames = []
 
+    #: fn alias filenames
+    alias_filenames = []
+
     __metaclass__ = LexerMeta
 
     def __init__(self, **options):
                     yield match.start(i + 1), action, data
             else:
                 if ctx:
-                    ctx.pos = match.start(i+1)
+                    ctx.pos = match.start(i + 1)
                 for item in action(lexer, _PseudoMatch(match.start(i + 1),
                                    match.group(i + 1)), ctx):
                     if item:

pygments/lexers/__init__.py

 import types
 from os.path import basename
 
+try:
+    set
+except NameError:
+    from sets import Set as set
+
 from pygments.lexers._mapping import LEXERS
 from pygments.plugin import find_plugin_lexers
 
         _lexer_cache[cls.name] = cls
 
 
-def get_lexer_by_name(alias, **options):
+def _iter_lexers():
+    """
+    Returns a generator for all lexer classes
+    """
+    for module_name, name, _, _ in LEXERS.itervalues():
+        if name not in _lexer_cache:
+            _load_lexers(module_name)
+        yield _lexer_cache[name]
+    for lexer in find_plugin_lexers():
+        yield lexer
+
+
+def get_lexer_by_name(_alias, **options):
     """
     Get a lexer by an alias
+
+    Builtin lexers from ``LEXERS`` are checked first, then lexers from
+    setuptools entrypoints. The matching class is instantiated with
+    ``options``. Raises ValueError if no lexer has that alias.
     """
     # lookup builtin lexers
     for module_name, name, aliases, _ in LEXERS.itervalues():
-        if alias in aliases:
+        if _alias in aliases:
             if name not in _lexer_cache:
                 _load_lexers(module_name)
             return _lexer_cache[name](**options)
     # continue with lexers from setuptools entrypoints
     for cls in find_plugin_lexers():
-        if alias in cls.aliases:
+        if _alias in cls.aliases:
             return cls(**options)
-    raise ValueError('no lexer for alias %r found' % alias)
+    raise ValueError('no lexer for alias %r found' % _alias)
 
 
-def get_lexer_for_filename(fn, **options):
+def get_lexer_for_filename(_fn, **options):
     """
     Guess a lexer by a filename
+
+    Only the basename of ``_fn`` is matched against each lexer's
+    ``filenames`` patterns; the first lexer with a matching pattern is
+    instantiated with ``options``. Raises ValueError if nothing matches.
     """
-    fn = basename(fn)
-    # lookup builtin lexers
-    for module_name, name, _, filenames in LEXERS.itervalues():
-        for filename in filenames:
-            if fnmatch.fnmatch(fn, filename):
-                if name not in _lexer_cache:
-                    _load_lexers(module_name)
-                return _lexer_cache[name](**options)
-    # continue with lexers from setuptools entrypoints
-    for cls in find_plugin_lexers():
-        for filename in cls.filenames:
-            if fnmatch.fnmatch(fn, filename):
-                return cls(**options)
+    fn = basename(_fn)
+    for lexer in _iter_lexers():
+        for filename in lexer.filenames:
+            # match the basename, not the raw path, as the old code did
+            if fnmatch.fnmatch(fn, filename):
+                return lexer(**options)
     raise ValueError('no lexer for filename %r found' % fn)
 
 
-def guess_lexer(text, **options):
+def guess_lexer_for_filename(_fn, _text, **options):
+    """
+    Look up all lexers that handle the given filename, either as a primary
+    match (``filenames``) or as a secondary match (``alias_filenames``).
+    Then run a text analysis on ``_text`` for those lexers and choose the
+    best result.
+
+    Always returns a lexer instance created with ``options``. Raises
+    ValueError if no lexer matches the basename of ``_fn``.
+
+    usage::
+
+        >>> from pygments.lexers import guess_lexer_for_filename
+        >>> guess_lexer_for_filename('hello.html', '<%= @foo %>')
+        <pygments.lexers.templates.RhtmlLexer object at 0xb7d2f32c>
+        >>> guess_lexer_for_filename('hello.html', '<h1>{{ title|e }}</h1>')
+        <pygments.lexers.templates.HtmlDjangoLexer object at 0xb7d2f2ac>
+        >>> guess_lexer_for_filename('style.css', 'a { color: <?= $link ?> }')
+        <pygments.lexers.templates.CssPhpLexer object at 0xb7ba518c>
+    """
+    fn = basename(_fn)
+    primary = None
+    matching_lexers = set()
+    for lexer in _iter_lexers():
+        for filename in lexer.filenames:
+            if fnmatch.fnmatch(fn, filename):
+                matching_lexers.add(lexer)
+                # the last lexer with a primary match wins as fallback
+                primary = lexer
+        for filename in lexer.alias_filenames:
+            if fnmatch.fnmatch(fn, filename):
+                matching_lexers.add(lexer)
+    if not matching_lexers:
+        raise ValueError('no lexer for filename %r found' % fn)
+    if len(matching_lexers) == 1:
+        # instantiate like every other return path; the set holds classes
+        return iter(matching_lexers).next()(**options)
+    result = []
+    for lexer in matching_lexers:
+        rv = lexer.analyse_text(_text)
+        if rv == 1.0:
+            return lexer(**options)
+        result.append((rv, lexer))
+    result.sort()
+    # highest score is falsy: prefer the primary filename match if any
+    if not result[-1][0] and primary is not None:
+        return primary(**options)
+    return result[-1][1](**options)
+
+
+def guess_lexer(_text, **options):
     """
     Guess a lexer by strong distinctions in the text (eg, shebang).
+
+    Returns an instance (created with ``options``) of the first lexer whose
+    ``analyse_text`` gives 1.0, otherwise of the highest-scoring lexer.
+    Raises ValueError if no lexer reports a non-zero score.
     """
+    #XXX: i (mitsuhiko) would like to drop this function in favor of the
+    #     better guess_lexer_for_filename function.
     best_lexer = [0.0, None]
-    # builtin lexers
-    for module_name, name, _, _ in LEXERS.itervalues():
-        if name not in _lexer_cache:
-            _load_lexers(module_name)
-        lexer = _lexer_cache[name]
+    for lexer in _iter_lexers():
-        rv = lexer.analyse_text(text)
+        rv = lexer.analyse_text(_text)
         if rv == 1.0:
             return lexer(**options)
         if rv > best_lexer[0]:
             best_lexer[:] = (rv, lexer)
-    # plugin lexers
-    for lexer in find_plugin_lexers():
-        rv = lexer.analyse_text(text)
-        if rv == 1.0:
-            return lexer(**options)
-        if rv > best_lexer[0]:
-            best_lexer[:] = (rv, lexer)
-    if best_lexer[0] == 0.0 or best_lexer[1] is None:
-        from pygments.lexers.special import TextLexer
-        return TextLexer(**options)
+    if not best_lexer[0] or best_lexer[1] is None:
+        raise ValueError('no lexer matching the text found')
     return best_lexer[1](**options)
 
 

pygments/lexers/templates.py

      PhpLexer, HtmlLexer, XmlLexer, JavascriptLexer, CssLexer
 from pygments.lexers.agile import PythonLexer
 from pygments.lexer import \
-     Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, include, using
+     Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, \
+     include, using
 from pygments.token import \
      Text, Comment, Operator, Keyword, Name, String, Number, Other
 from pygments.util import html_doctype_matches, looks_like_xml
 class SmartyLexer(RegexLexer):
     name = 'Smarty'
     aliases = ['smarty']
+    filenames = ['*.tpl']
 
     flags = re.MULTILINE | re.DOTALL
 
     tokens = {
-        # XXX: make smarty delimiters customizable somehow
         'root': [
             (r'[^{]+', Other),
             (r'(\{)(\*.*?\*)(\})',
 class HtmlGenshiLexer(DelegatingLexer):
     name = 'HTML+Genshi'
     aliases = ['html+genshi', 'html+kid']
+    alias_filenames = ['*.html', '*.htm', '*.xhtml']
 
     def __init__(self, **options):
         super(HtmlGenshiLexer, self).__init__(HtmlLexer, GenshiMarkupLexer,
     name = 'Genshi'
     aliases = ['genshi', 'kid', 'xml+genshi', 'xml+kid']
     filenames = ['*.kid']
+    alias_filenames = ['*.xml']
 
     def __init__(self, **options):
         super(GenshiLexer, self).__init__(XmlLexer, GenshiMarkupLexer,
     name = 'JavaScript+Genshi Text'
     aliases = ['js+genshitext', 'js+genshi', 'javascript+genshitext',
                'javascript+genshi']
+    alias_filenames = ['*.js']
 
     def __init__(self, **options):
         super(JavascriptGenshiLexer, self).__init__(JavascriptLexer,
                                                     GenshiTextLexer,
                                                     **options)
 
+    def analyse_text(text):
+        return GenshiLexer.analyse_text(text) - 0.05
+
 
 class CssGenshiLexer(DelegatingLexer):
     name = 'CSS+Genshi Text'
     aliases = ['css+genshitext', 'css+genshi']
+    alias_filenames = ['*.css']
 
     def __init__(self, **options):
         super(CssGenshiLexer, self).__init__(CssLexer, GenshiTextLexer,
                                              **options)
 
+    def analyse_text(text):
+        return GenshiLexer.analyse_text(text) - 0.05
+
 
 class RhtmlLexer(DelegatingLexer):
     name = 'RHTML'
     aliases = ['rhtml', 'html+erb', 'html+ruby']
     filenames = ['*.rhtml']
+    alias_filenames = ['*.html', '*.htm', '*.xhtml']
 
     def __init__(self, **options):
         super(RhtmlLexer, self).__init__(HtmlLexer, ErbLexer, **options)
 class XmlErbLexer(DelegatingLexer):
     name = 'XML+Ruby'
     aliases = ['xml+erb', 'xml+ruby']
+    alias_filenames = ['*.xml']
 
     def __init__(self, **options):
         super(XmlErbLexer, self).__init__(XmlLexer, ErbLexer, **options)
 class CssErbLexer(DelegatingLexer):
     name = 'CSS+Ruby'
     aliases = ['css+erb', 'css+ruby']
+    # secondary filename patterns, checked by guess_lexer_for_filename
+    alias_filenames = ['*.css']
 
     def __init__(self, **options):
         super(CssErbLexer, self).__init__(CssLexer, ErbLexer, **options)
 
+    def analyse_text(text):
+        # score just below ErbLexer's own result for the same text
+        return ErbLexer.analyse_text(text) - 0.05
+
 
 class JavascriptErbLexer(DelegatingLexer):
     name = 'JavaScript+Ruby'
     aliases = ['js+erb', 'javascript+erb', 'js+ruby', 'javascript+ruby']
+    alias_filenames = ['*.js']
 
     def __init__(self, **options):
         super(JavascriptErbLexer, self).__init__(JavascriptLexer, ErbLexer,
                                                  **options)
 
+    def analyse_text(text):
+        return ErbLexer.analyse_text(text) - 0.05
+
 
 class HtmlPhpLexer(DelegatingLexer):
     name = 'HTML+PHP'
     aliases = ['html+php']
     filenames = ['*.phtml']
+    alias_filenames = ['*.php', '*.html', '*.htm', '*.xhtml',
+                       '*.php[345]']
 
     def __init__(self, **options):
         super(HtmlPhpLexer, self).__init__(HtmlLexer, PhpLexer, **options)
 class XmlPhpLexer(DelegatingLexer):
     name = 'XML+PHP'
     aliases = ['xml+php']
+    alias_filenames = ['*.xml', '*.php', '*.php[345]']
 
     def __init__(self, **options):
         super(XmlPhpLexer, self).__init__(XmlLexer, PhpLexer, **options)
 class CssPhpLexer(DelegatingLexer):
     name = 'CSS+PHP'
     aliases = ['css+php']
+    alias_filenames = ['*.css']
 
     def __init__(self, **options):
         super(CssPhpLexer, self).__init__(CssLexer, PhpLexer, **options)
 
+    def analyse_text(text):
+        return PhpLexer.analyse_text(text) - 0.05
+
 
 class JavascriptPhpLexer(DelegatingLexer):
     name = 'JavaScript+PHP'
     aliases = ['js+php', 'javascript+php']
+    alias_filenames = ['*.js']
 
     def __init__(self, **options):
         super(JavascriptPhpLexer, self).__init__(JavascriptLexer, PhpLexer,
                                                  **options)
 
+    def analyse_text(text):
+        return PhpLexer.analyse_text(text)
+
 
 class HtmlSmartyLexer(DelegatingLexer):
     name = 'HTML+Smarty'
     aliases = ['html+smarty']
+    alias_filenames = ['*.html', '*.htm', '*.xhtml', '*.tpl']
 
     def __init__(self, **options):
         super(HtmlSmartyLexer, self).__init__(HtmlLexer, SmartyLexer, **options)
 class XmlSmartyLexer(DelegatingLexer):
     name = 'XML+Smarty'
     aliases = ['xml+smarty']
+    alias_filenames = ['*.xml', '*.tpl']
 
     def __init__(self, **options):
         super(XmlSmartyLexer, self).__init__(XmlLexer, SmartyLexer, **options)
 class CssSmartyLexer(DelegatingLexer):
     name = 'CSS+Smarty'
     aliases = ['css+smarty']
+    alias_filenames = ['*.css', '*.tpl']
 
     def __init__(self, **options):
         super(CssSmartyLexer, self).__init__(CssLexer, SmartyLexer, **options)
 
+    def analyse_text(text):
+        return SmartyLexer.analyse_text(text) - 0.05
+
 
 class JavascriptSmartyLexer(DelegatingLexer):
     name = 'JavaScript+Smarty'
     aliases = ['js+smarty', 'javascript+smarty']
+    alias_filenames = ['*.js', '*.tpl']
 
     def __init__(self, **options):
         super(JavascriptSmartyLexer, self).__init__(JavascriptLexer, SmartyLexer,
                                                     **options)
 
+    def analyse_text(text):
+        return SmartyLexer.analyse_text(text) - 0.05
+
 
 class HtmlDjangoLexer(DelegatingLexer):
     name = 'HTML+Django/Jinja'
     aliases = ['html+django', 'html+jinja']
+    alias_filenames = ['*.html', '*.htm', '*.xhtml']
 
     def __init__(self, **options):
         super(HtmlDjangoLexer, self).__init__(HtmlLexer, DjangoLexer, **options)
 class XmlDjangoLexer(DelegatingLexer):
     name = 'XML+Django/Jinja'
     aliases = ['xml+django', 'xml+jinja']
+    alias_filenames = ['*.xml']
 
     def __init__(self, **options):
         super(XmlDjangoLexer, self).__init__(XmlLexer, DjangoLexer, **options)
 class CssDjangoLexer(DelegatingLexer):
     name = 'CSS+Django/Jinja'
     aliases = ['css+django', 'css+jinja']
+    alias_filenames = ['*.css']
 
     def __init__(self, **options):
         super(CssDjangoLexer, self).__init__(CssLexer, DjangoLexer, **options)
 
+    def analyse_text(text):
+        return DjangoLexer.analyse_text(text) - 0.05
+
 
 class JavascriptDjangoLexer(DelegatingLexer):
     name = 'JavaScript+Django/Jinja'
     aliases = ['js+django', 'javascript+django',
                'js+jinja', 'javascript+jinja']
+    alias_filenames = ['*.js']
 
     def __init__(self, **options):
         super(JavascriptDjangoLexer, self).__init__(JavascriptLexer, DjangoLexer,
                                                     **options)
+
+    def analyse_text(text):
+        return DjangoLexer.analyse_text(text) - 0.05
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.