Daniele Varrazzo avatar Daniele Varrazzo committed 6d5e346

Fix occasional parsing of PL/pgSQL using the SQL tokens

The inheritance hierarchy was refactored to avoid a base class with a _tokens
attribute that may interfere with the creation of the subclasses' _tokens.

Comments (0)

Files changed (2)

pygments/lexers/postgres.py

 
 language_re = re.compile(r"\s+LANGUAGE\s+'?(\w+)'?", re.IGNORECASE)
 
-class PostgresLexer(RegexLexer):
+def language_callback(lexer, match):
+    """Parse the content of a $-string using a lexer.
+
+    The lexer is chosen by looking for a nearby LANGUAGE clause.
+
+    Note: this function should have been a `PostgresBase` method, but
+    deepcopying the rules fails in that case.
     """
-    Lexer for the PostgreSQL dialect of SQL.
+    l = None
+    # TODO: the language can also be before the string
+    m = language_re.match(lexer.text[match.end():])
+    if m is not None:
+        l = lexer._get_lexer(m.group(1))
+
+    if l:
+        yield (match.start(1), String, match.group(1))
+        for x in l.get_tokens_unprocessed(match.group(2)):
+            yield x
+        yield (match.start(3), String, match.group(3))
+
+    else:
+        yield (match.start(), String, match.group())
+
+class PostgresBase(object):
+    """Base class for Postgres-related lexers.
+
+    This is implemented as a mixin to avoid the Lexer metaclass kicking in.
+    This way the different lexers don't have a common Lexer ancestor. If they
+    had, _tokens could be created on that ancestor and not updated for the
+    other classes, resulting e.g. in PL/pgSQL being parsed as plain SQL. This
+    shortcoming seems to suggest that regexp lexers are not really
+    subclassable.
+
+    `language_callback` should really be our method, but this breaks deepcopy.
     """
-
-    name = 'PostgreSQL SQL dialect'
-    aliases = ['postgresql', 'postgres']
-    mimetypes = ['text/x-postgresql']
-
     def get_tokens_unprocessed(self, text, *args):
         # Have a copy of the entire text to be used by `language_callback`.
         self.text = text
-        for x in RegexLexer.get_tokens_unprocessed(self, text, *args):
+        for x in super(PostgresBase, self).get_tokens_unprocessed(
+                text, *args):
             yield x
 
-    def language_callback(self, match):
-        lexer = None
-        # TODO: the language can also be before the string
-        m = language_re.match(self.text[match.end():])
-        if m is not None:
-            lexer = self._get_lexer(m.group(1))
-
-        if lexer:
-            yield (match.start(1), String, match.group(1))
-            for x in lexer.get_tokens_unprocessed(match.group(2)):
-                yield x
-            yield (match.start(3), String, match.group(3))
-
-        else:
-            yield (match.start(), String, match.group())
-
     def _get_lexer(self, lang):
         if lang.lower() == 'sql':
             return get_lexer_by_name('postgresql', **self.options)
                 pass
         else:
             # TODO: better logging
-            print >>sys.stderr, "language not found:", lang
+            # print >>sys.stderr, "language not found:", lang
             return None
 
+class PostgresLexer(PostgresBase, RegexLexer):
+    """
+    Lexer for the PostgreSQL dialect of SQL.
+    """
+
+    name = 'PostgreSQL SQL dialect'
+    aliases = ['postgresql', 'postgres']
+    mimetypes = ['text/x-postgresql']
+
     flags = re.IGNORECASE
     tokens = {
         'root': [
     }
 
 
-class PlPgsqlLexer(PostgresLexer):
+class PlPgsqlLexer(PostgresBase, RegexLexer):
     """
     Handle the extra syntax in Pl/pgSQL language.
     """
     name = 'PL/pgSQL'
     aliases = ['plpgsql']
     mimetypes = ['text/x-plpgsql']
+
+    flags = re.IGNORECASE
     tokens = deepcopy(PostgresLexer.tokens)
 
     # extend the keywords list
     else:
         assert 0, "SQL keywords not found"
 
+    # Add specific PL/pgSQL rules (before the SQL ones)
     tokens['root'][:0] = [
         (r'\%[a-z][a-z0-9_]*\b', Name.Builtin),     # actually, a datatype
         (r':=', Operator),
     ]
 
 
-class PsqlRegexLexer(PostgresLexer):
+class PsqlRegexLexer(PostgresBase, RegexLexer):
     """
     Extend the PostgresLexer adding support specific for psql commands.
 
     """
     name = 'PostgreSQL console - regexp based lexer'
     aliases = []    # not public
+
+    flags = re.IGNORECASE
     tokens = deepcopy(PostgresLexer.tokens)
+
     tokens['root'].append(
         (r'\\[^\s]+', Keyword.Pseudo, 'psql-command'))
     tokens['psql-command'] = [

tests/examplefiles/postgresql_test.txt

 CREATE FUNCTION nested_lexers (a integer, b integer)
 $function$
 BEGIN
-    RETURN ($1 ~ $q$[\t\r\n\v\\]$q$);
+    SELECT ($1 ~ $q$[\t\r\n\v\\]$q$);
 END;
 $function$
 LANGUAGE sql;
+
+CREATE OR REPLACE FUNCTION measurement_insert_trigger()
+RETURNS TRIGGER AS $$
+BEGIN
+    <<test>>
+    INSERT INTO measurement_y2008m01 VALUES (NEW.*);
+    RETURN NULL;
+END;
+$$
+LANGUAGE plpgsql;
+
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.