Commits

Georg Brandl committed 48c2b48 Merge

Comments (0)

Files changed (7)

 * Tiberius Teng -- default style overhaul
 * Jeremy Thurgood -- Erlang, Squid config lexers
 * Erick Tryzelaar -- Felix lexer
+* Daniele Varrazzo -- PostgreSQL lexers
 * Whitney Young -- ObjectiveC lexer
 * Nathan Weizenbaum -- Haml and Sass lexers
 * Dietmar Winkler -- Modelica lexer

pygments/lexers/_mapping.py

     'OocLexer': ('pygments.lexers.compiled', 'Ooc', ('ooc',), ('*.ooc',), ('text/x-ooc',)),
     'PerlLexer': ('pygments.lexers.agile', 'Perl', ('perl', 'pl'), ('*.pl', '*.pm'), ('text/x-perl', 'application/x-perl')),
     'PhpLexer': ('pygments.lexers.web', 'PHP', ('php', 'php3', 'php4', 'php5'), ('*.php', '*.php[345]'), ('text/x-php',)),
+    'PlPgsqlLexer': ('pygments.lexers.postgres', 'PL/pgSQL', ('plpgsql',), (), ('text/x-plpgsql',)),
     'PostScriptLexer': ('pygments.lexers.other', 'PostScript', ('postscript',), ('*.ps', '*.eps'), ('application/postscript',)),
+    'PostgresConsoleLexer': ('pygments.lexers.postgres', 'PostgreSQL console (psql)', ('psql', 'postgresql-console', 'postgres-console'), (), ('text/x-postgresql-psql',)),
+    'PostgresLexer': ('pygments.lexers.postgres', 'PostgreSQL SQL dialect', ('postgresql', 'postgres'), (), ('text/x-postgresql',)),
     'PovrayLexer': ('pygments.lexers.other', 'POVRay', ('pov',), ('*.pov', '*.inc'), ('text/x-povray',)),
     'PrologLexer': ('pygments.lexers.compiled', 'Prolog', ('prolog',), ('*.prolog', '*.pro', '*.pl'), ('text/x-prolog',)),
     'PropertiesLexer': ('pygments.lexers.text', 'Properties', ('properties',), ('*.properties',), ('text/x-java-properties',)),

pygments/lexers/_postgres_builtins.py

+"""
+    pygments.lexers._postgres_builtins
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
    Self-updating data files for the PostgreSQL lexer.
+
+    :copyright: Copyright 2011 by Daniele Varrazzo.
+    :license: BSD, see LICENSE for details.
+"""
+
+import re
+import urllib2
+
# One man's constant is another man's variable.
# Upstream PostgreSQL git repository: `update_myself()` scrapes the keyword
# and datatype lists below from these SGML documentation sources.
SOURCE_URL = 'https://github.com/postgres/postgres/raw/master'
KEYWORDS_URL = SOURCE_URL + '/doc/src/sgml/keywords.sgml'
DATATYPES_URL = SOURCE_URL + '/doc/src/sgml/datatype.sgml'
+
def update_myself():
    """Re-scrape the PostgreSQL docs and rewrite the constants in this file.

    Fetches the datatype and keyword SGML sources and replaces the bodies of
    the DATATYPES, PSEUDO_TYPES and KEYWORDS lists below (via update_consts).
    """
    # Both datatype and pseudo-type lists live in the same document, so fetch
    # it once and materialize it (the parsers iterate it twice).
    datatype_doc = list(fetch(DATATYPES_URL))
    datatypes = parse_datatypes(datatype_doc)
    pseudos = parse_pseudos(datatype_doc)
    keywords = parse_keywords(fetch(KEYWORDS_URL))

    for constname, values in (('DATATYPES', datatypes),
                              ('PSEUDO_TYPES', pseudos),
                              ('KEYWORDS', keywords)):
        update_consts(__file__, constname, values)
+
def parse_keywords(f):
    """Extract the keyword names from the keywords.sgml documentation.

    `f` is a file-like object. Each keyword appears as an
    ``<entry><token>NAME</token></entry>`` cell followed by a category entry.
    Returns the sorted list of keyword names; raises ValueError if none are
    found (a sign the docs' markup changed).
    """
    pattern = (r'\s*<entry><token>([^<]+)</token></entry>\s*'
               r'<entry>([^<]+)</entry>')
    keywords = sorted(m.group(1) for m in re.finditer(pattern, f.read()))

    if not keywords:
        raise ValueError('no keyword found')

    return keywords
+
def parse_datatypes(f):
    """Extract the datatype names from the datatype.sgml documentation.

    `f` is an iterable of lines. Scans ``<entry><type>...</type></entry>``
    cells up to the first ``<sect1`` (the end of the datatypes table) and
    returns the sorted list of type names found.

    Fixes w.r.t. the first draft: the unused `re_entry` local is gone, the
    regexes are raw strings, and the shadowed `tmp`/`t` loop variables have
    distinct names.
    """
    dt = set()
    for line in f:
        if '<sect1' in line:
            break
        if '<entry><type>' not in line:
            continue

        # Parse a string such as
        # time [ (<replaceable>p</replaceable>) ] [ without time zone ]
        # into types "time" and "without time zone"

        # remove all the tags
        line = re.sub(r"<replaceable>[^<]+</replaceable>", "", line)
        line = re.sub(r"<[^>]+>", "", line)

        # Drop the parts containing braces (the precision spec), keep the
        # bracket-delimited fragments such as "without time zone".
        fragments = [frag
                     for chunk in line.split('[')
                     for frag in chunk.split(']')
                     if "(" not in frag]
        for fragment in fragments:
            for name in fragment.split(','):
                name = name.strip()
                if not name:
                    continue
                # collapse internal whitespace, e.g. "bit  varying"
                dt.add(" ".join(name.split()))

    return sorted(dt)
+
def parse_pseudos(f):
    """Extract the pseudo-type names from the datatype.sgml documentation.

    `f` is an iterable of lines. Reads the entries of the table with id
    ``datatype-pseudotypes-table`` and returns them in document order.
    Raises ValueError if the table, its end, or any entry is missing.
    """
    re_start = re.compile(r'\s*<table id="datatype-pseudotypes-table">')
    re_entry = re.compile(r'\s*<entry><type>([^<]+)</></entry>')
    re_end = re.compile(r'\s*</table>')

    lines = iter(f)

    # Skip ahead to the start of the pseudo-types table.
    for line in lines:
        if re_start.match(line):
            break
    else:
        raise ValueError('pseudo datatypes table not found')

    # Collect entries until the table is closed.
    names = []
    table_closed = False
    for line in lines:
        if re_end.match(line):
            table_closed = True
            break
        entry = re_entry.match(line)
        if entry:
            names.append(entry.group(1))

    if not table_closed:
        raise ValueError('end of pseudo datatypes table not found')
    if not names:
        raise ValueError('pseudo datatypes not found')

    return names
+
def fetch(url):
    # Return a file-like object for `url` (Python 2's urllib2; would be
    # urllib.request under Python 3).
    return urllib2.urlopen(url)
+
def update_consts(filename, constname, content):
    """Rewrite in place the body of a list constant in a Python source file.

    Finds the single line ``constname = [`` in `filename`, replaces
    everything up to the closing ``]`` line with `content` (a sequence of
    strings, written as repr()s wrapped at < 75 columns), and writes the file
    back.

    Raises ValueError if the constant (or its closing bracket) cannot be
    found, or is found more than once.

    Fix: file handles are now closed even if reading/parsing raises
    (try/finally; `with` is avoided to keep old-Python compatibility).
    """
    f = open(filename)
    try:
        lines = f.readlines()
    finally:
        f.close()

    # Line to start/end inserting
    re_start = re.compile(r'^%s\s*=\s*\[\s*$' % constname)
    re_end = re.compile(r'^\s*\]\s*$')
    start = [n for n, l in enumerate(lines) if re_start.match(l)]
    if not start:
        raise ValueError("couldn't find line containing '%s = ['" % constname)
    if len(start) > 1:
        raise ValueError("too many lines containing '%s = ['" % constname)
    start = start[0] + 1

    end = [n for n, l in enumerate(lines) if n >= start and re_end.match(l)]
    if not end:
        raise ValueError("couldn't find line containing ']' after %s " % constname)
    end = end[0]

    # Pack the new content in lines not too long
    content = [repr(item) for item in content]
    new_lines = [[]]
    for item in content:
        # current chars + ", " separators + new item + 4-space indent
        if sum(map(len, new_lines[-1])) + 2 * len(new_lines[-1]) + len(item) + 4 > 75:
            new_lines.append([])
        new_lines[-1].append(item)

    lines[start:end] = ["    %s,\n" % ", ".join(items) for items in new_lines]

    f = open(filename, 'w')
    try:
        f.write(''.join(lines))
    finally:
        f.close()
+
+
# The lists below are rewritten in place by update_myself(), which scrapes
# them from the PostgreSQL documentation sources.
# Autogenerated: please edit them if you like wasting your time.

KEYWORDS = [
    'ABORT', 'ABSOLUTE', 'ACCESS', 'ACTION', 'ADD', 'ADMIN', 'AFTER',
    'AGGREGATE', 'ALL', 'ALSO', 'ALTER', 'ALWAYS', 'ANALYSE', 'ANALYZE',
    'AND', 'ANY', 'ARRAY', 'AS', 'ASC', 'ASSERTION', 'ASSIGNMENT',
    'ASYMMETRIC', 'AT', 'ATTRIBUTE', 'AUTHORIZATION', 'BACKWARD', 'BEFORE',
    'BEGIN', 'BETWEEN', 'BIGINT', 'BINARY', 'BIT', 'BOOLEAN', 'BOTH', 'BY',
    'CACHE', 'CALLED', 'CASCADE', 'CASCADED', 'CASE', 'CAST', 'CATALOG',
    'CHAIN', 'CHAR', 'CHARACTER', 'CHARACTERISTICS', 'CHECK', 'CHECKPOINT',
    'CLASS', 'CLOSE', 'CLUSTER', 'COALESCE', 'COLLATE', 'COLLATION',
    'COLUMN', 'COMMENT', 'COMMENTS', 'COMMIT', 'COMMITTED', 'CONCURRENTLY',
    'CONFIGURATION', 'CONNECTION', 'CONSTRAINT', 'CONSTRAINTS', 'CONTENT',
    'CONTINUE', 'CONVERSION', 'COPY', 'COST', 'CREATE', 'CROSS', 'CSV',
    'CURRENT', 'CURRENT_CATALOG', 'CURRENT_DATE', 'CURRENT_ROLE',
    'CURRENT_SCHEMA', 'CURRENT_TIME', 'CURRENT_TIMESTAMP', 'CURRENT_USER',
    'CURSOR', 'CYCLE', 'DATA', 'DATABASE', 'DAY', 'DEALLOCATE', 'DEC',
    'DECIMAL', 'DECLARE', 'DEFAULT', 'DEFAULTS', 'DEFERRABLE', 'DEFERRED',
    'DEFINER', 'DELETE', 'DELIMITER', 'DELIMITERS', 'DESC', 'DICTIONARY',
    'DISABLE', 'DISCARD', 'DISTINCT', 'DO', 'DOCUMENT', 'DOMAIN', 'DOUBLE',
    'DROP', 'EACH', 'ELSE', 'ENABLE', 'ENCODING', 'ENCRYPTED', 'END',
    'ENUM', 'ESCAPE', 'EXCEPT', 'EXCLUDE', 'EXCLUDING', 'EXCLUSIVE',
    'EXECUTE', 'EXISTS', 'EXPLAIN', 'EXTENSION', 'EXTERNAL', 'EXTRACT',
    'FALSE', 'FAMILY', 'FETCH', 'FIRST', 'FLOAT', 'FOLLOWING', 'FOR',
    'FORCE', 'FOREIGN', 'FORWARD', 'FREEZE', 'FROM', 'FULL', 'FUNCTION',
    'FUNCTIONS', 'GLOBAL', 'GRANT', 'GRANTED', 'GREATEST', 'GROUP',
    'HANDLER', 'HAVING', 'HEADER', 'HOLD', 'HOUR', 'IDENTITY', 'IF',
    'ILIKE', 'IMMEDIATE', 'IMMUTABLE', 'IMPLICIT', 'IN', 'INCLUDING',
    'INCREMENT', 'INDEX', 'INDEXES', 'INHERIT', 'INHERITS', 'INITIALLY',
    'INLINE', 'INNER', 'INOUT', 'INPUT', 'INSENSITIVE', 'INSERT', 'INSTEAD',
    'INT', 'INTEGER', 'INTERSECT', 'INTERVAL', 'INTO', 'INVOKER', 'IS',
    'ISNULL', 'ISOLATION', 'JOIN', 'KEY', 'LABEL', 'LANGUAGE', 'LARGE',
    'LAST', 'LC_COLLATE', 'LC_CTYPE', 'LEADING', 'LEAST', 'LEFT', 'LEVEL',
    'LIKE', 'LIMIT', 'LISTEN', 'LOAD', 'LOCAL', 'LOCALTIME',
    'LOCALTIMESTAMP', 'LOCATION', 'LOCK', 'MAPPING', 'MATCH', 'MAXVALUE',
    'MINUTE', 'MINVALUE', 'MODE', 'MONTH', 'MOVE', 'NAME', 'NAMES',
    'NATIONAL', 'NATURAL', 'NCHAR', 'NEXT', 'NO', 'NONE', 'NOT', 'NOTHING',
    'NOTIFY', 'NOTNULL', 'NOWAIT', 'NULL', 'NULLIF', 'NULLS', 'NUMERIC',
    'OBJECT', 'OF', 'OFF', 'OFFSET', 'OIDS', 'ON', 'ONLY', 'OPERATOR',
    'OPTION', 'OPTIONS', 'OR', 'ORDER', 'OUT', 'OUTER', 'OVER', 'OVERLAPS',
    'OVERLAY', 'OWNED', 'OWNER', 'PARSER', 'PARTIAL', 'PARTITION',
    'PASSING', 'PASSWORD', 'PLACING', 'PLANS', 'POSITION', 'PRECEDING',
    'PRECISION', 'PREPARE', 'PREPARED', 'PRESERVE', 'PRIMARY', 'PRIOR',
    'PRIVILEGES', 'PROCEDURAL', 'PROCEDURE', 'QUOTE', 'RANGE', 'READ',
    'REAL', 'REASSIGN', 'RECHECK', 'RECURSIVE', 'REF', 'REFERENCES',
    'REINDEX', 'RELATIVE', 'RELEASE', 'RENAME', 'REPEATABLE', 'REPLACE',
    'REPLICA', 'RESET', 'RESTART', 'RESTRICT', 'RETURNING', 'RETURNS',
    'REVOKE', 'RIGHT', 'ROLE', 'ROLLBACK', 'ROW', 'ROWS', 'RULE',
    'SAVEPOINT', 'SCHEMA', 'SCROLL', 'SEARCH', 'SECOND', 'SECURITY',
    'SELECT', 'SEQUENCE', 'SEQUENCES', 'SERIALIZABLE', 'SERVER', 'SESSION',
    'SESSION_USER', 'SET', 'SETOF', 'SHARE', 'SHOW', 'SIMILAR', 'SIMPLE',
    'SMALLINT', 'SOME', 'STABLE', 'STANDALONE', 'START', 'STATEMENT',
    'STATISTICS', 'STDIN', 'STDOUT', 'STORAGE', 'STRICT', 'STRIP',
    'SUBSTRING', 'SYMMETRIC', 'SYSID', 'SYSTEM', 'TABLE', 'TABLES',
    'TABLESPACE', 'TEMP', 'TEMPLATE', 'TEMPORARY', 'TEXT', 'THEN', 'TIME',
    'TIMESTAMP', 'TO', 'TRAILING', 'TRANSACTION', 'TREAT', 'TRIGGER',
    'TRIM', 'TRUE', 'TRUNCATE', 'TRUSTED', 'TYPE', 'UNBOUNDED',
    'UNCOMMITTED', 'UNENCRYPTED', 'UNION', 'UNIQUE', 'UNKNOWN', 'UNLISTEN',
    'UNLOGGED', 'UNTIL', 'UPDATE', 'USER', 'USING', 'VACUUM', 'VALID',
    'VALIDATE', 'VALIDATOR', 'VALUE', 'VALUES', 'VARCHAR', 'VARIADIC',
    'VARYING', 'VERBOSE', 'VERSION', 'VIEW', 'VOLATILE', 'WHEN', 'WHERE',
    'WHITESPACE', 'WINDOW', 'WITH', 'WITHOUT', 'WORK', 'WRAPPER', 'WRITE',
    'XML', 'XMLATTRIBUTES', 'XMLCONCAT', 'XMLELEMENT', 'XMLEXISTS',
    'XMLFOREST', 'XMLPARSE', 'XMLPI', 'XMLROOT', 'XMLSERIALIZE', 'YEAR',
    'YES', 'ZONE',
    ]

# Built-in SQL type names, scraped from datatype.sgml.
DATATYPES = [
    'bigint', 'bigserial', 'bit', 'bit varying', 'bool', 'boolean', 'box',
    'bytea', 'char', 'character', 'character varying', 'cidr', 'circle',
    'date', 'decimal', 'double precision', 'float4', 'float8', 'inet',
    'int', 'int2', 'int4', 'int8', 'integer', 'interval', 'line', 'lseg',
    'macaddr', 'money', 'numeric', 'path', 'point', 'polygon', 'real',
    'serial', 'serial4', 'serial8', 'smallint', 'text', 'time', 'timestamp',
    'timestamptz', 'timetz', 'tsquery', 'tsvector', 'txid_snapshot', 'uuid',
    'varbit', 'varchar', 'with time zone', 'without time zone', 'xml',
    ]

# Pseudo-types (usable e.g. as function argument or return types).
PSEUDO_TYPES = [
    'any', 'anyarray', 'anyelement', 'anyenum', 'anynonarray', 'cstring',
    'internal', 'language_handler', 'fdw_handler', 'record', 'trigger',
    'void', 'opaque',
    ]

# Drop the pseudo-types that also appear among the keywords above (e.g.
# 'any', 'trigger'), so each word gets highlighted by exactly one rule.
PSEUDO_TYPES = sorted(set(PSEUDO_TYPES) - set(map(str.lower, KEYWORDS)))

# Extra keywords of the PL/pgSQL procedural language (maintained by hand).
PLPGSQL_KEYWORDS = [
    'ALIAS', 'CONSTANT', 'DIAGNOSTICS', 'ELSIF', 'EXCEPTION', 'EXIT',
    'FOREACH', 'GET', 'LOOP', 'NOTICE', 'OPEN', 'PERFORM', 'QUERY', 'RAISE',
    'RETURN', 'REVERSE', 'SQLSTATE', 'WHILE',
    ]

# Run this module as a script to re-scrape the lists above from the PG docs.
if __name__ == '__main__':
    update_myself()
+

pygments/lexers/postgres.py

+"""
+    pygments.lexers.postgres
+    ~~~~~~~~~~~~~~~~~~~~~~~~
+
+    Lexers for PostgreSQL-specific SQL and psql interactive session.
+
+    :copyright: Copyright 2011 by Daniele Varrazzo.
+    :license: BSD, see LICENSE for details.
+
+    The module contains lexers related to PostgreSQL languages.
+
+    `PostgresLexer`
+        A SQL lexer for the PostgreSQL dialect. Differences w.r.t. the SQL
+        lexer are:
+
+        - keywords and data types list parsed from the PG docs (run the
+          `_postgres_builtins` module to update them);
+        - Content of $-strings parsed using a specific lexer, e.g. the content
+          of a PL/Python function is parsed using the Python lexer;
+        - parse PG specific constructs: E-strings, $-strings, U&-strings,
+          different operators and punctuation.
+
+    `PlPgsqlLexer`
        A lexer for the PL/pgSQL language. Adds a few specific constructs on
+        top of the PG SQL lexer (such as <<label>>).
+
+    `PostgresConsoleLexer`
+        A lexer to highlight an interactive psql session:
+
+        - identifies the prompt and does its best to detect the end of command
+          in multiline statement where not all the lines are prefixed by a
+          prompt, telling them apart from the output;
+        - highlights errors in the output and notification levels;
+        - handles psql backslash commands.
+
    The ``tests/examplefiles`` directory contains a few test files with data to be
    parsed by these lexers.
+
+"""
+
+import re
+import sys
+from copy import deepcopy
+
+from pygments.lexer import Lexer, RegexLexer, include, bygroups, using, \
+     this, do_insertions
+from pygments.token import Error, Punctuation, Literal, Token, \
+     Text, Comment, Operator, Keyword, Name, String, Number, Generic
+from pygments.lexers import get_lexer_by_name, ClassNotFound
+
+from pygments.lexers._postgres_builtins import (
+    KEYWORDS, DATATYPES, PSEUDO_TYPES, PLPGSQL_KEYWORDS)
+
+
__all__ = [ 'PostgresLexer', 'PlPgsqlLexer', 'PostgresConsoleLexer' ]

# Split text into lines, keeping the trailing newline on each line.
line_re  = re.compile('.*?\n')

# LANGUAGE clause of CREATE FUNCTION, with an optionally quoted language
# name; used by `language_callback` to pick a sub-lexer for $-strings.
language_re = re.compile(r"\s+LANGUAGE\s+'?(\w+)'?", re.IGNORECASE)
+
def language_callback(lexer, match):
    """Parse the content of a $-string using a sub-lexer.

    The sub-lexer is chosen from a LANGUAGE clause found within 100
    characters after the $-string, or failing that, the closest one within
    100 characters before it. With no usable sub-lexer the whole $-string is
    emitted as a plain String token.

    Note: this function should have been a `PostgresBase` method, but the
    rules deepcopy fails in this case.
    """
    sublexer = None

    after = language_re.match(lexer.text[match.end():match.end() + 100])
    if after is not None:
        sublexer = lexer._get_lexer(after.group(1))
    else:
        before = list(language_re.finditer(
            lexer.text[max(0, match.start() - 100):match.start()]))
        if before:
            sublexer = lexer._get_lexer(before[-1].group(1))

    if not sublexer:
        # No language found (or no lexer for it): emit verbatim.
        yield (match.start(), String, match.group())
        return

    # Delimiters stay strings; the body is tokenized by the sub-lexer.
    yield (match.start(1), String, match.group(1))
    for token in sublexer.get_tokens_unprocessed(match.group(2)):
        yield token
    yield (match.start(3), String, match.group(3))
+
class PostgresBase(object):
    """Base class for Postgres-related lexers.

    Implemented as a mixin to keep the Lexer metaclass from kicking in: this
    way the different lexers don't share a common Lexer ancestor. If they
    did, `_tokens` could be created on that ancestor and not updated for the
    subclasses, resulting e.g. in PL/pgSQL being parsed as plain SQL. This
    shortcoming seems to suggest that regexp lexers are not really
    subclassable.

    `language_callback` should really be our method, but this breaks deepcopy.
    """
    def get_tokens_unprocessed(self, text, *args):
        # Stash the full text so `language_callback` can look around the
        # current match for a LANGUAGE clause.
        self.text = text
        for token in super(PostgresBase, self).get_tokens_unprocessed(
                text, *args):
            yield token

    def _get_lexer(self, lang):
        """Return a lexer for the LANGUAGE name `lang`, or None.

        Tries the name as-is, then without a 'pl' prefix and/or a trailing
        'u' (untrusted variant), e.g. 'plpythonu' -> 'python'.
        """
        if lang.lower() == 'sql':
            return get_lexer_by_name('postgresql', **self.options)

        candidates = [lang]
        if lang.startswith('pl'):
            candidates.append(lang[2:])
        if lang.endswith('u'):
            candidates.append(lang[:-1])
        if lang.startswith('pl') and lang.endswith('u'):
            candidates.append(lang[2:-1])

        for candidate in candidates:
            try:
                return get_lexer_by_name(candidate, **self.options)
            except ClassNotFound:
                pass

        # TODO: better logging
        # print >>sys.stderr, "language not found:", lang
        return None
+
class PostgresLexer(PostgresBase, RegexLexer):
    """
    Lexer for the PostgreSQL dialect of SQL.

    Keywords and datatypes come from `_postgres_builtins`; the `tokens`
    table is deepcopied by `PlPgsqlLexer` and `PsqlRegexLexer`, which also
    scan 'root' for the rule tagged `Keyword` — keep that rule in place.
    """

    name = 'PostgreSQL SQL dialect'
    aliases = ['postgresql', 'postgres']
    mimetypes = ['text/x-postgresql']

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Text),
            (r'--.*?\n', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            # Datatypes first, so multi-word names (any whitespace between
            # the words) win over the plain keyword rule below.
            (r'(' + '|'.join([s.replace(" ", "\s+")
                for s in DATATYPES + PSEUDO_TYPES])
                  + r')\b', Name.Builtin),
            (r'(' + '|'.join(KEYWORDS) + r')\b', Keyword),
            (r'[+*/<>=~!@#%^&|`?^-]+', Operator),
            (r'::', Operator),  # cast
            (r'\$\d+', Name.Variable),  # positional parameter, e.g. $1
            (r'([0-9]*\.[0-9]*|[0-9]+)(e[+-]?[0-9]+)?', Number.Float),
            (r'[0-9]+', Number.Integer),
            # Plain, E-string (escape) and U&-string (Unicode) literals.
            (r"(E|U&)?'(''|[^'])*'", String.Single),
            (r'(U&)?"(""|[^"])*"', String.Name), # quoted identifier
            # $tag$ ... $tag$ string; the body is lexed according to a
            # nearby LANGUAGE clause (see `language_callback`).
            (r'(?ms)(\$[^\$]*\$)(.*?)(\1)', language_callback),
            (r'[a-zA-Z_][a-zA-Z0-9_]*', Name),

            # psql variable in SQL
            (r""":(['"]?)[a-z][a-z0-9_]*\b\1""", Name.Variable),

            (r'[;:()\[\]\{\},\.]', Punctuation),
        ],
        'multiline-comments': [
            # PostgreSQL /* ... */ comments nest, hence the state push.
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/\*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ],
    }
+
+
class PlPgsqlLexer(PostgresBase, RegexLexer):
    """
    Handle the extra syntax in Pl/pgSQL language.
    """
    name = 'PL/pgSQL'
    aliases = ['plpgsql']
    mimetypes = ['text/x-plpgsql']

    flags = re.IGNORECASE
    # Deepcopy so the class-level edits below don't leak back into
    # PostgresLexer.tokens (see the PostgresBase docstring).
    tokens = deepcopy(PostgresLexer.tokens)

    # extend the keywords list: locate the rule tagged `Keyword` in 'root'
    # and rebuild it with the PL/pgSQL keywords merged in.
    for i, pattern in enumerate(tokens['root']):
        if pattern[1] == Keyword:
            tokens['root'][i] = (
                r'(' + '|'.join(KEYWORDS + PLPGSQL_KEYWORDS) + r')\b',
                Keyword)
            # don't leave the loop index behind as a class attribute
            del i
            break
    else:
        assert 0, "SQL keywords not found"

    # Add specific PL/pgSQL rules (before the SQL ones)
    tokens['root'][:0] = [
        (r'\%[a-z][a-z0-9_]*\b', Name.Builtin),     # actually, a datatype
        (r':=', Operator),
        (r'\<\<[a-z][a-z0-9_]*\>\>', Name.Label),   # block label
        (r'\#[a-z][a-z0-9_]*\b', Keyword.Pseudo),   # #variable_conflict
    ]
+
+
class PsqlRegexLexer(PostgresBase, RegexLexer):
    """
    Extend the PostgresLexer adding support specific for psql commands.

    This is not a complete psql lexer yet as it lacks prompt support
    and output rendering.  It is driven by `PostgresConsoleLexer`.
    """
    name = 'PostgreSQL console - regexp based lexer'
    aliases = []    # not public

    flags = re.IGNORECASE
    # Private copy of the SQL rules, so the additions below don't affect
    # PostgresLexer.
    tokens = deepcopy(PostgresLexer.tokens)

    # A backslash command switches to a dedicated state.
    tokens['root'].append(
        (r'\\[^\s]+', Keyword.Pseudo, 'psql-command'))
    tokens['psql-command'] = [
        # NOTE(review): pushes a fresh 'root' state instead of '#pop';
        # confirm this is intended (net effect is similar at end of line).
        (r'\n', Text, 'root'),
        (r'\s+', Text),
        (r'\\[^\s]+', Keyword.Pseudo),
        # psql variable, possibly quoted: :var, :'var', :"var"
        (r""":(['"]?)[a-z][a-z0-9_]*\b\1""", Name.Variable),
        (r"'(''|[^'])*'", String.Single),
        (r"`([^`])*`", String.Backtick),    # backtick-quoted shell command
        (r"[^\s]+", String.Symbol),         # any other command argument
    ]
+
# Regexes used by PostgresConsoleLexer to carve a psql session into
# prompts, commands and output.

# A psql prompt, e.g. "regression=# ", "piro=> ", "regression(# ".
re_prompt = re.compile(r'^(\S.*?)??[=\-\(\$\'\"][#>]')
# End of a SQL command: ';' optionally followed by a line comment.
re_end_command = re.compile(r';\s*(--.*?)?$')
# A backslash command with its arguments.
# Fix: this name used to be assigned twice; the first binding
# (r'\s*\\') was dead code and has been removed.
re_psql_command = re.compile(r'(\s*)(\\.+?)(\s+)$')
# NOTE(review): re_error is not referenced in this module (re_message covers
# the ERROR/FATAL cases) — confirm before removing it.
re_error = re.compile(r'(ERROR|FATAL):')
# A server message with its severity prefix.
re_message = re.compile(
    r'((?:DEBUG|INFO|NOTICE|WARNING|ERROR|'
    r'FATAL|HINT|DETAIL|LINE [0-9]+):)(.*?\n)')
+
def lookahead(iterable):
    """Wrap an iterator and allow pushing back an item.

    `next()` yields items normally; `send(value)` pushes `value` back (the
    send itself returns the value, and the following `next()` yields it
    again).
    """
    for current in iterable:
        while True:
            pushed = yield current
            if pushed is None:
                # plain next(): move on to the next underlying item
                break
            # acknowledge the send(), then re-serve the pushed-back value
            yield pushed
            current = pushed
+
+
class PostgresConsoleLexer(Lexer):
    """
    Lexer for psql sessions.

    Alternates between two phases: collect the lines of a command (prompts
    emitted as `Generic.Prompt`, the SQL handed to `PsqlRegexLexer`), then
    emit the following lines as output until a new prompt is seen.
    """

    name = 'PostgreSQL console (psql)'
    aliases = ['psql', 'postgresql-console', 'postgres-console']
    mimetypes = ['text/x-postgresql-psql']

    def get_tokens_unprocessed(self, data):
        # Sub-lexer for the SQL part of the session.
        sql = PsqlRegexLexer(**self.options)

        # `lookahead` lets us push a line back when we over-read into the
        # next prompt while scanning output.
        lines = lookahead(line_re.findall(data))

        # prompt-output cycle
        while 1:

            # consume the lines of the command: start with an optional prompt
            # and continue until the end of command is detected
            curcode = ''
            insertions = []
            while 1:
                try:
                    line = lines.next()
                except StopIteration:
                    # allow the emission of partially collected items
                    # the repl loop will be broken below
                    break

                # Identify a shell prompt in case of psql commandline example
                if line.startswith('$') and not curcode:
                    lexer = get_lexer_by_name('console', **self.options)
                    for x in lexer.get_tokens_unprocessed(line):
                        yield x
                    break

                # Identify a psql prompt
                mprompt = re_prompt.match(line)
                if mprompt is not None:
                    # Record the prompt so do_insertions() can interleave it
                    # with the SQL tokens at the right offset.
                    insertions.append((len(curcode),
                                       [(0, Generic.Prompt, mprompt.group())]))
                    curcode += line[len(mprompt.group()):]
                else:
                    curcode += line

                # Check if this is the end of the command
                # TODO: better handle multiline comments at the end with
                # a lexer with an external state?
                if re_psql_command.match(curcode) \
                or re_end_command.search(curcode):
                    break

            # Emit the combined stream of command and prompt(s)
            for item in do_insertions(insertions,
                    sql.get_tokens_unprocessed(curcode)):
                yield item

            # Emit the output lines
            out_token = Generic.Output
            while 1:
                # NOTE(review): relies on StopIteration propagating out of a
                # generator to end the session (fine on Python 2; under
                # PEP 479 / Python 3.7+ this would need an explicit return).
                line = lines.next()
                mprompt = re_prompt.match(line)
                if mprompt is not None:
                    # push the line back to have it processed by the prompt
                    lines.send(line)
                    break

                mmsg = re_message.match(line)
                if mmsg is not None:
                    # Severity prefix highlighted; ERROR/FATAL switch the
                    # rest of this output block to Generic.Error.
                    if mmsg.group(1).startswith("ERROR") \
                    or mmsg.group(1).startswith("FATAL"):
                        out_token = Generic.Error
                    yield (mmsg.start(1), Generic.Strong, mmsg.group(1))
                    yield (mmsg.start(2), out_token, mmsg.group(2))
                else:
                    yield (0, out_token, line)
+
+

tests/examplefiles/postgresql_test.txt

+CREATE OR REPLACE FUNCTION something() RETURNS int4 AS
+$x$
+BEGIN
+    RETURN 42;
+END
+$x$
+LANGUAGE 'plpgsql';
+
+CREATE FUNCTION pymax (a integer, b integer)
+  RETURNS integer
+AS $$
+  if a > b:
+    return a
+  return b
+$$ language plpythonu;
+
+CREATE FUNCTION nested_lexers (a integer, b integer)
+$function$
+BEGIN
+    SELECT ($1 ~ $q$[\t\r\n\v\\]$q$);
+END;
+$function$
+LANGUAGE sql;
+
+CREATE OR REPLACE FUNCTION measurement_insert_trigger()
+RETURNS TRIGGER AS $$
+BEGIN
+    <<test>>
+    INSERT INTO measurement_y2008m01 VALUES (NEW.*);
+    RETURN NULL;
+END;
+$$
+LANGUAGE plpgsql;
+
+-- As returned by pg_dump
+CREATE FUNCTION test_function() RETURNS integer
+    LANGUAGE plpgsql STABLE STRICT
+    AS $$
+begin
+    return 42;
+end
+$$;
+
+-- Unicode names and strings
+SELECT U&'\0441\043B\043E\043D'
+FROM U&"\0441\043B\043E\043D";
+

tests/examplefiles/psql_session.txt

+regression=# select foo;
+ERROR:  column "foo" does not exist
+LINE 1: select foo;
+               ^
+regression=# \q
+
+peter@localhost testdb=> \a \t \x
+Output format is aligned.
+Tuples only is off.
+Expanded display is on.
+
+regression=# select '\x';
+WARNING:  nonstandard use of escape in a string literal
+LINE 1: select '\x';
+               ^
+HINT:  Use the escape string syntax for escapes, e.g., E'\r\n'.
+ ?column? 
+----------
+ x
+(1 row)
+
+regression=# select E'\x';
+
+piro=> \set foo 30;
+piro=> select * from test where foo <= :foo;
+ foo | bar 
+-----+-----
+  10 | 
+  20 | 
+(2 rows)
+
+testdb=> \set foo 'my_table'
+testdb=> SELECT * FROM :"foo";
+
+testdb=> \set content `cat my_file.txt`
+testdb=> INSERT INTO my_table VALUES (:'content');
+
+regression=# select (
+regression(# 1);
+ ?column? 
+----------
+        1
+(1 row)
+
+piro=> select (
+piro(> '
+piro'> ' || $$
+piro$> $$)
+piro-> from "
+piro"> foo";
+ERROR:  relation "
+foo" does not exist
+LINE 5: from "
+             ^
+
+testdb=> CREATE TABLE my_table (
+first integer not null default 0,
+second text) ; -- end of command
+CREATE TABLE
+
+-- Table output
+=# SELECT '0x10'::mpz AS "hex", '10'::mpz AS "dec",
+-#        '010'::mpz AS oct, '0b10'::mpz AS bin;
+ hex | dec | oct | bin
+-----+-----+-----+-----
+ 16  | 10  | 8   | 2
+(1 row)
+
+-- One field output
+regression=# select schemaname from  pg_tables limit 3;
+ schemaname 
+------------
+ pg_catalog
+ pg_catalog
+ pg_catalog
+(3 rows)
+
+-- TODO: prompt in multiline comments still not handled correctly
+test=> select 1 /* multiline
+test*> and 2 /* and 3 */
+test*> end comment */, 2;
+ ?column? | ?column? 
+----------+----------
+        1 |        2
+
+=# select 10.0, 1e-6, 1E+6;
+ ?column? | ?column? | ?column? 
+----------+----------+----------
+     10.0 | 0.000001 |  1000000
+(1 row)
+
+regression=# begin;
+BEGIN
+regression=# create table asdf (foo serial primary key);
+NOTICE:  CREATE TABLE will create implicit sequence "asdf_foo_seq" for serial column "asdf.foo"
+NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "asdf_pkey" for table "asdf"
+CREATE TABLE
+regression=# insert into asdf values (10) returning foo;
+ foo 
+-----
+  10
+(1 row)
+
+INSERT 0 1
+regression=# ROLLBACK ;
+ROLLBACK
+
+=> EXPLAIN SELECT * FROM tenk1
+-> WHERE unique1 < 100;   -- Don't take -> in the plan as a prompt
+
+                                  QUERY PLAN
+------------------------------------------------------------------------------
+ Bitmap Heap Scan on tenk1  (cost=2.37..232.35 rows=106 width=244)
+   Recheck Cond: (unique1 < 100)
+   ->  Bitmap Index Scan on tenk1_unique1  (cost=0.00..2.37 rows=106 width=0)
+         Index Cond: (unique1 < 100)
+
+
+-- don't swallow the end of a malformed line
+test=> select 1,
+'this line must be emitted'

tests/test_basic_api.py

         if cls.__name__ not in (
             'PythonConsoleLexer', 'RConsoleLexer', 'RubyConsoleLexer',
             'SqliteConsoleLexer', 'MatlabSessionLexer', 'ErlangShellLexer',
-            'BashSessionLexer', 'LiterateHaskellLexer'):
+            'BashSessionLexer', 'LiterateHaskellLexer', 'PostgresConsoleLexer'):
             inst = cls(ensurenl=False)
             ensure(inst.get_tokens('a\nb'), 'a\nb')
             inst = cls(ensurenl=False, stripall=True)