Commits

Daniele Varrazzo committed 2be7b53

Multiline input in psql console without requiring prompt at each line

Also support multiline comments in the command, although mixing them with
prompts is still broken.

Comments (0)

Files changed (2)

pygments/lexers/postgres.py

 
 
 re_prompt = re.compile(r'^.*?[=\-\(][#>]')
-
+re_psql_command = re.compile(r'\s*\\')
+re_end_command = re.compile(r';\s*(--.*?)?$')
 re_psql_command = re.compile(r'(\s*)(\\.+?)(\s+)$')
 re_error = re.compile(r'(ERROR|FATAL):')
 re_message = re.compile(
     r'FATAL|HINT|DETAIL|LINE [0-9]+):)(.*?\n)')
 re_charhint = re.compile(r'\s*\^\s*\n')
 
+def lookahead(x):
+    """Wrap an iterator and allow pushing back an item."""
+    for i in x:
+        while 1:
+            i = yield i
+            if i is None:
+                break
+            yield i
+
+
 class PostgresConsoleLexer(Lexer):
     """
     Lexer for psql sessions.
-
-    TODO: multiline comments are broken.
     """
 
     name = 'PostgreSQL console (psql)'
     def get_tokens_unprocessed(self, data):
         sql = PsqlRegexLexer(**self.options)
 
-        curcode = ''
-        insertions = []
-        out_token = Generic.Output
-        for match in line_re.finditer(data):
-            line = match.group()
-            mprompt = re_prompt.match(line)
-            if mprompt is not None:
-                out_token = Generic.Output
-                insertions.append((len(curcode),
-                                   [(0, Generic.Prompt, mprompt.group())]))
-                curcode += line[len(mprompt.group()):]
-            else:
-                if curcode:
-                    for item in do_insertions(insertions,
-                                              sql.get_tokens_unprocessed(curcode)):
-                        yield item
-                    curcode = ''
-                    insertions = []
+        lines = lookahead(line_re.findall(data))
+
+        # prompt-output cycle
+        while 1:
+
+            # consume the lines of the command: start with an optional prompt
+            # and continue until the end of command is detected
+            curcode = ''
+            insertions = []
+            while 1:
+                try:
+                    line = lines.next()
+                except StopIteration:
+                    # allow the emission of partially collected items
+                    # the repl loop will be broken below
+                    break
+
+                mprompt = re_prompt.match(line)
+                if mprompt is not None:
+                    insertions.append((len(curcode),
+                                       [(0, Generic.Prompt, mprompt.group())]))
+                    curcode += line[len(mprompt.group()):]
+                else:
+                    curcode += line
+
+                # Check if this is the end of the command
+                # TODO: better handle multiline comments at the end with
+                # a lexer with an external state?
+                if re_psql_command.match(curcode) \
+                or re_end_command.search(curcode):
+                    break
+
+            # Emit the combined stream of command and prompt(s)
+            for item in do_insertions(insertions,
+                    sql.get_tokens_unprocessed(curcode)):
+                yield item
+
+            # Emit the output lines
+            out_token = Generic.Output
+            while 1:
+                line = lines.next()
+                mprompt = re_prompt.match(line)
+                if mprompt is not None:
+                    # push the line back to have it processed by the prompt
+                    lines.send(line)
+                    break
+
                 mmsg = re_message.match(line)
                 if mmsg is not None:
                     if mmsg.group(1).startswith("ERROR") \
                     yield (mmsg.start(1), Generic.Strong, mmsg.group(1))
                     yield (mmsg.start(2), out_token, mmsg.group(2))
                 elif re_charhint.match(line):
-                    yield (match.start(), out_token, line)
+                    yield (0, out_token, line)
                 else:
-                    yield (match.start(), Generic.Output, line)
+                    yield (0, Generic.Output, line)
 
-        if curcode:
-            for item in do_insertions(insertions,
-                                      sql.get_tokens_unprocessed(curcode)):
-                yield item
 
-

tests/examplefiles/psql_session.txt

         1
 (1 row)
 
+testdb=> CREATE TABLE my_table (
+first integer not null default 0,
+second text) ; -- end of command
+CREATE TABLE
+
 -- Table output
 =# SELECT '0x10'::mpz AS "hex", '10'::mpz AS "dec",
 -#        '010'::mpz AS oct, '0b10'::mpz AS bin;
  16  | 10  | 8   | 2
 (1 row)
 
-
 -- One field output
 regression=# select schemaname from  pg_tables limit 3;
  schemaname 
  pg_catalog
 (3 rows)
 
-/* Decimal literals.
- * ha ha, multiline
- select foo from bar;
-=#  ... just joking.  */
+-- TODO: prompt in multiline comments still not handled correctly
+test=> select 1 /* multiline
+test*> and 2 /* and 3 */
+test*> end comment */, 2;
+ ?column? | ?column? 
+----------+----------
+        1 |        2
 
 =# select 10.0, 1e-6, 1E+6;
  ?column? | ?column? | ?column? 
 regression=# ROLLBACK ;
 ROLLBACK
 
+-- don't swallow the end of a malformed line
+test=> select 1,
+'this line must be emitted'