Commits

Kirill Simonov committed f5ba920

Added SQL splitter utility.

Comments (0)

Files changed (7)

 .. autoclass:: Connect
    :members: __call__, open_connection, translate_error
 
+:mod:`htsql.split_sql`
+----------------------
+.. automodule:: htsql.split_sql
+.. autoclass:: SQLToken
+.. autoclass:: SplitSQL
+   :members: __call__
+
 :mod:`htsql.tr`
 ---------------
 .. automodule:: htsql.tr
 .. autoclass:: SQLiteError
 .. autoclass:: SQLiteConnect
 
+:mod:`htsql_sqlite.split_sql`
+-----------------------------
+.. automodule:: htsql_sqlite.split_sql
+.. autoclass:: SplitSQLite
+
 :mod:`htsql_pgsql`
 ------------------
 .. automodule:: htsql_pgsql
 .. autoclass:: PGSQLError
 .. autoclass:: PGSQLConnect
 
+:mod:`htsql_pgsql.split_sql`
+----------------------------
+.. automodule:: htsql_pgsql.split_sql
+.. autoclass:: SplitPGSQL
 
+

src/htsql/export.py

 from addon import Addon
 from wsgi import wsgi_adapters
 from connect import connect_adapters
+from split_sql import split_sql_adapters
 
 
 class HTSQL_CORE(Addon):
     """
 
     # List of adapters exported by the addon.
-    adapters = wsgi_adapters+connect_adapters
+    adapters = (wsgi_adapters +
+                connect_adapters +
+                split_sql_adapters)
 
 

src/htsql/split_sql.py

+#
+# Copyright (c) 2006-2010, Prometheus Research, LLC
+# Authors: Clark C. Evans <cce@clarkevans.com>,
+#          Kirill Simonov <xi@resolvent.net>
+#
+
+
+"""
+This module declares the SQL splitter adapter.
+
+This module exports a global variable:
+
+`split_sql_adapters`
+    List of adapters declared in this module.
+"""
+
+
+from .adapter import Utility, find_adapters
+from .util import maybe
+import re
+
+
+class SQLToken(object):
+    """
+    Declares a regular expression pattern to be used by the SQL splitter.
+
+    `pattern` (a string)
+        A regular expression in the verbose format.  The expression will
+        be compiled using ``re.X|re.S`` flags; the compiled expression
+        is stored in the `regexp` attribute.
+
+    `min_level` (an integer or ``None``)
+        The minimum level at which the pattern activates.
+
+    `max_level` (an integer or ``None``)
+        The maximum level at which the pattern activates.
+
+    `only_level` (an integer or ``None``)
+        The level at which the pattern activates.  Cannot be combined
+        with `min_level` or `max_level`.
+
+    `delta` (an integer)
+        When a token is detected, change the current level by `delta`.
+
+    `is_junk` (Boolean)
+        Ignore the token value.
+
+    `is_end` (Boolean)
+        If set, indicates that the splitter should stop when a token
+        is detected.
+    """
+
+    def __init__(self, pattern,
+                 min_level=None, max_level=None, only_level=None,
+                 delta=0, is_junk=False, is_end=False):
+        # Sanity check on the arguments.
+        assert isinstance(pattern, str)
+        assert isinstance(min_level, maybe(int))
+        assert isinstance(max_level, maybe(int))
+        assert isinstance(only_level, maybe(int))
+        # `only_level` is exclusive with the `min_level`/`max_level` range.
+        assert only_level is None or (min_level is None and max_level is None)
+        assert isinstance(delta, int)
+        assert isinstance(is_junk, bool)
+        assert isinstance(is_end, bool)
+
+        self.pattern = pattern
+        # Compile once here; the pattern is in the verbose format (`re.X`)
+        # and `.` also matches newlines (`re.S`).
+        self.regexp = re.compile(pattern, re.X|re.S)
+        self.min_level = min_level
+        self.max_level = max_level
+        self.only_level = only_level
+        self.delta = delta
+        self.is_junk = is_junk
+        self.is_end = is_end
+
+
+class SplitSQL(Utility):
+    """
+    Declares the SQL splitter interface.
+
+    A SQL splitter takes a string containing one or more SQL statements
+    separated by ``;`` and produces a sequence of SQL statements.
+
+    Usage::
+
+        try:
+            split_sql = SplitSQL()
+            for sql in split_sql(input):
+                cursor.execute(sql)
+        except ValueError:
+            ...
+
+    This is an abstract utility.  To add a new splitter, create a subclass
+    of :class:`SplitSQL` and override the class variable `tokens`:
+
+    `tokens` (a list of :class:`SQLToken` instances)
+        The tokens recognized by the splitter.  The tokens are tried
+        in the order they are listed; the first one that is active at
+        the current level and matches the input wins.
+    """
+
+    tokens = None
+
+    def __call__(self, input):
+        """
+        Splits the input into SQL statements.
+
+        `input` (a string)
+            A string containing SQL statements separated by ``;``.
+
+        Generates a sequence of SQL statements.
+
+        Raises :exc:`ValueError` when none of the tokens matches the
+        input at the current position; the message contains the line
+        and the column (both 1-based) of that position in `input`.
+        """
+        # The current position in `input`.
+        start = 0
+        # The current level.
+        level = 0
+        # The accumulated token values.
+        values = []
+        # Are we done?
+        is_end = False
+        # Till we are done.
+        while not is_end:
+            # Loop over the tokens to find one matching the input.
+            for token in self.tokens:
+                # Ignore tokens that are not available at the current level.
+                if token.min_level is not None and level < token.min_level:
+                    continue
+                if token.max_level is not None and level > token.max_level:
+                    continue
+                if token.only_level is not None and level != token.only_level:
+                    continue
+                # Does the input match the token pattern?
+                match = token.regexp.match(input, start)
+                if match is None:
+                    continue
+                # The value of the token.
+                value = match.group()
+                # Accumulate the value.
+                if not token.is_junk and value:
+                    values.append(value)
+                # Update the current level.
+                level += token.delta
+                assert level >= 0
+                # When we reach the level `0`, the accumulated tokens
+                # are combined to a new statement.
+                if level == 0 and values:
+                    sql = ''.join(values)
+                    yield sql
+                    values = []
+                # Advance the pointer and start over.
+                start = match.end()
+                is_end = token.is_end
+                break
+
+            # None of the tokens matched.
+            else:
+                # Determine the current position and complain.  The
+                # position must be measured against the whole `input`,
+                # not against the last generated statement (which may
+                # not even exist yet).
+                line = input[:start].count('\n')
+                if line:
+                    column = start-input[:start].rindex('\n')-1
+                else:
+                    column = start
+                raise ValueError("unable to parse an SQL statement"
+                                 " at line %s, column %s" % (line+1, column+1))
+
+        # Some sanity checks.
+        assert start == len(input)
+        assert not values
+
+
+split_sql_adapters = find_adapters()
+
+

src/htsql_pgsql/export.py

 
 from htsql.addon import Addon
 from .connect import connect_adapters
+from .split_sql import split_sql_adapters
 
 
 class ENGINE_PGSQL(Addon):
     """
 
     # List of adapters exported by the addon.
-    adapters = connect_adapters
+    adapters = (connect_adapters +
+                split_sql_adapters)
 
 

src/htsql_pgsql/split_sql.py

+#
+# Copyright (c) 2006-2010, Prometheus Research, LLC
+# Authors: Clark C. Evans <cce@clarkevans.com>,
+#          Kirill Simonov <xi@resolvent.net>
+#
+
+
+"""
+This module implements the SQL splitter for PostgreSQL.
+
+This module exports a global variable:
+
+`split_sql_adapters`
+    List of adapters declared in this module.
+"""
+
+
+from htsql.split_sql import SQLToken, SplitSQL
+from htsql.adapter import find_adapters
+
+
+class SplitPGSQL(SplitSQL):
+    """
+    Implements the SQL splitter for PostgreSQL.
+    """
+
+    # Note: this is not an exact PostgreSQL tokenizer, but
+    # a good approximation.  In particular, we assume here that
+    # the `standard_conforming_strings` parameter is turned on.
+
+    tokens = [
+            # Whitespace between separate statements.
+            SQLToken(r"""
+                     # whitespaces
+                     [\ \t\r\n]+
+                     # or a SQL comment (FIXME: add C-style comments?)
+                     | -- [^\r\n]* \r?\n
+                     # or a psql command
+                     | \\ [a-zA-Z_] (?: [\ \t] [^\r\n]* )? \r?\n
+                     """, only_level=0, is_junk=True),
+
+            # The beginning of an SQL statement.
+            SQLToken(r""" [a-zA-Z]+ """, only_level=0, delta=+1),
+
+            # A block of regular SQL tokens.
+            #
+            # The quoted identifier alternative consumes one character
+            # per iteration (like the string literal alternative) rather
+            # than a nested `[^"]+` run: a quantifier nested in another
+            # quantifier makes the regexp engine backtrack exponentially
+            # on an unterminated `"`.
+            SQLToken(r"""
+                     (
+                     # whitespaces
+                     [\ \t\r\n]+
+                     # or a comment
+                     | -- [^\r\n]*\r?\n
+                     # or a standard-conforming string literal
+                     | ' (?: [^'] | '' )* '
+                     # or a C-style escaped string literal
+                     | [eE] ' (?: [^'\\] | \\ . )* '
+                     # or a quoted identifier
+                     | " (?: [^"] | "" )+ "
+                     # or a keyword or an unquoted identifier
+                     | [a-zA-Z_][0-9a-zA-Z_$]*
+                     # or a number
+                     | [0-9]+ (?: \. [0-9]* )? (?: [eE] [+-] [0-9]+ )?
+                     # or a symbol
+                     | [*/<>=~!@#%^&|`?,:.+-]
+                     )+
+                     """, min_level=1),
+
+            # $-quoted string literals.
+            SQLToken(r"""
+                     \$ (?P<tag> [^$]* ) \$
+                     (?: [^$] | \$ (?! (?P=tag) \$ ) )*
+                     \$ (?P=tag) \$
+                     """, min_level=1),
+
+            # Open parentheses and brackets nest.
+            SQLToken(r""" [\(\[] """, min_level=1, delta=+1),
+
+            # Close parentheses and brackets un-nest.
+            SQLToken(r""" [\)\]] """, min_level=2, delta=-1),
+
+            # Semicolon indicates the statement ends when there is no nesting.
+            SQLToken(r""" ; """, only_level=1, delta=-1),
+
+            # Same for EOF, but it also stops the splitter.
+            SQLToken(r""" $ """, only_level=1, delta=-1, is_end=True),
+
+            # EOF outside the statement stops the splitter.
+            SQLToken(r""" $ """, only_level=0, is_end=True),
+    ]
+
+
+split_sql_adapters = find_adapters()
+
+

src/htsql_sqlite/export.py

 
 from htsql.addon import Addon
 from .connect import connect_adapters
+from .split_sql import split_sql_adapters
 
 
 class ENGINE_SQLITE(Addon):
     """
 
     # List of adapters exported by the addon.
-    adapters = connect_adapters
+    adapters = (connect_adapters +
+                split_sql_adapters)
 
 

src/htsql_sqlite/split_sql.py

+#
+# Copyright (c) 2006-2010, Prometheus Research, LLC
+# Authors: Clark C. Evans <cce@clarkevans.com>,
+#          Kirill Simonov <xi@resolvent.net>
+#
+
+
+"""
+This module implements the SQL splitter for SQLite.
+
+This module exports a global variable:
+
+`split_sql_adapters`
+    List of adapters declared in this module.
+"""
+
+
+from htsql.split_sql import SQLToken, SplitSQL
+from htsql.adapter import find_adapters
+
+
+class SplitSQLite(SplitSQL):
+    """
+    Implements the SQL splitter for SQLite.
+    """
+
+    # This is a simple tokenizer for SQLite.  It does not verify
+    # that the statements are lexically valid and it may fail
+    # to recognize some valid statements, however it works for
+    # most common cases.
+
+    tokens = [
+            # Whitespace between separate statements.
+            SQLToken(r"""
+                     # whitespaces
+                     [\ \t\r\n]+
+                     # or a SQL comment
+                     | -- [^\r\n]* \r?\n
+                     # or a C-style comment
+                     | /\* .*? \*/
+                     """, only_level=0, is_junk=True),
+
+            # The beginning of a SQL statement.
+            SQLToken(r""" [a-zA-Z]+ """, only_level=0, delta=+1),
+
+            # Start of the BEGIN/END block.
+            # NOTE(review): the block of regular SQL tokens below also
+            # accepts `BEGIN` and `END` as plain names and consumes input
+            # greedily, so it looks like these two tokens only get a chance
+            # when the keyword is not preceded by other regular tokens --
+            # confirm against trigger definitions.
+            SQLToken(r""" \b BEGIN \b """, min_level=1, delta=+1),
+
+            # End of the BEGIN/END block.
+            SQLToken(r""" \b END \b """, min_level=2, delta=-1),
+
+            # A block of regular SQL tokens.
+            #
+            # The quoted name alternative consumes one character per
+            # iteration (like the string literal alternative) rather
+            # than a nested `[^"]+` run: a quantifier nested in another
+            # quantifier makes the regexp engine backtrack exponentially
+            # on an unterminated `"`.
+            SQLToken(r"""
+                     (
+                     # whitespaces
+                     [\ \t\r\n]+
+                     # or a SQL comment
+                     | -- [^\r\n]*\r?\n
+                     # or a C-style comment
+                     | /\* .*? \*/
+                     # or a string literal
+                     | ' (?: [^'] | '' )* '
+                     # or a quoted name
+                     | " (?: [^"] | "" )+ "
+                     # or a keyword or a name
+                     | [a-zA-Z_][0-9a-zA-Z_]*
+                     # or a number
+                     | [0-9]+ (?: \. [0-9]* )? (?: [eE] [+-] [0-9]+ )?
+                     # or a symbol
+                     | [().,<>=!&|~*/%+-]
+                     )+
+                     """, min_level=1),
+
+            # Semicolon at the top level indicates the statement end.
+            SQLToken(r""" ; """, only_level=1, delta=-1),
+
+            # Semicolon within BEGIN/END block is just a separator.
+            SQLToken(r""" ; """, min_level=2),
+
+            # Same for EOF, but it also stops the splitter.
+            SQLToken(r""" $ """, only_level=1, delta=-1, is_end=True),
+
+            # EOF outside the statement stops the splitter.
+            SQLToken(r""" $ """, only_level=0, is_end=True),
+    ]
+
+
+split_sql_adapters = find_adapters()
+
+