Commits

Kirill Simonov  committed a2740a2

tweak.filedb: added option `cache-file`.

  • Participants
  • Parent commits 89bd8ee

Comments (0)

Files changed (6)

File src/htsql/core/fmt/emit.py

     if isinstance(format, (str, unicode)):
         format = Accept.__invoke__(format)
         assert not isinstance(format, DefaultFormat), "unknown format"
-    for line in Emit.__invoke__(format, headers):
-        if isinstance(line, unicode):
-            line = line.encode('utf-8')
-        yield line
+    return (line.encode('utf-8') if isinstance(line, unicode) else line
+            for line in Emit.__invoke__(format, headers))
 
 

File src/htsql/tweak/filedb/__init__.py

 #
 
 
-from . import connect
+from . import connect, introspect
 from ...core.addon import Addon, Parameter
 from ...core.util import DB
 from ...core.validator import StrVal, SeqVal, RecordVal
     files; each entry has the following fields:
 
     `file`: the path to the CSV file.
+
+    Optional parameter `cache-file` allows you to specify a persistent
+    storage for the database.
     """
 
     prerequisites = []
                 ("file", StrVal())])),
                 default=[],
                 hint="""source CSV files"""),
+            Parameter('cache_file', StrVal(),
+                hint="""persistent storage"""),
     ]
 
     @classmethod

File src/htsql/tweak/filedb/connect.py

 from ...core.context import context
 from ...core.util import to_name
 from ...core.cache import once
-from ...core.connect import Connect
+from ...core.connect import Connect, DBErrorGuard
 from ...core.adapter import rank, Utility
 from ...core.error import Error
 import sqlite3
         self.connection = connection
 
     def __call__(self):
-        sources = context.app.tweak.filedb.sources
         cursor = self.connection.cursor()
-        table_names = set()
-        for source_idx, source in enumerate(sources):
-            cursor = self.connection.cursor()
-            if not os.path.exists(source.file):
-                raise Error("File does not exist: %s" % source.file)
+        source_meta = {}
+        cursor.execute("""
+            SELECT 1
+            FROM sqlite_master
+            WHERE name = '!source'
+        """)
+        if cursor.fetchone() is None:
+            cursor.execute("""
+                CREATE TABLE "!source" (
+                    name        TEXT PRIMARY KEY NOT NULL,
+                    file        TEXT UNIQUE NOT NULL,
+                    size        INTEGER NOT NULL,
+                    timestamp   FLOAT NOT NULL
+                )
+            """)
+        else:
+            cursor.execute("""
+                SELECT name, file, size, timestamp
+                FROM "!source"
+                ORDER BY name
+            """)
+            for name, file, size, timestamp in cursor.fetchall():
+                source_meta[name] = (file, size, timestamp)
+        for table_name, source_file in build_names():
+            if not os.path.exists(source_file):
+                raise Error("File does not exist", source_file)
+            stat = os.stat(source_file)
+            meta = (source_file, stat.st_size, stat.st_mtime)
+            if table_name in source_meta:
+                if meta == source_meta[table_name]:
+                    continue
+                else:
+                    cursor.execute("""
+                        UPDATE "!source"
+                        SET file = ?,
+                            size = ?,
+                            timestamp = ?
+                        WHERE name = ?
+                    """, meta+(table_name,))
+                    cursor.execute("""
+                        DROP TABLE "%s"
+                    """ % table_name)
+            else:
+                cursor.execute("""
+                    INSERT INTO "!source" (name, file, size, timestamp)
+                    VALUES (?, ?, ?, ?)
+                """, (table_name,)+meta)
             try:
-                stream = open(source.file)
+                stream = open(source_file)
             except IOError, exc:
-                raise Error("failed to open file: %s" % source.file)
-            table_name = os.path.splitext(os.path.basename(source.file))[0]
-            table_name = table_name.decode('utf-8', 'replace')
-            if table_name:
-                table_name = to_name(table_name)
-            if (not table_name or table_name in table_names or
-                    re.match(r"^_\d+$", table_name)):
-                table_name = "_%s" % (source_idx+1)
-            table_names.add(table_name)
+                raise Error("Failed to open file", source_file)
             reader = csv.reader(stream)
             try:
                 columns_row = next(reader)
             except StopIteration:
-                return
+                continue
             if not columns_row:
-                return
+                continue
             column_names = []
             for idx, name in enumerate(columns_row):
                 name = name.decode('utf-8', 'replace')
 
 
 @once
+def build_names():
+    sources = context.app.tweak.filedb.sources
+    names = []
+    table_names = set()
+    for source_idx, source in enumerate(sources):
+        table_name = os.path.splitext(os.path.basename(source.file))[0]
+        table_name = table_name.decode('utf-8', 'replace')
+        table_name = to_name(table_name)
+        if (table_name in table_names or
+                re.match(r"^sqlite_", table_name) or
+                re.match(r"^_\d+$", table_name)):
+            table_name = "_%s" % (source_idx+1)
+        table_names.add(table_name)
+        names.append((table_name, source.file))
+    return names
+
+
+@once
 def build_filedb():
-    connection = sqlite3.connect(':memory:', check_same_thread=False)
-    BuildFileDB.__invoke__(connection)
-    connection.commit()
+    cache_file = context.app.tweak.filedb.cache_file
+    if cache_file is None:
+        cache_file = ':memory:'
+    with DBErrorGuard():
+        connection = sqlite3.connect(cache_file, check_same_thread=False)
+        BuildFileDB.__invoke__(connection)
+        connection.commit()
     return connection
 
 

File src/htsql/tweak/filedb/introspect.py

+#
+# Copyright (c) 2006-2012, Prometheus Research, LLC
+#
+
+
+from htsql_sqlite.core.introspect import IntrospectSQLite
+from .connect import build_names
+
+
+class IntrospectFileDBCleanup(IntrospectSQLite):
+
+    def __call__(self):
+        catalog = super(IntrospectFileDBCleanup, self).__call__()
+        table_names = set(name for name, file in build_names())
+        for schema in catalog.schemas:
+            for table in list(schema.tables):
+                if table.name not in table_names:
+                    table.remove()
+        return catalog
+
+

File test/input/addon.yaml

   - uri: /empty
     expect: 400
 
+  # CSV files for testing caching (first batch)
+  - write: build/regress/volatile.csv
+    data: |
+        source
+        volatile
+  - write: build/regress/permanent.csv
+    data: |
+        source
+        permanent
+  - write: build/regress/hidden.csv
+    data: |
+        source
+        hidden
+
+  # Initialize the application with a cache
+  - db: null
+    extensions:
+      htsql:
+        debug: true
+      tweak.filedb:
+        sources:
+        - file: build/regress/volatile.csv
+        - file: build/regress/permanent.csv
+        - file: build/regress/hidden.csv
+        cache-file: build/regress/filedb.sqlite
+
+  # Available tables
+  - uri: /volatile
+  - uri: /permanent
+  - uri: /hidden
+  - uri: /_source
+    expect: 400
+
+  # Update some CSV files and add another one
+  - write: build/regress/volatile.csv
+    data: |
+        source
+        volatile/updated
+  - write: build/regress/new.csv
+    data: |
+        source
+        new
+
+  # Make an application with updated files
+  - db: null
+    extensions:
+      htsql:
+        debug: true
+      tweak.filedb:
+        sources:
+        - file: build/regress/permanent.csv
+        - file: build/regress/volatile.csv
+        - file: build/regress/new.csv
+        cache-file: build/regress/filedb.sqlite
+
+  # Available tables
+  - uri: /permanent
+  - uri: /volatile
+  - uri: /hidden
+    expect: 400
+  - uri: /new
+
+  # Make an applicate with the cache file directly
+  - db:
+      engine: sqlite
+      database: build/regress/filedb.sqlite
+    extensions:
+      htsql:
+        debug: true
+
+  # Cache content
+  - uri: /permanent
+  - uri: /volatile
+  - uri: /hidden
+  - uri: /new
+  - uri: /_source
+    ignore: true
+  - uri: /{_source[permanent].timestamp<_source[volatile].timestamp}
+
   # Cleanup
   - remove:
     - build/regress/table.csv
     - build/regress/irregular-fields.csv
     - build/regress/nothing.csv
     - build/regress/duplicate-table-name.csv
-    - build/regress/Duplicate_Table_name.csv
+    - build/regress/Duplicate_Table_Name.csv
     - build/regress/empty.csv
+    - build/regress/permanent.csv
+    - build/regress/volatile.csv
+    - build/regress/hidden.csv
+    - build/regress/new.csv
+    - build/regress/filedb.sqlite
 
 # TWEAK.GATEWAY - define gateways to other databases
 - title: tweak.gateway

File test/output/sqlite.yaml

 
             `file`: the path to the CSV file.
 
+            Optional parameter `cache-file` allows you to specify a persistent
+            storage for the database.
+
             Parameters:
               sources=SOURCES          : source CSV files
+              cache-file=CACHE-FILE    : persistent storage
 
           exit: 0
         - uri: /table{id:integer,text,date:date}
             While translating:
                 /empty
                  ^^^^^
+        - uri: /volatile
+          status: 200 OK
+          headers:
+          - [Content-Type, text/plain; charset=UTF-8]
+          - [Vary, Accept]
+          body: |2
+             | volatile |
+             +----------+
+             | source   |
+            -+----------+-
+             | volatile |
+
+             ----
+             /volatile
+             SELECT "volatile"."source"
+             FROM "volatile"
+             ORDER BY 1 ASC
+        - uri: /permanent
+          status: 200 OK
+          headers:
+          - [Content-Type, text/plain; charset=UTF-8]
+          - [Vary, Accept]
+          body: |2
+             | permanent |
+             +-----------+
+             | source    |
+            -+-----------+-
+             | permanent |
+
+             ----
+             /permanent
+             SELECT "permanent"."source"
+             FROM "permanent"
+             ORDER BY 1 ASC
+        - uri: /hidden
+          status: 200 OK
+          headers:
+          - [Content-Type, text/plain; charset=UTF-8]
+          - [Vary, Accept]
+          body: |2
+             | hidden |
+             +--------+
+             | source |
+            -+--------+-
+             | hidden |
+
+             ----
+             /hidden
+             SELECT "hidden"."source"
+             FROM "hidden"
+             ORDER BY 1 ASC
+        - uri: /_source
+          status: 400 Bad Request
+          headers:
+          - [Content-Type, text/plain; charset=UTF-8]
+          body: |
+            Found unknown attribute:
+                _source
+            While translating:
+                /_source
+                 ^^^^^^^
+        - uri: /permanent
+          status: 200 OK
+          headers:
+          - [Content-Type, text/plain; charset=UTF-8]
+          - [Vary, Accept]
+          body: |2
+             | permanent |
+             +-----------+
+             | source    |
+            -+-----------+-
+             | permanent |
+
+             ----
+             /permanent
+             SELECT "permanent"."source"
+             FROM "permanent"
+             ORDER BY 1 ASC
+        - uri: /volatile
+          status: 200 OK
+          headers:
+          - [Content-Type, text/plain; charset=UTF-8]
+          - [Vary, Accept]
+          body: |2
+             | volatile         |
+             +------------------+
+             | source           |
+            -+------------------+-
+             | volatile/updated |
+
+             ----
+             /volatile
+             SELECT "volatile"."source"
+             FROM "volatile"
+             ORDER BY 1 ASC
+        - uri: /hidden
+          status: 400 Bad Request
+          headers:
+          - [Content-Type, text/plain; charset=UTF-8]
+          body: |
+            Found unknown attribute:
+                hidden
+            While translating:
+                /hidden
+                 ^^^^^^
+        - uri: /new
+          status: 200 OK
+          headers:
+          - [Content-Type, text/plain; charset=UTF-8]
+          - [Vary, Accept]
+          body: |2
+             | new    |
+             +--------+
+             | source |
+            -+--------+-
+             | new    |
+
+             ----
+             /new
+             SELECT "new"."source"
+             FROM "new"
+             ORDER BY 1 ASC
+        - uri: /permanent
+          status: 200 OK
+          headers:
+          - [Content-Type, text/plain; charset=UTF-8]
+          - [Vary, Accept]
+          body: |2
+             | permanent |
+             +-----------+
+             | source    |
+            -+-----------+-
+             | permanent |
+
+             ----
+             /permanent
+             SELECT "permanent"."source"
+             FROM "permanent"
+             ORDER BY 1 ASC
+        - uri: /volatile
+          status: 200 OK
+          headers:
+          - [Content-Type, text/plain; charset=UTF-8]
+          - [Vary, Accept]
+          body: |2
+             | volatile         |
+             +------------------+
+             | source           |
+            -+------------------+-
+             | volatile/updated |
+
+             ----
+             /volatile
+             SELECT "volatile"."source"
+             FROM "volatile"
+             ORDER BY 1 ASC
+        - uri: /hidden
+          status: 200 OK
+          headers:
+          - [Content-Type, text/plain; charset=UTF-8]
+          - [Vary, Accept]
+          body: |2
+             | hidden |
+             +--------+
+             | source |
+            -+--------+-
+             | hidden |
+
+             ----
+             /hidden
+             SELECT "hidden"."source"
+             FROM "hidden"
+             ORDER BY 1 ASC
+        - uri: /new
+          status: 200 OK
+          headers:
+          - [Content-Type, text/plain; charset=UTF-8]
+          - [Vary, Accept]
+          body: |2
+             | new    |
+             +--------+
+             | source |
+            -+--------+-
+             | new    |
+
+             ----
+             /new
+             SELECT "new"."source"
+             FROM "new"
+             ORDER BY 1 ASC
+        - uri: /_source
+          status: 200 OK
+          headers:
+          - [Content-Type, text/plain; charset=UTF-8]
+          - [Vary, Accept]
+          body: |2
+             | _source                                                        |
+             +-----------+-----------------------------+------+---------------+
+             | name      | file                        | size | timestamp     |
+            -+-----------+-----------------------------+------+---------------+-
+             | hidden    | build/regress/hidden.csv    |   14 | 1354577059.14 |
+             | new       | build/regress/new.csv       |   11 | 1354577059.93 |
+             | permanent | build/regress/permanent.csv |   17 | 1354577059.14 |
+             | volatile  | build/regress/volatile.csv  |   24 | 1354577059.93 |
+
+             ----
+             /_source
+             SELECT "!source"."name",
+                    "!source"."file",
+                    "!source"."size",
+                    "!source"."timestamp"
+             FROM "!source"
+             ORDER BY 1 ASC
+        - uri: /{_source[permanent].timestamp<_source[volatile].timestamp}
+          status: 200 OK
+          headers:
+          - [Content-Type, text/plain; charset=UTF-8]
+          - [Vary, Accept]
+          body: |2
+             | _source[permanent].timestamp<_source[volatile].timestamp |
+            -+----------------------------------------------------------+-
+             | true                                                     |
+
+             ----
+             /{_source[permanent].timestamp<_source[volatile].timestamp}
+             SELECT ("!source_1"."timestamp" < "!source_2"."timestamp")
+             FROM (SELECT 1 AS "/") AS "!"
+                  LEFT OUTER JOIN (SELECT "!source"."timestamp"
+                                   FROM "!source"
+                                   WHERE ("!source"."name" = 'permanent')) AS "!source_1"
+                                  ON 1
+                  LEFT OUTER JOIN (SELECT "!source"."timestamp"
+                                   FROM "!source"
+                                   WHERE ("!source"."name" = 'volatile')) AS "!source_2"
+                                  ON 1
       - id: tweak.meta
         tests:
         - ctl: [ext, tweak.meta]