Source

htsql-firebird / src / htsql / ctl / request.py

Full commit
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
#
# Copyright (c) 2006-2008, Prometheus Research, LLC
# See `LICENSE` for license information, `AUTHORS` for the list of authors.
#


"""
:mod:`htsql.ctl.request`
========================

This module implements the `get` and `post` routines.
"""


from .error import ScriptError
from .routine import Argument, Routine
from .option import (InputOption, OutputOption, PasswordOption,
                     RemoteUserOption, WithHeadersOption,
                     ContentTypeOption, ExtensionsOption, ConfigOption)
from ..core.util import DB, maybe, oneof, listof, tupleof, dictof, filelike
from ..core.validator import DBVal, StrVal
import sys
import wsgiref.util
import urllib
import traceback
import StringIO
import mimetypes
import getpass
import re
import yaml, yaml.constructor


# Pick the base class for the configuration loader: `yaml.CSafeLoader` when
# the installed `yaml` module provides it, `yaml.SafeLoader` otherwise.
BaseYAMLLoader = getattr(yaml, 'CSafeLoader', yaml.SafeLoader)


class ConfigYAMLLoader(BaseYAMLLoader):

    name_pattern = ur"""
        ^
        [a-zA-Z_-][0-9a-zA-Z_-]*
        $
    """
    name_regexp = re.compile(name_pattern, re.X)
    dotted_name_pattern = ur"""
        ^
        [a-zA-Z_-][0-9a-zA-Z_-]*
        (?: \. [a-zA-Z_-][0-9a-zA-Z_-]* )*
        $
    """
    dotted_name_regexp = re.compile(dotted_name_pattern, re.X)

    def load(self):
        return self.get_single_data()

    def construct_document(self, node):
        document_node = node
        if (not (isinstance(document_node, yaml.ScalarNode) and
                document_node.tag == u'tag:yaml.org,2002:null') and
            not (isinstance(document_node, yaml.MappingNode) and
                 document_node.tag == u'tag:yaml.org,2002:map')):
            raise yaml.constructor.ConstructorError(None, None,
                    "invalid structure of configuration file",
                    document_node.start_mark)
        if isinstance(document_node, yaml.MappingNode):
            for name_node, addon_node in document_node.value:
                if not (isinstance(name_node, yaml.ScalarNode) and
                        name_node.tag == u'tag:yaml.org,2002:str' and
                        self.dotted_name_regexp.match(name_node.value)):
                    raise yaml.constructor.ConstructorError(None, None,
                            "invalid addon name", name_node.start_mark)
            if (not (isinstance(addon_node, yaml.ScalarNode) and
                    addon_node.tag == u'tag:yaml.org,2002:null') and
                not (isinstance(addon_node, yaml.MappingNode) and
                     addon_node.tag == u'tag:yaml.org,2002:map')):
                raise yaml.constructor.ConstructorError(None, None,
                        "invalid addon configuration", addon_node.start_mark)
                if isinstance(addon_node, yaml.MappingNode):
                    for attribute_node, value_node in addon_node.value:
                        if not (isinstance(attribute_node, yaml.ScalarNode) and
                                attribute_node.tag
                                    == u'tag:yaml.org,2002:str' and
                                self.name_regexp.match(attribute_node.value)):
                            raise yaml.constructor.ConstructorError(None, None,
                                    "invalid parameter name",
                                    attribute_node.start_mark)
        return super(ConfigYAMLLoader, self).construct_document(document_node)


class Request(object):
    """
    A WSGI request ready to be executed against an application.

    `environ`
        The WSGI `environ` dictionary describing the request.
    """

    @classmethod
    def prepare(cls, method, query, remote_user=None,
                content_type=None, content_body=None,
                extra_headers=None):
        """
        Builds a :class:`Request` object out of HTTP-level parameters.

        `method` (``'GET'`` or ``'POST'``)
            The HTTP method of the request.

        `query` (a string)
            The URI of the request: the path optionally followed by ``?``
            and the query string.

        `remote_user` (a string or ``None``)
            The authenticated user, if any.

        `content_type` (a string or ``None``)
            The content type of the request body; only allowed for
            ``'POST'``.  When omitted, it is guessed from the ``name``
            attribute of `content_body`; when that is not possible,
            ``'application/octet-stream'`` is used.

        `content_body` (a string, a file or a file-like object or ``None``)
            The request body; required for ``'POST'`` and forbidden for
            ``'GET'``.  A file-like object is read in full.

        `extra_headers` (a dictionary or ``None``)
            Additional HTTP headers, keyed by the header name.
        """

        # Verify the types and the combination of the arguments.
        assert method in ['GET', 'POST']
        assert isinstance(query, str)
        assert isinstance(remote_user, maybe(str))
        assert isinstance(content_type, maybe(str))
        assert isinstance(content_body, maybe(oneof(str, filelike())))
        assert isinstance(extra_headers, maybe(dictof(str, str)))
        if method == 'GET':
            assert content_type is None
            assert content_body is None
        if method == 'POST':
            assert content_body is not None

        # Everything before the first `?` is the (URL-encoded) path,
        # everything after it is the query string.
        raw_path, _, query_string = query.partition('?')

        # The WSGI `environ` variable, see PEP 333.
        environ = {
            'REQUEST_METHOD': method,
            'SCRIPT_NAME': '',
            'PATH_INFO': urllib.unquote(raw_path),
            'QUERY_STRING': query_string,
        }

        if remote_user is not None:
            environ['REMOTE_USER'] = remote_user

        if method == 'POST':
            # Without an explicit content type, try to derive it from the
            # file name of the body stream.
            if content_type is None and hasattr(content_body, 'name'):
                content_type = mimetypes.guess_type(content_body.name)[0]
            # Fall back to the generic binary type when guessing fails.
            if content_type is None:
                content_type = 'application/octet-stream'
            # Turn a file-like body into a string.
            if not isinstance(content_body, str):
                content_body = content_body.read()
            environ['CONTENT_TYPE'] = content_type
            environ['CONTENT_LENGTH'] = str(len(content_body))
            environ['wsgi.input'] = StringIO.StringIO(content_body)

        # Expose the extra headers as `HTTP_*` variables.
        if extra_headers is not None:
            for header in extra_headers:
                cgi_name = 'HTTP_%s' % header.upper().replace('-', '_')
                environ[cgi_name] = extra_headers[header]

        # Let `wsgiref` fill in whatever WSGI parameters are still missing.
        wsgiref.util.setup_testing_defaults(environ)

        return cls(environ)

    def __init__(self, environ):
        assert isinstance(environ, dictof(str, object))
        self.environ = environ

    def execute(self, app):
        """
        Runs the request through a WSGI application.

        `app`
            The WSGI application to call.

        Returns a :class:`Response` object; an exception raised while
        calling the application or consuming its output is stored in the
        response rather than propagated.
        """

        # Accumulates the status, the headers, the body and any exception.
        result = Response()

        # The WSGI `start_response` callback: records the status line and
        # the headers in `result`.
        def start_response(status, headers, exc_info=None):
            result.set(status=status, headers=headers)
            # The application is not expected to use the `write` stream,
            # so a throw-away buffer is returned.
            return StringIO.StringIO()

        # Hand the application a copy of `environ` in case it modifies it.
        # TODO: that is not enough to make `execute()` truly re-entrant: for
        # POST requests, we also need to save the `environ['wsgi.input']`
        # stream.  For now, assume that a `Request` object could be executed
        # only once.
        environ_copy = self.environ.copy()

        # Call the application and collect its output.
        try:
            chunks = app(environ_copy, start_response)
            try:
                result.set(body=''.join(chunks))
            finally:
                if hasattr(chunks, 'close'):
                    chunks.close()
        except Exception:
            # Keep the exception data for the caller to inspect.
            result.set(exc_info=sys.exc_info())

        return result


class Response(object):
    """
    Holds the outcome of executing a WSGI request.

    `status` (a string)
        The HTTP status line.

    `headers` (a list of pairs)
        The HTTP headers.

    `body` (a string)
        The HTTP body.

    `exc_info` (a tuple ``(type, value, traceback)`` or ``None``)
        The exception raised while the request was executed, if any.
    """

    def __init__(self):
        # All parameters start unset; they are filled in with `set()`.
        self.status = None
        self.headers = None
        self.body = None
        self.exc_info = None

    def set(self, **attributes):
        """
        Assigns new values to the given response parameters.

        Each keyword must name an existing attribute of the response.
        """
        for name, value in attributes.items():
            assert hasattr(self, name)
            setattr(self, name, value)

    def complete(self):
        """
        Tells whether the response is complete.

        Returns ``True`` when the status is a string of the form
        ``'NNN ...'``, the headers are a list of string pairs, the body is
        a string and no exception was recorded; ``False`` otherwise.
        """
        status = self.status
        # The status line: three digits followed by a space.
        if not isinstance(status, str):
            return False
        if not (status[:3].isdigit() and status[3:4] == ' '):
            return False
        if not isinstance(self.headers, listof(tupleof(str, str))):
            return False
        return isinstance(self.body, str) and self.exc_info is None

    def dump(self, stream, with_headers=False):
        """
        Writes the response to a stream.

        `stream` (a file or a file-like object)
            The destination of the output.

        `with_headers`
            If set, the status line and the headers are written before
            the body.
        """
        # Only a complete response can be dumped.
        assert self.complete()

        emit = stream.write

        # The status line, the headers and the separating blank line.
        if with_headers:
            emit("%s\r\n" % self.status)
            for name, value in self.headers:
                emit("%s: %s\r\n" % (name, value))
            emit("\r\n")

        # The body itself.
        emit(self.body)

        # On a console, terminate the output with a line break unless the
        # body already ends with one.
        ends_with_newline = (not self.body or
                             self.body.endswith(("\r", "\n")))
        if not ends_with_newline:
            if hasattr(stream, 'isatty') and stream.isatty():
                emit("\r\n")


class GetPostBaseRoutine(Routine):
    """
    Implements the common methods for the `get` and `post` routines.

    Both routines take a connection URI and an HTSQL query as arguments
    and execute an HTTP request.
    """

    # The arguments are the same for both routines.
    arguments = [
            Argument('db', DBVal(),
                     hint="""the connection URI"""),
            Argument('query', StrVal(),
                     hint="""the HTSQL query"""),
    ]
    # These are common options for both routines.  The `post` routine
    # adds some extra options.
    options = [
            PasswordOption,
            ExtensionsOption,
            ConfigOption,
            RemoteUserOption,
            OutputOption,
            WithHeadersOption,
    ]
    # The HTTP method implemented by the routine.
    method = None

    def run(self):
        # The database URI.
        db = self.db

        # Ask for the database password if necessary.
        if self.password and db is not None:
            db = DB(engine=db.engine,
                    username=db.username,
                    password=getpass.getpass(),
                    host=db.host,
                    port=db.port,
                    database=db.database,
                    options=db.options)

        # Load addon configuration.
        extensions = self.extensions
        if self.config is not None:
            stream = open(self.config, 'rb')
            loader = ConfigYAMLLoader(stream)
            try:
                config_extension = loader.load()
            except yaml.YAMLError, exc:
                raise ScriptError("failed to load application configuration:"
                                  " %s" % exc)
            extensions = extensions + [config_extension]

        # Create the HTSQL application.
        from htsql import HTSQL
        try:
            app = HTSQL(db, *extensions)
        except ImportError, exc:
            raise ScriptError("failed to construct application: %s" % exc)

        # Prepare a WSGI `environ` variable.
        if self.method == 'GET':
            request = Request.prepare('GET', self.query, self.remote_user)
        elif self.method == 'POST':
            if self.input is None:
                input_stream = self.ctl.stdin
            else:
                input_stream = open(self.input, 'rb')
            request = Request.prepare('POST', self.query, self.remote_user,
                                      self.content_type, input_stream)

        # Execute the WSGI request.
        response = request.execute(app)

        # Check for errors.
        if response.exc_info is not None:
            exc_type, exc_value, exc_traceback = response.exc_info
            traceback.print_exception(exc_type, exc_value, exc_traceback,
                                      file=self.ctl.stderr)
            raise ScriptError("exception while executing an HTSQL request")
        if not response.complete():
            raise ScriptError("incomplete response")

        # Dump the response.
        if self.output is None:
            output_stream = self.ctl.stdout
        else:
            output_stream = open(self.output, 'wb')
        response.dump(output_stream, self.with_headers)

        # Complain when the response status is not `200 OK`.
        if not response.status.startswith('200'):
            raise ScriptError("unexpected status code: %s" % response.status)


class GetRoutine(GetPostBaseRoutine):
    """
    The `get` routine.

    Executes an HTSQL query over the specified database using the
    ``GET`` HTTP method.
    """

    # The HTTP method used by the inherited `run()`.
    method = 'GET'
    # The name, the one-line summary and the full description of the routine.
    name = 'get'
    hint = """execute and render an HTSQL query"""
    help = """
    The routine executes an HTSQL query and displays the response.

    The DB argument specifies database connection parameters; must have the
    form:
    
        engine://username:password@host:port/database

    Here,
    
      - ENGINE is the type of the database server; supported values are
        `sqlite`, `pgsql`, `mysql`, `mssql` and `oracle`.
      - The parameters USERNAME:PASSWORD are used for authentication.
      - The parameters HOST:PORT indicate the address of the database
        server.
      - DATABASE is the name of the database; for SQLite, the path to the
        database file.

    All parameters except ENGINE and DATABASE are optional.

    The QUERY argument is the HTSQL query to execute.

    Use option `--remote-user USER` to specify the remote user of the HTTP
    request.  By default, the remote user is not set.

    Use option `--output FILE` to specify the file to write the response.
    If the option is not set, the response is written to the console.

    Use option `--with-headers` to indicate that the response status code
    and headers should be displayed.  By default, only the response body is
    written.
    """


class PostRoutine(GetPostBaseRoutine):
    """
    The `post` routine.

    Executes an HTSQL query with POST data over the specified database
    using the ``POST`` HTTP method.
    """

    # The HTTP method used by the inherited `run()`.
    method = 'POST'
    name = 'post'
    # The POST-specific options come first, followed by the common ones.
    options = [
            InputOption,
            ContentTypeOption,
    ] + GetPostBaseRoutine.options
    hint = """execute and render an HTSQL query with POST data"""
    help = """
    The routine executes an HTSQL query with POST data and displays the
    response.

    The DB argument specifies database connection parameters; must have
    the form:
    
        engine://username:password@host:port/database

    Here,
    
      - ENGINE is the type of the database server; supported values are
        `sqlite`, `pgsql`, `mysql`, `mssql` and `oracle`.
      - The parameters USERNAME:PASSWORD are used for authentication.
      - The parameters HOST:PORT indicate the address of the database
        server.
      - DATABASE is the name of the database; for SQLite, the path to the
        database file.

    All parameters except ENGINE and DATABASE are optional.

    The QUERY argument is the HTSQL query to execute.

    Use option `--content-type TYPE` to specify the content type of the POST
    data.  If the option is not provided, the content type is guessed from
    the file name.

    Use option `--input FILE` to specify a file containing the POST data.
    If the option is not set, the routine reads the POST data from the
    console.

    Use option `--remote-user USER` to specify the remote user of the HTTP
    request.  By default, the remote user is not set.

    Use option `--output FILE` to specify the file to write the response.
    If the option is not set, the response is written to the console.

    Use option `--with-headers` to indicate that the response status code
    and headers should be displayed.  By default, only the response body is
    written.
    """