Source

wsgiform / wsgiform.py

Full commit
# Copyright (c) 2006 L. C. Rees
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
#    1. Redistributions of source code must retain the above copyright notice, 
#       this list of conditions and the following disclaimer.
#    
#    2. Redistributions in binary form must reproduce the above copyright 
#       notice, this list of conditions and the following disclaimer in the
#       documentation and/or other materials provided with the distribution.
#
#    3. Neither the name of WsgiForm nor the names of its contributors may be used
#       to endorse or promote products derived from this software without
#       specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

'''WSGI middleware for parsing form data into dictionaries, individual environ
entries, cgi.FieldStorage instances, or keyword arguments that can be passed to
WSGI applications in the environ dictionary. Features include hooks for form
validation, HTML escaping, and data sterilization.
'''

import cgi
import xml.sax.saxutils as s
import string

# Characters that sterilize() deletes: the ASCII punctuation marks other than
# ., -, and _ (letters, digits and whitespace are not listed, so they survive).
_notsterile = '!"#$%&\'()*+,/:;<=>?@[\\]^`{|}~'
# Translation table built from two empty strings, i.e. no character is
# remapped; sterilize() passes it to translate() purely so that it can supply
# _notsterile as the set of characters to delete.
_trans = string.maketrans('', '')    


def extract(environ, empty=False, err=False):
    '''Extracts strings in form data.

    @param environ WSGI environ dictionary; must contain 'wsgi.input'
    @param empty Passed to cgi.parse as keep_blank_values
    @param err Passed to cgi.parse as strict_parsing
    @return Dictionary keyed by field name. A field submitted exactly once
        maps to its single value; any other field keeps the list of values
        that cgi.parse produced.
    '''
    qdict = cgi.parse(environ['wsgi.input'], environ, empty, err)
    # Iterate over a snapshot of the items: values are reassigned in the same
    # dictionary below, and items() (unlike iteritems()) exists on every
    # Python version.
    for key, value in list(qdict.items()):
        # Unwrap single-element lists so callers get the bare value
        if len(value) == 1:
            qdict[key] = value[0]
    return qdict

def escape(environ, empty=False, err=False):
    '''Escapes XML/HTML in form data.

    Every string value, including each string inside a multi-valued field's
    list, has &, <, >, double quotes and single quotes replaced by entities.

    @param environ WSGI environ dictionary; must contain 'wsgi.input'
    @param empty Forwarded to extract
    @param err Forwarded to extract
    @return Dictionary of escaped form data, shaped like extract's result
    '''
    # saxutils.escape handles &, < and > itself; quotes are added here
    entities = {'"': "&quot;", "'": '&#39;'}
    qdict = extract(environ, empty, err)
    for key, value in list(qdict.items()):
        if isinstance(value, basestring):
            qdict[key] = s.escape(value, entities)
        elif isinstance(value, list):
            for num, item in enumerate(value):
                # Test the item, not the enclosing list: testing the list
                # was always false and left multi-valued fields unescaped.
                if isinstance(item, basestring):
                    value[num] = s.escape(item, entities)
    return qdict

def sterilize(environ, empty=False, err=False):
    '''Removes the punctuation characters listed in _notsterile from form data.

    Every string value, including each string inside a multi-valued field's
    list, has those characters deleted. Alphanumerics, ., -, and _ are kept;
    note that whitespace is not in the deletion set and is kept as well.

    @param environ WSGI environ dictionary; must contain 'wsgi.input'
    @param empty Forwarded to extract
    @param err Forwarded to extract
    @return Dictionary of sterilized form data, shaped like extract's result
    '''
    qdict = extract(environ, empty, err)
    for key, value in list(qdict.items()):
        if isinstance(value, basestring):
            qdict[key] = value.translate(_trans, _notsterile)
        elif isinstance(value, list):
            # enumerate supplies the index needed to write the cleaned item
            # back into the list in place.
            for num, item in enumerate(value):
                if isinstance(item, basestring):
                    value[num] = item.translate(_trans, _notsterile)
    return qdict
    

class WsgiForm(object):

    '''Middleware that parses form data into dictionaries, individual environ
    entries, cgi.FieldStorage instances, or keyword arguments and hands them
    to a WSGI application through the environ dictionary.

    Every public parsing method takes (environ, start_return), stores the
    parsed form in environ, and returns whatever the wrapped application
    returns. The "strict" variants request empty=True and err=True from the
    parsing function.
    '''

    def __init__(self, app, **kw):
        '''@param app WSGI callable
        @param kw Keyword arguments: envprefix, fieldstorage, dict, kwargs
            and validators (see the attribute comments below).'''
        self.app = app
        # Format string producing the key of each individual environ entry
        self.prefix = kw.get('envprefix', 'wsgiform.%s')
        # Environ key under which the FieldStorage instance is stored
        self.fkey = kw.get('fieldstorage', 'wsgiform.fieldstorage')
        # Environ key under which the form dictionary is stored
        self.dkey = kw.get('dict', 'wsgiform.dict')
        # Environ key under which the keyword arguments are stored
        self.kkey = kw.get('kwargs', 'wsgize.kwargs')
        # Dictionary of validators where the keywords are form field names
        self.validators = kw.get('validators', {})

    def _call(self, environ, func, key, empty=False, err=False):
        '''Pattern for dicts and kwargs.

        @param environ WSGI environ dictionary
        @param func Parsing function called as func(environ, empty, err)
        @param key Environ key that receives the parsed dictionary
        @param empty Forwarded to func
        @param err Forwarded to func
        @return The same environ, with environ[key] set
        '''
        # Forward empty/err so the strict variants really differ from the
        # plain ones.
        qdict = func(environ, empty, err)
        if self.validators:
            self.validate(qdict)
        environ[key] = qdict
        return environ

    def _callenv(self, environ, func, empty=False, err=False):
        '''Pattern for environ entries.

        @param environ WSGI environ dictionary
        @param func Parsing function called as func(environ, empty, err)
        @param empty Forwarded to func
        @param err Forwarded to func
        @return The same environ, with one entry added per form field, keyed
            by self.prefix formatted with the field name
        '''
        qdict = func(environ, empty, err)
        if self.validators:
            self.validate(qdict)
        for k, v in qdict.items():
            environ[self.prefix % k] = v
        return environ

    def validate(self, qdict):
        '''Validates form data.

        Calls self.validators[field](value) for every field that has a
        validator; fields without one are skipped. The validator's return
        value is ignored, and any exception it raises propagates.

        @param qdict Dictionary of form data indexed by form field
        '''
        for key, value in qdict.items():
            # Only the lookup is guarded, so a KeyError raised inside a
            # validator is not mistaken for "no validator registered".
            try:
                validator = self.validators[key]
            except KeyError:
                continue
            validator(value)

    def fieldstorage(self, env, start_return):
        '''Parses WSGI input from a form into a FieldStorage instance.'''
        env[self.fkey] = cgi.FieldStorage(fp=env['wsgi.input'], environ=env)
        return self.app(env, start_return)

    def dictionary(self, environ, start_return):
        '''Parses WSGI input from a form into a dictionary.'''
        return self.app(self._call(environ, extract, self.dkey), start_return)

    def kwargs(self, environ, start_return):
        '''Parses WSGI input from a form into keyword arguments.'''
        return self.app(self._call(environ, extract, self.kkey), start_return)

    def environ(self, environ, start_return):
        '''Parses WSGI input from a form into individual environ entries.'''
        return self.app(self._callenv(environ, extract), start_return)

    def dict_escape(self, environ, start_return):
        '''Parses WSGI input from a form into a dictionary and HTML escapes
        each string value.
        '''
        return self.app(self._call(environ, escape, self.dkey), start_return)

    def kwargs_escape(self, environ, start_return):
        '''Parses WSGI input from a form into keyword arguments and HTML
        escapes each string value.
        '''
        return self.app(self._call(environ, escape, self.kkey), start_return)

    def environ_escape(self, environ, start_return):
        '''Parses WSGI input from a form into individual environ entries and
        HTML escapes each string value.
        '''
        return self.app(self._callenv(environ, escape), start_return)

    def dict_strictescape(self, environ, start_return):
        '''Parses WSGI input from a form into a dictionary and strictly HTML
        escapes each string value (empty=True, err=True).
        '''
        return self.app(
            self._call(environ, escape, self.dkey, True, True), start_return)

    def kwargs_strictescape(self, environ, start_return):
        '''Parses WSGI input from a form into keyword arguments and strictly
        HTML escapes each string value (empty=True, err=True).
        '''
        return self.app(
            self._call(environ, escape, self.kkey, True, True), start_return)

    def environ_strictescape(self, environ, start_return):
        '''Parses WSGI input from a form into individual environ entries and
        strictly HTML escapes each string value (empty=True, err=True).
        '''
        return self.app(
            self._callenv(environ, escape, True, True), start_return)

    def dict_sterilize(self, environ, start_return):
        '''Parses WSGI input from a form into a dictionary and sterilizes the
        data of each string value.
        '''
        return self.app(
            self._call(environ, sterilize, self.dkey), start_return)

    def kwargs_sterilize(self, environ, start_return):
        '''Parses WSGI input from a form into keyword arguments and sterilizes
        the data of each string value.
        '''
        return self.app(
            self._call(environ, sterilize, self.kkey), start_return)

    def environ_sterilize(self, environ, start_return):
        '''Parses WSGI input from a form into individual environ entries and
        sterilizes the data of each string value.
        '''
        return self.app(self._callenv(environ, sterilize), start_return)

    def dict_strictsterilize(self, environ, start_return):
        '''Parses WSGI input from a form into a dictionary and strictly
        sterilizes the data of each string value (empty=True, err=True).
        '''
        return self.app(
            self._call(environ, sterilize, self.dkey, True, True),
            start_return)

    def kwargs_strictsterilize(self, environ, start_return):
        '''Parses WSGI input from a form into keyword arguments and strictly
        sterilizes the data of each string value (empty=True, err=True).
        '''
        return self.app(
            self._call(environ, sterilize, self.kkey, True, True),
            start_return)

    def environ_strictsterilize(self, environ, start_return):
        '''Parses WSGI input from a form into individual environ entries and
        strictly sterilizes the data of each string value (empty=True,
        err=True).
        '''
        return self.app(
            self._callenv(environ, sterilize, True, True), start_return)


# Star-imports expose only the middleware class; the module-level helper
# functions remain importable by explicit name.
__all__ = ['WsgiForm']