Anonymous avatar Anonymous committed c411b79

Added a comment middleware example

Comments (0)

Files changed (4)

docs/comment-example-code/example.py

+import os
+import urllib
+import time
+import re
+from cPickle import load, dump
+from webob import Request, Response, html_escape
+from webob import exc
+    
+class Commenter(object):
+
+    def __init__(self, app, storage_dir):
+        self.app = app
+        self.storage_dir = storage_dir
+        if not os.path.exists(storage_dir):
+            os.makedirs(storage_dir)
+
+    def __call__(self, environ, start_response):
+        req = Request(environ)
+        if req.path_info_peek() == '.comments':
+            return self.process_comment(req)(environ, start_response)
+        # This is the base path of *this* middleware:
+        base_url = req.application_url
+        resp = req.get_response(self.app)
+        if resp.content_type != 'text/html' or resp.status_int != 200:
+            # Not an HTML response, we don't want to
+            # do anything to it
+            return resp(environ, start_response)
+        # Make sure the content isn't gzipped:
+        resp.decode_content()
+        comments = self.get_data(req.url)
+        body = resp.body
+        body = self.add_to_end(body, self.format_comments(comments))
+        body = self.add_to_end(body, self.submit_form(base_url, req))
+        resp.body = body
+        return resp(environ, start_response)
+
+    def get_data(self, url):
+        # Double-quoting makes the filename safe
+        filename = self.url_filename(url)
+        if not os.path.exists(filename):
+            return []
+        else:
+            f = open(filename, 'rb')
+            data = load(f)
+            f.close()
+            return data
+
+    def save_data(self, url, data):
+        filename = self.url_filename(url)
+        f = open(filename, 'wb')
+        dump(data, f)
+        f.close()
+
+    def url_filename(self, url):
+        return os.path.join(self.storage_dir, urllib.quote(url, ''))
+
+    _end_body_re = re.compile(r'</body.*?>', re.I|re.S)
+
+    def add_to_end(self, html, extra_html):
+        """
+        Adds extra_html to the end of the html page (before </body>)
+        """
+        match = self._end_body_re.search(html)
+        if not match:
+            return html + extra_html
+        else:
+            return html[:match.start()] + extra_html + html[match.start():]
+
+    def format_comments(self, comments):
+        if not comments:
+            return ''
+        text = []
+        text.append('<hr>')
+        text.append('<h2><a name="comment-area"></a>Comments (%s):</h2>' % len(comments))
+        for comment in comments:
+            text.append('<h3><a href="%s">%s</a> at %s:</h3>' % (
+                html_escape(comment['homepage']), html_escape(comment['name']),
+                time.strftime('%c', comment['time'])))
+            # Susceptible to XSS attacks!:
+            text.append(comment['comments'])
+        return ''.join(text)
+
+    def submit_form(self, base_path, req):
+        return '''<h2>Leave a comment:</h2>
+        <form action="%s/.comments" method="POST">
+         <input type="hidden" name="url" value="%s">
+         <table width="100%%">
+          <tr><td>Name:</td>
+              <td><input type="text" name="name" style="width: 100%%"></td></tr>
+          <tr><td>URL:</td>
+              <td><input type="text" name="homepage" style="width: 100%%"></td></tr>
+         </table>
+         Comments:<br>
+         <textarea name="comments" rows=10 style="width: 100%%"></textarea><br>
+         <input type="submit" value="Submit comment">
+        </form>
+        ''' % (base_path, html_escape(req.url))
+
+    def process_comment(self, req):
+        try:
+            url = req.params['url']
+            name = req.params['name']
+            homepage = req.params['homepage']
+            comments = req.params['comments']
+        except KeyError, e:
+            resp = exc.HTTPBadRequest('Missing parameter: %s' % e)
+            return resp
+        data = self.get_data(url)
+        data.append(dict(
+            name=name,
+            homepage=homepage,
+            comments=comments,
+            time=time.gmtime()))
+        self.save_data(url, data)
+        resp = exc.HTTPSeeOther(location=url+'#comment-area')
+        return resp
+
+if __name__ == '__main__':
+    import optparse
+    parser = optparse.OptionParser(
+        usage='%prog --port=PORT BASE_DIRECTORY'
+        )
+    parser.add_option(
+        '-p', '--port',
+        default='8080',
+        dest='port',
+        type='int',
+        help='Port to serve on (default 8080)')
+    parser.add_option(
+        '--comment-data',
+        default='./comments',
+        dest='comment_data',
+        help='Place to put comment data into (default ./comments/)')
+    options, args = parser.parse_args()
+    if not args:
+        parser.error('You must give a BASE_DIRECTORY')
+    base_dir = args[0]
+    from paste.urlparser import StaticURLParser
+    app = StaticURLParser(base_dir)
+    app = Commenter(app, options.comment_data)
+    from wsgiref.simple_server import make_server
+    httpd = make_server('localhost', options.port, app)
+    print 'Serving on http://localhost:%s' % options.port
+    try:
+        httpd.serve_forever()
+    except KeyboardInterrupt:
+        print '^C'
+
+    
+    

docs/comment-example.txt

+Comment Example
+===============
+
+.. contents::
+
+Introduction
+------------
+
+This is an example of how to write WSGI middleware with WebOb.  The
+specific example adds a simple comment form to HTML web pages; any
+page served through the middleware that is HTML gets a comment form
+added to it, and shows any existing comments.
+
+Code
+----
+
+The finished code for this is available in
+``docs/comment-example-code/example.py`` -- you can run this file as a
+script to try it out.
+
+Instantiating Middleware
+------------------------
+
+Middleware of any complexity at all is usually best created as a
+class with its configuration as arguments to that class.
+
+Every middleware needs an application (``app``) that it wraps.  This
+middleware also needs a location to store the comments; we'll put them
+all in a single directory.
+
+.. code-block::
+
+    import os
+    
+    class Commenter(object):
+        def __init__(self, app, storage_dir):
+            self.app = app
+            self.storage_dir = storage_dir
+            if not os.path.exists(storage_dir):
+                os.makedirs(storage_dir)
+
+When you use this middleware, you'll use it like:
+
+.. code-block::
+
+    app = ... make the application ...
+    app = Commenter(app, storage_dir='./comments')
+
+For our application we'll use a simple static file server that is
+included with `Paste <http://pythonpaste.org>`_ (use ``easy_install
+Paste`` to install this).  The setup is all at the bottom of
+``example.py``, and looks like this:
+
+.. code-block::
+
+    if __name__ == '__main__':
+        import optparse
+        parser = optparse.OptionParser(
+            usage='%prog --port=PORT BASE_DIRECTORY'
+            )
+        parser.add_option(
+            '-p', '--port',
+            default='8080',
+            dest='port',
+            type='int',
+            help='Port to serve on (default 8080)')
+        parser.add_option(
+            '--comment-data',
+            default='./comments',
+            dest='comment_data',
+            help='Place to put comment data into (default ./comments/)')
+        options, args = parser.parse_args()
+        if not args:
+            parser.error('You must give a BASE_DIRECTORY')
+        base_dir = args[0]
+        from paste.urlparser import StaticURLParser
+        app = StaticURLParser(base_dir)
+        app = Commenter(app, options.comment_data)
+        from wsgiref.simple_server import make_server
+        httpd = make_server('localhost', options.port, app)
+        print 'Serving on http://localhost:%s' % options.port
+        try:
+            httpd.serve_forever()
+        except KeyboardInterrupt:
+            print '^C'
+
+I won't explain it here, but basically it takes some options, creates
+an application that serves static files
+(``StaticURLParser(base_dir)``), wraps it with ``Commenter(app,
+options.comment_data)`` then serves that.
+
+The Middleware
+--------------
+
+While we've created the class structure for the middleware, it doesn't
+actually do anything.  Here's a kind of minimal version of the
+middleware (using WebOb):
+
+.. code-block::
+
+    from webob import Request
+
+    class Commenter(object):
+
+        def __init__(self, app, storage_dir):
+            self.app = app
+            self.storage_dir = storage_dir
+            if not os.path.exists(storage_dir):
+                os.makedirs(storage_dir)
+
+        def __call__(self, environ, start_response):
+            req = Request(environ)
+            resp = req.get_response(self.app)
+            return resp(environ, start_response)
+
+This doesn't modify the response it any way.  You could write it like
+this, without WebOb, like this:
+
+.. code-block::
+
+    class Commenter(object):
+        ...
+        def __call__(self, environ, start_response):
+            return self.app(environ, start_response)
+
+But it won't be as convenient later.  First, lets create a little bit
+of infrastructure for our middleware.  We need to save and load
+per-url data (the comments themselves).  We'll keep them in pickles,
+where each url has a pickle named after the url (but re-encoded, so
+``http://localhost:8080/index.html`` becomes
+``http%3A%2F%2Flocalhost%3A8080%2Findex.html``).
+
+.. code-block::
+
+    from cPickle import load, dump
+
+    class Commenter(object):
+        ...
+
+        def get_data(self, url):
+            # Double-quoting makes the filename safe
+            filename = self.url_filename(url)
+            if not os.path.exists(filename):
+                return []
+            else:
+                f = open(filename, 'rb')
+                data = load(f)
+                f.close()
+                return data
+
+        def save_data(self, url, data):
+            filename = self.url_filename(url)
+            f = open(filename, 'wb')
+            dump(data, f)
+            f.close()
+
+        def url_filename(self, url):
+            return os.path.join(self.storage_dir, urllib.quote(url, ''))
+
+You can get the full request URL with ``req.url``, so to get the
+comment data with these methods you do ``data =
+self.get_data(req.url)``.
+
+Now we'll update the ``__call__`` method to filter *some* responses,
+and get the comment data for those.  We don't want to change responses
+that were error responses (anything but ``200``), nor do we want to
+filter responses that aren't HTML.  So we get:
+
+.. code-block::
+
+    class Commenter(object):
+        ...
+
+        def __call__(self, environ, start_response):
+            req = Request(environ)
+            resp = req.get_response(self.app)
+            if resp.content_type != 'text/html' or resp.status_int != 200:
+                return resp(environ, start_response)
+            data = self.get_data(req.url)
+            ... do stuff with data, update resp ...
+            return resp(environ, start_response)
+
+So far we're punting on actually adding the comments to the page.  We
+also haven't defined what ``data`` will hold.  Let's say it's a list
+of dictionaries, where each dictionary looks like ``{'name': 'John
+Doe', 'homepage': 'http://blog.johndoe.com', 'comments': 'Great
+site!'}``.
+
+We'll also need a simple method to add stuff to the page.  We'll use a
+regular expression to find the end of the page and put text in:
+
+.. code-block::
+
+    import re
+
+    class Commenter(object):
+        ...
+
+        _end_body_re = re.compile(r'</body.*?>', re.I|re.S)
+
+        def add_to_end(self, html, extra_html):
+            """
+            Adds extra_html to the end of the html page (before </body>)
+            """
+            match = self._end_body_re.search(html)
+            if not match:
+                return html + extra_html
+            else:
+                return html[:match.start()] + extra_html + html[match.start():]
+
+And then we'll use it like:
+
+.. code-block::
+
+    data = self.get_data(req.url)
+    body = resp.body
+    body = self.add_to_end(body, self.format_comments(data))
+    resp.body = body
+    return resp(environ, start_response)
+
+We get the body, update it, and put it back in the response.  This
+also updates ``Content-Length``.  Then we define:
+
+.. code-block::
+
+    from webob import html_escape
+
+    class Commenter(object):
+        ...
+
+        def format_comments(self, comments):
+            if not comments:
+                return ''
+            text = []
+            text.append('<hr>')
+            text.append('<h2><a name="comment-area"></a>Comments (%s):</h2>' % len(comments))
+            for comment in comments:
+                text.append('<h3><a href="%s">%s</a> at %s:</h3>' % (
+                    html_escape(comment['homepage']), html_escape(comment['name']), 
+                    time.strftime('%c', comment['time'])))
+                # Susceptible to XSS attacks!:
+                text.append(comment['comments'])
+            return ''.join(text)
+
+We put in a header (with an anchor we'll use later), and a section for
+each comment.  Note that ``html_escape`` is the same as ``cgi.escape``
+and just turns ``&`` into ``&amp;``, etc.  
+
+Because we put in some text without quoting it is susceptible to a
+`Cross-Site Scripting
+<http://en.wikipedia.org/wiki/Cross-site_scripting>`_ attack.  Fixing
+that is beyond the scope of this tutorial; you could quote it or clean
+it with something like ``lxml.html.clean``.
+
+Accepting Comments
+------------------
+
+All of those pieces *display* comments, but still no one can actually
+make comments.  To handle this we'll take a little piece of the URL
+space for our own, everything under ``/.comments``, so when someone
+POSTs there it will add a comment.
+
+When the request comes in there are two parts to the path:
+``SCRIPT_NAME`` and ``PATH_INFO``.  Everything in ``SCRIPT_NAME`` has
+already been parsed, and everything in ``PATH_INFO`` has yet to be
+parsed.  That means that the URL *without* ``PATH_INFO`` is the path
+to the middleware; we can intercept anything else below
+``SCRIPT_NAME`` but nothing above it.  The name for the URL without
+``PATH_INFO`` is ``req.application_url``.  We have to capture it early
+to make sure it doesn't change (since the WSGI application we are
+wrapping may update those values).
+
+So here's what this all looks like:
+
+.. code-block::
+
+    class Commenter(object):
+        ...
+
+        def __call__(self, environ, start_response):
+            req = Request(environ)
+            if req.path_info_peek() == '.comments':
+                return self.process_comment(req)(environ, start_response)
+            # This is the base path of *this* middleware:
+            base_url = req.application_url
+            resp = req.get_response(self.app)
+            if resp.content_type != 'text/html' or resp.status_int != 200:
+                # Not an HTML response, we don't want to
+                # do anything to it
+                return resp(environ, start_response)
+            # Make sure the content isn't gzipped:
+            resp.decode_content()
+            comments = self.get_data(req.url)
+            body = resp.body
+            body = self.add_to_end(body, self.format_comments(comments))
+            body = self.add_to_end(body, self.submit_form(base_url, req))
+            resp.body = body
+            return resp(environ, start_response)
+
+``base_url`` is the path where the middleware is located (if you run
+the example server, it will be ``http://localhost:PORT/``).  We use
+``req.path_info_peek()`` to look at the next segment of the URL --
+what comes after base_url.  If it is ``.comments`` then we handle it
+internally and don't pass the request on.
+
+We also put in a little guard, ``resp.decode_content()`` in case the
+application returns a gzipped response.
+
+Then we get the data, add the comments, add the *form* to make new
+comments, and return the result.
+
+submit_form
+~~~~~~~~~~~
+
+Here's what the form looks like:
+
+.. code-block::
+
+    class Commenter(object):
+        ...
+
+        def submit_form(self, base_path, req):
+            return '''<h2>Leave a comment:</h2>
+            <form action="%s/.comments" method="POST">
+             <input type="hidden" name="url" value="%s">
+             <table width="100%%">
+              <tr><td>Name:</td>
+                  <td><input type="text" name="name" style="width: 100%%"></td></tr>
+              <tr><td>URL:</td>
+                  <td><input type="text" name="homepage" style="width: 100%%"></td></tr>
+             </table>
+             Comments:<br>
+             <textarea name="comments" rows=10 style="width: 100%%"></textarea><br>
+             <input type="submit" value="Submit comment">
+            </form>
+            ''' % (base_path, html_escape(req.url))
+
+Nothing too exciting.  It submits a form with the keys ``url`` (the
+URL being commented on), ``name``, ``homepage``, and ``comments``.
+
+process_comments
+~~~~~~~~~~~~~~~~
+
+If you look at the method call, what we do is call the method then
+treat the result as a WSGI application:
+
+.. code-block::
+
+    return self.process_comment(req)(environ, start_response)
+
+This pattern is used to keep ``start_response`` from leaking out of
+the ``__call__`` method.  The ``webob.Response`` object is itself a
+WSGI application, so in ``process_comment`` we create a response
+object and return that.
+
+.. code-block::
+
+    from webob import exc
+    from webob import Response
+
+    class Commenter(object):
+        ...
+
+        def process_comment(self, req):
+            try:
+                url = req.params['url']
+                name = req.params['name']
+                homepage = req.params['homepage']
+                comments = req.params['comments']
+            except KeyError, e:
+                resp = exc.HTTPBadRequest('Missing parameter: %s' % e)
+                return resp
+            data = self.get_data(url)
+            data.append(dict(
+                name=name,
+                homepage=homepage,
+                comments=comments,
+                time=time.gmtime()))
+            self.save_data(url, data)
+            resp = exc.HTTPSeeOther(location=url+'#comment-area')
+            return resp
+
+We either give a Bad Request response (if the form submission is
+somehow malformed), or a redirect back to the original page.
+
+The classes in ``webob.exc`` (like ``HTTPBadRequest`` and
+``HTTPSeeOther``) are Response subclasses that can be used to quickly
+create responses for these non-200 cases where the response body
+usually doesn't matter much.
+
+Conclusion
+----------
+
+This shows how to make response modifying middleware, which is
+probably the most difficult kind of middleware to write with WSGI --
+modifying the request is quite simple in comparison, as you simply
+update ``environ``.
 
 Other documents:
 
+* `Reference <reference.html>`_
+
+* Extracted documentation for the `request
+  <class-webob.Request.html>`_ and `response
+  <class-webob.Response.html>`_.
+
 * `Differences between the WebOb API and other framework/libraries
   <differences.html>`_
 
 * `File-serving example <file-example.html>`_
 
-* `Reference <reference.html>`_
-
-* Extracted documentation for the `request
-  <class-webob.Request.html>`_ and `response
-  <class-webob.Response.html>`_.
+* `Comment middleware example <comment-example.html>`_
 
 .. contents::
 
 	extra_credits=Hosting courtesy of <a href="http://tummy.com">Tummy.com</a>
 dest = docs/html
 docs = docs/index.txt docs/license.txt docs/differences.txt docs/file-example.txt
-       docs/news.txt docs/reference.txt
+       docs/news.txt docs/reference.txt docs/comment-example.txt
 title = WebOb
 modules = webob
           webob.acceptparse
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.