Commits

Constantin Veretennicov committed 840d05c

* Implemented basic scrape protocol.
Note: "all torrents" scrape requests return an empty list; this is intentional.
* Added /robots.txt handler.
* Corrected charset declarations.

Comments (0)

Files changed (9)

 application: tracko
-version: 8
+version: 9
 runtime: python
 api_version: 1
 
 import logging, datetime
 from bencode.bencode import bencode
 from models import Torrent, TorrentPeer
+from google.appengine.api import memcache
 
 
-_required_request_param_names = frozenset([
+_required_announce_request_param_names = frozenset([
     'info_hash', 'peer_id', 'port', 'uploaded', 'downloaded', 'left'])
 
+_encoded_zero = TorrentPeer.n_bytes_left.encode(0)
+
+
+def _get_download_count_for_torrent(torrent_entity):
+  """Return the download counter stored in memcache for this torrent (0 if none)."""
+  # Same key format that _register_torrent_download writes to.
+  counter_key = torrent_entity.info_hash + '/download-count'
+  # A missing key comes back as None; `or 0` turns that into a zero count.
+  return memcache.get(counter_key) or 0
+
+
+def _register_torrent_download(torrent_entity):
+  """Increment the memcache download counter for the given torrent.
+
+  The counter is created on first use.  Passing initial_value lets memcache
+  create and increment the key in one atomic step, so two simultaneous
+  "completed" announces can no longer overwrite each other the way a
+  separate get()/set() pair could.
+  """
+  counter_key = torrent_entity.info_hash + '/download-count'
+  # A missing key is initialised to 0 and then incremented, i.e. becomes 1.
+  memcache.incr(counter_key, initial_value=0)
+
+
 def process_announce_request(
-  request_params, apparent_peer_address, settings):
+  request_params, apparent_client_address, settings):
 
   def fail(reason):
     return bencode({'failure reason': reason})
       ("Your client didn't send required parameter \"%s\". " % param_name)
       + "Please upgrade your client.")
 
-  for param_name in _required_request_param_names:
+  for param_name in _required_announce_request_param_names:
     if param_name not in request_params:
       return fail_on_missing_required_param(param_name)
 
 
   info_hash = request_params.get('info_hash', None)
   peer_id = request_params.get('peer_id')
-  peer_address = request_params.get('ip') or apparent_peer_address
+  peer_address = request_params.get('ip') or apparent_client_address
   peer_port_number = int(request_params.get('port'))
   n_bytes_uploaded = int(request_params.get('uploaded'))
   n_bytes_downloaded = int(request_params.get('downloaded'))
       return fail("Unfamiliar peer.")
     p.n_bytes_left = 0
     p.put()
+    _register_torrent_download(t)
   elif event_type is None:
     # update peer status
     t = Torrent.get_or_create_by_info_hash(info_hash)
       min(requested_peer_count, n_max_peers_to_return))
     if p.update_date_utc > peer_freshness_threshold_utc)
 
-  encoded_zero = TorrentPeer.n_bytes_left.encode(0)
-  n_leechers = t.peers.filter('n_bytes_left >', encoded_zero).count()
-  n_seeders = t.peers.filter('n_bytes_left <=', encoded_zero).count()
+  n_leechers = t.peers.filter('n_bytes_left >', _encoded_zero).count()
+  n_seeders = t.peers.filter('n_bytes_left <=', _encoded_zero).count()
 
   if omit_peer_id:
     encoded_peers = [
     'complete': n_seeders,
     'incomplete': n_leechers}
   return bencode(response)
+
+
+def process_scrape_request(info_hashes, settings):
+  """Build the bencoded scrape response for the requested torrents.
+
+  info_hashes -- iterable of info hashes (may be empty or None); duplicates
+                 are collapsed and hashes with no Torrent entity are left out.
+  settings    -- accepted for symmetry with the announce entry point; it is
+                 not read here.
+
+  Returns bencode({'files': {info_hash: {'complete', 'incomplete',
+  'downloaded'}}}).
+  """
+  stats_by_hash = {}
+  # `or ()` keeps the falsy-input case (no hashes requested) a no-op.
+  for info_hash in frozenset(info_hashes or ()):
+    torrent = Torrent.get_by_info_hash(info_hash)
+    if not torrent:
+      continue
+    stats_by_hash[info_hash] = {
+      'complete':
+        torrent.peers.filter('n_bytes_left <=', _encoded_zero).count(),
+      'incomplete':
+        torrent.peers.filter('n_bytes_left >', _encoded_zero).count(),
+      'downloaded': _get_download_count_for_torrent(torrent),
+    }
+  return bencode({'files': stats_by_hash})
 import wsgiref.handlers
 
 import models, settings
-from logic import process_announce_request
+from logic import process_announce_request, process_scrape_request
 
 
 class MainHandler(webapp.RequestHandler):
     response_body = process_announce_request(
         request_params, self.request.remote_addr, settings)
     #
-    self.response.headers['content-type'] = 'text/plain'
+    self.response.headers['Content-Type'] = 'text/plain'
+    self.response.out.write(response_body)
+
+class ScrapeHandler(webapp.RequestHandler):
+  """Answers GET requests with bencoded scrape data (mapped to '/scrape')."""
+  def get(self):
+    # parse_qs maps each parameter name to a list of values, so every
+    # repeated info_hash parameter in the query string is collected.
+    query_params = cgi.parse_qs(self.request.query_string)
+    # No info_hash at all yields an empty set, which produces an empty
+    # 'files' dictionary in the response.
+    info_hashes = frozenset(query_params.get('info_hash', []))
+    response_body = process_scrape_request(info_hashes, settings)
+    #
+    self.response.headers['Content-Type'] = 'text/plain'
     self.response.out.write(response_body)
 
 class SitemapHandler(webapp.RequestHandler):
   def get(self):
-    self.response.headers['content-type'] = 'text/plain'
+    self.response.headers['Content-Type'] = 'text/plain; charset=us-ascii'
     self.response.out.write(
       'http://%s%s/\n' % (
         self.request.server_name,
         ':%d' % self.request.server_port \
           if self.request.server_port != 80 else ''))
 
+class RobotsHandler(webapp.RequestHandler):
+  """Serves a minimal robots.txt (mapped to '/robots.txt')."""
+  def get(self):
+    self.response.headers['Content-Type'] = 'text/plain; charset=us-ascii'
+    # Only the User-agent line is written; no Disallow rules follow it.
+    self.response.out.write('User-agent: *\n')
+
+
 def create_application():
   return webapp.WSGIApplication(
     [('/', MainHandler),
      ('/announce', AnnounceHandler),
+     ('/scrape', ScrapeHandler),
+     ('/robots.txt', RobotsHandler),
      ('/sitemap.txt', SitemapHandler),],
     debug=True)
 

app/static/site.css

+@charset "utf-8";
+
 html {
   background-image: url(page-background.png);
   font-size: 10pt;

app/templates/base.html

 "http://www.w3.org/TR/html4/strict.dtd">
 <html>
   <head>
+    <meta http-equiv="content-type" content="text/html;charset=UTF-8">
     <title>{% block title %}Tracko - Public BitTorrent Tracker{% endblock %}</title>
     {% block styles %}
       <link rel="stylesheet" type="text/css" href="/static/site.css">

tests/test_scrape_basic.py

+import sys, os, urllib
+sys.path.append(os.path.join(os.path.dirname(__file__), '../app'))
+
+import util, main, logic, models, settings, webtest
+from bencode.bencode import bdecode
+from nose.tools import *
+
+
+def setup():
+  util.clear_datastore()
+  util.clear_memcache()
+  assert models.Torrent.all().count() == 0
+  assert models.TorrentPeer.all().count() == 0
+
+
+initial_seeder_start_request_params = dict(
+  info_hash='!' * 20,
+  peer_id='-' * 20,
+  port=str(6681),
+  uploaded='0',
+  downloaded='0',
+  left='0',
+  event='started')
+
+good_start_request_params = dict(
+  info_hash='*' * 20,
+  peer_id='?' * 20,
+  port=str(6681),
+  uploaded=str(2**128),
+  downloaded=str(2**64),
+  left=str(2**16),
+  event='started')
+
+torrent_completed_request_params = dict(
+  info_hash='+' * 20,
+  peer_id='=' * 20,
+  port=str(6681),
+  uploaded=str(2**128),
+  downloaded=str(2**64),
+  left='0',
+  event='completed')
+
+
+def test_return_empty_list_if_info_hash_is_missing():
+  result = bdecode(logic.process_scrape_request([], settings))
+  assert_equals(result, {'files': {}})
+
+
+def test_http_response_is_ok_when_info_hash_is_missing():
+  app = webtest.TestApp(main.create_application())
+  resp = app.get('/scrape')
+  assert_equals(resp.status, '200 OK')
+  assert_equals(resp.headers['Content-Type'], 'text/plain')
+  assert_equals(bdecode(resp.body), {'files': {}})
+
+
+@with_setup(util.clear_datastore)
+def test_return_empty_list_for_unknown_torrent():
+  result = bdecode(logic.process_scrape_request(['x' * 20], settings))
+  assert_equals(result, {'files': {}})
+
+
+@with_setup(util.clear_datastore)
+@with_setup(util.clear_memcache)
+def test_single_torrent_scrape_request():
+
+  request_params = initial_seeder_start_request_params.copy()
+  info_hash = request_params['info_hash']
+  logic.process_announce_request(request_params, '0.0.0.0', settings)
+  result = bdecode(logic.process_scrape_request([info_hash], settings))
+
+  assert_equals(
+    result,
+    {'files': {info_hash: {'complete': 1, 'incomplete': 0, 'downloaded': 0}}})
+
+
+@with_setup(util.clear_datastore)
+@with_setup(util.clear_memcache)
+def test_multiple_torrents_scrape_request():
+
+  request_params_1 = initial_seeder_start_request_params.copy()
+  info_hash_1 = request_params_1['info_hash']
+  logic.process_announce_request(request_params_1, '0.0.0.0', settings)
+
+  request_params_2 = good_start_request_params.copy()
+  info_hash_2 = request_params_2['info_hash']
+  logic.process_announce_request(request_params_2, '0.0.0.0', settings)
+
+  result = bdecode(logic.process_scrape_request(
+    [info_hash_1, info_hash_2], settings))
+
+  assert_equals(
+    result,
+    {'files': {
+      info_hash_1: {'complete': 1, 'incomplete': 0, 'downloaded': 0},
+      info_hash_2: {'complete': 0, 'incomplete': 1, 'downloaded': 0}}})
+
+
+@with_setup(util.clear_datastore)
+@with_setup(util.clear_memcache)
+def test_http_response_to_multiple_torrents_scrape_request():
+
+  app = webtest.TestApp(main.create_application())
+
+  request_params_1 = initial_seeder_start_request_params.copy()
+  info_hash_1 = request_params_1['info_hash']
+  resp = app.get(
+    '/announce', request_params_1,
+    extra_environ={'REMOTE_ADDR': '0.0.0.0'})
+
+  request_params_2 = good_start_request_params.copy()
+  info_hash_2 = request_params_2['info_hash']
+  resp = app.get(
+    '/announce', request_params_2,
+    extra_environ={'REMOTE_ADDR': '1.2.3.4'})
+
+  scrape_params = [('info_hash', info_hash_1), ('info_hash', info_hash_2)]
+  resp = app.get('/scrape?' + urllib.urlencode(scrape_params))
+
+  assert_equals(resp.status, '200 OK')
+  assert_equals(
+    bdecode(resp.body),
+    {'files': {
+      info_hash_1: {'complete': 1, 'incomplete': 0, 'downloaded': 0},
+      info_hash_2: {'complete': 0, 'incomplete': 1, 'downloaded': 0}}})
+
+
+@with_setup(util.clear_datastore)
+@with_setup(util.clear_memcache)
+def test_download_count_is_correct():
+
+  # initial announce from seeder
+  request_params_1 = initial_seeder_start_request_params.copy()
+  info_hash = request_params_1['info_hash']
+  response_1 = bdecode(
+    logic.process_announce_request(request_params_1, '0.0.0.0', settings))
+  assert 'failure reason' not in response_1
+
+  # download start announce from leecher
+  request_params_2 = good_start_request_params.copy()
+  request_params_2['info_hash'] = info_hash
+  response_2 = bdecode(
+    logic.process_announce_request(request_params_2, '0.1.2.3', settings))
+  assert 'failure reason' not in response_2
+
+  # torrent completed announce from former leecher
+  request_params_3 = torrent_completed_request_params.copy()
+  request_params_3['peer_id'] = request_params_2['peer_id']
+  request_params_3['info_hash'] = info_hash
+  response_3 = bdecode(
+    logic.process_announce_request(request_params_3, '0.1.2.3', settings))
+  assert 'failure reason' not in response_3
+
+  result = bdecode(logic.process_scrape_request([info_hash], settings))
+
+  assert_equals(
+    result,
+    {'files': {info_hash: {'complete': 2, 'incomplete': 0, 'downloaded': 1}}})

tests/webtest/__init__.py

+# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
+# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
+"""
+Routines for testing WSGI applications.
+
+Most interesting is TestApp
+"""
+
+import sys
+import random
+import urllib
+import urlparse
+import mimetypes
+import time
+import cgi
+import os
+from Cookie import BaseCookie, CookieError
+try:
+    from cStringIO import StringIO
+except ImportError:
+    from StringIO import StringIO
+import re
+from webob import Response, Request
+from webtest import lint
+
+__all__ = ['TestApp']
+
+def tempnam_no_warning(*args):
+    """
+    Thin wrapper around os.tempnam; all arguments are passed through
+    and its result is returned unchanged.
+
+    The name promises that the security warning is turned off, but
+    nothing in this function installs a warning filter.
+    NOTE(review): presumably a module-level ``warnings`` filter was
+    meant to accompany this -- confirm before relying on the
+    "no warning" part.
+    """
+    return os.tempnam(*args)
+
+class NoDefault(object):
+    pass
+
+# Fallback used only when the ``sorted`` builtin does not exist: the bare
+# name lookup below raises NameError and a replacement is defined.
+try:
+    sorted
+except NameError:
+    def sorted(l):
+        # Copy first so the caller's sequence is left untouched.
+        l = list(l)
+        l.sort()
+        return l
+
+class AppError(Exception):
+    """Raised by TestApp on an unexpected response status or logged errors."""
+    pass
+
+class TestApp(object):
+
+    # for py.test
+    disabled = True
+
+    def __init__(self, app, extra_environ=None, relative_to=None):
+        """
+        Wraps a WSGI application in a more convenient interface for
+        testing.
+
+        ``app`` may be an application, or a Paste Deploy app
+        URI, like ``'config:filename.ini#test'``.
+
+        ``extra_environ`` is a dictionary of values that should go
+        into the environment for each request.  These can provide a
+        communication channel with the application.
+
+        ``relative_to`` is a directory, and filenames used for file
+        uploads are calculated relative to this.  Also ``config:``
+        URIs that aren't absolute.
+        """
+        if isinstance(app, (str, unicode)):
+            from paste.deploy import loadapp
+            # @@: Should pick up relative_to from calling module's
+            # __file__
+            app = loadapp(app, relative_to=relative_to)
+        self.app = app
+        self.relative_to = relative_to
+        if extra_environ is None:
+            extra_environ = {}
+        self.extra_environ = extra_environ
+        self.reset()
+
+    def reset(self):
+        """
+        Resets the state of the application; currently just clears
+        saved cookies.
+        """
+        self.cookies = {}
+
+    def _make_environ(self, extra_environ=None):
+        environ = self.extra_environ.copy()
+        environ['paste.throw_errors'] = True
+        if extra_environ:
+            environ.update(extra_environ)
+        return environ
+    
+    def _remove_fragment(self, url):
+        """Return ``url`` with any ``#fragment`` part dropped."""
+        parts = urlparse.urlsplit(url)
+        # Keep scheme, netloc, path and query; substitute an empty fragment.
+        return urlparse.urlunsplit(tuple(parts[:4]) + ("",))
+    
+    def get(self, url, params=None, headers=None, extra_environ=None,
+            status=None, expect_errors=False):
+        """
+        Get the given url (well, actually a path like
+        ``'/page.html'``).
+
+        ``params``:
+            A query string, or a dictionary that will be encoded
+            into a query string.  You may also include a query
+            string on the ``url``.
+
+        ``headers``:
+            A dictionary of extra headers to send.
+
+        ``extra_environ``:
+            A dictionary of environmental variables that should
+            be added to the request.
+
+        ``status``:
+            The integer status code you expect (if not 200 or 3xx).
+            If you expect a 404 response, for instance, you must give
+            ``status=404`` or it will be an error.  You can also give
+            a wildcard, like ``'3*'`` or ``'*'``.
+
+        ``expect_errors``:
+            If this is not true, then if anything is written to
+            ``wsgi.errors`` it will be an error.  If it is true, then
+            non-200/3xx responses are also okay.
+
+        Returns a ``webob.Response`` object.
+        """
+        environ = self._make_environ(extra_environ)
+        # Hide from py.test:
+        __tracebackhide__ = True
+        if params:
+            if not isinstance(params, (str, unicode)):
+                params = urllib.urlencode(params, doseq=True)
+            if '?' in url:
+                url += '&'
+            else:
+                url += '?'
+            url += params
+        url = str(url)
+        if '?' in url:
+            url, environ['QUERY_STRING'] = url.split('?', 1)
+        else:
+            environ['QUERY_STRING'] = ''
+        url = self._remove_fragment(url)
+        req = TestRequest.blank(url, environ)
+        if headers:
+            req.headers.update(headers)
+        return self.do_request(req, status=status,
+                               expect_errors=expect_errors)
+
+    def _gen_request(self, method, url, params='', headers=None, extra_environ=None,
+                     status=None, upload_files=None, expect_errors=False,
+                     content_type=None):
+        """
+        Do a generic request.  
+        """
+        environ = self._make_environ(extra_environ)
+        # @@: Should this be all non-strings?
+        if isinstance(params, (list, tuple, dict)):
+            params = urllib.urlencode(params)
+        if hasattr(params, 'items'):
+            params = urllib.urlencode(params.items())
+        if upload_files:
+            params = cgi.parse_qsl(params, keep_blank_values=True)
+            content_type, params = self.encode_multipart(
+                params, upload_files)
+            environ['CONTENT_TYPE'] = content_type
+        elif params:
+            environ.setdefault('CONTENT_TYPE', 'application/x-www-form-urlencoded')
+        if '?' in url:
+            url, environ['QUERY_STRING'] = url.split('?', 1)
+        else:
+            environ['QUERY_STRING'] = ''
+        if content_type is not None:
+            environ['CONTENT_TYPE'] = content_type
+        environ['CONTENT_LENGTH'] = str(len(params))
+        environ['REQUEST_METHOD'] = method
+        environ['wsgi.input'] = StringIO(params)
+        url = self._remove_fragment(url)
+        req = TestRequest.blank(url, environ)
+        if headers:
+            req.headers.update(headers)
+        return self.do_request(req, status=status,
+                               expect_errors=expect_errors)
+
+    def post(self, url, params='', headers=None, extra_environ=None,
+             status=None, upload_files=None, expect_errors=False,
+             content_type=None):
+        """
+        Do a POST request.  Very like the ``.get()`` method.
+        ``params`` are put in the body of the request.
+
+        ``upload_files`` is for file uploads.  It should be a list of
+        ``[(fieldname, filename, file_content)]``.  You can also use
+        just ``[(fieldname, filename)]`` and the file content will be
+        read from disk.
+
+        Returns a ``webob.Response`` object.
+        """
+        return self._gen_request('POST', url, params=params, headers=headers,
+                                 extra_environ=extra_environ,status=status,
+                                 upload_files=upload_files,
+                                 expect_errors=expect_errors, 
+                                 content_type=content_type)
+
+    def put(self, url, params='', headers=None, extra_environ=None,
+            status=None, upload_files=None, expect_errors=False,
+            content_type=None):
+        """
+        Do a PUT request.  Very like the ``.post()`` method.
+        ``params`` are put in the body of the request, if params is a
+        tuple, dictionary, list, or iterator it will be urlencoded and
+        placed in the body as with a POST, if it is string it will not
+        be encoded, but placed in the body directly.
+
+        Returns a ``webob.Response`` object.
+        """
+        return self._gen_request('PUT', url, params=params, headers=headers,
+                                 extra_environ=extra_environ,status=status,
+                                 upload_files=upload_files,
+                                 expect_errors=expect_errors,
+                                 content_type=content_type)
+
+    def delete(self, url, headers=None, extra_environ=None,
+               status=None, expect_errors=False):
+        """
+        Do a DELETE request.  Very like the ``.get()`` method.
+
+        Returns a ``webob.Response`` object.
+        """
+        return self._gen_request('DELETE', url, headers=headers,
+                                 extra_environ=extra_environ,status=status,
+                                 upload_files=None, expect_errors=expect_errors)
+
+    def encode_multipart(self, params, files):
+        """
+        Encodes a set of parameters (typically a name/value list) and
+        a set of files (a list of (name, filename, file_body)) into a
+        typical POST body, returning the (content_type, body).
+
+        Each file part gets a ``Content-Type`` guessed from its
+        filename; when the type cannot be guessed,
+        ``application/octet-stream`` is used.
+        """
+        boundary = '----------a_BoUnDaRy%s$' % random.random()
+        lines = []
+        for key, value in params:
+            lines.append('--'+boundary)
+            lines.append('Content-Disposition: form-data; name="%s"' % key)
+            lines.append('')
+            lines.append(value)
+        for file_info in files:
+            key, filename, value = self._get_file_info(file_info)
+            lines.append('--'+boundary)
+            lines.append('Content-Disposition: form-data; name="%s"; filename="%s"'
+                         % (key, filename))
+            fcontent = mimetypes.guess_type(filename)[0]
+            # Parenthesised on purpose: ``%`` binds tighter than ``or``, so
+            # without the parentheses an unknown type was rendered as the
+            # literal text "Content-Type: None".
+            lines.append('Content-Type: %s' %
+                         (fcontent or 'application/octet-stream'))
+            lines.append('')
+            lines.append(value)
+        lines.append('--' + boundary + '--')
+        lines.append('')
+        body = '\r\n'.join(lines)
+        content_type = 'multipart/form-data; boundary=%s' % boundary
+        return content_type, body
+
+    def _get_file_info(self, file_info):
+        """
+        Normalise one ``upload_files`` entry to a
+        ``(fieldname, filename, content)`` triple.
+
+        A two-item entry ``(fieldname, filename)`` has its content read
+        from disk in binary mode, with ``filename`` resolved against
+        ``self.relative_to`` when that is set.  A three-item entry is
+        returned unchanged.  Any other length raises ``ValueError``.
+        """
+        if len(file_info) == 2:
+            # It only has a filename
+            filename = file_info[1]
+            if self.relative_to:
+                filename = os.path.join(self.relative_to, filename)
+            f = open(filename, 'rb')
+            try:
+                content = f.read()
+            finally:
+                # Release the handle even when the read itself fails.
+                f.close()
+            return (file_info[0], filename, content)
+        elif len(file_info) == 3:
+            return file_info
+        else:
+            raise ValueError(
+                "upload_files need to be a list of tuples of (fieldname, "
+                "filename, filecontent) or (fieldname, filename); "
+                "you gave: %r"
+                % repr(file_info)[:100])
+
+    def do_request(self, req, status, expect_errors):
+        """
+        Executes the given request (``req``), with the expected
+        ``status``.  Generally ``.get()`` and ``.post()`` are used
+        instead.
+
+        Unless ``expect_errors`` is true, the response status is checked
+        against ``status`` and anything written to ``wsgi.errors``
+        raises ``AppError``.  Cookies set by the response are remembered
+        on ``self.cookies`` and exposed as ``res.cookies_set``.
+        """
+        __tracebackhide__ = True
+        errors = StringIO()
+        req.environ['wsgi.errors'] = errors
+        if self.cookies:
+            # Replay cookies saved from earlier responses.
+            c = BaseCookie()
+            for name, value in self.cookies.items():
+                c[name] = value
+            # str(c) renders as "<header name>: <cookies>"; keep only the
+            # part after the first ": ".
+            req.environ['HTTP_COOKIE'] = str(c).split(': ', 1)[1]
+        req.environ['paste.testing'] = True
+        req.environ['paste.testing_variables'] = {}
+        app = lint.middleware(self.app)
+        old_stdout = sys.stdout
+        out = CaptureStdout(old_stdout)
+        try:
+            # Tee stdout while the application runs; it is always restored
+            # and the captured text is copied to stderr afterwards.
+            sys.stdout = out
+            start_time = time.time()
+            ## FIXME: should it be an option to not catch exc_info?
+            res = req.get_response(app, catch_exc_info=True)
+            end_time = time.time()
+        finally:
+            sys.stdout = old_stdout
+            sys.stderr.write(out.getvalue())
+        res.app = app
+        res.test_app = self
+        # We do this to make sure the app_iter is exhausted:
+        res.body
+        res.errors = errors.getvalue()
+        # NOTE(review): total_time is computed but not used in this method.
+        total_time = end_time - start_time
+        # Copy variables the application published for the test onto the
+        # response, refusing to overwrite an existing attribute.
+        for name, value in req.environ['paste.testing_variables'].items():
+            if hasattr(res, name):
+                raise ValueError(
+                    "paste.testing_variables contains the variable %r, but "
+                    "the response object already has an attribute by that "
+                    "name" % name)
+            setattr(res, name, value)
+        if not expect_errors:
+            self._check_status(status, res)
+            self._check_errors(res)
+        res.cookies_set = {}
+        for header in res.headers.getall('set-cookie'):
+            try:
+                c = BaseCookie(header)
+            except CookieError, e:
+                raise CookieError(
+                    "Could not parse cookie header %r: %s" % (header, e))
+            for key, morsel in c.items():
+                self.cookies[key] = morsel.value
+                res.cookies_set[key] = morsel.value
+        return res
+
+    def _check_status(self, status, res):
+        """
+        Raise ``AppError`` unless ``res`` has an acceptable status.
+
+        ``status`` may be:
+
+        * ``'*'`` -- any status is accepted;
+        * a string containing ``*`` (e.g. ``'3*'``) -- matched as a
+          shell-style wildcard against ``res.status``, the full status
+          line such as ``'302 Found'``;
+        * a list or tuple -- ``res.status_int`` must be one of its items;
+        * ``None`` -- any status from 200 up to (not including) 400;
+        * anything else -- must equal ``res.status_int``.
+        """
+        __tracebackhide__ = True
+        if status == '*':
+            return
+        if isinstance(status, (str, unicode)) and '*' in status:
+            # Wildcards such as '3*' are documented on .get() but used to
+            # fall through to the integer comparison below and always fail.
+            import fnmatch
+            if fnmatch.fnmatchcase(res.status, status):
+                return
+            raise AppError(
+                "Bad response: %s (not %s)" % (res.status, status))
+        if isinstance(status, (list, tuple)):
+            if res.status_int not in status:
+                raise AppError(
+                    "Bad response: %s (not one of %s for %s)\n%s"
+                    % (res.status, ', '.join(map(str, status)),
+                       res.request.url, res.body))
+            return
+        if status is None:
+            if res.status_int >= 200 and res.status_int < 400:
+                return
+            raise AppError(
+                "Bad response: %s (not 200 OK or 3xx redirect for %s)\n%s"
+                % (res.status, res.request.url,
+                   res.body))
+        if status != res.status_int:
+            raise AppError(
+                "Bad response: %s (not %s)" % (res.status, status))
+
+    def _check_errors(self, res):
+        """Raise ``AppError`` if anything was captured in ``res.errors``."""
+        logged = res.errors
+        if not logged:
+            return
+        raise AppError("Application had errors logged:\n%s" % logged)
+
+class CaptureStdout(object):
+    """
+    File-like tee: everything written is stored in an in-memory
+    buffer and also forwarded to the wrapped ``actual`` stream.
+    """
+
+    def __init__(self, actual):
+        self.actual = actual
+        self.captured = StringIO()
+
+    def write(self, s):
+        # Record first, then pass through to the real stream.
+        self.captured.write(s)
+        self.actual.write(s)
+
+    def flush(self):
+        # Only the wrapped stream is flushed.
+        self.actual.flush()
+
+    def writelines(self, lines):
+        write = self.write
+        for chunk in lines:
+            write(chunk)
+
+    def getvalue(self):
+        """Return everything written so far."""
+        return self.captured.getvalue()
+
+class TestResponse(Response):
+
+    """
+    Instances of this class are return by ``TestApp``
+    """
+
+    _forms_indexed = None
+
+
+    def forms__get(self):
+        """
+        Returns a dictionary of ``Form`` objects.  Indexes are both in
+        order (from zero) and by form id (if the form is given an id).
+        """
+        if self._forms_indexed is None:
+            self._parse_forms()
+        return self._forms_indexed
+
+    forms = property(forms__get,
+                     doc="""
+                     A list of <form>s found on the page (instances of
+                     ``Form``)
+                     """)
+
+    def form__get(self):
+        forms = self.forms
+        if not forms:
+            raise TypeError(
+                "You used response.form, but no forms exist")
+        if 1 in forms:
+            # There is more than one form
+            raise TypeError(
+                "You used response.form, but more than one form exists")
+        return forms[0]
+
+    form = property(form__get,
+                    doc="""
+                    Returns a single ``Form`` instance; it
+                    is an error if there are multiple forms on the
+                    page.
+                    """)
+
+    _tag_re = re.compile(r'<(/?)([:a-z0-9_\-]*)(.*?)>', re.S|re.I)
+
+    def _parse_forms(self):
+        """
+        Scan ``self.body`` for ``<form>...</form>`` spans and fill
+        ``self._forms_indexed`` with one ``Form`` per span, keyed by
+        position (from zero) and additionally by the form's id when it
+        has one.
+
+        Raises ``AssertionError`` on a stray ``</form>``, on nested
+        forms, or on a form that is never closed.
+        """
+        forms = self._forms_indexed = {}
+        form_texts = []
+        # Offset of the currently open <form>, or None when outside a form.
+        # Compared against None explicitly: offset 0 is a valid start (a
+        # body that begins with <form>) and must not read as "not started".
+        started = None
+        for match in self._tag_re.finditer(self.body):
+            end = match.group(1) == '/'
+            tag = match.group(2).lower()
+            if tag != 'form':
+                continue
+            if end:
+                assert started is not None, (
+                    "</form> unexpected at %s" % match.start())
+                form_texts.append(self.body[started:match.end()])
+                started = None
+            else:
+                assert started is None, (
+                    "Nested form tags at %s" % match.start())
+                started = match.start()
+        assert started is None, (
+            "Danging form: %r" % self.body[started:])
+        for i, text in enumerate(form_texts):
+            form = Form(self, text)
+            forms[i] = form
+            if form.id:
+                forms[form.id] = form
+
+    def follow(self, **kw):
+        """
+        If this request is a redirect, follow that redirect.  It
+        is an error if this is not a redirect response.  Returns
+        another response object.
+
+        Keyword arguments are passed on to ``test_app.get()``.
+        """
+        assert self.status_int >= 300 and self.status_int < 400, (
+            "You can only follow redirect responses (not %s)"
+            % self.status)
+        location = self.headers['location']
+        # NOTE(review): type/rest/host/path are computed but never used
+        # below (and ``type`` shadows the builtin); the full ``location``
+        # value is what gets requested.
+        type, rest = urllib.splittype(location)
+        host, path = urllib.splithost(rest)
+        # @@: We should test that it's not a remote redirect
+        return self.test_app.get(location, **kw)
+
+    def click(self, description=None, linkid=None, href=None,
+              anchor=None, index=None, verbose=False):
+        """
+        Click the link as described.  Each of ``description``,
+        ``linkid``, and ``href`` are *patterns*, meaning that they are
+        either strings (regular expressions), compiled regular
+        expressions (objects with a ``search`` method), or callables
+        returning true or false.
+
+        All the given patterns are ANDed together:
+
+        * ``description`` is a pattern that matches the contents of the
+          anchor (HTML and all -- everything between ``<a...>`` and
+          ``</a>``)
+
+        * ``linkid`` is a pattern that matches the ``id`` attribute of
+          the anchor.  It will receive the empty string if no id is
+          given.
+
+        * ``href`` is a pattern that matches the ``href`` of the anchor;
+          the literal content of that attribute, not the fully qualified
+          attribute.
+
+        * ``anchor`` is a pattern that matches the entire anchor, with
+          its contents.
+
+        If more than one link matches, then the ``index`` link is
+        followed.  If ``index`` is not given and more than one link
+        matches, or if no link matches, then ``IndexError`` will be
+        raised.
+
+        If you give ``verbose`` then messages will be printed about
+        each link, and why it does or doesn't match.  If you use
+        ``app.click(verbose=True)`` you'll see a list of all the
+        links.
+
+        You can use multiple criteria to essentially assert multiple
+        aspects about the link, e.g., where the link's destination is.
+        """
+        __tracebackhide__ = True
+        # Only the attribute dict of the matched anchor is needed here; its
+        # 'uri' entry holds the href to request.
+        found_html, found_desc, found_attrs = self._find_element(
+            tag='a', href_attr='href',
+            href_extract=None,
+            content=description,
+            id=linkid, 
+            href_pattern=href,
+            html_pattern=anchor,
+            index=index, verbose=verbose)
+        return self.goto(found_attrs['uri'])
+
+    def clickbutton(self, description=None, buttonid=None, href=None,
+                    button=None, index=None, verbose=False):
+        """
+        Like ``.click()``, except looks for link-like buttons.
+        This kind of button should look like
+        ``<button onclick="...location.href='url'...">``.
+        """
+        __tracebackhide__ = True
+        found_html, found_desc, found_attrs = self._find_element(
+            tag='button', href_attr='onclick',
+            href_extract=re.compile(r"location\.href='(.*?)'"),
+            content=description,
+            id=buttonid,
+            href_pattern=href,
+            html_pattern=button,
+            index=index, verbose=verbose)
+        return self.goto(found_attrs['uri'])
+
+    def _find_element(self, tag, href_attr, href_extract,
+                      content, id,
+                      href_pattern,
+                      html_pattern,
+                      index, verbose):
+        """
+        Locate a single ``<tag ...>...</tag>`` element in ``self.body``.
+
+        ``href_attr`` names the attribute holding the target URL, and
+        ``href_extract`` (a compiled regex or a false value) optionally
+        pulls the URL out of that attribute via its first group.
+        ``content``, ``id``, ``href_pattern`` and ``html_pattern`` are
+        turned into matchers with ``_make_pattern`` and tested against
+        the element's inner content, ``id`` attribute, URL and full
+        HTML.  ``index`` selects among several matches; ``verbose``
+        prints every decision.
+
+        Returns ``(element_html, element_content, attrs)`` where
+        ``attrs['uri']`` holds the extracted URL.
+
+        Raises ``IndexError`` if nothing matches, if several elements
+        match and ``index`` is None, or if ``index`` is out of range.
+        """
+        content_pat = _make_pattern(content)
+        id_pat = _make_pattern(id)
+        href_pat = _make_pattern(href_pattern)
+        html_pat = _make_pattern(html_pattern)
+
+        _tag_re = re.compile(r'<%s\s+(.*?)>(.*?)</%s>' % (tag, tag),
+                             re.I+re.S)
+        _script_re = re.compile(r'<script.*?>.*?</script>', re.I|re.S)
+        # Record the spans of all <script> blocks so that markup embedded
+        # in scripts can be ignored below.
+        bad_spans = []
+        for match in _script_re.finditer(self.body):
+            bad_spans.append((match.start(), match.end()))
+
+        def printlog(s):
+            # Only emit diagnostics when the caller asked for them.
+            if verbose:
+                print s
+
+        found_links = []
+        # Counts followable elements seen, whether or not they pass the
+        # caller's filters; used only in error messages.
+        total_links = 0
+        for match in _tag_re.finditer(self.body):
+            # Skip elements lying strictly inside a <script> block.
+            found_bad = False
+            for bad_start, bad_end in bad_spans:
+                if (match.start() > bad_start 
+                    and match.end() < bad_end):
+                    found_bad = True
+                    break
+            if found_bad:
+                continue
+            el_html = match.group(0)
+            el_attr = match.group(1)
+            el_content = match.group(2)
+            attrs = _parse_attrs(el_attr)
+            if verbose:
+                printlog('Element: %r' % el_html)
+            # A missing or empty target attribute disqualifies the element.
+            if not attrs.get(href_attr):
+                printlog('  Skipped: no %s attribute' % href_attr)
+                continue
+            el_href = attrs[href_attr]
+            if href_extract:
+                m = href_extract.search(el_href)
+                if not m:
+                    printlog("  Skipped: doesn't match extract pattern")
+                    continue
+                el_href = m.group(1)
+            # Expose the resolved target under a uniform key for callers.
+            attrs['uri'] = el_href
+            if el_href.startswith('#'):
+                printlog('  Skipped: only internal fragment href')
+                continue
+            if el_href.startswith('javascript:'):
+                printlog('  Skipped: cannot follow javascript:')
+                continue
+            total_links += 1
+            # Apply the caller's filters; a pattern of None means "any".
+            if content_pat and not content_pat(el_content):
+                printlog("  Skipped: doesn't match description")
+                continue
+            if id_pat and not id_pat(attrs.get('id', '')):
+                printlog("  Skipped: doesn't match id")
+                continue
+            if href_pat and not href_pat(el_href):
+                printlog("  Skipped: doesn't match href")
+                continue
+            if html_pat and not html_pat(el_html):
+                printlog("  Skipped: doesn't match html")
+                continue
+            printlog("  Accepted")
+            found_links.append((el_html, el_content, attrs))
+        if not found_links:
+            raise IndexError(
+                "No matching elements found (from %s possible)"
+                % total_links)
+        if index is None:
+            # Without an index the match has to be unambiguous.
+            if len(found_links) > 1:
+                raise IndexError(
+                    "Multiple links match: %s"
+                    % ', '.join([repr(anc) for anc, d, attr in found_links]))
+            found_link = found_links[0]
+        else:
+            try:
+                found_link = found_links[index]
+            except IndexError:
+                # Re-raise with a message that explains the counts.
+                raise IndexError(
+                    "Only %s (out of %s) links match; index %s out of range"
+                    % (len(found_links), total_links, index))
+        return found_link
+
+    def goto(self, href, method='get', **args):
+        """
+        Go to the (potentially relative) link ``href``, using the
+        given method (``'get'`` or ``'post'``) and any extra arguments
+        you want to pass to the ``app.get()`` or ``app.post()``
+        methods.
+
+        All hostnames and schemes will be ignored.
+        """
+        scheme, host, path, query, fragment = urlparse.urlsplit(href)
+        # We
+        scheme = host = fragment = ''
+        href = urlparse.urlunsplit((scheme, host, path, query, fragment))
+        href = urlparse.urljoin(self.request.url, href)
+        method = method.lower()
+        assert method in ('get', 'post'), (
+            'Only "get" or "post" are allowed for method (you gave %r)'
+            % method)
+        if method == 'get':
+            method = self.test_app.get
+        else:
+            method = self.test_app.post
+        return method(href, **args)
+
+    _normal_body_regex = re.compile(r'[ \n\r\t]+')
+
+    _normal_body = None
+    
+    def normal_body__get(self):
+        if self._normal_body is None:
+            self._normal_body = self._normal_body_regex.sub(
+                ' ', self.body)
+        return self._normal_body
+
+    normal_body = property(normal_body__get,
+                           doc="""
+                           Return the whitespace-normalized body
+                           """.strip())
+
+    def unicode_normal_body__get(self):
+        if not self.charset:
+            raise AttributeError(
+                "You cannot access Response.unicode_normal_body unless charset is set")
+        return self.normal_body.decode(self.charset)
+
+    unicode_normal_body = property(
+        unicode_normal_body__get, doc="""
+        Return the whitespace-normalized body, as unicode
+        """.strip())
+
+    def __contains__(self, s):
+        """
+        A response 'contains' a string if it is present in the body
+        of the response.  Whitespace is normalized when searching
+        for a string.
+        """
+        if not isinstance(s, basestring):
+            if hasattr(s, '__unicode__'):
+                s = unicode(s)
+            else:
+                s = str(s)
+        if isinstance(s, unicode):
+            body = self.unicode_body
+            normal_body = self.unicode_normal_body
+        else:
+            body = self.body
+            normal_body = self.normal_body
+        return s in body or s in normal_body
+
+    def mustcontain(self, *strings, **kw):
+        """
+        Assert that the response contains all of the strings passed
+        in as arguments.
+
+        Equivalent to::
+
+            assert string in res
+        """
+        if 'no' in kw:
+            no = kw['no']
+            del kw['no']
+            if isinstance(no, basestring):
+                no = [no]
+        else:
+            no = []
+        if kw:
+            raise TypeError(
+                "The only keyword argument allowed is 'no'")
+        for s in strings:
+            if not s in self:
+                print >> sys.stderr, "Actual response (no %r):" % s
+                print >> sys.stderr, self
+                raise IndexError(
+                    "Body does not contain string %r" % s)
+        for no_s in no:
+            if no_s in self:
+                print >> sys.stderr, "Actual response (has %r)" % no_s
+                print >> sys.stderr, self
+                raise IndexError(
+                    "Body contains bad string %r" % no_s)
+
+    def __str__(self):
+        simple_body = '\n'.join([l for l in self.body.splitlines()
+                                 if l.strip()])
+        headers = [(self._normalize_header_name(n), v)
+                   for n, v in self.headerlist
+                   if n.lower() != 'content-length']
+        headers.sort()
+        return 'Response: %s\n%s\n%s' % (
+            self.status,
+            '\n'.join(['%s: %s' % (n, v) for n, v in headers]),
+            simple_body)
+
+    def _normalize_header_name(self, name):
+        name = name.replace('-', ' ').title().replace(' ', '-')
+        return name
+
+    def __repr__(self):
+        # Specifically intended for doctests
+        if self.content_type:
+            ct = ' %s' % self.content_type
+        else:
+            ct = ''
+        if self.body:
+            br = repr(self.body)
+            if len(br) > 18:
+                br = br[:10]+'...'+br[-5:]
+                br += '/%s' % len(self.body)
+            body = ' body=%s' % br
+        else:
+            body = ' no body'
+        if self.location:
+            location = ' location: %s' % self.location
+        else:
+            location = ''
+        return ('<' + self.status + ct + location + body + '>')
+
+    def html(self):
+        """
+        Returns the response as a `BeautifulSoup
+        <http://www.crummy.com/software/BeautifulSoup/documentation.html>`_
+        object.
+
+        Only works with HTML responses; other content-types raise
+        AttributeError.
+        """
+        if 'html' not in self.content_type:
+            raise AttributeError(
+                "Not an HTML response body (content-type: %s)"
+                % self.content_type)
+        try:
+            from BeautifulSoup import BeautifulSoup
+        except ImportError:
+            raise ImportError(
+                "You must have BeautifulSoup installed to use response.html")
+        soup = BeautifulSoup(self.body)
+        return soup
+
+    html = property(html, doc=html.__doc__)
+
+    def xml(self):
+        """
+        Returns the response as an `ElementTree
+        <http://python.org/doc/current/lib/module-xml.etree.ElementTree.html>`_
+        object.
+
+        Only works with XML responses; other content-types raise
+        AttributeError
+        """
+        if 'xml' not in self.content_type:
+            raise AttributeError(
+                "Not an XML response body (content-type: %s)"
+                % self.content_type)
+        try:
+            from xml.etree import ElementTree
+        except ImportError:
+            try:
+                import ElementTree
+            except ImportError:
+                try:
+                    from elementtree import ElementTree
+                except ImportError:
+                    raise ImportError(
+                        "You must have ElementTree installed (or use Python 2.5) to use response.xml")
+        return ElementTree.XML(self.body)
+
+    xml = property(xml, doc=xml.__doc__)
+
+    def lxml(self):
+        """
+        Returns the response as an `lxml object
+        <http://codespeak.net/lxml/>`_.  You must have lxml installed
+        to use this.
+
+        If this is an HTML response and you have lxml 2.x installed,
+        then an ``lxml.html.HTML`` object will be returned; if you
+        have an earlier version of lxml then a ``lxml.HTML`` object
+        will be returned.
+        """
+        if ('html' not in self.content_type
+            and 'xml' not in self.content_type):
+            raise AttributeError(
+                "Not an XML or HTML response body (content-type: %s)"
+                % self.content_type)
+        try:
+            from lxml import etree
+        except ImportError:
+            raise ImportError(
+                "You must have lxml installed to use response.lxml")
+        try:
+            from lxml.html import fromstring
+        except ImportError:
+            fromstring = etree.HTML
+        ## FIXME: would be nice to set xml:base, in some fashion
+        if self.content_type == 'text/html':
+            return fromstring(self.body)
+        else:
+            return etree.XML(self.body)
+
+    lxml = property(lxml, doc=lxml.__doc__)
+
+    def json(self):
+        """
+        Return the response as a JSON response.  You must have
+        `simplejson
+        <http://svn.red-bean.com/bob/simplejson/tags/simplejson-1.7/docs/index.html>`_
+        installed to use this.
+
+        The content type must be application/json to use this.
+        """
+        if self.content_type != 'application/json':
+            raise AttributeError(
+                "Not a JSON response body (content-type: %s)"
+                % self.content_type)
+        try:
+            from simplejson import loads
+        except ImportError:
+            raise ImportError(
+                "You must have simplejson installed to use response.json")
+        return loads(self.body)
+
+    json = property(json, doc=json.__doc__)
+
+    def showbrowser(self):
+        """
+        Show this response in a browser window (for debugging purposes,
+        when it's hard to read the HTML).
+        """
+        import webbrowser
+        fn = tempnam_no_warning(None, 'webtest-page') + '.html'
+        f = open(fn, 'wb')
+        f.write(self.body)
+        f.close()
+        url = 'file:' + fn.replace(os.sep, '/')
+        webbrowser.open_new(url)
+
+class TestRequest(Request):
+
+    # for py.test
+    disabled = True
+    ResponseClass = TestResponse
+
+########################################
+## Form objects
+######################################## 
+
+class Form(object):
+
+    """
+    This object represents a form that has been found in a page.
+    This has a couple useful attributes:
+
+    ``text``:
+        the full HTML of the form.
+
+    ``action``:
+        the relative URI of the action.
+
+    ``method``:
+        the method (e.g., ``'GET'``).
+
+    ``id``:
+        the id, or None if not given.
+
+    ``fields``:
+        a dictionary of fields, each value is a list of fields by
+        that name.  ``<input type=\"radio\">`` and ``<select>`` are
+        both represented as single fields with multiple options.
+    """
+
+    # @@: This really should be using Mechanize/ClientForm or
+    # something...
+
+    # Matches any opening or closing tag: group 1 is '/' for a closing
+    # tag, group 2 the tag name, group 3 the raw attribute text.
+    _tag_re = re.compile(r'<(/?)([a-z0-9_\-]*)([^>]*?)>', re.I)
+
+    def __init__(self, response, text):
+        """
+        ``response`` is the response the form was found in (used by
+        ``submit()``); ``text`` is the form's HTML, which is parsed
+        immediately.
+        """
+        self.response = response
+        self.text = text
+        self._parse_fields()
+        self._parse_action()
+
+    def _parse_fields(self):
+        """
+        Scan ``self.text`` and build ``self.fields``, a dictionary
+        mapping each field name (None for unnamed fields) to the list
+        of field objects carrying that name.
+        """
+        # Parser state: the <select> field currently open, and the
+        # (field, content_start_offset) pair of the open <textarea>.
+        in_select = None
+        in_textarea = None
+        fields = {}
+        for match in self._tag_re.finditer(self.text):
+            end = match.group(1) == '/'
+            tag = match.group(2).lower()
+            if tag not in ('input', 'select', 'option', 'textarea',
+                           'button'):
+                continue
+            if tag == 'select' and end:
+                assert in_select, (
+                    '%r without starting select' % match.group(0))
+                in_select = None
+                continue
+            if tag == 'textarea' and end:
+                assert in_textarea, (
+                    "</textarea> with no <textarea> at %s" % match.start())
+                # The textarea's value is the unquoted text between its
+                # opening and closing tags.
+                in_textarea[0].value = html_unquote(self.text[in_textarea[1]:match.start()])
+                in_textarea = None
+                continue
+            if end:
+                continue
+            attrs = _parse_attrs(match.group(3))
+            if 'name' in attrs:
+                name = attrs.pop('name')
+            else:
+                name = None
+            if tag == 'option':
+                # NOTE(review): an <option> seen while no <select> is
+                # open leaves in_select as None, so this line raises
+                # AttributeError.
+                in_select.options.append((attrs.get('value'),
+                                          'selected' in attrs))
+                continue
+            if tag == 'input' and attrs.get('type') == 'radio':
+                # All radio inputs sharing a name are folded into one
+                # Radio field holding one option per input.
+                field = fields.get(name)
+                if not field:
+                    field = Radio(self, tag, name, match.start(), **attrs)
+                    fields.setdefault(name, []).append(field)
+                else:
+                    field = field[0]
+                    assert isinstance(field, Radio)
+                field.options.append((attrs.get('value'),
+                                      'checked' in attrs))
+                continue
+            tag_type = tag
+            if tag == 'input':
+                tag_type = attrs.get('type', 'text').lower()
+            # Pick the field class registered for this type, falling
+            # back to the generic Field.
+            if tag_type == "select" and attrs.get("multiple"):
+                FieldClass = Field.classes.get("multiple_select", Field)
+            else:
+                FieldClass = Field.classes.get(tag_type, Field)
+            field = FieldClass(self, tag, name, match.start(), **attrs)
+            if tag == 'textarea':
+                assert not in_textarea, (
+                    "Nested textareas: %r and %r"
+                    % (in_textarea, match.group(0)))
+                # Remember where the content starts so the closing tag
+                # can slice it out.
+                in_textarea = field, match.end()
+            elif tag == 'select':
+                assert not in_select, (
+                    "Nested selects: %r and %r"
+                    % (in_select, match.group(0)))
+                in_select = field
+            fields.setdefault(name, []).append(field)
+        self.fields = fields
+
+    def _parse_action(self):
+        """
+        Read ``action``, ``method`` and ``id`` from the ``<form>`` tag
+        in ``self.text``.  Asserts that both an opening ``<form>`` and
+        a closing ``</form>`` tag are present.
+        """
+        self.action = None
+        for match in self._tag_re.finditer(self.text):
+            end = match.group(1) == '/'
+            tag = match.group(2).lower()
+            if tag != 'form':
+                continue
+            if end:
+                break
+            attrs = _parse_attrs(match.group(3))
+            self.action = attrs.get('action', '')
+            self.method = attrs.get('method', 'GET')
+            self.id = attrs.get('id')
+            # @@: enctype?
+        else:
+            # Reached only when the loop ends without hitting </form>.
+            assert 0, "No </form> tag found"
+        assert self.action is not None, (
+            "No <form> tag found")
+
+    def __setitem__(self, name, value):
+        """
+        Set the value of the named field.  If there is 0 or multiple
+        fields by that name, it is an error.
+
+        Setting the value of a ``<select>`` selects the given option
+        (and confirms it is an option).  Setting radio fields does the
+        same.  Checkboxes get boolean values.  You cannot set hidden
+        fields or buttons.
+
+        Use ``.set()`` if there is any ambiguity and you must provide
+        an index.
+        """
+        fields = self.fields.get(name)
+        assert fields is not None, (
+            "No field by the name %r found (fields: %s)"
+            % (name, ', '.join(map(repr, self.fields.keys()))))
+        assert len(fields) == 1, (
+            "Multiple fields match %r: %s"
+            % (name, ', '.join(map(repr, fields))))
+        fields[0].value = value
+
+    def __getitem__(self, name):
+        """
+        Get the named field object (ambiguity is an error).
+        """
+        fields = self.fields.get(name)
+        assert fields is not None, (
+            "No field by the name %r found" % name)
+        assert len(fields) == 1, (
+            "Multiple fields match %r: %s"
+            % (name, ', '.join(map(repr, fields))))
+        return fields[0]
+
+    def set(self, name, value, index=None):
+        """
+        Set the given name, using ``index`` to disambiguate.
+        """
+        if index is None:
+            self[name] = value
+        else:
+            fields = self.fields.get(name)
+            assert fields is not None, (
+                "No fields found matching %r" % name)
+            field = fields[index]
+            field.value = value
+
+    def get(self, name, index=None, default=NoDefault):
+        """
+        Get the named/indexed field object, or ``default`` if no field
+        is found.
+
+        Without a ``default``, a missing field fails the assertions
+        below (or those in ``__getitem__`` when ``index`` is None).
+        """
+        fields = self.fields.get(name)
+        if fields is None and default is not NoDefault:
+            return default
+        if index is None:
+            return self[name]
+        else:
+            fields = self.fields.get(name)
+            assert fields is not None, (
+                "No fields found matching %r" % name)
+            field = fields[index]
+            return field
+
+    def select(self, name, value, index=None):
+        """
+        Like ``.set()``, except also confirms the target is a
+        ``<select>``.
+        """
+        field = self.get(name, index=index)
+        assert isinstance(field, Select)
+        field.value = value
+
+    def submit(self, name=None, index=None, **args):
+        """
+        Submits the form.  If ``name`` is given, then also select that
+        button (using ``index`` to disambiguate).
+
+        Any extra keyword arguments are passed to the ``.get()`` or
+        ``.post()`` method.
+        """
+        fields = self.submit_fields(name, index=index)
+        return self.response.goto(self.action, method=self.method,
+                                  params=fields, **args)
+
+    def submit_fields(self, name=None, index=None):
+        """
+        Return a list of ``[(name, value), ...]`` for the current
+        state of the form.
+
+        If ``name`` is given, that field's ``value_if_submitted()`` is
+        placed first in the list.  Unnamed fields and fields whose
+        value is None are left out; list values are expanded into one
+        pair per item.
+        """
+        submit = []
+        if name is not None:
+            field = self.get(name, index=index)
+            submit.append((field.name, field.value_if_submitted()))
+        # From here on ``name`` is reused as the loop variable; the
+        # argument is not needed any more.
+        for name, fields in self.fields.items():
+            if name is None:
+                continue
+            for field in fields:
+                value = field.value
+                if value is None:
+                    continue
+                if isinstance(value, list):
+                    for item in value:
+                        submit.append((name, item))
+                else:
+                    submit.append((name, value))
+        return submit
+
+
+_attr_re = re.compile(r'([^= \n\r\t]+)[ \n\r\t]*(?:=[ \n\r\t]*(?:"([^"]*)"|([^"][^ \n\r\t>]*)))?', re.S)
+
+def _parse_attrs(text):
+    attrs = {}
+    for match in _attr_re.finditer(text):
+        attr_name = match.group(1).lower()
+        attr_body = match.group(2) or match.group(3)
+        attr_body = html_unquote(attr_body or '')
+        attrs[attr_name] = attr_body
+    return attrs
+
+class Field(object):
+
+    """
+    Field object.
+    """
+
+    # Dictionary of field types (select, radio, etc) to classes
+    classes = {}
+
+    settable = True
+
+    def __init__(self, form, tag, name, pos,
+                 value=None, id=None, **attrs):
+        self.form = form
+        self.tag = tag
+        self.name = name
+        self.pos = pos
+        self._value = value
+        self.id = id
+        self.attrs = attrs
+
+    def value__set(self, value):
+        if not self.settable:
+            raise AttributeError(
+                "You cannot set the value of the <%s> field %r"
+                % (self.tag, self.name))
+        self._value = value
+
+    def force_value(self, value):
+        """
+        Like setting a value, except forces it even for, say, hidden
+        fields.
+        """
+        self._value = value
+
+    def value__get(self):
+        return self._value
+
+    value = property(value__get, value__set)
+
+class NoValue(object):
+    pass
+
+class Select(Field):
+
+    """
+    Field representing ``<select>``
+    """
+
+    def __init__(self, *args, **attrs):
+        super(Select, self).__init__(*args, **attrs)
+        self.options = []
+        # Undetermined yet:
+        self.selectedIndex = None
+        # we have no forced value
+        self._forced_value = NoValue
+    
+    def force_value(self, value):
+        self._forced_value = value
+    
+    def value__set(self, value):
+        if self._forced_value is not NoValue:
+            self._forced_value = NoValue
+        for i, (option, checked) in enumerate(self.options):
+            if option == str(value):
+                self.selectedIndex = i
+                break
+        else:
+            raise ValueError(
+                "Option %r not found (from %s)"
+                % (value, ', '.join(
+                [repr(o) for o, c in self.options])))
+
+    def value__get(self):
+        if self._forced_value is not NoValue:
+            return self._forced_value
+        elif self.selectedIndex is not None:
+            return self.options[self.selectedIndex][0]
+        else:
+            for option, checked in self.options:
+                if checked:
+                    return option
+            else:
+                if self.options:
+                    return self.options[0][0]
+                else:
+                    return None
+
+    value = property(value__get, value__set)
+
+Field.classes['select'] = Select
+
+class MultipleSelect(Field):
+
+    """
+    Field representing ``<select multiple="multiple">``
+    """
+
+    def __init__(self, *args, **attrs):
+        super(MultipleSelect, self).__init__(*args, **attrs)
+        self.options = []
+        # Undetermined yet:
+        self.selectedIndices = []
+        self._forced_values = []
+    
+    def force_value(self, values):
+        self._forced_values = values
+        self.selectedIndices = []
+    
+    def value__set(self, values):
+        str_values = [str(value) for value in values]
+        self.selectedIndicies = []
+        for i, (option, checked) in enumerate(self.options):
+            if option in str_values:
+                self.selectedIndices.append(i)
+                str_values.remove(option)
+        if str_values:
+            raise ValueError(
+                "Option(s) %r not found (from %s)"
+                % (', '.join(str_values),
+                   ', '.join(
+                        [repr(o) for o, c in self.options])))
+
+    def value__get(self):
+        selected_values = []
+        if self.selectedIndices:
+            selected_values = [self.options[i][0] for i in self.selectedIndices]
+        elif not self._forced_values:
+            selected_values = []
+            for option, checked in self.options:
+                if checked:
+                    selected_values.append(option)
+        if self._forced_values:
+            selected_values += self._forced_values
+        
+        if self.options and (not selected_values):
+            selected_values = None
+        return selected_values
+    value = property(value__get, value__set)
+
+Field.classes['multiple_select'] = MultipleSelect
+
+class Radio(Select):
+
+    """
+    Field representing ``<input type="radio">``
+    """
+
+    def value__get(self):
+        if self.selectedIndex is not None:
+            return self.options[self.selectedIndex][0]
+        else:
+            for option, checked in self.options:
+                if checked:
+                    return option
+            else:
+                return None
+
+    value = property(value__get, Select.value__set)
+
+
+Field.classes['radio'] = Radio
+
+class Checkbox(Field):
+
+    """
+    Field representing ``<input type="checkbox">``
+    """
+
+    def __init__(self, *args, **attrs):
+        super(Checkbox, self).__init__(*args, **attrs)
+        self.checked = 'checked' in attrs
+
+    def value__set(self, value):
+        self.checked = not not value
+
+    def value__get(self):
+        if self.checked:
+            if self._value is None:
+                return 'on'
+            else:
+                return self._value
+        else:
+            return None
+
+    value = property(value__get, value__set)
+
+Field.classes['checkbox'] = Checkbox
+
+class Text(Field):
+    """
+    Field representing ``<input type="text">``
+    """
+
+    def value__get(self):
+        if self._value is None:
+            return ''
+        else:
+            return self._value
+
+    value = property(value__get, Field.value__set)
+
+Field.classes['text'] = Text
+
+
+class File(Field):
+    """
+    Field representing ``<input type="file">``
+    """
+
+    ## FIXME: This doesn't actually handle file uploads and enctype
+    def value__get(self):
+        if self._value is None:
+            return ''
+        else:
+            return self._value
+
+Field.classes['file'] = File
+
+class Textarea(Text):
+    """
+    Field representing ``<textarea>``
+
+    Behaves exactly like ``Text``; it only exists so that the
+    ``textarea`` tag has its own registered class.
+    """
+
+Field.classes['textarea'] = Textarea
+
+class Hidden(Text):
+    """
+    Field representing ``<input type="hidden">``
+
+    Adds no behavior of its own on top of ``Text``.
+    """
+
+Field.classes['hidden'] = Hidden
+
+class Submit(Field):
+    """
+    Field representing ``<input type="submit">`` and ``<button>``
+
+    Also registered for ``<input type="image">``.
+    """
+    
+    # Assigning to ``value`` raises AttributeError.
+    settable = False
+
+    def value__get(self):
+        # Always None, regardless of the stored value.
+        return None
+
+    value = property(value__get)
+
+    def value_if_submitted(self):
+        """
+        Return the stored ``value`` attribute of this button.
+        """
+        return self._value
+
+Field.classes['submit'] = Submit
+
+Field.classes['button'] = Submit
+
+Field.classes['image'] = Submit
+
+########################################
+## Utility functions
+########################################
+
+def _popget(d, key, default=None):
+    """
+    Pop the key if found (else return default)
+    """
+    if key in d:
+        return d.pop(key)
+    return default
+
+def _space_prefix(pref, full, sep=None, indent=None, include_sep=True):
+    """
+    Anything shared by pref and full will be replaced with spaces
+    in full, and full returned.
+    """
+    if sep is None:
+        sep = os.path.sep
+    pref = pref.split(sep)
+    full = full.split(sep)
+    padding = []
+    while pref and full and pref[0] == full[0]:
+        if indent is None:
+            padding.append(' ' * (len(full[0]) + len(sep)))
+        else:
+            padding.append(' ' * indent)
+        full.pop(0)
+        pref.pop(0)
+    if padding:
+        if include_sep:
+            return ''.join(padding) + sep + sep.join(full)
+        else:
+            return ''.join(padding) + sep.join(full)
+    else:
+        return sep.join(full)
+
+def _make_pattern(pat):
+    if pat is None:
+        return None
+    if isinstance(pat, (str, unicode)):
+        pat = re.compile(pat)
+    if hasattr(pat, 'search'):
+        return pat.search
+    if callable(pat):
+        return pat
+    assert 0, (
+        "Cannot make callable pattern object out of %r" % pat)
+
+def html_unquote(v):
+    """
+    Unquote (some) entities in HTML.  (incomplete)
+    """
+    for ent, repl in [('&nbsp;', ' '), ('&gt;', '>'),
+                      ('&lt;', '<'), ('&quot;', '"'),
+                      ('&amp;', '&')]:
+        v = v.replace(ent, repl)
+    return v
+

tests/webtest/debugapp.py

+from webob import Request
+try:
+    sorted
+except NameError:
+    from webtest import sorted
+
+__all__ = ['debug_app']
+
+def debug_app(environ, start_response):
+    req = Request(environ)
+    if 'error' in req.GET:
+        raise Exception('Exception requested')
+    status = req.GET.get('status', '200 OK')
+    parts = []
+    for name, value in sorted(environ.items()):
+        if name.upper() != name:
+            value = repr(value)
+        parts.append('%s: %s\n' % (name, value))
+    req_body = req.body
+    if req_body:
+        parts.append('-- Body ----------\n')
+        parts.append(req_body)
+    body = ''.join(parts)
+    headers = [
+        ('Content-Type', 'text/plain'),
+        ('Content-Length', str(len(body)))]
+    for name, value in req.GET.items():
+        if name.startswith('header-'):
+            header_name = name[len('header-'):]
+            headers.append((header_name, value))
+    start_response(status, headers)
+    return [body]
+
+def make_debug_app(global_conf):
+    """
+    An application that displays the request environment, and does
+    nothing else (useful for debugging and test purposes).
+
+    ``global_conf`` is accepted but not used; the module-level
+    ``debug_app`` is returned as is.
+    """
+    return debug_app

tests/webtest/lint.py

+# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
+# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
+# Also licenced under the Apache License, 2.0: http://opensource.org/licenses/apache2.0.php
+# Licensed to PSF under a Contributor Agreement
+"""
+Middleware to check for obedience to the WSGI specification.
+
+Some of the things this checks:
+
+* Signature of the application and start_response (including that
+  keyword arguments are not used).
+
+* Environment checks:
+
+  - Environment is a dictionary (and not a subclass).
+
+  - That all the required keys are in the environment: REQUEST_METHOD,
+    SERVER_NAME, SERVER_PORT, wsgi.version, wsgi.input, wsgi.errors,
+    wsgi.multithread, wsgi.multiprocess, wsgi.run_once
+
+  - That HTTP_CONTENT_TYPE and HTTP_CONTENT_LENGTH are not in the
+    environment (these headers should appear as CONTENT_LENGTH and
+    CONTENT_TYPE).
+
+  - Warns if QUERY_STRING is missing, as the cgi module acts
+    unpredictably in that case.
+
+  - That CGI-style variables (that don't contain a .) have
+    (non-unicode) string values
+
+  - That wsgi.version is a tuple
+
+  - That wsgi.url_scheme is 'http' or 'https' (@@: is this too
+    restrictive?)
+
+  - Warns if the REQUEST_METHOD is not known (@@: probably too
+    restrictive).
+
+  - That SCRIPT_NAME and PATH_INFO are empty or start with /
+
+  - That at least one of SCRIPT_NAME or PATH_INFO are set.
+
+  - That CONTENT_LENGTH is a positive integer.
+
+  - That SCRIPT_NAME is not '/' (it should be '', and PATH_INFO should
+    be '/').
+
+  - That wsgi.input has the methods read, readline, readlines, and
+    __iter__
+
+  - That wsgi.errors has the methods flush, write, writelines
+
+* The status is a string, contains a space, starts with an integer,
+  and that integer is in range (> 100).
+
+* That the headers is a list (not a subclass, not another kind of
+  sequence).
+
+* That the items of the headers are tuples of strings.
+
+* That there is no 'status' header (that is used in CGI, but not in
+  WSGI).
+
+* That the headers don't contain newlines or colons, end in _ or -, or
+  contain character codes below 037.
+
+* That Content-Type is given if there is content (CGI often has a
+  default content type, but WSGI does not).
+
+* That no Content-Type is given when there is no content (@@: is this
+  too restrictive?)
+
+* That the exc_info argument to start_response is a tuple or None.
+
+* That all calls to the writer are with strings, and no other methods
+  on the writer are accessed.
+
+* That wsgi.input is used properly:
+
+  - .read() is called with zero or one argument
+
+  - That it returns a string
+
+  - That readline, readlines, and __iter__ return strings
+
+  - That .close() is not called
+
+  - No other methods are provided
+
+* That wsgi.errors is used properly:
+
+  - .write() is called with a string, and .writelines() with a
+    sequence of strings
+
+  - That .close() is not called, and no other methods are provided.
+
+* The response iterator:
+
+  - That it is not a string (it should be a list of a single string; a
+    string will work, but perform horribly).
+
+  - That .next() returns a string
+
+  - That the iterator is not iterated over until start_response has
+    been called (that can signal either a server or application
+    error).
+
+  - That .close() is called (doesn't raise exception, only prints to
+    sys.stderr, because we only know it isn't called when the object
+    is garbage collected).
+"""
+
+import re
+import sys
+from types import DictType, StringType, TupleType, ListType
+import warnings
+
+# Matches a name that starts with an ASCII letter and continues with
+# ASCII letters, digits, hyphens or underscores only.
+header_re = re.compile(r'^[a-zA-Z][a-zA-Z0-9\-_]*$')
+# Matches any single character whose code is in the octal range
+# 000-037 (i.e. 0 through 31, the ASCII control characters).
+bad_header_value_re = re.compile(r'[\000-\037]')
+
+class WSGIWarning(Warning):
+    """
+    Warning category for WSGI-spec-related problems that are reported
+    through the ``warnings`` machinery rather than as hard failures.
+    """
+
+def middleware(application, global_conf=None):
+    """
+    Wrap ``application`` in a WSGI compliance checker.
+
+    When applied between a WSGI server and a WSGI application, the
+    returned callable checks the call signature, the environment, the
+    arguments given to start_response and the returned iterator.  Any
+    violation is reported with an AssertionError (except for a failure
+    to close the application iterator, which can only be reported on
+    stderr when the iterator is garbage collected).
+
+    The request data itself is passed through untouched, but
+    ``environ['wsgi.input']`` and ``environ['wsgi.errors']`` are
+    replaced by checking wrappers around the original objects.
+
+    ``application`` is the WSGI application to check.  ``global_conf``
+    is accepted but not used by this function.
+
+    Returns a WSGI application (``lint_app``) that delegates to
+    ``application``.
+    """
+
+    def lint_app(*args, **kw):
+        assert len(args) == 2, "Two arguments required"
+        assert not kw, "No keyword arguments allowed"
+        environ, start_response = args
+
+        check_environ(environ)
+
+        # We use this to check if the application returns without
+        # calling start_response: the list becomes non-empty on the
+        # first call to start_response_wrapper.
+        start_response_started = []
+
+        def start_response_wrapper(*args, **kw):
+            # Wrap args in a 1-tuple: formatting with the bare tuple
+            # would raise TypeError for 0 or 4+ arguments and hide the
+            # intended assertion message.
+            assert len(args) == 2 or len(args) == 3, (
+                "Invalid number of arguments: %s" % (args,))
+            assert not kw, "No keyword arguments allowed"
+            status = args[0]
+            headers = args[1]
+            if len(args) == 3:
+                exc_info = args[2]
+            else:
+                exc_info = None
+
+            check_status(status)
+            check_headers(headers)
+            check_content_type(status, headers)
+            check_exc_info(exc_info)
+
+            start_response_started.append(None)
+            # The real start_response returns a write callable; wrap it
+            # so that calls to it are checked as well.
+            return WriteWrapper(start_response(*args))
+
+        environ['wsgi.input'] = InputWrapper(environ['wsgi.input'])
+        environ['wsgi.errors'] = ErrorWrapper(environ['wsgi.errors'])
+
+        iterator = application(environ, start_response_wrapper)
+        assert iterator is not None and iterator != False, (
+            "The application must return an iterator, if only an empty list")
+
+        check_iterator(iterator)
+
+        return IteratorWrapper(iterator, start_response_started)
+
+    return lint_app
+
+class InputWrapper(object):
+    """
+    Checking proxy for ``wsgi.input``.
+
+    Every read-style call is forwarded to the wrapped object and the
+    type of the result is asserted; ``close()`` always fails.
+    """
+
+    def __init__(self, wsgi_input):
+        self.input = wsgi_input
+
+    def read(self, *args):
+        """Forward to ``input.read`` (at most one argument); result must be a str."""
+        assert len(args) <= 1
+        data = self.input.read(*args)
+        assert type(data) is str
+        return data
+
+    def readline(self, *args):
+        """Forward to ``input.readline``; result must be a str."""
+        data = self.input.readline(*args)
+        assert type(data) is str
+        return data
+
+    def readlines(self, *args):
+        """Forward to ``input.readlines`` (at most one argument); result must be a list of str."""
+        assert len(args) <= 1
+        result = self.input.readlines(*args)
+        assert type(result) is list
+        for entry in result:
+            assert type(entry) is str
+        return result
+
+    def __iter__(self):
+        """Yield lines from ``readline()`` until it returns an empty string."""
+        current = self.readline()
+        while current:
+            yield current
+            current = self.readline()
+
+    def close(self):
+        """Closing the input stream is never allowed for the application."""
+        assert False, "input.close() must not be called"
+
+class ErrorWrapper(object):
+    """
+    Checking proxy for ``wsgi.errors``.
+
+    Writes are forwarded to the wrapped object after asserting that the
+    data is a str; ``close()`` always fails.
+    """
+
+    def __init__(self, wsgi_errors):
+        self.errors = wsgi_errors
+
+    def write(self, s):
+        """Forward ``s`` to ``errors.write``; ``s`` must be a str."""
+        assert type(s) is str
+        self.errors.write(s)
+
+    def flush(self):
+        """Forward to ``errors.flush``."""
+        self.errors.flush()
+
+    def writelines(self, seq):
+        """Send every item of ``seq`` through ``write`` (and its type check)."""
+        for chunk in seq:
+            self.write(chunk)
+
+    def close(self):
+        """Closing the error stream is never allowed for the application."""
+        assert False, "errors.close() must not be called"
+
+class WriteWrapper(object):
+    """
+    Checking proxy for the write callable returned by start_response.
+
+    Instances are callable; each call asserts that its argument is a
+    str and then hands it to the wrapped writer.
+    """
+
+    def __init__(self, wsgi_writer):
+        self.writer = wsgi_writer
+
+    def __call__(self, s):
+        """Pass ``s`` on to the wrapped writer; ``s`` must be a str."""
+        assert type(s) is str
+        self.writer(s)
+
+class PartialIteratorWrapper(object):
+    """
+    Iterable proxy around an application's response iterable.
+
+    Iterating over an instance produces an IteratorWrapper around the
+    wrapped iterable.
+    """
+
+    def __init__(self, wsgi_iterator):
+        self.iterator = wsgi_iterator
+
+    def __iter__(self):
+        # We want to make sure __iter__ is called.
+        # IteratorWrapper takes a mandatory check_start_response
+        # argument; None makes it skip the start_response check, as
+        # this class has no start_response state to hand over.
+        return IteratorWrapper(self.iterator, None)
+
+class IteratorWrapper(object):
+    """
+    Checking proxy around the iterator returned by a WSGI application.
+
+    ``check_start_response`` is either None (no check is made) or an
+    object that is truthy once start_response has been called; it is
+    examined after the first item has been fetched and then dropped.
+    """
+
+    def __init__(self, wsgi_iterator, check_start_response):
+        self.original_iterator = wsgi_iterator
+        self.iterator = iter(wsgi_iterator)
+        self.check_start_response = check_start_response
+        self.closed = False
+
+    def __iter__(self):
+        return self
+
+    def next(self):
+        """Return the next item, asserting the iterator is still open."""
+        assert not self.closed, "Iterator read after closed"
+        item = self.iterator.next()
+        started = self.check_start_response
+        if started is None:
+            # Either no check was requested or it already passed.
+            return item
+        assert started, (
+            "The application returns and we started iterating over "
+            "its body, but start_response has not yet been called")
+        self.check_start_response = None
+        return item
+
+    def close(self):
+        """Mark this wrapper closed and close the wrapped object if it can be."""
+        self.closed = True
+        if not hasattr(self.original_iterator, 'close'):
+            return
+        self.original_iterator.close()
+
+    def __del__(self):
+        # By the time we get here nothing can catch an exception, so
+        # the problem is also written to stderr.
+        if self.closed:
+            return
+        sys.stderr.write(
+            "Iterator garbage collected without being closed")
+        assert self.closed, (
+            "Iterator garbage collected without being closed")
+
+def check_environ(environ):
+    assert type(environ) is DictType, (
+        "Environment is not of the right type: %r (environment: %r)"
+        % (type(environ), environ))
+    
+    for key in ['REQUEST_METHOD', 'SERVER_NAME', 'SERVER_PORT',
+                'wsgi.version', 'wsgi.input', 'wsgi.errors',
+                'wsgi.multithread', 'wsgi.multiprocess',
+                'wsgi.run_once']:
+        assert key in environ, (
+            "Environment missing required key: %r" % key)
+
+    for key in ['HTTP_CONTENT_TYPE', 'HTTP_CONTENT_LENGTH']:
+        assert key not in environ, (
+            "Environment should not have the key: %s "
+            "(use %s instead)" % (key, key[5:]))
+
+    if 'QUERY_STRING' not in environ:
+        warnings.warn(
+            'QUERY_STRING is not in the WSGI environment; the cgi '
+            'module will use sys.argv when this variable is missing, '
+            'so application errors are more likely',