Mike Orr avatar Mike Orr committed a206af2 Merge

Merge.

Comments (0)

Files changed (17)

   html.tags - HTML tags (rails replacement).
   html.tools - larger HTML chunks (rails replacement).
   mail - sending email.
+  misc -  helpers that are neither text, numeric, container, nor date.
+  number - numeric helpers and number formatters.
   paginate - successor to deprecated pagination module.
   text - non-HTML text formatting (rails replacement).
 * Removed dependency on simplejson and normalized quotes. Patch by Elisha 
 ``constants``
     Country codes, states and provinces.
 
+``containers``
+    High-level container objects and dict/list helpers.
+
 ``date``
     Date/time helpers.  These currently format strings based on dates.
 
     ``webhelpers.tools.markdown()``.  (If you use this library directly, you
     may have to wrap the results in ``literal()`` to prevent double escaping.)
 
+``misc``
+    Miscellaneous helpers that are neither text, numeric, container, nor date.
+
+``number``
+    Numeric helpers and number formatters.
+
 ``paginate``
     A tool for letting you view a large sequence a screenful at a time,
     with previous/next links.

tests/test_new_number.py

+from nose.tools import eq_
+
+from webhelpers.number import *
+
+class TestFormatNumber(object):
+    def test_positive(self):
+        eq_(format_number(1234567.89), "1,234,567.89")
+        eq_(format_number(1234567), "1,234,567")
+        eq_(format_number(123456), "123,456")
+        eq_(format_number(12345), "12,345")
+        eq_(format_number(1234), "1,234")
+        eq_(format_number(123), "123")
+        eq_(format_number(12), "12")
+        eq_(format_number(1), "1")
+        eq_(format_number(123.4), "123.4")
+
+    def test_negative(self):
+        eq_(format_number(-1234567.89), "-1,234,567.89")
+        eq_(format_number(-1234567), "-1,234,567")
+        eq_(format_number(-123456), "-123,456")
+        eq_(format_number(-12345), "-12,345")
+        eq_(format_number(-1234), "-1,234")
+        eq_(format_number(-123), "-123")
+        eq_(format_number(-12), "-12")
+        eq_(format_number(-1), "-1")
+        
+    def test_other(self):
+        eq_(format_number(1234.5, " ", ","), "1 234,5")
+        eq_(format_number(1234.5, ".", ","), "1.234,5")
+        eq_(format_number(-1234.5, ".", ","), "-1.234,5")

tests/test_text.py

 from util import WebHelpersTestCase
 import unittest
 
+from nose.tools import eq_
+
 from webhelpers.text import *
 
 class TestTextHelper(WebHelpersTestCase):
     def test_truncate(self):
         self.assertEqual("Hello World!", truncate("Hello World!", 12))
         self.assertEqual("Hello Wor...", truncate("Hello World!!", 12))
+        self.assertEqual("Hello...", truncate("Hello World!!", 12, whole_word=True))
 
+    def test_strip_leading_whitespace(self):
+        # Every line is expected to lose all of its indentation, including
+        # the more deeply indented "return" line.
+        s = "    def fn(x):\n        return x\n"
+        control = "def fn(x):\nreturn x\n"
+        eq_(control, strip_leading_whitespace(s))
 
-if __name__ == '__main__':
-    suite = [unittest.makeSuite(TestTextHelper)]
-    for testsuite in suite:
-        unittest.TextTestRunner(verbosity=1).run(testsuite)
+    # @@MO wrap_paragraphs untested.

unfinished/baseN.py

+"""Contributed by Shazow.
+
+These functions convert an int to/from any base, using any alphabet.
+Hexadecimal, binary, and base64 are three well-known alphabets, but you can 
+also create your own.  Shazow's examples::
+
+    >> baseN_encode(12345678, '01')
+    '101111000110000101001110'
+    >> baseN_encode(12345678, 'ab')
+    'babbbbaaabbaaaababaabbba'
+    >> baseN_encode(12345678, string.letters + string.digits)
+    'ZXP0'
+    >> baseN_decode('101111000110000101001110', '01')
+    12345678
+    >> baseN_decode('babbbbaaabbaaaababaabbba', 'ab')
+    12345678
+    >> baseN_decode('ZXP0', string.letters + string.digits)
+    12345678
+    >> baseN_encode(12345, ['zero ', 'one ', 'two ', 'three ', 'four ', 'five ', 'six ', 'seven ', 'eight ', 'nine '])
+    'one two three four five '
+
+YouTube does this to compress numeric video IDs into a shorter ID string than
+decimal.  This module remains in the unfinished directory because we're not
+sure how useful it is.  ``base64.urlsafe_b64encode`` and decode in the Python
+standard library cover the most common use case, even if those functions have
+awful names.  (URL-safe base 64 is also safe for POSIX filenames.  Standard
+base 64 is *not* filesystem safe because it uses the "/" character.)
+
+Experimental use compressing URL strings to a filesystem-safe alphabet also
+yielded mediocre results.  I (Mike Orr) tried taking the hex MD5 digest of a
+long URL (always 32 hex characters), converting that to long, and then to
+base 64 URLsafe.  This created a string longer than the hex number!  Trying
+various hashlib functions and alphabets produced strings down to 20 chars.
+This savings is not worth the complexity over well-tested-and-understood hex
+MD5 unless the collection of numbers is very large.  Binary MD5 is even
+shorter: 16 bytes, although they must be stored in a binary-safe manner.
+"""
+
+def baseN_encode(n, alphabet):
+    """
+    Given an non-negative int, convert it to a string composed of the given
+    alphabet mapping
+    """
+    result = ''
+    alphabet = list(alphabet)
+    base = len(alphabet)
+    current = int(n)
+    while 1:
+        result = alphabet[current % base] + result
+        current = current // base
+        if not current: break
+    return result
+
+def baseN_decode(s, alphabet):
+    """
+    Given a string, convert it to an int composed of the given alphabet mapping
+    """
+    s = list(s)
+    alphabet = list(alphabet)
+    base = len(alphabet)
+    inverse_alphabet = dict(zip(alphabet, xrange(0, base)))
+    n = 0
+    exp = 0
+    for i in reversed(s):
+        n += inverse_alphabet[i] * (base ** exp)
+        exp += 1
+    return n
+

unfinished/config.py

+"""Helpers for configuration files."""
+
+class ConfigurationError(Exception):
+    """Raised by ``validate_config`` when the configuration fails validation."""
+    pass
+
+def validate_config(config, validator, filename=None):
+    """Validate an application's configuration.
+
+    ``config`` 
+        A dict-like object containing configuration values.
+
+    ``validator``
+        A FormEncode `Schema``.  A ``FancyValidator`` is also acceptable if it
+        operates on a dict of values (not on a single value) and raises
+        ``Invalid`` with a dict of error messages (not a single error message).
+
+    ``filename``
+        The configuration file's path if known.  Paste users should pass
+        ``config.__file__`` here.
+
+    This helper depends on Ian Bicking's FormEncode package.
+    """
+    from formencode import Invalid
+    try:
+        return validator.to_python(config)
+    except Invalid, e:
+        if filename:
+            message = "configuration file '%s'" % filename
+        else:
+            message = "the application configuration"
+        message += " has the following errors: "
+        lines = [message]
+        for key, error in sorted(e.error_dict.iteritems()):
+            message = "    %s: %s" % (key, error)
+            lines.append(message)
+        message = "\n".join(lines)
+        raise ConfigurationError(message)
+        
+
+### This is a lower-level alternative to the validation function above, and
+### may produce more appropriate error messages.  In Pylons, these functions
+### should be called by a fix_config() function called in load_environment()
+### in environment.py
+
+class NotGiven(object):
+    """Sentinel meaning "no default supplied".
+
+    ``require_int`` and ``require_bool`` compare their ``default`` argument
+    against this class so that ``None`` remains usable as a real default.
+    """
+    pass
+
+def require(config, key):
+    if key not in config:
+        raise KeyError("config variable '%s' is required" % key)
+
+def require_int(config, key, default=NotGiven):
+    want_conversion = True
+    if key not in config:
+        if default is NotGiven:
+            raise KeyError("config variable '%s' is required" % key)
+        value = default
+        want_conversion = False  # Bypass in case default is None.
+    if want_conversion:
+        try:
+            value = int(config[key])
+        except ValueError:
+            raise ValueError("config variable '%s' must be int" % key)
+    config[key] = value
+    return value
+
+def require_bool(config, key, default=NotGiven):
+    from paste.deploy.converters import asbool
+    want_conversion = True
+    if key not in config:
+        if default is NotGiven:
+            raise KeyError("config variable '%s' is required" % key)
+        value = default
+        want_conversion = False  # Bypass in case default is None.
+    if want_conversion:
+        try:
+            value = asbool(config[key])
+        except ValueError:
+            tup = key, config[key]
+            raise ValueError("config option '%s' is not true/false: %r" % tup)
+    config[key] = value
+    return value
+
+def require_dir(config, key, create_if_missing=False):
+    from unipath import FSPath as Path
+    try:
+        dir = config[key]
+    except KeyError:
+        msg = "config option '%s' missing"
+        raise KeyError(msg % key)
+    dir = Path(config[key])
+    if not dir.exists():
+        dir.mkdir(parents=True)
+    if not dir.isdir():
+        msg = ("directory '%s' is missing or not a directory "
+               "(from config option '%s')")
+        tup = dir, key
+        raise OSError(msg % tup)

unfinished/containers.py

+"""Functions being considered for webhelpers.containers.
+
+These functions are useful but they're hard to explain and their 
+implementation is more cryptic than we'd like.
+"""
+
+from webhelpers.containers import distribute
+
+def columnize_as_rows(lis, columns, horizontal=False, fill=None):
+    """Like 'zip' but fill any missing elements."""
+    data = distribute(lis, columns, horizontal, fill)
+    rowcount = len(data)
+    length = max(len(x) for x in data)
+    for c, lis in enumerate(data):
+        n = length - len(lis)
+        if n > 0:
+            extension = [fill] * n
+            lis.extend(extension)
+    return zip(*data)
+
+def izip_fill(*iterables, **kw):
+    """Like itertools.izip but use a default value for the missing elements
+       in short lists rather than stopping at the end of the shortest list.
+
+       ``*iterables`` are the iterables to zip.
+       ``default`` is the default value (default ``None``, must be a keyword
+       arg.
+    """
+    iterables = map(iter, iterables)
+    default = kw.pop('default', None)
+    if kw:
+        raise TypeError("unrecognized keyword arguments")
+    columns = len(iterables)
+    columns_range = range(columns)
+    while True:
+        found_data = False
+        row = [None] * columns
+        for i in columns_range:
+            try:
+                row[i] = iterables[i].next()
+                found_data = True
+            except StopIteration:
+                row[i] = default
+        if not found_data:
+            break
+        yield tuple(row)

unfinished/image_size.py

+"""Image helpers
+
+The following module extracts the width and height from an image file with
+no C code or external dependencies.
+
+This version is too complicated and GPL, but serves as an algorithm 
+reference.  It was downloaded from
+http://www.pycode.com/modules/?id=32&tab=download
+
+We need a get_dimensions() function for JPG/PNG/GIF.
+Also, I hesitate to return -1 for an unknown dimension because what would
+a web browser do with it?  Should either return None or raise an exception.
+I've never seen an image without valid dimensions in it, so raising an
+exception may be fine.
+
+WebHelpers/unfinished/multimedia.py contains an alternative
+``get_dimensions()`` function that depends on the Python Imaging Library.
+"""
+
+#!/usr/bin/env python
+# (c) Copyright 2001-2005 Hewlett-Packard Development Company, L.P.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+#
+# Author: Don Welch
+# Ported from Perl's Image::Size module by Randy J. Ray
+# Modified by Perenzo, 2006
+
+import os, os.path, re, struct
+
+xbm_pat = re.compile(r'^#defines*S*s*(d+)s*n#defines*S*s*(d+)', re.IGNORECASE)
+xpm_pat = re.compile(r'"s*(d+)s+(d+)(s+d+s+d+){1,2}s*"', re.IGNORECASE)
+ppm_pat1 = re.compile(r'^#.*', re.IGNORECASE | re.MULTILINE)
+ppm_pat2 = re.compile(r'^(P[1-6])s+(d+)s+(d+)', re.IGNORECASE)
+ppm_pat3 = re.compile(r'IMGINFO:(d+)x(d+)', re.IGNORECASE)
+tiff_endian_pat = re.compile(r'IIx2ax00')
+
+def readin(stream, length, offset=0):
+   if offset != 0:
+       stream.seek(offset, 0)
+   return stream.read(length)
+
+def xbmsize(stream):
+   width, height = -1, -1
+   match = xbm_pat.match(readin(stream, 1024))
+   try:
+       width = int(match.group(1))
+       height = int(match.group(2))
+   except:
+       pass
+   return width, height
+
+def xpmsize(stream):
+   width, height = -1, -1
+   match = re.search(xpm_pat, readin(stream, 1024))
+   try:
+       width = int(match.group(1))
+       height = int(match.group(2))
+   except:
+       pass
+   return width, height
+
+def pngsize(stream): # also does MNG
+   width, height = -1, -1
+   if readin(stream, 4, 12) in ('IHDR', 'MHDR'):
+       height, width = struct.unpack("!II", stream.read(8))
+
+   return width, height
+
+def jpegsize(stream):
+   width, height = -1, -1
+   stream.seek(2)
+   while 1:
+       length = 4
+       buffer = readin(stream, length)
+       try:
+           marker, code, length = struct.unpack("!ccH", buffer)
+       except:
+           break
+       if marker != 'xff':
+           break
+       if 0xc0 <= ord(code) <= 0xc3:
+           length = 5
+           height, width = struct.unpack("!xHH", readin(stream, length))
+       else:
+           readin(stream, length-2)
+   return width, height
+
+def ppmsize(stream):
+   width, height = -1, -1
+   header = re.sub(ppm_pat1, '', readin(stream, 1024))
+   match = ppm_pat2.match(header)
+   typ = ''
+   try:
+       typ = match.group(1)
+       width = int(match.group(2))
+       height = int(match.group(3))
+   except:
+       pass
+   if typ == 'P7':
+       match = ppm_pat3.match(header)
+
+       try:
+           width = int(match.group(1))
+           height = int(match.group(2))
+       except:
+           pass
+   return width, height
+
+def tiffsize(stream):
+   header = readin(stream, 4)
+   endian = ">"
+   match = tiff_endian_pat.match(header)
+   if match is not None:
+       endian = "<"
+   input = readin(stream, 4, 4)
+   offset = struct.unpack('%si' % endian, input)[0]
+   num_dirent = struct.unpack('%sH' % endian, readin(stream, 2, offset))[0]
+   offset += 2
+   num_dirent = offset+(num_dirent*12)
+   width, height = -1, -1
+   while True:
+       ifd = readin(stream, 12, offset)
+       if ifd == '' or offset > num_dirent:
+           break
+       offset += 12
+       tag = struct.unpack('%sH'% endian, ifd[0:2])[0]
+       type = struct.unpack('%sH' % endian, ifd[2:4])[0]
+       if tag == 0x0100:
+           width = struct.unpack("%si" % endian, ifd[8:12])[0]
+       elif tag == 0x0101:
+           height = struct.unpack("%si" % endian, ifd[8:12])[0]
+   return width, height
+
+def bmpsize(stream):
+   width, height = struct.unpack("<II", readin(stream, 8, 18))
+   return width, height
+
+def gifsize(stream):
+   # since we only care about the printed size of the image
+   # we only need to get the logical screen sizes, which are
+   # the maximum extents of the image. This code is much simpler
+   # than the code from Image::Size
+   #width, height = -1, -1
+   buf = readin(stream, 7, 6) # LSx, GCTF, etc
+   height, width, flags, bci, par = struct.unpack('<HHBBB', buf)
+   return width, height
+
+TYPE_MAP = { re.compile('^GIF8[7,9]a')              : ('image/gif', gifsize),
+            re.compile("^xFFxD8")                : ('image/jpeg', jpegsize),
+            re.compile("^x89PNGx0dx0ax1ax0a") : ('image/png', pngsize),
+            re.compile("^P[1-7]")                  : ('image/x-portable-pixmap', ppmsize),
+            re.compile('#defines+S+s+d+')     : ('image/x-xbitmap', xbmsize),
+            re.compile('/* XPM */')            : ('image/x-xpixmap', xpmsize),
+            re.compile('^MMx00x2a')              : ('image/tiff', tiffsize),
+            re.compile('^II*x00')                : ('image/tiff', tiffsize),
+            re.compile('^BM')                      : ('image/x-bitmap', bmpsize),
+            re.compile("^x8aMNGx0dx0ax1ax0a") : ('image/png', pngsize),
+          }
+
+def imagesize(filename, mime_type=''):
+   width, height = -1, -1
+   f = file(filename, 'rb')
+   buffer = f.read(4096)
+   if not mime_type:
+       for t in TYPE_MAP:
+           match = t.search(buffer)
+           if match is not None:
+               mime_type, func = TYPE_MAP[t]
+               break
+   if mime_type and func:
+       f.seek(0)
+       width, height = func(f)
+   else:
+       width, height = -1, -1
+   f.close()
+   return height, width, mime_type
+
+if __name__=="__main__":
+   print imagesize('f:\test.jpg')

unfinished/multimedia.py

+"""Helpers for images, PDFs, etc.
+
+This module is in 'unfinished' because the functions have various dependencies,
+file naming conventions, and cache conventions which may not be appropriate for
+WebHelpers.  This module was written by Mike Orr, except
+``make_pdf_thumbnail2()`` which was written by Chris Barker.
+
+``static_image()`` takes a path relative to the Python application's public
+directory, extracts the width and height from the image file, and returns an
+<img> tag based on the expected public URL, using webhelpers.html.tags.image.
+
+``get_dimensions()`` extracts the width and height from an image file using
+PIL.  It uses an optional dimensions cache for speed.  The cache is a memory
+dict.  Because the files are on disk they're independent of the thread or
+process, so this is sufficiently thread-safe/multiprocess-safe.  It doesn't
+recognize changes to the files unless you invalidate the cache or restart the
+application, but it's assumed the files won't change frequently enough for this
+to be an issue.
+
+``open_image()`` returns a PIL Image object, or None if PIL doesn't recognize
+the file type.
+
+``make_thumb()`` creates a thumbnail of an image in the same directory as the
+original.  The thumbnail is named FILENAME_STEM + "_thumb.jpg".
+
+``get_thumb_path()`` returns the thumbnail path based on the original image
+path, using the naming conventions of ``make_thumb``.
+
+``make_pdf_thumbnail()`` and ``make_pdf_thumbnail2`` create a thumbnail image
+of the first page of a PDF file.  The former depends on ImageMagick which uses
+Ghostscript, the latter depends on Ghostscript directly.  The former seems to
+be more reliable currently.
+
+
+"""
+
+import glob
+import logging
+import os
+import re
+import subprocess
+import sys
+import traceback
+import warnings
+
+import Image     # Python Imaging Library (PIL)
+
+warn = logging.getLogger("multimedia").warn
+
+# Suppress FutureWarning from PIL; we can't do anything about it.
+warnings.filterwarnings('ignore', '.*return a long.*')
+
+THUMB_PIL_TYPE = "JPEG"   # Thumbnail type; one of PIL's output formats.
+THUMB_EXT = ".jpg"        # The filename extension for that type.
+
+# Caches image dimensions for reuse.
+_dimensions_cache = {}
+
+RX_DECODER_NOT_AVAILABLE = re.compile( R"decoder .* not available" )
+
+def static_image(relative_path, alt, **html_attrs):
+    """Create an <img> tag for a path relative to the public directory.
+       
+    If keyword arg ``use_cache`` is false, don't use the global dimensions
+    cache.
+    """
+    use_cache = html_attrs.pop("use_cache", True)
+    if "width" not in html_attrs or "height" not in html_attrs:
+        try:
+            path = Path(config["pylons.paths"]["public_files"], relative_path)
+            width, height = get_dimensions(path, use_cache)
+        except IOError:
+            pass
+        else:
+            if width:
+                html_attrs.setdefault("width", width)
+            if height:
+                html_attrs.setdefault("height", height)
+    # @@MO Temporary kludge due to url_for ambiguity in Routes 1.
+    src = "/" + relative_path
+    return image(src, alt=alt, **html_attrs)
+
+
+def open_image(image_path):
+    """Open an image file in PIL, return the Image object.
+       Return None if PIL doesn't recognize the file type.
+    """
+    try:
+        im = Image.open(image_path)
+    except IOError, e:
+        if str(e) == "cannot identify image file":
+            return None
+        else:
+            raise
+    except:
+        m = "caught exception identifying '%s', assuming non-image:\n%s"
+        e = traceback.format_exc()
+        warn(m, image_path, e)
+        return None
+    return im
+
+def make_thumb(image_path, width):
+    """Make a thumbnail and save it in the same directory as the original.
+
+       See get_thumb_path() for the arguments.
+       @return The thumbnail filename, or None if PIL
+           didn't recognize the image type or has no decoder for it.
+
+       Does NOT work with PDF originals; use make_pdf_thumbnail for those.
+    """
+    dst = get_thumb_path(image_path, width)
+    im = open_image(image_path)
+    if im is None:
+        return None
+    orig_width, orig_height = im.size
+    # Keep the original aspect ratio.
+    height = choose_height(width, orig_width, orig_height)
+    if im.mode == 'P':
+        im = im.convert()   # Convert GIF palette to RGB mode.
+    try:
+        im.thumbnail((width, height), Image.ANTIALIAS)
+    except IOError, e:
+        reason = str(e)
+        if RX_DECODER_NOT_AVAILABLE.search(reason):
+            return None   # PIL error, cannot thumbnail.
+        else:
+            raise
+    im.save(dst, THUMB_PIL_TYPE)
+    return dst
+
+def choose_height(new_width, width, height):
+    """Return the height corresponding to 'new_width' that's proportional
+       to the original size.
+    """
+    proportion = float(height) / float(width)
+    return int(new_width * proportion)
+
+def get_dimensions(image_path, use_cache=False):
+    """Return the width and height of an image.
+       Returns (None, None) if PIL doesn't recognize the file type.
+
+       @param use_cache bool If true, use the cached dimensions if
+       available.  This cuts down on filesystem accesses, but the cache may
+       be wrong if the image has changed.  If false, update the cache anyway
+       so it's correct.
+
+       @exc IOError raised by PIL if the image file is missing or you don't
+       have read permission for it.
+    """
+    image_path = str(image_path)   # Don't need a path object.
+    if use_cache and image_path in _dimensions_cache:
+        return _dimensions_cache[image_path]
+    im = open_image(image_path)
+    if im is None:
+        size = (None, None)
+    else:
+        size = im.size
+    _dimensions_cache[image_path] = size
+    return size
+
+def changed(image_path=None):
+    """Delete all cached data regarding this path because the file has
+       changed.  If arg is unspecified or None, delete all cached data
+       for all paths.
+    """
+    if image_path is None:
+        _dimensions_cache.clear()
+        return
+    if image_path in _dimensions_cache:
+        del _dimensions_cache[image_path]
+
+def get_thumb_path(image_path, width):
+    """Return the thumbnail path for the given image.
+       
+       @parm image_path str The original image filename.
+       @param width int The thumbnail width in pixels.
+       @return path The thumbnail path.
+       For "a/foo.jpg", returns path("a/foo_thumbWIDTH.jpg").
+       The return value always ends with THUMB_EXT regardless of the original
+       extension.
+    """
+    dir, old_name = os.path.split(image_path)
+    base, ext = os.path.splitext(old_name)
+    new_name = "%s_thumb%d%s" % (base, width, THUMB_EXT)
+    return os.path.join(dir, new_name)
+
+def test():
+    print "Height for 600x480 @ width 200 is", choose_height(200, 600, 480)
+    print "Path 200 for a/foo.jpg is", get_thumb_path('a/foo.jpg', 200)
+    print "Path 200 for a/foo.png is", get_thumb_path('a/foo.png', 200)
+
+if __name__ == "__main__":  test()
+
+def make_pdf_thumbnail(path, width):
+    """Make a thumbnail from a PDF file.
+
+       @parm image_path str The original image filename.
+       @param width int The thumbnail width in pixels. (Will be approximate.)
+       @return path The thumbnail path.
+       For "a/foo.jpg", returns path("a/foo_thumbWIDTH.jpg").
+       The return value always ends with THUMB_EXT regardless of the original
+       extension.
+
+       Requires the "imagemagick" package to be installed.  By Mike Orr.
+    """
+    width_str = str(width)
+    dir, name = os.path.split(path)
+    base, ext = os.path.splitext(name)
+    newbase = "%s_thumb%s" % (base, width_str)
+    dst = os.path.join(dir, newbase + THUMB_EXT)
+
+    def page(n):
+        """Return the filename for page n's thumbnail, n >= 0.
+           'n' may also be a string (e.g., "*" for wildcard patterns).
+           If 'n' is None, return value has no page suffix.
+        """
+        if n is not None:
+            suffix = "-%s" % n
+        else:
+            suffix = ""
+        return os.path.join(dir, newbase + suffix + THUMB_EXT)
+
+    trashcan = open("/dev/null", "w")
+    cmd = ["/usr/bin/convert", "-geometry", width_str, path, dst]
+    status = subprocess.call(cmd, shell=False, stderr=trashcan)
+    if status:
+        warn("make_pdf_thumbnail subcommand exited with status %s: %s", 
+            status, cmd)
+    trashcan.close()
+    found = False
+    if os.path.exists(dst):
+        found = True
+    page0_fn = page(0)
+    other_files = glob.glob(page("*"))
+    for fn in other_files:
+        if fn == page0_fn and not found:
+            os.rename(fn, dst)
+            found = True
+        else:
+            os.remove(fn)
+    if found:
+        return dst
+    else:
+        return None
+
+def make_pdf_thumbnail2(path, width):
+    """Make a thumbnail from a PDF file.
+
+       This version uses just ghostscript, rather than ImageMagik
+       -- chb
+
+       @parm image_path str The original image filename.
+       @param width int The thumbnail width in pixels. (Will be approximate -- assumes 8.5in wide paper.)
+       @return path The thumbnail path.
+       For "a/foo.jpg", returns path("a/foo_thumbWIDTH.jpg").
+       The return value always ends with THUMB_EXT regardless of the original
+       extension.
+
+       Requires ghostscript to be installed.  By Chris Barker.
+    """
+    width_str = str(width)
+    dir, name = os.path.split(path)
+    base, ext = os.path.splitext(name)
+    newbase = "%s_thumb%s" % (base, width_str)
+    dst = os.path.join(dir, newbase + THUMB_EXT)
+
+    def page(n):
+        """Return the filename for page n's thumbnail, n >= 0.
+           'n' may also be a string (e.g., "*" for wildcard patterns).
+           If 'n' is None, return value has no page suffix.
+        """
+        if n is not None:
+            suffix = "-%s" % n
+        else:
+            suffix = ""
+        return os.path.join(dir, newbase + suffix + THUMB_EXT)
+
+    trashcan = open("/dev/null", "w")
+    
+    ## A few settable options
+    if THUMB_EXT == ".jpg":
+        filetype = "jpeg" # jpeg
+    elif THUM_EXT == ".png":
+        filetype = "png16m" # 24 bit png
+    else:
+        filetype = "jpeg" # should this be default
+    
+    gs_path = "/usr/local/bin/gs"
+    ps_cmd = "save pop currentglobal true setglobal false/product where{pop product(Ghostscript)search{pop pop pop revision 600 ge{pop true}if}{pop}ifelse}if{/pdfdict where{pop pdfdict begin/pdfshowpage_setpage[pdfdict/pdfshowpage_setpage get{dup type/nametype eq{dup/OutputFile eq{pop/AntiRotationHack}{dup/MediaBox eq revision 650 ge and{/THB.CropHack{1 index/CropBox pget{2 index exch/MediaBox exch put}if}def/THB.CropHack cvx}if}ifelse}if}forall]cvx def end}if}if setglobal"
+    cmd = [gs_path, "-dSAFER","-dBATCH","-dNOPAUSE","-dLastPage=1","-dTextAlphaBits=4"]
+    cmd.append("-sDEVICE=%s"%filetype)
+    #dpi  = int(width / 8.5) ## this assumes an 8.5in wide piece of paper.
+    dpi = 20
+    cmd.append("-r%i"%dpi)
+    
+    cmd.append("-sOutputFile=%s"% dst)
+    cmd.extend(("-c", ps_cmd, "-f"),)
+    cmd.append(path)
+    
+    ## the desired command string
+    ## gs -dSAFER -dBATCH -dNOPAUSE -r150 -sDEVICE=jpeg -dTextAlphaBits=4 -sOutputFile=$1-%02d.jpg $1
+    status = subprocess.call(cmd, shell=False) #, stdout=trashcan, stderr=trashcan)
+    if status:
+        warn("make_pdf_thumbnail subcommand exited with status %s: %s", 
+            status, cmd)
+    trashcan.close()
+    found = False
+    if os.path.exists(dst):
+        return dst
+    else:
+        return None
+    
+
+def get_pdf_text(path):
+    """Placeholder; not implemented.  Always raises NotImplementedError."""
+    raise NotImplementedError
+
+def get_word_text(path):
+    """Placeholder; not implemented.  Always raises NotImplementedError."""
+    raise NotImplementedError
+
+
+# Command-line use: make a 200-pixel thumbnail of the PDF named by the
+# single argument, using the Ghostscript-based implementation.
+if __name__ == "__main__":
+    import optparse
+    logging.basicConfig()
+    parser = optparse.OptionParser(usage="%prog PDF_FILE")
+    opts, args = parser.parse_args()
+    if len(args) != 1:
+        parser.error("wrong number of command-line arguments")
+    source_file = args[0]
+    
+    width = 200
+    dst = make_pdf_thumbnail2(source_file, width)
+    # dst is None if no thumbnail file was produced.
+    print "Thumbnail made:", dst
+
+#ps_cmd = "save pop currentglobal true setglobal false/product where{pop product(Ghostscript)search{pop pop pop revision 600 ge{pop true}if}{pop}ifelse}if{/pdfdict where{pop pdfdict begin/pdfshowpage_setpage[pdfdict/pdfshowpage_setpage get{dup type/nametype eq{dup/OutputFile eq{pop/AntiRotationHack}{dup/MediaBox eq revision 650 ge and{/THB.CropHack{1 index/CropBox pget{2 index exch/MediaBox exch put}if}def/THB.CropHack cvx}if}ifelse}if}forall]cvx def end}if}if setglobal"
+
+#gs -dLastPage=1 -dTextAlphaBits=4 -dGraphicsAlphaBits=4 -dNOPAUSE -dBATCH -sDEVICE=jpeg -r20 -sOutputFile=Chem_Sheet_LPG.jpg -c "save pop currentglobal true setglobal false/product where{pop product(Ghostscript)search{pop pop pop revision 600 ge{pop true}if}{pop}ifelse}if{/pdfdict where{pop pdfdict begin/pdfshowpage_setpage[pdfdict/pdfshowpage_setpage get{dup type/nametype eq{dup/OutputFile eq{pop/AntiRotationHack}{dup/MediaBox eq revision 650 ge and{/THB.CropHack{1 index/CropBox pget{2 index exch/MediaBox exch put}if}def/THB.CropHack cvx}if}ifelse}if}forall]cvx def end}if}if setglobal" -f Chem_Sheet_LPG.pdf
+
+#gs -dTextAlphaBits=4 -dGraphicsAlphaBits=4 -dNOPAUSE -dBATCH -sDEVICE=png16m -r9.06531732174037 -sOutputFile=thb%d.png -c "save pop currentglobal true setglobal false/product where{pop product(Ghostscript)search{pop pop pop revision 600 ge{pop true}if}{pop}ifelse}if{/pdfdict where{pop pdfdict begin/pdfshowpage_setpage[pdfdict/pdfshowpage_setpage get{dup type/nametype eq{dup/OutputFile eq{pop/AntiRotationHack}{dup/MediaBox eq revision 650 ge and{/THB.CropHack{1 index/CropBox pget{2 index exch/MediaBox exch put}if}def/THB.CropHack cvx}if}ifelse}if}forall]cvx def end}if}if setglobal" -f Chem_Sheet_LPG.pdf
+
+

unfinished/opener.py

+"""A unified front end for opening plain or compressed files.
+
+This is in 'unfinished' because a smarter opener could also decode Unicode,
+and check the file's magic number rather than depending on the filename
+extension.  Are these worth implementing?  Would they be more worthwhile
+under Python 3, which might provide a Unicode-aware opener for bz2 and gz?
+"""
+
+def smart_open(filename, mode):
+    """Unified front end for opening plain files and compressed files."""
+    if   filename.endswith(".bz2"):
+        import bz2
+        opener = bz2.BZ2File
+    elif filename.endswith(".gz"):
+        import gzip
+        opener = gzip.open
+    else:
+        opener = open
+    return opener(filename, mode)
+    

unfinished/unit_conversion.py

+"""
+There are several unit conversion modules available for Python.  However, most
+are large and complex.  A very small simple set of converters may be
+appropriate for WebHelpers.  Here are some alternatives.
+
+Chris Barker wrote some conversion tables (see module body) and recommends a
+general convert function::
+
+    convert(type, unit, to_unit, value)
+
+``type`` is a unit type such as "Length", "Volume", "Temperature", "Mass",
+etc.  ``unit`` and ``to_unit`` are two units of that type.  ``value`` is the
+number you wish to convert.  The result is the value converted to ``to_unit``.
+
+The Python Cookbook has a recipe called "Unit-safe measured quantities" 
+(http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/270589)
+by George Sakkis that does something similar.  Values are created in a more
+conventional manner.  This example shows creating two length values, adding
+them, comparing them, displaying them in a particular unit, and changing the
+default display unit::
+
+    l1 = Length(12.3,'cm')
+    l2 = Length(50,'mm')
+    Length.setDefaultUnit("in")
+    print l1+l2             # "6.811024 in"
+    print 3*l2.value("m")   # "0.15"
+    print l1>l2             # "True"
+
+Sakkis notes that values stored as plain floats tend to get confused when
+multiple units or external modules are involved.  However, they can't be stored
+in a numeric database field.  This implementation also doesn't handle aliases
+(different names for the same unit), and has concrete classes only for Length
+and Temperature.  It may be worthwhile to refactor this to use a converter like
+Barker's, and then plug Barker's tables and aliases into it.  This would allow
+users to choose between plain floats and Sakkis objects as desired, and to also
+make canned converters as partials.
+
+The Unum project is even more sophisticated, with a large collection of units
+that can be used as Python operands.  For instance, ``3 * M/S`` creates a "3
+meters per second" object.  Compatible values can be added, and plugged into
+Numeric matrices.  In spite of this it's pure Python and has no nonstandard
+dependencies.  However, it may be slower than other implementations due to the
+overhead of the magic.  And it has a high learning curve, perhaps too high for
+non-mathematical users with simple needs.
+
+    * Unum project home:  http://sourceforge.net/projects/unum/
+    * Tutorial: http://home.scarlet.be/be052320/Unum_tutorial.html
+    * FAQ (including limitations):  http://home.scarlet.be/be052320/faq.html
+"""
+
+### Chris Barker's conversion tables.
+### (Additional scientific tables are available.)
+
+ConvertDataUnits = {
+# All lengths in terms of meter
+
+"Length" : {"meter"      : (1.0,["m","meters","metre"]),
+            "centimeter" : (0.01,["cm", "centimeters"]),
+            "millimeter"  : (0.001,["mm","millimeters"]),
+            "micron"  : (0.000001,["microns"]),
+            "kilometer"  : (1000.0,["km","kilometers"]),
+            "foot"        : (0.3048,["ft", "feet"]),
+            "inch"      : (0.0254,["in","inches"]),
+            "yard"       : (0.9144,[ "yrd","yards"]),
+            "mile"       : (1609.344,["mi", "miles"]),
+            "nautical mile" : (1852.0,["nm","nauticalmiles"]),
+            "fathom"  : (1.8288,["fthm", "fathoms"]),
+            "latitude degree": (111120.0,["latitudedegrees"]),
+            "latitude minute": (1852.0,["latitudeminutes"])
+            },
+
+# All Areas in terms of square meter
+"Area" : {"square meter"  : (1.0,["m^2","sq m","squaremeter"]),
+          "square centimeter": (.0001,["cm^2","sq cm"]),
+          "square kilometer"  : (1e6,["km^2","sq km","squarekilometer"]),
+          "acre"  : (4046.8564,["acres"]),
+          "square mile"  : (2589988.1,["sq miles","squaremile"]),
+          "square yard"  : (0.83612736,["sq yards","squareyards"]),
+          "square foot"  : (0.09290304,["ft^2", "sq foot","square feet"]),
+          "square inch"  : (0.00064516,["in^2", "sq inch","square inches"]),
+          "hectar"  : (10000.0,["hectares"]),
+          },
+
+# All volumes in terms of cubic meter
+"Volume" : {"cubic meter"  : (1.0,["m^3","cu m","cubic meters"]),
+            "cubic centimeter"  : (1e-6,["cm^3","cu cm"]),
+            "barrels (petroleum)" : (.1589873,["bbl","barrels","barrel","bbls",]),
+            "liter"        : (1e-3,["l","liters"]),
+            "gallon"       : (0.0037854118, ["gal","gallons","gallon","usgal"]),
+            "gallon (UK)"  : (0.004546090, ["ukgal","gallons(uk)"]),
+            "million US gallons"  : (3785.4118, ["milliongallons","milgal"]),
+            "cubic foot"    : (0.028316847, ["ft^3","cu feet","cubicfeet"]),
+            "cubic inch"    : (16.387064e-6, ["in^3","cu inch","cubicinches"]),
+            "cubic yard"    : (.76455486, ["yd^3","cu yard","cubicyard","cubicyards"]),
+            "fluid oz"      : (2.9573530e-5, ["oz","ounces(fluid)", "fluid oz"]),
+            "fluid oz (UK)" : (2.841306e-5, ["ukoz", "fluid oz(uk)"]),
+            },
+
+# All Temperature units in K (multiply by, add)
+"Temperature" : {"Kelvin"  : ((1.0, 0.0),["K","degrees k","degrees k","degrees kelvin","degree kelvin","deg k"]),
+                 "centigrade"     : ((1.0, 273.16),["C","degrees c","degrees celsius","degree celsius","deg c"]),
+                 "farenheight"  : ((0.55555555555555558, (273.16*9/5 - 32) ),["F","degrees f","degree f","degrees farenheight","deg f"]),
+                 },
+
+# All Mass units in Kg (weight is taken to be mass at standard g)
+"Mass" : {"kilograms"  : (1.0,["kg","kilogram"]),
+          "pound"     : (0.45359237,["lb","pounds","lbs"]),
+          "gram"  : (.001,["g","grams"]),
+          "ton"   : (907.18474, ["tons","uston"]),
+          "metric ton" : (1000.0, ["tonnes","metric tons"]),
+          "slug"       : (14.5939, ["slugs"]),
+          "ounce"       : (.028349523, ["oz","ounces"]),
+          "ton(UK)"       : (1016.0469, ["ukton","long ton"]),
+          },
+
+# All Time In second
+"Time" : {"second"  : (1.0,["sec","seconds"]),
+          "minute"  : (60.0,["min","minutes"]),
+          "hour"    : (3600.0,["hr","hours","hrs"]),
+          "day"     : (86400.0,["day","days"]),
+          },
+# All Velocities in meter per second
+"Velocity" : {"meter per second"  : (1.0,["m/s","meters per second","mps"]),
+              "centimeter per second"  : (.01,["cm/s"]),
+              "kilometer per hour"  : (0.277777,["km/h", "km/hr"]),
+              "knot"  : (0.514444,["knots","kts"]),
+              "mile per hour"  : (0.44704,["mph","miles per hour"]),
+              "foot per second"  : (0.3048,["ft/s", "feet per second", "feet/s"]),
+              },
+}
+
+
+# Aliases should be stored in a normalized manner to prevent spelling
+# variations from causing lookup failures.  Barker uses the following
+# normalizer:
+#
+#    def normalize_unit(unit):
+#        return "".join(unit.lower().split())
+#
+# Unit arguments are then filtered by this before lookup.

webhelpers/containers.py

+"""Container objects and list/dict helpers.
+
+I would have called this "collections" except that Python 2 can't import a
+top-level module that's the same name as a module in the current package.
+"""
+
+import sys
+
+# Use the standard library's ``defaultdict`` when it exists; otherwise fall
+# back to the pure-Python implementation below.
+try:
+    from collections import defaultdict
+except ImportError:   # Python < 2.5
+    class defaultdict(dict):
+        """Backport of Python 2.5's ``defaultdict``.
+
+        From the Python Cookbook.  Written by Jason Kirtland.
+        http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/523034
+        """
+        def __init__(self, default_factory=None, *a, **kw):
+            # The factory must be callable or None; the remaining arguments
+            # are passed straight to ``dict``.
+            if (default_factory is not None and
+                not hasattr(default_factory, '__call__')):
+                raise TypeError('first argument must be callable')
+            dict.__init__(self, *a, **kw)
+            self.default_factory = default_factory
+        def __getitem__(self, key):
+            # A failed lookup is routed to ``__missing__``.
+            try:
+                return dict.__getitem__(self, key)
+            except KeyError:
+                return self.__missing__(key)
+        def __missing__(self, key):
+            # Without a factory this behaves like a plain dict (KeyError);
+            # otherwise the new default is stored under ``key`` and returned.
+            if self.default_factory is None:
+                raise KeyError(key)
+            self[key] = value = self.default_factory()
+            return value
+        def __reduce__(self):
+            # Reconstruct as ``type(self)(*args)``; the fifth element
+            # supplies the (key, value) pairs.
+            if self.default_factory is None:
+                args = tuple()
+            else:
+                args = self.default_factory,
+            return type(self), args, None, None, self.items()
+        def copy(self):
+            return self.__copy__()
+        def __copy__(self):
+            # Shallow copy: same factory, same value objects.
+            return type(self)(self.default_factory, self)
+        def __deepcopy__(self, memo):
+            # The factory itself is shared; only the items are deep-copied.
+            # ``memo`` is not forwarded to ``copy.deepcopy``.
+            import copy
+            return type(self)(self.default_factory,
+                              copy.deepcopy(self.items()))
+        def __repr__(self):
+            return 'defaultdict(%s, %s)' % (self.default_factory,
+                                            dict.__repr__(self))
+
+class NoDefault(object):
+    """Sentinel meaning "no default was supplied".
+
+    The class object itself is used as the marker (compared with ``is``),
+    which lets ``None`` remain a legitimate default value.
+    """
+    pass
+
+
+class DumbObject(object):
+    """A container for arbitrary attributes.
+
+    Usage:
+    >>> do = DumbObject(a=1, b=2)
+    >>> do.b
+    2
+    
+    Alternatives to this class include ``collections.namedtuple`` in Python
+    2.6, and ``formencode.declarative.Declarative`` in Ian Bicking's FormEncode
+    package.  Both alternatives offer more featues, but ``DumbObject``
+    shines in its simplicity and lack of dependencies.
+    """
+    def __init__(self, **kw):
+        self.__dict__.update(kw)
+
+
 class Flash(object):
     """Accumulate a list of messages to show at the next page request.
 
         messages = session.pop(self.session_key, [])
         session.save()
         return messages
+
+
+class Counter(object):
+    """I count the number of occurrences of each value registered with me.
+    
+    Usage:
+    >>> counter = Counter()
+    >>> counter("foo")
+    >>> counter("bar")
+    >>> counter("foo")
+    >>> sorted(counter.result.items())
+    [('bar', 1), ('foo', 2)]
+
+    >> counter.result
+    {'foo': 2, 'bar': 1}
+
+    To see the most frequently-occurring items in order:
+
+    >>> counter.get_popular(1)
+    [(2, 'foo')]
+    >>> counter.get_popular()
+    [(2, 'foo'), (1, 'bar')]
+
+    Or if you prefer the list in item order:
+
+    >>> counter.get_sorted_items()
+    [('bar', 1), ('foo', 2)]
+    """
+
+    def __init__(self):
+        self.result = defaultdict(int)
+        self.total = 0  # Number of times instance has been called.
+
+    def __call__(self, item):
+        """Register an item with the counter."""
+        self.result[item] += 1
+        self.total += 1
+
+    def get_popular(self, max_items=None):
+        """Return the results as as a list of (count, item) pairs, with the
+        most frequently occurring items first.
+        If ``max_items`` is provided, return no more than that many items.
+        """
+        data = [(x[1], x[0]) for x in self.result.iteritems()]
+        data.sort(key=lambda x: (sys.maxint - x[0], x[1]))
+        if max_items:
+            return data[:max_items]
+        else:
+            return data
+
+    def get_sorted_items(self):
+        """Return the result as a list of (item, count) pairs sorted by item.
+        """
+        data = self.result.items()
+        data.sort()
+        return data
+
+class Accumulator(object):
+    """Accumulate a dict of all values for each key.
+
+    Usage:
+    >>> bowling_scores = Accumulator()
+    >>> bowling_scores("Fred", 0)
+    >>> bowling_scores("Barney", 10)
+    >>> bowling_scores("Fred", 1)
+    >>> bowling_scores("Barney", 9)
+    >>> sorted(bowling_scores.result.items())
+    [('Barney', [10, 9]), ('Fred', [0, 1])]
+
+    >> bowling_scores.result
+    {'Fred': [0, 1], 'Barney': [10, 9]}
+
+    The values are stored in the order they're registered.
+
+    Alternatives to this class include ``paste.util. multidict.MultiDict``
+    in Ian Bicking's Paste package.
+    """
+
+    def __init__(self):
+        self.result = defaultdict(list)
+
+    def __call__(self, key, value):
+        self.result[key].append(value)
+
+
+class UniqueAccumulator(object):
+    """Accumulate a dict of unique values for each key.
+
+    The values are stored in an unordered set.
+    """
+
+    def __init__(self):
+        self.result = defaultdict(set)
+
+    def __call__(self, key, value):
+        self.result[key].add(value)
+
+
+def unique(it):
+    """Return a list of unique elements in the iterable, preserving the order.
+
+    Usage:
+    >>> unique([None, "spam", 2, "spam", "A", "spam", "spam", "eggs", "spam"])
+    [None, 'spam', 2, 'A', 'eggs']
+    """
+    seen = set()
+    ret = []
+    for elm in it:
+        if elm not in seen:
+            ret.append(elm)
+            seen.add(elm)
+    return ret
+
+def only_some_keys(dic, *keys):
+    """Return a copy of the dict with only the specified keys present.  
+    
+    ``dic`` may be any mapping; the return value is always a Python dict.
+    """
+    ret = {}
+    for key in keys:
+        ret[key] = dic[key]   # Raises KeyError.
+    return ret
+
+def except_keys(dic, *keys):
+    """Return a copy of the dict without the specified keys.
+    """
+    ret = dic.copy()
+    for key in keys:
+        try:
+            del ret[key]
+        except KeyError:
+            pass
+    return ret
+
+def extract_keys(dic, *keys):
+    """Return two copies of the dict.  The first has only the keys
+       specified.  The second has all the *other* keys from the original dict.
+    """
+    for k in keys:
+        if k not in dic:
+            raise KeyError("key %r is not in original mapping" % k)
+    r1 = {}
+    r2 = {}
+    for k, v in dic.items():
+        if k in keys:
+            r1[k] = v
+        else:
+            r2[k] = v
+    return r1, r2
+
+def ordered_items(dic, key_order, other_keys=True, default=NoDefault):
+    """Like dict.iteritems() but with a specified key order.
+
+    ``dic`` is any mapping.
+    ``key_order`` is a list of keys.  Items will be yielded in this order.
+    ``other_keys`` is a boolean.
+    ``default`` is a value returned if the key is not in the dict.
+
+    This yields the items listed in ``key_order``.  If a key does not exist
+    in the dict, yield the default value if specified, otherwise skip the
+    missing key.  Afterwards, if ``other_keys`` is true, yield the remaining
+    items in an arbitrary order.
+
+    Usage:
+    >>> dic = {"To": "you", "From": "me", "Date": "2008/1/4", "Subject": "X"}
+    >>> dic["received"] = "..."
+    >>> order = ["From", "To", "Subject"]
+    >>> list(ordered_items(dic, order, False))
+    [('From', 'me'), ('To', 'you'), ('Subject', 'X')]
+    """
+    d = dict(dic)
+    for key in key_order:
+        if key in d:
+            yield key, d.pop(key)
+        elif default is not NoDefault:
+            yield key, default
+    if other_keys:
+        for key, value in d.iteritems():
+            yield key, value
+
+def del_quiet(dic, *keys):
+    """Delete several keys from a dict, ignoring those that don't exist.
+    
+    This modifies the dict in place.
+    """
+    for key in keys:
+        try:
+            del dic[key]
+        except KeyError:
+            pass
+
+def dict_of_dicts(dicts, key):
+    """Correlate several dicts under one superdict.
+
+    E.g., If you have several dicts each with a 'name' key, this will
+    create a superdict containing each dict keyed by name.
+    """
+    ret = {}
+    i = 0
+    for d in dicts:
+        try:
+            my_key = d[key]
+        except KeyError:
+            msg = "'dicts' element %d contains no key '%s'"
+            tup = i, key 
+            raise KeyError(msg % tup)
+        ret[my_key] = d
+        i += 1
+    return ret
+
+
+def dict_of_objects(objects, attr):
+    """Correlate several dict under one dict.
+
+    E.g., If you have several objects each with a 'name' attribute, this will
+    create a dict containing each object keyed by name.
+    """
+    ret = {}
+    i = 0
+    for obj in objects:
+        try:
+            my_key = getattr(obj, attr)
+        except AttrError:
+            msg = "'%s' object at 'objects[%d]' contains no attribute '%s'"
+            tup = type(obj).__name__, i, attr 
+            raise AttributeError(msg % tup)
+        ret[my_key] = obj
+        i += 1
+    return ret
+
+
+def distribute(lis, columns, horizontal=False, fill=None):
+    """Distribute a list into a N-column table (list of lists).
+
+    Each list in the return value represents one row of the table.
+    table[0] is the first row.
+    table[0][1] is the first column in the first row.
+    
+    If ``horizontal`` is true, the elements are distributed horizontally,
+    filling each row before going on to the next.  Use this if you're building
+    an HTML table.  If the data runs out before the last row is completed,
+    the remaining cells are filled with the ``fill`` value to ensure all rows
+    are equal length.
+    
+    If false (default), the elements are distributed vertically, filling all
+    table[N][0] elements before going to table[N][1].  The column length is
+    calculated to ensure the smallest number of extra cells in the last
+    column.  Extra cells are filled with the ``fill`` value.  This structure
+    is useful to produce a list of words that can be output left to right but
+    is alphabetical vertically like a dictionary or file listing.  It's also
+    useful for HTML tables when an entire "column" will be placed in a single
+    <td>, perhaps with a <br> or <li> between elements.
+    """
+    if columns < 1:
+        raise ValueError("arg 'columns' must be >= 1")
+    if horizontal:
+        ret = []
+        for i in range(0, len(lis), columns):
+            row = lis[i:i+columns]
+            row_len = len(row)
+            if row_len < columns:
+                extra = [fill] * (columns - row_len)
+                row.extend(extra)
+            ret.append(row)
+        return ret
+    lis_len = len(lis)
+    column_len, remainder = divmod(lis_len, columns)
+    if remainder:
+        column_len += 1
+    ret = [None] * columns
+    for i in range(columns):
+        start = i * column_len
+        end = min(start + column_len, lis_len)
+        #print "i=%d, start=%d, end=%d, element=%r" % (i, start, end, lis[start:end])
+        ret[i] = lis[start:end]
+    return ret
+
+
+# When executed as a script, run the doctests embedded in this module.
+if __name__ == "__main__":
+    import doctest
+    doctest.testmod()

webhelpers/date.py

     return ", ".join(return_strings[:-1]) + " and " + return_strings[-1]
 
 
-def time_ago_in_words(from_time, grandularity="second", round=False):
+def time_ago_in_words(from_time, granularity="second", round=False):
     """
     Return approximate-time-distance string for ``from_time`` till now.
 
     
     """
     return distance_of_time_in_words(from_time, datetime.now(), 
-        grandularity, round)
+        granularity, round)
 

webhelpers/misc.py

+"""Helpers that are neither text, numeric, container, nor date.
+"""
+
+def all(seq, pred=None):
+    """Is ``pred(elm)`` true for all elements?
+
+    With the default predicate, this is the same as Python 2.5's ``all()``
+    function; i.e., it returns true if all elements are true.
+
+    From recipe in itertools docs.
+    """
+    for elem in itertools.ifilter(pred, seq):
+        return True
+    return False
+
+def any(seq, pred=None):
+    """Is ``pred(elm)`` is true for any element?
+
+    With the default predicate, this is the same as Python 2.5's ``any()``
+    function; i.e., it returns true if any element is true.
+
+    From recipe in itertools docs.
+    """
+    for elem in itertoos.ifilterfalse(pred, seq):
+        return False
+    return True
+
+def no(seq, pred=None):
+    """Is ``pred(elm)`` true for no elements?
+
+    With the default predicate, this returns true if all elements are false.
+
+    From recipe in itertools docs.
+    """
+    for elem in ifilter(pred, seq):
+        return False
+    return True
+
+def count_true(seq, pred=lambda x: x):
+    """How many elements is ``pred(elm)`` true for?
+
+    With the default predicate, this counts the number of true elements.
+
+    This is equivalent to the ``itertools.quantify`` recipe, which I couldn't
+    get to work.
+    """
+    ret = 0
+    for x in seq:
+        if pred(x):
+            ret += 1
+    return ret
+
+def convert_or_none(value, type_):
+    """Return the value converted to the type, or None if error.
+       ``type_`` may be a Python type or any function.
+    """
+    try:
+        return type_(value)
+    except Exception:
+        return None
+
+class DeclarativeException(Exception):
+    """A simpler way to define an exception with a fixed message.
+
+    Example:
+    class MyException(DeclarativeException):
+        message="can't frob the bar when foo is enabled"
+    """
+    message=""
+
+    def __init__(self, message=None):
+        Exception.__init__(self, message or self.message)

webhelpers/number.py

+"""Number formatting and numeric helpers"""
+
+import re
+
+def percent_of(part, whole):
+    """What percent of ``whole`` is ``part``?
+
+    >>> percent_of(5, 100)
+    5.0
+    >>> percent_of(13, 26)
+    50.0
+    """
+    # Use float to force true division.
+    return float(part * 100) / whole
+
+def mean(r):
+    """Return the mean of a sequence of numbers.
+    
+    The mean is the average of all the numbers.
+
+    >>> mean([5, 10])
+    7.5
+    """
+    try:
+        return float(sum(r)) / len(r)
+    except ZeroDivisionError:
+        raise ValueError("can't calculate mean of empty collection")
+
+average = mean
+
+def median(r):
+    """Return the median of an iterable of numbers.
+
+    The median is the point at which half the numbers are lower than it and
+    half the numbers are higher.  This gives a better sense of the majority
+    level than the mean (average) does, because the mean can be skewed by a few
+    extreme numbers at either end.  For instance, say you want to calculate
+    the typical household income in a community and you've sampled four
+    households:
+
+    >>> incomes = [18000]       # Fast food crew
+    >>> incomes.append(24000)   # Janitor
+    >>> incomes.append(32000)   # Journeyman
+    >>> incomes.append(44000)   # Experienced journeyman
+    >>> incomes.append(67000)   # Manager
+    >>> incomes.append(9999999) # Bill Gates
+    >>> median(incomes)
+    49500.0
+    >>> mean(incomes)
+    1697499.8333333333
+
+    The median here is somewhat close to the majority of incomes, while the
+    mean is far from anybody's income.
+
+        20 000,
+        40 000,
+        60 000,
+        9 999 999] 
+    The median would be around 50 000, which is close to what the majority of
+    respondents make.  The average would be in the millions, which is far from
+    what any of the respondents make.
+    
+    This implementation makes a temporary list of all numbers in memory.
+    """
+    s = list(r)
+    s_len = len(s)
+    if s_len == 0:
+        raise ValueError("can't calculate mean of empty collection")
+    s.sort()
+    center = s_len // 2
+    is_odd = s_len % 2
+    if is_odd:
+        return s[center]   # Return the center element.
+    # Return the average of the two elements nearest the center.
+    low = s[center-1]
+    high = s[center+1]
+    return mean([low, high])
+
+def standard_deviation(r):
+    """Standard deviation, from the Python Cookbook
+    http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/442412
+
+    Standard deviation shows the variability within a sequence of numbers.
+    A small standard deviation shows the numbers are close to the same.  A
+    large standard deviation shows they are widely different.  In fact it
+    shows how far the numbers tend to deviate from the average.  This can be
+    used to detect whether the average has been skewed by a few extremely high
+    or extremely low values.
+
+    The following examples are taken from Wikipedia.
+    http://en.wikipedia.org/wiki/Standard_deviation
+
+    >>> standard_deviation([0, 0, 14, 14])
+    8.0829037686547611
+    >>> standard_deviation([0, 6, 8, 14])
+    5.7735026918962582
+    >>> standard_deviation([6, 6, 8, 8])
+    1.1547005383792515
+
+    (Wikipedia reports 7, 5, and 1 respectively. Some of the difference is
+    due to rounding, but the rest may be a bug?)
+
+    # Fictitious average monthly temperatures in Southern California.
+    #                       Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
+    >>> standard_deviation([70, 70, 70, 75, 80, 85, 90, 95, 90, 80, 75, 70]) 
+    9.0033663737851999
+
+    # Fictitious average mothly temperatures in Montana.
+    #                       Jan  Feb  Mar Apr May Jun Jul  Aug Sep Oct Nov Dec
+    >>> standard_deviation([-32, -10, 20, 30, 60, 90, 100, 80, 60, 30, 10, -32])
+    45.137836040557403
+
+    Most natural and random phenomena follow the normal distribution (aka the
+    bell curve), which says that most values are close to average but a few are
+    extreme.  E.g., most people are close to 5'9" tall but a few are very tall
+    or very short.  If the data does follow the bell curve, 68% of the values
+    will be within 1 standard deviation (stdev) of the average, and 95% will be
+    within 2 standard deviations.  So a university professor grading exams on a
+    curve might give a "C" (mediocre) grade to students within 1 stdev of the
+    average score, "B" (better than average) to those within 2 stdevs above,
+    and "A" (perfect) to the 0.25% higher than 2 stdevs.  Those between 1 and 2
+    stdevs below get a "D" (poor), and those below 2 stdevs... we won't talk
+    about them.
+
+    a large standard
+    i.e., how far they deviate from the average.
+    If all numbers are the same, the standard deviation is zero.  If the
+    numbers are widely different from average, no matter whether above or
+    below, the standard deviation will be high.  Most natural distributions
+    follow the bell curve and have a standard deviation of 1.
+    """
+    avg = average(r)
+    sdsq = sum([(i - avg) ** 2 for i in r])
+    return (sdsq / (len(r) - 1 or 1)) ** 0.5
+
+
+class SimpleStats(object):
+    """Calculate a few simple stats on data.
+    
+    This class calculates the minimum, maximum, and count of all the values
+    given to it.  The values are not saved in the object.  Usage:
+
+    >>> stats = SimpleStats()
+    >>> stats(2)               # Add one data value.
+    >>> stats.extend([6, 4])   # Add several data values at once.  
+
+    The statistics are available as instance attributes:
+
+    >>> stats.count
+    3
+    >>> stats.min
+    2
+    >>> stats.max
+    6
+
+    Non-numeric data is also allowed:
+
+    >>> stats2 = SimpleStats()
+    >>> stats2("foo")
+    >>> stats2("bar")
+    >>> stats2.count
+    2
+    >>> stats2.min
+    'bar'
+    >>> stats2.max
+    'foo'
+
+    If the ``numeric`` constructor arg is true, only ``int``, ``long``, and 
+    ``float`` values will be accepted.  This flag is intended to enable
+    additional numeric statistics, although none are currently implemented.
+
+    ``.min`` and ``.max`` are ``None`` until the first data value is
+    registered.
+
+    Subclasses can override ``._init_stats`` and ``._update_stats`` to add
+    additional statistics.
+    """
+    __version__ = 1
+
+    def __init__(self, numeric=False):
+        self.numeric = numeric
+        self.count = 0
+        self.min = None
+        self.max = None
+        self._init_stats()
+        
+    def __nonzero__(self):
+        """The instance is true if it has seen any data."""
+        return bool(self.count)
+
+    def __call__(self, value):
+        """Add a data value."""
+        if self.numeric:
+            value + 0   # Raises TypeError if value is not numeric.
+        if self.count == 0:
+            self.min = self.max = value
+        else:
+            self.min = min(self.min, value)
+            self.max = max(self.max, value)
+        self.count += 1
+        self._update_stats(value)
+
+    def extend(self, values):
+        """Add several data values at once, akin to ``list.extend``."""
+        for value in values:
+            self(value)
+
+    ### Hooks for subclasses
+    def _init_stats(self):
+        """Initialize state data used by subclass statistics."""
+        pass
+
+    def _update_stats(self, value):
+        """Add a value to the subclass statistics."""
+        pass
+
+
+class Stats(SimpleStats):
+    """A container for data and statistics.
+
+    This class extends ``SimpleStats`` by calculating additional statistics,
+    and by storing all data seen.  All values must be numeric (``int``,
+    ``long``, and/or ``float``), and you must call ``.finish()`` to generate
+    the additional statistics.  That's because the statistics here cannot be
+    calculated incrementally, but only after all data is known.
+
+    
+    >>> stats = Stats()
+    >>> stats.extend([5, 10, 10])
+    >>> stats.count
+    3
+    >>> stats.finish()
+    >>> stats.mean
+    8.3333333333333339
+    >>> stats.median
+    10
+    >>> stats.standard_deviation
+    2.8867513459481287
+
+    All data is stored in a list and a set for later use:
+
+    >>> stats.list
+    [5, 10, 10]
+
+    >>  stats.set
+    set([5, 10])
+
+    (The double prompt ">>" is used to hide the example from doctest.)
+
+    The stat attributes are ``None`` until you call ``.finish()``.  It's
+    permissible -- though not recommended -- to add data after calling
+    ``.finish()`` and then call ``.finish()`` again.  This recalculates the
+    stats over the entire data set.
+
+    The ``SimpleStats`` hook methods are available for subclasses, and 
+    additionally the ``._finish_stats`` method.
+    """
+    __version__ = 1
+
+    def __init__(self):
+        SimpleStats.__init__(self, numeric=True)
+        self.list = []
+        self.set = set()
+        self.mean = None
+        self.median = None
+        self.standard_deviation = None
+        self._init_stats()
+
+    def __call__(self, value):
+        if self.count == 0:
+            self.min = self.max = value
+        else:
+            self.min = min(self.min, value)
+            self.max = max(self.max, value)
+        self.count += 1
+        self._update_stats(value)
+        self.list.append(value)
+        self.set.add(value)
+
+    def finish(self):
+        self.mean = mean(self.list)
+        self.median = median(self.list)
+        self.standard_deviation = standard_deviation(self.list)
+        self._finish_stats()
+
+    ### Hooks for subclasses.
+    def _finish_stats(self):
+        """Finish the subclass statistics now that all data are known."""
+        pass
+
+
+def format_number(n, thousands=",", decimal="."):
+    """Format a number with a thousands separator and decimal delimiter.
+
+    ``n`` may be an int, long, float, or numeric string.
+    ``thousands`` is a separator to put after each thousand.
+    ``decimal`` is the delimiter to put before the fractional portion if any.
+
+    Both separators are inserted literally, so they may contain any
+    characters (including backslashes).  Only the portion of ``str(n)``
+    before the first "." receives thousands separators.
+
+    The default style has a thousands comma and decimal point per American
+    usage:
+
+    >>> format_number(1234567.89)
+    '1,234,567.89'
+    >>> format_number(123456)
+    '123,456'
+    >>> format_number(-123)
+    '-123'
+
+    Various European and international styles are also possible:
+
+    >>> format_number(1234567.89, " ")
+    '1 234 567.89'
+    >>> format_number(1234567.89, " ", ",")
+    '1 234 567,89'
+    >>> format_number(1234567.89, ".", ",")
+    '1.234.567,89'
+    """
+    parts = str(n).split(".")
+    # Put the separator after every digit that is followed by a whole number
+    # of three-digit groups.  A callable replacement is used so that
+    # ``thousands`` is never interpreted as a regex replacement template.
+    parts[0] = re.sub(
+        r"(\d)(?=(\d\d\d)+(?!\d))",
+        lambda match: match.group(1) + thousands,
+        parts[0])
+    return decimal.join(parts)
+
+# When this module is executed directly as a script, run the doctests
+# embedded in its docstrings.
+if __name__ == "__main__":
+    import doctest
+    doctest.testmod()

webhelpers/text.py

 """
 
 import re
+import textwrap
 
-__all__ = ["truncate", "excerpt"]
+__all__ = [
+    "truncate", 
+    "excerpt",
+    "plural",
+    "chop_at",
+    "lchop",
+    "rchop",
+    "strip_leading_whitespace",
+    "wrap_paragraphs",
+    ]
 
-def truncate(text, length=30, truncate_string='...'):
+def truncate(text, length=30, indicator='...', whole_word=False):
     """Truncate ``text`` with replacement characters.
     
     ``length``
         The maximum length of ``text`` before replacement
-    ``truncate_string``
+    ``indicator``
         If ``text`` exceeds the ``length``, this string will replace
         the end of the string
+    ``whole_word``
+        If true, shorten the string further to avoid breaking a word in the
+        middle.  A word is defined as any string not containing whitespace.
+        If the entire text before the break is a single word, it will have to
+        be broken.
 
     Example::
 
         'Once upon a...'
         
     """
-    if not text: return ''
-    
-    new_len = length-len(truncate_string)
-    if len(text) > length:
-        return text[:new_len] + truncate_string
-    else:
+    if not text: 
+        return ""
+    if len(text) <= length:
         return text
+    short_length = length - len(indicator)
+    if not whole_word:
+        return text[:short_length] + indicator
+    # Go back to end of previous word.
+    i = short_length
+    while i >= 0 and not text[i].isspace():
+        i -= 1
+    while i >= 0 and text[i].isspace():
+        i -= 1
+    #if i < short_length:
+    #    i += 1   # Set to one after the last char we want to keep.
+    if i <= 0:
+        # Entire text before break is one word, or we miscalculated.
+        return text[:short_length] + indicator
+    return text[:i+1] + indicator
+
 
 def excerpt(text, phrase, radius=100, excerpt_string="..."):
     """Extract an excerpt from the ``text``, or '' if the phrase isn't
     else:
         return excerpt
 
+
+def plural(n, singular, plural, with_number=True):
+    """Choose between the singular and plural form of a word.
+
+    ``singular`` is used when ``n`` equals 1 and ``plural`` otherwise.
+    Unless ``with_number`` is false, the chosen form is prefixed with ``n``
+    and a space.
+
+    Usage:
+    >>> plural(2, "ox", "oxen")
+    '2 oxen'
+    >>> plural(1, "ox", "oxen")
+    '1 ox'
+    >>> plural(2, "ox", "oxen", False)
+    'oxen'
+    """
+    word = plural
+    if n == 1:
+        word = singular
+    if not with_number:
+        return word
+    return "%s %s" % (n, word)
+
+def chop_at(s, sub, inclusive=False):
+    """Cut string ``s`` off at the first occurrence of ``sub``.
+
+    By default the result ends just before ``sub``; if ``inclusive`` is true
+    it ends just after ``sub`` instead.  ``s`` is returned unchanged when it
+    does not contain ``sub``.
+
+    >>> chop_at("plutocratic brats", "rat")
+    'plutoc'
+    >>> chop_at("plutocratic brats", "rat", True)
+    'plutocrat'
+    """
+    cut = s.find(sub)
+    if cut < 0:
+        return s
+    if inclusive:
+        cut += len(sub)
+    return s[:cut]
+
+def lchop(s, sub):
+    """Remove ``sub`` from the start of ``s``.
+
+    ``s`` is returned unchanged if it does not start with ``sub``.
+
+    >>> lchop("##This is a comment.##", "##")
+    'This is a comment.##'
+    """
+    if not s.startswith(sub):
+        return s
+    return s[len(sub):]
+    
+def rchop(s, sub):
+    """Chop ``sub`` off the end of ``s`` if present.
+
+    ``s`` is returned unchanged if it does not end with ``sub`` or if
+    ``sub`` is empty.
+
+    >>> rchop("##This is a comment.##", "##")
+    '##This is a comment.'
+    >>> rchop("unchanged", "")
+    'unchanged'
+    """
+    # An empty ``sub`` must be excluded explicitly: every string "ends with"
+    # the empty string, and ``s[:-0]`` is ``s[:0]``, i.e. the empty string.
+    if sub and s.endswith(sub):
+        return s[:-len(sub)]
+    return s
+
+def strip_leading_whitespace(s):
+    """Strip the leading whitespace in all lines in ``s``.
+
+    This deletes *all* leading whitespace.  ``textwrap.dedent`` deletes only
+    the whitespace common to all lines.
+
+    Line terminators are preserved, so blank and whitespace-only lines
+    become empty lines rather than disappearing.
+
+    >>> strip_leading_whitespace("  a\\n\\n    b\\n")
+    'a\\n\\nb\\n'
+    """
+    result = []
+    for line in s.splitlines(True):
+        # Separate the line's text from its terminator so that ``lstrip``
+        # cannot consume the terminator of a whitespace-only line.
+        content = line.splitlines()[0]
+        terminator = line[len(content):]
+        result.append(content.lstrip() + terminator)
+    return "".join(result)
+
+def wrap_paragraphs(text, width=72):
+    """Wrap all paragraphs in a text string to the specified width.
+
+    ``width`` may also be a ``textwrap.TextWrapper`` instance, in which case it
+    will be used to do the wrapping.  This provides a way to set other options
+    besides the width, and is more efficient when wrapping many texts.
+
+    Lines that already fit within the width are left untouched.  Wrapping
+    starts at the first line that is too long and continues up to the next
+    whitespace-only line (or the end of the text).  Each rewrapped paragraph
+    ends with a single newline.
+    """
+    if isinstance(width, textwrap.TextWrapper):
+        wrapper = width
+    else:
+        wrapper = textwrap.TextWrapper(width=width)
+    # Always measure against the wrapper's own width; ``width`` itself may be
+    # a TextWrapper instance rather than a number.
+    max_width = wrapper.width
+    result = []
+    lines = text.splitlines(True)
+    lines_len = len(lines)
+    start = 0
+    while start < lines_len:
+        # Leave short lines as-is.  The line terminator is not counted
+        # toward the line's length.
+        if len(lines[start].rstrip("\r\n")) <= max_width:
+            result.append(lines[start])
+            start += 1
+            continue
+        # Found a long line, peek forward to end of paragraph.
+        end = start + 1
+        while end < lines_len and not lines[end].isspace():
+            end += 1
+        # 'end' is one higher than the last line of the paragraph.
+        paragraph = "".join(lines[start:end])
+        result.append(wrapper.fill(paragraph) + "\n")
+        start = end
+    return "".join(result)
+

webhelpers/util.py

-"""Utility functions used by various web helpers"""
+"""Utility functions used by various web helpers
+
+This module is such a mess that no new helpers should be added to it.
+"""
 import cgi
 import copy
 import sys
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.