Mike Orr avatar Mike Orr committed f4861f6

Unfinished multimedia helpers.

Comments (0)

Files changed (2)

unfinished/image_size.py

-"""Calculate the size of an image.
+"""Image helpers
 
+The following module extracts the width and height from an image file with
 No C code or external dependencies.
 
 This version is too complicated and GPL, but serves as an algorithm 
 I've never seen an image without valid dimensions in it, so raising an
 exception may be fine.
 
-A dict dimensions cache is also useful.
+WebHelpers/unfinished/multimedia.py contains an alternative
+``get_dimensions()`` function that depends on the Python Imaging Library.
 """
 
-To resize an image to a certain width and keep the height proportional:
-
-def choose_height(new_width, width, height):
-    """Return the height corresponding to 'new_width' that's proportional
-    to the original size.
-
-    This is useful when resizing an image to a certain width.
-
-    This function is by Mike Orr.
-    """
-    proportion = float(height) / float(width)
-    return int(new_width * proportion)
 #!/usr/bin/env python
 # (c) Copyright 2001-2005 Hewlett-Packard Development Company, L.P.
 #

unfinished/multimedia.py

+"""Helpers for images, PDFs, etc.
+
+This module is in 'unfinished' because the functions have various dependencies,
+file naming conventions, and cache conventions which may not be appropriate for
+WebHelpers.  This module was written by Mike Orr, except
+``make_pdf_thumbnail2()`` which was written by Chris Barker.
+
+``static_image()`` takes a path relative to the Python application's public
+directory, extracts the width and height from the image file, and returns an
+<img> tag based on the expected public URL, using webhelpers.html.tags.image.
+
+``get_dimensions()`` extracts the width and height from an image file using
+PIL.  It uses an optional dimensions cache for speed.  The cache is a memory
+dict.  Because the files are on disk they're independent of the thread or
+process, so this is sufficiently thread-safe/multiprocess-safe.  It doesn't
+recognize changes to the files unless you invalidate the cache or restart the
+application, but it's assumed the files won't change frequently enough for this
+to be an issue.
+
+``open_image()`` returns a PIL Image object, or None if PIL doesn't recognize
+the file type.
+
+``make_thumb()`` creates a thumbnail of an image in the same directory as the
+original.  The thumbnail is named FILENAME_STEM + "_thumb.jpg".
+
+``get_thumb_path()`` returns the thumbnail path based on the original image
+path, using the naming conventions of ``make_thumb``.
+
+``make_pdf_thumbnail()`` and ``make_pdf_thumbnail2`` create a thumbnail image
+of the first page of a PDF file.  The former depends on ImageMagick which uses
+Ghostscript, the latter depends on Ghostscript directly.  The former seems to
+be more reliable currently.
+
+
+"""
+
+import glob
+import logging
+import os
+import re
+import subprocess
+import sys
+import traceback
+import warnings
+
+import Image     # Python Imaging Library (PIL)
+
+warn = logging.getLogger("multimedia").warn
+
+# Suppress FutureWarning from PIL; we can't do anything about it.
+warnings.filterwarnings('ignore', '.*return a long.*')
+
+THUMB_PIL_TYPE = "JPEG"   # Thumbnail type; one of PIL's output formats.
+THUMB_EXT = ".jpg"        # The filename extension for that type.
+
+# Caches image dimensions for reuse.
+_dimensions_cache = {}
+
+RX_DECODER_NOT_AVAILABLE = re.compile( R"decoder .* not available" )
+
+def static_image(relative_path, alt, **html_attrs):
+    """Create an <img> tag for a path relative to the public directory.
+       
+    If keyword arg ``use_cache`` is false, don't use the global dimensions
+    cache.
+    """
+    use_cache = html_attrs.pop("use_cache", True)
+    if "width" not in html_attrs or "height" not in html_attrs:
+        try:
+            path = Path(config["pylons.paths"]["public_files"], relative_path)
+            width, height = get_dimensions(path, use_cache)
+        except IOError:
+            pass
+        else:
+            if width:
+                html_attrs.setdefault("width", width)
+            if height:
+                html_attrs.setdefault("height", height)
+    # @@MO Temporary kludge due to url_for ambiguity in Routes 1.
+    src = "/" + relative_path
+    return image(src, alt=alt, **html_attrs)
+
+
+def open_image(image_path):
+    """Open an image file in PIL, return the Image object.
+       Return None if PIL doesn't recognize the file type.
+    """
+    try:
+        im = Image.open(image_path)
+    except IOError, e:
+        if str(e) == "cannot identify image file":
+            return None
+        else:
+            raise
+    except:
+        m = "caught exception identifying '%s', assuming non-image:\n%s"
+        e = traceback.format_exc()
+        warn(m, image_path, e)
+        return None
+    return im
+
+def make_thumb(image_path, width):
+    """Make a thumbnail and save it in the same directory as the original.
+
+       See get_thumb_path() for the arguments.
+       @return The thumbnail filename, or None if PIL
+           didn't recognize the image type.
+
+       Does NOT work with PDF originals; use make_thumb_from_pdf for those.
+    """
+    dst = get_thumb_path(image_path, width)
+    im = open_image(image_path)
+    if im is None:
+        return None
+    orig_width, orig_height = im.size
+    height = choose_height(width, orig_width, orig_height)
+    if im.mode == 'P':
+        im = im.convert()   # Convert GIF palette to RGB mode.
+    try:
+        im.thumbnail((width, height), Image.ANTIALIAS)
+    except IOError, e:
+        reason = str(e)
+        if RX_DECODER_NOT_AVAILABLE.search(reason):
+            return None   # PIL error, cannot thumbnail.
+        else:
+            raise
+    im.save(dst, THUMB_PIL_TYPE)
+    return dst
+
+def choose_height(new_width, width, height):
+    """Return the height corresponding to 'new_width' that's proportional
+       to the original size.
+    """
+    proportion = float(height) / float(width)
+    return int(new_width * proportion)
+
+def get_dimensions(image_path, use_cache=False):
+    """Return the width and height of an image.
+       Returns (None, None) if PIL doesn't recognize the file type.
+
+       @param use_cache bool If true, use the cached dimensions if
+       available.  This cuts down on filesystem accesses, but the cache may
+       be wrong if the image has changed.  If false, update the cache anyway
+       so it's correct.
+
+       @exc IOError raised by PIL if the image file is missing or you don't
+       have read permission for it.
+    """
+    image_path = str(image_path)   # Don't need a path object.
+    if use_cache and image_path in _dimensions_cache:
+        return _dimensions_cache[image_path]
+    im = open_image(image_path)
+    if im is None:
+        size = (None, None)
+    else:
+        size = im.size
+    _dimensions_cache[image_path] = size
+    return size
+
+def changed(image_path=None):
+    """Delete all cached data regarding this path because the file has
+       changed.  If arg is unspecified or None, delete all cached data
+       for all paths.
+    """
+    if image_path is None:
+        _dimensions_cache.clear()
+        return
+    if image_path in _dimensions_cache:
+        del _dimensions_cache[image_path]
+
+def get_thumb_path(image_path, width):
+    """Return the thumbnail path for the given image.
+       
+       @parm image_path str The original image filename.
+       @param width int The thumbnail width in pixels.
+       @return path The thumbnail path.
+       For "a/foo.jpg", returns path("a/foo_thumbWIDTH.jpg").
+       The return value always ends with THUMB_EXT regardless of the original
+       extension.
+    """
+    dir, old_name = os.path.split(image_path)
+    base, ext = os.path.splitext(old_name)
+    new_name = "%s_thumb%d%s" % (base, width, THUMB_EXT)
+    return os.path.join(dir, new_name)
+
+def test():
+    print "Height for 600x480 @ width 200 is", choose_height(200, 600, 480)
+    print "Path 200 for a/foo.jpg is", get_thumb_path('a/foo.jpg', 200)
+    print "Path 200 for a/foo.png is", get_thumb_path('a/foo.png', 200)
+
+if __name__ == "__main__":  test()
+
+def make_pdf_thumbnail(path, width):
+    """Make a thumbnail from a PDF file.
+
+       @parm image_path str The original image filename.
+       @param width int The thumbnail width in pixels. (Will be approximate.)
+       @return path The thumbnail path.
+       For "a/foo.jpg", returns path("a/foo_thumbWIDTH.jpg").
+       The return value always ends with THUMB_EXT regardless of the original
+       extension.
+
+       Requires the "imagemagick" package to be installed.  By Mike Orr.
+    """
+    width_str = str(width)
+    dir, name = os.path.split(path)
+    base, ext = os.path.splitext(name)
+    newbase = "%s_thumb%s" % (base, width_str)
+    dst = os.path.join(dir, newbase + THUMB_EXT)
+
+    def page(n):
+        """Return the filename for page n's thumbnail, n >= 0.
+           'n' may also be a string (e.g., "*" for wildcard patterns).
+           If 'n' is None, return value has no page suffix.
+        """
+        if n is not None:
+            suffix = "-%s" % n
+        else:
+            suffix = ""
+        return os.path.join(dir, newbase + suffix + THUMB_EXT)
+
+    trashcan = open("/dev/null", "w")
+    cmd = ["/usr/bin/convert", "-geometry", width_str, path, dst]
+    status = subprocess.call(cmd, shell=False, stderr=trashcan)
+    if status:
+        warn("make_pdf_thumbnail subcommand exited with status %s: %s", 
+            status, cmd)
+    trashcan.close()
+    found = False
+    if os.path.exists(dst):
+        found = True
+    page0_fn = page(0)
+    other_files = glob.glob(page("*"))
+    for fn in other_files:
+        if fn == page0_fn and not found:
+            os.rename(fn, dst)
+            found = True
+        else:
+            os.remove(fn)
+    if found:
+        return dst
+    else:
+        return None
+
+def make_pdf_thumbnail2(path, width):
+    """Make a thumbnail from a PDF file.
+
+       This version uses just ghostscript, rather than ImageMagik
+       -- chb
+
+       @parm image_path str The original image filename.
+       @param width int The thumbnail width in pixels. (Will be approximate -- assumes 8.5in wide paper.)
+       @return path The thumbnail path.
+       For "a/foo.jpg", returns path("a/foo_thumbWIDTH.jpg").
+       The return value always ends with THUMB_EXT regardless of the original
+       extension.
+
+       Requires ghostscript to be installed.  By Chris Barker.
+    """
+    width_str = str(width)
+    dir, name = os.path.split(path)
+    base, ext = os.path.splitext(name)
+    newbase = "%s_thumb%s" % (base, width_str)
+    dst = os.path.join(dir, newbase + THUMB_EXT)
+
+    def page(n):
+        """Return the filename for page n's thumbnail, n >= 0.
+           'n' may also be a string (e.g., "*" for wildcard patterns).
+           If 'n' is None, return value has no page suffix.
+        """
+        if n is not None:
+            suffix = "-%s" % n
+        else:
+            suffix = ""
+        return os.path.join(dir, newbase + suffix + THUMB_EXT)
+
+    trashcan = open("/dev/null", "w")
+    
+    ## A few settable options
+    if THUMB_EXT == ".jpg":
+        filetype = "jpeg" # jpeg
+    elif THUM_EXT == ".png":
+        filetype = "png16m" # 24 bit png
+    else:
+        filetype = "jpeg" # should this be default
+    
+    gs_path = "/usr/local/bin/gs"
+    ps_cmd = "save pop currentglobal true setglobal false/product where{pop product(Ghostscript)search{pop pop pop revision 600 ge{pop true}if}{pop}ifelse}if{/pdfdict where{pop pdfdict begin/pdfshowpage_setpage[pdfdict/pdfshowpage_setpage get{dup type/nametype eq{dup/OutputFile eq{pop/AntiRotationHack}{dup/MediaBox eq revision 650 ge and{/THB.CropHack{1 index/CropBox pget{2 index exch/MediaBox exch put}if}def/THB.CropHack cvx}if}ifelse}if}forall]cvx def end}if}if setglobal"
+    cmd = [gs_path, "-dSAFER","-dBATCH","-dNOPAUSE","-dLastPage=1","-dTextAlphaBits=4"]
+    cmd.append("-sDEVICE=%s"%filetype)
+    #dpi  = int(width / 8.5) ## this assumes an 8.5in wide piece of paper.
+    dpi = 20
+    cmd.append("-r%i"%dpi)
+    
+    cmd.append("-sOutputFile=%s"% dst)
+    cmd.extend(("-c", ps_cmd, "-f"),)
+    cmd.append(path)
+    
+    ## the desired command string
+    ## gs -dSAFER -dBATCH -dNOPAUSE -r150 -sDEVICE=jpeg -dTextAlphaBits=4 -sOutputFile=$1-%02d.jpg $1
+    status = subprocess.call(cmd, shell=False) #, stdout=trashcan, stderr=trashcan)
+    if status:
+        warn("make_pdf_thumbnail subcommand exited with status %s: %s", 
+            status, cmd)
+    trashcan.close()
+    found = False
+    if os.path.exists(dst):
+        return dst
+    else:
+        return None
+    
+
+def get_pdf_text(path):
+    raise NotImplementedError
+
+def get_word_text(path):
+    raise NotImplementedError
+
+
+if __name__ == "__main__":
+    import optparse
+    logging.basicConfig()
+    parser = optparse.OptionParser(usage="%prog PDF_FILE")
+    opts, args = parser.parse_args()
+    if len(args) != 1:
+        parser.error("wrong number of command-line arguments")
+    source_file = args[0]
+    
+    width = 200
+    dst = make_pdf_thumbnail2(source_file, width)
+    print "Thumbnail made:", dst
+
+#ps_cmd = "save pop currentglobal true setglobal false/product where{pop product(Ghostscript)search{pop pop pop revision 600 ge{pop true}if}{pop}ifelse}if{/pdfdict where{pop pdfdict begin/pdfshowpage_setpage[pdfdict/pdfshowpage_setpage get{dup type/nametype eq{dup/OutputFile eq{pop/AntiRotationHack}{dup/MediaBox eq revision 650 ge and{/THB.CropHack{1 index/CropBox pget{2 index exch/MediaBox exch put}if}def/THB.CropHack cvx}if}ifelse}if}forall]cvx def end}if}if setglobal"
+
+#gs -dLastPage=1 -dTextAlphaBits=4 -dGraphicsAlphaBits=4 -dNOPAUSE -dBATCH -sDEVICE=jpeg -r20 -sOutputFile=Chem_Sheet_LPG.jpg -c "save pop currentglobal true setglobal false/product where{pop product(Ghostscript)search{pop pop pop revision 600 ge{pop true}if}{pop}ifelse}if{/pdfdict where{pop pdfdict begin/pdfshowpage_setpage[pdfdict/pdfshowpage_setpage get{dup type/nametype eq{dup/OutputFile eq{pop/AntiRotationHack}{dup/MediaBox eq revision 650 ge and{/THB.CropHack{1 index/CropBox pget{2 index exch/MediaBox exch put}if}def/THB.CropHack cvx}if}ifelse}if}forall]cvx def end}if}if setglobal" -f Chem_Sheet_LPG.pdf
+
+#gs -dTextAlphaBits=4 -dGraphicsAlphaBits=4 -dNOPAUSE -dBATCH -sDEVICE=png16m -r9.06531732174037 -sOutputFile=thb%d.png -c "save pop currentglobal true setglobal false/product where{pop product(Ghostscript)search{pop pop pop revision 600 ge{pop true}if}{pop}ifelse}if{/pdfdict where{pop pdfdict begin/pdfshowpage_setpage[pdfdict/pdfshowpage_setpage get{dup type/nametype eq{dup/OutputFile eq{pop/AntiRotationHack}{dup/MediaBox eq revision 650 ge and{/THB.CropHack{1 index/CropBox pget{2 index exch/MediaBox exch put}if}def/THB.CropHack cvx}if}ifelse}if}forall]cvx def end}if}if setglobal" -f Chem_Sheet_LPG.pdf
+
+
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.