Commits

ja...@bcc190cf-cafb-0310-a4f2-bffc1f526a37  committed 81ba38c

Fixed #2070: refactored Django's file upload capabilities.

A description of the new features can be found in the new [http://www.djangoproject.com/documentation/upload_handling/ upload handling documentation]; the executive summary is that Django will now happily handle uploads of large files without issues.

This changes the representation of uploaded files from dictionaries to bona fide objects; see BackwardsIncompatibleChanges for details.

  • Participants
  • Parent commits d4a2668

Comments (0)

Files changed (38)

     Arthur <avandorp@gmail.com>
     av0000@mail.ru
     David Avsajanishvili <avsd05@gmail.com>
-    axiak@mit.edu
+    Mike Axiak <axiak@mit.edu>
     Niran Babalola <niran@niran.org>
     Morten Bagai <m@bagai.com>
     Mikaël Barbero <mikael.barbero nospam at nospam free.fr>
     Marc Fargas <telenieko@telenieko.com>
     Szilveszter Farkas <szilveszter.farkas@gmail.com>
     favo@exoweb.net
+    fdr <drfarina@gmail.com>
     Dmitri Fedortchenko <zeraien@gmail.com>
+    Jonathan Feignberg <jdf@pobox.com>
     Liang Feng <hutuworm@gmail.com>
     Bill Fenner <fenner@gmail.com>
     Stefane Fermgier <sf@fermigier.com>

File django/conf/global_settings.py

 # Example: "http://media.lawrence.com"
 MEDIA_URL = ''
 
+# List of upload handler classes to be applied in order.
+FILE_UPLOAD_HANDLERS = (
+    'django.core.files.uploadhandler.MemoryFileUploadHandler',
+    'django.core.files.uploadhandler.TemporaryFileUploadHandler',
+)
+
+# Maximum size, in bytes, of a request before it will be streamed to the
+# file system instead of into memory.
+FILE_UPLOAD_MAX_MEMORY_SIZE = 2621440 # i.e. 2.5 MB
+
+# Directory in which upload streamed files will be temporarily saved. A value of
+# `None` will make Django use the operating system's default temporary directory
+# (i.e. "/tmp" on *nix systems).
+FILE_UPLOAD_TEMP_DIR = None
+
 # Default formatting for date objects. See all available format strings here:
 # http://www.djangoproject.com/documentation/templates/#now
 DATE_FORMAT = 'N j, Y'

File django/core/files/__init__.py

Empty file added.

File django/core/files/locks.py

+"""
+Portable file locking utilities.
+
+Based partially on example by Jonathan Feignberg <jdf@pobox.com> in the Python
+Cookbook, licensed under the Python Software License.
+
+    http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/65203
+
+Example Usage::
+
+    >>> from django.core.files import locks
+    >>> f = open('./file', 'wb')
+    >>> locks.lock(f, locks.LOCK_EX)
+    >>> f.write('Django')
+    >>> f.close()
+"""
+
+__all__ = ('LOCK_EX','LOCK_SH','LOCK_NB','lock','unlock')
+
+system_type = None
+
+try:
+    import win32con
+    import win32file
+    import pywintypes
+    LOCK_EX = win32con.LOCKFILE_EXCLUSIVE_LOCK
+    LOCK_SH = 0
+    LOCK_NB = win32con.LOCKFILE_FAIL_IMMEDIATELY
+    __overlapped = pywintypes.OVERLAPPED()
+    system_type = 'nt'
+except (ImportError, AttributeError):
+    pass
+
+try:
+    import fcntl
+    LOCK_EX = fcntl.LOCK_EX
+    LOCK_SH = fcntl.LOCK_SH
+    LOCK_NB = fcntl.LOCK_NB
+    system_type = 'posix'
+except (ImportError, AttributeError):
+    pass
+
+if system_type == 'nt':
+    def lock(file, flags):
+        hfile = win32file._get_osfhandle(file.fileno())
+        win32file.LockFileEx(hfile, flags, 0, -0x10000, __overlapped)
+
+    def unlock(file):
+        hfile = win32file._get_osfhandle(file.fileno())
+        win32file.UnlockFileEx(hfile, 0, -0x10000, __overlapped)
+elif system_type == 'posix':
+    def lock(file, flags):
+        fcntl.flock(file.fileno(), flags)
+
+    def unlock(file):
+        fcntl.flock(file.fileno(), fcntl.LOCK_UN)
+else:
+    # File locking is not supported.
+    LOCK_EX = LOCK_SH = LOCK_NB = None
+
+    # Dummy functions that don't do anything.
+    def lock(file, flags):
+        pass
+
+    def unlock(file):
+        pass

File django/core/files/move.py

+"""
+Move a file in the safest way possible::
+
+    >>> from django.core.files.move import file_move_safe
+    >>> file_move_safe("/tmp/old_file", "/tmp/new_file")
+"""
+
+import os
+from django.core.files import locks
+
+__all__ = ['file_move_safe']
+
+try:
+    import shutil
+    file_move = shutil.move
+except ImportError:
+    file_move = os.rename
+
+def file_move_safe(old_file_name, new_file_name, chunk_size = 1024*64, allow_overwrite=False):
+    """
+    Moves a file from one location to another in the safest way possible.
+
+    First, try using ``shutil.move``, which is OS-dependent but doesn't break
+    if moving across filesystems. Then, try ``os.rename``, which will break
+    across filesystems. Finally, streams manually from one file to another in
+    pure Python.
+
+    If the destination file exists and ``allow_overwrite`` is ``False``, this
+    function will throw an ``IOError``.
+    """
+
+    # There's no reason to move if we don't have to.
+    if old_file_name == new_file_name:
+        return
+
+    if not allow_overwrite and os.path.exists(new_file_name):
+        raise IOError("Cannot overwrite existing file '%s'." % new_file_name)
+
+    try:
+        file_move(old_file_name, new_file_name)
+        return
+    except OSError:
+        # This will happen with os.rename if moving to another filesystem
+        pass
+
+    # If the built-in didn't work, do it the hard way.
+    new_file = open(new_file_name, 'wb')
+    locks.lock(new_file, locks.LOCK_EX)
+    old_file = open(old_file_name, 'rb')
+    current_chunk = None
+
+    while current_chunk != '':
+        current_chunk = old_file.read(chunk_size)
+        new_file.write(current_chunk)
+
+    new_file.close()
+    old_file.close()
+
+    os.remove(old_file_name)

File django/core/files/uploadedfile.py

+"""
+Classes representing uploaded files.
+"""
+
+import os
+try:
+    from cStringIO import StringIO
+except ImportError:
+    from StringIO import StringIO
+
+__all__ = ('UploadedFile', 'TemporaryUploadedFile', 'InMemoryUploadedFile')
+
+class UploadedFile(object):
+    """
+    An abstract uploaded file (``TemporaryUploadedFile`` and
+    ``InMemoryUploadedFile`` are the built-in concrete subclasses).
+
+    An ``UploadedFile`` object behaves somewhat like a file object and
+    represents some file data that the user submitted with a form.
+    """
+    DEFAULT_CHUNK_SIZE = 64 * 2**10
+
+    def __init__(self, file_name=None, content_type=None, file_size=None, charset=None):
+        self.file_name = file_name
+        self.file_size = file_size
+        self.content_type = content_type
+        self.charset = charset
+
+    def __repr__(self):
+        return "<%s: %s (%s)>" % (self.__class__.__name__, self.file_name, self.content_type)
+
+    def _set_file_name(self, name):
+        # Sanitize the file name so that it can't be dangerous.
+        if name is not None:
+            # Just use the basename of the file -- anything else is dangerous.
+            name = os.path.basename(name)
+            
+            # File names longer than 255 characters can cause problems on older OSes.
+            if len(name) > 255:
+                name, ext = os.path.splitext(name)
+                name = name[:255 - len(ext)] + ext
+                
+        self._file_name = name
+        
+    def _get_file_name(self):
+        return self._file_name
+        
+    file_name = property(_get_file_name, _set_file_name)
+
+    def chunk(self, chunk_size=None):
+        """
+        Read the file and yield chunks of ``chunk_size`` bytes (defaults to
+        ``UploadedFile.DEFAULT_CHUNK_SIZE``).
+        """
+        if not chunk_size:
+            chunk_size = UploadedFile.DEFAULT_CHUNK_SIZE
+
+        if hasattr(self, 'seek'):
+            self.seek(0)
+        # Assume the pointer is at zero...
+        counter = self.file_size
+
+        while counter > 0:
+            yield self.read(chunk_size)
+            counter -= chunk_size
+
+    def multiple_chunks(self, chunk_size=None):
+        """
+        Returns ``True`` if you can expect multiple chunks.
+
+        NB: If a particular file representation is in memory, subclasses should
+        always return ``False`` -- there's no good reason to read from memory in
+        chunks.
+        """
+        if not chunk_size:
+            chunk_size = UploadedFile.DEFAULT_CHUNK_SIZE
+        return self.file_size < chunk_size
+
+    # Abstract methods; subclasses *must* define read() and probably should
+    # define open/close.
+    def read(self, num_bytes=None):
+        raise NotImplementedError()
+
+    def open(self):
+        pass
+
+    def close(self):
+        pass
+
+    # Backwards-compatible support for uploaded-files-as-dictionaries.
+    def __getitem__(self, key):
+        import warnings
+        warnings.warn(
+            message = "The dictionary access of uploaded file objects is deprecated. Use the new object interface instead.",
+            category = DeprecationWarning,
+            stacklevel = 2
+        )
+        backwards_translate = {
+            'filename': 'file_name',
+            'content-type': 'content_type',
+            }
+
+        if key == 'content':
+            return self.read()
+        elif key == 'filename':
+            return self.file_name
+        elif key == 'content-type':
+            return self.content_type
+        else:
+            return getattr(self, key)
+
+class TemporaryUploadedFile(UploadedFile):
+    """
+    A file uploaded to a temporary location (i.e. stream-to-disk).
+    """
+
+    def __init__(self, file, file_name, content_type, file_size, charset):
+        super(TemporaryUploadedFile, self).__init__(file_name, content_type, file_size, charset)
+        self.file = file
+        self.path = file.name
+        self.file.seek(0)
+
+    def temporary_file_path(self):
+        """
+        Returns the full path of this file.
+        """
+        return self.path
+
+    def read(self, *args, **kwargs):
+        return self.file.read(*args, **kwargs)
+
+    def open(self):
+        self.seek(0)
+
+    def seek(self, *args, **kwargs):
+        self.file.seek(*args, **kwargs)
+
+class InMemoryUploadedFile(UploadedFile):
+    """
+    A file uploaded into memory (i.e. stream-to-memory).
+    """
+    def __init__(self, file, field_name, file_name, content_type, charset, file_size):
+        super(InMemoryUploadedFile, self).__init__(file_name, content_type, charset, file_size)
+        self.file = file
+        self.field_name = field_name
+        self.file.seek(0)
+
+    def seek(self, *args, **kwargs):
+        self.file.seek(*args, **kwargs)
+
+    def open(self):
+        self.seek(0)
+
+    def read(self, *args, **kwargs):
+        return self.file.read(*args, **kwargs)
+
+    def chunk(self, chunk_size=None):
+        self.file.seek(0)
+        yield self.read()
+
+    def multiple_chunks(self, chunk_size=None):
+        # Since it's in memory, we'll never have multiple chunks.
+        return False
+
+class SimpleUploadedFile(InMemoryUploadedFile):
+    """
+    A simple representation of a file, which just has content, size, and a name.
+    """
+    def __init__(self, name, content, content_type='text/plain'):
+        self.file = StringIO(content or '')
+        self.file_name = name
+        self.field_name = None
+        self.file_size = len(content or '')
+        self.content_type = content_type
+        self.charset = None
+        self.file.seek(0)
+
+    def from_dict(cls, file_dict):
+        """
+        Creates a SimpleUploadedFile object from
+        a dictionary object with the following keys:
+           - filename
+           - content-type
+           - content
+        """
+        return cls(file_dict['filename'],
+                   file_dict['content'],
+                   file_dict.get('content-type', 'text/plain'))
+
+    from_dict = classmethod(from_dict)

File django/core/files/uploadhandler.py

+"""
+Base file upload handler classes, and the built-in concrete subclasses
+"""
+import os
+import tempfile
+try:
+    from cStringIO import StringIO
+except ImportError:
+    from StringIO import StringIO
+
+from django.conf import settings
+from django.core.exceptions import ImproperlyConfigured
+from django.core.files.uploadedfile import TemporaryUploadedFile, InMemoryUploadedFile
+
+__all__ = ['UploadFileException','StopUpload', 'SkipFile', 'FileUploadHandler',
+           'TemporaryFileUploadHandler', 'MemoryFileUploadHandler',
+           'load_handler']
+
+class UploadFileException(Exception):
+    """
+    Any error having to do with uploading files.
+    """
+    pass
+
+class StopUpload(UploadFileException):
+    """
+    This exception is raised when an upload must abort.
+    """
+    def __init__(self, connection_reset=False):
+        """
+        If ``connection_reset`` is ``True``, Django will halt the upload
+        without consuming the rest of the upload. This will cause the browser to
+        show a "connection reset" error.
+        """
+        self.connection_reset = connection_reset
+
+    def __unicode__(self):
+        if self.connection_reset:
+            return u'StopUpload: Halt current upload.'
+        else:
+            return u'StopUpload: Consume request data, then halt.'
+
+class SkipFile(UploadFileException):
+    """
+    This exception is raised by an upload handler that wants to skip a given file.
+    """
+    pass
+    
+class StopFutureHandlers(UploadFileException):
+    """
+    Upload handlers that have handled a file and do not want future handlers to
+    run should raise this exception instead of returning None.
+    """
+    pass
+
+class FileUploadHandler(object):
+    """
+    Base class for streaming upload handlers.
+    """
+    chunk_size = 64 * 2 ** 10 #: The default chunk size is 64 KB.
+
+    def __init__(self, request=None):
+        self.file_name = None
+        self.content_type = None
+        self.content_length = None
+        self.charset = None
+        self.request = request
+
+    def handle_raw_input(self, input_data, META, content_length, boundary, encoding=None):
+        """
+        Handle the raw input from the client.
+
+        Parameters:
+
+            :input_data:
+                An object that supports reading via .read().
+            :META:
+                ``request.META``.
+            :content_length:
+                The (integer) value of the Content-Length header from the
+                client.
+            :boundary: The boundary from the Content-Type header. Be sure to
+                prepend two '--'.
+        """
+        pass
+
+    def new_file(self, field_name, file_name, content_type, content_length, charset=None):
+        """
+        Signal that a new file has been started.
+
+        Warning: As with any data from the client, you should not trust
+        content_length (and sometimes won't even get it).
+        """
+        self.field_name = field_name
+        self.file_name = file_name
+        self.content_type = content_type
+        self.content_length = content_length
+        self.charset = charset
+
+    def receive_data_chunk(self, raw_data, start):
+        """
+        Receive data from the streamed upload parser. ``start`` is the position
+        in the file of the chunk.
+        """
+        raise NotImplementedError()
+
+    def file_complete(self, file_size):
+        """
+        Signal that a file has completed. File size corresponds to the actual
+        size accumulated by all the chunks.
+
+        Subclasses should return a valid ``UploadedFile`` object.
+        """
+        raise NotImplementedError()
+
+    def upload_complete(self):
+        """
+        Signal that the upload is complete. Subclasses should perform cleanup
+        that is necessary for this handler.
+        """
+        pass
+
+class TemporaryFileUploadHandler(FileUploadHandler):
+    """
+    Upload handler that streams data into a temporary file.
+    """
+    def __init__(self, *args, **kwargs):
+        super(TemporaryFileUploadHandler, self).__init__(*args, **kwargs)
+
+    def new_file(self, file_name, *args, **kwargs):
+        """
+        Create the file object to append to as data is coming in.
+        """
+        super(TemporaryFileUploadHandler, self).new_file(file_name, *args, **kwargs)
+        self.file = TemporaryFile(settings.FILE_UPLOAD_TEMP_DIR)
+        self.write = self.file.write
+
+    def receive_data_chunk(self, raw_data, start):
+        self.write(raw_data)
+
+    def file_complete(self, file_size):
+        self.file.seek(0)
+        return TemporaryUploadedFile(self.file, self.file_name,
+                                     self.content_type, file_size,
+                                     self.charset)
+
+class MemoryFileUploadHandler(FileUploadHandler):
+    """
+    File upload handler to stream uploads into memory (used for small files).
+    """
+
+    def handle_raw_input(self, input_data, META, content_length, boundary, encoding=None):
+        """
+        Use the content_length to signal whether or not this handler should be in use.
+        """
+        # Check the content-length header to see if this handler should be used.
+        # If the post is too large, we cannot use the Memory handler.
+        if content_length > settings.FILE_UPLOAD_MAX_MEMORY_SIZE:
+            self.activated = False
+        else:
+            self.activated = True
+
+    def new_file(self, *args, **kwargs):
+        super(MemoryFileUploadHandler, self).new_file(*args, **kwargs)
+        if self.activated:
+            self.file = StringIO()
+            raise StopFutureHandlers()
+
+    def receive_data_chunk(self, raw_data, start):
+        """
+        Add the data to the StringIO file.
+        """
+        if self.activated:
+            self.file.write(raw_data)
+        else:
+            return raw_data
+
+    def file_complete(self, file_size):
+        """
+        Return a file object if we're activated.
+        """
+        if not self.activated:
+            return
+
+        return InMemoryUploadedFile(self.file, self.field_name, self.file_name,
+                                    self.content_type, self.charset, file_size)
+
+class TemporaryFile(object):
+    """
+    A temporary file that tries to delete itself when garbage collected.
+    """
+    def __init__(self, dir):
+        if not dir:
+            dir = tempfile.gettempdir()
+        try:
+            (fd, name) = tempfile.mkstemp(suffix='.upload', dir=dir)
+            self.file = os.fdopen(fd, 'w+b')
+        except (OSError, IOError):
+            raise OSError("Could not create temporary file for uploading, have you set settings.FILE_UPLOAD_TEMP_DIR correctly?")
+        self.name = name
+
+    def __getattr__(self, name):
+        a = getattr(self.__dict__['file'], name)
+        if type(a) != type(0):
+            setattr(self, name, a)
+        return a
+
+    def __del__(self):
+        try:
+            os.unlink(self.name)
+        except OSError:
+            pass
+
+def load_handler(path, *args, **kwargs):
+    """
+    Given a path to a handler, return an instance of that handler.
+
+    E.g.::
+        >>> load_handler('django.core.files.uploadhandler.TemporaryFileUploadHandler', request)
+        <TemporaryFileUploadHandler object at 0x...>
+
+    """
+    i = path.rfind('.')
+    module, attr = path[:i], path[i+1:]
+    try:
+        mod = __import__(module, {}, {}, [attr])
+    except ImportError, e:
+        raise ImproperlyConfigured('Error importing upload handler module %s: "%s"' % (module, e))
+    except ValueError, e:
+        raise ImproperlyConfigured('Error importing upload handler module. Is FILE_UPLOAD_HANDLERS a correctly defined list or tuple?')
+    try:
+        cls = getattr(mod, attr)
+    except AttributeError:
+        raise ImproperlyConfigured('Module "%s" does not define a "%s" upload handler backend' % (module, attr))
+    return cls(*args, **kwargs)

File django/core/handlers/modpython.py

     def _load_post_and_files(self):
         "Populates self._post and self._files"
         if 'content-type' in self._req.headers_in and self._req.headers_in['content-type'].startswith('multipart'):
-            self._post, self._files = http.parse_file_upload(self._req.headers_in, self.raw_post_data)
+            self._raw_post_data = ''
+            self._post, self._files = self.parse_file_upload(self.META, self._req)
         else:
             self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict()
 

File django/core/handlers/wsgi.py

         # Populates self._post and self._files
         if self.method == 'POST':
             if self.environ.get('CONTENT_TYPE', '').startswith('multipart'):
-                header_dict = dict([(k, v) for k, v in self.environ.items() if k.startswith('HTTP_')])
-                header_dict['Content-Type'] = self.environ.get('CONTENT_TYPE', '')
-                self._post, self._files = http.parse_file_upload(header_dict, self.raw_post_data)
+                self._raw_post_data = ''
+                self._post, self._files = self.parse_file_upload(self.META, self.environ['wsgi.input'])
             else:
                 self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict()
         else:

File django/db/models/base.py

 from django.utils.datastructures import SortedDict
 from django.utils.functional import curry
 from django.utils.encoding import smart_str, force_unicode, smart_unicode
+from django.core.files.move import file_move_safe
+from django.core.files import locks
 from django.conf import settings
 
 try:
     def _get_FIELD_size(self, field):
         return os.path.getsize(self._get_FIELD_filename(field))
 
-    def _save_FIELD_file(self, field, filename, raw_contents, save=True):
+    def _save_FIELD_file(self, field, filename, raw_field, save=True):
         directory = field.get_directory_name()
         try: # Create the date-based directory if it doesn't exist.
             os.makedirs(os.path.join(settings.MEDIA_ROOT, directory))
         except OSError: # Directory probably already exists.
             pass
+
+        #
+        # Check for old-style usage (files-as-dictionaries). Warn here first
+        # since there are multiple locations where we need to support both new
+        # and old usage.
+        #
+        if isinstance(raw_field, dict):
+            import warnings
+            warnings.warn(
+                message = "Representing uploaded files as dictionaries is"\
+                          " deprecated. Use django.core.files.SimpleUploadedFile"\
+                          " instead.",
+                category = DeprecationWarning,
+                stacklevel = 2
+            )
+            from django.core.files.uploadedfile import SimpleUploadedFile
+            raw_field = SimpleUploadedFile.from_dict(raw_field)
+
+        elif isinstance(raw_field, basestring):
+            import warnings
+            warnings.warn(
+                message = "Representing uploaded files as strings is "\
+                          " deprecated. Use django.core.files.SimpleUploadedFile "\
+                          " instead.",
+                category = DeprecationWarning,
+                stacklevel = 2
+            )
+            from django.core.files.uploadedfile import SimpleUploadedFile
+            raw_field = SimpleUploadedFile(filename, raw_field)
+
+        if filename is None:
+            filename = raw_field.file_name
+
         filename = field.get_filename(filename)
 
+        #
         # If the filename already exists, keep adding an underscore to the name of
         # the file until the filename doesn't exist.
+        #
         while os.path.exists(os.path.join(settings.MEDIA_ROOT, filename)):
             try:
                 dot_index = filename.rindex('.')
                 filename += '_'
             else:
                 filename = filename[:dot_index] + '_' + filename[dot_index:]
+        #
+        # Save the file name on the object and write the file to disk
+        #
 
-        # Write the file to disk.
         setattr(self, field.attname, filename)
 
         full_filename = self._get_FIELD_filename(field)
-        fp = open(full_filename, 'wb')
-        fp.write(raw_contents)
-        fp.close()
+
+        if hasattr(raw_field, 'temporary_file_path'):
+            # This file has a file path that we can move.
+            raw_field.close()
+            file_move_safe(raw_field.temporary_file_path(), full_filename)
+
+        else:
+            # This is a normal uploadedfile that we can stream.
+            fp = open(full_filename, 'wb')
+            locks.lock(fp, locks.LOCK_EX)
+            for chunk in raw_field.chunk():
+                fp.write(chunk)
+            locks.unlock(fp)
+            fp.close()
 
         # Save the width and/or height, if applicable.
         if isinstance(field, ImageField) and (field.width_field or field.height_field):

File django/db/models/fields/__init__.py

         setattr(cls, 'get_%s_filename' % self.name, curry(cls._get_FIELD_filename, field=self))
         setattr(cls, 'get_%s_url' % self.name, curry(cls._get_FIELD_url, field=self))
         setattr(cls, 'get_%s_size' % self.name, curry(cls._get_FIELD_size, field=self))
-        setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_contents, save=True: instance._save_FIELD_file(self, filename, raw_contents, save))
+        setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_field, save=True: instance._save_FIELD_file(self, filename, raw_field, save))
         dispatcher.connect(self.delete_file, signal=signals.post_delete, sender=cls)
 
     def delete_file(self, instance):
         if new_data.get(upload_field_name, False):
             func = getattr(new_object, 'save_%s_file' % self.name)
             if rel:
-                func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0]["content"], save)
+                file = new_data[upload_field_name][0]
             else:
-                func(new_data[upload_field_name]["filename"], new_data[upload_field_name]["content"], save)
+                file = new_data[upload_field_name]
+
+            # Backwards-compatible support for files-as-dictionaries.
+            # We don't need to raise a warning because Model._save_FIELD_file will
+            # do so for us.
+            try:
+                file_name = file.file_name
+            except AttributeError:
+                file_name = file['filename']
+
+            func(file_name, file, save)
 
     def get_directory_name(self):
         return os.path.normpath(force_unicode(datetime.datetime.now().strftime(smart_str(self.upload_to))))
     def save_form_data(self, instance, data):
         from django.newforms.fields import UploadedFile
         if data and isinstance(data, UploadedFile):
-            getattr(instance, "save_%s_file" % self.name)(data.filename, data.content, save=False)
+            getattr(instance, "save_%s_file" % self.name)(data.filename, data.data, save=False)
 
     def formfield(self, **kwargs):
         defaults = {'form_class': forms.FileField}

File django/http/__init__.py

 except ImportError:
     from cgi import parse_qsl
 
-from django.utils.datastructures import MultiValueDict, FileDict
+from django.utils.datastructures import MultiValueDict, ImmutableList
 from django.utils.encoding import smart_str, iri_to_uri, force_unicode
-
+from django.http.multipartparser import MultiPartParser
+from django.conf import settings
+from django.core.files import uploadhandler
 from utils import *
 
 RESERVED_CHARS="!*'();:@&=+$,/?%#[]"
 
-
 class Http404(Exception):
     pass
 
 
     # The encoding used in GET/POST dicts. None means use default setting.
     _encoding = None
+    _upload_handlers = []
 
     def __init__(self):
         self.GET, self.POST, self.COOKIES, self.META, self.FILES = {}, {}, {}, {}, {}
 
     encoding = property(_get_encoding, _set_encoding)
 
-def parse_file_upload(header_dict, post_data):
-    """Returns a tuple of (POST QueryDict, FILES MultiValueDict)."""
-    import email, email.Message
-    from cgi import parse_header
-    raw_message = '\r\n'.join(['%s:%s' % pair for pair in header_dict.items()])
-    raw_message += '\r\n\r\n' + post_data
-    msg = email.message_from_string(raw_message)
-    POST = QueryDict('', mutable=True)
-    FILES = MultiValueDict()
-    for submessage in msg.get_payload():
-        if submessage and isinstance(submessage, email.Message.Message):
-            name_dict = parse_header(submessage['Content-Disposition'])[1]
-            # name_dict is something like {'name': 'file', 'filename': 'test.txt'} for file uploads
-            # or {'name': 'blah'} for POST fields
-            # We assume all uploaded files have a 'filename' set.
-            if 'filename' in name_dict:
-                assert type([]) != type(submessage.get_payload()), "Nested MIME messages are not supported"
-                if not name_dict['filename'].strip():
-                    continue
-                # IE submits the full path, so trim everything but the basename.
-                # (We can't use os.path.basename because that uses the server's
-                # directory separator, which may not be the same as the
-                # client's one.)
-                filename = name_dict['filename'][name_dict['filename'].rfind("\\")+1:]
-                FILES.appendlist(name_dict['name'], FileDict({
-                    'filename': filename,
-                    'content-type': 'Content-Type' in submessage and submessage['Content-Type'] or None,
-                    'content': submessage.get_payload(),
-                }))
-            else:
-                POST.appendlist(name_dict['name'], submessage.get_payload())
-    return POST, FILES
+    def _initialize_handlers(self):
+        self._upload_handlers = [uploadhandler.load_handler(handler, self)
+                                 for handler in settings.FILE_UPLOAD_HANDLERS]
 
+    def _set_upload_handlers(self, upload_handlers):
+        if hasattr(self, '_files'):
+            raise AttributeError("You cannot set the upload handlers after the upload has been processed.")
+        self._upload_handlers = upload_handlers
+
+    def _get_upload_handlers(self):
+        if not self._upload_handlers:
+            # If there are no upload handlers defined, initialize them from settings.
+            self._initialize_handlers()
+        return self._upload_handlers
+
+    upload_handlers = property(_get_upload_handlers, _set_upload_handlers)
+
+    def parse_file_upload(self, META, post_data):
+        """Returns a tuple of (POST QueryDict, FILES MultiValueDict)."""
+        self.upload_handlers = ImmutableList(
+            self.upload_handlers,
+            warning = "You cannot alter upload handlers after the upload has been processed."
+        )
+        parser = MultiPartParser(META, post_data, self.upload_handlers, self.encoding)
+        return parser.parse()
 
 class QueryDict(MultiValueDict):
     """

File django/http/multipartparser.py

+"""
+Multi-part parsing for file uploads.
+
+Exposes one class, ``MultiPartParser``, which feeds chunks of uploaded data to
+file upload handlers for processing.
+"""
+import cgi
+from django.conf import settings
+from django.core.exceptions import SuspiciousOperation
+from django.utils.datastructures import MultiValueDict
+from django.utils.encoding import force_unicode
+from django.utils.text import unescape_entities
+from django.core.files.uploadhandler import StopUpload, SkipFile, StopFutureHandlers
+
+__all__ = ('MultiPartParser','MultiPartParserError','InputStreamExhausted')
+
+class MultiPartParserError(Exception):
+    pass
+
+class InputStreamExhausted(Exception):
+    """
+    No more reads are allowed from this device.
+    """
+    pass
+
+RAW = "raw"
+FILE = "file"
+FIELD = "field"
+
+class MultiPartParser(object):
+    """
+    A rfc2388 multipart/form-data parser.
+
+    ``MultiValueDict.parse()`` reads the input stream in ``chunk_size`` chunks
+    and returns a tuple of ``(MultiValueDict(POST), MultiValueDict(FILES))``. If
+    ``file_upload_dir`` is defined files will be streamed to temporary files in
+    that directory.
+    """
+    def __init__(self, META, input_data, upload_handlers, encoding=None):
+        """
+        Initialize the MultiPartParser object.
+
+        :META:
+            The standard ``META`` dictionary in Django request objects.
+        :input_data:
+            The raw post data, as a bytestring.
+        :upload_handler:
+            An UploadHandler instance that performs operations on the uploaded
+            data.
+        :encoding:
+            The encoding with which to treat the incoming data.
+        """
+
+        #
+        # Content-Type should containt multipart and the boundary information.
+        #
+
+        content_type = META.get('HTTP_CONTENT_TYPE', META.get('CONTENT_TYPE', ''))
+        if not content_type.startswith('multipart/'):
+            raise MultiPartParserError('Invalid Content-Type: %s' % content_type)
+
+        # Parse the header to get the boundary to split the parts.
+        ctypes, opts = parse_header(content_type)
+        boundary = opts.get('boundary')
+        if not boundary or not cgi.valid_boundary(boundary):
+            raise MultiPartParserError('Invalid boundary in multipart: %s' % boundary)
+
+
+        #
+        # Content-Length should contain the length of the body we are about
+        # to receive.
+        #
+        try:
+            content_length = int(META.get('HTTP_CONTENT_LENGTH', META.get('CONTENT_LENGTH',0)))
+        except (ValueError, TypeError):
+            # For now set it to 0; we'll try again later on down.
+            content_length = 0
+
+        if content_length <= 0:
+            # This means we shouldn't continue...raise an error.
+            raise MultiPartParserError("Invalid content length: %r" % content_length)
+
+        self._boundary = boundary
+        self._input_data = input_data
+
+        # For compatibility with low-level network APIs (with 32-bit integers),
+        # the chunk size should be < 2^31, but still divisible by 4.
+        self._chunk_size = min(2**31-4, *[x.chunk_size for x in upload_handlers if x.chunk_size])
+
+        self._meta = META
+        self._encoding = encoding or settings.DEFAULT_CHARSET
+        self._content_length = content_length
+        self._upload_handlers = upload_handlers
+
+    def parse(self):
+        """
+        Parse the POST data and break it into a FILES MultiValueDict and a POST
+        MultiValueDict.
+
+        Returns a tuple containing the POST and FILES dictionary, respectively.
+        """
+        # We have to import QueryDict down here to avoid a circular import.
+        from django.http import QueryDict
+
+        encoding = self._encoding
+        handlers = self._upload_handlers
+
+        limited_input_data = LimitBytes(self._input_data, self._content_length)
+
+        # See if the handler will want to take care of the parsing.
+        # This allows overriding everything if somebody wants it.
+        for handler in handlers:
+            result = handler.handle_raw_input(limited_input_data,
+                                              self._meta,
+                                              self._content_length,
+                                              self._boundary,
+                                              encoding)
+            if result is not None:
+                return result[0], result[1]
+
+        # Create the data structures to be used later.
+        self._post = QueryDict('', mutable=True)
+        self._files = MultiValueDict()
+
+        # Instantiate the parser and stream:
+        stream = LazyStream(ChunkIter(limited_input_data, self._chunk_size))
+
+        # Whether or not to signal a file-completion at the beginning of the loop.
+        old_field_name = None
+        counters = [0] * len(handlers)
+
+        try:
+            for item_type, meta_data, field_stream in Parser(stream, self._boundary):
+                if old_field_name:
+                    # We run this at the beginning of the next loop
+                    # since we cannot be sure a file is complete until
+                    # we hit the next boundary/part of the multipart content.
+                    self.handle_file_complete(old_field_name, counters)
+
+                try:
+                    disposition = meta_data['content-disposition'][1]
+                    field_name = disposition['name'].strip()
+                except (KeyError, IndexError, AttributeError):
+                    continue
+
+                transfer_encoding = meta_data.get('content-transfer-encoding')
+                field_name = force_unicode(field_name, encoding, errors='replace')
+
+                if item_type == FIELD:
+                    # This is a post field, we can just set it in the post
+                    if transfer_encoding == 'base64':
+                        raw_data = field_stream.read()
+                        try:
+                            data = str(raw_data).decode('base64')
+                        except:
+                            data = raw_data
+                    else:
+                        data = field_stream.read()
+
+                    self._post.appendlist(field_name,
+                                          force_unicode(data, encoding, errors='replace'))
+                elif item_type == FILE:
+                    # This is a file, use the handler...
+                    file_successful = True
+                    file_name = disposition.get('filename')
+                    if not file_name:
+                        continue
+                    file_name = force_unicode(file_name, encoding, errors='replace')
+                    file_name = self.IE_sanitize(unescape_entities(file_name))
+
+                    content_type = meta_data.get('content-type', ('',))[0].strip()
+                    try:
+                        charset = meta_data.get('content-type', (0,{}))[1].get('charset', None)
+                    except:
+                        charset = None
+
+                    try:
+                        content_length = int(meta_data.get('content-length')[0])
+                    except (IndexError, TypeError, ValueError):
+                        content_length = None
+
+                    counters = [0] * len(handlers)
+                    try:
+                        for handler in handlers:
+                            try:
+                                handler.new_file(field_name, file_name,
+                                                 content_type, content_length,
+                                                 charset)
+                            except StopFutureHandlers:
+                                break
+
+                        for chunk in field_stream:
+                            if transfer_encoding == 'base64':
+                                # We only special-case base64 transfer encoding
+                                try:
+                                    chunk = str(chunk).decode('base64')
+                                except Exception, e:
+                                    # Since this is only a chunk, any error is an unfixable error.
+                                    raise MultiPartParserError("Could not decode base64 data: %r" % e)
+
+                            for i, handler in enumerate(handlers):
+                                chunk_length = len(chunk)
+                                chunk = handler.receive_data_chunk(chunk,
+                                                                   counters[i])
+                                counters[i] += chunk_length
+                                if chunk is None:
+                                    # If the chunk received by the handler is None, then don't continue.
+                                    break
+
+                    except SkipFile, e:
+                        file_successful = False
+                        # Just use up the rest of this file...
+                        exhaust(field_stream)
+                    else:
+                        # Handle file upload completions on next iteration.
+                        old_field_name = field_name
+                else:
+                    # If this is neither a FIELD or a FILE, just exhaust the stream.
+                    exhaust(stream)
+        except StopUpload, e:
+            if not e.connection_reset:
+                exhaust(limited_input_data)
+        else:
+            # Make sure that the request data is all fed
+            exhaust(limited_input_data)
+
+        # Signal that the upload has completed.
+        for handler in handlers:
+            retval = handler.upload_complete()
+            if retval:
+                break
+
+        return self._post, self._files
+
+    def handle_file_complete(self, old_field_name, counters):
+        """
+        Handle all the signalling that takes place when a file is complete.
+        """
+        for i, handler in enumerate(self._upload_handlers):
+            file_obj = handler.file_complete(counters[i])
+            if file_obj:
+                # If it returns a file object, then set the files dict.
+                self._files.appendlist(force_unicode(old_field_name,
+                                                     self._encoding,
+                                                     errors='replace'),
+                                       file_obj)
+                break
+
+    def IE_sanitize(self, filename):
+        """Cleanup filename from Internet Explorer full paths."""
+        return filename and filename[filename.rfind("\\")+1:].strip()
+
class LazyStream(object):
    """
    The LazyStream wrapper allows one to get and "unget" bytes from a stream.

    Given a producer object (an iterator that yields bytestrings), the
    LazyStream object will support iteration, reading, and keeping a "look-back"
    variable in case you need to "unget" some bytes.
    """
    def __init__(self, producer, length=None):
        """
        Every LazyStream must have a producer when instantiated.

        A producer is an iterable that returns a string each time it
        is called.
        """
        self._producer = producer
        # NOTE(review): _empty appears unused within this class -- confirm
        # before removing.
        self._empty = False
        # Bytes pushed back via unget(), served before the producer.
        self._leftover = ''
        self.length = length
        self._position = 0
        self._remaining = length

        # These fields are to do sanity checking to make sure we don't
        # have infinite loops getting/ungetting from the stream. The
        # purpose overall is to raise an exception if we perform lots
        # of stream get/unget gymnastics without getting
        # anywhere. Naturally this is not sound, but most probably
        # would indicate a bug if the exception is raised.

        # largest position tell us how far this lazystream has ever
        # been advanced
        self._largest_position = 0

        # "modifications since" will start at zero and increment every
        # time the position is modified but a new largest position is
        # not achieved.
        self._modifications_since = 0

    def tell(self):
        # File-like position reporting, delegating to the tracked property.
        return self.position

    def read(self, size=None):
        def parts():
            # Python-2.4-compatible conditional expression: use ``size`` if
            # given, otherwise fall back to self._remaining.
            remaining = (size is not None and [size] or [self._remaining])[0]
            # do the whole thing in one shot if no limit was provided.
            if remaining is None:
                yield ''.join(self)
                return

            # otherwise do some bookkeeping to return exactly enough
            # of the stream and stashing any extra content we get from
            # the producer
            while remaining != 0:
                assert remaining > 0, 'remaining bytes to read should never go negative'

                chunk = self.next()

                # Emit only what was asked for; push any surplus back so a
                # later read/next sees it first.
                emitting = chunk[:remaining]
                self.unget(chunk[remaining:])
                remaining -= len(emitting)
                yield emitting

        out = ''.join(parts())
        return out

    def next(self):
        """
        Used when the exact number of bytes to read is unimportant.

        Returns whatever chunk is conveniently available -- the leftover
        buffer if non-empty, otherwise the producer's next chunk. Useful to
        avoid unnecessary bookkeeping if performance is an issue.
        """
        if self._leftover:
            output = self._leftover
            self._leftover = ''
        else:
            output = self._producer.next()
        self.position += len(output)
        return output

    def close(self):
        """
        Used to invalidate/disable this lazy stream.

        Replaces the producer with an empty list. Any leftover bytes that have
        already been read will still be reported upon read() and/or next().
        """
        self._producer = []

    def __iter__(self):
        return self

    def unget(self, bytes):
        """
        Places bytes back onto the front of the lazy stream.

        Future calls to read() will return those bytes first. The
        stream position and thus tell() will be rewound.
        """
        self.position -= len(bytes)
        self._leftover = ''.join([bytes, self._leftover])

    def _set_position(self, value):
        # Advancing past the high-water mark resets the churn counter;
        # any other move counts as unproductive get/unget churn.
        if value > self._largest_position:
            self._modifications_since = 0
            self._largest_position = value
        else:
            self._modifications_since += 1
            if self._modifications_since > 500:
                raise SuspiciousOperation(
                    "The multipart parser got stuck, which shouldn't happen with"
                    " normal uploaded files. Check for malicious upload activity;"
                    " if there is none, report this to the Django developers."
                )

        self._position = value

    position = property(lambda self: self._position, _set_position)
+
class ChunkIter(object):
    """
    An iterable yielding fixed-size chunks read from a file-like object
    supplied to the constructor.
    """
    def __init__(self, flo, chunk_size=64 * 1024):
        self.flo = flo
        self.chunk_size = chunk_size

    def next(self):
        try:
            data = self.flo.read(self.chunk_size)
        except InputStreamExhausted:
            # The wrapped stream has hit its byte limit.
            raise StopIteration()
        if not data:
            raise StopIteration()
        return data

    def __iter__(self):
        return self
+
class LimitBytes(object):
    """ Limit bytes for a file object. """
    def __init__(self, fileobject, length):
        self._file = fileobject
        self.remaining = length

    def read(self, num_bytes=None):
        """
        Read data from the underlying file.
        If you ask for too much or there isn't anything left,
        this will raise an InputStreamExhausted error.
        """
        if self.remaining <= 0:
            raise InputStreamExhausted()
        # Clamp the request to the remaining budget; None means "the rest".
        if num_bytes is None:
            num_bytes = self.remaining
        elif num_bytes > self.remaining:
            num_bytes = self.remaining
        self.remaining -= num_bytes
        return self._file.read(num_bytes)
+
class InterBoundaryIter(object):
    """
    A producer whose items are the LazyStreams lying between multipart
    boundaries.
    """
    def __init__(self, stream, boundary):
        self._stream = stream
        self._boundary = boundary

    def __iter__(self):
        return self

    def next(self):
        try:
            return LazyStream(BoundaryIter(self._stream, self._boundary))
        except InputStreamExhausted:
            # Underlying stream is spent -- no more parts to yield.
            raise StopIteration()
+
class BoundaryIter(object):
    """
    A Producer that is sensitive to boundaries.

    Will happily yield bytes until a boundary is found. Will yield the bytes
    before the boundary, throw away the boundary bytes themselves, and push the
    post-boundary bytes back on the stream.

    The future calls to .next() after locating the boundary will raise a
    StopIteration exception.
    """

    def __init__(self, stream, boundary):
        self._stream = stream
        self._boundary = boundary
        self._done = False
        # rollback an additional six bytes because the format is like
        # this: CRLF<boundary>[--CRLF]
        self._rollback = len(boundary) + 6

        # Probe a single byte to detect an already-exhausted stream up
        # front, then push it straight back.
        unused_char = self._stream.read(1)
        if not unused_char:
            raise InputStreamExhausted()
        self._stream.unget(unused_char)
        # Try to use mx fast string search if available. Otherwise
        # use Python find. Wrap the latter for consistency.
        try:
            from mx.TextTools import FS
            self._fs = FS(boundary).find
        except ImportError:
            self._fs = lambda data: data.find(boundary)

    def __iter__(self):
        return self

    def next(self):
        if self._done:
            raise StopIteration()

        stream = self._stream
        rollback = self._rollback

        # Accumulate more than ``rollback`` bytes so a boundary straddling
        # a chunk edge cannot be missed.
        bytes_read = 0
        chunks = []
        for bytes in stream:
            bytes_read += len(bytes)
            chunks.append(bytes)
            if bytes_read > rollback:
                break
            if not bytes:
                break
        else:
            # The stream ran dry; after this chunk there is nothing left.
            self._done = True

        if not chunks:
            raise StopIteration()

        chunk = ''.join(chunks)
        boundary = self._find_boundary(chunk, len(chunk) < self._rollback)

        if boundary:
            end, next = boundary
            # Everything after the boundary belongs to the following part.
            stream.unget(chunk[next:])
            self._done = True
            return chunk[:end]
        else:
            # make sure we dont treat a partial boundary (and
            # its separators) as data
            if not chunk[:-rollback]:
                # There's nothing left, we should just return and mark as done.
                self._done = True
                return chunk
            else:
                stream.unget(chunk[-rollback:])
                return chunk[:-rollback]

    def _find_boundary(self, data, eof = False):
        """
        Finds a multipart boundary in data.

        Should no boundary exist in the data None is returned instead. Otherwise
        a tuple containing the indices of the following are returned:

         * the end of current encapsulation
         * the start of the next encapsulation
        """
        index = self._fs(data)
        if index < 0:
            return None
        else:
            end = index
            next = index + len(self._boundary)
            # data_len is only needed by the disabled --CRLF skip below.
            data_len = len(data) - 1
            # backup over CRLF
            if data[max(0,end-1)] == '\n':
                end -= 1
            if data[max(0,end-1)] == '\r':
                end -= 1
            # NOTE(review): skipping over the trailing --CRLF is disabled;
            # the caller relies on the rollback handling in next() instead.
            #if data[min(data_len,next)] == '-':
            #    next += 1
            #if data[min(data_len,next)] == '-':
            #    next += 1
            #if data[min(data_len,next)] == '\r':
            #    next += 1
            #if data[min(data_len,next)] == '\n':
            #    next += 1
            return end, next
+
def exhaust(stream_or_iterable):
    """
    Completely exhausts an iterator or stream.

    Raise a MultiPartParserError if the argument is not a stream or an iterable.
    """
    try:
        iterator = iter(stream_or_iterable)
    except TypeError:
        # Not directly iterable; treat it as a file-like stream.
        iterator = ChunkIter(stream_or_iterable, 16384)

    if iterator is None:
        raise MultiPartParserError('multipartparser.exhaust() was passed a non-iterable or stream parameter')

    # Drain everything, discarding each chunk.
    for _chunk in iterator:
        pass
+
def parse_boundary_stream(stream, max_header_size):
    """
    Parses one and exactly one stream that encapsulates a boundary.

    Returns a (TYPE, header_dict, stream) triple, where TYPE is one of
    RAW, FIELD or FILE.
    """
    # Stream at beginning of header, look for end of header
    # and parse it if found. The header must fit within one
    # chunk.
    chunk = stream.read(max_header_size)

    # 'find' returns the top of these four bytes, so we'll
    # need to munch them later to prevent them from polluting
    # the payload.
    header_end = chunk.find('\r\n\r\n')

    def _parse_header(line):
        main_value_pair, params = parse_header(line)
        try:
            name, value = main_value_pair.split(':', 1)
        except ValueError:
            # No ':' separator -- this is not a header line. (The original
            # bare except also swallowed KeyboardInterrupt/SystemExit.)
            raise ValueError("Invalid header: %r" % line)
        return name, (value, params)

    if header_end == -1:
        # we find no header, so we just mark this fact and pass on
        # the stream verbatim
        stream.unget(chunk)
        return (RAW, {}, stream)

    header = chunk[:header_end]

    # here we place any excess chunk back onto the stream, as
    # well as throwing away the CRLFCRLF bytes from above.
    stream.unget(chunk[header_end + 4:])

    TYPE = RAW
    outdict = {}

    # Eliminate blank lines
    for line in header.split('\r\n'):
        # This terminology ("main value" and "dictionary of
        # parameters") is from the Python docs.
        try:
            name, (value, params) = _parse_header(line)
        except ValueError:
            # Skip blank or malformed header lines.
            continue

        if name == 'content-disposition':
            TYPE = FIELD
            if params.get('filename'):
                TYPE = FILE

        outdict[name] = value, params

    if TYPE == RAW:
        stream.unget(chunk)

    return (TYPE, outdict, stream)
+
class Parser(object):
    """Iterates over the parts of a multipart stream, yielding one
    (item_type, headers, substream) triple per encapsulation."""
    def __init__(self, stream, boundary):
        self._stream = stream
        self._separator = '--' + boundary

    def __iter__(self):
        # Each sub-stream covers the bytes between two boundaries.
        for sub_stream in InterBoundaryIter(self._stream, self._separator):
            yield parse_boundary_stream(sub_stream, 1024)
+
def parse_header(line):
    """Parse a Content-* style header line into (main value, params dict)."""
    fragments = _parse_header_params(';' + line)
    main_value = fragments.pop(0).lower()
    params = {}
    for fragment in fragments:
        equals = fragment.find('=')
        if equals < 0:
            # No '=' means this fragment carries no parameter; skip it.
            continue
        name = fragment[:equals].strip().lower()
        value = fragment[equals + 1:].strip()
        # Strip surrounding quotes and unescape quoted-pair sequences.
        if len(value) >= 2 and value.startswith('"') and value.endswith('"'):
            value = value[1:-1]
            value = value.replace('\\\\', '\\').replace('\\"', '"')
        params[name] = value
    return main_value, params
+
+def _parse_header_params(s):
+    plist = []
+    while s[:1] == ';':
+        s = s[1:]
+        end = s.find(';')
+        while end > 0 and s.count('"', 0, end) % 2:
+            end = s.find(';', end + 1)
+        if end < 0:
+            end = len(s)
+        f = s[:end]
+        plist.append(f.strip())
+        s = s[end:]
+    return plist

File django/newforms/fields.py

 import os
 import re
 import time
+try:
+    from cStringIO import StringIO
+except ImportError:
+    from StringIO import StringIO
+
 # Python 2.3 fallbacks
 try:
     from decimal import Decimal, DecimalException
 
 class UploadedFile(StrAndUnicode):
     "A wrapper for files uploaded in a FileField"
-    def __init__(self, filename, content):
+    def __init__(self, filename, data):
         self.filename = filename
-        self.content = content
+        self.data = data
 
     def __unicode__(self):
         """
             return None
         elif not data and initial:
             return initial
+
+        if isinstance(data, dict):
+            # We warn once, then support both ways below.
+            import warnings
+            warnings.warn(
+                message = "Representing uploaded files as dictionaries is"\
+                          " deprecated. Use django.core.files.SimpleUploadedFile "\
+                          " instead.",
+                category = DeprecationWarning,
+                stacklevel = 2
+            )
+
         try:
-            f = UploadedFile(data['filename'], data['content'])
-        except TypeError:
+            file_name = data.file_name
+            file_size = data.file_size
+        except AttributeError:
+            try:
+                file_name = data.get('filename')
+                file_size = bool(data['content'])
+            except (AttributeError, KeyError):
+                raise ValidationError(self.error_messages['invalid'])
+
+        if not file_name:
             raise ValidationError(self.error_messages['invalid'])
-        except KeyError:
-            raise ValidationError(self.error_messages['missing'])
-        if not f.content:
+        if not file_size:
             raise ValidationError(self.error_messages['empty'])
-        return f
+
+        return UploadedFile(file_name, data)
 
 class ImageField(FileField):
     default_error_messages = {
         elif not data and initial:
             return initial
         from PIL import Image
-        from cStringIO import StringIO
+
+        # We need to get a file object for PIL. We might have a path or we might
+        # have to read the data into memory.
+        if hasattr(data, 'temporary_file_path'):
+            file = data.temporary_file_path()
+        else:
+            if hasattr(data, 'read'):
+                file = StringIO(data.read())
+            else:
+                file = StringIO(data['content'])
+
         try:
             # load() is the only method that can spot a truncated JPEG,
             #  but it cannot be called sanely after verify()
-            trial_image = Image.open(StringIO(f.content))
+            trial_image = Image.open(file)
             trial_image.load()
+
+            # Since we're about to use the file again we have to reset the
+            # file object if possible.
+            if hasattr(file, 'reset'):
+                file.reset()
+
             # verify() is the only method that can spot a corrupt PNG,
             #  but it must be called immediately after the constructor
-            trial_image = Image.open(StringIO(f.content))
+            trial_image = Image.open(file)
             trial_image.verify()
         except Exception: # Python Imaging Library doesn't recognize it as an image
             raise ValidationError(self.error_messages['invalid_image'])

File django/oldforms/__init__.py

         self.field_name, self.is_required = field_name, is_required
         self.validator_list = [self.isNonEmptyFile] + validator_list
 
-    def isNonEmptyFile(self, field_data, all_data):
+    def isNonEmptyFile(self, new_data, all_data):
+        if hasattr(new_data, 'upload_errors'):
+            upload_errors = new_data.upload_errors()
+            if upload_errors:
+                raise validators.CriticalValidationError, upload_errors
         try:
-            content = field_data['content']
-        except TypeError:
-            raise validators.CriticalValidationError, ugettext("No file was submitted. Check the encoding type on the form.")
-        if not content:
+            file_size = new_data.file_size
+        except AttributeError:
+            file_size = len(new_data['content'])
+        if not file_size:
             raise validators.CriticalValidationError, ugettext("The submitted file is empty.")
 
     def render(self, data):
         return mark_safe(u'<input type="file" id="%s" class="v%s" name="%s" />' % \
             (self.get_id(), self.__class__.__name__, self.field_name))
 
+    def prepare(self, new_data):
+        if hasattr(new_data, 'upload_errors'):
+            upload_errors = new_data.upload_errors()
+            new_data[self.field_name] = { '_file_upload_error': upload_errors }
+
     def html2python(data):
         if data is None:
             raise EmptyValue

File django/test/client.py

 import urllib
 import sys
 import os
-from cStringIO import StringIO
+try:
+    from cStringIO import StringIO
+except ImportError:
+    from StringIO import StringIO
 from django.conf import settings
 from django.contrib.auth import authenticate, login
 from django.core.handlers.base import BaseHandler
 BOUNDARY = 'BoUnDaRyStRiNg'
 MULTIPART_CONTENT = 'multipart/form-data; boundary=%s' % BOUNDARY
 
class FakePayload(object):
    """
    A wrapper around StringIO that restricts what can be read since data from
    the network can't be seeked and cannot be read outside of its content
    length. This makes sure that views can't do anything under the test client
    that wouldn't work in Real Life.
    """
    def __init__(self, content):
        self.__buffer = StringIO(content)
        self.__remaining = len(content)

    def read(self, num_bytes=None):
        if num_bytes is None:
            # A spent payload forces at least a 1-byte request so the
            # assertion below trips instead of silently returning ''.
            num_bytes = self.__remaining or 1
        assert self.__remaining >= num_bytes, "Cannot read more than the available bytes from the HTTP incoming data."
        self.__remaining -= num_bytes
        return self.__buffer.read(num_bytes)
+
 class ClientHandler(BaseHandler):
     """
     A HTTP Handler that can be used for testing purposes.
             'CONTENT_TYPE':   content_type,
             'PATH_INFO':      urllib.unquote(path),
             'REQUEST_METHOD': 'POST',
-            'wsgi.input':     StringIO(post_data),
+            'wsgi.input':     FakePayload(post_data),
         }
         r.update(extra)
 

File django/utils/datastructures.py

             except TypeError: # Special-case if current isn't a dict.
                 current = {bits[-1]: v}
 
-class FileDict(dict):
class ImmutableList(tuple):
    """
    A tuple-like object that raises useful errors when it is asked to mutate.

    Example::

        >>> a = ImmutableList(range(5), warning="You cannot mutate this.")
        >>> a[3] = '4'
        Traceback (most recent call last):
            ...
        AttributeError: You cannot mutate this.
    """

    def __new__(cls, *args, **kwargs):
        # Pull out our custom keyword argument before delegating to tuple.
        warning = kwargs.pop('warning', 'ImmutableList object is immutable.')
        self = tuple.__new__(cls, *args, **kwargs)
        self.warning = warning
        return self

    def complain(self, *args, **kwargs):
        """Raise the configured warning for any attempted mutation."""
        if isinstance(self.warning, Exception):
            raise self.warning
        else:
            # Instance form of raise; valid on both Python 2 and 3, unlike
            # the statement form ``raise AttributeError, msg``.
            raise AttributeError(self.warning)

    # All list mutation functions complain.
    __delitem__  = complain
    __delslice__ = complain
    __iadd__     = complain
    __imul__     = complain
    __setitem__  = complain
    __setslice__ = complain
    append       = complain
    extend       = complain
    insert       = complain
    pop          = complain
    remove       = complain
    sort         = complain
    reverse      = complain
 
 class DictWrapper(dict):
     """

File django/utils/text.py

 from django.utils.encoding import force_unicode
 from django.utils.functional import allow_lazy
 from django.utils.translation import ugettext_lazy
+from htmlentitydefs import name2codepoint
 
 # Capitalizes the first letter of a string.
 capfirst = lambda x: x and force_unicode(x)[0].upper() + force_unicode(x)[1:]
             yield bit
 smart_split = allow_lazy(smart_split, unicode)
 
+def _replace_entity(match):
+     text = match.group(1)
+     if text[0] == u'#':
+         text = text[1:]
+         try:
+             if text[0] in u'xX':
+                 c = int(text[1:], 16)
+             else:
+                 c = int(text)
+             return unichr(c)
+         except ValueError:
+             return match.group(0)
+     else:
+         try:
+             return unichr(name2codepoint[text])
+         except (ValueError, KeyError):
+             return match.group(0)
+
+_entity_re = re.compile(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));")
+
+def unescape_entities(text):
+     return _entity_re.sub(_replace_entity, text)
+unescape_entities = allow_lazy(unescape_entities, unicode)

File docs/newforms.txt

 need to bind the file data containing the mugshot image::
 
     # Bound form with an image field
+    >>> from django.core.files.uploadedfile import SimpleUploadedFile
     >>> data = {'subject': 'hello',
     ...         'message': 'Hi there',
     ...         'sender': 'foo@example.com',
     ...         'cc_myself': True}
-    >>> file_data = {'mugshot': {'filename':'face.jpg'
-    ...                          'content': <file data>}}
+    >>> file_data = {'mugshot': SimpleUploadedFile('face.jpg', <file data>)}
     >>> f = ContactFormWithMugshot(data, file_data)
 
 In practice, you will usually specify ``request.FILES`` as the source

File docs/request_response.txt

     strings.
 
 ``FILES``
+    
+    .. admonition:: Changed in Django development version
+        
+        In previous versions of Django, ``request.FILES`` contained
+        simple ``dict`` objects representing uploaded files. This is
+        no longer true -- files are represented by ``UploadedFile``
+        objects as described below.
+        
+        These ``UploadedFile`` objects will emulate the old-style ``dict``
+        interface, but this is deprecated and will be removed in the next
+        release of Django.
+        
     A dictionary-like object containing all uploaded files. Each key in
     ``FILES`` is the ``name`` from the ``<input type="file" name="" />``. Each
-    value in ``FILES`` is a standard Python dictionary with the following three
-    keys:
+    value in ``FILES`` is an ``UploadedFile`` object containing the following
+    attributes:
 
-        * ``filename`` -- The name of the uploaded file, as a Python string.
-        * ``content-type`` -- The content type of the uploaded file.
-        * ``content`` -- The raw content of the uploaded file.
+        * ``read(num_bytes=None)`` -- Read a number of bytes from the file.
+        * ``file_name`` -- The name of the uploaded file.
+        * ``file_size`` -- The size, in bytes, of the uploaded file.
+        * ``chunk()`` -- A generator that yields sequential chunks of data.
 
+    See `File Uploads`_ for more information. 
+    
     Note that ``FILES`` will only contain data if the request method was POST
     and the ``<form>`` that posted to the request had
     ``enctype="multipart/form-data"``. Otherwise, ``FILES`` will be a blank
     dictionary-like object.
+    
+    .. _File Uploads: ../upload_handling/
 
 ``META``
     A standard Python dictionary containing all available HTTP headers.

File docs/settings.txt

 
 The database backend to use. The build-in database backends are
 ``'postgresql_psycopg2'``, ``'postgresql'``, ``'mysql'``, ``'mysql_old'``,
-``'sqlite3'`` and ``'oracle'``.
+``'sqlite3'`` and ``'oracle'``.
 
 In the Django development version, you can use a database backend that doesn't
 ship with Django by setting ``DATABASE_ENGINE`` to a fully-qualified path (i.e.
 The character encoding used to decode any files read from disk. This includes
 template files and initial SQL data files.
 
+FILE_UPLOAD_HANDLERS
+--------------------
+
+**New in Django development version**
+
+Default::
+
+    ("django.core.files.uploadhandler.MemoryFileUploadHandler",
+     "django.core.files.uploadhandler.TemporaryFileUploadHandler",)
+
+A tuple of handlers to use for uploading. See `file uploads`_ for details.
+
+.. _file uploads: ../upload_handling/
+
+FILE_UPLOAD_MAX_MEMORY_SIZE
+---------------------------
+
+**New in Django development version**
+
+Default: ``2621440`` (i.e. 2.5 MB).
+
+The maximum size (in bytes) that an upload will be before it gets streamed to
+the file system. See `file uploads`_ for details.
+
+FILE_UPLOAD_TEMP_DIR
+--------------------
+
+**New in Django development version**
+
+Default: ``None``
+
+The directory to store data temporarily while uploading files. If ``None``,
+Django will use the standard temporary directory for the operating system. For
+example, this will default to '/tmp' on *nix-style operating systems.
+
+See `file uploads`_ for details.
+
 FIXTURE_DIRS
 -------------
 

File docs/upload_handling.txt

+============
+File Uploads
+============
+
+**New in Django development version**
+
+Most Web sites wouldn't be complete without a way to upload files. When Django
+handles a file upload, the file data ends up placed in ``request.FILES`` (for
+more on the ``request`` object see the documentation for `request and response
+objects`_). This document explains how files are stored on disk and in memory,
+and how to customize the default behavior.
+
+.. _request and response objects: ../request_response/#attributes
+
+Basic file uploads
+==================
+
+Consider a simple form containing a ``FileField``::
+
+    from django import newforms as forms
+
+    class UploadFileForm(forms.Form):
+        title = forms.CharField(max_length=50)
+        file  = forms.FileField()
+        
+A view handling this form will receive the file data in ``request.FILES``, which
+is a dictionary containing a key for each ``FileField`` (or ``ImageField``, or
+other ``FileField`` subclass) in the form. So the data from the above form would
+be accessible as ``request.FILES['file']``.
+
+Most of the time, you'll simply pass the file data from ``request`` into the
+form as described in `binding uploaded files to a form`_. This would look
+something like::
+
+    from django.http import HttpResponseRedirect
+    from django.shortcuts import render_to_response
+
+    # Imaginary function to handle an uploaded file.
+    from somewhere import handle_uploaded_file
+
+    def upload_file(request):
+        if request.method == 'POST':
+            form = UploadFileForm(request.POST, request.FILES)
+            if form.is_valid():
+                handle_uploaded_file(request.FILES['file'])
+                return HttpResponseRedirect('/success/url/')
+        else:
+            form = UploadFileForm()
+        return render_to_response('upload.html', {'form': form})
+
+.. _binding uploaded files to a form: ../newforms/#binding-uploaded-files-to-a-form
+
+Notice that we have to pass ``request.FILES`` into the form's constructor; this
+is how file data gets bound into a form.
+
+Handling uploaded files
+-----------------------
+
+The final piece of the puzzle is handling the actual file data from
+``request.FILES``. Each entry in this dictionary is an ``UploadedFile`` object
+-- a simple wrapper around an uploaded file. You'll usually use one of these
+methods to access the uploaded content:
+
+    ``UploadedFile.read()``