Commits

Anonymous committed 289dd17

Basic implementation of using s3 multipart uploads for file writes. This allows for writing large files without requiring enough disk/memory space to store the file locally.

Comments (0)

Files changed (3)

storages/backends/s3boto.py

             self.key = storage.bucket.new_key(storage._encode_name(name))
         self._is_dirty = False
         self._file = None
+        self._multipart = None
+        # 5 MB is the minimum part size (if there is more than one part).
+        # Amazon allows up to 10,000 parts.  The default supports uploads
+        # up to roughly 50 GB.  Increase the part size to accommodate
+        # files larger than this.
+        self._write_buffer_size = 5242880
+        self._write_counter = 0
 
     @property
     def size(self):
 
     def write(self, *args, **kwargs):
         if 'w' not in self._mode:
-            raise AttributeError("File was opened for read-only access.")
+            raise AttributeError("File was not opened in write mode.")
         self._is_dirty = True
+        if self._multipart is None:
+            self._multipart = self._storage.bucket.initiate_multipart_upload(self.key.name)
+        if self._write_buffer_size <= self._buffer_file_size:
+            self._flush_write_buffer()
         return super(S3BotoStorageFile, self).write(*args, **kwargs)
 
+    @property
+    def _buffer_file_size(self):
+        pos = self.file.tell()
+        self.file.seek(0,os.SEEK_END)
+        length = self.file.tell()
+        self.file.seek(pos)
+        return length
+
+    def _flush_write_buffer(self):
+        if self._buffer_file_size:
+            self._write_counter += 1
+            self.file.seek(0)
+            self._multipart.upload_part_from_file(
+                self.file,
+                self._write_counter,
+                headers=self._storage.headers
+            )
+            self.file.close()
+            self._file = None
+
     def close(self):
         if self._is_dirty:
-            self._file.seek(0)
-            self.key.set_contents_from_file(self._file,
-                headers=self._storage.headers, policy=self._storage.acl)
+            self._flush_write_buffer()
+            self._multipart.complete_upload()
+        else:
+            if not self._multipart is None:
+                self._multipart.cancel_upload()
         self.key.close()

storages/tests/__init__.py

 from storages.tests.hashpath import *
-from s3boto import S3BotoStorageTests
+from s3boto import S3BotoStorageTests, S3BotoStorageFileTests

storages/tests/s3boto.py

 from django.core.files.storage import FileSystemStorage
 from uuid import uuid4
 import os
-from storages.backends.s3boto import S3BotoStorage
+from storages.backends.s3boto import S3BotoStorage, S3BotoStorageFile
 from urllib2 import urlopen
 
 class S3BotoTestCase(TestCase):
         f = ContentFile(content)
         self.storage.save(name, f)
         self.assertEqual(content, urlopen(self.storage.url(name)).read())
-        
+        
class S3BotoStorageFileTests(S3BotoTestCase):
    def test_multipart_upload(self):
        """Writing more than the buffer size triggers a multipart upload
        with one part per full buffer, plus a final part on close."""
        nparts = 2
        name = self.prefix_path("test_multipart_upload.txt")
        mode = 'w'
        f = S3BotoStorageFile(name, mode, self.storage)
        content_length = 1024 * 1024  # 1 MB
        content = 'a' * content_length

        # 'bytes' renamed to avoid shadowing the builtin.
        bytes_written = 0
        target = f._write_buffer_size * nparts
        while bytes_written < target:
            f.write(content)
            bytes_written += content_length

        # Make the buffer roll over so f._write_counter is incremented.
        f.write("finished")

        # Verify the upload was multipart and correctly partitioned.
        self.assertEqual(f._write_counter, nparts)

        # Complete the upload.
        f.close()

        # Verify that the remaining buffered bytes were uploaded when the
        # file was closed.
        self.assertEqual(f._write_counter, nparts + 1)
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.