Commits

b7w committed ee1cbfa Merge

Merge dev

  • Parent commits cabd4a5, 87053bf
  • Tags v0.10

Files changed (2)

 # DEPTH > 0 - FOLDER
 #   If depth from root equals some *value*, archive the folder.
 # MAX_SIZE - 4000MB
-#   Max archive size. IF bigger print warn and not upload.
+# VOLUME_SIZE - 2000MB
+#   Volume size. Packages bigger than MAX_SIZE are split into volumes of this size.
 # HISTORY_SIZE - 8
-Options = namedtuple('Options', 'DEPTH, MAX_SIZE, HISTORY_SIZE')
-options = partial(Options, DEPTH=None, MAX_SIZE=4000, HISTORY_SIZE=8)
+Options = namedtuple('Options', 'DEPTH, MAX_SIZE, VOLUME_SIZE, HISTORY_SIZE')
+options = partial(Options, DEPTH=None, MAX_SIZE=4000, VOLUME_SIZE=2000, HISTORY_SIZE=8)
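Note: `options` is a functools.partial over the namedtuple, so per-folder configs can override individual fields. A minimal sketch, with illustrative values rather than the defaults:

    # Split any package larger than 1000 MB into 500 MB volumes.
    opts = options(MAX_SIZE=1000, VOLUME_SIZE=500)
    assert opts.VOLUME_SIZE == 500 and opts.HISTORY_SIZE == 8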
 
 # LOCAL - local path to folder
 # REMOTE - remote path to folder
 
 class Archive(object):
     def __init__(self, package, password=None, hide=False, level=0, options=None, path_7z=None):
+        """
+        :param package: Package instance with the files to archive
+        """
         self.package = package
         self.file_name = '{0}.{1}.7z'.format(package.name, package.hash)
         self.password = password
     def _escape(self, string):
         return '"' + string + '"'
 
-    def run_cmd(self, cmd_str):
+    def run_cmd(self, cmd_str, path, split=False):
         if not os.path.exists(self.path_7z):
            raise AppError('7z "{0}" does not exist, enter full path to the binary'.format(self.path_7z))
         try:
             subprocess.check_output(cmd_str, shell=True)
+            archives = sorted(glob.glob(path + '*'))
+            if split:
+                return [(i, i.split('.')[-1]) for i in archives]
+            return [(i, None) for i in archives]
         except subprocess.CalledProcessError as e:
             raise AppError('Subprocess: {0}\n{1}'.format(cmd_str, e.output))
         except Exception as e:
             raise AppError('Subprocess: ' + str(e))
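With the new `path` and `split` parameters, run_cmd now returns (file, part) pairs rather than nothing (this assumes `glob` is imported at module top, outside this hunk). Roughly, with illustrative names:

    # split=True: 7z numbers the volumes .001, .002, ...
    archives = ['photos.7z.001', 'photos.7z.002']
    [(i, i.split('.')[-1]) for i in archives]
    # -> [('photos.7z.001', '001'), ('photos.7z.002', '002')]
    # split=False: the single archive is paired with None.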
 
-    def is_valid(self):
-        return self.package.size < self.options.MAX_SIZE * 1024 ** 2
-
-    def archive_to(self, path):
+    def archive_to(self, tmp_path, file_name):
+        path = os.path.join(tmp_path, file_name)
+        split = self.package.size > self.options.MAX_SIZE * 1024 ** 2
         args = [self._escape(self.path_7z), 'a',
                 '-bd',  # Disable percentage indicator
                 '-xr!.*',  # Exclude hidden files
                 '-mx{0}'.format(self.level),
                 '-mhe=on' if self.password and self.hide else '-mhe=off',
                 '-p{0}'.format(self.password) if self.password else None,
+                '-v{0}m'.format(self.options.VOLUME_SIZE) if split else None,
                 self._escape(path), ]
         args += [self._escape(i) for i in self.package.files]
         cmd_str = ' '.join([i for i in args if i])
-        return self.run_cmd(cmd_str)
+        return self.run_cmd(cmd_str, path, split=split)
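For a package over MAX_SIZE, the assembled command comes out roughly like this (path, hash, file list and level are illustrative):

    "/usr/local/bin/7z" a -bd -xr!.* -mx5 -mhe=off -v2000m "/tmp/photos.<hash>.7z" "/data/photos/file1" ...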
 
     def clear(self, path):
         try:
 class Storage(object):
     RE_EXCEPTION = (httplib.HTTPException, IOError, socket.error)
 
-    def __init__(self, access_key, secret_key, tmp):
+    def __init__(self, access_key, secret_key):
         try:
             self.conn = S3Connection(access_key, secret_key)
         except S3ResponseError as e:
             raise AppError('Can not connect to S3, {0}'.format(e))
-        self.tmp_path = tmp
+
+    def _archive_hash(self, path):
+        return re.search(r'\w{40}', path).group(0)
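The 40-character match is the package hash embedded in every key name, so it can be recovered from any volume. A quick sketch:

    path = 'backup/photos/photos.' + 'a' * 40 + '.7z.001'
    re.search(r'\w{40}', path).group(0)  # -> the 40-char hash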
 
     def get_keys(self, bucket_name, root):
         bucket = self.conn.lookup(bucket_name)
         keys = bucket.list(root)
         return list(keys)
 
-    def get_key(self, bucket_name, root, archive):
+    def get_key(self, bucket_name, root, archive, part=None):
         bucket = self.conn.lookup(bucket_name)
         if not bucket:
             raise AppError('No bucket "{0}" found'.format(bucket_name))
-        key = '/'.join([root, archive.package.path, archive.file_name, ])
+        file_name = '{0}.{1}'.format(archive.file_name, part) if part else archive.file_name
+        key = '/'.join([root, archive.package.path, file_name, ])
         return Key(bucket, name=key)
 
+    def is_exists(self, bucket_name, root, package):
+        path = '{0}/{1}'.format(root, package.path)
+        keys = self.get_keys(bucket_name, path)
+        return any(self._archive_hash(i.key) == package.hash for i in keys)
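`is_exists` turns the dedup check into a single prefix listing: any key under the package's remote path whose embedded hash matches means the package is already stored unchanged. A hypothetical call (bucket and root names are illustrative):

    # True if any 'backup/<package.path>/...<package.hash>.7z*' key exists
    storage.is_exists('my-bucket', 'backup', package)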
+
     def get_archives(self, bucket_name, root):
         """
        Return map of `archive name as str - versions of Key type`
 
     def count(self, bucket_name, root):
         keys = self.get_keys(bucket_name, root)
-        return len(keys)
+        return len(set([self._archive_hash(i.name) for i in keys]))
 
     def count_unique(self, bucket_name, root):
-        def base_name(name):
-            return '.'.join(name.split('.')[:-2])
+        def base_name(path):
+            folder, name = os.path.split(path)
+            return os.path.join(folder, name.split('.')[0])
 
         keys = self.get_keys(bucket_name, root)
         return len(set([base_name(i.name) for i in keys]))
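`base_name` collapses every version and volume of a folder to one path, so the count now survives the extra `.NNN` volume suffix. For example:

    # 'root/photos/photos.<hash>.7z.001' -> 'root/photos/photos'
    base_name('root/photos/photos.' + 'a' * 40 + '.7z.001')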
 
-    def retry(self, func, *args, **kwargs):
+    def _retry_upload(self, func, *args, **kwargs):
         for i in range(3):
             try:
                 return func(*args, **kwargs)
             except self.RE_EXCEPTION as e:
-                msg = 'Func "{n}" args={args} kwargs={kwargs}\n{e}'
-                print(msg.format(n=func.__name__, args=args, kwargs=kwargs, e=e))
+                print('\tWarn: {0}'.format(e))
                 time.sleep(2)
         raise AppError('Failed 3 attempts to run function "{0}"'.format(func.__name__))
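Behaviour is unchanged from the old `retry`, just renamed with terser logging: three attempts, 2 s apart, retrying only the transient errors in RE_EXCEPTION. A call-site sketch with an illustrative path:

    self._retry_upload(key.set_contents_from_filename, '/tmp/photos.7z')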
 
-    def upload(self, key, archive):
-        tmp_file = None
+    def upload(self, key, archive, path, part):
         try:
             key.set_metadata('hash', archive.package.hash)
             key.set_metadata('files-count', archive.package.count)
+            key.set_metadata('part', part or '000')
             key.set_metadata('api-version', 1)
-            tmp_file = os.path.join(self.tmp_path, archive.file_name)
-            archive.archive_to(tmp_file)
-            self.retry(key.set_contents_from_filename, tmp_file)
+            self._retry_upload(key.set_contents_from_filename, path)
         except S3ResponseError as e:
             raise AppError('key={0} archive={1} archive_name={2}\n{3}'.format(key, archive, archive.file_name, e))
-        finally:
-            if tmp_file:
-                archive.clear(tmp_file)
 
 
 def load_config(path):
     """
     print('\nBackUp dry_run={0} debug={1}'.format(dry_run, debug))
     config = load_config(config) if type(config) is str else config
-    storage = Storage(config.ACCESS_KEY, config.SECRET_KEY, config.TMP_PATH)
+    storage = Storage(config.ACCESS_KEY, config.SECRET_KEY)
+    upload_size = 0
     for folder_config in config.FOLDERS:
         finder = Finder(folder_config.LOCAL, folder_config.OPTIONS)
         for package in finder.find():
             exclude = folder_config.EXCLUDE and any(re.match(i, package.path) for i in folder_config.EXCLUDE)
-            if not exclude:
+            exists = storage.is_exists(folder_config.BUCKET, folder_config.REMOTE, package)
+            if not exclude and not exists:
                 archive = Archive(package, password=folder_config.PASSWORD, hide=folder_config.HIDE,
                                   level=folder_config.LEVEL, options=folder_config.OPTIONS, path_7z=config.PATH_7Z)
-                key = storage.get_key(folder_config.BUCKET, folder_config.REMOTE, archive)
-                if not key.exists():
-                    if archive.is_valid():
-                        if not dry_run:
-                            storage.upload(key, archive)
-                        package_size = package.size / 1024 ** 2
-                        print('Uploaded /{0}/{1}  {{count: {2}, size: {3}mb}}'
-                              .format(folder_config.REMOTE, package.path, package.count, package_size))
-                    else:
-                        print('Warn /{0}/{1}, check size'.format(folder_config.REMOTE, archive.package.path))
-                elif debug:
-                    print('Exists /{0}/{1}'.format(folder_config.REMOTE, archive.package.path))
+                upload_size += package.size
+                for tmp_file, part in archive.archive_to(config.TMP_PATH, archive.file_name):
+                    key = storage.get_key(folder_config.BUCKET, folder_config.REMOTE, archive, part)
+                    if not dry_run:
+                        if debug:
+                            print('Start /{0}/{1}  {{part: {2}}}'
+                                  .format(folder_config.REMOTE, package.path, part or '000'))
+                        storage.upload(key, archive, tmp_file, part)
+                    package_size = package.size / 1024 ** 2
+                    print('Uploaded /{0}/{1}  {{count: {2}, size: {3} MB, part: {4}}}'
+                          .format(folder_config.REMOTE, package.path, package.count, package_size, part or '000'))
+                    archive.clear(tmp_file)
+
+    print('Total uploaded: {0}'.format(humanize.naturalsize(upload_size)))
     info(config)
     clear(config)
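With per-volume uploads, a split package now logs one line per part; assuming `humanize` is imported at module top, the output would look roughly like this (names and numbers are illustrative):

    Uploaded /backup/photos  {count: 412, size: 5213 MB, part: 001}
    Uploaded /backup/photos  {count: 412, size: 5213 MB, part: 002}
    Uploaded /backup/photos  {count: 412, size: 5213 MB, part: 003}
    Total uploaded: 5.5 GB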
 
     """
     print('\nInfo')
     config = load_config(config) if type(config) is str else config
-    storage = Storage(config.ACCESS_KEY, config.SECRET_KEY, config.TMP_PATH)
+    storage = Storage(config.ACCESS_KEY, config.SECRET_KEY)
     count_all, count_unique_all, size_all = 0, 0, 0
     for folder_config in config.FOLDERS:
         count = storage.count(folder_config.BUCKET, folder_config.REMOTE)
     max_count = int(max_count)
     print('\nClear')
     config = load_config(config) if type(config) is str else config
-    storage = Storage(config.ACCESS_KEY, config.SECRET_KEY, config.TMP_PATH)
+    storage = Storage(config.ACCESS_KEY, config.SECRET_KEY)
     for folder_config in config.FOLDERS:
         history_size = max_count or folder_config.OPTIONS.HISTORY_SIZE
         archives = storage.get_archives(folder_config.BUCKET, folder_config.REMOTE)