Commits

Mike Bayer  committed 3b4919d

work synthetic feature into the library

  • Participants
  • Parent commits b91cd51

Comments (0)

Files changed (6)

File backups.py

-#!/usr/local/bin/python
-
-import os
-import ConfigParser
-import re
-import subprocess
-import resource
-import argparse
-import functools
-import sys
-import errno
-
-config_file_dest = os.path.join(
-        os.environ['HOME'],
-        ".duplicity",
-        "backup.ini"
-    )
-
-lock_file_dest = os.path.join(
-        os.environ['HOME'],
-        ".duplicity"
-    )
-
-class CommandException(Exception):
-    pass
-
-def _is_uppercase(text):
-    return text == text.upper()
-
-def _setup_command(cmd_options, config_dict):
-    cmd_options.append(config_dict['cmd'])
-
-def _dupl_command(cmd, config, cmd_options, args):
-    config_dict = _get_config(config, args)
-    lock = cmd in ("remove-older-than",
-                    "cleanup", "remove-all-but-n-full")
-
-    _setup_command(cmd_options, config_dict)
-    cmd_options.append(cmd)
-    if getattr(args, 'arg', None):
-        cmd_options.append(args.arg)
-    if getattr(args, 'force', False):
-        cmd_options.append("--force")
-    _render_options_args(config_dict, cmd_options)
-
-    cmd_options.append(config_dict['target_url'])
-    _run_duplicity(args.configuration, cmd_options, lock, args.dry)
-
-def _lock(lock_file):
-    try:
-        os.mkdir(lock_file)
-        return True
-    except OSError, err:
-        if err.errno == errno.EEXIST:
-            return False
-        else:
-            raise
-
-def _unlock(lock_file):
-    os.rmdir(lock_file)
-
-def _restore(config, cmd_options, args):
-    config_dict = _get_config(config, args)
-    _setup_command(cmd_options, config_dict)
-    cmd_options.append("restore")
-    _render_options_args(config_dict, cmd_options)
-
-    dest = os.path.normpath(args.dest)
-    if dest.startswith(os.sep):
-        dest = dest[1:]
-
-    path, fname = os.path.split(dest)
-    if os.path.exists(os.path.join(os.sep, dest)):
-        if not fname:
-            path = path + ".restored"
-        else:
-            fname = fname + ".restored"
-
-    cmd_options.extend(["--file-to-restore", dest])
-    cmd_options.append(config_dict['target_url'])
-    cmd_options.append(os.path.join(os.sep, path, fname))
-    _run_duplicity(args.configuration, cmd_options, False, args.dry)
-
-def _backup(cmd, config, cmd_options, args):
-    config_dict = _get_config(config, args)
-    _setup_command(cmd_options, config_dict)
-    cmd_options.append(cmd)
-
-    if args.asynchronous_upload:
-        cmd_options.append("--asynchronous-upload")
-
-    _render_options_args(config_dict, cmd_options)
-
-    for src_opt in re.split(r'\\', config_dict['source']):
-        src_opt = src_opt.strip()
-        if not src_opt:
-            continue
-        name, opt = re.split(r'\s+', src_opt, 1)
-        if "\n" in opt:
-            raise SystemError(
-                    "WARNING: newline detected in non-slashed "
-                    "line \"%s\"; please double check your config file"
-                    % opt.replace('\n', r'\n'))
-        cmd_options.extend((name, opt))
-
-    cmd_options.append("/")
-    cmd_options.append(config_dict['target_url'])
-    _run_duplicity(args.configuration, cmd_options, True, args.dry)
-
-def _list_configs(config, cmd_options, args):
-    print ("\n".join(config.sections()))
-    return False
-
-def _get_config(config, args):
-    if not config.has_section(args.configuration):
-        raise SystemError("no such config: %s" % args.configuration)
-
-    return dict(config.items(args.configuration))
-
-def _render_options_args(config_dict, cmd_options):
-    dupl_opts = set(["v", "archive-dir", "name", "s3-use-new-style"])
-    for k, v in config_dict.items():
-        if _is_uppercase(k):
-            os.environ[k] = v % (os.environ)
-        elif k in dupl_opts:
-            if k == 'v':
-                cmd_options.append("-%s%s" % (k, v))
-            elif v == 'true':
-                cmd_options.append("--%s" % (k, ))
-            else:
-                cmd_options.append("--%s=%s" % (k, v))
-
-def _write_sample_config(config, cmd_options, args):
-    sample = """
-[DEFAULT]
-# Arguments in the DEFAULT sections
-# are passed to all sub-configs.
-# Any argument here including environment
-# variables can be per-sub-config.
-# Values can have spaces, don't add quotes as these
-# become part of the value.
-
-# environment variables - all UPPERCASE
-# names are sent to the env.
-AWS_ACCESS_KEY_ID=<your access key>
-AWS_SECRET_ACCESS_KEY=<your secret key>
-PASSPHRASE=this is my passphrase
-
-# env substitutions can also be used
-# with UPPERCASE variables.  Use two
-# percent signs, %%(varname)s
-PATH=/usr/local/bin:%%(PATH)s
-
-# duplicity options
-archive-dir=/Users/myusername/.duplicity/cache
-v=8
-
-# path of duplicity executable
-cmd=/usr/local/bin/duplicity
-
-# each backup config is defined here,
-# in its own [section].
-[my_backup]
-
-# duplicity "name" field
-name=my_backup
-
-# sources.  we always make the "desination"
-# the root "/".   Fill in each desired directory
-# here, keeping the one include/exclude per line with
-# backslash/newline convention in place
-source=\\
-    --exclude /**/*.pyc \\
-    --exclude /**.DS_Store \\
-    --exclude /Users/myusername/.duplicity/cache \\
-    --include /Users/myusername/Documents \\
-    --include /Users/myusername/Desktop \\
-    --exclude **
-
-# target url.
-target_url=file:///Volumes/WD Passport/duplicity/
-
-"""
-    if os.path.exists(config_file_dest):
-        raise CommandException(
-            "Config file %s already exists" % config_file_dest)
-    with open(config_file_dest, 'w') as f:
-        f.write(sample)
-    print("Wrote config to %s" % config_file_dest)
-    return False
-
-
-def _global_options(subparser):
-    subparser.add_argument("configuration", type=str,
-                help="name of configuration to load")
-    subparser.add_argument("-d", "--dry", action="store_true",
-                help="Only show the final "
-                "duplicity command, don't actually run it")
-
-def _run_duplicity(name, cmd_options, lock, dry):
-    print(" ".join(cmd_options))
-
-    if not dry:
-        def setlimits():
-            resource.setrlimit(resource.RLIMIT_NOFILE, (1024, 1024))
-
-        def proc():
-            p = subprocess.Popen(cmd_options, preexec_fn=setlimits)
-            p.wait()
-        if lock:
-            lockfile = os.path.join(lock_file_dest, "%s.lock" % name)
-            if not _lock(lockfile):
-                sys.stderr.write(
-                    "Lockfile %s is already acquired\n" % lockfile)
-                return
-            try:
-                proc()
-            finally:
-                _unlock(lockfile)
-        else:
-            proc()
-
-def main(argv=None, **kwargs):
-
-    dupl_commands = set(["verify", "collection-status",
-            "list-current-files",
-            "remove-older-than", "cleanup",
-            "remove-all-but-n-full"])
-    force_commands = set(["remove-older-than",
-                        "cleanup",
-                        "remove-all-but-n-full"])
-    one_arg_commands = set(["remove-older-than", "remove-all-but-n-full"])
-
-    parser = argparse.ArgumentParser(prog="backups")
-    subparsers = parser.add_subparsers(help="sub-command help")
-    for name in dupl_commands:
-        subparser = subparsers.add_parser(
-                            name,
-                            help="run the duplicity command %r" % name)
-        subparser.set_defaults(cmd=functools.partial(_dupl_command, name))
-        if name in one_arg_commands:
-            subparser.add_argument("arg", type=str, help="command argument")
-        _global_options(subparser)
-        if name in force_commands:
-            subparser.add_argument("--force", action="store_true",
-                        help="duplicity --force option")
-
-    subparser = subparsers.add_parser("restore", help="run a restore")
-    _global_options(subparser)
-    subparser.add_argument("dest", help="Path or file to restore, "
-                        "passed to --file-to-restore")
-    subparser.set_defaults(cmd=_restore)
-
-    subparser = subparsers.add_parser("full", help="run a full backup")
-    subparser.set_defaults(cmd=functools.partial(_backup, "full"))
-    subparser.add_argument("--asynchronous-upload", action="store_true",
-                    help="use async mode")
-    _global_options(subparser)
-
-    subparser = subparsers.add_parser("incremental",
-                            help="run an incremental backup")
-    subparser.set_defaults(cmd=functools.partial(_backup, "incremental"))
-    subparser.add_argument("--asynchronous-upload", action="store_true",
-                    help="use async mode")
-    _global_options(subparser)
-
-    subparser = subparsers.add_parser("configs", help="list configs")
-    subparser.set_defaults(cmd=_list_configs)
-
-    subparser = subparsers.add_parser("init", help="write sample config file")
-    subparser.set_defaults(cmd=_write_sample_config)
-
-    args = parser.parse_args(argv)
-
-    cmd_options = []
-
-    cmd = args.cmd
-
-    try:
-        config = ConfigParser.SafeConfigParser()
-        config.optionxform = str
-        if not os.path.exists(config_file_dest):
-            if cmd is not _write_sample_config:
-                raise CommandException("No config file: %r.  "
-                    "Please run the 'init' command to create." %
-                    config_file_dest)
-        else:
-            config.read(config_file_dest)
-
-
-        cmd(config, cmd_options, args)
-    except CommandException, ce:
-        sys.exit(str(ce))
-
-if __name__ == '__main__':
-    main()

File backups/__init__.py

Empty file added.

File backups/backups.py

+import argparse
+import functools
+import sys
+
+from . import base
+
+def _global_options(subparser):
+    """Attach the arguments shared by every config-driven sub-command.
+
+    Adds the positional ``configuration`` name and the ``-d``/``--dry``
+    flag to *subparser*.
+    """
+    dry_help = ("Only show the final "
+                "duplicity command, don't actually run it")
+    subparser.add_argument(
+        "configuration",
+        type=str,
+        help="name of configuration to load",
+    )
+    subparser.add_argument("-d", "--dry", action="store_true", help=dry_help)
+
+def main(argv=None, **kwargs):
+    """Command-line entry point for the ``backups`` script.
+
+    Builds one argparse sub-command per supported operation, loads the
+    configuration file through ``base._read_config()`` and dispatches to
+    the handler stored in ``args.cmd``.
+
+    :param argv: argument list to parse; ``None`` lets argparse read
+     ``sys.argv``.
+    :param kwargs: accepted and ignored.
+
+    A ``base.CommandException`` raised by a handler (or by the missing
+    config check) terminates the process with the exception text as the
+    exit message.
+    """
+
+    dupl_commands = set(["verify", "collection-status",
+            "list-current-files",
+            "remove-older-than", "cleanup",
+            "remove-all-but-n-full"])
+    force_commands = set(["remove-older-than",
+                        "cleanup",
+                        "remove-all-but-n-full"])
+    one_arg_commands = set(["remove-older-than", "remove-all-but-n-full"])
+
+    parser = argparse.ArgumentParser(prog="backups")
+    subparsers = parser.add_subparsers(help="sub-command help")
+    for name in dupl_commands:
+        subparser = subparsers.add_parser(
+                            name,
+                            help="run the duplicity command %r" % name)
+        subparser.set_defaults(cmd=functools.partial(base._dupl_command, name))
+        # The command's own argument is registered before the shared
+        # "configuration" positional, so it comes first on the command line.
+        if name in one_arg_commands:
+            subparser.add_argument("arg", type=str, help="command argument")
+        _global_options(subparser)
+        if name in force_commands:
+            subparser.add_argument("--force", action="store_true",
+                                help="duplicity --force option")
+
+    subparser = subparsers.add_parser("restore", help="run a restore")
+    _global_options(subparser)
+    subparser.add_argument("dest", help="Path or file to restore, "
+                            "passed to --file-to-restore")
+    subparser.set_defaults(cmd=base._restore)
+
+    subparser = subparsers.add_parser("full", help="run a full backup")
+    subparser.set_defaults(cmd=functools.partial(base._backup, "full"))
+    subparser.add_argument("--asynchronous-upload", action="store_true",
+                            help="use async mode")
+    _global_options(subparser)
+
+    subparser = subparsers.add_parser("incremental",
+                                help="run an incremental backup")
+    subparser.set_defaults(cmd=functools.partial(base._backup, "incremental"))
+    subparser.add_argument("--asynchronous-upload", action="store_true",
+                        help="use async mode")
+    _global_options(subparser)
+
+    subparser = subparsers.add_parser("configs", help="list configs")
+    subparser.set_defaults(cmd=base._list_configs)
+
+    subparser = subparsers.add_parser("init", help="write sample config file")
+    subparser.set_defaults(cmd=base._write_sample_config)
+
+    args = parser.parse_args(argv)
+
+    cmd_options = []
+
+    cmd = args.cmd
+
+    config = base._read_config()
+    try:
+        # "init" is the only command allowed to run without a config file.
+        if config is None and cmd is not base._write_sample_config:
+            # CommandException and config_file_dest are defined in base,
+            # not in this module, so they must be referenced through it.
+            raise base.CommandException("No config file: %r.  "
+                "Please run the 'init' command to create." %
+                base.config_file_dest)
+
+        cmd(config, cmd_options, args)
+    except base.CommandException as ce:
+        sys.exit(str(ce))
+
+if __name__ == '__main__':
+    main()

File backups/base.py

+#!/usr/local/bin/python
+
+import os
+import ConfigParser
+import re
+import subprocess
+import resource
+import functools
+import sys
+import errno
+
+# Path of the INI file holding all backup configurations:
+# $HOME/.duplicity/backup.ini
+config_file_dest = os.path.join(
+        os.environ['HOME'],
+        ".duplicity",
+        "backup.ini"
+    )
+
+# Directory in which the per-configuration "<name>.lock" entries are created.
+lock_file_dest = os.path.join(
+        os.environ['HOME'],
+        ".duplicity"
+    )
+
+class CommandException(Exception):
+    """Raised by a command for a failure that should be shown to the user,
+    such as an already existing config file."""
+    pass
+
+def _is_uppercase(text):
+    """Return True when upper-casing *text* leaves it unchanged."""
+    uppercased = text.upper()
+    return uppercased == text
+
+def _setup_command(cmd_options, config_dict):
+    """Append the configured ``cmd`` value to *cmd_options* in place."""
+    executable = config_dict['cmd']
+    cmd_options.append(executable)
+
+def _dupl_command(cmd, config, cmd_options, args):
+    """Build the argument list for duplicity sub-command *cmd* and run it.
+
+    The list is assembled in *cmd_options* as: executable, *cmd*, the
+    optional ``args.arg``, ``--force`` when requested, the rendered
+    options, and finally the configured ``target_url``.
+    """
+    config_dict = _get_config(config, args)
+
+    _setup_command(cmd_options, config_dict)
+    cmd_options.append(cmd)
+
+    extra_arg = getattr(args, 'arg', None)
+    if extra_arg:
+        cmd_options.append(extra_arg)
+    if getattr(args, 'force', False):
+        cmd_options.append("--force")
+
+    _render_options_args(config_dict, cmd_options)
+    cmd_options.append(config_dict['target_url'])
+
+    # Only these three commands are run under the per-configuration lock.
+    needs_lock = cmd in ("remove-older-than",
+                    "cleanup", "remove-all-but-n-full")
+    _run_duplicity(args.configuration, cmd_options, needs_lock, args.dry)
+
+def _lock(lock_file):
+    """Try to acquire the lock by creating the directory *lock_file*.
+
+    :return: True when the directory was created, False when it already
+     exists (the lock is held).
+    :raises OSError: for any failure other than "already exists".
+    """
+    try:
+        os.mkdir(lock_file)
+    except OSError as err:
+        if err.errno == errno.EEXIST:
+            return False
+        raise
+    return True
+
+def _unlock(lock_file):
+    """Release the lock by removing the directory *lock_file*."""
+    os.rmdir(lock_file)
+
+def _restore(config, cmd_options, args):
+    """Build and run a duplicity ``restore`` of ``args.dest``.
+
+    The path is restored to its own location under ``/``; when something
+    already exists there, ``.restored`` is appended to the local target
+    name instead.
+    """
+    config_dict = _get_config(config, args)
+    _setup_command(cmd_options, config_dict)
+    cmd_options.append("restore")
+    _render_options_args(config_dict, cmd_options)
+
+    # --file-to-restore receives the normalized path without its
+    # leading separator.
+    relative = os.path.normpath(args.dest)
+    if relative.startswith(os.sep):
+        relative = relative[1:]
+
+    parent, leaf = os.path.split(relative)
+    if os.path.exists(os.path.join(os.sep, relative)):
+        if leaf:
+            leaf += ".restored"
+        else:
+            parent += ".restored"
+    local_target = os.path.join(os.sep, parent, leaf)
+
+    cmd_options.extend(("--file-to-restore", relative))
+    cmd_options.append(config_dict['target_url'])
+    cmd_options.append(local_target)
+    _run_duplicity(args.configuration, cmd_options, False, args.dry)
+
+def _backup(cmd, config, cmd_options, args):
+    """Build and run a duplicity backup (*cmd* is ``full`` or
+    ``incremental``) of ``/`` to the configured ``target_url``.
+
+    The ``source`` config value is split on backslashes; each non-empty
+    piece must be "<option> <value>" and is appended as two arguments.
+
+    :raises SystemError: when a piece still contains a newline, which
+     indicates a line in the config file without its trailing backslash.
+    """
+    config_dict = _get_config(config, args)
+    _setup_command(cmd_options, config_dict)
+    cmd_options.append(cmd)
+
+    if args.asynchronous_upload:
+        cmd_options.append("--asynchronous-upload")
+
+    _render_options_args(config_dict, cmd_options)
+
+    pieces = [piece.strip()
+              for piece in re.split(r'\\', config_dict['source'])]
+    for entry in pieces:
+        if not entry:
+            continue
+        name, opt = re.split(r'\s+', entry, 1)
+        if "\n" in opt:
+            raise SystemError(
+                    "WARNING: newline detected in non-slashed "
+                    "line \"%s\"; please double check your config file"
+                    % opt.replace('\n', r'\n'))
+        cmd_options.extend((name, opt))
+
+    # the backup always starts at the filesystem root; the include and
+    # exclude entries above narrow it down
+    cmd_options.append("/")
+    cmd_options.append(config_dict['target_url'])
+    _run_duplicity(args.configuration, cmd_options, True, args.dry)
+
+def _list_configs(config, cmd_options, args):
+    """Print every section name of *config*, one per line; return False."""
+    section_names = config.sections()
+    print("\n".join(section_names))
+    return False
+
+def _get_config(config, args):
+    """Return the options of section ``args.configuration`` as a dict.
+
+    :raises SystemError: when *config* has no such section.
+    """
+    section = args.configuration
+    if config.has_section(section):
+        return dict(config.items(section))
+    raise SystemError("no such config: %s" % section)
+
+def _render_env_args(config_dict):
+    """Export every upper-case key of *config_dict* into ``os.environ``.
+
+    Each value is %-formatted against the current environment first, so
+    ``%(NAME)s`` in a value expands to that environment variable.
+    """
+    for key, template in config_dict.items():
+        if not _is_uppercase(key):
+            continue
+        os.environ[key] = template % os.environ
+
+def _render_options_args(config_dict, cmd_options):
+    """Export env settings and append duplicity options to *cmd_options*.
+
+    Upper-case keys of *config_dict* are exported to the environment via
+    ``_render_env_args``.  Keys naming a known duplicity option are
+    rendered as ``-v<N>`` for ``v``, as a bare ``--name`` flag when the
+    value is ``true``, and as ``--name=value`` otherwise.  All other keys
+    are ignored here.
+    """
+    dupl_opts = set(["v", "archive-dir", "name", "s3-use-new-style",
+                     "allow-source-mismatch"])
+
+    # Environment export lives in one helper; no option name above is
+    # upper-case, so the two passes never handle the same key.
+    _render_env_args(config_dict)
+
+    for k, v in config_dict.items():
+        if k not in dupl_opts:
+            continue
+        if k == 'v':
+            cmd_options.append("-%s%s" % (k, v))
+        elif v == 'true':
+            cmd_options.append("--%s" % (k, ))
+        else:
+            cmd_options.append("--%s=%s" % (k, v))
+
+def _write_sample_config(config, cmd_options, args):
+    """Write a commented sample configuration to ``config_file_dest``.
+
+    The parameters are unused; they exist so this handler has the same
+    signature as the other commands.
+
+    :return: False
+    :raises CommandException: when the config file already exists.
+    """
+    sample = """
+[DEFAULT]
+# Arguments in the DEFAULT sections
+# are passed to all sub-configs.
+# Any argument here including environment
+# variables can be per-sub-config.
+# Values can have spaces, don't add quotes as these
+# become part of the value.
+
+# environment variables - all UPPERCASE
+# names are sent to the env.
+AWS_ACCESS_KEY_ID=<your access key>
+AWS_SECRET_ACCESS_KEY=<your secret key>
+PASSPHRASE=this is my passphrase
+
+# env substitutions can also be used
+# with UPPERCASE variables.  Use two
+# percent signs, %%(varname)s
+PATH=/usr/local/bin:%%(PATH)s
+
+# duplicity options
+archive-dir=/Users/myusername/.duplicity/cache
+v=8
+
+# path of duplicity executable
+cmd=/usr/local/bin/duplicity
+
+# each backup config is defined here,
+# in its own [section].
+[my_backup]
+
+# duplicity "name" field
+name=my_backup
+
+# sources.  we always make the "desination"
+# the root "/".   Fill in each desired directory
+# here, keeping the one include/exclude per line with
+# backslash/newline convention in place
+source=\\
+    --exclude /**/*.pyc \\
+    --exclude /**.DS_Store \\
+    --exclude /Users/myusername/.duplicity/cache \\
+    --include /Users/myusername/Documents \\
+    --include /Users/myusername/Desktop \\
+    --exclude **
+
+# target url.
+target_url=file:///Volumes/WD Passport/duplicity/
+
+"""
+    if os.path.exists(config_file_dest):
+        raise CommandException(
+            "Config file %s already exists" % config_file_dest)
+
+    # The containing directory may not exist yet when "init" is run for
+    # the first time; without it open() fails with ENOENT.
+    config_dir = os.path.dirname(config_file_dest)
+    if not os.path.isdir(config_dir):
+        os.makedirs(config_dir)
+
+    with open(config_file_dest, 'w') as f:
+        f.write(sample)
+    print("Wrote config to %s" % config_file_dest)
+    return False
+
+
+
+def _run_duplicity(name, cmd_options, lock, dry):
+    """Echo the command line and, unless *dry* is true, execute it.
+
+    :param name: configuration name, used for the lock entry
+     ``<lock_file_dest>/<name>.lock``.
+    :param cmd_options: full argument list, executable first.
+    :param lock: when true, run only if the lock can be acquired and
+     release it afterwards.
+    :param dry: when true, only print the command.
+    """
+    print(" ".join(cmd_options))
+    if dry:
+        return
+
+    def _cap_open_files():
+        # runs in the child process, just before the command is executed
+        resource.setrlimit(resource.RLIMIT_NOFILE, (1024, 1024))
+
+    def _execute():
+        child = subprocess.Popen(cmd_options, preexec_fn=_cap_open_files)
+        child.wait()
+
+    if not lock:
+        _execute()
+        return
+
+    lockfile = os.path.join(lock_file_dest, "%s.lock" % name)
+    if not _lock(lockfile):
+        sys.stderr.write(
+            "Lockfile %s is already acquired\n" % lockfile)
+        return
+    try:
+        _execute()
+    finally:
+        _unlock(lockfile)
+
+def _read_config():
+    """Load ``config_file_dest`` and return the parser.
+
+    :return: the populated parser, or None when the file does not exist.
+    """
+    if not os.path.exists(config_file_dest):
+        return None
+
+    parser = ConfigParser.SafeConfigParser()
+    # keep option names exactly as written; upper-case names are later
+    # treated as environment variables
+    parser.optionxform = str
+    parser.read(config_file_dest)
+    return parser
+

File backups/synthetic.py

+#!/usr/bin/python
+
+import boto
+from . import base
+import argparse
+import re
+import functools
+from multiprocessing import Pool
+import tempfile
+
+
+def duplicity_cmd(cmd_options, replace_dict, *args):
+    """Run duplicity with *cmd_options* plus a sub-command and arguments.
+
+    ``args[0]`` is the sub-command and is appended verbatim; every
+    following argument is %-formatted with *replace_dict* first.  The
+    caller's *cmd_options* list is not modified.
+    """
+    subcommand, templates = args[0], args[1:]
+    full_command = list(cmd_options) + [subcommand]
+    full_command += [template % replace_dict for template in templates]
+
+    base._run_duplicity(None, full_command, False, False)
+
+def log(msg, *args):
+    """Print *msg* %-formatted with *args*."""
+    # Function-call form, matching the print(...) calls used elsewhere
+    # in the package; the statement form is a syntax error on Python 3.
+    print(msg % args)
+
+def _copy_key(arg):
+    """Pool worker: copy a single key into another bucket.
+
+    *arg* is a ``(source bucket name, key name, destination bucket name)``
+    tuple.  Uses the module-global ``s3`` connection that
+    ``_global_connect`` sets up.
+    """
+    source_name, keyname, dest_name = arg
+    key = s3.lookup(source_name).get_key(keyname)
+    log("Copying %s", key.key)
+    key.copy(dest_name, key.key)
+
+def _global_connect(config_dict):
+    """Pool initializer: export the env settings of *config_dict*, then
+    open the module-global ``s3`` connection used by ``_copy_key``."""
+    global s3
+    # The environment is populated before connecting; presumably boto
+    # picks its credentials up from there -- confirm against boto docs.
+    base._render_env_args(config_dict)
+    s3 = boto.connect_s3()
+
+def _copy_bucket(copy_pool, source_bucket, dest_bucket_name):
+    """Copy every key of *source_bucket* into the bucket named
+    *dest_bucket_name*, spreading the work over *copy_pool*."""
+    jobs = []
+    for key in source_bucket.list():
+        jobs.append((source_bucket.name, key.key, dest_bucket_name))
+    copy_pool.map(_copy_key, jobs)
+
+def run_synthetic(config, args):
+    """Rebuild the backup stored in an S3 bucket as one fresh full backup.
+
+    Steps, as implemented below:
+
+    1. copy every key of the configured bucket to a temporary source
+       bucket;
+    2. restore from the temporary source bucket into a local temp dir;
+    3. run a full backup of that directory into a temporary destination
+       bucket;
+    4. fail if keys were added to the original bucket in the meantime;
+    5. copy the temporary destination bucket back into the original;
+    6. always delete both temporary buckets and the local temp dir.
+
+    :param config: parser returned by ``base._read_config()``.
+    :param args: namespace with a ``configuration`` attribute.
+    :raises base.CommandException: when ``target_url`` is not an
+     ``s3+http://`` URL.
+    """
+    # Local import: this module's import block does not bring in shutil,
+    # which the cleanup step needs.
+    import shutil
+
+    config_dict = base._get_config(config, args)
+
+    # Validate the URL before any worker process or bucket is created.
+    target_url = config_dict['target_url']
+    url_match = re.match(r"s3\+http:\/\/(.+)", target_url)
+    if url_match is None:
+        raise base.CommandException(
+            "target_url %r is not an s3+http:// URL" % target_url)
+    source_bucket_name = url_match.group(1)
+
+    copy_pool = Pool(10, _global_connect, (config_dict, ))
+
+    cmd_options = []
+    base._setup_command(cmd_options, config_dict)
+    base._render_options_args(config_dict, cmd_options)
+
+    tmp_source = "tmp_source_%s" % source_bucket_name
+    tmp_dest = "tmp_dest_%s" % source_bucket_name
+
+    replace_dict = {"source_bucket_name": source_bucket_name,
+                    "tmp_source": tmp_source,
+                    "tmp_dest": tmp_dest
+                    }
+    run_duplicity_cmd = functools.partial(
+                                duplicity_cmd, cmd_options, replace_dict)
+
+    s3 = boto.connect_s3()
+
+    # create temp buckets, dir
+    local_tmp_dir = tempfile.mkdtemp()
+    tmp_source_bucket = s3.create_bucket(tmp_source)
+    tmp_dest_bucket = s3.create_bucket(tmp_dest)
+
+    try:
+        source_bucket = s3.lookup(source_bucket_name)
+
+        # snapshot of the key names, compared again after the backup
+        all_source_keys = set(k.key for k in source_bucket.list())
+
+        # copy everything in original source bucket to temp source
+        _copy_bucket(copy_pool, source_bucket, tmp_source)
+
+        # restore from temp source
+        log("Restoring from %s to %s", tmp_source, local_tmp_dir)
+        run_duplicity_cmd("restore", "s3+http://%(tmp_source)s", local_tmp_dir)
+
+        # do a full backup to temp dest
+        log("Backing up full from %s to %s", local_tmp_dir, tmp_dest)
+        run_duplicity_cmd("full", local_tmp_dir, "s3+http://%(tmp_dest)s")
+
+        # check for keys added
+        new_source_keys = set(k.key for k in source_bucket.list())
+
+        diff = new_source_keys.difference(all_source_keys)
+        if diff:
+            raise Exception(
+                        "New files have been added to %s since "
+                        "synthetic compression started: %r" % (
+                                source_bucket_name,
+                                diff
+                            )
+                    )
+
+        # copy everything from temp dest back to original source
+        _copy_bucket(copy_pool, tmp_dest_bucket, source_bucket_name)
+
+        # TODO: not implemented yet -- do an everything-but-n whatever
+        # for original source
+
+    finally:
+        # remove tmp buckets and the local working directory
+        tmp_source_bucket.delete_keys(tmp_source_bucket.list())
+        tmp_source_bucket.delete()
+        tmp_dest_bucket.delete_keys(tmp_dest_bucket.list())
+        tmp_dest_bucket.delete()
+        shutil.rmtree(local_tmp_dir)
+
+
+def main(argv=None, **kwargs):
+    """Command-line entry point for the ``synthetic`` script.
+
+    :param argv: argument list to parse; ``None`` lets argparse read
+     ``sys.argv``.
+    :param kwargs: accepted and ignored.
+    """
+    parser = argparse.ArgumentParser(prog="synthetic_backup")
+    parser.add_argument("configuration", type=str,
+                help="name of configuration to load")
+
+    args = parser.parse_args(argv)
+    config = base._read_config()
+    if config is None:
+        # _read_config() returns None when the config file is missing;
+        # stop with a message instead of failing later on None.
+        parser.error("No config file: %r.  "
+            "Please run 'backups init' to create it." %
+            base.config_file_dest)
+
+    run_synthetic(config, args)
+
+
+if __name__ == '__main__':
+    main()
       author_email='mike@zzzcomputing.com',
       url='http://bitbucket.org/zzzeek/backups',
       license='MIT',
-      py_modules=["backups"],
+      packages=["backups"],
       zip_safe=False,
       entry_points={
-        'console_scripts': ['backups = backups:main'],
+        'console_scripts': ['backups = backups.backups:main',
+                    'synthetic = backups.synthetic:main'],
       }
 )