Commits

Anonymous committed fa3ad63 Merge

Files changed (12)

.hgignore

 syntax: glob
 
 .hg/*
+.idea/*
 
 # Database stuff
 database/*

cm/boot/__init__.py

 
 from .util import _run, _is_running, _make_dir
 from .conf import _install_authorized_keys, _install_conf_files, _configure_nginx
-from .object_store import _get_file_from_bucket
+from .object_store import _get_file_from_bucket, _key_exists_in_bucket
 
 logging.getLogger(
     'boto').setLevel(logging.INFO)  # Only log boto messages >=INFO
             path=path,
             calling_format=calling_format,
         )
-        log.debug('Got boto S3 connection')
+        log.debug('Got boto S3 connection: %s' % s3_conn)
     except BotoServerError as e:
         log.error("Exception getting S3 connection; {0}".format(e))
 
         if ud['access_key'] is not None and ud['secret_key'] is not None:
             s3_conn = _get_s3connection(ud)
     # Test for existence of user's bucket and download appropriate CM instance
-    b = None
     if s3_conn:  # if not use_object_store, then s3_connection never gets attempted
         if 'bucket_cluster' in ud:
-            b = s3_conn.lookup(ud['bucket_cluster'])
-        if b:  # Try to retrieve user's instance of CM
-            log.info("Cluster bucket '%s' found." % b.name)
-            if _get_file_from_bucket(log, s3_conn, b.name, CM_REMOTE_FILENAME, local_cm_file):
-                _write_cm_revision_to_file(s3_conn, b.name)
-                log.info("Restored Cloudman from bucket_cluster %s" %
-                         (ud['bucket_cluster']))
-                return True
+            # Try to retrieve user's instance of CM
+            if _key_exists_in_bucket(log, s3_conn, ud['bucket_cluster'], CM_REMOTE_FILENAME):
+                log.info("CloudMan found in cluster bucket '%s'." % ud['bucket_cluster'])
+                if _get_file_from_bucket(log, s3_conn, ud['bucket_cluster'],
+                   CM_REMOTE_FILENAME, local_cm_file):
+                    _write_cm_revision_to_file(s3_conn, ud['bucket_cluster'])
+                    log.info("Restored Cloudman from bucket_cluster %s" %
+                             (ud['bucket_cluster']))
+                    return True
         # ELSE: Attempt to retrieve default instance of CM from local s3
         if _get_file_from_bucket(log, s3_conn, default_bucket_name, CM_REMOTE_FILENAME, local_cm_file):
             log.info("Retrieved CloudMan (%s) from bucket '%s' via local s3 connection" % (
         fp = urllib.urlopen(
             'http://169.254.169.254/latest/meta-data/public-hostname')
         hn = fp.read()
-        _run(log, 'echo "# Added by CloudMan for NeCTAR" >> /etc/hosts')
-        _run(log, 'echo "{ip} {hn1} {hn2}" >> /etc/hosts'.format(
-            ip=ip, hn1=hn, hn2=hn.split('.')[0]))
+        line = "{ip} {hn1} {hn2}".format(ip=ip, hn1=hn, hn2=hn.split('.')[0])
+        with open('/etc/hosts', 'a+') as f:
+            if not any(line.strip() == x.rstrip('\r\n') for x in f):
+                log.debug("Appending line %s to /etc/hosts" % line)
+                f.write("# Added by CloudMan for NeCTAR\n")
+                f.write(line + '\n')
     except Exception, e:
         log.error("Trouble fixing /etc/hosts on NeCTAR: {0}".format(e))
 

cm/boot/object_store.py

 
 
 def _get_file_from_bucket(log, s3_conn, bucket_name, remote_filename, local_filename):
+    log.debug("Getting file %s from bucket %s" % (remote_filename, bucket_name))
     try:
-        b = s3_conn.get_bucket(bucket_name)
+        b = s3_conn.get_bucket(bucket_name, validate=False)
         k = Key(b, remote_filename)
 
         log.debug("Attempting to retrieve file '%s' from bucket '%s'" % (
         log.error("Failed to get file '%s' from bucket '%s': %s" %
                   (remote_filename, bucket_name, e))
         return False
+
+
+def _key_exists_in_bucket(log, s3_conn, bucket_name, key_name):
+    """
+    Check if an object (i.e., key) of name ``key_name`` exists in bucket
+    ``bucket_name``. Return ``True`` if so, ``False`` otherwise.
+    """
+    b = s3_conn.get_bucket(bucket_name, validate=False)
+    k = Key(b, key_name)
+    log.debug("Checking if key '%s' exists in bucket '%s'" % (
+        key_name, bucket_name))
+    return k.exists()
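
A hedged usage sketch pairing the new existence check with the download helper (credentials, bucket, and key names below are placeholders):

    import logging
    from boto.s3.connection import S3Connection
    from cm.boot.object_store import _get_file_from_bucket, _key_exists_in_bucket

    log = logging.getLogger('cloudman')
    s3_conn = S3Connection('ACCESS_KEY', 'SECRET_KEY')  # placeholder credentials
    if _key_exists_in_bucket(log, s3_conn, 'example-bucket', 'cm.tar.gz'):
        _get_file_from_bucket(log, s3_conn, 'example-bucket', 'cm.tar.gz',
                              '/tmp/cm.tar.gz')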
     def get_security_groups(self):
         if self.security_groups is None:
             if self.app.TESTFLAG is True:
-                log.debug("Attempted to get key pair name, but TESTFLAG is set. Returning 'cloudman_sg'")
+                log.debug("Attempted to get security groups, but TESTFLAG is set. Returning 'cloudman_sg'")
                 self.security_groups = ['cloudman_sg']
                 return self.security_groups
             for i in range(0, 5):
         if self.self_private_ip is None:
             if self.app.TESTFLAG is True:
                 log.debug(
-                    "Attempted to get key pair name, but TESTFLAG is set. Returning '127.0.0.1'")
+                    "Attempted to get private ip, but TESTFLAG is set. Returning '127.0.0.1'")
                 self.self_private_ip = '127.0.0.1'
                 return self.self_private_ip
             for i in range(0, 5):
         if self.local_hostname is None:
             if self.app.TESTFLAG is True:
                 log.debug(
-                    "Attempted to get key pair name, but TESTFLAG is set. Returning 'localhost'")
+                    "Attempted to get local hostname, but TESTFLAG is set. Returning 'localhost'")
                 self.local_hostname = 'localhost'
                 return self.local_hostname
             for i in range(0, 5):
     def init_with_user_data(self, user_data):
         self.__configure_instance_management(user_data)
         self.__configure_instance_types(user_data)
-        self.condor_enabled = user_data.get("condor_enabled", True)
-        self.hadoop_enabled = user_data.get("hadoop_enabled", True)
+        self.condor_enabled = user_data.get("condor_enabled", False)
+        self.hadoop_enabled = user_data.get("hadoop_enabled", False)
 
     def __configure_instance_management(self, user_data):
         """Configure attributes used control reboot/terminate behavior

cm/services/__init__.py

 
     def __repr__(self):
         return "<ServiceRole:{0},Owning:{1},Assigned:{2}>".format(
-                ServiceRole.to_string(self.service_role),
-                "None" if self.owning_service is None else self.owning_service.name,
-                "None" if self.assigned_service is None else self.assigned_service.name)
+            ServiceRole.to_string(self.service_role),
+            "None" if self.owning_service is None else self.owning_service.name,
+            "None" if self.assigned_service is None else self.assigned_service.name)
 
     @property
     def owning_service(self):

cm/services/apps/__init__.py

             alive_daemon_pid = None
             system_service = service
             # Galaxy daemon is named 'paster' so handle this special case
-            special_services = {"galaxy": "python", "galaxy_reports": "python", "lwr": "paster"}
+            special_services = {"galaxy": "python", "galaxyreports": "python", "lwr": "paster"}
             system_service = special_services.get(service, service)  # Default back to just service
             alive_daemon_pid = commands.getoutput(
                 "ps -o comm,pid -p %s | grep %s | awk '{print $2}'" % (daemon_pid, system_service))
             pid_file = '%s/qmaster.pid' % self.app.path_resolver.sge_cell
         elif service == 'galaxy':
             pid_file = '%s/main.pid' % self.app.path_resolver.galaxy_home
-        elif service == 'galaxy_reports':
+        elif service == 'galaxyreports':
             pid_file = '%s/reports_webapp.pid' % self.app.path_resolver.galaxy_home
         elif service == 'lwr':
             pid_file = '%s/paster.pid' % self.app.path_resolver.lwr_home
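
For reference, a trimmed sketch of how the daemon check resolves a process name for the renamed `galaxyreports` service (Python 2 `commands` module, as in the surrounding code; `daemon_alive` is an illustrative name):

    import commands  # Python 2 stdlib, as used by the surrounding code

    SPECIAL_SERVICES = {"galaxy": "python", "galaxyreports": "python",
                        "lwr": "paster"}

    def daemon_alive(service, daemon_pid):
        # Map the CloudMan service name to the OS-level process name, then
        # confirm a process with that name runs under the recorded pid.
        system_service = SPECIAL_SERVICES.get(service, service)
        out = commands.getoutput(
            "ps -o comm,pid -p %s | grep %s | awk '{print $2}'"
            % (daemon_pid, system_service))
        return out.strip() == str(daemon_pid)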

cm/services/apps/galaxy_reports.py

-from datetime import datetime
-import os
-import subprocess
-
-from cm.services.apps import ApplicationService
-
-from cm.util import paths
-from cm.util import misc
-from cm.services import service_states
-from cm.services import ServiceRole
-from cm.services import ServiceDependency
-from cm.util.galaxy_conf import DirectoryGalaxyOptionManager
-
-import logging
-log = logging.getLogger('cloudman')
-DEFAULT_REPORTS_PORT = 9001
-
-
-class GalaxyReportsService(ApplicationService):
-
-    def __init__(self, app):
-        super(GalaxyReportsService, self).__init__(app)
-        self.galaxy_home = self.app.path_resolver.galaxy_home
-        self.reports_port = DEFAULT_REPORTS_PORT
-        self.name = ServiceRole.to_string(ServiceRole.GALAXY_REPORTS)
-        self.svc_roles = [ServiceRole.GALAXY_REPORTS]
-        self.dependencies = [ServiceDependency(
-            self, ServiceRole.GALAXY)]  # Hopefully Galaxy dependency alone enough to ensure database migrated, etc...
-        self.conf_dir = os.path.join(
-            self.app.path_resolver.galaxy_home, 'reports.conf.d')
-
-    def __repr__(self):
-        return "Galaxy Reports service on port {0}".format(DEFAULT_REPORTS_PORT)
-
-    def _check_galaxy_reports_running(self):
-        return self._port_bound(self.reports_port)
-
-    def start(self):
-        self.state = service_states.STARTING
-        log.debug("Starting GalaxyReportsService")
-        self.status()
-        if not self.state == service_states.RUNNING:
-            self._setup()
-            # --sync-config will update Galaxy Report's database settings with Galaxy's.
-            started = self._run("--sync-config start")
-            if not started:
-                log.warn("Failed to setup or run galaxy reports server.")
-                self.start = service_states.ERROR
-
-    def _setup(self):
-        log.debug("Running GalaxyReportsService _setup")
-        reports_option_manager = DirectoryGalaxyOptionManager(self.app,
-            conf_dir=self.conf_dir, conf_file_name='reports_wsgi.ini')
-        reports_option_manager.setup()
-        main_props = {
-            # Place dummy database_connection for run_reports.sh's --sync-config option to replace
-            'database_connection': 'dummy',
-            'filter-with': 'proxy-prefix',
-        }
-        proxy_props = {
-            'use': 'egg:PasteDeploy#prefix',
-            'prefix': '/reports',
-        }
-        reports_option_manager.set_properties(main_props, section='app:main',
-            description='app_main_props')
-        reports_option_manager.set_properties(proxy_props, section='filter:proxy-prefix',
-            description='proxy_prefix_props')
-
-    def remove(self, synchronous=False):
-        if self.state == service_states.RUNNING:
-            log.info("Removing '%s' service" % self.name)
-            super(GalaxyReportsService, self).remove(synchronous)
-            self.state = service_states.SHUTTING_DOWN
-            log.info("Shutting down Galaxy Reports...")
-            if self._run("stop"):
-                self.state = service_states.SHUT_DOWN
-                # Move all log files
-                subprocess.call("bash -c 'for f in $GALAXY_HOME/reports_webapp.log; do mv \"$f\" \"$f.%s\"; done'" %
-                                datetime.utcnow().strftime('%H_%M'), shell=True)
-            else:
-                log.info("Failed to shutdown down Galaxy Reports...")
-                self.state = service_states.ERROR
-        elif self.state == service_states.UNSTARTED:
-            self.state = service_states.SHUT_DOWN
-        else:
-            log.debug("{0} service not running (state: {1}) so not removing it."
-                      .format(self.name, self.state))
-
-    def _run(self, args):
-        command = '%s - galaxy -c "export GALAXY_REPORTS_CONFIG_DIR=\'%s\'; sh $GALAXY_HOME/run_reports.sh %s"' % (
-            paths.P_SU, self.conf_dir, args)
-        return misc.run(command)
-
-    def status(self):
-        if self.state == service_states.SHUTTING_DOWN or \
-           self.state == service_states.SHUT_DOWN or \
-           self.state == service_states.UNSTARTED or \
-           self.state == service_states.WAITING_FOR_USER_ACTION:
-            pass
-        elif self._check_daemon('galaxy_reports'):
-            if self._check_galaxy_reports_running():
-                self.state = service_states.RUNNING
-        elif self.state != service_states.STARTING:
-            log.error("Galaxy reports error; Galaxy reports not runnnig")
-            self.state = service_states.ERROR

cm/services/apps/galaxyreports.py

+from datetime import datetime
+import os
+import subprocess
+
+from cm.services.apps import ApplicationService
+
+from cm.util import paths
+from cm.util import misc
+from cm.services import service_states
+from cm.services import ServiceRole
+from cm.services import ServiceDependency
+from cm.util.galaxy_conf import DirectoryGalaxyOptionManager
+
+import logging
+log = logging.getLogger('cloudman')
+DEFAULT_REPORTS_PORT = 9001
+
+
+class GalaxyReportsService(ApplicationService):
+
+    def __init__(self, app):
+        super(GalaxyReportsService, self).__init__(app)
+        self.galaxy_home = self.app.path_resolver.galaxy_home
+        self.reports_port = DEFAULT_REPORTS_PORT
+        self.name = ServiceRole.to_string(ServiceRole.GALAXY_REPORTS)
+        self.svc_roles = [ServiceRole.GALAXY_REPORTS]
+        self.dependencies = [ServiceDependency(
+            self, ServiceRole.GALAXY)]  # Hopefully the Galaxy dependency alone is enough to ensure the database is migrated, etc.
+        self.conf_dir = os.path.join(
+            self.app.path_resolver.galaxy_home, 'reports.conf.d')
+
+    def __repr__(self):
+        return "Galaxy Reports service on port {0}".format(DEFAULT_REPORTS_PORT)
+
+    def _check_galaxy_reports_running(self):
+        return self._port_bound(self.reports_port)
+
+    def start(self):
+        self.state = service_states.STARTING
+        log.debug("Starting GalaxyReportsService")
+        self.status()
+        if not self.state == service_states.RUNNING:
+            self._setup()
+            # --sync-config will update Galaxy Reports' database settings with Galaxy's.
+            started = self._run("--sync-config start")
+            if not started:
+                log.warn("Failed to setup or run galaxy reports server.")
+                self.start = service_states.ERROR
+
+    def _setup(self):
+        log.debug("Running GalaxyReportsService _setup")
+        reports_option_manager = DirectoryGalaxyOptionManager(self.app,
+            conf_dir=self.conf_dir, conf_file_name='reports_wsgi.ini')
+        reports_option_manager.setup()
+        main_props = {
+            # Place dummy database_connection for run_reports.sh's --sync-config option to replace
+            'database_connection': 'dummy',
+            'filter-with': 'proxy-prefix',
+        }
+        proxy_props = {
+            'use': 'egg:PasteDeploy#prefix',
+            'prefix': '/reports',
+        }
+        reports_option_manager.set_properties(main_props, section='app:main',
+            description='app_main_props')
+        reports_option_manager.set_properties(proxy_props, section='filter:proxy-prefix',
+            description='proxy_prefix_props')
+
+    def remove(self, synchronous=False):
+        if self.state == service_states.RUNNING:
+            log.info("Removing '%s' service" % self.name)
+            super(GalaxyReportsService, self).remove(synchronous)
+            self.state = service_states.SHUTTING_DOWN
+            log.info("Shutting down Galaxy Reports...")
+            if self._run("stop"):
+                self.state = service_states.SHUT_DOWN
+                # Move all log files
+                subprocess.call("bash -c 'for f in $GALAXY_HOME/reports_webapp.log; do mv \"$f\" \"$f.%s\"; done'" %
+                                datetime.utcnow().strftime('%H_%M'), shell=True)
+            else:
+                log.info("Failed to shutdown down Galaxy Reports...")
+                self.state = service_states.ERROR
+        elif self.state == service_states.UNSTARTED:
+            self.state = service_states.SHUT_DOWN
+        else:
+            log.debug("{0} service not running (state: {1}) so not removing it."
+                      .format(self.name, self.state))
+
+    def _run(self, args):
+        command = '%s - galaxy -c "export GALAXY_REPORTS_CONFIG_DIR=\'%s\'; sh $GALAXY_HOME/run_reports.sh %s"' % (
+            paths.P_SU, self.conf_dir, args)
+        return misc.run(command)
+
+    def status(self):
+        if self.state == service_states.SHUTTING_DOWN or \
+           self.state == service_states.SHUT_DOWN or \
+           self.state == service_states.UNSTARTED or \
+           self.state == service_states.WAITING_FOR_USER_ACTION:
+            pass
+        elif self._check_daemon('galaxyreports'):
+            if self._check_galaxy_reports_running():
+                self.state = service_states.RUNNING
+        elif self.state != service_states.STARTING:
+            log.error("Galaxy reports error; Galaxy reports not runnnig")
+            self.state = service_states.ERROR

cm/services/apps/sge.py

 
             SGE_allq_file = '%s/all.q.conf' % self.app.path_resolver.sge_root
             all_q_template = Template(templates.ALL_Q_TEMPLATE)
-            if self.app.ud.get('hadoop_enabled', True):
+            if self.app.config.hadoop_enabled:
                 all_q_params = {
                     "prolog_path": os.path.join(paths.P_HADOOP_HOME, paths.P_HADOOP_INTEGRATION_FOLDER + "/hdfsstart.sh"),
                     "epilog_path": os.path.join(paths.P_HADOOP_HOME, paths.P_HADOOP_INTEGRATION_FOLDER + "/hdfsstop.sh")

cm/util/master.py

 from cm.services import ServiceType
 from cm.services import service_states
 from cm.services.apps.galaxy import GalaxyService
-from cm.services.apps.galaxy_reports import GalaxyReportsService
+from cm.services.apps.galaxyreports import GalaxyReportsService
 from cm.services.apps.hadoop import HadoopService
 from cm.services.apps.htcondor import HTCondorService
 from cm.services.apps.migration import MigrationService
             if num_off == len(self.services):
                 log.debug("All services shut down")
                 break
-            elif rebooting:
+            elif rebooting and self.app.cloud_type == 'ec2':
+                # For the EC2 cloud it's ok to reboot with volumes attached
                 log.debug("Not waiting for all the services to shut down because we're just rebooting.")
                 break
             sleep_time = 6
         # Spot requests cannot be tagged and thus there is no good way of associating those
         # back with a cluster after a reboot so cancel those
         log.debug("Initiating cluster reboot.")
-        self.shutdown(sd_filesystems=False, sd_instances=False, rebooting=True)
+        # Detach volumes on all clouds except EC2, where they stay attached across a reboot
+        sd_filesystems = True
+        if self.app.cloud_type == 'ec2':
+            sd_filesystems = False
+        self.shutdown(sd_filesystems=sd_filesystems, sd_instances=False, rebooting=True)
         if soft:
             if misc.run("{0} restart".format(os.path.join(self.app.ud['boot_script_path'],
                self.app.ud['boot_script_name']))):
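
The reboot path now varies volume handling by cloud type, since only EC2 reliably keeps volumes attached across a reboot; a condensed sketch of the decision (the function name is illustrative):

    def reboot_shutdown_args(cloud_type):
        # On EC2, volumes survive a reboot attached, so skip detaching file
        # systems; elsewhere, detach them to be safe.
        sd_filesystems = cloud_type != 'ec2'
        return dict(sd_filesystems=sd_filesystems, sd_instances=False,
                    rebooting=True)

    assert reboot_shutdown_args('ec2')['sd_filesystems'] is False
    assert reboot_shutdown_args('openstack')['sd_filesystems'] is True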
 from boto.exception import S3ResponseError
 
 def _get_file_from_bucket(log, s3_conn, bucket_name, remote_filename, local_filename):
+    log.debug(('Getting file %s from bucket %s' % (remote_filename, bucket_name)))
     try:
-        b = s3_conn.get_bucket(bucket_name)
+        b = s3_conn.get_bucket(bucket_name, validate=False)
         k = Key(b, remote_filename)
         log.debug(("Attempting to retrieve file '%s' from bucket '%s'" % (remote_filename, bucket_name)))
         if k.exists():
     except S3ResponseError as e:
         log.error(("Failed to get file '%s' from bucket '%s': %s" % (remote_filename, bucket_name, e)))
         return False
+
+def _key_exists_in_bucket(log, s3_conn, bucket_name, key_name):
+    """
+    Check if an object (i.e., key) of name ``key_name`` exists in bucket
+    ``bucket_name``. Return ``True`` if so, ``False`` otherwise.
+    """
+    b = s3_conn.get_bucket(bucket_name, validate=False)
+    k = Key(b, key_name)
+    log.debug(("Checking if key '%s' exists in bucket '%s'" % (key_name, bucket_name)))
+    return k.exists()
 logging.getLogger('boto').setLevel(logging.INFO)
 LOCAL_PATH = os.getcwd()
 CM_HOME = '/mnt/cm'
     s3_conn = None
     try:
         s3_conn = S3Connection(aws_access_key_id=access_key, aws_secret_access_key=secret_key, is_secure=is_secure, port=port, host=host, path=path, calling_format=calling_format)
-        log.debug('Got boto S3 connection')
+        log.debug(('Got boto S3 connection: %s' % s3_conn))
     except BotoServerError as e:
         log.error('Exception getting S3 connection; {0}'.format(e))
     return s3_conn
     if (use_object_store and ('access_key' in ud) and ('secret_key' in ud)):
         if ((ud['access_key'] is not None) and (ud['secret_key'] is not None)):
             s3_conn = _get_s3connection(ud)
-    b = None
     if s3_conn:
         if ('bucket_cluster' in ud):
-            b = s3_conn.lookup(ud['bucket_cluster'])
-        if b:
-            log.info(("Cluster bucket '%s' found." % b.name))
-            if _get_file_from_bucket(log, s3_conn, b.name, CM_REMOTE_FILENAME, local_cm_file):
-                _write_cm_revision_to_file(s3_conn, b.name)
-                log.info(('Restored Cloudman from bucket_cluster %s' % ud['bucket_cluster']))
-                return True
+            if _key_exists_in_bucket(log, s3_conn, ud['bucket_cluster'], CM_REMOTE_FILENAME):
+                log.info(("CloudMan found in cluster bucket '%s'." % ud['bucket_cluster']))
+                if _get_file_from_bucket(log, s3_conn, ud['bucket_cluster'], CM_REMOTE_FILENAME, local_cm_file):
+                    _write_cm_revision_to_file(s3_conn, ud['bucket_cluster'])
+                    log.info(('Restored CloudMan from bucket_cluster %s' % ud['bucket_cluster']))
+                    return True
         if _get_file_from_bucket(log, s3_conn, default_bucket_name, CM_REMOTE_FILENAME, local_cm_file):
             log.info(("Retrieved CloudMan (%s) from bucket '%s' via local s3 connection" % (CM_REMOTE_FILENAME, default_bucket_name)))
             _write_cm_revision_to_file(s3_conn, default_bucket_name)
         ip = fp.read()
         fp = urllib.urlopen('http://169.254.169.254/latest/meta-data/public-hostname')
         hn = fp.read()
-        _run(log, 'echo "# Added by CloudMan for NeCTAR" >> /etc/hosts')
-        _run(log, 'echo "{ip} {hn1} {hn2}" >> /etc/hosts'.format(ip=ip, hn1=hn, hn2=hn.split('.')[0]))
+        line = '{ip} {hn1} {hn2}'.format(ip=ip, hn1=hn, hn2=hn.split('.')[0])
+        with open('/etc/hosts', 'a+') as f:
+            if (not any(((line.strip() == x.rstrip('\r\n')) for x in f))):
+                log.debug(('Appending line %s to /etc/hosts' % line))
+                f.write('# Added by CloudMan for NeCTAR\n')
+                f.write((line + '\n'))
     except Exception as e:
         log.error('Trouble fixing /etc/hosts on NeCTAR: {0}'.format(e))