Commits

Enis Afgan committed 9fd1cea

Reduce the number of log printouts while all is considered OK

Comments (0)

Files changed (3)

cm/services/autoscale.py

                - number of nodes is more than the max size of the cluster set by user
                - there are idle nodes and a new hour is about to begin (so as not to get charged for the new hour)
         """
-        log.debug("Checking if cluster is too LARGE")
+        # log.debug("Checking if cluster is too LARGE")
         if len(self.app.manager.worker_instances) > self.as_max:
+            log.debug("Cluster is too explicitly large")
             return True
         elif int(datetime.datetime.utcnow().strftime("%M")) > 57 and \
             len(self.app.manager.worker_instances) > self.as_min and \
                 self.get_num_instances_to_remove() > 0:
             # len(self.app.manager.get_idle_instances()) > 0 and \
+            log.debug("Cluster is too large")
             return True
         return False
 
           :type num_queued_jobs: int
           :param num_queued_jobs: Number of jobs that should be queued before indicating slow job turnover
         """
-
         q_jobs = self.get_queue_jobs()
-        log.debug('q_jobs: %s' % q_jobs)
+        # log.debug('q_jobs: %s' % q_jobs)
         r_jobs_mean, r_jobs_stdv = self.meanstdv(q_jobs['running'])
         qw_jobs_mean, qw_jobs_stdv = self.meanstdv(q_jobs['queued'])
         log.debug('Checking if slow job turnover: queued jobs: %s, avg runtime: %s' % (len(
                         w_instance.send_mount_points()
                     # As long as we're hearing from an instance, assume all OK.
                     if (Time.now() - w_instance.last_comm).seconds < 22:
-                        log.debug("Instance {0} OK (heard from it {1} secs ago)".format(
-                            w_instance.get_desc(),
-                            (Time.now() - w_instance.last_comm).seconds))
+                        # log.debug("Instance {0} OK (heard from it {1} secs ago)".format(
+                        #     w_instance.get_desc(),
+                        #     (Time.now() - w_instance.last_comm).seconds))
                         continue
                     # Explicitly check the state of a quiet instance (but only
                     # periodically)
                     elif (Time.now() - w_instance.last_state_update).seconds > 30:
-                        log.debug("Have not checked on quiet instance {0} for a while; checking now"
-                                  .format(w_instance.get_desc()))
+                        log.debug("Have not heard from or checked on instance {0} "
+                            "for a while; checking now.".format(w_instance.get_desc()))
                         w_instance.maintain()
                     else:
-                        log.debug("Not checking quiet instance {0} (last check {1} secs ago)"
+                        log.debug("Instance {0} has been quiet for a while (last check "
+                            "{1} secs ago); will wait a bit longer before a check..."
                             .format(w_instance.get_desc(),
                             (Time.now() - w_instance.last_state_update).seconds))
             self.__add_services()
                  'fs_name': fs.get_details()['name']})
         jmp = json.dumps({'mount_points': mount_points})
         self.app.manager.console_monitor.conn.send('MOUNT | %s' % jmp, self.id)
-        log.debug("Sent mount points %s to worker %s" % (mount_points, self.id))
+        # log.debug("Sent mount points %s to worker %s" % (mount_points, self.id))
 
     def send_master_pubkey(self):
         # log.info("\tMT: Sending MASTER_PUBKEY message: %s" % self.app.manager.get_root_public_key() )
                self.app.manager.load,
                self.app.manager.worker_status,
                self.app.manager.nfs_tfs)
-        log.debug("Sending message '%s'" % msg_body)
+        # log.debug("Sending message '%s'" % msg_body)
         self.conn.send(msg_body)
 
     def handle_message(self, message):