Snippets

Gestalt Systems Deregistering and Registering Script for AWS Autoscaling Groups & ELB

Created by Tobias Sturm last modified
import boto3
import httplib
import json
import sys
import time
import logging

def assume(condition, error_message):
  """Abort the program with `error_message` unless `condition` is truthy.

  The abort happens through `sys.exit`, i.e. a `SystemExit` carrying the
  message is raised. Nothing is returned when the condition holds.
  """
  if condition:
    return
  sys.exit(error_message)

def InstanceData(attribute):
  """Read one attribute from the instance meta-data HTTP service.

  Args:
    attribute: path below `latest/meta-data/` to fetch, e.g. 'instance-id'.

  Returns:
    The raw response body.

  Exits the program (via `assume`) when the response status is not 200.
  """
  con = httplib.HTTPConnection("instance-data", 80, timeout=1)
  try:
    con.request("GET", "latest/meta-data/%s" % attribute)
    rsp = con.getresponse()
    assume(rsp.status == 200, 'could not read instance meta-data')
    # Read the body before the connection is closed in the finally clause.
    return rsp.read()
  finally:
    # Always release the socket, including when assume() exits or the
    # request itself raises; previously the connection was never closed.
    con.close()

def InstanceRegion():
  """Return the 'region' entry of this instance's identity document."""
  # InstanceData prefixes its argument with `latest/meta-data/`, so the
  # leading `../` steps over to the sibling `dynamic/` tree.
  raw_document = InstanceData("../dynamic/instance-identity/document")
  return json.loads(raw_document)['region']

def ec2():
  """Return a boto3 EC2 resource for the region this instance runs in."""
  region = InstanceRegion()
  return boto3.resource('ec2', region_name=region)

def autoscaling():
  """Return a boto3 Auto Scaling client for the region this instance runs in."""
  region = InstanceRegion()
  return boto3.client('autoscaling', region_name=region)

def Instance(instance_id):
  """Return the boto3 EC2 `Instance` resource identified by `instance_id`."""
  ec2_resource = ec2()
  return ec2_resource.Instance(instance_id)

def Myself():
  """Return the EC2 `Instance` resource of the machine running this script.

  The instance id is taken from the 'instance-id' meta-data attribute.
  """
  own_id = InstanceData('instance-id')
  return Instance(own_id)

def AutoScalingGroupNameOf(instance):
  """Return the name of the Auto Scaling group `instance` is tagged with.

  The name is the value of the `aws:autoscaling:groupName` tag.

  Args:
    instance: object whose `tags` attribute is a list of
      `{'Key': ..., 'Value': ...}` dicts, or None.

  Returns:
    The value of the first matching tag, or None when no such tag exists.
  """
  # `tags` can be None (boto3 reports instances without any tag that way);
  # treat it like an empty tag list instead of crashing.
  # A plain loop also avoids len() on filter(), which returns an iterator
  # on Python 3 and made the previous implementation raise TypeError there.
  for tag in instance.tags or []:
    if tag['Key'] == 'aws:autoscaling:groupName':
      return tag['Value']
  return None

def AutoScalingGroup(group_name):
  """Fetch the description of the Auto Scaling group called `group_name`.

  Returns the first entry of the 'AutoScalingGroups' list in the API
  response. Exits the program (via `assume`) when that list is empty.
  """
  client = autoscaling()
  matching_groups = client.describe_auto_scaling_groups(
    AutoScalingGroupNames=[group_name]
  )['AutoScalingGroups']
  assume(len(matching_groups) > 0, "No AutoScaling Group with name '%s'" % group_name)
  return matching_groups[0]

def AutoScalingGroupProps(group_name):
  """Summarize the capacity figures of the Auto Scaling group `group_name`.

  Returns:
    dict with the keys
      'want'       - the group's DesiredCapacity,
      'have'       - number of group instances in lifecycle state 'InService',
      'min'        - the group's MinSize,
      'grace_time' - the group's HealthCheckGracePeriod.
  """
  group = AutoScalingGroup(group_name)
  # Materialize the matches as a list: on Python 3 filter() returns an
  # iterator, so the former len(filter(...)) raised TypeError.
  instances_in_service = [
    member for member in group['Instances']
    if member['LifecycleState'] == 'InService'
  ]
  return {
    'want': group['DesiredCapacity'],
    'have': len(instances_in_service),
    'min': group['MinSize'],
    'grace_time': group['HealthCheckGracePeriod']
  }

def MachineStateInAutoScalingGroup(group_name, instance):
  """Return the entry describing `instance` in the group's instance list.

  Exits the program (via `assume`) unless exactly one entry of the group's
  'Instances' list has an 'InstanceId' equal to `instance.id`.
  """
  members = AutoScalingGroup(group_name)['Instances']
  matches = [member for member in members if member['InstanceId'] == instance.id]
  assume(len(matches) == 1, "Instance '%s' is not member of group '%s'" % (instance.id, group_name))
  return matches[0]

def MachineInLifecycleStateFn(group_name, instance, desired_state):
  """Build a zero-argument predicate for polling an instance's lifecycle state.

  Every call of the returned callable looks the instance up in the group
  again and reports whether its 'LifecycleState' equals `desired_state`.
  """
  def in_desired_state():
    record = MachineStateInAutoScalingGroup(group_name, instance)
    return record['LifecycleState'] == desired_state
  return in_desired_state

def EnterStandby(instance):
  """Request that `instance` be moved to Standby within its Auto Scaling group.

  The request asks for the group's desired capacity to be decremented.

  Returns:
    list with the 'ActivityId' of every activity in the API response.
  """
  client = autoscaling()
  result = client.enter_standby(
    InstanceIds=[instance.id],
    AutoScalingGroupName=AutoScalingGroupNameOf(instance),
    ShouldDecrementDesiredCapacity=True
  )
  return [activity['ActivityId'] for activity in result['Activities']]

def ExitStandby(instance):
  """Request that `instance` leave Standby within its Auto Scaling group.

  Returns:
    list with the 'ActivityId' of every activity in the API response.
  """
  client = autoscaling()
  result = client.exit_standby(
    InstanceIds=[instance.id],
    AutoScalingGroupName=AutoScalingGroupNameOf(instance)
  )
  return [activity['ActivityId'] for activity in result['Activities']]

def AutoScalingActivityRunningFn(group_name, activity_ids):
  """Build a zero-argument predicate: are scaling activities still running?

  Args:
    group_name: name of the Auto Scaling group to query.
    activity_ids: list passed as `ActivityIds` to
      `describe_scaling_activities`.

  Returns:
    A callable that queries the API on every call and returns True while at
    least one returned activity reports a 'Progress' below 100.
  """
  def any_activity_running():
    response = autoscaling().describe_scaling_activities(
      ActivityIds=activity_ids,
      AutoScalingGroupName=group_name
    )
    # any() behaves the same on Python 2 and 3; the former len(filter(...))
    # raised TypeError on Python 3, where filter() returns an iterator.
    return any(activity['Progress'] < 100 for activity in response['Activities'])
  return any_activity_running

def LoadBalancersStates(group_name):
  """List the 'State' of everything load-balancer-like attached to a group.

  Both the group's load balancers and its load balancer target groups are
  queried and their states returned in that order.
  Exits the program (via `assume`) when the group has neither.
  """
  classic = autoscaling().describe_load_balancers(
    AutoScalingGroupName=group_name
  )['LoadBalancers']
  target_groups = autoscaling().describe_load_balancer_target_groups(
    AutoScalingGroupName=group_name
  )['LoadBalancerTargetGroups']
  attachments = classic + target_groups
  assume(len(attachments) > 0, "Group '%s' has no LoadBalancer" % group_name)
  states = []
  for attachment in attachments:
    states.append(attachment['State'])
  return states

def AllLoadBalancersInService(group_name):
  """Tell whether every load balancer state of `group_name` is 'InService'."""
  for state in LoadBalancersStates(group_name):
    if state != 'InService':
      return False
  return True
  

def AutoScalingActivitiesRunning(group_name):
  """Tell whether `group_name` currently has a running scaling activity.

  The check is made with an empty activity-id list.
  NOTE(review): presumably the API then considers all of the group's recent
  activities - confirm against the `describe_scaling_activities` docs.
  """
  is_running = AutoScalingActivityRunningFn(group_name, [])
  return is_running()

def GroupReadyForDeploymentFn(group_name):
  """Build a zero-argument predicate: can the group spare an instance now?

  The returned callable is True only when, checked in this order,
    * the number of in-service instances equals the desired capacity,
    * that number is above the group's minimum size,
    * no scaling activity is running, and
    * all load balancers are in service.
  Later checks are skipped as soon as an earlier one fails.
  """
  def ready():
    props = AutoScalingGroupProps(group_name)
    if props['want'] != props['have']:
      return False
    if not (props['have'] > props['min']):
      return False
    if AutoScalingActivitiesRunning(group_name):
      return False
    return AllLoadBalancersInService(group_name)
  return ready

def WaitFor(fn, log_output, interval=10, timeout=600):
  """Poll `fn` until it returns a truthy value or the timeout is used up.

  Args:
    fn: zero-argument callable; truthy means the condition is fulfilled.
    log_output: human readable name of the condition, used in log and
      error messages.
    interval: seconds to sleep between two polls.
    timeout: upper bound, in seconds, for the accumulated sleep time.

  Exits the program (via `sys.exit`) when the condition is still not
  fulfilled after the last poll.
  """
  # Look the logger up by name instead of relying on a module global that
  # only exists when the file runs as a script; it is the same logger object.
  log = logging.getLogger("balanced")
  log.info("checking condition '%s'" % log_output)
  time_waited = 0
  fulfilled = fn()
  while not fulfilled and time_waited < timeout:
    time.sleep(interval)
    log.debug("waiting for condition '%s'" % log_output)
    time_waited += interval # the actual time waited will be longer,
                            # because fn is evaluated. But that's ok.
    fulfilled = fn()
  # Decide on the outcome of the last poll, not on the elapsed time: a
  # condition that turns true on the final poll must not count as a timeout.
  if not fulfilled:
    sys.exit("timeout while waiting for %s" % log_output)
  log.info("condition '%s' fulfilled" % log_output)

def WaitGracePeriod(group_name):
  """Sleep for the health check grace period of the group `group_name`."""
  grace_seconds = AutoScalingGroupProps(group_name)['grace_time']
  # `logger` is the module-level logger set up in the `__main__` section.
  logger.info("Waiting %d seconds for grace period of group %s to expire" % (grace_seconds, group_name))
  time.sleep(grace_seconds)

def DeregisterWhenReady(instance):
  """Take `instance` out of service by moving it to Standby in its group.

  Steps:
    1. check that the instance belongs to a group and is 'InService';
    2. wait until the group is ready for deployment;
    3. request Standby, then wait for the resulting activities to finish
       and for the instance to show up as 'Standby';
    4. pause 30 seconds, then wait until all load balancers of the group
       report 'InService'.

  Exits the program (via `assume` / `WaitFor`) when a precondition fails or
  a wait times out.
  """
  group = AutoScalingGroupNameOf(instance)
  # AutoScalingGroupNameOf returns None for an instance without the group
  # tag; stop with a clear message instead of handing None to the AWS API.
  assume(group is not None, "Instance '%s' does not belong to an AutoScaling Group" % instance.id)
  logger.info("Setting instance %s in group %s to StandBy" % (instance.id, group))

  current_state = MachineStateInAutoScalingGroup(group, instance)['LifecycleState']
  assume(current_state == 'InService', "Instance is in state '%s' (assumed 'InService')" % current_state)

  WaitFor(GroupReadyForDeploymentFn(group), "AS group ready for deployment", 15)

  activities = EnterStandby(instance)
  WaitFor(lambda: not AutoScalingActivityRunningFn(group, activities)(), "Moving Instance to Standby", 5, 45)
  WaitFor(MachineInLifecycleStateFn(group, instance, 'Standby'), "Instance in Standby State", 5, 15)

  # force to wait 30s before checking, giving ELBs some time
  logger.info("Waiting 30s for Loadbalancer Draining")
  time.sleep(30)
  WaitFor(lambda: AllLoadBalancersInService(group), "Loadbalancers deregistered target", 5, 30)

def Register(instance):
  """Put `instance` back into service by moving it out of Standby.

  Steps:
    1. check that the instance belongs to a group and is in 'Standby';
    2. request the exit from Standby, then wait for the resulting
       activities to finish and for the instance to show up as 'InService';
    3. wait for the group's health check grace period;
    4. wait until all load balancers of the group report 'InService';
    5. check that the instance's 'HealthStatus' is 'Healthy'.

  Exits the program (via `assume` / `WaitFor`) when a check fails or a wait
  times out.
  """
  group = AutoScalingGroupNameOf(instance)
  # AutoScalingGroupNameOf returns None for an instance without the group
  # tag; stop with a clear message instead of handing None to the AWS API.
  assume(group is not None, "Instance '%s' does not belong to an AutoScaling Group" % instance.id)
  logger.info("Setting instance %s in group %s to InService" % (instance.id, group))

  current_state = MachineStateInAutoScalingGroup(group, instance)['LifecycleState']
  assume(current_state == 'Standby', "Instance is in state '%s' (assumed 'StandBy')" % current_state)

  activities = ExitStandby(instance)
  WaitFor(lambda: not AutoScalingActivityRunningFn(group, activities)(), "Moving Instance to InService", 5, 45)
  WaitFor(MachineInLifecycleStateFn(group, instance, 'InService'), "Instance in InService State", 5, 15)
  WaitGracePeriod(group)
  WaitFor(lambda: AllLoadBalancersInService(group), "Loadbalancers registered target", 5, 30)
  current_state = MachineStateInAutoScalingGroup(group, instance)['HealthStatus']
  assume(current_state == 'Healthy', "Instance is not healthy (is '%s')" % current_state)
  
  
if __name__ == "__main__":
  # Exactly one argument is expected: the direction, `in` or `out`.
  if len(sys.argv) != 2:
    sys.exit("incorrect number of arguments - usage: `balanced.py [in|out]`")

  # Module-level logger used by the functions above; INFO and above are
  # written to stdout with a timestamp and the level name.
  logger = logging.getLogger("balanced")
  logger.setLevel(logging.INFO)

  stdout_handler = logging.StreamHandler(sys.stdout)
  stdout_handler.setLevel(logging.INFO)
  stdout_handler.setFormatter(
    logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
  )
  logger.addHandler(stdout_handler)

  # The direction is matched case-insensitively; the error message echoes
  # the argument exactly as given.
  direction = sys.argv[1].lower()
  if direction == 'out':
    DeregisterWhenReady(Myself())
  elif direction == 'in':
    Register(Myself())
  else:
    sys.exit("I don't understand '%s'" % sys.argv[1])

Comments (0)