Fixes #35993 - Changes to update_size in commit eb4cc31
made it so
the group dict passed into update_size was not modified. As a result,
the 'replace' call does not see an updated min_size like it previously
did and doesn't pause to wait for any new instances to spin up. Instead,
it moves straight into terminating old instances. Fix is to add batch_size
to min_size when calling wait_for_new_inst.
Fixes #28087 - Make replace_all_instances and replace_instances behave
exactly the same by setting replace_instances = current list of instances
when replace_all_instances used. Root cause of issue was that without lc_check
terminate_batch will terminate all instances passed to it and after updating
the asg size we were querying the asg again for the list of instances - so terminate batch
saw the list including new ones just spun up.
When creating new asg with replace_all_instances: yes and lc_check: false
the instances that are initially created are then subsequently replaced.
This change makes it so replace only occurs if the asg already existed.
Add integration tests for #28087 and #35993.
This commit is contained in:
parent
ce416f247f
commit
a2b3120e85
2 changed files with 132 additions and 2 deletions
|
@ -1191,6 +1191,7 @@ def replace(connection):
|
||||||
desired_capacity = module.params.get('desired_capacity')
|
desired_capacity = module.params.get('desired_capacity')
|
||||||
lc_check = module.params.get('lc_check')
|
lc_check = module.params.get('lc_check')
|
||||||
replace_instances = module.params.get('replace_instances')
|
replace_instances = module.params.get('replace_instances')
|
||||||
|
replace_all_instances = module.params.get('replace_all_instances')
|
||||||
|
|
||||||
as_group = describe_autoscaling_groups(connection, group_name)[0]
|
as_group = describe_autoscaling_groups(connection, group_name)[0]
|
||||||
if desired_capacity is None:
|
if desired_capacity is None:
|
||||||
|
@ -1199,6 +1200,10 @@ def replace(connection):
|
||||||
wait_for_new_inst(connection, group_name, wait_timeout, as_group['MinSize'], 'viable_instances')
|
wait_for_new_inst(connection, group_name, wait_timeout, as_group['MinSize'], 'viable_instances')
|
||||||
props = get_properties(as_group)
|
props = get_properties(as_group)
|
||||||
instances = props['instances']
|
instances = props['instances']
|
||||||
|
if replace_all_instances:
|
||||||
|
# If replacing all instances, then set replace_instances to current set
|
||||||
|
# This allows replace_instances and replace_all_instances to behave same
|
||||||
|
replace_instances = instances
|
||||||
if replace_instances:
|
if replace_instances:
|
||||||
instances = replace_instances
|
instances = replace_instances
|
||||||
# check to see if instances are replaceable if checking launch configs
|
# check to see if instances are replaceable if checking launch configs
|
||||||
|
@ -1235,7 +1240,7 @@ def replace(connection):
|
||||||
|
|
||||||
as_group = describe_autoscaling_groups(connection, group_name)[0]
|
as_group = describe_autoscaling_groups(connection, group_name)[0]
|
||||||
update_size(connection, as_group, max_size + batch_size, min_size + batch_size, desired_capacity + batch_size)
|
update_size(connection, as_group, max_size + batch_size, min_size + batch_size, desired_capacity + batch_size)
|
||||||
wait_for_new_inst(connection, group_name, wait_timeout, as_group['MinSize'], 'viable_instances')
|
wait_for_new_inst(connection, group_name, wait_timeout, as_group['MinSize'] + batch_size, 'viable_instances')
|
||||||
wait_for_elb(connection, group_name)
|
wait_for_elb(connection, group_name)
|
||||||
wait_for_target_group(connection, group_name)
|
wait_for_target_group(connection, group_name)
|
||||||
as_group = describe_autoscaling_groups(connection, group_name)[0]
|
as_group = describe_autoscaling_groups(connection, group_name)[0]
|
||||||
|
@ -1414,6 +1419,12 @@ def wait_for_new_inst(connection, group_name, wait_timeout, desired_size, prop):
|
||||||
return props
|
return props
|
||||||
|
|
||||||
|
|
||||||
|
def asg_exists(connection):
|
||||||
|
group_name = module.params.get('name')
|
||||||
|
as_group = describe_autoscaling_groups(connection, group_name)
|
||||||
|
return bool(len(as_group))
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
argument_spec = ec2_argument_spec()
|
argument_spec = ec2_argument_spec()
|
||||||
argument_spec.update(
|
argument_spec.update(
|
||||||
|
@ -1483,13 +1494,16 @@ def main():
|
||||||
endpoint=ec2_url,
|
endpoint=ec2_url,
|
||||||
**aws_connect_params)
|
**aws_connect_params)
|
||||||
changed = create_changed = replace_changed = False
|
changed = create_changed = replace_changed = False
|
||||||
|
exists = asg_exists(connection)
|
||||||
|
|
||||||
if state == 'present':
|
if state == 'present':
|
||||||
create_changed, asg_properties = create_autoscaling_group(connection)
|
create_changed, asg_properties = create_autoscaling_group(connection)
|
||||||
elif state == 'absent':
|
elif state == 'absent':
|
||||||
changed = delete_autoscaling_group(connection)
|
changed = delete_autoscaling_group(connection)
|
||||||
module.exit_json(changed=changed)
|
module.exit_json(changed=changed)
|
||||||
if replace_all_instances or replace_instances:
|
|
||||||
|
# Only replace instances if asg existed at start of call
|
||||||
|
if exists and (replace_all_instances or replace_instances):
|
||||||
replace_changed, asg_properties = replace(connection)
|
replace_changed, asg_properties = replace(connection)
|
||||||
if create_changed or replace_changed:
|
if create_changed or replace_changed:
|
||||||
changed = True
|
changed = True
|
||||||
|
|
|
@ -387,6 +387,122 @@
|
||||||
|
|
||||||
# ============================================================
|
# ============================================================
|
||||||
|
|
||||||
|
# perform rolling replace with new launch configuration and lc_check:false
|
||||||
|
|
||||||
|
# Note - this is done async so we can query asg_facts during
|
||||||
|
# the execution. Issues #28087 and #35993 result in correct
|
||||||
|
# end result, but spin up extraneous instances during execution.
|
||||||
|
- name: "perform rolling update to new AMI with lc_check: false"
|
||||||
|
ec2_asg:
|
||||||
|
name: "{{ resource_prefix }}-asg"
|
||||||
|
launch_config_name: "{{ resource_prefix }}-lc-2"
|
||||||
|
health_check_type: EC2
|
||||||
|
desired_capacity: 3
|
||||||
|
min_size: 1
|
||||||
|
max_size: 5
|
||||||
|
health_check_period: 900
|
||||||
|
load_balancers: []
|
||||||
|
vpc_zone_identifier: "{{ testing_subnet.subnet.id }}"
|
||||||
|
wait_for_instances: yes
|
||||||
|
replace_all_instances: yes
|
||||||
|
replace_batch_size: 3
|
||||||
|
lc_check: false
|
||||||
|
wait_timeout: 1800
|
||||||
|
state: present
|
||||||
|
<<: *aws_connection_info
|
||||||
|
async: 1800
|
||||||
|
poll: 0
|
||||||
|
register: asg_job
|
||||||
|
|
||||||
|
- name: get ec2_asg facts for 3 minutes
|
||||||
|
ec2_asg_facts:
|
||||||
|
name: "{{ resource_prefix }}-asg"
|
||||||
|
<<: *aws_connection_info
|
||||||
|
register: output
|
||||||
|
loop_control:
|
||||||
|
pause: 15
|
||||||
|
with_sequence: count=12
|
||||||
|
|
||||||
|
- set_fact:
|
||||||
|
inst_id_json_query: 'results[*].results[*].instances[*].instance_id'
|
||||||
|
|
||||||
|
# Since we started with 3 servers and replace all of them.
|
||||||
|
# We should see 6 servers total.
|
||||||
|
- assert:
|
||||||
|
that:
|
||||||
|
- "lookup('flattened',output|json_query(inst_id_json_query)).split(',')|unique|length == 6"
|
||||||
|
|
||||||
|
- name: Ensure ec2_asg task completes
|
||||||
|
async_status: jid="{{ asg_job.ansible_job_id }}"
|
||||||
|
register: status
|
||||||
|
until: status.finished
|
||||||
|
retries: 200
|
||||||
|
delay: 15
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
- name: kill asg
|
||||||
|
ec2_asg:
|
||||||
|
name: "{{ resource_prefix }}-asg"
|
||||||
|
state: absent
|
||||||
|
<<: *aws_connection_info
|
||||||
|
async: 300
|
||||||
|
|
||||||
|
# Create new asg with replace_all_instances and lc_check:false
|
||||||
|
|
||||||
|
# Note - this is done async so we can query asg_facts during
|
||||||
|
# the execution. Issues #28087 results in correct
|
||||||
|
# end result, but spin up extraneous instances during execution.
|
||||||
|
- name: "new asg with lc_check: false"
|
||||||
|
ec2_asg:
|
||||||
|
name: "{{ resource_prefix }}-asg"
|
||||||
|
launch_config_name: "{{ resource_prefix }}-lc"
|
||||||
|
health_check_type: EC2
|
||||||
|
desired_capacity: 3
|
||||||
|
min_size: 1
|
||||||
|
max_size: 5
|
||||||
|
health_check_period: 900
|
||||||
|
load_balancers: []
|
||||||
|
vpc_zone_identifier: "{{ testing_subnet.subnet.id }}"
|
||||||
|
wait_for_instances: yes
|
||||||
|
replace_all_instances: yes
|
||||||
|
replace_batch_size: 3
|
||||||
|
lc_check: false
|
||||||
|
wait_timeout: 1800
|
||||||
|
state: present
|
||||||
|
<<: *aws_connection_info
|
||||||
|
async: 1800
|
||||||
|
poll: 0
|
||||||
|
register: asg_job
|
||||||
|
|
||||||
|
# Collect ec2_asg_facts for 3 minutes
|
||||||
|
- name: get ec2_asg facts
|
||||||
|
ec2_asg_facts:
|
||||||
|
name: "{{ resource_prefix }}-asg"
|
||||||
|
<<: *aws_connection_info
|
||||||
|
register: output
|
||||||
|
loop_control:
|
||||||
|
pause: 15
|
||||||
|
with_sequence: count=12
|
||||||
|
|
||||||
|
- set_fact:
|
||||||
|
inst_id_json_query: 'results[*].results[*].instances[*].instance_id'
|
||||||
|
|
||||||
|
# Get all instance_ids we saw and assert we saw number expected
|
||||||
|
# Should only see 3 (don't replace instances we just created)
|
||||||
|
- assert:
|
||||||
|
that:
|
||||||
|
- "lookup('flattened',output|json_query(inst_id_json_query)).split(',')|unique|length == 3"
|
||||||
|
|
||||||
|
- name: Ensure ec2_asg task completes
|
||||||
|
async_status: jid="{{ asg_job.ansible_job_id }}"
|
||||||
|
register: status
|
||||||
|
until: status.finished
|
||||||
|
retries: 200
|
||||||
|
delay: 15
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
always:
|
always:
|
||||||
|
|
||||||
- name: kill asg
|
- name: kill asg
|
||||||
|
|
Loading…
Reference in a new issue