From b0b00b555f3f280e1d0f670c42909ff0c72e1a45 Mon Sep 17 00:00:00 2001 From: Felix Fontein Date: Sat, 4 Jan 2020 17:56:59 +0100 Subject: [PATCH] docker_container: configure removal wait timeout (#66144) * Add support for timeout while waiting for state. * Allow to limit removal wait time. * Add changelog. * Forgot version_added. * Add some check mode tests. * Use removal_wait_timeout in tests. --- ...66144-docker_container-removal-timeout.yml | 2 + .../modules/cloud/docker/docker_container.py | 35 +++++++++-- .../tasks/tests/start-stop.yml | 63 +++++++++++++++---- 3 files changed, 83 insertions(+), 17 deletions(-) create mode 100644 changelogs/fragments/66144-docker_container-removal-timeout.yml diff --git a/changelogs/fragments/66144-docker_container-removal-timeout.yml b/changelogs/fragments/66144-docker_container-removal-timeout.yml new file mode 100644 index 00000000000..3a089f08907 --- /dev/null +++ b/changelogs/fragments/66144-docker_container-removal-timeout.yml @@ -0,0 +1,2 @@ +minor_changes: +- "docker_container - allow to configure timeout when the module waits for a container's removal." diff --git a/lib/ansible/modules/cloud/docker/docker_container.py b/lib/ansible/modules/cloud/docker/docker_container.py index 47daeddac9c..6bb3dff5cef 100644 --- a/lib/ansible/modules/cloud/docker/docker_container.py +++ b/lib/ansible/modules/cloud/docker/docker_container.py @@ -685,6 +685,17 @@ options: - Use with present and started states to force the re-creation of an existing container. type: bool default: no + removal_wait_timeout: + description: + - When removing an existing container, the docker daemon API call exits after the container + is scheduled for removal. Removal usually is very fast, but it can happen that during high I/O + load, removal can take longer. By default, the module will wait until the container has been + removed before trying to (re-)create it, however long this takes. 
+ - By setting this option, the module will wait at most this many seconds for the container to be + removed. If the container is still in the removal phase after this many seconds, the module will + fail. + type: float + version_added: "2.10" restart: description: - Use with started state to force a matching container to be stopped and restarted. @@ -1281,6 +1292,7 @@ class TaskParameters(DockerBaseClass): self.pull = None self.read_only = None self.recreate = None + self.removal_wait_timeout = None self.restart = None self.restart_retries = None self.restart_policy = None @@ -2610,25 +2622,33 @@ class ContainerManager(DockerBaseClass): self.results['ansible_facts'] = {'docker_container': self.facts} self.results['container'] = self.facts - def wait_for_state(self, container_id, complete_states=None, wait_states=None, accept_removal=False): + def wait_for_state(self, container_id, complete_states=None, wait_states=None, accept_removal=False, max_wait=None): delay = 1.0 + total_wait = 0 while True: # Inspect container result = self.client.get_container_by_id(container_id) if result is None: if accept_removal: return - msg = 'Encontered vanished container while waiting for container {0}' + msg = 'Encontered vanished container while waiting for container "{0}"' self.fail(msg.format(container_id)) # Check container state state = result.get('State', {}).get('Status') if complete_states is not None and state in complete_states: return if wait_states is not None and state not in wait_states: - msg = 'Encontered unexpected state "{1}" while waiting for container {0}' + msg = 'Encontered unexpected state "{1}" while waiting for container "{0}"' self.fail(msg.format(container_id, state)) # Wait + if max_wait is not None: + if total_wait > max_wait: + msg = 'Timeout of {1} seconds exceeded while waiting for container "{0}"' + self.fail(msg.format(container_id, max_wait)) + if total_wait + delay > max_wait: + delay = max_wait - total_wait sleep(delay) + total_wait += delay # 
Exponential backoff, but never wait longer than 10 seconds # (1.1**24 < 10, 1.1**25 > 10, so it will take 25 iterations # until the maximal 10 seconds delay is reached. By then, the @@ -2659,7 +2679,8 @@ class ContainerManager(DockerBaseClass): self.diff_tracker.add('exists', parameter=True, active=False) if container.removing and not self.check_mode: # Wait for container to be removed before trying to create it - self.wait_for_state(container.Id, wait_states=['removing'], accept_removal=True) + self.wait_for_state( + container.Id, wait_states=['removing'], accept_removal=True, max_wait=self.parameters.removal_wait_timeout) new_container = self.container_create(self.parameters.image, self.parameters.create_parameters) if new_container: container = new_container @@ -2686,7 +2707,8 @@ class ContainerManager(DockerBaseClass): self.container_stop(container.Id) self.container_remove(container.Id) if not self.check_mode: - self.wait_for_state(container.Id, wait_states=['removing'], accept_removal=True) + self.wait_for_state( + container.Id, wait_states=['removing'], accept_removal=True, max_wait=self.parameters.removal_wait_timeout) new_container = self.container_create(image_to_use, self.parameters.create_parameters) if new_container: container = new_container @@ -3055,7 +3077,7 @@ class AnsibleDockerClientContainer(AnsibleDockerClient): __NON_CONTAINER_PROPERTY_OPTIONS = tuple([ 'env_file', 'force_kill', 'keep_volumes', 'ignore_image', 'name', 'pull', 'purge_networks', 'recreate', 'restart', 'state', 'trust_image_content', 'networks', 'cleanup', 'kill_signal', - 'output_logs', 'paused' + 'output_logs', 'paused', 'removal_wait_timeout' ] + list(DOCKER_COMMON_ARGS.keys())) def _parse_comparisons(self): @@ -3368,6 +3390,7 @@ def main(): purge_networks=dict(type='bool', default=False), read_only=dict(type='bool'), recreate=dict(type='bool', default=False), + removal_wait_timeout=dict(type='float'), restart=dict(type='bool', default=False), restart_policy=dict(type='str', 
choices=['no', 'on-failure', 'always', 'unless-stopped']), restart_retries=dict(type='int'), diff --git a/test/integration/targets/docker_container/tasks/tests/start-stop.yml b/test/integration/targets/docker_container/tasks/tests/start-stop.yml index ecbe129edf5..c794d5f52ad 100644 --- a/test/integration/targets/docker_container/tasks/tests/start-stop.yml +++ b/test/integration/targets/docker_container/tasks/tests/start-stop.yml @@ -182,7 +182,7 @@ force_kill: yes register: recreate_1 -- name: Recreating container (created, recreate) +- name: Recreating container (created, recreate, check mode) docker_container: image: alpine:3.8 command: '/bin/sh -c "sleep 10m"' @@ -191,6 +191,17 @@ state: present force_kill: yes register: recreate_2 + check_mode: yes + +- name: Recreating container (created, recreate) + docker_container: + image: alpine:3.8 + command: '/bin/sh -c "sleep 10m"' + name: "{{ cname }}" + recreate: yes + state: present + force_kill: yes + register: recreate_3 - name: Recreating container (started) docker_container: @@ -199,7 +210,19 @@ name: "{{ cname }}" state: started force_kill: yes - register: recreate_3 + register: recreate_4 + +- name: Recreating container (started, recreate, check mode) + docker_container: + image: alpine:3.8 + command: '/bin/sh -c "sleep 10m"' + name: "{{ cname }}" + recreate: yes + removal_wait_timeout: 10 + state: started + force_kill: yes + register: recreate_5 + check_mode: yes - name: Recreating container (started, recreate) docker_container: @@ -207,9 +230,10 @@ command: '/bin/sh -c "sleep 10m"' name: "{{ cname }}" recreate: yes + removal_wait_timeout: 10 state: started force_kill: yes - register: recreate_4 + register: recreate_6 - name: cleanup docker_container: @@ -219,18 +243,22 @@ diff: no - debug: var=recreate_1 -- debug: var=recreate_2 - debug: var=recreate_3 - debug: var=recreate_4 +- debug: var=recreate_6 - assert: that: - recreate_2 is changed - recreate_3 is changed - recreate_4 is changed - - 
recreate_1.container.Id != recreate_2.container.Id - - recreate_2.container.Id == recreate_3.container.Id - - recreate_3.container.Id != recreate_4.container.Id + - recreate_5 is changed + - recreate_6 is changed + - recreate_1.container.Id == recreate_2.container.Id + - recreate_1.container.Id != recreate_3.container.Id + - recreate_3.container.Id == recreate_4.container.Id + - recreate_4.container.Id == recreate_5.container.Id + - recreate_4.container.Id != recreate_6.container.Id #################################################################### ## Restarting ###################################################### @@ -247,7 +275,7 @@ - /tmp/tmp register: restart_1 -- name: Restarting (restart) +- name: Restarting (restart, check mode) docker_container: image: alpine:3.8 command: '/bin/sh -c "sleep 10m"' @@ -257,6 +285,18 @@ stop_timeout: 1 force_kill: yes register: restart_2 + check_mode: yes + +- name: Restarting (restart) + docker_container: + image: alpine:3.8 + command: '/bin/sh -c "sleep 10m"' + name: "{{ cname }}" + restart: yes + state: started + stop_timeout: 1 + force_kill: yes + register: restart_3 - name: Restarting (verify volumes) docker_container: @@ -267,7 +307,7 @@ stop_timeout: 1 volumes: - /tmp/tmp - register: restart_3 + register: restart_4 - name: cleanup docker_container: @@ -280,8 +320,9 @@ that: - restart_1 is changed - restart_2 is changed - - restart_1.container.Id == restart_2.container.Id - - restart_3 is not changed + - restart_3 is changed + - restart_1.container.Id == restart_3.container.Id + - restart_4 is not changed #################################################################### ## Stopping ########################################################