AnsibleAWSModule related cleanup - redshift (#66779)
* AnsibleAWSModule related cleanup - redshift
* Apply a backoff on modify_cluster to cope with concurrent operations
* Add AWS 'hacking' policy to allow creation of Redshift ServiceRole
* Adding the retry policies makes the redshift test suite more reliable
This commit is contained in:
parent
a6bb3ae291
commit
8d574c3770
4 changed files with 76 additions and 22 deletions
2
changelogs/fragments/66779-redshift-backoff.yml
Normal file
2
changelogs/fragments/66779-redshift-backoff.yml
Normal file
|
@@ -0,0 +1,2 @@
|
|||
minor_changes:
|
||||
- 'redshift: Add AWSRetry calls for errors outside our control'
|
|
@@ -11,6 +11,15 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"Action": "iam:CreateServiceLinkedRole",
|
||||
"Effect": "Allow",
|
||||
"Resource": "arn:aws:iam::*:role/aws-service-role/redshift.amazonaws.com/AWSServiceRoleForRedshift",
|
||||
"Condition": {
|
||||
"StringLike": {
|
||||
"iam:AWSServiceName": "redshift.amazonaws.com"}
|
||||
}
|
||||
},
|
||||
{
|
||||
"Sid": "AllowRDSReadEverywhere",
|
||||
"Effect": "Allow",
|
||||
|
|
|
@@ -261,8 +261,9 @@ cluster:
|
|||
try:
|
||||
import botocore
|
||||
except ImportError:
|
||||
pass # handled by AnsibleAWSModule
|
||||
from ansible.module_utils.ec2 import ec2_argument_spec, snake_dict_to_camel_dict
|
||||
pass # caught by AnsibleAWSModule
|
||||
|
||||
from ansible.module_utils.ec2 import AWSRetry, snake_dict_to_camel_dict
|
||||
from ansible.module_utils.aws.core import AnsibleAWSModule, is_boto3_error_code
|
||||
|
||||
|
||||
|
@@ -303,6 +304,45 @@ def _collect_facts(resource):
|
|||
return facts
|
||||
|
||||
|
||||
@AWSRetry.jittered_backoff()
def _describe_cluster(redshift, identifier):
    '''
    Fetch the description of a single cluster by identifier.

    Thin proxy around describe_clusters; the jittered-backoff decorator
    retries transient API errors before giving up.
    '''
    response = redshift.describe_clusters(ClusterIdentifier=identifier)
    return response['Clusters'][0]
|
||||
|
||||
|
||||
@AWSRetry.jittered_backoff()
def _create_cluster(redshift, **settings):
    '''
    Thin retry-wrapped proxy for create_cluster.

    All keyword arguments are forwarded unchanged to the boto3 call; the
    decorator retries transient API errors.
    '''
    return redshift.create_cluster(**settings)
|
||||
|
||||
|
||||
# Simple wrapper around delete, try to avoid throwing an error if some other
# operation is in progress
@AWSRetry.jittered_backoff(catch_extra_error_codes=['InvalidClusterState'])
def _delete_cluster(redshift, **settings):
    '''
    Thin retry-wrapped proxy for delete_cluster.

    In addition to the usual transient errors, 'InvalidClusterState'
    (roughly: another operation is still in progress) is retried so a
    cluster can be deleted even while a change operation is running.
    '''
    return redshift.delete_cluster(**settings)
|
||||
|
||||
|
||||
@AWSRetry.jittered_backoff(catch_extra_error_codes=['InvalidClusterState'])
def _modify_cluster(redshift, **settings):
    '''
    Thin retry-wrapped proxy for modify_cluster.

    'InvalidClusterState' (roughly: another modification still in
    progress) is retried alongside the usual transient errors so
    back-to-back modifications don't fail spuriously.
    '''
    return redshift.modify_cluster(**settings)
|
||||
|
||||
|
||||
def create_cluster(module, redshift):
|
||||
"""
|
||||
Create a new cluster
|
||||
|
@@ -340,15 +380,16 @@ def create_cluster(module, redshift):
|
|||
params['d_b_name'] = d_b_name
|
||||
|
||||
try:
|
||||
redshift.describe_clusters(ClusterIdentifier=identifier)['Clusters'][0]
|
||||
_describe_cluster(redshift, identifier)
|
||||
changed = False
|
||||
except is_boto3_error_code('ClusterNotFound'):
|
||||
try:
|
||||
redshift.create_cluster(ClusterIdentifier=identifier,
|
||||
NodeType=node_type,
|
||||
MasterUsername=username,
|
||||
MasterUserPassword=password,
|
||||
**snake_dict_to_camel_dict(params, capitalize_first=True))
|
||||
_create_cluster(redshift,
|
||||
ClusterIdentifier=identifier,
|
||||
NodeType=node_type,
|
||||
MasterUsername=username,
|
||||
MasterUserPassword=password,
|
||||
**snake_dict_to_camel_dict(params, capitalize_first=True))
|
||||
except (botocore.exceptions.BotoCoreError, botocore.exceptions.ClientError) as e:
|
||||
module.fail_json_aws(e, msg="Failed to create cluster")
|
||||
except (botocore.exceptions.BotoCoreError, botocore.exceptions.ClientError) as e: # pylint: disable=duplicate-except
|
||||
|
@@ -364,7 +405,7 @@ def create_cluster(module, redshift):
|
|||
except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e:
|
||||
module.fail_json_aws(e, msg="Timeout waiting for the cluster creation")
|
||||
try:
|
||||
resource = redshift.describe_clusters(ClusterIdentifier=identifier)['Clusters'][0]
|
||||
resource = _describe_cluster(redshift, identifier)
|
||||
except (botocore.exceptions.BotoCoreError, botocore.exceptions.ClientError) as e:
|
||||
module.fail_json_aws(e, msg="Failed to describe cluster")
|
||||
|
||||
|
@@ -381,7 +422,7 @@ def describe_cluster(module, redshift):
|
|||
identifier = module.params.get('identifier')
|
||||
|
||||
try:
|
||||
resource = redshift.describe_clusters(ClusterIdentifier=identifier)['Clusters'][0]
|
||||
resource = _describe_cluster(redshift, identifier)
|
||||
except (botocore.exceptions.BotoCoreError, botocore.exceptions.ClientError) as e:
|
||||
module.fail_json_aws(e, msg="Error describing cluster")
|
||||
|
||||
|
@@ -409,10 +450,10 @@ def delete_cluster(module, redshift):
|
|||
params[p] = module.params.get(p)
|
||||
|
||||
try:
|
||||
redshift.delete_cluster(
|
||||
_delete_cluster(
|
||||
redshift,
|
||||
ClusterIdentifier=identifier,
|
||||
**snake_dict_to_camel_dict(params, capitalize_first=True)
|
||||
)
|
||||
**snake_dict_to_camel_dict(params, capitalize_first=True))
|
||||
except is_boto3_error_code('ClusterNotFound'):
|
||||
return(False, {})
|
||||
except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e: # pylint: disable=duplicate-except
|
||||
|
@@ -459,8 +500,10 @@ def modify_cluster(module, redshift):
|
|||
# enhanced_vpc_routing parameter change needs an exclusive request
|
||||
if module.params.get('enhanced_vpc_routing') is not None:
|
||||
try:
|
||||
redshift.modify_cluster(ClusterIdentifier=identifier,
|
||||
EnhancedVpcRouting=module.params.get('enhanced_vpc_routing'))
|
||||
_modify_cluster(
|
||||
redshift,
|
||||
ClusterIdentifier=identifier,
|
||||
EnhancedVpcRouting=module.params.get('enhanced_vpc_routing'))
|
||||
except (botocore.exceptions.BotoCoreError, botocore.exceptions.ClientError) as e:
|
||||
module.fail_json_aws(e, msg="Couldn't modify redshift cluster %s " % identifier)
|
||||
if wait:
|
||||
|
@@ -478,8 +521,10 @@ def modify_cluster(module, redshift):
|
|||
|
||||
# change the rest
|
||||
try:
|
||||
redshift.modify_cluster(ClusterIdentifier=identifier,
|
||||
**snake_dict_to_camel_dict(params, capitalize_first=True))
|
||||
_modify_cluster(
|
||||
redshift,
|
||||
ClusterIdentifier=identifier,
|
||||
**snake_dict_to_camel_dict(params, capitalize_first=True))
|
||||
except (botocore.exceptions.BotoCoreError, botocore.exceptions.ClientError) as e:
|
||||
module.fail_json_aws(e, msg="Couldn't modify redshift cluster %s " % identifier)
|
||||
|
||||
|
@@ -497,7 +542,7 @@ def modify_cluster(module, redshift):
|
|||
except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e:
|
||||
module.fail_json_aws(e, msg="Timeout waiting for cluster modification")
|
||||
try:
|
||||
resource = redshift.describe_clusters(ClusterIdentifier=identifier)['Clusters'][0]
|
||||
resource = _describe_cluster(redshift, identifier)
|
||||
except (botocore.exceptions.BotoCoreError, botocore.exceptions.ClientError) as e:
|
||||
module.fail_json(e, msg="Couldn't modify redshift cluster %s " % identifier)
|
||||
|
||||
|
@@ -505,8 +550,7 @@ def modify_cluster(module, redshift):
|
|||
|
||||
|
||||
def main():
|
||||
argument_spec = ec2_argument_spec()
|
||||
argument_spec.update(dict(
|
||||
argument_spec = dict(
|
||||
command=dict(choices=['create', 'facts', 'delete', 'modify'], required=True),
|
||||
identifier=dict(required=True),
|
||||
node_type=dict(choices=['ds1.xlarge', 'ds1.8xlarge', 'ds2.xlarge',
|
||||
|
@@ -538,7 +582,7 @@ def main():
|
|||
enhanced_vpc_routing=dict(type='bool', default=False),
|
||||
wait=dict(type='bool', default=False),
|
||||
wait_timeout=dict(type='int', default=300),
|
||||
))
|
||||
)
|
||||
|
||||
required_if = [
|
||||
('command', 'delete', ['skip_final_cluster_snapshot']),
|
||||
|
|
|
@@ -1,3 +1,2 @@
|
|||
unstable
|
||||
cloud/aws
|
||||
shippable/aws/group1
|
||||
|
|
Loading…
Reference in a new issue