diff --git a/changelogs/fragments/57894-combine-filter-rework.yml b/changelogs/fragments/57894-combine-filter-rework.yml new file mode 100644 index 00000000000..0652df34be5 --- /dev/null +++ b/changelogs/fragments/57894-combine-filter-rework.yml @@ -0,0 +1,4 @@ +bugfixes: +- combine filter - ``[dict1, [dict2]] | combine`` now raise an error; previously ``combine`` had an undocumented behaviour where it was flattening the list before combining it (https://github.com/ansible/ansible/pull/57894#discussion_r339517518). +minor_changes: +- combine filter - now accept a ``list_merge`` argument which modifies its behaviour when the hashes to merge contain arrays/lists. diff --git a/docs/docsite/rst/user_guide/playbooks_filters.rst b/docs/docsite/rst/user_guide/playbooks_filters.rst index 3f734febb2f..3da2c29d944 100644 --- a/docs/docsite/rst/user_guide/playbooks_filters.rst +++ b/docs/docsite/rst/user_guide/playbooks_filters.rst @@ -345,12 +345,13 @@ You can use the transformed data with ``loop`` to iterate over the same subeleme .. _combine_filter: -Combining hashes ----------------- +Combining hashes/dictionaries +----------------------------- .. versionadded:: 2.0 -The `combine` filter allows hashes to be merged. For example, the following would override keys in one hash:: +The ``combine`` filter allows hashes to be merged. +For example, the following would override keys in one hash:: {{ {'a':1, 'b':2} | combine({'b':3}) }} @@ -358,25 +359,208 @@ The resulting hash would be:: {'a':1, 'b':3} -The filter also accepts an optional `recursive=True` parameter to not -only override keys in the first hash, but also recurse into nested -hashes and merge their keys too: - -.. code-block:: jinja - - {{ {'a':{'foo':1, 'bar':2}, 'b':2} | combine({'a':{'bar':3, 'baz':4}}, recursive=True) }} - -This would result in:: - - {'a':{'foo':1, 'bar':3, 'baz':4}, 'b':2} - The filter can also take multiple arguments to merge:: {{ a | combine(b, c, d) }} + {{ [a, b, c, d] | combine }} -In this case, keys in `d` would override those in `c`, which would override those in `b`, and so on. +In this case, keys in ``d`` would override those in ``c``, which would +override those in ``b``, and so on. + +The filter also accepts two optional parameters: ``recursive`` and ``list_merge``. + +recursive + Is a boolean, default to ``False``. + Should the ``combine`` recursively merge nested hashes. + Note: It does **not** depend on the value of the ``hash_behaviour`` setting in ``ansible.cfg``. + +list_merge + Is a string, its possible values are ``replace`` (default), ``keep``, ``append``, ``prepend``, ``append_rp`` or ``prepend_rp``. + It modifies the behaviour of ``combine`` when the hashes to merge contain arrays/lists. + +.. code-block:: yaml + + default: + a: + x: default + y: default + b: default + c: default + patch: + a: + y: patch + z: patch + b: patch + +If ``recursive=False`` (the default), nested hash aren't merged:: + + {{ default | combine(patch) }} + +This would result in:: + + a: + y: patch + z: patch + b: patch + c: default + +If ``recursive=True``, recurse into nested hash and merge their keys:: + + {{ default | combine(patch, recursive=True) }} + +This would result in:: + + a: + x: default + y: patch + z: patch + b: patch + c: default + +If ``list_merge='replace'`` (the default), arrays from the right hash will "replace" the ones in the left hash:: + + default: + a: + - default + patch: + a: + - patch + +.. code-block:: jinja + + {{ default | combine(patch) }} + +This would result in:: + + a: + - patch + +If ``list_merge='keep'``, arrays from the left hash will be kept:: + + {{ default | combine(patch, list_merge='keep') }} + +This would result in:: + + a: + - default + +If ``list_merge='append'``, arrays from the right hash will be appended to the ones in the left hash:: + + {{ default | combine(patch, list_merge='append') }} + +This would result in:: + + a: + - default + - patch + +If ``list_merge='prepend'``, arrays from the right hash will be prepended to the ones in the left hash:: + + {{ default | combine(patch, list_merge='prepend') }} + +This would result in:: + + a: + - patch + - default + +If ``list_merge='append_rp'``, arrays from the right hash will be appended to the ones in the left hash. +Elements of arrays in the left hash that are also in the corresponding array of the right hash will be removed ("rp" stands for "remove present"). +Duplicate elements that aren't in both hashes are kept:: + + default: + a: + - 1 + - 1 + - 2 + - 3 + patch: + a: + - 3 + - 4 + - 5 + - 5 + +.. code-block:: jinja + + {{ default | combine(patch, list_merge='append_rp') }} + +This would result in:: + + a: + - 1 + - 1 + - 2 + - 3 + - 4 + - 5 + - 5 + +If ``list_merge='prepend_rp'``, the behavior is similar to the one for ``append_rp``, but elements of arrays in the right hash are prepended:: + + {{ default | combine(patch, list_merge='prepend_rp') }} + +This would result in:: + + a: + - 3 + - 4 + - 5 + - 5 + - 1 + - 1 + - 2 + +``recursive`` and ``list_merge`` can be used together:: + + default: + a: + a': + x: default_value + y: default_value + list: + - default_value + b: + - 1 + - 1 + - 2 + - 3 + patch: + a: + a': + y: patch_value + z: patch_value + list: + - patch_value + b: + - 3 + - 4 + - 4 + - key: value + +.. code-block:: jinja + + {{ default | combine(patch, recursive=True, list_merge='append_rp') }} + +This would result in:: + + a: + a': + x: default_value + y: patch_value + z: patch_value + list: + - default_value + - patch_value + b: + - 1 + - 1 + - 2 + - 3 + - 4 + - 4 + - key: value -This behavior does not depend on the value of the `hash_behavior` setting in `ansible.cfg`. .. _extract_filter: diff --git a/lib/ansible/plugins/filter/core.py b/lib/ansible/plugins/filter/core.py index aab48a009fd..d2ae68a2441 100644 --- a/lib/ansible/plugins/filter/core.py +++ b/lib/ansible/plugins/filter/core.py @@ -45,7 +45,7 @@ from ansible.module_utils.six import iteritems, string_types, integer_types, rer from ansible.module_utils.six.moves import reduce, shlex_quote from ansible.module_utils._text import to_bytes, to_native, to_text from ansible.module_utils.common.collections import is_sequence -from ansible.module_utils.common._collections_compat import Mapping, MutableMapping +from ansible.module_utils.common._collections_compat import Mapping from ansible.parsing.ajson import AnsibleJSONEncoder from ansible.parsing.yaml.dumper import AnsibleDumper from ansible.template import recursive_check_defined @@ -306,25 +306,36 @@ def mandatory(a, msg=None): def combine(*terms, **kwargs): - recursive = kwargs.get('recursive', False) - if len(kwargs) > 1 or (len(kwargs) == 1 and 'recursive' not in kwargs): - raise AnsibleFilterError("'recursive' is the only valid keyword argument") + recursive = kwargs.pop('recursive', False) + list_merge = kwargs.pop('list_merge', 'replace') + if kwargs: + raise AnsibleFilterError("'recursive' and 'list_merge' are the only valid keyword arguments") - dicts = [] - for t in terms: - if isinstance(t, MutableMapping): - recursive_check_defined(t) - dicts.append(t) - elif isinstance(t, list): - recursive_check_defined(t) - dicts.append(combine(*t, **kwargs)) - else: - raise AnsibleFilterError("|combine expects dictionaries, got " + repr(t)) + # allow the user to do `[dict1, dict2, ...] | combine` + dictionaries = flatten(terms, levels=1) - if recursive: - return reduce(merge_hash, dicts) - else: - return dict(itertools.chain(*map(iteritems, dicts))) + # recursively check that every elements are defined (for jinja2) + recursive_check_defined(dictionaries) + + if not dictionaries: + return {} + + if len(dictionaries) == 1: + return dictionaries[0] + + # merge all the dicts so that the dict at the end of the array have precedence + # over the dict at the beginning. + # we merge the dicts from the highest to the lowest priority because there is + # a huge probability that the lowest priority dict will be the biggest in size + # (as the low prio dict will hold the "default" values and the others will be "patches") + # and merge_hash create a copy of it's first argument. + # so high/right -> low/left is more efficient than low/left -> high/right + high_to_low_prio_dict_iterator = reversed(dictionaries) + result = next(high_to_low_prio_dict_iterator) + for dictionary in high_to_low_prio_dict_iterator: + result = merge_hash(dictionary, result, recursive, list_merge) + + return result def comment(text, style='plain', **kw): diff --git a/lib/ansible/utils/vars.py b/lib/ansible/utils/vars.py index b0d5bc5b798..9011b377798 100644 --- a/lib/ansible/utils/vars.py +++ b/lib/ansible/utils/vars.py @@ -31,7 +31,7 @@ from ansible import context from ansible.errors import AnsibleError, AnsibleOptionsError from ansible.module_utils.six import iteritems, string_types from ansible.module_utils._text import to_native, to_text -from ansible.module_utils.common._collections_compat import MutableMapping +from ansible.module_utils.common._collections_compat import MutableMapping, MutableSequence from ansible.parsing.splitter import parse_kv @@ -92,32 +92,92 @@ def combine_vars(a, b): return result -def merge_hash(a, b): +def merge_hash(x, y, recursive=True, list_merge='replace'): """ - Recursively merges hash b into a so that keys from b take precedence over keys from a + Return a new dictionary result of the merges of y into x, + so that keys from y take precedence over keys from x. + (x and y aren't modified) """ + if list_merge not in ('replace', 'keep', 'append', 'prepend', 'append_rp', 'prepend_rp'): + raise AnsibleError("merge_hash: 'list_merge' argument can only be equal to 'replace', 'keep', 'append', 'prepend', 'append_rp' or 'prepend_rp'") - _validate_mutable_mappings(a, b) + # verify x & y are dicts + _validate_mutable_mappings(x, y) - # if a is empty or equal to b, return b - if a == {} or a == b: - return b.copy() + # to speed things up: if x is empty or equal to y, return y + # (this `if` can be remove without impact on the function + # except performance) + if x == {} or x == y: + return y.copy() - # if b is empty the below unfolds quickly - result = a.copy() + # in the following we will copy elements from y to x, but + # we don't want to modify x, so we create a copy of it + x = x.copy() - # next, iterate over b keys and values - for k, v in iteritems(b): - # if there's already such key in a - # and that key contains a MutableMapping - if k in result and isinstance(result[k], MutableMapping) and isinstance(v, MutableMapping): - # merge those dicts recursively - result[k] = merge_hash(result[k], v) - else: - # otherwise, just copy the value from b to a - result[k] = v + # to speed things up: use dict.update if possible + # (this `if` can be remove without impact on the function + # except performance) + if not recursive and list_merge == 'replace': + x.update(y) + return x - return result + # insert each element of y in x, overriding the one in x + # (as y has higher priority) + # we copy elements from y to x instead of x to y because + # there is a high probability x will be the "default" dict the user + # want to "patch" with y + # therefore x will have much more elements than y + for key, y_value in iteritems(y): + # if `key` isn't in x + # update x and move on to the next element of y + if key not in x: + x[key] = y_value + continue + # from this point we know `key` is in x + + x_value = x[key] + + # if both x's element and y's element are dicts + # recursively "combine" them or override x's with y's element + # depending on the `recursive` argument + # and move on to the next element of y + if isinstance(x_value, MutableMapping) and isinstance(y_value, MutableMapping): + if recursive: + x[key] = merge_hash(x_value, y_value, recursive, list_merge) + else: + x[key] = y_value + continue + + # if both x's element and y's element are lists + # "merge" them depending on the `list_merge` argument + # and move on to the next element of y + if isinstance(x_value, MutableSequence) and isinstance(y_value, MutableSequence): + if list_merge == 'replace': + # replace x value by y's one as it has higher priority + x[key] = y_value + elif list_merge == 'append': + x[key] = x_value + y_value + elif list_merge == 'prepend': + x[key] = y_value + x_value + elif list_merge == 'append_rp': + # append all elements from y_value (high prio) to x_value (low prio) + # and remove x_value elements that are also in y_value + # we don't remove elements from x_value nor y_value that were already in double + # (we assume that there is a reason if there where such double elements) + # _rp stands for "remove present" + x[key] = [z for z in x_value if z not in y_value] + y_value + elif list_merge == 'prepend_rp': + # same as 'append_rp' but y_value elements are prepend + x[key] = y_value + [z for z in x_value if z not in y_value] + # else 'keep' + # keep x value even if y it's of higher priority + # it's done by not changing x[key] + continue + + # else just override x's element with y's one + x[key] = y_value + + return x def load_extra_vars(loader): diff --git a/test/integration/targets/filter_core/tasks/main.yml b/test/integration/targets/filter_core/tasks/main.yml index 2a1f8c76a4a..9db278dde80 100644 --- a/test/integration/targets/filter_core/tasks/main.yml +++ b/test/integration/targets/filter_core/tasks/main.yml @@ -136,6 +136,82 @@ - "'Ansible - くらとみ\n' | b64encode(encoding='utf-16-le') == 'QQBuAHMAaQBiAGwAZQAgAC0AIABPMIkwaDB/MAoA'" - "'QQBuAHMAaQBiAGwAZQAgAC0AIABPMIkwaDB/MAoA' | b64decode(encoding='utf-16-le') == 'Ansible - くらとみ\n'" +- set_fact: + x: + x: x + key: x + y: + y: y + key: y + z: + z: z + key: z + + # Most complicated combine dicts from the documentation + default: + a: + a': + x: default_value + y: default_value + list: + - default_value + b: + - 1 + - 1 + - 2 + - 3 + patch: + a: + a': + y: patch_value + z: patch_value + list: + - patch_value + b: + - 3 + - 4 + - 4 + - key: value + result: + a: + a': + x: default_value + y: patch_value + z: patch_value + list: + - default_value + - patch_value + b: + - 1 + - 1 + - 2 + - 3 + - 4 + - 4 + - key: value + +- name: Verify combine filter + assert: + that: + - "([x] | combine) == x" + - "(x | combine(y)) == {'x': 'x', 'y': 'y', 'key': 'y'}" + - "(x | combine(y, z)) == {'x': 'x', 'y': 'y', 'z': 'z', 'key': 'z'}" + - "([x, y, z] | combine) == {'x': 'x', 'y': 'y', 'z': 'z', 'key': 'z'}" + - "([x, y] | combine(z)) == {'x': 'x', 'y': 'y', 'z': 'z', 'key': 'z'}" + # more advance dicts combination tests are done in "merge_hash" function unit tests + # but even it's redundant with those unit tests, we do at least the most complicated exemple of the documentation here + - "(default | combine(patch, recursive=True, list_merge='append_rp')) == result" + +- set_fact: + combine: "{{[x, [y]] | combine(z)}}" + ignore_errors: yes + register: result + +- name: Ensure combining objects which aren't dictionaries throws an error + assert: + that: + - "result.msg.startswith(\"failed to combine variables, expected dicts but got\")" + - name: Ensure combining two dictionaries containing undefined variables provides a helpful error block: - set_fact: @@ -159,13 +235,3 @@ - assert: that: - "result.msg.startswith('The task includes an option with an undefined variable')" - - - set_fact: - key2: is_defined - - - set_fact: - combined: "{{ foo | combine({'key2': key2}) }}" - - - assert: - that: - - "combined.key2 == 'is_defined'" diff --git a/test/units/utils/test_vars.py b/test/units/utils/test_vars.py index 812c1670211..c92ce4b6ed8 100644 --- a/test/units/utils/test_vars.py +++ b/test/units/utils/test_vars.py @@ -28,12 +28,17 @@ from ansible.utils.vars import combine_vars, merge_hash class TestVariableUtils(unittest.TestCase): + def setUp(self): + pass - test_merge_data = ( + def tearDown(self): + pass + + combine_vars_merge_data = ( dict( a=dict(a=1), b=dict(b=2), - result=dict(a=1, b=2) + result=dict(a=1, b=2), ), dict( a=dict(a=1, c=dict(foo='bar')), @@ -46,7 +51,7 @@ class TestVariableUtils(unittest.TestCase): result=defaultdict(a=1, b=2, c=defaultdict(foo='bar', baz='bam')) ), ) - test_replace_data = ( + combine_vars_replace_data = ( dict( a=dict(a=1), b=dict(b=2), @@ -64,11 +69,7 @@ class TestVariableUtils(unittest.TestCase): ), ) - def test_merge_hash(self): - for test in self.test_merge_data: - self.assertEqual(merge_hash(test['a'], test['b']), test['result']) - - def test_improper_args(self): + def test_combine_vars_improper_args(self): with mock.patch('ansible.constants.DEFAULT_HASH_BEHAVIOUR', 'replace'): with self.assertRaises(AnsibleError): combine_vars([1, 2, 3], dict(a=1)) @@ -83,10 +84,199 @@ class TestVariableUtils(unittest.TestCase): def test_combine_vars_replace(self): with mock.patch('ansible.constants.DEFAULT_HASH_BEHAVIOUR', 'replace'): - for test in self.test_replace_data: + for test in self.combine_vars_replace_data: self.assertEqual(combine_vars(test['a'], test['b']), test['result']) def test_combine_vars_merge(self): with mock.patch('ansible.constants.DEFAULT_HASH_BEHAVIOUR', 'merge'): - for test in self.test_merge_data: + for test in self.combine_vars_merge_data: self.assertEqual(combine_vars(test['a'], test['b']), test['result']) + + merge_hash_data = { + "low_prio": { + "a": { + "a'": { + "x": "low_value", + "y": "low_value", + "list": ["low_value"] + } + }, + "b": [1, 1, 2, 3] + }, + "high_prio": { + "a": { + "a'": { + "y": "high_value", + "z": "high_value", + "list": ["high_value"] + } + }, + "b": [3, 4, 4, {"5": "value"}] + } + } + + def test_merge_hash_simple(self): + for test in self.combine_vars_merge_data: + self.assertEqual(merge_hash(test['a'], test['b']), test['result']) + + low = self.merge_hash_data['low_prio'] + high = self.merge_hash_data['high_prio'] + expected = { + "a": { + "a'": { + "x": "low_value", + "y": "high_value", + "z": "high_value", + "list": ["high_value"] + } + }, + "b": high['b'] + } + self.assertEqual(merge_hash(low, high), expected) + + def test_merge_hash_non_recursive_and_list_replace(self): + low = self.merge_hash_data['low_prio'] + high = self.merge_hash_data['high_prio'] + expected = high + self.assertEqual(merge_hash(low, high, False, 'replace'), expected) + + def test_merge_hash_non_recursive_and_list_keep(self): + low = self.merge_hash_data['low_prio'] + high = self.merge_hash_data['high_prio'] + expected = { + "a": high['a'], + "b": low['b'] + } + self.assertEqual(merge_hash(low, high, False, 'keep'), expected) + + def test_merge_hash_non_recursive_and_list_append(self): + low = self.merge_hash_data['low_prio'] + high = self.merge_hash_data['high_prio'] + expected = { + "a": high['a'], + "b": low['b'] + high['b'] + } + self.assertEqual(merge_hash(low, high, False, 'append'), expected) + + def test_merge_hash_non_recursive_and_list_prepend(self): + low = self.merge_hash_data['low_prio'] + high = self.merge_hash_data['high_prio'] + expected = { + "a": high['a'], + "b": high['b'] + low['b'] + } + self.assertEqual(merge_hash(low, high, False, 'prepend'), expected) + + def test_merge_hash_non_recursive_and_list_append_rp(self): + low = self.merge_hash_data['low_prio'] + high = self.merge_hash_data['high_prio'] + expected = { + "a": high['a'], + "b": [1, 1, 2] + high['b'] + } + self.assertEqual(merge_hash(low, high, False, 'append_rp'), expected) + + def test_merge_hash_non_recursive_and_list_prepend_rp(self): + low = self.merge_hash_data['low_prio'] + high = self.merge_hash_data['high_prio'] + expected = { + "a": high['a'], + "b": high['b'] + [1, 1, 2] + } + self.assertEqual(merge_hash(low, high, False, 'prepend_rp'), expected) + + def test_merge_hash_recursive_and_list_replace(self): + low = self.merge_hash_data['low_prio'] + high = self.merge_hash_data['high_prio'] + expected = { + "a": { + "a'": { + "x": "low_value", + "y": "high_value", + "z": "high_value", + "list": ["high_value"] + } + }, + "b": high['b'] + } + self.assertEqual(merge_hash(low, high, True, 'replace'), expected) + + def test_merge_hash_recursive_and_list_keep(self): + low = self.merge_hash_data['low_prio'] + high = self.merge_hash_data['high_prio'] + expected = { + "a": { + "a'": { + "x": "low_value", + "y": "high_value", + "z": "high_value", + "list": ["low_value"] + } + }, + "b": low['b'] + } + self.assertEqual(merge_hash(low, high, True, 'keep'), expected) + + def test_merge_hash_recursive_and_list_append(self): + low = self.merge_hash_data['low_prio'] + high = self.merge_hash_data['high_prio'] + expected = { + "a": { + "a'": { + "x": "low_value", + "y": "high_value", + "z": "high_value", + "list": ["low_value", "high_value"] + } + }, + "b": low['b'] + high['b'] + } + self.assertEqual(merge_hash(low, high, True, 'append'), expected) + + def test_merge_hash_recursive_and_list_prepend(self): + low = self.merge_hash_data['low_prio'] + high = self.merge_hash_data['high_prio'] + expected = { + "a": { + "a'": { + "x": "low_value", + "y": "high_value", + "z": "high_value", + "list": ["high_value", "low_value"] + } + }, + "b": high['b'] + low['b'] + } + self.assertEqual(merge_hash(low, high, True, 'prepend'), expected) + + def test_merge_hash_recursive_and_list_append_rp(self): + low = self.merge_hash_data['low_prio'] + high = self.merge_hash_data['high_prio'] + expected = { + "a": { + "a'": { + "x": "low_value", + "y": "high_value", + "z": "high_value", + "list": ["low_value", "high_value"] + } + }, + "b": [1, 1, 2] + high['b'] + } + self.assertEqual(merge_hash(low, high, True, 'append_rp'), expected) + + def test_merge_hash_recursive_and_list_prepend_rp(self): + low = self.merge_hash_data['low_prio'] + high = self.merge_hash_data['high_prio'] + expected = { + "a": { + "a'": { + "x": "low_value", + "y": "high_value", + "z": "high_value", + "list": ["high_value", "low_value"] + } + }, + "b": high['b'] + [1, 1, 2] + } + self.assertEqual(merge_hash(low, high, True, 'prepend_rp'), expected)