Don't use copy.deepcopy in high workload areas, use naive_deepcopy (#44337)

* Don't use copy.deepcopy in high workload areas, use deepishcopy. ci_complete

* Add tests

* Add changelog fragment

* rename to naive_deepcopy and add extra docs

* Rename to module_response_deepcopy and move to vars/clean
This commit is contained in:
Matt Martz 2018-08-20 15:08:29 -05:00 committed by GitHub
parent 27ac2fc67c
commit 9b2baebe64
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 119 additions and 11 deletions

View file

@ -0,0 +1,2 @@
minor_changes:
- replace copy.deepcopy in high workload areas with a custom function to improve performance (https://github.com/ansible/ansible/pull/44337)

View file

@ -629,6 +629,7 @@ class TaskExecutor:
if self._task.action in ('set_fact', 'include_vars'):
vars_copy.update(result['ansible_facts'])
else:
# TODO: cleaning of facts should eventually become part of taskresults instead of vars
vars_copy.update(namespace_facts(result['ansible_facts']))
if C.INJECT_FACTS_AS_VARS:
vars_copy.update(clean_facts(result['ansible_facts']))
@ -690,6 +691,7 @@ class TaskExecutor:
if self._task.action in ('set_fact', 'include_vars'):
variables.update(result['ansible_facts'])
else:
# TODO: cleaning of facts should eventually become part of taskresults instead of vars
variables.update(namespace_facts(result['ansible_facts']))
if C.INJECT_FACTS_AS_VARS:
variables.update(clean_facts(result['ansible_facts']))

View file

@ -5,11 +5,9 @@
from __future__ import (absolute_import, division, print_function)
__metaclass__ = type
from copy import deepcopy
from ansible import constants as C
from ansible.parsing.dataloader import DataLoader
from ansible.vars.clean import strip_internal_keys
from ansible.vars.clean import module_response_deepcopy, strip_internal_keys
_IGNORE = ('failed', 'skipped')
_PRESERVE = ('attempts', 'changed', 'retries')
@ -131,7 +129,7 @@ class TaskResult:
result._result = x
elif self._result:
result._result = deepcopy(self._result)
result._result = module_response_deepcopy(self._result)
# actualy remove
for remove_key in ignore:

View file

@ -8,11 +8,9 @@ __metaclass__ = type
import os
import re
from copy import deepcopy
from ansible import constants as C
from ansible.module_utils._text import to_text
from ansible.module_utils.six import string_types
from ansible.module_utils import six
from ansible.plugins.loader import connection_loader
try:
@ -22,6 +20,53 @@ except ImportError:
display = Display()
def module_response_deepcopy(v):
"""Function to create a deep copy of module response data
Designed to be used within the Ansible "engine" to improve performance
issues where ``copy.deepcopy`` was used previously, largely with CPU
and memory contention.
This only supports the following data types, and was designed to only
handle specific workloads:
* ``dict``
* ``list``
The data we pass here will come from a serialization such
as JSON, so we shouldn't have need for other data types such as
``set`` or ``tuple``.
Take note that this function should not be used extensively as a
replacement for ``deepcopy`` due to the naive way in which this
handles other data types.
Do not expect uses outside of those listed below to maintain
backwards compatibility, in case we need to extend this function
to handle our specific needs:
* ``ansible.executor.task_result.TaskResult.clean_copy``
* ``ansible.vars.clean.clean_facts``
* ``ansible.vars.namespace_facts``
"""
if isinstance(v, dict):
ret = v.copy()
items = six.iteritems(ret)
elif isinstance(v, list):
ret = v[:]
items = enumerate(ret)
else:
return v
for key, value in items:
if isinstance(value, (dict, list)):
ret[key] = module_response_deepcopy(value)
else:
ret[key] = value
return ret
def strip_internal_keys(dirty, exceptions=None):
'''
All keys starting with _ansible_ are internal, so create a copy of the 'dirty' dict
@ -32,7 +77,7 @@ def strip_internal_keys(dirty, exceptions=None):
exceptions = ()
clean = dirty.copy()
for k in dirty.keys():
if isinstance(k, string_types) and k.startswith('_ansible_'):
if isinstance(k, six.string_types) and k.startswith('_ansible_'):
if k not in exceptions:
del clean[k]
elif isinstance(dirty[k], dict):
@ -57,7 +102,7 @@ def remove_internal_keys(data):
def clean_facts(facts):
''' remove facts that can override internal keys or otherwise deemed unsafe '''
data = deepcopy(facts)
data = module_response_deepcopy(facts)
remove_keys = set()
fact_keys = set(data.keys())
@ -113,8 +158,8 @@ def namespace_facts(facts):
for k in facts:
if k in ('ansible_local',):
# exceptions to 'deprefixing'
deprefixed[k] = deepcopy(facts[k])
deprefixed[k] = module_response_deepcopy(facts[k])
else:
deprefixed[k.replace('ansible_', '', 1)] = deepcopy(facts[k])
deprefixed[k.replace('ansible_', '', 1)] = module_response_deepcopy(facts[k])
return {'ansible_facts': deprefixed}

View file

@ -307,6 +307,7 @@ class VariableManager:
all_vars = combine_vars(all_vars, _plugins_play([host]))
# finally, the facts caches for this host, if it exists
# TODO: cleaning of facts should eventually become part of taskresults instead of vars
try:
facts = wrap_var(self._fact_cache.get(host.name, {}))
all_vars.update(namespace_facts(facts))

View file

@ -0,0 +1,60 @@
# -*- coding: utf-8 -*-
# (c) 2018 Matt Martz <matt@sivel.net>
# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)
from __future__ import absolute_import, division, print_function
__metaclass__ = type
from ansible.vars.clean import module_response_deepcopy
import pytest
def test_module_response_deepcopy_basic():
x = 42
y = module_response_deepcopy(x)
assert y == x
def test_module_response_deepcopy_atomic():
tests = [None, 42, 2**100, 3.14, True, False, 1j,
"hello", "hello\u1234"]
for x in tests:
assert module_response_deepcopy(x) is x
def test_module_response_deepcopy_list():
x = [[1, 2], 3]
y = module_response_deepcopy(x)
assert y == x
assert x is not y
assert x[0] is not y[0]
def test_module_response_deepcopy_empty_tuple():
x = ()
y = module_response_deepcopy(x)
assert x is y
@pytest.mark.skip(reason='No current support for this situation')
def test_module_response_deepcopy_tuple():
x = ([1, 2], 3)
y = module_response_deepcopy(x)
assert y == x
assert x is not y
assert x[0] is not y[0]
def test_module_response_deepcopy_tuple_of_immutables():
x = ((1, 2), 3)
y = module_response_deepcopy(x)
assert x is y
def test_module_response_deepcopy_dict():
x = {"foo": [1, 2], "bar": 3}
y = module_response_deepcopy(x)
assert y == x
assert x is not y
assert x["foo"] is not y["foo"]