From be4e7a05faac288634d5ec270d40fcc55bb5d4a8 Mon Sep 17 00:00:00 2001 From: Toshio Kuratomi Date: Sun, 21 May 2017 06:18:35 -0700 Subject: [PATCH] remove_values could hit the recursion limit When operating on arbitrary return data from modules, it is possible to hit the recursion limit when cleaning out no_log values from the data. To fix this, we have to switch from recursion to iteration. Unittest for remove_values recursion limit Fixes #24560 --- lib/ansible/module_utils/basic.py | 99 +++++++++++++++++--- test/units/module_utils/basic/test_no_log.py | 36 ++++++- 2 files changed, 115 insertions(+), 20 deletions(-) diff --git a/lib/ansible/module_utils/basic.py b/lib/ansible/module_utils/basic.py index 165d7856880..565772fa111 100644 --- a/lib/ansible/module_utils/basic.py +++ b/lib/ansible/module_utils/basic.py @@ -89,6 +89,8 @@ import pwd import platform import errno import datetime +from collections import deque +from collections import Mapping, MutableMapping, Sequence, MutableSequence, Set, MutableSet from itertools import repeat, chain try: @@ -113,13 +115,6 @@ except ImportError: # Python2 & 3 way to get NoneType NoneType = type(None) -try: - from collections import Sequence, Mapping -except ImportError: - # python2.5 - Sequence = (list, tuple) - Mapping = (dict,) - # Note: When getting Sequence from collections, it matches with strings. If # this matters, make sure to check for strings before checking for sequencetype try: @@ -405,20 +400,44 @@ def return_values(obj): raise TypeError('Unknown parameter type: %s, %s' % (type(obj), obj)) -def remove_values(value, no_log_strings): - """ Remove strings in no_log_strings from value. If value is a container - type, then remove a lot more""" +def _remove_values_conditions(value, no_log_strings, deferred_removals): + """ + Helper function for :meth:`remove_values`. + + :arg value: The value to check for strings that need to be stripped + :arg no_log_strings: set of strings which must be stripped out of any values + :arg deferred_removals: List which holds information about nested + containers that have to be iterated for removals. It is passed into + this function so that more entries can be added to it if value is + a container type. The format of each entry is a 2-tuple where the first + element is the ``value`` parameter and the second value is a new + container to copy the elements of ``value`` into once iterated. + :returns: if ``value`` is a scalar, returns ``value`` with two exceptions: + 1. :class:`~datetime.datetime` objects which are changed into a string representation. + 2. objects which are in no_log_strings are replaced with a placeholder + so that no sensitive data is leaked. + If ``value`` is a container type, returns a new empty container. + + ``deferred_removals`` is added to as a side-effect of this function. + + .. warning:: It is up to the caller to make sure the order in which value + is passed in is correct. For instance, higher level containers need + to be passed in before lower level containers. For example, given + ``{'level1': {'level2': 'level3': [True]} }`` first pass in the + dictionary for ``level1``, then the dict for ``level2``, and finally + the list for ``level3``. + """ if isinstance(value, (text_type, binary_type)): # Need native str type native_str_value = value if isinstance(value, text_type): value_is_text = True if PY2: - native_str_value = to_bytes(value, encoding='utf-8', errors='surrogate_or_strict') + native_str_value = to_bytes(value, errors='surrogate_or_strict') elif isinstance(value, binary_type): value_is_text = False if PY3: - native_str_value = to_text(value, encoding='utf-8', errors='surrogate_or_strict') + native_str_value = to_text(value, errors='surrogate_or_strict') if native_str_value in no_log_strings: return 'VALUE_SPECIFIED_IN_NO_LOG_PARAMETER' @@ -431,10 +450,31 @@ def remove_values(value, no_log_strings): value = to_bytes(native_str_value, encoding='utf-8', errors='surrogate_then_replace') else: value = native_str_value - elif isinstance(value, SEQUENCETYPE): - return [remove_values(elem, no_log_strings) for elem in value] + + elif isinstance(value, Sequence): + if isinstance(value, MutableSequence): + new_value = type(value)() + else: + new_value = [] # Need a mutable value + deferred_removals.append((value, new_value)) + value = new_value + + elif isinstance(value, Set): + if isinstance(value, MutableSet): + new_value = type(value)() + else: + new_value = set() # Need a mutable value + deferred_removals.append((value, new_value)) + value = new_value + elif isinstance(value, Mapping): - return dict((k, remove_values(v, no_log_strings)) for k, v in value.items()) + if isinstance(value, MutableMapping): + new_value = type(value)() + else: + new_value = {} # Need a mutable value + deferred_removals.append((value, new_value)) + value = new_value + elif isinstance(value, tuple(chain(NUMBERTYPES, (bool, NoneType)))): stringy_value = to_native(value, encoding='utf-8', errors='surrogate_or_strict') if stringy_value in no_log_strings: @@ -442,13 +482,42 @@ def remove_values(value, no_log_strings): for omit_me in no_log_strings: if omit_me in stringy_value: return 'VALUE_SPECIFIED_IN_NO_LOG_PARAMETER' + elif isinstance(value, datetime.datetime): value = value.isoformat() else: raise TypeError('Value of unknown type: %s, %s' % (type(value), value)) + return value +def remove_values(value, no_log_strings): + """ Remove strings in no_log_strings from value. If value is a container + type, then remove a lot more""" + deferred_removals = deque() + + no_log_strings = [to_native(s, errors='surrogate_or_strict') for s in no_log_strings] + new_value = _remove_values_conditions(value, no_log_strings, deferred_removals) + + while deferred_removals: + old_data, new_data = deferred_removals.popleft() + if isinstance(new_data, Mapping): + for old_key, old_elem in old_data.items(): + new_elem = _remove_values_conditions(old_elem, no_log_strings, deferred_removals) + new_data[old_key] = new_elem + else: + for elem in old_data: + new_elem = _remove_values_conditions(elem, no_log_strings, deferred_removals) + if isinstance(new_data, MutableSequence): + new_data.append(new_elem) + elif isinstance(new_data, MutableSet): + new_data.add(new_elem) + else: + raise TypeError('Unknown container type encountered when removing private values from output') + + return new_value + + def heuristic_log_sanitize(data, no_log_values=None): ''' Remove strings that look like passwords from log messages ''' # Currently filters: diff --git a/test/units/module_utils/basic/test_no_log.py b/test/units/module_utils/basic/test_no_log.py index 1e501f3882d..0c00a64cf52 100644 --- a/test/units/module_utils/basic/test_no_log.py +++ b/test/units/module_utils/basic/test_no_log.py @@ -90,10 +90,9 @@ class TestRemoveValues(unittest.TestCase): }, frozenset(['nope']) ), - ('Toshio くら', frozenset(['とみ'])), - (u'Toshio くら', frozenset(['とみ'])), + (u'Toshio くら'.encode('utf-8'), frozenset([u'とみ'.encode('utf-8')])), + (u'Toshio くら', frozenset([u'とみ'])), ) - dataset_remove = ( ('string', frozenset(['string']), OMIT), (1234, frozenset(['1234']), OMIT), @@ -138,8 +137,8 @@ class TestRemoveValues(unittest.TestCase): frozenset(['enigma', 'mystery', 'secret']), 'This sentence has an ******** wrapped in a ******** inside of a ********. - mr ********' ), - ('Toshio くらとみ', frozenset(['くらとみ']), 'Toshio ********'), - (u'Toshio くらとみ', frozenset(['くらとみ']), u'Toshio ********'), + (u'Toshio くらとみ'.encode('utf-8'), frozenset([u'くらとみ'.encode('utf-8')]), u'Toshio ********'.encode('utf-8')), + (u'Toshio くらとみ', frozenset([u'くらとみ']), u'Toshio ********'), ) def test_no_removal(self): @@ -152,3 +151,30 @@ class TestRemoveValues(unittest.TestCase): def test_unknown_type(self): self.assertRaises(TypeError, remove_values, object(), frozenset()) + + def test_hit_recursion_limit(self): + """ Check that we do not hit a recursion limit""" + data_list = [] + inner_list = data_list + for i in range(0, 10000): + new_list = [] + inner_list.append(new_list) + inner_list = new_list + inner_list.append('secret') + + # Check that this does not hit a recursion limit + actual_data_list = remove_values(data_list, frozenset(('secret',))) + + levels = 0 + inner_list = actual_data_list + while inner_list: + if isinstance(inner_list, list): + self.assertEquals(len(inner_list), 1) + else: + levels -= 1 + break + inner_list = inner_list[0] + levels += 1 + + self.assertEquals(inner_list, self.OMIT) + self.assertEquals(levels, 10000)