remove_values could hit the recursion limit

When operating on arbitrary return data from modules, it is possible to
hit the recursion limit when cleaning out no_log values from the data.
To fix this, we have to switch from recursion to iteration.

Unittest for remove_values recursion limit

Fixes #24560
This commit is contained in:
Toshio Kuratomi 2017-05-21 06:18:35 -07:00
parent f52dcef7ba
commit be4e7a05fa
2 changed files with 115 additions and 20 deletions

View file

@ -89,6 +89,8 @@ import pwd
import platform
import errno
import datetime
from collections import deque
from collections import Mapping, MutableMapping, Sequence, MutableSequence, Set, MutableSet
from itertools import repeat, chain
try:
@ -113,13 +115,6 @@ except ImportError:
# Python2 & 3 way to get NoneType
NoneType = type(None)
try:
from collections import Sequence, Mapping
except ImportError:
# python2.5
Sequence = (list, tuple)
Mapping = (dict,)
# Note: When getting Sequence from collections, it matches with strings. If
# this matters, make sure to check for strings before checking for sequencetype
try:
@ -405,20 +400,44 @@ def return_values(obj):
raise TypeError('Unknown parameter type: %s, %s' % (type(obj), obj))
def remove_values(value, no_log_strings):
""" Remove strings in no_log_strings from value. If value is a container
type, then remove a lot more"""
def _remove_values_conditions(value, no_log_strings, deferred_removals):
"""
Helper function for :meth:`remove_values`.
:arg value: The value to check for strings that need to be stripped
:arg no_log_strings: set of strings which must be stripped out of any values
:arg deferred_removals: List which holds information about nested
containers that have to be iterated for removals. It is passed into
this function so that more entries can be added to it if value is
a container type. The format of each entry is a 2-tuple where the first
element is the ``value`` parameter and the second value is a new
container to copy the elements of ``value`` into once iterated.
:returns: if ``value`` is a scalar, returns ``value`` with two exceptions:
1. :class:`~datetime.datetime` objects which are changed into a string representation.
2. objects which are in no_log_strings are replaced with a placeholder
so that no sensitive data is leaked.
If ``value`` is a container type, returns a new empty container.
``deferred_removals`` is added to as a side-effect of this function.
.. warning:: It is up to the caller to make sure the order in which value
is passed in is correct. For instance, higher level containers need
to be passed in before lower level containers. For example, given
``{'level1': {'level2': 'level3': [True]} }`` first pass in the
dictionary for ``level1``, then the dict for ``level2``, and finally
the list for ``level3``.
"""
if isinstance(value, (text_type, binary_type)):
# Need native str type
native_str_value = value
if isinstance(value, text_type):
value_is_text = True
if PY2:
native_str_value = to_bytes(value, encoding='utf-8', errors='surrogate_or_strict')
native_str_value = to_bytes(value, errors='surrogate_or_strict')
elif isinstance(value, binary_type):
value_is_text = False
if PY3:
native_str_value = to_text(value, encoding='utf-8', errors='surrogate_or_strict')
native_str_value = to_text(value, errors='surrogate_or_strict')
if native_str_value in no_log_strings:
return 'VALUE_SPECIFIED_IN_NO_LOG_PARAMETER'
@ -431,10 +450,31 @@ def remove_values(value, no_log_strings):
value = to_bytes(native_str_value, encoding='utf-8', errors='surrogate_then_replace')
else:
value = native_str_value
elif isinstance(value, SEQUENCETYPE):
return [remove_values(elem, no_log_strings) for elem in value]
elif isinstance(value, Sequence):
if isinstance(value, MutableSequence):
new_value = type(value)()
else:
new_value = [] # Need a mutable value
deferred_removals.append((value, new_value))
value = new_value
elif isinstance(value, Set):
if isinstance(value, MutableSet):
new_value = type(value)()
else:
new_value = set() # Need a mutable value
deferred_removals.append((value, new_value))
value = new_value
elif isinstance(value, Mapping):
return dict((k, remove_values(v, no_log_strings)) for k, v in value.items())
if isinstance(value, MutableMapping):
new_value = type(value)()
else:
new_value = {} # Need a mutable value
deferred_removals.append((value, new_value))
value = new_value
elif isinstance(value, tuple(chain(NUMBERTYPES, (bool, NoneType)))):
stringy_value = to_native(value, encoding='utf-8', errors='surrogate_or_strict')
if stringy_value in no_log_strings:
@ -442,13 +482,42 @@ def remove_values(value, no_log_strings):
for omit_me in no_log_strings:
if omit_me in stringy_value:
return 'VALUE_SPECIFIED_IN_NO_LOG_PARAMETER'
elif isinstance(value, datetime.datetime):
value = value.isoformat()
else:
raise TypeError('Value of unknown type: %s, %s' % (type(value), value))
return value
def remove_values(value, no_log_strings):
""" Remove strings in no_log_strings from value. If value is a container
type, then remove a lot more"""
deferred_removals = deque()
no_log_strings = [to_native(s, errors='surrogate_or_strict') for s in no_log_strings]
new_value = _remove_values_conditions(value, no_log_strings, deferred_removals)
while deferred_removals:
old_data, new_data = deferred_removals.popleft()
if isinstance(new_data, Mapping):
for old_key, old_elem in old_data.items():
new_elem = _remove_values_conditions(old_elem, no_log_strings, deferred_removals)
new_data[old_key] = new_elem
else:
for elem in old_data:
new_elem = _remove_values_conditions(elem, no_log_strings, deferred_removals)
if isinstance(new_data, MutableSequence):
new_data.append(new_elem)
elif isinstance(new_data, MutableSet):
new_data.add(new_elem)
else:
raise TypeError('Unknown container type encountered when removing private values from output')
return new_value
def heuristic_log_sanitize(data, no_log_values=None):
''' Remove strings that look like passwords from log messages '''
# Currently filters:

View file

@ -90,10 +90,9 @@ class TestRemoveValues(unittest.TestCase):
},
frozenset(['nope'])
),
('Toshio くら', frozenset(['とみ'])),
(u'Toshio くら', frozenset(['とみ'])),
(u'Toshio くら'.encode('utf-8'), frozenset([u'とみ'.encode('utf-8')])),
(u'Toshio くら', frozenset([u'とみ'])),
)
dataset_remove = (
('string', frozenset(['string']), OMIT),
(1234, frozenset(['1234']), OMIT),
@ -138,8 +137,8 @@ class TestRemoveValues(unittest.TestCase):
frozenset(['enigma', 'mystery', 'secret']),
'This sentence has an ******** wrapped in a ******** inside of a ********. - mr ********'
),
('Toshio くらとみ', frozenset(['くらとみ']), 'Toshio ********'),
(u'Toshio くらとみ', frozenset(['くらとみ']), u'Toshio ********'),
(u'Toshio くらとみ'.encode('utf-8'), frozenset([u'くらとみ'.encode('utf-8')]), u'Toshio ********'.encode('utf-8')),
(u'Toshio くらとみ', frozenset([u'くらとみ']), u'Toshio ********'),
)
def test_no_removal(self):
@ -152,3 +151,30 @@ class TestRemoveValues(unittest.TestCase):
def test_unknown_type(self):
self.assertRaises(TypeError, remove_values, object(), frozenset())
def test_hit_recursion_limit(self):
""" Check that we do not hit a recursion limit"""
data_list = []
inner_list = data_list
for i in range(0, 10000):
new_list = []
inner_list.append(new_list)
inner_list = new_list
inner_list.append('secret')
# Check that this does not hit a recursion limit
actual_data_list = remove_values(data_list, frozenset(('secret',)))
levels = 0
inner_list = actual_data_list
while inner_list:
if isinstance(inner_list, list):
self.assertEquals(len(inner_list), 1)
else:
levels -= 1
break
inner_list = inner_list[0]
levels += 1
self.assertEquals(inner_list, self.OMIT)
self.assertEquals(levels, 10000)