remove_values could hit the recursion limit
When operating on arbitrary return data from modules, it is possible to hit the recursion limit when cleaning out no_log values from the data. To fix this, we have to switch from recursion to iteration. Unittest for remove_values recursion limit Fixes #24560
This commit is contained in:
parent
f52dcef7ba
commit
be4e7a05fa
2 changed files with 115 additions and 20 deletions
|
@ -89,6 +89,8 @@ import pwd
|
|||
import platform
|
||||
import errno
|
||||
import datetime
|
||||
from collections import deque
|
||||
from collections import Mapping, MutableMapping, Sequence, MutableSequence, Set, MutableSet
|
||||
from itertools import repeat, chain
|
||||
|
||||
try:
|
||||
|
@ -113,13 +115,6 @@ except ImportError:
|
|||
# Python2 & 3 way to get NoneType
|
||||
NoneType = type(None)
|
||||
|
||||
try:
|
||||
from collections import Sequence, Mapping
|
||||
except ImportError:
|
||||
# python2.5
|
||||
Sequence = (list, tuple)
|
||||
Mapping = (dict,)
|
||||
|
||||
# Note: When getting Sequence from collections, it matches with strings. If
|
||||
# this matters, make sure to check for strings before checking for sequencetype
|
||||
try:
|
||||
|
@ -405,20 +400,44 @@ def return_values(obj):
|
|||
raise TypeError('Unknown parameter type: %s, %s' % (type(obj), obj))
|
||||
|
||||
|
||||
def remove_values(value, no_log_strings):
|
||||
""" Remove strings in no_log_strings from value. If value is a container
|
||||
type, then remove a lot more"""
|
||||
def _remove_values_conditions(value, no_log_strings, deferred_removals):
|
||||
"""
|
||||
Helper function for :meth:`remove_values`.
|
||||
|
||||
:arg value: The value to check for strings that need to be stripped
|
||||
:arg no_log_strings: set of strings which must be stripped out of any values
|
||||
:arg deferred_removals: List which holds information about nested
|
||||
containers that have to be iterated for removals. It is passed into
|
||||
this function so that more entries can be added to it if value is
|
||||
a container type. The format of each entry is a 2-tuple where the first
|
||||
element is the ``value`` parameter and the second value is a new
|
||||
container to copy the elements of ``value`` into once iterated.
|
||||
:returns: if ``value`` is a scalar, returns ``value`` with two exceptions:
|
||||
1. :class:`~datetime.datetime` objects which are changed into a string representation.
|
||||
2. objects which are in no_log_strings are replaced with a placeholder
|
||||
so that no sensitive data is leaked.
|
||||
If ``value`` is a container type, returns a new empty container.
|
||||
|
||||
``deferred_removals`` is added to as a side-effect of this function.
|
||||
|
||||
.. warning:: It is up to the caller to make sure the order in which value
|
||||
is passed in is correct. For instance, higher level containers need
|
||||
to be passed in before lower level containers. For example, given
|
||||
``{'level1': {'level2': 'level3': [True]} }`` first pass in the
|
||||
dictionary for ``level1``, then the dict for ``level2``, and finally
|
||||
the list for ``level3``.
|
||||
"""
|
||||
if isinstance(value, (text_type, binary_type)):
|
||||
# Need native str type
|
||||
native_str_value = value
|
||||
if isinstance(value, text_type):
|
||||
value_is_text = True
|
||||
if PY2:
|
||||
native_str_value = to_bytes(value, encoding='utf-8', errors='surrogate_or_strict')
|
||||
native_str_value = to_bytes(value, errors='surrogate_or_strict')
|
||||
elif isinstance(value, binary_type):
|
||||
value_is_text = False
|
||||
if PY3:
|
||||
native_str_value = to_text(value, encoding='utf-8', errors='surrogate_or_strict')
|
||||
native_str_value = to_text(value, errors='surrogate_or_strict')
|
||||
|
||||
if native_str_value in no_log_strings:
|
||||
return 'VALUE_SPECIFIED_IN_NO_LOG_PARAMETER'
|
||||
|
@ -431,10 +450,31 @@ def remove_values(value, no_log_strings):
|
|||
value = to_bytes(native_str_value, encoding='utf-8', errors='surrogate_then_replace')
|
||||
else:
|
||||
value = native_str_value
|
||||
elif isinstance(value, SEQUENCETYPE):
|
||||
return [remove_values(elem, no_log_strings) for elem in value]
|
||||
|
||||
elif isinstance(value, Sequence):
|
||||
if isinstance(value, MutableSequence):
|
||||
new_value = type(value)()
|
||||
else:
|
||||
new_value = [] # Need a mutable value
|
||||
deferred_removals.append((value, new_value))
|
||||
value = new_value
|
||||
|
||||
elif isinstance(value, Set):
|
||||
if isinstance(value, MutableSet):
|
||||
new_value = type(value)()
|
||||
else:
|
||||
new_value = set() # Need a mutable value
|
||||
deferred_removals.append((value, new_value))
|
||||
value = new_value
|
||||
|
||||
elif isinstance(value, Mapping):
|
||||
return dict((k, remove_values(v, no_log_strings)) for k, v in value.items())
|
||||
if isinstance(value, MutableMapping):
|
||||
new_value = type(value)()
|
||||
else:
|
||||
new_value = {} # Need a mutable value
|
||||
deferred_removals.append((value, new_value))
|
||||
value = new_value
|
||||
|
||||
elif isinstance(value, tuple(chain(NUMBERTYPES, (bool, NoneType)))):
|
||||
stringy_value = to_native(value, encoding='utf-8', errors='surrogate_or_strict')
|
||||
if stringy_value in no_log_strings:
|
||||
|
@ -442,13 +482,42 @@ def remove_values(value, no_log_strings):
|
|||
for omit_me in no_log_strings:
|
||||
if omit_me in stringy_value:
|
||||
return 'VALUE_SPECIFIED_IN_NO_LOG_PARAMETER'
|
||||
|
||||
elif isinstance(value, datetime.datetime):
|
||||
value = value.isoformat()
|
||||
else:
|
||||
raise TypeError('Value of unknown type: %s, %s' % (type(value), value))
|
||||
|
||||
return value
|
||||
|
||||
|
||||
def remove_values(value, no_log_strings):
|
||||
""" Remove strings in no_log_strings from value. If value is a container
|
||||
type, then remove a lot more"""
|
||||
deferred_removals = deque()
|
||||
|
||||
no_log_strings = [to_native(s, errors='surrogate_or_strict') for s in no_log_strings]
|
||||
new_value = _remove_values_conditions(value, no_log_strings, deferred_removals)
|
||||
|
||||
while deferred_removals:
|
||||
old_data, new_data = deferred_removals.popleft()
|
||||
if isinstance(new_data, Mapping):
|
||||
for old_key, old_elem in old_data.items():
|
||||
new_elem = _remove_values_conditions(old_elem, no_log_strings, deferred_removals)
|
||||
new_data[old_key] = new_elem
|
||||
else:
|
||||
for elem in old_data:
|
||||
new_elem = _remove_values_conditions(elem, no_log_strings, deferred_removals)
|
||||
if isinstance(new_data, MutableSequence):
|
||||
new_data.append(new_elem)
|
||||
elif isinstance(new_data, MutableSet):
|
||||
new_data.add(new_elem)
|
||||
else:
|
||||
raise TypeError('Unknown container type encountered when removing private values from output')
|
||||
|
||||
return new_value
|
||||
|
||||
|
||||
def heuristic_log_sanitize(data, no_log_values=None):
|
||||
''' Remove strings that look like passwords from log messages '''
|
||||
# Currently filters:
|
||||
|
|
|
@ -90,10 +90,9 @@ class TestRemoveValues(unittest.TestCase):
|
|||
},
|
||||
frozenset(['nope'])
|
||||
),
|
||||
('Toshio くら', frozenset(['とみ'])),
|
||||
(u'Toshio くら', frozenset(['とみ'])),
|
||||
(u'Toshio くら'.encode('utf-8'), frozenset([u'とみ'.encode('utf-8')])),
|
||||
(u'Toshio くら', frozenset([u'とみ'])),
|
||||
)
|
||||
|
||||
dataset_remove = (
|
||||
('string', frozenset(['string']), OMIT),
|
||||
(1234, frozenset(['1234']), OMIT),
|
||||
|
@ -138,8 +137,8 @@ class TestRemoveValues(unittest.TestCase):
|
|||
frozenset(['enigma', 'mystery', 'secret']),
|
||||
'This sentence has an ******** wrapped in a ******** inside of a ********. - mr ********'
|
||||
),
|
||||
('Toshio くらとみ', frozenset(['くらとみ']), 'Toshio ********'),
|
||||
(u'Toshio くらとみ', frozenset(['くらとみ']), u'Toshio ********'),
|
||||
(u'Toshio くらとみ'.encode('utf-8'), frozenset([u'くらとみ'.encode('utf-8')]), u'Toshio ********'.encode('utf-8')),
|
||||
(u'Toshio くらとみ', frozenset([u'くらとみ']), u'Toshio ********'),
|
||||
)
|
||||
|
||||
def test_no_removal(self):
|
||||
|
@ -152,3 +151,30 @@ class TestRemoveValues(unittest.TestCase):
|
|||
|
||||
def test_unknown_type(self):
|
||||
self.assertRaises(TypeError, remove_values, object(), frozenset())
|
||||
|
||||
def test_hit_recursion_limit(self):
|
||||
""" Check that we do not hit a recursion limit"""
|
||||
data_list = []
|
||||
inner_list = data_list
|
||||
for i in range(0, 10000):
|
||||
new_list = []
|
||||
inner_list.append(new_list)
|
||||
inner_list = new_list
|
||||
inner_list.append('secret')
|
||||
|
||||
# Check that this does not hit a recursion limit
|
||||
actual_data_list = remove_values(data_list, frozenset(('secret',)))
|
||||
|
||||
levels = 0
|
||||
inner_list = actual_data_list
|
||||
while inner_list:
|
||||
if isinstance(inner_list, list):
|
||||
self.assertEquals(len(inner_list), 1)
|
||||
else:
|
||||
levels -= 1
|
||||
break
|
||||
inner_list = inner_list[0]
|
||||
levels += 1
|
||||
|
||||
self.assertEquals(inner_list, self.OMIT)
|
||||
self.assertEquals(levels, 10000)
|
||||
|
|
Loading…
Reference in a new issue