Merge pull request #10178 from ansible/password-obfuscation

Obfuscate passwords in a variety of output
This commit is contained in:
Toshio Kuratomi 2015-02-09 10:21:12 -08:00
commit d15eb066a1
4 changed files with 182 additions and 193 deletions

View file

@ -171,6 +171,7 @@ FILE_COMMON_ARGUMENTS=dict(
directory_mode = dict(), # used by copy
)
PASSWD_ARG_RE = re.compile(r'^[-]{0,2}pass[-]?(word|wd)?')
def get_platform():
''' what's the platform? example: Linux is a platform. '''
@ -269,6 +270,65 @@ def json_dict_bytes_to_unicode(d):
else:
return d
def heuristic_log_sanitize(data):
''' Remove strings that look like passwords from log messages '''
# Currently filters:
# user:pass@foo/whatever and http://username:pass@wherever/foo
# This code has false positives and consumes parts of logs that are
# not passwds
# begin: start of a passwd containing string
# end: end of a passwd containing string
# sep: char between user and passwd
# prev_begin: where in the overall string to start a search for
# a passwd
# sep_search_end: where in the string to end a search for the sep
output = []
begin = len(data)
prev_begin = begin
sep = 1
while sep:
# Find the potential end of a passwd
try:
end = data.rindex('@', 0, begin)
except ValueError:
# No passwd in the rest of the data
output.insert(0, data[0:begin])
break
# Search for the beginning of a passwd
sep = None
sep_search_end = end
while not sep:
# URL-style username+password
try:
begin = data.rindex('://', 0, sep_search_end)
except ValueError:
# No url style in the data, check for ssh style in the
# rest of the string
begin = 0
# Search for separator
try:
sep = data.index(':', begin + 3, end)
except ValueError:
# No separator; choices:
if begin == 0:
# Searched the whole string so there's no password
# here. Return the remaining data
output.insert(0, data[0:begin])
break
# Search for a different beginning of the password field.
sep_search_end = begin
continue
if sep:
# Password was found; remove it.
output.insert(0, data[end:prev_begin])
output.insert(0, '********')
output.insert(0, data[begin:sep + 1])
prev_begin = begin
return ''.join(output)
class AnsibleModule(object):
@ -1019,65 +1079,6 @@ class AnsibleModule(object):
params2.update(params)
return (params2, args)
def _heuristic_log_sanitize(self, data):
''' Remove strings that look like passwords from log messages '''
# Currently filters:
# user:pass@foo/whatever and http://username:pass@wherever/foo
# This code has false positives and consumes parts of logs that are
# not passwds
# begin: start of a passwd containing string
# end: end of a passwd containing string
# sep: char between user and passwd
# prev_begin: where in the overall string to start a search for
# a passwd
# sep_search_end: where in the string to end a search for the sep
output = []
begin = len(data)
prev_begin = begin
sep = 1
while sep:
# Find the potential end of a passwd
try:
end = data.rindex('@', 0, begin)
except ValueError:
# No passwd in the rest of the data
output.insert(0, data[0:begin])
break
# Search for the beginning of a passwd
sep = None
sep_search_end = end
while not sep:
# URL-style username+password
try:
begin = data.rindex('://', 0, sep_search_end)
except ValueError:
# No url style in the data, check for ssh style in the
# rest of the string
begin = 0
# Search for separator
try:
sep = data.index(':', begin + 3, end)
except ValueError:
# No separator; choices:
if begin == 0:
# Searched the whole string so there's no password
# here. Return the remaining data
output.insert(0, data[0:begin])
break
# Search for a different beginning of the password field.
sep_search_end = begin
continue
if sep:
# Password was found; remove it.
output.insert(0, data[end:prev_begin])
output.insert(0, '********')
output.insert(0, data[begin:sep + 1])
prev_begin = begin
return ''.join(output)
def _log_invocation(self):
''' log that ansible ran the module '''
# TODO: generalize a separate log function and make log_invocation use it
@ -1100,7 +1101,7 @@ class AnsibleModule(object):
param_val = str(param_val)
elif isinstance(param_val, unicode):
param_val = param_val.encode('utf-8')
log_args[param] = self._heuristic_log_sanitize(param_val)
log_args[param] = heuristic_log_sanitize(param_val)
module = 'ansible-%s' % os.path.basename(__file__)
msg = []
@ -1444,27 +1445,27 @@ class AnsibleModule(object):
# create a printable version of the command for use
# in reporting later, which strips out things like
# passwords from the args list
if isinstance(args, list):
clean_args = " ".join(pipes.quote(arg) for arg in args)
if isinstance(args, basestring):
to_clean_args = shlex.split(args.encode('utf-8'))
else:
clean_args = args
to_clean_args = args
# all clean strings should return two match groups,
# where the first is the CLI argument and the second
# is the password/key/phrase that will be hidden
clean_re_strings = [
# this removes things like --password, --pass, --pass-wd, etc.
# optionally followed by an '=' or a space. The password can
# be quoted or not too, though it does not care about quotes
# that are not balanced
# source: http://blog.stevenlevithan.com/archives/match-quoted-string
r'([-]{0,2}pass[-]?(?:word|wd)?[=\s]?)((?:["\'])?(?:[^\s])*(?:\1)?)',
r'^(?P<before>.*:)(?P<password>.*)(?P<after>\@.*)$',
# TODO: add more regex checks here
]
for re_str in clean_re_strings:
r = re.compile(re_str)
clean_args = r.sub(r'\1********', clean_args)
clean_args = []
is_passwd = False
for arg in to_clean_args:
if is_passwd:
is_passwd = False
clean_args.append('********')
continue
if PASSWD_ARG_RE.match(arg):
sep_idx = arg.find('=')
if sep_idx > -1:
clean_args.append('%s=********' % arg[:sep_idx])
continue
else:
is_passwd = True
clean_args.append(heuristic_log_sanitize(arg))
clean_args = ' '.join(pipes.quote(arg) for arg in clean_args)
if data:
st_in = subprocess.PIPE
@ -1549,7 +1550,7 @@ class AnsibleModule(object):
self.fail_json(rc=257, msg=traceback.format_exc(), cmd=clean_args)
if rc != 0 and check_rc:
msg = stderr.rstrip()
msg = heuristic_log_sanitize(stderr.rstrip())
self.fail_json(cmd=clean_args, rc=rc, stdout=stdout, stderr=stderr, msg=msg)
# reset the pwd

View file

@ -32,6 +32,7 @@ from ansible.utils.su_prompts import *
from ansible.utils.hashing import secure_hash, secure_hash_s, checksum, checksum_s, md5, md5s
from ansible.callbacks import display
from ansible.module_utils.splitter import split_args, unquote
from ansible.module_utils.basic import heuristic_log_sanitize
import ansible.constants as C
import ast
import time
@ -932,34 +933,18 @@ def sanitize_output(str):
private_keys = ['password', 'login_password']
filter_re = [
# filter out things like user:pass@foo/whatever
# and http://username:pass@wherever/foo
re.compile('^(?P<before>.*:)(?P<password>.*)(?P<after>\@.*)$'),
]
parts = parse_kv(str)
output = []
for (k, v) in parts.items():
if k in private_keys:
output.append("%s=VALUE_HIDDEN" % k)
continue
else:
v = heuristic_log_sanitize(v)
output.append('%s=%s' % (k, v))
output = ' '.join(output)
return output
parts = str.split()
output = ''
for part in parts:
try:
(k,v) = part.split('=', 1)
if k in private_keys:
output += " %s=VALUE_HIDDEN" % k
else:
found = False
for filter in filter_re:
m = filter.match(v)
if m:
d = m.groupdict()
output += " %s=%s" % (k, d['before'] + "********" + d['after'])
found = True
break
if not found:
output += " %s" % part
except:
output += " %s" % part
return output.strip()
####################################################################
# option handling code for /usr/bin/ansible and ansible-playbook

View file

@ -7,6 +7,7 @@ from nose.tools import timed
from ansible import errors
from ansible.module_common import ModuleReplacer
from ansible.module_utils.basic import heuristic_log_sanitize
from ansible.utils import checksum as utils_checksum
TEST_MODULE_DATA = """
@ -264,23 +265,23 @@ class TestModuleUtilsBasicHelpers(unittest.TestCase):
@timed(5)
def test_log_sanitize_speed_many_url(self):
self.module._heuristic_log_sanitize(self.many_url)
heuristic_log_sanitize(self.many_url)
@timed(5)
def test_log_sanitize_speed_many_ssh(self):
self.module._heuristic_log_sanitize(self.many_ssh)
heuristic_log_sanitize(self.many_ssh)
@timed(5)
def test_log_sanitize_speed_one_url(self):
self.module._heuristic_log_sanitize(self.one_url)
heuristic_log_sanitize(self.one_url)
@timed(5)
def test_log_sanitize_speed_one_ssh(self):
self.module._heuristic_log_sanitize(self.one_ssh)
heuristic_log_sanitize(self.one_ssh)
@timed(5)
def test_log_sanitize_speed_zero_secrets(self):
self.module._heuristic_log_sanitize(self.zero_secrets)
heuristic_log_sanitize(self.zero_secrets)
#
# Test that the password obfuscation sanitizes somewhat cleanly.
@ -290,8 +291,8 @@ class TestModuleUtilsBasicHelpers(unittest.TestCase):
url_data = repr(self._gen_data(3, True, True, self.URL_SECRET))
ssh_data = repr(self._gen_data(3, True, True, self.SSH_SECRET))
url_output = self.module._heuristic_log_sanitize(url_data)
ssh_output = self.module._heuristic_log_sanitize(ssh_data)
url_output = heuristic_log_sanitize(url_data)
ssh_output = heuristic_log_sanitize(ssh_data)
# Basic functionality: Successfully hid the password
try:

View file

@ -171,6 +171,7 @@ FILE_COMMON_ARGUMENTS=dict(
directory_mode = dict(), # used by copy
)
PASSWD_ARG_RE = re.compile(r'^[-]{0,2}pass[-]?(word|wd)?')
def get_platform():
''' what's the platform? example: Linux is a platform. '''
@ -269,6 +270,65 @@ def json_dict_bytes_to_unicode(d):
else:
return d
def heuristic_log_sanitize(data):
''' Remove strings that look like passwords from log messages '''
# Currently filters:
# user:pass@foo/whatever and http://username:pass@wherever/foo
# This code has false positives and consumes parts of logs that are
# not passwds
# begin: start of a passwd containing string
# end: end of a passwd containing string
# sep: char between user and passwd
# prev_begin: where in the overall string to start a search for
# a passwd
# sep_search_end: where in the string to end a search for the sep
output = []
begin = len(data)
prev_begin = begin
sep = 1
while sep:
# Find the potential end of a passwd
try:
end = data.rindex('@', 0, begin)
except ValueError:
# No passwd in the rest of the data
output.insert(0, data[0:begin])
break
# Search for the beginning of a passwd
sep = None
sep_search_end = end
while not sep:
# URL-style username+password
try:
begin = data.rindex('://', 0, sep_search_end)
except ValueError:
# No url style in the data, check for ssh style in the
# rest of the string
begin = 0
# Search for separator
try:
sep = data.index(':', begin + 3, end)
except ValueError:
# No separator; choices:
if begin == 0:
# Searched the whole string so there's no password
# here. Return the remaining data
output.insert(0, data[0:begin])
break
# Search for a different beginning of the password field.
sep_search_end = begin
continue
if sep:
# Password was found; remove it.
output.insert(0, data[end:prev_begin])
output.insert(0, '********')
output.insert(0, data[begin:sep + 1])
prev_begin = begin
return ''.join(output)
class AnsibleModule(object):
@ -1009,64 +1069,6 @@ class AnsibleModule(object):
params = dict()
return params
def _heuristic_log_sanitize(self, data):
''' Remove strings that look like passwords from log messages '''
# Currently filters:
# user:pass@foo/whatever and http://username:pass@wherever/foo
# This code has false positives and consumes parts of logs that are
# not passwds
# begin: start of a passwd containing string
# end: end of a passwd containing string
# sep: char between user and passwd
# prev_begin: where in the overall string to start a search for
# a passwd
# sep_search_end: where in the string to end a search for the sep
output = []
begin = len(data)
prev_begin = begin
sep = 1
while sep:
# Find the potential end of a passwd
try:
end = data.rindex('@', 0, begin)
except ValueError:
# No passwd in the rest of the data
output.insert(0, data[0:begin])
break
# Search for the beginning of a passwd
sep = None
sep_search_end = end
while not sep:
# URL-style username+password
try:
begin = data.rindex('://', 0, sep_search_end)
except ValueError:
# No url style in the data, check for ssh style in the
# rest of the string
begin = 0
# Search for separator
try:
sep = data.index(':', begin + 3, end)
except ValueError:
# No separator; choices:
if begin == 0:
# Searched the whole string so there's no password
# here. Return the remaining data
output.insert(0, data[0:begin])
break
# Search for a different beginning of the password field.
sep_search_end = begin
continue
if sep:
# Password was found; remove it.
output.insert(0, data[end:prev_begin])
output.insert(0, '********')
output.insert(0, data[begin:sep + 1])
prev_begin = begin
return ''.join(output)
def _log_invocation(self):
''' log that ansible ran the module '''
@ -1090,7 +1092,7 @@ class AnsibleModule(object):
param_val = str(param_val)
elif isinstance(param_val, unicode):
param_val = param_val.encode('utf-8')
log_args[param] = self._heuristic_log_sanitize(param_val)
log_args[param] = heuristic_log_sanitize(param_val)
module = 'ansible-%s' % os.path.basename(__file__)
msg = []
@ -1434,27 +1436,27 @@ class AnsibleModule(object):
# create a printable version of the command for use
# in reporting later, which strips out things like
# passwords from the args list
if isinstance(args, list):
clean_args = " ".join(pipes.quote(arg) for arg in args)
if isinstance(args, basestring):
to_clean_args = shlex.split(args.encode('utf-8'))
else:
clean_args = args
to_clean_args = args
# all clean strings should return two match groups,
# where the first is the CLI argument and the second
# is the password/key/phrase that will be hidden
clean_re_strings = [
# this removes things like --password, --pass, --pass-wd, etc.
# optionally followed by an '=' or a space. The password can
# be quoted or not too, though it does not care about quotes
# that are not balanced
# source: http://blog.stevenlevithan.com/archives/match-quoted-string
r'([-]{0,2}pass[-]?(?:word|wd)?[=\s]?)((?:["\'])?(?:[^\s])*(?:\1)?)',
r'^(?P<before>.*:)(?P<password>.*)(?P<after>\@.*)$',
# TODO: add more regex checks here
]
for re_str in clean_re_strings:
r = re.compile(re_str)
clean_args = r.sub(r'\1********', clean_args)
clean_args = []
is_passwd = False
for arg in to_clean_args:
if is_passwd:
is_passwd = False
clean_args.append('********')
continue
if PASSWD_ARG_RE.match(arg):
sep_idx = arg.find('=')
if sep_idx > -1:
clean_args.append('%s=********' % arg[:sep_idx])
continue
else:
is_passwd = True
clean_args.append(heuristic_log_sanitize(arg))
clean_args = ' '.join(pipes.quote(arg) for arg in clean_args)
if data:
st_in = subprocess.PIPE
@ -1539,7 +1541,7 @@ class AnsibleModule(object):
self.fail_json(rc=257, msg=traceback.format_exc(), cmd=clean_args)
if rc != 0 and check_rc:
msg = stderr.rstrip()
msg = heuristic_log_sanitize(stderr.rstrip())
self.fail_json(cmd=clean_args, rc=rc, stdout=stdout, stderr=stderr, msg=msg)
# reset the pwd