From 4902c063045458278146914b87638822785ed67a Mon Sep 17 00:00:00 2001
From: Toshio Kuratomi <toshio@fedoraproject.org>
Date: Mon, 9 Feb 2015 10:13:13 -0800
Subject: [PATCH] Obfuscate passwords in more places where it is displayed

---
 lib/ansible/module_utils/basic.py  | 161 +++++++++++++++--------------
 lib/ansible/utils/__init__.py      |  39 +++----
 test/units/TestModuleUtilsBasic.py |  15 +--
 v2/ansible/module_utils/basic.py   | 160 ++++++++++++++--------------
 4 files changed, 182 insertions(+), 193 deletions(-)

diff --git a/lib/ansible/module_utils/basic.py b/lib/ansible/module_utils/basic.py
index 8603976c5a0..8b14536ab53 100644
--- a/lib/ansible/module_utils/basic.py
+++ b/lib/ansible/module_utils/basic.py
@@ -171,6 +171,7 @@ FILE_COMMON_ARGUMENTS=dict(
     directory_mode = dict(), # used by copy
 )
 
+PASSWD_ARG_RE = re.compile(r'^[-]{0,2}pass[-]?(word|wd)?')
 
 def get_platform():
     ''' what's the platform?  example: Linux is a platform. '''
@@ -269,6 +270,65 @@ def json_dict_bytes_to_unicode(d):
     else:
         return d
 
+def heuristic_log_sanitize(data):
+    ''' Remove strings that look like passwords from log messages '''
+    # Currently filters:
+    # user:pass@foo/whatever and http://username:pass@wherever/foo
+    # This code has false positives and consumes parts of logs that are
+    # not passwds
+
+    # begin: start of a passwd containing string
+    # end: end of a passwd containing string
+    # sep: char between user and passwd
+    # prev_begin: where in the overall string to start a search for
+    #   a passwd
+    # sep_search_end: where in the string to end a search for the sep
+    output = []
+    begin = len(data)
+    prev_begin = begin
+    sep = 1
+    while sep:
+        # Find the potential end of a passwd
+        try:
+            end = data.rindex('@', 0, begin)
+        except ValueError:
+            # No passwd in the rest of the data
+            output.insert(0, data[0:begin])
+            break
+
+        # Search for the beginning of a passwd
+        sep = None
+        sep_search_end = end
+        while not sep:
+            # URL-style username+password
+            try:
+                begin = data.rindex('://', 0, sep_search_end)
+            except ValueError:
+                # No url style in the data, check for ssh style in the
+                # rest of the string
+                begin = 0
+            # Search for separator
+            try:
+                sep = data.index(':', begin + 3, end)
+            except ValueError:
+                # No separator; choices:
+                if begin == 0:
+                    # Searched the whole string so there's no password
+                    # here.  Return the remaining data
+                    output.insert(0, data[0:begin])
+                    break
+                # Search for a different beginning of the password field.
+                sep_search_end = begin
+                continue
+        if sep:
+            # Password was found; remove it.
+            output.insert(0, data[end:prev_begin])
+            output.insert(0, '********')
+            output.insert(0, data[begin:sep + 1])
+            prev_begin = begin
+
+    return ''.join(output)
+
 
 class AnsibleModule(object):
 
@@ -1019,65 +1079,6 @@ class AnsibleModule(object):
         params2.update(params)
         return (params2, args)
 
-    def _heuristic_log_sanitize(self, data):
-        ''' Remove strings that look like passwords from log messages '''
-        # Currently filters:
-        # user:pass@foo/whatever and http://username:pass@wherever/foo
-        # This code has false positives and consumes parts of logs that are
-        # not passwds
-
-        # begin: start of a passwd containing string
-        # end: end of a passwd containing string
-        # sep: char between user and passwd
-        # prev_begin: where in the overall string to start a search for
-        #   a passwd
-        # sep_search_end: where in the string to end a search for the sep
-        output = []
-        begin = len(data)
-        prev_begin = begin
-        sep = 1
-        while sep:
-            # Find the potential end of a passwd
-            try:
-                end = data.rindex('@', 0, begin)
-            except ValueError:
-                # No passwd in the rest of the data
-                output.insert(0, data[0:begin])
-                break
-
-            # Search for the beginning of a passwd
-            sep = None
-            sep_search_end = end
-            while not sep:
-                # URL-style username+password
-                try:
-                    begin = data.rindex('://', 0, sep_search_end)
-                except ValueError:
-                    # No url style in the data, check for ssh style in the
-                    # rest of the string
-                    begin = 0
-                # Search for separator
-                try:
-                    sep = data.index(':', begin + 3, end)
-                except ValueError:
-                    # No separator; choices:
-                    if begin == 0:
-                        # Searched the whole string so there's no password
-                        # here.  Return the remaining data
-                        output.insert(0, data[0:begin])
-                        break
-                    # Search for a different beginning of the password field.
-                    sep_search_end = begin
-                    continue
-            if sep:
-                # Password was found; remove it.
-                output.insert(0, data[end:prev_begin])
-                output.insert(0, '********')
-                output.insert(0, data[begin:sep + 1])
-                prev_begin = begin
-
-        return ''.join(output)
-
     def _log_invocation(self):
         ''' log that ansible ran the module '''
         # TODO: generalize a separate log function and make log_invocation use it
@@ -1100,7 +1101,7 @@ class AnsibleModule(object):
                     param_val = str(param_val)
                 elif isinstance(param_val, unicode):
                     param_val = param_val.encode('utf-8')
-                log_args[param] = self._heuristic_log_sanitize(param_val)
+                log_args[param] = heuristic_log_sanitize(param_val)
 
         module = 'ansible-%s' % os.path.basename(__file__)
         msg = []
@@ -1444,27 +1445,27 @@ class AnsibleModule(object):
         # create a printable version of the command for use
         # in reporting later, which strips out things like
         # passwords from the args list
-        if isinstance(args, list):
-            clean_args = " ".join(pipes.quote(arg) for arg in args)
+        if isinstance(args, basestring):
+            to_clean_args = shlex.split(args.encode('utf-8'))
         else:
-            clean_args = args
+            to_clean_args = args
 
-        # all clean strings should return two match groups, 
-        # where the first is the CLI argument and the second 
-        # is the password/key/phrase that will be hidden
-        clean_re_strings = [
-            # this removes things like --password, --pass, --pass-wd, etc.
-            # optionally followed by an '=' or a space. The password can 
-            # be quoted or not too, though it does not care about quotes
-            # that are not balanced
-            # source: http://blog.stevenlevithan.com/archives/match-quoted-string
-            r'([-]{0,2}pass[-]?(?:word|wd)?[=\s]?)((?:["\'])?(?:[^\s])*(?:\1)?)',
-            r'^(?P<before>.*:)(?P<password>.*)(?P<after>\@.*)$', 
-            # TODO: add more regex checks here
-        ]
-        for re_str in clean_re_strings:
-            r = re.compile(re_str)
-            clean_args = r.sub(r'\1********', clean_args)
+        clean_args = []
+        is_passwd = False
+        for arg in to_clean_args:
+            if is_passwd:
+                is_passwd = False
+                clean_args.append('********')
+                continue
+            if PASSWD_ARG_RE.match(arg):
+                sep_idx = arg.find('=')
+                if sep_idx > -1:
+                    clean_args.append('%s=********' % arg[:sep_idx])
+                    continue
+                else:
+                    is_passwd = True
+            clean_args.append(heuristic_log_sanitize(arg))
+        clean_args = ' '.join(pipes.quote(arg) for arg in clean_args)
 
         if data:
             st_in = subprocess.PIPE
@@ -1549,7 +1550,7 @@ class AnsibleModule(object):
             self.fail_json(rc=257, msg=traceback.format_exc(), cmd=clean_args)
 
         if rc != 0 and check_rc:
-            msg = stderr.rstrip()
+            msg = heuristic_log_sanitize(stderr.rstrip())
             self.fail_json(cmd=clean_args, rc=rc, stdout=stdout, stderr=stderr, msg=msg)
 
         # reset the pwd
diff --git a/lib/ansible/utils/__init__.py b/lib/ansible/utils/__init__.py
index 7c4d82914bd..8e3edf4f4f8 100644
--- a/lib/ansible/utils/__init__.py
+++ b/lib/ansible/utils/__init__.py
@@ -32,6 +32,7 @@ from ansible.utils.su_prompts import *
 from ansible.utils.hashing import secure_hash, secure_hash_s, checksum, checksum_s, md5, md5s
 from ansible.callbacks import display
 from ansible.module_utils.splitter import split_args, unquote
+from ansible.module_utils.basic import heuristic_log_sanitize
 import ansible.constants as C
 import ast
 import time
@@ -932,34 +933,18 @@ def sanitize_output(str):
 
     private_keys = ['password', 'login_password']
 
-    filter_re = [
-        # filter out things like user:pass@foo/whatever
-        # and http://username:pass@wherever/foo
-        re.compile('^(?P<before>.*:)(?P<password>.*)(?P<after>\@.*)$'),
-    ]
+    parts = parse_kv(str)
+    output = []
+    for (k, v) in parts.items():
+        if k in private_keys:
+            output.append("%s=VALUE_HIDDEN" % k)
+            continue
+        else:
+            v = heuristic_log_sanitize(v)
+        output.append('%s=%s' % (k, v))
+    output = ' '.join(output)
+    return output
 
-    parts = str.split()
-    output = ''
-    for part in parts:
-        try:
-            (k,v) = part.split('=', 1)
-            if k in private_keys:
-                output += " %s=VALUE_HIDDEN" % k
-            else:
-                found = False
-                for filter in filter_re:
-                    m = filter.match(v)
-                    if m:
-                        d = m.groupdict()
-                        output += " %s=%s" % (k, d['before'] + "********" + d['after'])
-                        found = True
-                        break
-                if not found:
-                    output += " %s" % part
-        except:
-            output += " %s" % part
-
-    return output.strip()
 
 ####################################################################
 # option handling code for /usr/bin/ansible and ansible-playbook
diff --git a/test/units/TestModuleUtilsBasic.py b/test/units/TestModuleUtilsBasic.py
index 18a4e0d7724..2ac77764d74 100644
--- a/test/units/TestModuleUtilsBasic.py
+++ b/test/units/TestModuleUtilsBasic.py
@@ -7,6 +7,7 @@ from nose.tools import timed
 
 from ansible import errors
 from ansible.module_common import ModuleReplacer
+from ansible.module_utils.basic import heuristic_log_sanitize
 from ansible.utils import checksum as utils_checksum
 
 TEST_MODULE_DATA = """
@@ -264,23 +265,23 @@ class TestModuleUtilsBasicHelpers(unittest.TestCase):
 
     @timed(5)
     def test_log_sanitize_speed_many_url(self):
-        self.module._heuristic_log_sanitize(self.many_url)
+        heuristic_log_sanitize(self.many_url)
 
     @timed(5)
     def test_log_sanitize_speed_many_ssh(self):
-        self.module._heuristic_log_sanitize(self.many_ssh)
+        heuristic_log_sanitize(self.many_ssh)
 
     @timed(5)
     def test_log_sanitize_speed_one_url(self):
-        self.module._heuristic_log_sanitize(self.one_url)
+        heuristic_log_sanitize(self.one_url)
 
     @timed(5)
     def test_log_sanitize_speed_one_ssh(self):
-        self.module._heuristic_log_sanitize(self.one_ssh)
+        heuristic_log_sanitize(self.one_ssh)
 
     @timed(5)
     def test_log_sanitize_speed_zero_secrets(self):
-        self.module._heuristic_log_sanitize(self.zero_secrets)
+        heuristic_log_sanitize(self.zero_secrets)
 
     #
     # Test that the password obfuscation sanitizes somewhat cleanly.
@@ -290,8 +291,8 @@ class TestModuleUtilsBasicHelpers(unittest.TestCase):
         url_data = repr(self._gen_data(3, True, True, self.URL_SECRET))
         ssh_data = repr(self._gen_data(3, True, True, self.SSH_SECRET))
 
-        url_output = self.module._heuristic_log_sanitize(url_data)
-        ssh_output = self.module._heuristic_log_sanitize(ssh_data)
+        url_output = heuristic_log_sanitize(url_data)
+        ssh_output = heuristic_log_sanitize(ssh_data)
 
         # Basic functionality: Successfully hid the password
         try:
diff --git a/v2/ansible/module_utils/basic.py b/v2/ansible/module_utils/basic.py
index cd4d6024537..099b808b2c6 100644
--- a/v2/ansible/module_utils/basic.py
+++ b/v2/ansible/module_utils/basic.py
@@ -171,6 +171,7 @@ FILE_COMMON_ARGUMENTS=dict(
     directory_mode = dict(), # used by copy
 )
 
+PASSWD_ARG_RE = re.compile(r'^[-]{0,2}pass[-]?(word|wd)?')
 
 def get_platform():
     ''' what's the platform?  example: Linux is a platform. '''
@@ -269,6 +270,65 @@ def json_dict_bytes_to_unicode(d):
     else:
         return d
 
+def heuristic_log_sanitize(data):
+    ''' Remove strings that look like passwords from log messages '''
+    # Currently filters:
+    # user:pass@foo/whatever and http://username:pass@wherever/foo
+    # This code has false positives and consumes parts of logs that are
+    # not passwds
+
+    # begin: start of a passwd containing string
+    # end: end of a passwd containing string
+    # sep: char between user and passwd
+    # prev_begin: where in the overall string to start a search for
+    #   a passwd
+    # sep_search_end: where in the string to end a search for the sep
+    output = []
+    begin = len(data)
+    prev_begin = begin
+    sep = 1
+    while sep:
+        # Find the potential end of a passwd
+        try:
+            end = data.rindex('@', 0, begin)
+        except ValueError:
+            # No passwd in the rest of the data
+            output.insert(0, data[0:begin])
+            break
+
+        # Search for the beginning of a passwd
+        sep = None
+        sep_search_end = end
+        while not sep:
+            # URL-style username+password
+            try:
+                begin = data.rindex('://', 0, sep_search_end)
+            except ValueError:
+                # No url style in the data, check for ssh style in the
+                # rest of the string
+                begin = 0
+            # Search for separator
+            try:
+                sep = data.index(':', begin + 3, end)
+            except ValueError:
+                # No separator; choices:
+                if begin == 0:
+                    # Searched the whole string so there's no password
+                    # here.  Return the remaining data
+                    output.insert(0, data[0:begin])
+                    break
+                # Search for a different beginning of the password field.
+                sep_search_end = begin
+                continue
+        if sep:
+            # Password was found; remove it.
+            output.insert(0, data[end:prev_begin])
+            output.insert(0, '********')
+            output.insert(0, data[begin:sep + 1])
+            prev_begin = begin
+
+    return ''.join(output)
+
 
 class AnsibleModule(object):
 
@@ -1009,64 +1069,6 @@ class AnsibleModule(object):
             params = dict()
         return params
 
-    def _heuristic_log_sanitize(self, data):
-        ''' Remove strings that look like passwords from log messages '''
-        # Currently filters:
-        # user:pass@foo/whatever and http://username:pass@wherever/foo
-        # This code has false positives and consumes parts of logs that are
-        # not passwds
-
-        # begin: start of a passwd containing string
-        # end: end of a passwd containing string
-        # sep: char between user and passwd
-        # prev_begin: where in the overall string to start a search for
-        #   a passwd
-        # sep_search_end: where in the string to end a search for the sep
-        output = []
-        begin = len(data)
-        prev_begin = begin
-        sep = 1
-        while sep:
-            # Find the potential end of a passwd
-            try:
-                end = data.rindex('@', 0, begin)
-            except ValueError:
-                # No passwd in the rest of the data
-                output.insert(0, data[0:begin])
-                break
-
-            # Search for the beginning of a passwd
-            sep = None
-            sep_search_end = end
-            while not sep:
-                # URL-style username+password
-                try:
-                    begin = data.rindex('://', 0, sep_search_end)
-                except ValueError:
-                    # No url style in the data, check for ssh style in the
-                    # rest of the string
-                    begin = 0
-                # Search for separator
-                try:
-                    sep = data.index(':', begin + 3, end)
-                except ValueError:
-                    # No separator; choices:
-                    if begin == 0:
-                        # Searched the whole string so there's no password
-                        # here.  Return the remaining data
-                        output.insert(0, data[0:begin])
-                        break
-                    # Search for a different beginning of the password field.
-                    sep_search_end = begin
-                    continue
-            if sep:
-                # Password was found; remove it.
-                output.insert(0, data[end:prev_begin])
-                output.insert(0, '********')
-                output.insert(0, data[begin:sep + 1])
-                prev_begin = begin
-
-        return ''.join(output)
 
     def _log_invocation(self):
         ''' log that ansible ran the module '''
@@ -1090,7 +1092,7 @@ class AnsibleModule(object):
                     param_val = str(param_val)
                 elif isinstance(param_val, unicode):
                     param_val = param_val.encode('utf-8')
-                log_args[param] = self._heuristic_log_sanitize(param_val)
+                log_args[param] = heuristic_log_sanitize(param_val)
 
         module = 'ansible-%s' % os.path.basename(__file__)
         msg = []
@@ -1434,27 +1436,27 @@ class AnsibleModule(object):
         # create a printable version of the command for use
         # in reporting later, which strips out things like
         # passwords from the args list
-        if isinstance(args, list):
-            clean_args = " ".join(pipes.quote(arg) for arg in args)
+        if isinstance(args, basestring):
+            to_clean_args = shlex.split(args.encode('utf-8'))
         else:
-            clean_args = args
+            to_clean_args = args
 
-        # all clean strings should return two match groups, 
-        # where the first is the CLI argument and the second 
-        # is the password/key/phrase that will be hidden
-        clean_re_strings = [
-            # this removes things like --password, --pass, --pass-wd, etc.
-            # optionally followed by an '=' or a space. The password can 
-            # be quoted or not too, though it does not care about quotes
-            # that are not balanced
-            # source: http://blog.stevenlevithan.com/archives/match-quoted-string
-            r'([-]{0,2}pass[-]?(?:word|wd)?[=\s]?)((?:["\'])?(?:[^\s])*(?:\1)?)',
-            r'^(?P<before>.*:)(?P<password>.*)(?P<after>\@.*)$', 
-            # TODO: add more regex checks here
-        ]
-        for re_str in clean_re_strings:
-            r = re.compile(re_str)
-            clean_args = r.sub(r'\1********', clean_args)
+        clean_args = []
+        is_passwd = False
+        for arg in to_clean_args:
+            if is_passwd:
+                is_passwd = False
+                clean_args.append('********')
+                continue
+            if PASSWD_ARG_RE.match(arg):
+                sep_idx = arg.find('=')
+                if sep_idx > -1:
+                    clean_args.append('%s=********' % arg[:sep_idx])
+                    continue
+                else:
+                    is_passwd = True
+            clean_args.append(heuristic_log_sanitize(arg))
+        clean_args = ' '.join(pipes.quote(arg) for arg in clean_args)
 
         if data:
             st_in = subprocess.PIPE
@@ -1539,7 +1541,7 @@ class AnsibleModule(object):
             self.fail_json(rc=257, msg=traceback.format_exc(), cmd=clean_args)
 
         if rc != 0 and check_rc:
-            msg = stderr.rstrip()
+            msg = heuristic_log_sanitize(stderr.rstrip())
             self.fail_json(cmd=clean_args, rc=rc, stdout=stdout, stderr=stderr, msg=msg)
 
         # reset the pwd