Ensure that the src file contents is converted to unicode in diff info (#45744)

* Ensure that the src file contents is converted to unicode in diff info. Fixes #45717

* Fix up and cleanup

* The diff functionality in the callback plugins should have the
  to_text() calls removed since we're now doing it in ActionBase
* catching of UnicodeError and warnings in the callback diff
  functionality from 61d01f549f haven't been
  needed since we switched to to_text so remove them.
* Add a note to ActionBase's diff function giving an example of when the
  diff function will be inaccurate and how to fix it

* Fix callback get_diff() tests

I believe the unittests of callback's get_diff() were wrong.  They were
sending in a list where strings were expected.  Because previous code
was transforming the lists into strings via their repr, the previous
tests did not fail but they would have formatted the test cases output
in an odd way if we had looked at it.
This commit is contained in:
Matt Martz 2018-09-20 12:31:48 -05:00 committed by GitHub
parent 24dd87bd0a
commit 95e77ac853
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 94 additions and 88 deletions

View file

@ -0,0 +1,5 @@
bugfixes:
- copy - Ensure that the src file contents is converted to unicode in diff
information so that it is properly wrapped by AnsibleUnsafeText to prevent
unexpected templating of diff data in Python3
(https://github.com/ansible/ansible/issues/45717)

View file

@ -1,3 +1,4 @@
# coding: utf-8
# Copyright: (c) 2012-2014, Michael DeHaan <michael.dehaan@gmail.com>
# Copyright: (c) 2018, Ansible Project
# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)
@ -1013,6 +1014,15 @@ class ActionBase(with_metaclass(ABCMeta, object)):
def _get_diff_data(self, destination, source, task_vars, source_file=True):
# Note: Since we do not diff the source and destination before we transform from bytes into
# text the diff between source and destination may not be accurate. To fix this, we'd need
# to move the diffing from the callback plugins into here.
#
# Example of data which would cause trouble is src_content == b'\xff' and dest_content ==
# b'\xfe'. Neither of those are valid utf-8 so both get turned into the replacement
# character: diff['before'] = u'<27>' ; diff['after'] = u'<27>' When the callback plugin later
# diffs before and after it shows an empty diff.
diff = {}
display.debug("Going to peek to see if file has changed permissions")
peek_result = self._execute_module(module_name='file', module_args=dict(path=destination, _diff_peek=True), task_vars=task_vars, persist_files=True)
@ -1020,22 +1030,22 @@ class ActionBase(with_metaclass(ABCMeta, object)):
if not peek_result.get('failed', False) or peek_result.get('rc', 0) == 0:
if peek_result.get('state') == 'absent':
diff['before'] = ''
diff['before'] = u''
elif peek_result.get('appears_binary'):
diff['dst_binary'] = 1
elif peek_result.get('size') and C.MAX_FILE_SIZE_FOR_DIFF > 0 and peek_result['size'] > C.MAX_FILE_SIZE_FOR_DIFF:
diff['dst_larger'] = C.MAX_FILE_SIZE_FOR_DIFF
else:
display.debug("Slurping the file %s" % source)
display.debug(u"Slurping the file %s" % source)
dest_result = self._execute_module(module_name='slurp', module_args=dict(path=destination), task_vars=task_vars, persist_files=True)
if 'content' in dest_result:
dest_contents = dest_result['content']
if dest_result['encoding'] == 'base64':
if dest_result['encoding'] == u'base64':
dest_contents = base64.b64decode(dest_contents)
else:
raise AnsibleError("unknown encoding in content option, failed: %s" % dest_result)
raise AnsibleError("unknown encoding in content option, failed: %s" % to_native(dest_result))
diff['before_header'] = destination
diff['before'] = dest_contents
diff['before'] = to_text(dest_contents)
if source_file:
st = os.stat(source)
@ -1053,17 +1063,17 @@ class ActionBase(with_metaclass(ABCMeta, object)):
diff['src_binary'] = 1
else:
diff['after_header'] = source
diff['after'] = src_contents
diff['after'] = to_text(src_contents)
else:
display.debug("source of file passed in")
diff['after_header'] = 'dynamically generated'
display.debug(u"source of file passed in")
diff['after_header'] = u'dynamically generated'
diff['after'] = source
if self._play_context.no_log:
if 'before' in diff:
diff["before"] = ""
diff["before"] = u""
if 'after' in diff:
diff["after"] = " [[ Diff output has been hidden because 'no_log: true' was specified for this result ]]\n"
diff["after"] = u" [[ Diff output has been hidden because 'no_log: true' was specified for this result ]]\n"
return diff

View file

@ -31,7 +31,6 @@ from collections import MutableMapping
from ansible import constants as C
from ansible.parsing.ajson import AnsibleJSONEncoder
from ansible.plugins import AnsiblePlugin, get_plugin_class
from ansible.module_utils._text import to_text
from ansible.utils.color import stringc
from ansible.vars.clean import strip_internal_keys
@ -155,67 +154,62 @@ class CallbackBase(AnsiblePlugin):
ret = []
for diff in difflist:
try:
with warnings.catch_warnings():
warnings.simplefilter('ignore')
if 'dst_binary' in diff:
ret.append("diff skipped: destination file appears to be binary\n")
if 'src_binary' in diff:
ret.append("diff skipped: source file appears to be binary\n")
if 'dst_larger' in diff:
ret.append("diff skipped: destination file size is greater than %d\n" % diff['dst_larger'])
if 'src_larger' in diff:
ret.append("diff skipped: source file size is greater than %d\n" % diff['src_larger'])
if 'before' in diff and 'after' in diff:
# format complex structures into 'files'
for x in ['before', 'after']:
if isinstance(diff[x], MutableMapping):
diff[x] = json.dumps(diff[x], sort_keys=True, indent=4, separators=(',', ': ')) + '\n'
if 'before_header' in diff:
before_header = "before: %s" % diff['before_header']
else:
before_header = 'before'
if 'after_header' in diff:
after_header = "after: %s" % diff['after_header']
else:
after_header = 'after'
before_lines = to_text(diff['before']).splitlines(True)
after_lines = to_text(diff['after']).splitlines(True)
if before_lines and not before_lines[-1].endswith('\n'):
before_lines[-1] += '\n\\ No newline at end of file\n'
if after_lines and not after_lines[-1].endswith('\n'):
after_lines[-1] += '\n\\ No newline at end of file\n'
differ = difflib.unified_diff(before_lines,
after_lines,
fromfile=before_header,
tofile=after_header,
fromfiledate='',
tofiledate='',
n=C.DIFF_CONTEXT)
difflines = list(differ)
if len(difflines) >= 3 and sys.version_info[:2] == (2, 6):
# difflib in Python 2.6 adds trailing spaces after
# filenames in the -- before/++ after headers.
difflines[0] = difflines[0].replace(' \n', '\n')
difflines[1] = difflines[1].replace(' \n', '\n')
# it also treats empty files differently
difflines[2] = difflines[2].replace('-1,0', '-0,0').replace('+1,0', '+0,0')
has_diff = False
for line in difflines:
has_diff = True
if line.startswith('+'):
line = stringc(line, C.COLOR_DIFF_ADD)
elif line.startswith('-'):
line = stringc(line, C.COLOR_DIFF_REMOVE)
elif line.startswith('@@'):
line = stringc(line, C.COLOR_DIFF_LINES)
ret.append(line)
if has_diff:
ret.append('\n')
if 'prepared' in diff:
ret.append(to_text(diff['prepared']))
except UnicodeDecodeError:
ret.append(">> the files are different, but the diff library cannot compare unicode strings\n\n")
if 'dst_binary' in diff:
ret.append(u"diff skipped: destination file appears to be binary\n")
if 'src_binary' in diff:
ret.append(u"diff skipped: source file appears to be binary\n")
if 'dst_larger' in diff:
ret.append(u"diff skipped: destination file size is greater than %d\n" % diff['dst_larger'])
if 'src_larger' in diff:
ret.append(u"diff skipped: source file size is greater than %d\n" % diff['src_larger'])
if 'before' in diff and 'after' in diff:
# format complex structures into 'files'
for x in ['before', 'after']:
if isinstance(diff[x], MutableMapping):
diff[x] = json.dumps(diff[x], sort_keys=True, indent=4, separators=(u',', u': ')) + u'\n'
if 'before_header' in diff:
before_header = u"before: %s" % diff['before_header']
else:
before_header = u'before'
if 'after_header' in diff:
after_header = u"after: %s" % diff['after_header']
else:
after_header = u'after'
before_lines = diff['before'].splitlines(True)
after_lines = diff['after'].splitlines(True)
if before_lines and not before_lines[-1].endswith(u'\n'):
before_lines[-1] += u'\n\\ No newline at end of file\n'
if after_lines and not after_lines[-1].endswith('\n'):
after_lines[-1] += u'\n\\ No newline at end of file\n'
differ = difflib.unified_diff(before_lines,
after_lines,
fromfile=before_header,
tofile=after_header,
fromfiledate=u'',
tofiledate=u'',
n=C.DIFF_CONTEXT)
difflines = list(differ)
if len(difflines) >= 3 and sys.version_info[:2] == (2, 6):
# difflib in Python 2.6 adds trailing spaces after
# filenames in the -- before/++ after headers.
difflines[0] = difflines[0].replace(u' \n', u'\n')
difflines[1] = difflines[1].replace(u' \n', u'\n')
# it also treats empty files differently
difflines[2] = difflines[2].replace(u'-1,0', u'-0,0').replace(u'+1,0', u'+0,0')
has_diff = False
for line in difflines:
has_diff = True
if line.startswith(u'+'):
line = stringc(line, C.COLOR_DIFF_ADD)
elif line.startswith(u'-'):
line = stringc(line, C.COLOR_DIFF_REMOVE)
elif line.startswith(u'@@'):
line = stringc(line, C.COLOR_DIFF_LINES)
ret.append(line)
if has_diff:
ret.append('\n')
if 'prepared' in diff:
ret.append(diff['prepared'])
return u''.join(ret)
def _get_item_label(self, result):

View file

@ -175,9 +175,6 @@ class TestCallbackDumpResults(unittest.TestCase):
self.assertTrue('LEFTIN' in json_out)
# TODO: triggr the 'except UnicodeError' around _get_diff
# that try except orig appeared in 61d01f549f2143fd9adfa4ffae42f09d24649c26
# in 2013 so maybe a < py2.6 issue
class TestCallbackDiff(unittest.TestCase):
def setUp(self):
@ -188,28 +185,28 @@ class TestCallbackDiff(unittest.TestCase):
def test_difflist(self):
# TODO: split into smaller tests?
difflist = [{'before': ['preface\nThe Before String\npostscript'],
'after': ['preface\nThe After String\npostscript'],
'before_header': 'just before',
'after_header': 'just after'
difflist = [{'before': u'preface\nThe Before String\npostscript',
'after': u'preface\nThe After String\npostscript',
'before_header': u'just before',
'after_header': u'just after'
},
{'before': ['preface\nThe Before String\npostscript'],
'after': ['preface\nThe After String\npostscript'],
{'before': u'preface\nThe Before String\npostscript',
'after': u'preface\nThe After String\npostscript',
},
{'src_binary': 'chicane'},
{'dst_binary': 'chicanery'},
{'dst_larger': 1},
{'src_larger': 2},
{'prepared': 'what does prepared do?'},
{'before_header': 'just before'},
{'after_header': 'just after'}]
{'prepared': u'what does prepared do?'},
{'before_header': u'just before'},
{'after_header': u'just after'}]
res = self.cb._get_diff(difflist)
self.assertIn('Before String', res)
self.assertIn('After String', res)
self.assertIn('just before', res)
self.assertIn('just after', res)
self.assertIn(u'Before String', res)
self.assertIn(u'After String', res)
self.assertIn(u'just before', res)
self.assertIn(u'just after', res)
def test_simple_diff(self):
self.assertMultiLineEqual(