From 14c3b4d8e5972fcb81073f634de6f76ec50f662e Mon Sep 17 00:00:00 2001
From: Toshio Kuratomi <a.badger@gmail.com>
Date: Tue, 16 Jan 2018 14:12:23 -0800
Subject: [PATCH] Move yaml loading functions from DataLoader into a utility
 module.

---
 lib/ansible/parsing/dataloader.py     | 78 +----------------------
 lib/ansible/parsing/utils/yaml.py     | 91 +++++++++++++++++++++++++++
 test/units/parsing/utils/test_yaml.py | 34 ++++++++++
 3 files changed, 128 insertions(+), 75 deletions(-)
 create mode 100644 lib/ansible/parsing/utils/yaml.py
 create mode 100644 test/units/parsing/utils/test_yaml.py

diff --git a/lib/ansible/parsing/dataloader.py b/lib/ansible/parsing/dataloader.py
index 611df540a5f..6366886cefc 100644
--- a/lib/ansible/parsing/dataloader.py
+++ b/lib/ansible/parsing/dataloader.py
@@ -7,23 +7,18 @@ from __future__ import (absolute_import, division, print_function)
 __metaclass__ = type
 
 import copy
-import json
 import os
 import os.path
 import re
 import tempfile
 
-from yaml import YAMLError
-
 from ansible.errors import AnsibleFileNotFound, AnsibleParserError
-from ansible.errors.yaml_strings import YAML_SYNTAX_ERROR
 from ansible.module_utils.basic import is_executable
 from ansible.module_utils.six import binary_type, text_type
 from ansible.module_utils._text import to_bytes, to_native, to_text
 from ansible.parsing.quoting import unquote
+from ansible.parsing.utils.yaml import from_yaml
 from ansible.parsing.vault import VaultLib, b_HEADER, is_encrypted, is_encrypted_file, parse_vaulttext_envelope
-from ansible.parsing.yaml.loader import AnsibleLoader
-from ansible.parsing.yaml.objects import AnsibleBaseYAMLObject, AnsibleUnicode
 from ansible.utils.path import unfrackpath
 
 try:
@@ -75,45 +70,8 @@ class DataLoader:
         self._vault.secrets = vault_secrets
 
     def load(self, data, file_name='<string>', show_content=True):
-        '''
-        Creates a python datastructure from the given data, which can be either
-        a JSON or YAML string.
-        '''
-        new_data = None
-
-        # YAML parser will take JSON as it is a subset.
-        if isinstance(data, AnsibleUnicode):
-            # The PyYAML's libyaml bindings use PyUnicode_CheckExact so
-            # they are unable to cope with our subclass.
-            # Unwrap and re-wrap the unicode so we can keep track of line
-            # numbers
-            in_data = text_type(data)
-        else:
-            in_data = data
-
-        try:
-            # we first try to load this data as JSON
-            new_data = json.loads(data)
-        except:
-            # must not be JSON, let the rest try
-            if isinstance(data, AnsibleUnicode):
-                # The PyYAML's libyaml bindings use PyUnicode_CheckExact so
-                # they are unable to cope with our subclass.
-                # Unwrap and re-wrap the unicode so we can keep track of line
-                # numbers
-                in_data = text_type(data)
-            else:
-                in_data = data
-            try:
-                new_data = self._safe_load(in_data, file_name=file_name)
-            except YAMLError as yaml_exc:
-                self._handle_error(yaml_exc, file_name, show_content)
-
-            if isinstance(data, AnsibleUnicode):
-                new_data = AnsibleUnicode(new_data)
-                new_data.ansible_pos = data.ansible_pos
-
-        return new_data
+        '''Backwards compat wrapper for now: delegates to from_yaml() with this loader's vault secrets'''
+        return from_yaml(data, file_name, show_content, self._vault.secrets)
 
     def load_from_file(self, file_name, cache=True, unsafe=False):
         ''' Loads data from a file, which can contain either JSON or YAML.  '''
@@ -162,18 +120,6 @@ class DataLoader:
         path = self.path_dwim(path)
         return is_executable(path)
 
-    def _safe_load(self, stream, file_name=None):
-        ''' Implements yaml.safe_load(), except using our custom loader class. '''
-
-        loader = AnsibleLoader(stream, file_name, self._vault.secrets)
-        try:
-            return loader.get_single_data()
-        finally:
-            try:
-                loader.dispose()
-            except AttributeError:
-                pass  # older versions of yaml don't have dispose function, ignore
-
     def _decrypt_if_vault_data(self, b_vault_data, b_file_name=None):
         '''Decrypt b_vault_data if encrypted and return b_data and the show_content flag'''
 
@@ -217,24 +163,6 @@ class DataLoader:
         except (IOError, OSError) as e:
             raise AnsibleParserError("an error occurred while trying to read the file '%s': %s" % (file_name, str(e)), orig_exc=e)
 
-    def _handle_error(self, yaml_exc, file_name, show_content):
-        '''
-        Optionally constructs an object (AnsibleBaseYAMLObject) to encapsulate the
-        file name/position where a YAML exception occurred, and raises an AnsibleParserError
-        to display the syntax exception information.
-        '''
-
-        # if the YAML exception contains a problem mark, use it to construct
-        # an object the error class can use to display the faulty line
-        err_obj = None
-        if hasattr(yaml_exc, 'problem_mark'):
-            err_obj = AnsibleBaseYAMLObject()
-            err_obj.ansible_pos = (file_name, yaml_exc.problem_mark.line + 1, yaml_exc.problem_mark.column + 1)
-
-        err_msg = getattr(yaml_exc, 'problem', '')
-
-        raise AnsibleParserError(YAML_SYNTAX_ERROR % to_native(err_msg), obj=err_obj, show_content=show_content, orig_exc=yaml_exc)
-
     def get_basedir(self):
         ''' returns the current basedir '''
         return self._basedir
diff --git a/lib/ansible/parsing/utils/yaml.py b/lib/ansible/parsing/utils/yaml.py
new file mode 100644
index 00000000000..de36928c020
--- /dev/null
+++ b/lib/ansible/parsing/utils/yaml.py
@@ -0,0 +1,91 @@
+# (c) 2012-2014, Michael DeHaan <michael.dehaan@gmail.com>
+# Copyright: (c) 2017, Ansible Project
+# Copyright: (c) 2018, Ansible Project
+# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)
+
+# Make coding more python3-ish
+from __future__ import (absolute_import, division, print_function)
+__metaclass__ = type
+
+import json
+
+from yaml import YAMLError
+
+from ansible.errors import AnsibleParserError
+from ansible.errors.yaml_strings import YAML_SYNTAX_ERROR
+from ansible.module_utils.six import text_type
+from ansible.module_utils._text import to_native
+from ansible.parsing.yaml.loader import AnsibleLoader
+from ansible.parsing.yaml.objects import AnsibleBaseYAMLObject, AnsibleUnicode
+
+
+__all__ = ('from_yaml',)
+
+
+def _handle_error(yaml_exc, file_name, show_content):
+    '''
+    Converts a YAML exception into an AnsibleParserError and raises it.
+
+    If the YAML exception has a ``problem_mark``, the file name plus the mark's
+    line and column (each incremented by one) are stored on an AnsibleBaseYAMLObject
+    that is handed to the error class so it can display the faulty line.
+    '''
+    position_holder = None
+    if hasattr(yaml_exc, 'problem_mark'):
+        mark = yaml_exc.problem_mark
+        position_holder = AnsibleBaseYAMLObject()
+        position_holder.ansible_pos = (file_name, mark.line + 1, mark.column + 1)
+
+    # exceptions without a 'problem' attribute contribute an empty description
+    message = YAML_SYNTAX_ERROR % to_native(getattr(yaml_exc, 'problem', ''))
+    raise AnsibleParserError(message, obj=position_holder, show_content=show_content, orig_exc=yaml_exc)
+
+
+def _safe_load(stream, file_name=None, vault_secrets=None):
+    ''' Implements yaml.safe_load(), except using our custom loader class (AnsibleLoader). '''
+    # stream, file_name and vault_secrets are handed to the AnsibleLoader constructor as-is
+    loader = AnsibleLoader(stream, file_name, vault_secrets)
+    try:
+        return loader.get_single_data()
+    finally:  # runs whether get_single_data() returned or raised
+        try:
+            loader.dispose()
+        except AttributeError:
+            pass  # older versions of yaml don't have dispose function, ignore
+
+
+def from_yaml(data, file_name='<string>', show_content=True, vault_secrets=None):
+    '''
+    Creates a python datastructure from the given data, which can be either a JSON or YAML string.
+    vault_secrets is passed to the YAML loader; YAML errors are re-raised as AnsibleParserError.
+    '''
+    new_data = None
+
+    if isinstance(data, AnsibleUnicode):
+        # The PyYAML's libyaml bindings use PyUnicode_CheckExact so
+        # they are unable to cope with our subclass.
+        # Unwrap and re-wrap the unicode so we can keep track of line
+        # numbers
+        # Note: Cannot use to_text() because AnsibleUnicode is a subclass of the text_type.
+        # Should not have to worry about tracebacks because python's text constructors (unicode() on
+        # python2 and str() on python3) can handle a subtype of themselves.
+        in_data = text_type(data)
+    else:
+        in_data = data
+
+    try:
+        # we first try to load this data as JSON.  Fixes issues with extra vars json strings not
+        # being parsed correctly by the yaml parser
+        new_data = json.loads(in_data)
+    except Exception:
+        # must not be JSON, let the rest try
+        try:
+            new_data = _safe_load(in_data, file_name=file_name, vault_secrets=vault_secrets)
+        except YAMLError as yaml_exc:
+            _handle_error(yaml_exc, file_name, show_content)
+
+        if isinstance(data, AnsibleUnicode):
+            new_data = AnsibleUnicode(new_data)
+            new_data.ansible_pos = data.ansible_pos
+
+    return new_data
diff --git a/test/units/parsing/utils/test_yaml.py b/test/units/parsing/utils/test_yaml.py
new file mode 100644
index 00000000000..27b2905ac32
--- /dev/null
+++ b/test/units/parsing/utils/test_yaml.py
@@ -0,0 +1,34 @@
+# -*- coding: utf-8 -*-
+# (c) 2017, Ansible Project
+#
+# This file is part of Ansible
+#
+# Ansible is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Ansible is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Ansible.  If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import (absolute_import, division, print_function)
+__metaclass__ = type
+
+import pytest
+
+from ansible.errors import AnsibleParserError
+from ansible.parsing.utils.yaml import from_yaml
+
+
+def test_from_yaml_simple():  # list of mappings with int, str and non-ASCII keys/values
+    assert from_yaml(u'---\n- test: 1\n  test2: "2"\n- caf\xe9: "caf\xe9"') == [{u'test': 1, u'test2': u"2"}, {u"caf\xe9": u"caf\xe9"}]
+
+
+def test_bad_yaml():  # input that fails to parse must surface as AnsibleParserError
+    with pytest.raises(AnsibleParserError):
+        from_yaml(u'foo: bar: baz')