Add a YAML constructor for unicode strings:

* Changes AnsibleConstructor so that only unicode strings are returned
  (no str type)
* Tracks line and column numbers for strings
* Adds unit tests for AnsibleLoader (generic for all the YAML parsing)
This commit is contained in:
Toshio Kuratomi 2015-03-30 21:48:28 -07:00
parent eb788dd8f6
commit c41b917162
5 changed files with 199 additions and 5 deletions

View file

@ -20,17 +20,27 @@ from __future__ import (absolute_import, division, print_function)
__metaclass__ = type
from yaml.composer import Composer
from yaml.nodes import MappingNode
from yaml.nodes import MappingNode, ScalarNode
class AnsibleComposer(Composer):
def __init__(self):
    """Set up the composer and its bookkeeping for mapping positions.

    ``__mapping_starts`` is used as a stack: ``compose_node`` pops a
    ``(line, column)`` pair from it for every mapping node it composes.
    """
    self.__mapping_starts = []
    # super() must be given *this* class.  Naming the parent
    # (``super(Composer, self)``) starts the MRO lookup after Composer and
    # therefore never runs Composer.__init__ itself.
    super(AnsibleComposer, self).__init__()
def compose_node(self, parent, index):
# the line number where the previous token has ended (plus empty lines)
node = Composer.compose_node(self, parent, index)
if isinstance(node, MappingNode):
if isinstance(node, ScalarNode):
# Scalars are pretty easy -- assume they start on the current
# token's line (what about multiline strings?  Perhaps we also
# need to use where the previous token ended)
node.__datasource__ = self.name
node.__line__ = self.line + 1
node.__column__ = self.column + 1
elif isinstance(node, MappingNode):
node.__datasource__ = self.name
# Need extra help to know where the mapping starts
try:
(cur_line, cur_column) = self.__mapping_starts.pop()
except:
@ -38,7 +48,9 @@ class AnsibleComposer(Composer):
cur_column = None
node.__line__ = cur_line
node.__column__ = cur_column
return node
def compose_mapping_node(self, anchor):
# the column here will point at the position in the file immediately
# after the first key is found, which could be a space or a newline.

View file

@ -20,7 +20,8 @@ from __future__ import (absolute_import, division, print_function)
__metaclass__ = type
from yaml.constructor import Constructor
from ansible.parsing.yaml.objects import AnsibleMapping
from ansible.utils.unicode import to_unicode
from ansible.parsing.yaml.objects import AnsibleMapping, AnsibleUnicode
class AnsibleConstructor(Constructor):
def __init__(self, file_name=None):
@ -52,6 +53,22 @@ class AnsibleConstructor(Constructor):
return ret
def construct_yaml_str(self, node):
    """Construct an AnsibleUnicode from a YAML string scalar node.

    Overrides the default string handling so that the value is always
    passed through ``to_unicode`` before being wrapped, and annotates the
    result with the position information stored on the node.

    :arg node: scalar node carrying ``__line__``, ``__column__`` and
        ``__datasource__`` attributes
    :returns: an AnsibleUnicode with ``_line_number``, ``_column_number``
        and ``_data_source`` set
    """
    # Build the object from the *converted* value.  Calling
    # construct_scalar() a second time and wrapping its raw result would
    # bypass to_unicode() and fall back to the interpreter's implicit
    # str -> unicode coercion.
    value = to_unicode(self.construct_scalar(node))
    data = AnsibleUnicode(value)

    data._line_number = node.__line__
    data._column_number = node.__column__
    # An explicitly supplied file name takes precedence over the data
    # source recorded on the node.
    if self._ansible_file_name:
        data._data_source = self._ansible_file_name
    else:
        data._data_source = node.__datasource__
    return data
# Route the standard YAML ``map`` tag through construct_yaml_map.
AnsibleConstructor.add_constructor(
u'tag:yaml.org,2002:map',
AnsibleConstructor.construct_yaml_map)
@ -60,3 +77,11 @@ AnsibleConstructor.add_constructor(
u'tag:yaml.org,2002:python/dict',
AnsibleConstructor.construct_yaml_map)
# Route the standard YAML ``str`` tag through construct_yaml_str.
AnsibleConstructor.add_constructor(
u'tag:yaml.org,2002:str',
AnsibleConstructor.construct_yaml_str)
# Nodes explicitly tagged ``python/unicode`` use the same constructor.
AnsibleConstructor.add_constructor(
u'tag:yaml.org,2002:python/unicode',
AnsibleConstructor.construct_yaml_str)

View file

@ -50,3 +50,6 @@ class AnsibleMapping(AnsibleBaseYAMLObject, dict):
''' sub class for dictionaries '''
pass
class AnsibleUnicode(AnsibleBaseYAMLObject, unicode):
    """Unicode string type that also derives from AnsibleBaseYAMLObject.

    The class adds nothing of its own; everything comes from its two bases.
    """

View file

@ -0,0 +1,156 @@
# coding: utf-8
# (c) 2015, Toshio Kuratomi <tkuratomi@ansible.com>
#
# This file is part of Ansible
#
# Ansible is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Ansible is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Ansible. If not, see <http://www.gnu.org/licenses/>.
# Make coding more python3-ish
from __future__ import (absolute_import, division, print_function)
__metaclass__ = type
from cStringIO import StringIO
from collections import Sequence, Set, Mapping
from ansible.compat.tests import unittest
from ansible.compat.tests.mock import patch
from ansible.parsing.yaml.loader import AnsibleLoader
class TestDataLoader(unittest.TestCase):
def setUp(self):
    """No per-test fixture is needed; each test builds its own stream."""
    return None
def tearDown(self):
    """Nothing to clean up after a test."""
    return None
# A document containing only the scalar ``1`` must load as the number 1.
def test_parse_number(self):
stream = StringIO("""
1
""")
loader = AnsibleLoader(stream)
data = loader.get_single_data()
self.assertEqual(data, 1)
# A plain ASCII scalar must come back equal to u'Ansible' and be a
# unicode instance.
def test_parse_string(self):
stream = StringIO("""
Ansible
""")
loader = AnsibleLoader(stream)
data = loader.get_single_data()
self.assertEqual(data, u'Ansible')
self.assertIsInstance(data, unicode)
# A scalar with non-ASCII characters must load as the matching unicode
# string (the file declares ``coding: utf-8`` for these literals).
def test_parse_utf8_string(self):
stream = StringIO("""
Cafè Eñyei
""")
loader = AnsibleLoader(stream)
data = loader.get_single_data()
self.assertEqual(data, u'Cafè Eñyei')
self.assertIsInstance(data, unicode)
# A two-entry mapping must load with the expected content, and both its
# keys and its values must be unicode instances.
def test_parse_dict(self):
stream = StringIO("""
webster: daniel
oed: oxford
""")
loader = AnsibleLoader(stream)
data = loader.get_single_data()
self.assertEqual(data, {'webster': 'daniel', 'oed': 'oxford'})
self.assertEqual(len(data), 2)
# Only the first key/value is type-checked here
self.assertIsInstance(data.keys()[0], unicode)
self.assertIsInstance(data.values()[0], unicode)
# A two-item sequence must load as a list whose first element is a
# unicode instance.
def test_parse_list(self):
stream = StringIO("""
- a
- b
""")
loader = AnsibleLoader(stream)
data = loader.get_single_data()
self.assertEqual(data, [u'a', u'b'])
self.assertEqual(len(data), 2)
self.assertIsInstance(data[0], unicode)
# Load a one-play document mixing numbers, ASCII and non-ASCII strings,
# a nested mapping, a list and tasks; check every value and then verify
# that no ``str`` object appears anywhere in the result.
def test_parse_play(self):
stream = StringIO("""
- hosts: localhost
vars:
number: 1
string: Ansible
utf8_string: Cafè Eñyei
dictionary:
webster: daniel
oed: oxford
list:
- a
- b
- 1
- 2
tasks:
- name: Test case
ping:
data: "{{ utf8_string }}"
- name: Test 2
ping:
data: "Cafè Eñyei"
- name: Test 3
command: "printf 'Cafè Eñyei\\n'"
""")
loader = AnsibleLoader(stream)
data = loader.get_single_data()
# The document is a list holding exactly one play
self.assertEqual(len(data), 1)
self.assertIsInstance(data, list)
self.assertEqual(frozenset(data[0].keys()), frozenset((u'hosts', u'vars', u'tasks')))
self.assertEqual(data[0][u'hosts'], u'localhost')
self.assertEqual(data[0][u'vars'][u'number'], 1)
self.assertEqual(data[0][u'vars'][u'string'], u'Ansible')
self.assertEqual(data[0][u'vars'][u'utf8_string'], u'Cafè Eñyei')
self.assertEqual(data[0][u'vars'][u'dictionary'],
{u'webster': u'daniel',
u'oed': u'oxford'})
self.assertEqual(data[0][u'vars'][u'list'], [u'a', u'b', 1, 2])
self.assertEqual(data[0][u'tasks'],
[{u'name': u'Test case', u'ping': {u'data': u'{{ utf8_string }}'}},
{u'name': u'Test 2', u'ping': {u'data': u'Cafè Eñyei'}},
{u'name': u'Test 3', u'command': u'printf \'Cafè Eñyei\n\''},
])
# Recursively check the whole structure for stray ``str`` objects
self.walk(data)
def walk(self, data):
    """Recursively assert that no ``str`` instance occurs inside *data*.

    Sequences and sets are checked element by element; mappings are
    checked key by key and value by value.
    """
    # Whatever this node is, it must not be a str
    self.assertNotIsInstance(data, str)

    # unicode is itself a sequence, so stop before the container checks
    if isinstance(data, unicode):
        return

    if isinstance(data, (Sequence, Set)):
        for item in data:
            self.walk(item)
        return

    if isinstance(data, Mapping):
        for key, value in data.items():
            self.walk(key)
            self.walk(value)
    # Anything else is a scalar and was already checked above

View file

@ -1,2 +0,0 @@
- name: Test
filename: /usr/café/ÿ/are_doing_this_to_me