Python 3: shlex.split() wants unicode

On Python 2, shlex.split() raises if you pass it a unicode object with
non-ASCII characters in it.  The Ansible codebase copes by explicitly
converting the string using to_bytes() before passing it to
shlex.split().

On Python 3, shlex.split() raises ('bytes' object has no attribute 'read')
if you pass a bytes object.  Oops.

This commit introduces a new wrapper function, shlex_split, that
transparently performs the to_bytes/to_unicode conversions only on
Python 2.

Currently I've only converted one call site (the one that was causing a
unit test to fail on Python 3).  If this approach is deemed suitable,
I'll convert them all.
This commit is contained in:
Marius Gedminas 2015-09-24 12:26:10 +03:00
parent 5d3d9cfe0d
commit 5d29a2eabd
3 changed files with 75 additions and 5 deletions

View file

@ -20,7 +20,6 @@ from __future__ import (absolute_import, division, print_function)
__metaclass__ = type __metaclass__ = type
import ast import ast
import shlex
import re import re
from ansible import constants as C from ansible import constants as C
@ -30,7 +29,8 @@ from ansible.inventory.group import Group
from ansible.inventory.expand_hosts import detect_range from ansible.inventory.expand_hosts import detect_range
from ansible.inventory.expand_hosts import expand_hostname_range from ansible.inventory.expand_hosts import expand_hostname_range
from ansible.parsing.utils.addresses import parse_address from ansible.parsing.utils.addresses import parse_address
from ansible.utils.unicode import to_unicode, to_bytes from ansible.utils.shlex import shlex_split
from ansible.utils.unicode import to_unicode
class InventoryParser(object): class InventoryParser(object):
""" """
@ -231,13 +231,11 @@ class InventoryParser(object):
# beta:2345 user=admin # we'll tell shlex # beta:2345 user=admin # we'll tell shlex
# gamma sudo=True user=root # to ignore comments # gamma sudo=True user=root # to ignore comments
line = to_bytes(line)
try: try:
tokens = shlex.split(line, comments=True) tokens = shlex_split(line, comments=True)
except ValueError as e: except ValueError as e:
self._raise_error("Error parsing host definition '%s': %s" % (varstring, e)) self._raise_error("Error parsing host definition '%s': %s" % (varstring, e))
tokens = [ to_unicode(t) for t in tokens]
(hostnames, port) = self._expand_hostpattern(tokens[0]) (hostnames, port) = self._expand_hostpattern(tokens[0])
hosts = self._Hosts(hostnames, port) hosts = self._Hosts(hostnames, port)

View file

@ -0,0 +1,33 @@
# (c) 2015, Marius Gedminas <marius@gedmin.as>
#
# This file is part of Ansible
#
# Ansible is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Ansible is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Ansible. If not, see <http://www.gnu.org/licenses/>.
from __future__ import absolute_import
import shlex
from six import PY3
from ansible.utils.unicode import to_bytes, to_unicode
if PY3:
    # On Python 3 shlex.split() wants text (``str``) input, so the
    # standard-library function is exposed unchanged.
    shlex_split = shlex.split
else:
    # On Python 2 shlex.split() wants bytes (``str``) input: encode the
    # argument before splitting, then decode every token back to unicode.
    def shlex_split(s, comments=False, posix=True):
        encoded = to_bytes(s)
        raw_tokens = shlex.split(encoded, comments, posix)
        return [to_unicode(token) for token in raw_tokens]

    # Reuse the documentation of the function being wrapped.
    shlex_split.__doc__ = shlex.split.__doc__

View file

@ -0,0 +1,39 @@
# (c) 2015, Marius Gedminas <marius@gedmin.as>
#
# This file is part of Ansible
#
# Ansible is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Ansible is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Ansible. If not, see <http://www.gnu.org/licenses/>.
import unittest
from ansible.utils.shlex import shlex_split
class TestSplit(unittest.TestCase):
    """Unit tests for the ``shlex_split`` wrapper."""

    def test_trivial(self):
        # Plain words separated by single spaces.
        tokens = shlex_split("a b c")
        self.assertEqual(tokens, ["a", "b", "c"])

    def test_unicode(self):
        # Non-ASCII text input must come back as text tokens.
        tokens = shlex_split(u"a b \u010D")
        self.assertEqual(tokens, [u"a", u"b", u"\u010D"])

    def test_quoted(self):
        # A double-quoted section stays together as one token.
        tokens = shlex_split('"a b" c')
        self.assertEqual(tokens, ["a b", "c"])

    def test_comments(self):
        # With comments=True everything from '#' onwards is dropped.
        tokens = shlex_split('"a b" c # d', comments=True)
        self.assertEqual(tokens, ["a b", "c"])

    def test_error(self):
        # An unterminated quote is expected to raise ValueError.
        unterminated = 'a "b'
        self.assertRaises(ValueError, shlex_split, unterminated)