Merge remote-tracking branch 'ansible-stage/new_arg_splitting' into new_arg_splitting

This commit is contained in:
James Cammarata 2014-07-24 19:51:57 -05:00
commit d692f7f6ae
5 changed files with 225 additions and 104 deletions

View file

@ -218,107 +218,6 @@ def load_platform_subclass(cls, *args, **kwargs):
return super(cls, subclass).__new__(subclass)
def split_args(args):
'''
Splits args on whitespace, but intelligently reassembles
those that may have been split over a jinja2 block or quotes.
When used in a module, we won't ever have to be concerned about
jinja2 blocks, however this function is/will be used in the
core portions as well before the args are templated.
'''
# the list of params parsed out of the arg string
params = []
# here we encode the args, so we have a uniform charset to
# work with, and split on white space
args = args.encode('utf-8')
items = args.split()
# iterate over the items, and reassemble any that may have been
# split on a space inside a jinja2 block. These variables are used
# to keep track of the state of the parsing, since blocks and quotes
# may be nested within each other.
inside_quotes = False
quote_char = None
split_print_depth = 0
split_block_depth = 0
split_comment_depth = 0
# now we loop over each split item, coalescing items if the white space
# split occurred within quotes or a jinja2 block of some kind
for item in items:
item = item.strip()
# store the previous quoting state for checking later
was_inside_quotes = inside_quotes
# determine the current quoting state
for i in range(0, len(item)):
c = item[i]
bc = None
if i > 0:
bc = item[i-1]
if c in ('"', "'"):
if inside_quotes:
if c == quote_char and bc != '\\':
inside_quotes = False
quote_char = None
else:
inside_quotes = True
quote_char = c
# multiple conditions may append a token to the list of params,
# so we keep track with this flag to make sure it only happens once
appended = False
# if we're inside quotes now, but weren't before, append the item
# to the end of the list, since we'll tack on more to it later
if inside_quotes and not was_inside_quotes:
params.append(item)
appended = True
# otherwise, if we're inside any jinja2 block, inside quotes, or we were
# inside quotes (but aren't now) concat this item to the last param
elif ((split_print_depth + split_block_depth + split_comment_depth) > 0 or inside_quotes or was_inside_quotes):
params[-1] = "%s %s" % (params[-1], item)
appended = True
# these variables are used to determine the current depth of each jinja2
# block type, by counting the number of openings and closing tags
num_print_open = item.count('{{')
num_print_close = item.count('}}')
num_block_open = item.count('{%')
num_block_close = item.count('%}')
num_comment_open = item.count('{#')
num_comment_close = item.count('#}')
# if the number is not the same, the depth has changed, so we calculate that here
# and may append the current item to the params (if we haven't previously done so)
if num_print_open != num_print_close:
split_print_depth += (num_print_open - num_print_close)
if not appended:
params.append(item)
appended = True
if split_print_depth < 0:
split_print_depth = 0
if num_block_open != num_block_close:
split_block_depth += (num_block_open - num_block_close)
if not appended:
params.append(item)
appended = True
if split_block_depth < 0:
split_block_depth = 0
if num_comment_open != num_comment_close:
split_comment_depth += (num_comment_open - num_comment_close)
if not appended:
params.append(item)
appended = True
if split_comment_depth < 0:
split_comment_depth = 0
# finally, if we're at zero depth for all blocks and not inside quotes, and have not
# yet appended anything to the list of params, we do so now
if (split_print_depth + split_block_depth + split_comment_depth) == 0 and not inside_quotes and not appended:
params.append(item)
# If we're done and things are not at zero depth or we're still inside quotes,
# raise an error to indicate that the args were unbalanced
if (split_print_depth + split_block_depth + split_comment_depth) != 0 or inside_quotes:
raise Exception("error while splitting arguments, either an unbalanced jinja2 block or quotes")
# finally, we decode each param back to the unicode it was in the arg string
params = [x.decode('utf-8') for x in params]
return params
class AnsibleModule(object):
def __init__(self, argument_spec, bypass_checks=False, no_log=False,

View file

@ -47,7 +47,7 @@ import connection
from return_data import ReturnData
from ansible.callbacks import DefaultRunnerCallbacks, vv
from ansible.module_common import ModuleReplacer
from ansible.module_utils.basic import split_args
from ansible.utils.splitter import split_args
module_replacer = ModuleReplacer(strip_comments=False)

View file

@ -26,11 +26,10 @@ import optparse
import operator
from ansible import errors
from ansible import __version__
from ansible.utils import template
from ansible.utils.display_functions import *
from ansible.utils.plugins import *
from ansible.callbacks import display
from ansible.module_utils.basic import split_args
from ansible.utils.splitter import split_args
import ansible.constants as C
import ast
import time
@ -231,6 +230,7 @@ def is_changed(result):
return (result.get('changed', False) in [ True, 'True', 'true'])
def check_conditional(conditional, basedir, inject, fail_on_undefined=False):
from ansible.utils import template
if conditional is None or conditional == '':
return True
@ -325,6 +325,9 @@ def path_dwim_relative(original, dirname, source, playbook_base, check=True):
''' find one file in a directory one level up in a dir named dirname relative to current '''
# (used by roles code)
from ansible.utils import template
basedir = os.path.dirname(original)
if os.path.islink(basedir):
basedir = unfrackpath(basedir)
@ -1217,6 +1220,8 @@ def safe_eval(expr, locals={}, include_exceptions=False):
def listify_lookup_plugin_terms(terms, basedir, inject):
from ansible.utils import template
if isinstance(terms, basestring):
# someone did:
# with_items: alist

View file

@ -0,0 +1,164 @@
# (c) 2014 James Cammarata, <jcammarata@ansible.com>
#
# This file is part of Ansible
#
# Ansible is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Ansible is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Ansible. If not, see <http://www.gnu.org/licenses/>.
def split_args(args):
'''
Splits args on whitespace, but intelligently reassembles
those that may have been split over a jinja2 block or quotes.
When used in a remote module, we won't ever have to be concerned about
jinja2 blocks, however this function is/will be used in the
core portions as well before the args are templated.
example input: a=b c=d
example output: dict(a='b', c='d')
Basically this is a variation shlex that has some more intelligence for
how Ansible needs to use it.
'''
# FIXME: refactoring into smaller functions
# the list of params parsed out of the arg string
# this is going to be the result value when we are donei
params = []
# here we encode the args, so we have a uniform charset to
# work with, and split on white space
args = args.encode('utf-8')
items = args.split()
# iterate over the items, and reassemble any that may have been
# split on a space inside a jinja2 block.
# ex if tokens are "{{", "foo", "}}" these go together
# These variables are used
# to keep track of the state of the parsing, since blocks and quotes
# may be nested within each other.
inside_quotes = False
quote_char = None
split_print_depth = 0
split_block_depth = 0
split_comment_depth = 0
# now we loop over each split item, coalescing items if the white space
# split occurred within quotes or a jinja2 block of some kind
for item in items:
item = item.strip()
# store the previous quoting state for checking later
was_inside_quotes = inside_quotes
# determine the current quoting state
# the goal of this block is to determine if the quoted string
# is unterminated in which case it needs to be put back together
bc = None # before_char
for i in range(0, len(item)): # use enumerate
c = item[i] # current_char
if i > 0:
bc = item[i-1]
if c in ('"', "'"):
if inside_quotes:
if c == quote_char and bc != '\\':
inside_quotes = False
quote_char = None
else:
inside_quotes = True
quote_char = c
# multiple conditions may append a token to the list of params,
# so we keep track with this flag to make sure it only happens once
# append means add to the end of the list, don't append means concatenate
# it to the end of the last token
appended = False
# if we're inside quotes now, but weren't before, append the item
# to the end of the list, since we'll tack on more to it later
if inside_quotes and not was_inside_quotes:
params.append(item)
appended = True
# otherwise, if we're inside any jinja2 block, inside quotes, or we were
# inside quotes (but aren't now) concat this item to the last param
# FIXME: just or these all together
elif (split_print_depth or split_block_depth or split_comment_depth or inside_quotes or was_inside_quotes):
params[-1] = "%s %s" % (params[-1], item)
appended = True
# these variables are used to determine the current depth of each jinja2
# block type, by counting the number of openings and closing tags
# FIXME: assumes Jinja2 seperators aren't changeable (also true elsewhere in ansible ATM)
num_print_open = item.count('{{')
num_print_close = item.count('}}')
num_block_open = item.count('{%')
num_block_close = item.count('%}')
num_comment_open = item.count('{#')
num_comment_close = item.count('#}')
# if the number of paired block tags is not the same, the depth has changed, so we calculate that here
# and may append the current item to the params (if we haven't previously done so)
# FIXME: DRY a bit
if num_print_open != num_print_close:
split_print_depth += (num_print_open - num_print_close)
if not appended:
params.append(item)
appended = True
if split_print_depth < 0:
split_print_depth = 0
if num_block_open != num_block_close:
split_block_depth += (num_block_open - num_block_close)
if not appended:
params.append(item)
appended = True
if split_block_depth < 0:
split_block_depth = 0
if num_comment_open != num_comment_close:
split_comment_depth += (num_comment_open - num_comment_close)
if not appended:
params.append(item)
appended = True
if split_comment_depth < 0:
split_comment_depth = 0
# finally, if we're at zero depth for all blocks and not inside quotes, and have not
# yet appended anything to the list of params, we do so now
if not (split_print_depth or split_block_depth or split_comment_depth) and not inside_quotes and not appended:
params.append(item)
# If we're done and things are not at zero depth or we're still inside quotes,
# raise an error to indicate that the args were unbalanced
if (split_print_depth or split_block_depth or split_comment_depth) or inside_quotes:
raise Exception("error while splitting arguments, either an unbalanced jinja2 block or quotes")
# finally, we decode each param back to the unicode it was in the arg string
params = [x.decode('utf-8') for x in params]
return params

View file

@ -17,6 +17,7 @@ import ansible.utils
import ansible.errors
import ansible.constants as C
import ansible.utils.template as template2
from ansible.utils.splitter import split_args
from ansible import __version__
@ -678,3 +679,55 @@ class TestUtils(unittest.TestCase):
diff = '\n'.join(diff)
self.assertEqual(diff, unicode(standard_expected))
def test_split_args(self):
# split_args is a smarter shlex.split for the needs of the way ansible uses it
# TODO: FIXME: should this survive, retire smush_ds
def _split_info(input, desired, actual):
print "SENT: ", input
print "WANT: ", desired
print "GOT: ", actual
def _test_combo(input, desired):
actual = split_args(input)
_split_info(input, desired, actual)
assert actual == desired
# trivial splitting
_test_combo('a b=c d=f', ['a', 'b=c', 'd=f' ])
# mixed quotes
_test_combo('a b=\'c\' d="e" f=\'g\'', ['a', "b='c'", 'd="e"', "f='g'" ])
# with spaces
# FIXME: this fails, commenting out only for now
# _test_combo('a "\'one two three\'"', ['a', "'one two three'" ])
# TODO: ...
# jinja2 preservation
_test_combo('a {{ y }} z', ['a', '{{ y }}', 'z' ])
# jinja2 preservation with spaces and filters and other hard things
_test_combo(
'a {{ x | filter(\'moo\', \'param\') }} z {{ chicken }} "waffles"',
['a', "{{ x | filter('moo', 'param') }}", 'z', '{{ chicken }}', '"waffles"']
)
# invalid quote detection
with self.assertRaises(Exception):
split_args('hey I started a quote"')
with self.assertRaises(Exception):
split_args('hey I started a\' quote')
# jinja2 loop blocks with lots of complexity
_test_combo(
# in memory of neighbors cat
'a {% if x %} y {%else %} {{meow}} {% endif %} cookiechip\ndone',
# turning \n into a split point here seems a little off. We'll see if other tests care.
['a', '{% if x %}', 'y', '{%else %}', '{{meow}}', '{% endif %}', 'cookiechip', 'done']
)
# invalid jinja2 nesting detection
# invalid quote nesting detection