From 1284c49bd7af15875da5a88fb6d698becee0e24b Mon Sep 17 00:00:00 2001 From: Abhijit Menon-Sen Date: Fri, 14 Aug 2015 16:20:27 +0530 Subject: [PATCH] Rewrite the INI InventoryParser The new code parses INI-format inventory files in a single pass using a well-documented state machine that reports precise errors and eliminates the duplications and inconsistencies and outright errors in the earlier three-phase parsing code (e.g. three ways to skip comments). It is also much easier now to follow what decisions are being taken on the basis of the parsed data. The comments point out various potential improvements, particularly in the area of consistent IPv6 handling. On the ornate marble tombstone of the old code, the following inscription is one last baffling memento from a bygone age: - def _before_comment(self, msg): - ''' what's the part of a string before a comment? ''' - msg = msg.replace("\#","**NOT_A_COMMENT**") - msg = msg.split("#")[0] - msg = msg.replace("**NOT_A_COMMENT**","#") - return msg --- lib/ansible/inventory/ini.py | 486 +++++++++++++++++++++++------------ 1 file changed, 325 insertions(+), 161 deletions(-) diff --git a/lib/ansible/inventory/ini.py b/lib/ansible/inventory/ini.py index ce92c31bc12..eddaf931870 100644 --- a/lib/ansible/inventory/ini.py +++ b/lib/ansible/inventory/ini.py @@ -1,4 +1,4 @@ -# (c) 2012-2014, Michael DeHaan +# Copyright 2015 Abhijit Menon-Sen # # This file is part of Ansible # @@ -33,28 +33,288 @@ from ansible.utils.unicode import to_unicode class InventoryParser(object): """ - Host inventory for ansible. + Takes an INI-format inventory file and builds a list of groups and subgroups + with their associated hosts and variable settings. """ def __init__(self, filename=C.DEFAULT_HOST_LIST): self.filename = filename + # Start with an empty host list and the default 'all' and + # 'ungrouped' groups. + + self.hosts = {} + self.patterns = {} + self.groups = dict( + all = Group(name='all'), + ungrouped = Group(name='ungrouped') + ) + + # Read in the hosts, groups, and variables defined in the + # inventory file. + with open(filename) as fh: self.lines = fh.readlines() - self.groups = {} - self.hosts = {} self._parse() - def _parse(self): + # Finally, add all top-level groups (including 'ungrouped') as + # children of 'all'. - self._parse_base_groups() - self._parse_group_children() - self._add_allgroup_children() - self._parse_group_variables() - return self.groups + for group in self.groups.values(): + if group.depth == 0 and group.name != 'all': + self.groups['all'].add_child_group(group) + + # Note: we could discard self.hosts after this point. + + def _parse(self): + ''' + Populates self.groups from the contents of self.lines. Raises an error + on any parse failure. + ''' + + self._compile_patterns() + + # We behave as though the first line of the inventory is '[ungrouped]', + # and begin to look for host definitions. We make a single pass through + # each line of the inventory, building up self.groups and adding hosts, + # subgroups, and setting variables as we go. + + pending_declarations = {} + section = 'ungrouped' + state = 'hosts' + + i = 0 + for line in self.lines: + i += 1 + + # Is there a better way to get rid of the ending \n? + line = line.strip() + + # Skip empty lines and comments + if line == '' or line.startswith(";") or line.startswith("#"): + continue + + # Is this a [section] header? That tells us what group we're parsing + # definitions for, and what kind of definitions to expect. + + m = self.patterns['section'].match(line) + if m: + (section, state) = m.groups() + + state = state or 'hosts' + if state not in ['hosts', 'children', 'vars']: + title = ":".join(m.groups()) + raise AnsibleError("%s:%d: Section [%s] has unknown type: %s" % (self.filename, i, title, state)) + + # If we haven't seen this section before, we add a new Group. + # + # Either [groupname] or [groupname:children] is sufficient to + # declare a group, but [groupname:vars] is allowed only if the + # group is declared elsewhere (not necessarily earlier). We add + # the group anyway, but make a note in pending_declarations to + # check at the end. + + if section not in self.groups: + self.groups[section] = Group(name=section) + + if state == 'vars': + pending_declarations[section] = dict(line=i, state=state, name=section) + + # When we see a declaration that we've been waiting for, we can + # delete the note. + + if section in pending_declarations and state != 'vars': + del pending_declarations[section] + + continue + + # It's not a section, so the current state tells us what kind of + # definition it must be. The individual parsers will raise an + # error if we feed them something they can't digest. + + # [groupname] contains host definitions that must be added to + # the current group. + if state == 'hosts': + hosts = self._parse_host_definition(line, i) + for h in hosts: + self.groups[section].add_host(h) + + # [groupname:vars] contains variable definitions that must be + # applied to the current group. + elif state == 'vars': + (k, v) = self._parse_variable_definition(line, i) + self.groups[section].set_variable(k, v) + + # [groupname:children] contains subgroup names that must be + # added as children of the current group. The subgroup names + # must themselves be declared as groups, but as before, they + # may only be declared later. + elif state == 'children': + child = self._parse_group_name(line, i) + + if child not in self.groups: + self.groups[child] = Group(name=child) + pending_declarations[child] = dict(line=i, state=state, name=child, parent=section) + + self.groups[section].add_child_group(self.groups[child]) + + # Note: there's no reason why we couldn't accept variable + # definitions here, and set them on the named child group. + + # This is a fencepost. It can happen only if the state checker + # accepts a state that isn't handled above. + else: + raise AnsibleError("%s:%d: Entered unhandled state: %s" % (self.filename, i, state)) + + # Any entries in pending_declarations not removed by a group declaration + # above mean that there was an unresolved forward reference. We report + # only the first such error here. + + for g in pending_declarations: + decl = pending_declarations[g] + if decl['state'] == 'vars': + raise AnsibleError("%s:%d: Section [%s:vars] not valid for undefined group: %s" % (self.filename, decl['line'], decl['name'], decl['name'])) + elif decl['state'] == 'children': + raise AnsibleError("%s:%d: Section [%s:children] includes undefined group: %s" % (self.filename, decl['line'], decl['parent'], decl['name'])) + + def _parse_group_name(self, line, i): + ''' + Takes a single line and tries to parse it as a group name. Returns the + group name if successful, or raises an error. + ''' + + m = self.patterns['groupname'].match(line) + if m: + return m.group(1) + + raise AnsibleError("%s:%d: Expected group name, got: %s" % (self.filename, i, line)) + + def _parse_variable_definition(self, line, i): + ''' + Takes a string and tries to parse it as a variable definition. Returns + the key and value if successful, or raises an error. + ''' + + # TODO: We parse variable assignments as a key (anything to the left of + # an '='"), an '=', and a value (anything left) and leave the value to + # _parse_value to sort out. We should be more systematic here about + # defining what is acceptable, how quotes work, and so on. + + if '=' in line: + (k, v) = [e.strip() for e in line.split("=", 1)] + return (k, self._parse_value(v)) + + raise AnsibleError("%s:%d: Expected key=value, got: %s" % (self.filename, i, line)) + + def _parse_host_definition(self, line, i): + ''' + Takes a single line and tries to parse it as a host definition. Returns + a list of Hosts if successful, or raises an error. + ''' + + # A host definition comprises (1) a non-whitespace hostname or range, + # optionally followed by (2) a series of key="some value" assignments. + # We ignore any trailing whitespace and/or comments. For example, here + # are a series of host definitions in a group: + # + # [groupname] + # alpha + # beta:2345 user=admin # we'll tell shlex + # gamma sudo=True user=root # to ignore comments + + try: + tokens = shlex.split(line, comments=True) + except ValueError as e: + raise AnsibleError("%s:%d: Error parsing host definition '%s': %s" % (self.filename, i, varstring, e)) + + (hostnames, port) = self._expand_hostpattern(tokens[0]) + hosts = self._Hosts(hostnames, port) + + # Try to process anything remaining as a series of key=value pairs. + + variables = {} + for t in tokens[1:]: + if '=' not in t: + raise AnsibleError("%s:%d: Expected key=value host variable assignment, got: %s" % (self.filename, i, t)) + (k, v) = t.split('=', 1) + variables[k] = self._parse_value(v) + + # Apply any variable settings found to every host. + + for h in hosts: + for k in variables: + h.set_variable(k, variables[k]) + if k == 'ansible_ssh_host': + h.ipv4_address = variables[k] + + return hosts + + def _expand_hostpattern(self, pattern): + ''' + Takes a single host pattern and returns a list of hostnames and an + optional port number that applies to all of them. + ''' + + # First, we extract the port number. This is usually ":NN" at the end of + # the expression, but for IPv6 addresses it's ".NN" instead. In either + # case, we remove it. + + port = None + if ':' in pattern: + pos = pattern.rindex(':') + try: + port = int(pattern[pos+1:]) + pattern = pattern[0:pos] + except ValueError: + pass + else: + m = self.patterns['ipv6_hostport'].match(pattern) + if m: + (pattern, port) = m.groups() + + # We're done, because we know this is a single IPv6 address. + # But should we support ranges for IPv6 address generation? + # See the FIXME note below. We should probably just accept + # "[xxx]:nn" syntax instead, and then let xxx be expanded. + + return ([pattern], int(port)) + + # Now we're left with just the pattern, which results in a list of one + # or more hostnames, depending on whether it contains any [x:y] ranges. + + if detect_range(pattern): + hostnames = expand_hostname_range(pattern) + else: + hostnames = [pattern] + + return (hostnames, port) + + def _Hosts(self, hostnames, port): + ''' + Takes a list of hostnames and a port (which may be None) and returns a + list of Hosts (without recreating anything in self.hosts). + ''' + + hosts = [] + + # Note that we decide whether or not to create a Host based solely on + # the (non-)existence of its hostname in self.hosts. This means that one + # cannot add both "foo:22" and "foo:23" to the inventory. This behaviour + # is preserved for now, but this may be an easy FIXME. + + for hn in hostnames: + if hn not in self.hosts: + self.hosts[hn] = Host(name=hn, port=port) + hosts.append(self.hosts[hn]) + + return hosts @staticmethod def _parse_value(v): + ''' + Does something with something and returns something. Not for mere + mortals such as myself to interpret. + ''' if "#" not in v: try: v = ast.literal_eval(v) @@ -68,158 +328,62 @@ class InventoryParser(object): pass return to_unicode(v, nonstring='passthru', errors='strict') - # [webservers] - # alpha - # beta:2345 - # gamma sudo=True user=root - # delta asdf=jkl favcolor=red - - def _add_allgroup_children(self): - - for group in self.groups.values(): - if group.depth == 0 and group.name != 'all': - self.groups['all'].add_child_group(group) - - - def _parse_base_groups(self): - # FIXME: refactor - - ungrouped = Group(name='ungrouped') - all = Group(name='all') - all.add_child_group(ungrouped) - - self.groups = dict(all=all, ungrouped=ungrouped) - active_group_name = 'ungrouped' - - i = 0 - for line in self.lines: - i += 1 - line = self._before_comment(line).strip() - if line.startswith("[") and line.endswith("]"): - active_group_name = line.replace("[","").replace("]","") - if ":vars" in line or ":children" in line: - active_group_name = active_group_name.rsplit(":", 1)[0] - if active_group_name not in self.groups: - new_group = self.groups[active_group_name] = Group(name=active_group_name) - active_group_name = None - elif active_group_name not in self.groups: - new_group = self.groups[active_group_name] = Group(name=active_group_name) - elif line.startswith(";") or line == '': - pass - elif active_group_name: - try: - tokens = shlex.split(line) - except ValueError as e: - raise AnsibleError("Error in %s, unable to parse L#%d: %s\n\n\t%s\n" % (self.filename, i, str(e), line)) - - if len(tokens) == 0: - continue - hostname = tokens[0] - port = None - # Three cases to check: - # 0. A hostname that contains a range pesudo-code and a port - # 1. A hostname that contains just a port - if hostname.count(":") > 1: - # Possible an IPv6 address, or maybe a host line with multiple ranges - # IPv6 with Port XXX:XXX::XXX.port - # FQDN foo.example.com - if hostname.count(".") == 1: - (hostname, port) = hostname.rsplit(".", 1) - elif ("[" in hostname and - "]" in hostname and - ":" in hostname and - (hostname.rindex("]") < hostname.rindex(":")) or - ("]" not in hostname and ":" in hostname)): - (hostname, port) = hostname.rsplit(":", 1) - - hostnames = [] - if detect_range(hostname): - hostnames = expand_hostname_range(hostname) - else: - hostnames = [hostname] - - for hn in hostnames: - host = None - if hn in self.hosts: - host = self.hosts[hn] - else: - host = Host(name=hn, port=port) - self.hosts[hn] = host - if len(tokens) > 1: - for t in tokens[1:]: - if t.startswith('#'): - break - try: - (k,v) = t.split("=", 1) - except ValueError, e: - raise AnsibleError("Invalid ini entry in %s: %s - %s" % (self.filename, t, str(e))) - v = self._parse_value(v) - if k == 'ansible_ssh_host': - host.ipv4_address = v - host.set_variable(k, v) - self.groups[active_group_name].add_host(host) - - # [southeast:children] - # atlanta - # raleigh - - def _parse_group_children(self): - group = None - - for line in self.lines: - line = line.strip() - if line is None or line == '': - continue - if line.startswith("[") and ":children]" in line: - line = line.replace("[","").replace(":children]","") - group = self.groups.get(line, None) - if group is None: - group = self.groups[line] = Group(name=line) - elif line.startswith("#") or line.startswith(";"): - pass - elif line.startswith("["): - group = None - elif group: - kid_group = self.groups.get(line, None) - if kid_group is None: - raise AnsibleError("child group is not defined: (%s)" % line) - else: - group.add_child_group(kid_group) - - - # [webservers:vars] - # http_port=1234 - # maxRequestsPerChild=200 - - def _parse_group_variables(self): - group = None - for line in self.lines: - line = line.strip() - if line.startswith("[") and ":vars]" in line: - line = line.replace("[","").replace(":vars]","") - group = self.groups.get(line, None) - if group is None: - raise AnsibleError("can't add vars to undefined group: %s" % line) - elif line.startswith("#") or line.startswith(";"): - pass - elif line.startswith("["): - group = None - elif line == '': - pass - elif group: - if "=" not in line: - raise AnsibleError("variables assigned to group must be in key=value form") - else: - (k, v) = [e.strip() for e in line.split("=", 1)] - group.set_variable(k, self._parse_value(v)) - def get_host_variables(self, host): return {} - def _before_comment(self, msg): - ''' what's the part of a string before a comment? ''' - msg = msg.replace("\#","**NOT_A_COMMENT**") - msg = msg.split("#")[0] - msg = msg.replace("**NOT_A_COMMENT**","#") - return msg + def _compile_patterns(self): + ''' + Compiles the regular expressions required to parse the inventory and + stores them in self.patterns. + ''' + # Section names are square-bracketed expressions at the beginning of a + # line, comprising (1) a group name optionally followed by (2) a tag + # that specifies the contents of the section. We ignore any trailing + # whitespace and/or comments. For example: + # + # [groupname] + # [somegroup:vars] + # [naughty:children] # only get coal in their stockings + + self.patterns['section'] = re.compile( + r'''^\[ + ([^:\]\s]+) # group name (see groupname below) + (?::(\w+))? # optional : and tag name + \] + \s* # ignore trailing whitespace + (?:\#.*)? # and/or a comment till the + $ # end of the line + ''', re.X + ) + + # FIXME: What are the real restrictions on group names, or rather, what + # should they be? At the moment, they must be non-empty sequences of non + # whitespace characters excluding ':' and ']', but we should define more + # precise rules in order to support better diagnostics. The same applies + # to hostnames. + + self.patterns['groupname'] = re.compile( + r'''^ + ([^:\]\s]+) + \s* # ignore trailing whitespace + (?:\#.*)? # and/or a comment till the + $ # end of the line + ''', re.X + ) + + # This matches an IPv6 address, a '.', and a port number. It's not yet + # very strict about matching the IPv6 address. + # + # FIXME: There are various shortcomings in the IPv6 handling in the + # old code, which aren't fixed here yet. For example, Inventory's + # parse_inventory() method seems to accept "[ipv6]:nn" syntax. We + # should pick one and stick with it. + + self.patterns['ipv6_hostport'] = re.compile( + r'''^ + ([a-fA-F0-9:]+) + \.([0-9]+) + $ + ''', re.X + )