Reduce recursion within group methods

This offers an optimization that allows loading larger
inventories of various structure by improving the
scaling laws involved for adding hosts and groups.

The primary speed benefit is the elimination of duplicate
recusion from traversing converging paths.
This commit is contained in:
AlanCoding 2018-03-05 15:18:34 -05:00 committed by Brian Coca
parent 71e8527d7c
commit 153c9bd539
3 changed files with 200 additions and 27 deletions

View file

@ -19,6 +19,8 @@ __metaclass__ = type
from ansible.errors import AnsibleError from ansible.errors import AnsibleError
from itertools import chain
class Group: class Group:
''' a group of ansible hosts ''' ''' a group of ansible hosts '''
@ -80,6 +82,38 @@ class Group:
g.deserialize(parent_data) g.deserialize(parent_data)
self.parent_groups.append(g) self.parent_groups.append(g)
def _walk_relationship(self, rel):
'''
Given `rel` that is an iterable property of Group,
consitituting a directed acyclic graph among all groups,
Returns a set of all groups in full tree
A B C
| / | /
| / | /
D -> E
| / vertical connections
| / are directed upward
F
Called on F, returns set of (A, B, C, D, E)
'''
seen = set([])
unprocessed = set(getattr(self, rel))
while unprocessed:
seen.update(unprocessed)
unprocessed = set(chain.from_iterable(
getattr(g, rel) for g in unprocessed
))
unprocessed.difference_update(seen)
return seen
def get_ancestors(self):
return self._walk_relationship('parent_groups')
def get_descendants(self):
return self._walk_relationship('child_groups')
@property @property
def host_names(self): def host_names(self):
if self._hosts is None: if self._hosts is None:
@ -96,6 +130,17 @@ class Group:
# don't add if it's already there # don't add if it's already there
if group not in self.child_groups: if group not in self.child_groups:
# prepare list of group's new ancestors this edge creates
start_ancestors = group.get_ancestors()
new_ancestors = self.get_ancestors()
if group in new_ancestors:
raise AnsibleError(
"Adding group '%s' as child to '%s' creates a recursive "
"dependency loop." % (group.name, self.name))
new_ancestors.add(self)
new_ancestors.difference_update(start_ancestors)
self.child_groups.append(group) self.child_groups.append(group)
# update the depth of the child # update the depth of the child
@ -109,18 +154,28 @@ class Group:
if self.name not in [g.name for g in group.parent_groups]: if self.name not in [g.name for g in group.parent_groups]:
group.parent_groups.append(self) group.parent_groups.append(self)
for h in group.get_hosts(): for h in group.get_hosts():
h.populate_ancestors() h.populate_ancestors(additions=new_ancestors)
self.clear_hosts_cache() self.clear_hosts_cache()
def _check_children_depth(self): def _check_children_depth(self):
try: depth = self.depth
for group in self.child_groups: start_depth = self.depth # self.depth could change over loop
group.depth = max([self.depth + 1, group.depth]) seen = set([])
group._check_children_depth() unprocessed = set(self.child_groups)
except RuntimeError:
raise AnsibleError("The group named '%s' has a recursive dependency loop." % self.name) while unprocessed:
seen.update(unprocessed)
depth += 1
to_process = unprocessed.copy()
unprocessed = set([])
for g in to_process:
if g.depth < depth:
g.depth = depth
unprocessed.update(g.child_groups)
if depth - start_depth > len(seen):
raise AnsibleError("The group named '%s' has a recursive dependency loop." % self.name)
def add_host(self, host): def add_host(self, host):
if host.name not in self.host_names: if host.name not in self.host_names:
@ -147,8 +202,8 @@ class Group:
def clear_hosts_cache(self): def clear_hosts_cache(self):
self._hosts_cache = None self._hosts_cache = None
for g in self.parent_groups: for g in self.get_ancestors():
g.clear_hosts_cache() g._hosts_cache = None
def get_hosts(self): def get_hosts(self):
@ -160,8 +215,8 @@ class Group:
hosts = [] hosts = []
seen = {} seen = {}
for kid in self.child_groups: for kid in self.get_descendants():
kid_hosts = kid.get_hosts() kid_hosts = kid.hosts
for kk in kid_hosts: for kk in kid_hosts:
if kk not in seen: if kk not in seen:
seen[kk] = 1 seen[kk] = 1
@ -179,18 +234,6 @@ class Group:
def get_vars(self): def get_vars(self):
return self.vars.copy() return self.vars.copy()
def _get_ancestors(self):
results = {}
for g in self.parent_groups:
results[g.name] = g
results.update(g._get_ancestors())
return results
def get_ancestors(self):
return self._get_ancestors().values()
def set_priority(self, priority): def set_priority(self, priority):
try: try:
self.priority = int(priority) self.priority = int(priority)

View file

@ -101,17 +101,22 @@ class Host:
def get_name(self): def get_name(self):
return self.name return self.name
def populate_ancestors(self): def populate_ancestors(self, additions=None):
# populate ancestors # populate ancestors
for group in self.groups: if additions is None:
self.add_group(group) for group in self.groups:
self.add_group(group)
else:
for group in additions:
if group not in self.groups:
self.groups.append(group)
def add_group(self, group): def add_group(self, group):
# populate ancestors first # populate ancestors first
for oldg in group.get_ancestors(): for oldg in group.get_ancestors():
if oldg not in self.groups: if oldg not in self.groups:
self.add_group(oldg) self.groups.append(oldg)
# actually add group # actually add group
if group not in self.groups: if group not in self.groups:

View file

@ -0,0 +1,125 @@
# Copyright 2018 Alan Rominger <arominge@redhat.com>
#
# This file is part of Ansible
#
# Ansible is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Ansible is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Ansible. If not, see <http://www.gnu.org/licenses/>.
from ansible.compat.tests import unittest
from ansible.inventory.group import Group
from ansible.inventory.host import Host
from ansible.errors import AnsibleError
class TestGroup(unittest.TestCase):
def test_depth_update(self):
A = Group('A')
B = Group('B')
Z = Group('Z')
A.add_child_group(B)
A.add_child_group(Z)
self.assertEqual(A.depth, 0)
self.assertEqual(Z.depth, 1)
self.assertEqual(B.depth, 1)
def test_depth_update_dual_branches(self):
alpha = Group('alpha')
A = Group('A')
alpha.add_child_group(A)
B = Group('B')
A.add_child_group(B)
Z = Group('Z')
alpha.add_child_group(Z)
beta = Group('beta')
B.add_child_group(beta)
Z.add_child_group(beta)
self.assertEqual(alpha.depth, 0) # apex
self.assertEqual(beta.depth, 3) # alpha -> A -> B -> beta
omega = Group('omega')
omega.add_child_group(alpha)
# verify that both paths are traversed to get the max depth value
self.assertEqual(B.depth, 3) # omega -> alpha -> A -> B
self.assertEqual(beta.depth, 4) # B -> beta
def test_depth_recursion(self):
A = Group('A')
B = Group('B')
A.add_child_group(B)
# hypothetical of adding B as child group to A
A.parent_groups.append(B)
B.child_groups.append(A)
# can't update depths of groups, because of loop
with self.assertRaises(AnsibleError):
B._check_children_depth()
def test_loop_detection(self):
A = Group('A')
B = Group('B')
C = Group('C')
A.add_child_group(B)
B.add_child_group(C)
with self.assertRaises(AnsibleError):
C.add_child_group(A)
def test_populates_descendant_hosts(self):
A = Group('A')
B = Group('B')
C = Group('C')
h = Host('h')
C.add_host(h)
A.add_child_group(B) # B is child of A
B.add_child_group(C) # C is descendant of A
A.add_child_group(B)
self.assertEqual(set(h.groups), set([C, B, A]))
h2 = Host('h2')
C.add_host(h2)
self.assertEqual(set(h2.groups), set([C, B, A]))
def test_ancestor_example(self):
# see docstring for Group._walk_relationship
groups = {}
for name in ['A', 'B', 'C', 'D', 'E', 'F']:
groups[name] = Group(name)
# first row
groups['A'].add_child_group(groups['D'])
groups['B'].add_child_group(groups['D'])
groups['B'].add_child_group(groups['E'])
groups['C'].add_child_group(groups['D'])
# second row
groups['D'].add_child_group(groups['E'])
groups['D'].add_child_group(groups['F'])
groups['E'].add_child_group(groups['F'])
self.assertEqual(
set(groups['F'].get_ancestors()),
set([
groups['A'], groups['B'], groups['C'], groups['D'], groups['E']
])
)
def test_ancestors_recursive_loop_safe(self):
'''
The get_ancestors method may be referenced before circular parenting
checks, so the method is expected to be stable even with loops
'''
A = Group('A')
B = Group('B')
A.parent_groups.append(B)
B.parent_groups.append(A)
# finishes in finite time
self.assertEqual(A.get_ancestors(), set([A, B]))