From 539426f612e2698b607e1179c020bf0ac95971d4 Mon Sep 17 00:00:00 2001 From: James Cammarata Date: Thu, 26 Jun 2014 22:40:31 -0500 Subject: [PATCH 1/2] Performance tuning inventory functions for large inventories --- lib/ansible/inventory/__init__.py | 40 +++++++++++++++++++++++-------- lib/ansible/inventory/group.py | 3 ++- 2 files changed, 32 insertions(+), 11 deletions(-) diff --git a/lib/ansible/inventory/__init__.py b/lib/ansible/inventory/__init__.py index 830d74c01ef..bee8988c7b2 100644 --- a/lib/ansible/inventory/__init__.py +++ b/lib/ansible/inventory/__init__.py @@ -147,6 +147,17 @@ class Inventory(object): else: return fnmatch.fnmatch(str, pattern_str) + def _match_list(self, items, item_attr, pattern_str): + results = [] + if not pattern_str.startswith('~'): + pattern = re.compile(fnmatch.translate(pattern_str)) + else: + pattern = re.compile(pattern_str[1:]) + for item in items: + if pattern.search(getattr(item, item_attr)): + results.append(item) + return results + def get_hosts(self, pattern="all"): """ find all host names matching a pattern string, taking into account any inventory restrictions or @@ -297,20 +308,31 @@ class Inventory(object): def _hosts_in_unenumerated_pattern(self, pattern): """ Get all host names matching the pattern """ + results = [] hosts = [] hostnames = set() # ignore any negative checks here, this is handled elsewhere pattern = pattern.replace("!","").replace("&", "") - results = [] + def __append_host_to_results(host): + if host not in results and host.name not in hostnames: + hostnames.add(host.name) + results.append(host) + groups = self.get_groups() for group in groups: - for host in group.get_hosts(): - if pattern == 'all' or self._match(group.name, pattern) or self._match(host.name, pattern): - if host not in results and host.name not in hostnames: - results.append(host) - hostnames.add(host.name) + if pattern == 'all': + for host in group.get_hosts(): + __append_host_to_results(host) + else: + if self._match(group.name, pattern): + for host in group.get_hosts(): + __append_host_to_results(host) + else: + matching_hosts = self._match_list(group.get_hosts(), 'name', pattern) + for host in matching_hosts: + __append_host_to_results(host) if pattern in ["localhost", "127.0.0.1"] and len(results) == 0: new_host = self._create_implicit_localhost(pattern) @@ -325,10 +347,8 @@ class Inventory(object): results = [] groups = self.get_groups() for group in groups: - for hostn in group.get_hosts(): - if host == hostn.name: - results.append(group) - continue + if host in group.get_hosts(): + results.append(group) return results def groups_list(self): diff --git a/lib/ansible/inventory/group.py b/lib/ansible/inventory/group.py index c5270ad554c..e42ddc7fbfb 100644 --- a/lib/ansible/inventory/group.py +++ b/lib/ansible/inventory/group.py @@ -28,7 +28,8 @@ class Group(object): self.vars = {} self.child_groups = [] self.parent_groups = [] - self.clear_hosts_cache() + self._hosts_cache = None + #self.clear_hosts_cache() if self.name is None: raise Exception("group name is required") From ff4119adc040afe1dfdd3a554acca98ac80bbb9e Mon Sep 17 00:00:00 2001 From: Serge van Ginderachter Date: Wed, 26 Mar 2014 16:24:54 +0100 Subject: [PATCH 2/2] Performance optimization in resolving host patterns Avoid resolving a pattern that is a plain host. When matching a hostname in the hosts_cache, just use the host object from there. When running a task on say 750 hosts, this yields a huge improvement. --- lib/ansible/inventory/__init__.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/lib/ansible/inventory/__init__.py b/lib/ansible/inventory/__init__.py index bee8988c7b2..b2a3825c9c7 100644 --- a/lib/ansible/inventory/__init__.py +++ b/lib/ansible/inventory/__init__.py @@ -213,15 +213,18 @@ class Inventory(object): hosts = [] for p in patterns: - that = self.__get_hosts(p) - if p.startswith("!"): - hosts = [ h for h in hosts if h not in that ] - elif p.startswith("&"): - hosts = [ h for h in hosts if h in that ] + # avoid resolving a pattern that is a plain host + if p in self._hosts_cache: + hosts.append(self.get_host(p)) else: - to_append = [ h for h in that if h.name not in [ y.name for y in hosts ] ] - hosts.extend(to_append) - + that = self.__get_hosts(p) + if p.startswith("!"): + hosts = [ h for h in hosts if h not in that ] + elif p.startswith("&"): + hosts = [ h for h in hosts if h in that ] + else: + to_append = [ h for h in that if h.name not in [ y.name for y in hosts ] ] + hosts.extend(to_append) return hosts def __get_hosts(self, pattern):