VMware: Add missing HA admission control policies in vmware_cluster_ha (#60233)

* Add missing HA admission control policies
* Add new parameter host_isolation_response
* Bugfix: 'das_vm_config' referenced before assignment
* Implement test cases
This commit is contained in:
Mario Lenz 2019-08-26 15:40:38 +02:00 committed by Gonéri Le Bouder
parent d95a0f235b
commit 0ac57941c1
2 changed files with 269 additions and 40 deletions

View file

@ -7,6 +7,7 @@
# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt) # GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)
from __future__ import absolute_import, division, print_function from __future__ import absolute_import, division, print_function
__metaclass__ = type __metaclass__ = type
ANSIBLE_METADATA = { ANSIBLE_METADATA = {
@ -65,19 +66,66 @@ options:
type: str type: str
choices: ['vmAndAppMonitoring', 'vmMonitoringOnly', 'vmMonitoringDisabled'] choices: ['vmAndAppMonitoring', 'vmMonitoringOnly', 'vmMonitoringDisabled']
default: 'vmMonitoringDisabled' default: 'vmMonitoringDisabled'
ha_failover_level: host_isolation_response:
description: description:
- Number of host failures that should be tolerated, still guaranteeing sufficient resources to - Indicates whether or not VMs should be powered off if a host determines that it is isolated from the rest of the compute resource.
restart virtual machines on available hosts. - If set to C(none), do not power off VMs in the event of a host network isolation.
- Accepts integer values only. - If set to C(powerOff), power off VMs in the event of a host network isolation.
type: int - If set to C(shutdown), shut down the guest operating system of the VMs in the event of a host network isolation.
default: 2 type: str
ha_admission_control_enabled: choices: ['none', 'powerOff', 'shutdown']
default: 'none'
slot_based_admission_control:
description: description:
- Determines if strict admission control is enabled. - Configure slot based admission control policy.
- It is recommended to set this parameter to C(True), please refer documentation for more details. - C(slot_based_admission_control), C(reservation_based_admission_control) and C(failover_host_admission_control) are mutually exclusive.
default: True suboptions:
type: bool failover_level:
description:
- Number of host failures that should be tolerated.
type: int
required: true
type: dict
reservation_based_admission_control:
description:
- Configure reservation based admission control policy.
- C(slot_based_admission_control), C(reservation_based_admission_control) and C(failover_host_admission_control) are mutually exclusive.
suboptions:
failover_level:
description:
- Number of host failures that should be tolerated.
type: int
required: true
auto_compute_percentages:
description:
- By default, C(failover_level) is used to calculate C(cpu_failover_resources_percent) and C(memory_failover_resources_percent).
If a user wants to override the percentage values, they have to set this field to false.
type: bool
default: true
cpu_failover_resources_percent:
description:
- Percentage of CPU resources in the cluster to reserve for failover.
Ignored if C(auto_compute_percentages) is not set to false.
type: int
default: 50
memory_failover_resources_percent:
description:
- Percentage of memory resources in the cluster to reserve for failover.
Ignored if C(auto_compute_percentages) is not set to false.
type: int
default: 50
type: dict
failover_host_admission_control:
description:
- Configure dedicated failover hosts.
- C(slot_based_admission_control), C(reservation_based_admission_control) and C(failover_host_admission_control) are mutually exclusive.
suboptions:
failover_hosts:
description:
- List of dedicated failover hosts.
type: list
required: true
type: dict
ha_vm_failure_interval: ha_vm_failure_interval:
description: description:
- The number of seconds after which virtual machine is declared as failed - The number of seconds after which virtual machine is declared as failed
@ -129,7 +177,7 @@ extends_documentation_fragment: vmware.documentation
''' '''
EXAMPLES = r""" EXAMPLES = r"""
- name: Enable HA - name: Enable HA without admission control
vmware_cluster_ha: vmware_cluster_ha:
hostname: '{{ vcenter_hostname }}' hostname: '{{ vcenter_hostname }}'
username: '{{ vcenter_username }}' username: '{{ vcenter_username }}'
@ -139,7 +187,7 @@ EXAMPLES = r"""
enable_ha: yes enable_ha: yes
delegate_to: localhost delegate_to: localhost
- name: Enable HA and VM monitoring - name: Enable HA and VM monitoring without admission control
vmware_cluster_ha: vmware_cluster_ha:
hostname: "{{ vcenter_hostname }}" hostname: "{{ vcenter_hostname }}"
username: "{{ vcenter_username }}" username: "{{ vcenter_username }}"
@ -151,6 +199,21 @@ EXAMPLES = r"""
ha_vm_monitoring: vmMonitoringOnly ha_vm_monitoring: vmMonitoringOnly
enable_vsan: True enable_vsan: True
delegate_to: localhost delegate_to: localhost
- name: Enable HA with admission control reserving 50% of resources for HA
vmware_cluster_ha:
hostname: '{{ vcenter_hostname }}'
username: '{{ vcenter_username }}'
password: '{{ vcenter_password }}'
datacenter_name: datacenter
cluster_name: cluster
enable_ha: yes
reservation_based_admission_control:
auto_compute_percentages: False
failover_level: 1
cpu_failover_resources_percent: 50
memory_failover_resources_percent: 50
delegate_to: localhost
""" """
RETURN = r"""# RETURN = r"""#
@ -175,6 +238,15 @@ class VMwareCluster(PyVmomi):
self.enable_ha = module.params['enable_ha'] self.enable_ha = module.params['enable_ha']
self.datacenter = None self.datacenter = None
self.cluster = None self.cluster = None
self.host_isolation_response = getattr(vim.cluster.DasVmSettings.IsolationResponse, self.params.get('host_isolation_response'))
if self.enable_ha and (
self.params.get('slot_based_admission_control') or
self.params.get('reservation_based_admission_control') or
self.params.get('failover_host_admission_control')):
self.ha_admission_control = True
else:
self.ha_admission_control = False
self.datacenter = find_datacenter_by_name(self.content, self.datacenter_name) self.datacenter = find_datacenter_by_name(self.content, self.datacenter_name)
if self.datacenter is None: if self.datacenter is None:
@ -184,6 +256,23 @@ class VMwareCluster(PyVmomi):
if self.cluster is None: if self.cluster is None:
self.module.fail_json(msg="Cluster %s does not exist." % self.cluster_name) self.module.fail_json(msg="Cluster %s does not exist." % self.cluster_name)
def get_failover_hosts(self):
    """
    Get failover hosts for failover_host_admission_control policy

    Every name listed under ``failover_hosts`` is looked up among the hosts
    returned by ``get_all_hosts_by_cluster()`` for this cluster. The module
    fails as soon as a listed name is not found there. A name that is listed
    more than once is returned only once, so the result never contains the
    same host twice.

    Returns: List of ESXi hosts sorted by name
    """
    policy = self.params.get('failover_host_admission_control')
    # Map host name -> host object so each requested name is a single lookup.
    all_hosts = dict((h.name, h) for h in self.get_all_hosts_by_cluster(self.cluster_name))
    selected_names = set()
    for host in policy.get('failover_hosts'):
        if host in all_hosts:
            selected_names.add(host)
        else:
            self.module.fail_json(msg="Host %s is not a member of cluster %s." % (host, self.cluster_name))
    # The dict keys are the host names, so sorting the keys yields the same
    # order as sorting the host objects by name without reading h.name again.
    return [all_hosts[name] for name in sorted(selected_names)]
def check_ha_config_diff(self): def check_ha_config_diff(self):
""" """
Check HA configuration diff Check HA configuration diff
@ -191,19 +280,47 @@ class VMwareCluster(PyVmomi):
""" """
das_config = self.cluster.configurationEx.dasConfig das_config = self.cluster.configurationEx.dasConfig
if das_config.enabled != self.enable_ha or \ if das_config.enabled != self.enable_ha:
das_config.admissionControlPolicy.failoverLevel != self.params.get('ha_failover_level') or \
das_config.vmMonitoring != self.params.get('ha_vm_monitoring') or \
das_config.hostMonitoring != self.params.get('ha_host_monitoring') or \
das_config.admissionControlPolicy.failoverLevel != self.params.get('ha_failover_level') or \
das_config.admissionControlEnabled != self.params.get('ha_admission_control_enabled') or \
das_config.defaultVmSettings.restartPriority != self.params.get('ha_restart_priority') or \
das_config.defaultVmSettings.vmToolsMonitoringSettings.vmMonitoring != self.params.get('ha_vm_monitoring') or \
das_config.defaultVmSettings.vmToolsMonitoringSettings.failureInterval != self.params.get('ha_vm_failure_interval') or \
das_config.defaultVmSettings.vmToolsMonitoringSettings.minUpTime != self.params.get('ha_vm_min_up_time') or \
das_config.defaultVmSettings.vmToolsMonitoringSettings.maxFailures != self.params.get('ha_vm_max_failures') or \
das_config.defaultVmSettings.vmToolsMonitoringSettings.maxFailureWindow != self.params.get('ha_vm_max_failure_window'):
return True return True
if self.enable_ha and (
das_config.vmMonitoring != self.params.get('ha_vm_monitoring') or
das_config.hostMonitoring != self.params.get('ha_host_monitoring') or
das_config.admissionControlEnabled != self.ha_admission_control or
das_config.defaultVmSettings.restartPriority != self.params.get('ha_restart_priority') or
das_config.defaultVmSettings.isolationResponse != self.host_isolation_response or
das_config.defaultVmSettings.vmToolsMonitoringSettings.vmMonitoring != self.params.get('ha_vm_monitoring') or
das_config.defaultVmSettings.vmToolsMonitoringSettings.failureInterval != self.params.get('ha_vm_failure_interval') or
das_config.defaultVmSettings.vmToolsMonitoringSettings.minUpTime != self.params.get('ha_vm_min_up_time') or
das_config.defaultVmSettings.vmToolsMonitoringSettings.maxFailures != self.params.get('ha_vm_max_failures') or
das_config.defaultVmSettings.vmToolsMonitoringSettings.maxFailureWindow != self.params.get('ha_vm_max_failure_window')):
return True
if self.ha_admission_control:
if self.params.get('slot_based_admission_control'):
policy = self.params.get('slot_based_admission_control')
if not isinstance(das_config.admissionControlPolicy, vim.cluster.FailoverLevelAdmissionControlPolicy) or \
das_config.admissionControlPolicy.failoverLevel != policy.get('failover_level'):
return True
elif self.params.get('reservation_based_admission_control'):
policy = self.params.get('reservation_based_admission_control')
auto_compute_percentages = policy.get('auto_compute_percentages')
if not isinstance(das_config.admissionControlPolicy, vim.cluster.FailoverResourcesAdmissionControlPolicy) or \
das_config.admissionControlPolicy.autoComputePercentages != auto_compute_percentages or \
das_config.admissionControlPolicy.failoverLevel != policy.get('failover_level'):
return True
if not auto_compute_percentages:
if das_config.admissionControlPolicy.cpuFailoverResourcesPercent != policy.get('cpu_failover_resources_percent') or \
das_config.admissionControlPolicy.memoryFailoverResourcesPercent != policy.get('memory_failover_resources_percent'):
return True
elif self.params.get('failover_host_admission_control'):
policy = self.params.get('failover_host_admission_control')
if not isinstance(das_config.admissionControlPolicy, vim.cluster.FailoverHostAdmissionControlPolicy):
return True
das_config.admissionControlPolicy.failoverHosts.sort(key=lambda h: h.name)
if das_config.admissionControlPolicy.failoverHosts != self.get_failover_hosts():
return True
return False return False
def configure_ha(self): def configure_ha(self):
@ -218,15 +335,11 @@ class VMwareCluster(PyVmomi):
cluster_config_spec = vim.cluster.ConfigSpecEx() cluster_config_spec = vim.cluster.ConfigSpecEx()
cluster_config_spec.dasConfig = vim.cluster.DasConfigInfo() cluster_config_spec.dasConfig = vim.cluster.DasConfigInfo()
cluster_config_spec.dasConfig.enabled = self.enable_ha cluster_config_spec.dasConfig.enabled = self.enable_ha
cluster_config_spec.dasConfig.admissionControlPolicy = vim.cluster.FailoverLevelAdmissionControlPolicy()
cluster_config_spec.dasConfig.admissionControlPolicy.failoverLevel = self.params.get('ha_failover_level')
ha_vm_monitoring = self.params.get('ha_vm_monitoring') if self.enable_ha:
das_vm_config = None
if ha_vm_monitoring in ['vmMonitoringOnly', 'vmAndAppMonitoring']:
vm_tool_spec = vim.cluster.VmToolsMonitoringSettings() vm_tool_spec = vim.cluster.VmToolsMonitoringSettings()
vm_tool_spec.enabled = True vm_tool_spec.enabled = True
vm_tool_spec.vmMonitoring = ha_vm_monitoring vm_tool_spec.vmMonitoring = self.params.get('ha_vm_monitoring')
vm_tool_spec.failureInterval = self.params.get('ha_vm_failure_interval') vm_tool_spec.failureInterval = self.params.get('ha_vm_failure_interval')
vm_tool_spec.minUpTime = self.params.get('ha_vm_min_up_time') vm_tool_spec.minUpTime = self.params.get('ha_vm_min_up_time')
vm_tool_spec.maxFailures = self.params.get('ha_vm_max_failures') vm_tool_spec.maxFailures = self.params.get('ha_vm_max_failures')
@ -234,14 +347,36 @@ class VMwareCluster(PyVmomi):
das_vm_config = vim.cluster.DasVmSettings() das_vm_config = vim.cluster.DasVmSettings()
das_vm_config.restartPriority = self.params.get('ha_restart_priority') das_vm_config.restartPriority = self.params.get('ha_restart_priority')
das_vm_config.isolationResponse = None das_vm_config.isolationResponse = self.host_isolation_response
das_vm_config.vmToolsMonitoringSettings = vm_tool_spec das_vm_config.vmToolsMonitoringSettings = vm_tool_spec
cluster_config_spec.dasConfig.defaultVmSettings = das_vm_config
cluster_config_spec.dasConfig.admissionControlEnabled = self.params.get('ha_admission_control_enabled') cluster_config_spec.dasConfig.admissionControlEnabled = self.ha_admission_control
if self.ha_admission_control:
if self.params.get('slot_based_admission_control'):
cluster_config_spec.dasConfig.admissionControlPolicy = vim.cluster.FailoverLevelAdmissionControlPolicy()
policy = self.params.get('slot_based_admission_control')
cluster_config_spec.dasConfig.admissionControlPolicy.failoverLevel = policy.get('failover_level')
elif self.params.get('reservation_based_admission_control'):
cluster_config_spec.dasConfig.admissionControlPolicy = vim.cluster.FailoverResourcesAdmissionControlPolicy()
policy = self.params.get('reservation_based_admission_control')
auto_compute_percentages = policy.get('auto_compute_percentages')
cluster_config_spec.dasConfig.admissionControlPolicy.autoComputePercentages = auto_compute_percentages
cluster_config_spec.dasConfig.admissionControlPolicy.failoverLevel = policy.get('failover_level')
if not auto_compute_percentages:
cluster_config_spec.dasConfig.admissionControlPolicy.cpuFailoverResourcesPercent = \
policy.get('cpu_failover_resources_percent')
cluster_config_spec.dasConfig.admissionControlPolicy.memoryFailoverResourcesPercent = \
policy.get('memory_failover_resources_percent')
elif self.params.get('failover_host_admission_control'):
cluster_config_spec.dasConfig.admissionControlPolicy = vim.cluster.FailoverHostAdmissionControlPolicy()
policy = self.params.get('failover_host_admission_control')
cluster_config_spec.dasConfig.admissionControlPolicy.failoverHosts = self.get_failover_hosts()
cluster_config_spec.dasConfig.hostMonitoring = self.params.get('ha_host_monitoring') cluster_config_spec.dasConfig.hostMonitoring = self.params.get('ha_host_monitoring')
cluster_config_spec.dasConfig.vmMonitoring = ha_vm_monitoring cluster_config_spec.dasConfig.vmMonitoring = self.params.get('ha_vm_monitoring')
cluster_config_spec.dasConfig.defaultVmSettings = das_vm_config
try: try:
task = self.cluster.ReconfigureComputeResource_Task(cluster_config_spec, True) task = self.cluster.ReconfigureComputeResource_Task(cluster_config_spec, True)
changed, result = wait_for_task(task) changed, result = wait_for_task(task)
@ -267,10 +402,12 @@ def main():
datacenter=dict(type='str', required=True, aliases=['datacenter_name']), datacenter=dict(type='str', required=True, aliases=['datacenter_name']),
# HA # HA
enable_ha=dict(type='bool', default=False), enable_ha=dict(type='bool', default=False),
ha_failover_level=dict(type='int', default=2),
ha_host_monitoring=dict(type='str', ha_host_monitoring=dict(type='str',
default='enabled', default='enabled',
choices=['enabled', 'disabled']), choices=['enabled', 'disabled']),
host_isolation_response=dict(type='str',
default='none',
choices=['none', 'powerOff', 'shutdown']),
# HA VM Monitoring related parameters # HA VM Monitoring related parameters
ha_vm_monitoring=dict(type='str', ha_vm_monitoring=dict(type='str',
choices=['vmAndAppMonitoring', 'vmMonitoringOnly', 'vmMonitoringDisabled'], choices=['vmAndAppMonitoring', 'vmMonitoringOnly', 'vmMonitoringDisabled'],
@ -283,12 +420,27 @@ def main():
ha_restart_priority=dict(type='str', ha_restart_priority=dict(type='str',
choices=['high', 'low', 'medium', 'disabled'], choices=['high', 'low', 'medium', 'disabled'],
default='medium'), default='medium'),
ha_admission_control_enabled=dict(type='bool', default=True), # HA Admission Control related parameters
slot_based_admission_control=dict(type='dict', options=dict(
failover_level=dict(type='int', required=True),
)),
reservation_based_admission_control=dict(type='dict', options=dict(
auto_compute_percentages=dict(type='bool', default=True),
failover_level=dict(type='int', required=True),
cpu_failover_resources_percent=dict(type='int', default=50),
memory_failover_resources_percent=dict(type='int', default=50),
)),
failover_host_admission_control=dict(type='dict', options=dict(
failover_hosts=dict(type='list', elements='str', required=True),
)),
)) ))
module = AnsibleModule( module = AnsibleModule(
argument_spec=argument_spec, argument_spec=argument_spec,
supports_check_mode=True, supports_check_mode=True,
mutually_exclusive=[
['slot_based_admission_control', 'reservation_based_admission_control', 'failover_host_admission_control']
]
) )
vmware_cluster_ha = VMwareCluster(module) vmware_cluster_ha = VMwareCluster(module)

View file

@ -33,7 +33,84 @@
that: that:
- "{{ cluster_ha_result_0001.changed == true }}" - "{{ cluster_ha_result_0001.changed == true }}"
# Testcase 0002: Disable HA # Testcase 0002: Enable Slot based Admission Control
# Switch the test cluster to the slot based admission control policy,
# tolerating one host failure.
- name: Enable Slot based Admission Control
  vmware_cluster_ha:
    validate_certs: false
    hostname: "{{ vcenter_hostname }}"
    username: "{{ vcenter_username }}"
    password: "{{ vcenter_password }}"
    datacenter_name: "{{ dc1 }}"
    cluster_name: test_cluster_ha
    enable_ha: true
    slot_based_admission_control:
      failover_level: 1
  register: cluster_ha_result_0002

# `that` entries are already evaluated as Jinja2 expressions, so they are
# written without the surrounding {{ }} delimiters.
- name: Ensure Admission Control is enabled
  assert:
    that:
      - cluster_ha_result_0002 is changed
# Testcase 0003: Enable Cluster resource Percentage based Admission Control
# The percentages are given explicitly, so auto_compute_percentages is
# switched off for them to take effect.
- name: Enable Cluster resource Percentage based Admission Control
  vmware_cluster_ha:
    validate_certs: false
    hostname: "{{ vcenter_hostname }}"
    username: "{{ vcenter_username }}"
    password: "{{ vcenter_password }}"
    datacenter_name: "{{ dc1 }}"
    cluster_name: test_cluster_ha
    enable_ha: true
    reservation_based_admission_control:
      auto_compute_percentages: false
      failover_level: 1
      cpu_failover_resources_percent: 33
      memory_failover_resources_percent: 33
  register: cluster_ha_result_0003

# `that` entries are already evaluated as Jinja2 expressions, so they are
# written without the surrounding {{ }} delimiters.
- name: Ensure Admission Control is enabled
  assert:
    that:
      - cluster_ha_result_0003 is changed
# Testcase 0004: Set Isolation Response to powerOff
- name: Set Isolation Response to powerOff
  vmware_cluster_ha:
    validate_certs: false
    hostname: "{{ vcenter_hostname }}"
    username: "{{ vcenter_username }}"
    password: "{{ vcenter_password }}"
    datacenter_name: "{{ dc1 }}"
    cluster_name: test_cluster_ha
    enable_ha: true
    host_isolation_response: 'powerOff'
  register: cluster_ha_result_0004

# `that` entries are already evaluated as Jinja2 expressions, so they are
# written without the surrounding {{ }} delimiters.
- name: Ensure Isolation Response is enabled
  assert:
    that:
      - cluster_ha_result_0004 is changed
# Testcase 0005: Set Isolation Response to shutdown
- name: Set Isolation Response to shutdown
  vmware_cluster_ha:
    validate_certs: false
    hostname: "{{ vcenter_hostname }}"
    username: "{{ vcenter_username }}"
    password: "{{ vcenter_password }}"
    datacenter_name: "{{ dc1 }}"
    cluster_name: test_cluster_ha
    enable_ha: true
    host_isolation_response: 'shutdown'
  register: cluster_ha_result_0005

# `that` entries are already evaluated as Jinja2 expressions, so they are
# written without the surrounding {{ }} delimiters.
- name: Ensure Isolation Response is enabled
  assert:
    that:
      - cluster_ha_result_0005 is changed
# Testcase 0006: Disable HA
- name: Disable HA - name: Disable HA
vmware_cluster_ha: vmware_cluster_ha:
validate_certs: False validate_certs: False
@ -43,12 +120,12 @@
datacenter_name: "{{ dc1 }}" datacenter_name: "{{ dc1 }}"
cluster_name: test_cluster_ha cluster_name: test_cluster_ha
enable_ha: no enable_ha: no
register: cluster_ha_result_0002 register: cluster_ha_result_0006
- name: Ensure HA is disabled - name: Ensure HA is disabled
assert: assert:
that: that:
- "{{ cluster_ha_result_0002.changed == true }}" - "{{ cluster_ha_result_0006.changed == true }}"
# Delete test cluster # Delete test cluster
- name: Delete test cluster - name: Delete test cluster