#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# This file is largely copied from the Nagios module included in the
# Func project. Original copyright follows:
#
# func-nagios - Schedule downtime and enables/disable notifications
# Copyright 2011, Red Hat, Inc.
# Tim Bielawa <tbielawa@redhat.com>
#
# This software may be freely redistributed under the terms of the GNU
# general public license version 2.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
Action Summaries (all must be delegate_to'd to a Nagios server):

* Set 30 minutes of downtime for a single service:
    action: nagios action=downtime minutes=30 service=httpd host=$inventory_hostname

* Set 30 minutes of downtime for a few services:
    action: nagios action=downtime minutes=30 services=httpd,git,nfs host=$inventory_hostname

* (Un)Silence all alerts from a host/service:
    action: nagios action=silence host=$inventory_hostname
    action: nagios action=unsilence host=$inventory_hostname

* Enable/Disable alerts for a host or service(s):
    action: nagios action=enable_alerts service=host host=$inventory_hostname
    action: nagios action=disable_alerts services=httpd,git,nfs host=$inventory_hostname

Note: 'service' is an alias for 'services'. Separate multiple services
with commas.

Set the path to the command file explicitly with the 'cmdfile'
parameter.
"""
# YAML description of the module. The nesting below is significant: each
# option is a mapping under `options`, and the top-level `author` key is
# distinct from the `author` option.
DOCUMENTATION = '''
---
module: nagios
short_description: Perform common tasks in Nagios related to downtime and notifications.
description:
  - "The Nagios module has two basic functions: scheduling downtime and toggling alerts for services or hosts."
  - All actions require the C(host) parameter to be given explicitly. In playbooks you can use the C($inventory_hostname) variable to refer to the host the playbook is currently running on.
  - You can specify multiple services at once by separating them with commas, e.g., C(services=httpd,nfs,puppet).
  - When specifying what service to handle there is a special service value, B(host), which will handle alerts/downtime for the B(host itself), e.g., C(service=host). This keyword may not be given with other services at the same time. I(Setting alerts/downtime for a host does not affect alerts/downtime for any of the services running on it.)
  - When using the B(nagios) module you will need to specify your nagios server using the C(delegate_to) parameter.
version_added: 0.7
options:
  action:
    description:
      - Action to take.
    required: true
    default: null
    choices: [ "downtime", "enable_alerts", "disable_alerts", "silence", "unsilence" ]
  host:
    description:
      - Host to operate on in Nagios.
    required: true
    default: null
  cmdfile:
    description:
      - Path to the nagios I(command file) (FIFO pipe).
      - Only required if auto-detection fails.
    required: false
    default: auto-detected
  author:
    description:
      - Author to leave downtime comments as.
      - Only useable with the C(downtime) action.
    required: false
    default: Ansible
  minutes:
    description:
      - Minutes to schedule downtime for.
      - Only useable with the C(downtime) action.
    required: false
    default: 30
  services:
    description:
      - What to manage downtime/alerts for. Separate multiple services with commas.
      - C(service) is an alias for C(services).
      - B(Required) option when using the C(downtime), C(enable_alerts), and C(disable_alerts) actions.
    required: true
    default: null
author: Tim Bielawa
'''
# Usage examples, one small YAML document per entry. `code` is nested
# under the same list item as `description`, and any description that
# contains ": " is quoted so it stays a single scalar.
EXAMPLES = [
    """
- description: set 30 minutes of apache downtime
  code: nagios action=downtime minutes=30 service=httpd host=$inventory_hostname
""",
    """
- description: schedule an hour of HOST downtime
  code: nagios action=downtime minutes=60 service=host host=$inventory_hostname
""",
    """
- description: schedule downtime for a few services
  code: nagios action=downtime services=frob,foobar,qeuz host=$inventory_hostname
""",
    """
- description: enable SMART disk alerts
  code: nagios action=enable_alerts service=smart host=$inventory_hostname
""",
    """
- description: "two services at once: disable httpd and nfs alerts"
  code: nagios action=disable_alerts service=httpd,nfs host=$inventory_hostname
""",
    """
- description: disable HOST alerts
  code: nagios action=disable_alerts service=host host=$inventory_hostname
""",
    """
- description: silence ALL alerts
  code: nagios action=silence host=$inventory_hostname
""",
    """
- description: unsilence all alerts
  code: nagios action=unsilence host=$inventory_hostname
""",
]
import ConfigParser
import types
import time
import os.path
######################################################################
def which_cmdfile(locations=None):
    """
    Locate the Nagios external command file (FIFO pipe).

    Each candidate nagios.cfg is scanned in order and the value of the
    first `command_file` directive found is returned.

    locations - Optional list of nagios.cfg paths to search, in order.
                When omitted, a built-in list of per-distribution
                default paths is used.

    Returns the command file path as a string, or None when no readable
    config file defines `command_file`.
    """
    if locations is None:
        locations = [
            # rhel
            '/etc/nagios/nagios.cfg',
            # debian
            '/etc/nagios3/nagios.cfg',
            # older debian
            '/etc/nagios2/nagios.cfg',
            # bsd, solaris
            '/usr/local/etc/nagios/nagios.cfg',
            # groundwork it monitoring
            '/usr/local/groundwork/nagios/etc/nagios.cfg',
            # open monitoring distribution
            '/omd/sites/oppy/tmp/nagios/nagios.cfg',
            # ???
            '/usr/local/nagios/etc/nagios.cfg',
            '/usr/local/nagios/nagios.cfg',
            '/opt/nagios/etc/nagios.cfg',
            '/opt/nagios/nagios.cfg'
        ]

    for path in locations:
        if not os.path.exists(path):
            continue
        try:
            cfg = open(path)
        except IOError:
            # Present but unreadable: keep searching the other locations
            # rather than aborting before arguments are even parsed.
            continue
        # try/finally (not `with`) so the handle is always closed while
        # staying compatible with old interpreters.
        try:
            for line in cfg:
                key, sep, value = line.partition('=')
                # Exact key match: tolerates leading whitespace and does
                # not confuse other keys that merely share the prefix.
                if sep and key.strip() == 'command_file':
                    return value.strip()
        finally:
            cfg.close()

    return None
######################################################################
def main():
    """
    Module entry point.

    Builds the AnsibleModule argument spec, performs the per-action
    validation that the spec cannot express, then hands the parsed
    parameters to the Nagios class to carry out the action.
    """
    ACTION_CHOICES = [
        'downtime',
        'silence',
        'unsilence',
        'enable_alerts',
        'disable_alerts'
    ]

    module = AnsibleModule(
        argument_spec=dict(
            action=dict(required=True, default=None, choices=ACTION_CHOICES),
            author=dict(default='Ansible'),
            host=dict(required=True, default=None),
            minutes=dict(default=30),
            cmdfile=dict(default=which_cmdfile()),
            services=dict(default=None, aliases=['service']),
        )
    )

    action = module.params['action']
    minutes = module.params['minutes']
    services = module.params['services']
    cmdfile = module.params['cmdfile']

    ##################################################################
    # Required args per action:
    # downtime = (minutes, service, host)
    # (un)silence = (host)
    # (enable/disable)_alerts = (service, host)
    #
    # AnsibleModule will verify most stuff, we need to verify
    # 'minutes' and 'service' manually.
    ##################################################################
    if action == 'downtime':
        # Make sure there's an actual service selected
        if not services:
            module.fail_json(msg='no service selected to set downtime for')
        # Make sure minutes is a number. Only conversion errors are
        # caught: a bare except would also swallow the SystemExit that
        # fail_json itself raises.
        try:
            int(minutes)
        except (TypeError, ValueError):
            module.fail_json(msg='invalid entry for minutes')

    ##################################################################
    if action in ['enable_alerts', 'disable_alerts']:
        if not services:
            module.fail_json(msg='a service is required when setting alerts')

    ##################################################################
    if not cmdfile:
        # fail_json only accepts keyword arguments; the message must be
        # passed as msg=.
        module.fail_json(msg='unable to locate nagios.cfg')

    ##################################################################
    ansible_nagios = Nagios(module, **module.params)
    ansible_nagios.act()
######################################################################
class Nagios(object):
    """
    Perform common tasks in Nagios related to downtime and
    notifications.

    The complete set of external commands Nagios handles is documented
    on their website:

    http://old.nagios.org/developerinfo/externalcommands/commandlist.php

    Note that in the case of `schedule_svc_downtime`,
    `enable_svc_notifications`, and `disable_svc_notifications`, the
    service argument should be passed as a list.
    """

    # Returned by the enable_*/(un)silence_host methods when a command
    # could not be written to the command file.
    _WRITE_FAILED = "Fail: could not write to the command file"

    def __init__(self, module, **kwargs):
        """
        module - The AnsibleModule instance, used for fail_json/exit_json.
        kwargs - The parsed module parameters: action, author, host,
                 minutes, cmdfile and services.
        """
        self.module = module
        self.action = kwargs['action']
        self.author = kwargs['author']
        self.host = kwargs['host']
        self.minutes = int(kwargs['minutes'])
        self.cmdfile = kwargs['cmdfile']

        services = kwargs['services']
        if (services is None) or (services == 'host'):
            # None: no service given. 'host': operate on the host itself.
            self.services = services
        else:
            # Comma separated list; tolerate spaces around the commas and
            # drop empty entries (e.g. from a trailing comma).
            self.services = [s.strip() for s in services.split(',')
                             if s.strip()]

        self.command_results = []

    def _now(self):
        """
        The time in seconds since 12:00:00AM Jan 1, 1970
        """
        return int(time.time())

    def _write_command(self, cmd):
        """
        Write the given command to the Nagios command file.

        On success the (stripped) command is recorded in
        self.command_results and True is returned. On an IOError the
        module is failed via fail_json; False is returned should
        fail_json itself return.
        """
        try:
            fp = open(self.cmdfile, 'w')
            try:
                fp.write(cmd)
                fp.flush()
            finally:
                # Always release the handle, even when the write fails.
                fp.close()
        except IOError:
            self.module.fail_json(msg='unable to write to nagios command file',
                                  cmdfile=self.cmdfile)
            return False

        self.command_results.append(cmd.strip())
        return True

    def _submit_notif(self, cmd, target):
        """
        Format and write one notification command for `target`.

        Returns the command string, or the failure message if it could
        not be written.
        """
        notif_str = self._fmt_notif_str(cmd, target)
        if self._write_command(notif_str):
            return notif_str
        return self._WRITE_FAILED

    def _submit_all(self, notif_strs):
        """
        Write every command in `notif_strs`, even if an earlier one fails.

        Returns the list of command strings when all writes succeeded,
        otherwise the failure message.
        """
        all_written = True
        submitted = []
        for notif_str in notif_strs:
            all_written = self._write_command(notif_str) and all_written
            submitted.append(notif_str)

        if all_written:
            return submitted
        return self._WRITE_FAILED

    def _fmt_dt_str(self, cmd, host, duration, author=None,
                    comment="Scheduling downtime", start=None,
                    svc=None, fixed=1, trigger=0):
        """
        Format an external-command downtime string.

        cmd - Nagios command ID
        host - Host schedule downtime on
        duration - Minutes to schedule downtime for
        author - Name to file the downtime as
        comment - Reason for running this command (upgrade, reboot, etc)
        start - Start of downtime in seconds since 12:00AM Jan 1 1970
          Default is to use the entry time (now)
        svc - Service to schedule downtime for, omit when for host downtime
        fixed - Start now if 1, start when a problem is detected if 0
        trigger - Optional ID of event to start downtime from. Leave as 0 for
          fixed downtime.

        Syntax: [submitted] COMMAND;<host_name>;[<service_description>;]
        <start_time>;<end_time>;<fixed>;<trigger_id>;<duration>;<author>;
        <comment>
        """
        entry_time = self._now()
        if start is None:
            start = entry_time

        hdr = "[%s] %s;%s;" % (entry_time, cmd, host)
        # Nagios wants the duration in seconds.
        duration_s = (duration * 60)
        end = start + duration_s

        if not author:
            author = self.author

        dt_args = [str(start), str(end), str(fixed), str(trigger),
                   str(duration_s), author, comment]
        if svc is not None:
            # Service downtime carries the service description right
            # after the host name; host downtime omits the field.
            dt_args.insert(0, svc)

        return hdr + ";".join(dt_args) + "\n"

    def _fmt_notif_str(self, cmd, host, svc=None):
        """
        Format an external-command notification string.

        cmd - Nagios command ID.
        host - Host (or group name) to en/disable notifications on.
        svc - Service to en/disable notifications for. Omit for
          commands that take only a host or group name.

        Syntax: [submitted] COMMAND;<host_name>[;<service_description>]
        """
        entry_time = self._now()

        if svc is not None:
            return "[%s] %s;%s;%s\n" % (entry_time, cmd, host, svc)

        # No service given: the command targets the host/group itself
        return "[%s] %s;%s\n" % (entry_time, cmd, host)

    def schedule_svc_downtime(self, host, services=None, minutes=30):
        """
        This command is used to schedule downtime for a particular
        service.

        During the specified downtime, Nagios will not send
        notifications out about the service.

        Syntax: SCHEDULE_SVC_DOWNTIME;<host_name>;<service_description>;
        <start_time>;<end_time>;<fixed>;<trigger_id>;<duration>;<author>;
        <comment>
        """
        cmd = "SCHEDULE_SVC_DOWNTIME"
        for service in (services or []):
            dt_cmd_str = self._fmt_dt_str(cmd, host, minutes, svc=service)
            self._write_command(dt_cmd_str)

    def schedule_host_downtime(self, host, minutes=30):
        """
        This command is used to schedule downtime for a particular
        host.

        During the specified downtime, Nagios will not send
        notifications out about the host.

        Syntax: SCHEDULE_HOST_DOWNTIME;<host_name>;<start_time>;<end_time>;
        <fixed>;<trigger_id>;<duration>;<author>;<comment>
        """
        cmd = "SCHEDULE_HOST_DOWNTIME"
        dt_cmd_str = self._fmt_dt_str(cmd, host, minutes)
        self._write_command(dt_cmd_str)

    def schedule_hostgroup_host_downtime(self, hostgroup, minutes=30):
        """
        This command is used to schedule downtime for all hosts in a
        particular hostgroup.

        During the specified downtime, Nagios will not send
        notifications out about the hosts.

        Syntax: SCHEDULE_HOSTGROUP_HOST_DOWNTIME;<hostgroup_name>;<start_time>;
        <end_time>;<fixed>;<trigger_id>;<duration>;<author>;<comment>
        """
        cmd = "SCHEDULE_HOSTGROUP_HOST_DOWNTIME"
        dt_cmd_str = self._fmt_dt_str(cmd, hostgroup, minutes)
        self._write_command(dt_cmd_str)

    def schedule_hostgroup_svc_downtime(self, hostgroup, minutes=30):
        """
        This command is used to schedule downtime for all services in
        a particular hostgroup.

        During the specified downtime, Nagios will not send
        notifications out about the services.

        Note that scheduling downtime for services does not
        automatically schedule downtime for the hosts those services
        are associated with.

        Syntax: SCHEDULE_HOSTGROUP_SVC_DOWNTIME;<hostgroup_name>;<start_time>;
        <end_time>;<fixed>;<trigger_id>;<duration>;<author>;<comment>
        """
        cmd = "SCHEDULE_HOSTGROUP_SVC_DOWNTIME"
        dt_cmd_str = self._fmt_dt_str(cmd, hostgroup, minutes)
        self._write_command(dt_cmd_str)

    def schedule_servicegroup_host_downtime(self, servicegroup, minutes=30):
        """
        This command is used to schedule downtime for all hosts in a
        particular servicegroup.

        During the specified downtime, Nagios will not send
        notifications out about the hosts.

        Syntax: SCHEDULE_SERVICEGROUP_HOST_DOWNTIME;<servicegroup_name>;
        <start_time>;<end_time>;<fixed>;<trigger_id>;<duration>;<author>;
        <comment>
        """
        cmd = "SCHEDULE_SERVICEGROUP_HOST_DOWNTIME"
        dt_cmd_str = self._fmt_dt_str(cmd, servicegroup, minutes)
        self._write_command(dt_cmd_str)

    def schedule_servicegroup_svc_downtime(self, servicegroup, minutes=30):
        """
        This command is used to schedule downtime for all services in
        a particular servicegroup.

        During the specified downtime, Nagios will not send
        notifications out about the services.

        Note that scheduling downtime for services does not
        automatically schedule downtime for the hosts those services
        are associated with.

        Syntax: SCHEDULE_SERVICEGROUP_SVC_DOWNTIME;<servicegroup_name>;
        <start_time>;<end_time>;<fixed>;<trigger_id>;<duration>;<author>;
        <comment>
        """
        cmd = "SCHEDULE_SERVICEGROUP_SVC_DOWNTIME"
        dt_cmd_str = self._fmt_dt_str(cmd, servicegroup, minutes)
        self._write_command(dt_cmd_str)

    def disable_host_svc_notifications(self, host):
        """
        This command is used to prevent notifications from being sent
        out for all services on the specified host.

        Note that this command does not disable notifications from
        being sent out about the host.

        Syntax: DISABLE_HOST_SVC_NOTIFICATIONS;<host_name>
        """
        cmd = "DISABLE_HOST_SVC_NOTIFICATIONS"
        notif_str = self._fmt_notif_str(cmd, host)
        self._write_command(notif_str)

    def disable_host_notifications(self, host):
        """
        This command is used to prevent notifications from being sent
        out for the specified host.

        Note that this command does not disable notifications for
        services associated with this host.

        Syntax: DISABLE_HOST_NOTIFICATIONS;<host_name>
        """
        cmd = "DISABLE_HOST_NOTIFICATIONS"
        notif_str = self._fmt_notif_str(cmd, host)
        self._write_command(notif_str)

    def disable_svc_notifications(self, host, services=None):
        """
        This command is used to prevent notifications from being sent
        out for the specified service.

        Note that this command does not disable notifications from
        being sent out about the host.

        Syntax: DISABLE_SVC_NOTIFICATIONS;<host_name>;<service_description>
        """
        cmd = "DISABLE_SVC_NOTIFICATIONS"
        for service in (services or []):
            notif_str = self._fmt_notif_str(cmd, host, svc=service)
            self._write_command(notif_str)

    def disable_servicegroup_host_notifications(self, servicegroup):
        """
        This command is used to prevent notifications from being sent
        out for all hosts in the specified servicegroup.

        Note that this command does not disable notifications for
        services associated with hosts in this service group.

        Syntax: DISABLE_SERVICEGROUP_HOST_NOTIFICATIONS;<servicegroup_name>
        """
        cmd = "DISABLE_SERVICEGROUP_HOST_NOTIFICATIONS"
        notif_str = self._fmt_notif_str(cmd, servicegroup)
        self._write_command(notif_str)

    def disable_servicegroup_svc_notifications(self, servicegroup):
        """
        This command is used to prevent notifications from being sent
        out for all services in the specified servicegroup.

        Note that this does not prevent notifications from being sent
        out about the hosts in this servicegroup.

        Syntax: DISABLE_SERVICEGROUP_SVC_NOTIFICATIONS;<servicegroup_name>
        """
        cmd = "DISABLE_SERVICEGROUP_SVC_NOTIFICATIONS"
        notif_str = self._fmt_notif_str(cmd, servicegroup)
        self._write_command(notif_str)

    def disable_hostgroup_host_notifications(self, hostgroup):
        """
        Disables notifications for all hosts in a particular
        hostgroup.

        Note that this does not disable notifications for the services
        associated with the hosts in the hostgroup - see the
        DISABLE_HOSTGROUP_SVC_NOTIFICATIONS command for that.

        Syntax: DISABLE_HOSTGROUP_HOST_NOTIFICATIONS;<hostgroup_name>
        """
        cmd = "DISABLE_HOSTGROUP_HOST_NOTIFICATIONS"
        notif_str = self._fmt_notif_str(cmd, hostgroup)
        self._write_command(notif_str)

    def disable_hostgroup_svc_notifications(self, hostgroup):
        """
        Disables notifications for all services associated with hosts
        in a particular hostgroup.

        Note that this does not disable notifications for the hosts in
        the hostgroup - see the DISABLE_HOSTGROUP_HOST_NOTIFICATIONS
        command for that.

        Syntax: DISABLE_HOSTGROUP_SVC_NOTIFICATIONS;<hostgroup_name>
        """
        cmd = "DISABLE_HOSTGROUP_SVC_NOTIFICATIONS"
        notif_str = self._fmt_notif_str(cmd, hostgroup)
        self._write_command(notif_str)

    def enable_host_notifications(self, host):
        """
        Enables notifications for a particular host.

        Note that this command does not enable notifications for
        services associated with this host.

        Syntax: ENABLE_HOST_NOTIFICATIONS;<host_name>
        """
        cmd = "ENABLE_HOST_NOTIFICATIONS"
        notif_str = self._fmt_notif_str(cmd, host)
        self._write_command(notif_str)

    def enable_host_svc_notifications(self, host):
        """
        Enables notifications for all services on the specified host.

        Note that this does not enable notifications for the host.

        Returns the command string, or a failure message.

        Syntax: ENABLE_HOST_SVC_NOTIFICATIONS;<host_name>
        """
        return self._submit_notif("ENABLE_HOST_SVC_NOTIFICATIONS", host)

    def enable_svc_notifications(self, host, services=None):
        """
        Enables notifications for a particular service.

        Note that this does not enable notifications for the host.

        Returns the list of command strings, or a failure message.

        Syntax: ENABLE_SVC_NOTIFICATIONS;<host_name>;<service_description>
        """
        cmd = "ENABLE_SVC_NOTIFICATIONS"
        return self._submit_all(
            [self._fmt_notif_str(cmd, host, svc=service)
             for service in (services or [])])

    def enable_hostgroup_host_notifications(self, hostgroup):
        """
        Enables notifications for all hosts in a particular hostgroup.

        Note that this command does not enable notifications for
        services associated with the hosts in this hostgroup.

        Returns the command string, or a failure message.

        Syntax: ENABLE_HOSTGROUP_HOST_NOTIFICATIONS;<hostgroup_name>
        """
        return self._submit_notif("ENABLE_HOSTGROUP_HOST_NOTIFICATIONS",
                                  hostgroup)

    def enable_hostgroup_svc_notifications(self, hostgroup):
        """
        Enables notifications for all services that are associated
        with hosts in a particular hostgroup.

        Note that this does not enable notifications for the hosts in
        this hostgroup.

        Returns the command string, or a failure message.

        Syntax: ENABLE_HOSTGROUP_SVC_NOTIFICATIONS;<hostgroup_name>
        """
        return self._submit_notif("ENABLE_HOSTGROUP_SVC_NOTIFICATIONS",
                                  hostgroup)

    def enable_servicegroup_host_notifications(self, servicegroup):
        """
        Enables notifications for all hosts that have services that
        are members of a particular servicegroup.

        Note that this command does not enable notifications for
        services associated with the hosts in this servicegroup.

        Returns the command string, or a failure message.

        Syntax: ENABLE_SERVICEGROUP_HOST_NOTIFICATIONS;<servicegroup_name>
        """
        return self._submit_notif("ENABLE_SERVICEGROUP_HOST_NOTIFICATIONS",
                                  servicegroup)

    def enable_servicegroup_svc_notifications(self, servicegroup):
        """
        Enables notifications for all services that are members of a
        particular servicegroup.

        Note that this does not enable notifications for the hosts in
        this servicegroup.

        Returns the command string, or a failure message.

        Syntax: ENABLE_SERVICEGROUP_SVC_NOTIFICATIONS;<servicegroup_name>
        """
        return self._submit_notif("ENABLE_SERVICEGROUP_SVC_NOTIFICATIONS",
                                  servicegroup)

    def silence_host(self, host):
        """
        This command is used to prevent notifications from being sent
        out for the host and all services on the specified host.

        This is equivalent to calling disable_host_svc_notifications
        and disable_host_notifications.

        Returns the list of command strings, or a failure message.

        Syntax: DISABLE_HOST_SVC_NOTIFICATIONS;<host_name>
        Syntax: DISABLE_HOST_NOTIFICATIONS;<host_name>
        """
        cmd = [
            "DISABLE_HOST_SVC_NOTIFICATIONS",
            "DISABLE_HOST_NOTIFICATIONS"
        ]
        return self._submit_all([self._fmt_notif_str(c, host) for c in cmd])

    def unsilence_host(self, host):
        """
        This command is used to enable notifications for the host and
        all services on the specified host.

        This is equivalent to calling enable_host_svc_notifications
        and enable_host_notifications.

        Returns the list of command strings, or a failure message.

        Syntax: ENABLE_HOST_SVC_NOTIFICATIONS;<host_name>
        Syntax: ENABLE_HOST_NOTIFICATIONS;<host_name>
        """
        cmd = [
            "ENABLE_HOST_SVC_NOTIFICATIONS",
            "ENABLE_HOST_NOTIFICATIONS"
        ]
        return self._submit_all([self._fmt_notif_str(c, host) for c in cmd])

    def act(self):
        """
        Figure out what you want to do from ansible, and then do the
        needful (at the earliest).

        Dispatches on self.action, then reports the commands that were
        written through module.exit_json.
        """
        # host or service downtime?
        if self.action == 'downtime':
            if self.services == 'host':
                self.schedule_host_downtime(self.host, self.minutes)
            else:
                self.schedule_svc_downtime(self.host,
                                           services=self.services,
                                           minutes=self.minutes)
        # toggle the host AND service alerts
        elif self.action == 'silence':
            self.silence_host(self.host)
        elif self.action == 'unsilence':
            self.unsilence_host(self.host)
        # toggle host/svc alerts
        elif self.action == 'enable_alerts':
            if self.services == 'host':
                self.enable_host_notifications(self.host)
            else:
                self.enable_svc_notifications(self.host,
                                              services=self.services)
        elif self.action == 'disable_alerts':
            if self.services == 'host':
                self.disable_host_notifications(self.host)
            else:
                self.disable_svc_notifications(self.host,
                                               services=self.services)
        # unknown action
        else:
            self.module.fail_json(msg="unknown action specified: '%s'" %
                                  self.action)

        self.module.exit_json(nagios_commands=self.command_results,
                              changed=True)
######################################################################
# The marker line below is "magic" (see lib/ansible/module_common.py) and
# must be kept exactly as written.
# NOTE(review): presumably Ansible substitutes the shared module code here,
# which would be where AnsibleModule comes from -- confirm.
#<<INCLUDE_ANSIBLE_MODULE_COMMON>>
main()